Merge branch 'sync-v2' into master

Adds P2P sync (protobuf API, iroh QUIC transport, gossip + pkarr
discovery), SSE live refresh, plain-English code comments across
all source files and examples.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Jason Tudisco 2026-03-17 14:36:29 -06:00
commit e7def4b819
51 changed files with 3715 additions and 2899 deletions

41
Cargo.lock generated
View File

@ -172,6 +172,7 @@ dependencies = [
"mime",
"mime_guess",
"notify",
"prost",
"reqwest",
"rusqlite",
"serde",
@ -181,6 +182,7 @@ dependencies = [
"tempfile",
"thiserror",
"tokio",
"tokio-stream",
"tokio-test",
"tokio-util",
"tower-http",
@ -300,6 +302,12 @@ dependencies = [
"syn",
]
[[package]]
name = "either"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
[[package]]
name = "encoding_rs"
version = "0.8.35"
@ -913,6 +921,15 @@ dependencies = [
"serde",
]
[[package]]
name = "itertools"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285"
dependencies = [
"either",
]
[[package]]
name = "itoa"
version = "1.0.17"
@ -1310,6 +1327,29 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "prost"
version = "0.13.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5"
dependencies = [
"bytes",
"prost-derive",
]
[[package]]
name = "prost-derive"
version = "0.13.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d"
dependencies = [
"anyhow",
"itertools",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "pxfm"
version = "0.1.28"
@ -1875,6 +1915,7 @@ dependencies = [
"futures-core",
"pin-project-lite",
"tokio",
"tokio-util",
]
[[package]]

View File

@ -37,6 +37,12 @@ mime = "0.3"
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
# Protobuf (sync API)
prost = "0.13"
# Stream utilities (SSE for sync events)
tokio-stream = { version = "0.1", features = ["sync"] }
# Utilities
chrono = { version = "0.4", features = ["serde"] }
anyhow = "1"

View File

@ -3,3 +3,4 @@ admin_token: "super_secret_rebuild"
enable_thumbnail_cache: true
rebuild_error_threshold: 50
verify_interval_hours: 12
sync_api_key: "can-sync-default-key"

File diff suppressed because it is too large Load Diff

View File

@ -1,44 +1,60 @@
[package]
name = "can-sync"
version = "0.1.0"
version = "0.2.0"
edition = "2021"
description = "P2P sync service for CAN content-addressable storage"
description = "P2P sync agent for CAN service — full mirror replication via iroh"
[[bin]]
name = "can-sync"
path = "src/main.rs"
[[bin]]
name = "sync-test"
path = "tests/sync_test.rs"
[dependencies]
# P2P networking
# P2P networking (iroh for transport + gossip for discovery — NO iroh-docs)
iroh = "0.96"
iroh-blobs = "0.98"
iroh-docs = "0.96"
iroh-gossip = "0.96"
# HTTP server + client
axum = "0.8"
tokio = { version = "1", features = ["full"] }
reqwest = { version = "0.12", features = ["json", "multipart"] }
tower-http = { version = "0.6", features = ["cors"] }
# Protobuf (same message types as CAN service sync API)
prost = "0.13"
# HTTP client for CAN service sync API
reqwest = { version = "0.12", features = ["json", "multipart", "blocking"] }
# Serialization
serde = { version = "1", features = ["derive"] }
serde_json = "1"
serde_yaml = "0.9"
postcard = { version = "1", features = ["alloc"] }
# Storage
rusqlite = { version = "0.32", features = ["bundled"] }
# Crypto
blake3 = "1"
ed25519-dalek = "3.0.0-pre.1"
# Utilities
# Pkarr (internet rendezvous via relay servers — relay only, no DHT to avoid digest conflict)
pkarr = { version = "5", default-features = false, features = ["relays"] }
# DNS record parsing (used by pkarr)
simple-dns = "0.9"
# Async runtime
tokio = { version = "1", features = ["full"] }
# Logging
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
# Stream utilities (needed for gossip event stream)
n0-future = "0.1"
# SSE client (for real-time events from CAN service)
tokio-stream = "0.1"
futures-util = "0.3"
# Utilities
anyhow = "1"
open = "5"
sha2 = "0.10"
hex = "0.4"
uuid = { version = "1", features = ["v4"] }
chrono = { version = "0.4", features = ["serde"] }
bytes = "1"
futures-lite = "2"
tokio-util = { version = "0.7", features = ["io"] }
hex = "0.4"
serde_json = "1"
tempfile = "3"
rand = "0.9"

View File

@ -1,7 +1,20 @@
# CAN Sync configuration
can_service_url: "http://127.0.0.1:3210/api/v1/can/0"
listen_addr: "127.0.0.1:3213"
data_dir: "./can_sync_data"
relay_url: null
poll_interval_secs: 5
full_scan_interval_secs: 300
# CAN Sync v2 configuration
#
# This config is used by the go_example_1.ps1 script.
# All machines that clone this repo and run the script will
# auto-discover each other via iroh's relay network as long
# as they share the same sync_passphrase.
# URL of the local CAN Service (sync API is at /sync/*)
can_service_url: "http://127.0.0.1:3210"
# API key for CAN service's sync endpoints (must match sync_api_key in CAN config)
sync_api_key: "can-sync-default-key"
# Shared passphrase for peer discovery — all peers with the same passphrase
# find each other automatically over the internet via iroh relay servers.
# Change this to something unique to your team/project.
sync_passphrase: "duke-canman-sync"
# Seconds between fallback polls (SSE handles instant sync, this is a safety net)
poll_interval_secs: 30

View File

@ -0,0 +1,75 @@
#!/usr/bin/env pwsh
# CAN Sync v2 Integration Test Runner
#
# Builds the CAN service and the can-sync/sync-test binaries, then runs the
# sync integration test and exits with the test's exit code.
#
# Usage:
#   .\run-integration-test.ps1           # Build + run test
#   .\run-integration-test.ps1 -NoBuild  # Skip building, just run
param(
    # When set, skip both cargo builds and go straight to running the test.
    [switch]$NoBuild
)

# Abort on PowerShell-level errors; native cargo exit codes are checked
# explicitly via $LASTEXITCODE below.
$ErrorActionPreference = "Stop"

# Repo layout: this script lives two directories below the CAN service root.
$canServiceRoot = Resolve-Path (Join-Path $PSScriptRoot "../..")
$canSyncRoot = $PSScriptRoot

Write-Host ""
Write-Host "========================================" -ForegroundColor Cyan
Write-Host " CAN Sync v2 - Integration Test Runner" -ForegroundColor Cyan
Write-Host "========================================" -ForegroundColor Cyan
Write-Host ""

# Step 1: Build CAN service
if (-not $NoBuild) {
    Write-Host "[1/3] Building CAN service..." -ForegroundColor Yellow
    Push-Location $canServiceRoot
    try {
        # 2>&1 merges cargo's stderr into the piped output so build messages
        # are echoed (dimmed) instead of being treated as errors.
        cargo build 2>&1 | ForEach-Object { Write-Host " $_" -ForegroundColor DarkGray }
        if ($LASTEXITCODE -ne 0) {
            Write-Host "FAILED: CAN service build failed!" -ForegroundColor Red
            exit 1
        }
        Write-Host " CAN service built OK" -ForegroundColor Green
    } finally {
        # Always restore the original working directory, even on failure.
        Pop-Location
    }

    # Step 2: Build can-sync + sync-test
    Write-Host ""
    Write-Host "[2/3] Building can-sync and sync-test..." -ForegroundColor Yellow
    Push-Location $canSyncRoot
    try {
        cargo build --bin can-sync --bin sync-test 2>&1 | ForEach-Object { Write-Host " $_" -ForegroundColor DarkGray }
        if ($LASTEXITCODE -ne 0) {
            Write-Host "FAILED: can-sync build failed!" -ForegroundColor Red
            exit 1
        }
        Write-Host " can-sync built OK" -ForegroundColor Green
    } finally {
        Pop-Location
    }
} else {
    Write-Host "[SKIP] Builds skipped (-NoBuild)" -ForegroundColor DarkYellow
}

# Step 3: Run integration test (always runs; $testResult is set here in both
# the build and -NoBuild paths).
Write-Host ""
Write-Host "[3/3] Running integration test..." -ForegroundColor Yellow
Write-Host ""
Push-Location $canSyncRoot
try {
    cargo run --bin sync-test
    $testResult = $LASTEXITCODE
} finally {
    Pop-Location
}

Write-Host ""
if ($testResult -eq 0) {
    Write-Host "ALL TESTS PASSED" -ForegroundColor Green
} else {
    Write-Host "SOME TESTS FAILED (exit code: $testResult)" -ForegroundColor Red
}

# Propagate the test binary's exit code to the caller (CI-friendly).
exit $testResult

View File

@ -1,234 +0,0 @@
use std::sync::Arc;
use std::time::Duration;
use anyhow::Result;
use tracing::{debug, error, info, warn};
use crate::can_client::CanClient;
use crate::library::SyncState;
use crate::manifest::AssetSyncEntry;
use crate::node::SyncNode;
/// The announcer periodically polls CAN service for new or changed assets
/// and writes matching entries into iroh library documents.
pub struct Announcer {
    /// HTTP client for the local CAN service API.
    can: CanClient,
    /// Persistent sync bookkeeping (announced assets, last-seen timestamps).
    state: Arc<SyncState>,
    /// Local iroh node; provides the blob store and document writes.
    node: Arc<SyncNode>,
    /// Cadence of the fast "recently ingested" poll.
    poll_interval: Duration,
    /// Cadence of the exhaustive scan over all assets.
    full_scan_interval: Duration,
}
impl Announcer {
    /// Build an announcer from its collaborators and poll cadences (seconds).
    pub fn new(
        can: CanClient,
        state: Arc<SyncState>,
        node: Arc<SyncNode>,
        poll_interval_secs: u64,
        full_scan_interval_secs: u64,
    ) -> Self {
        Self {
            can,
            state,
            node,
            poll_interval: Duration::from_secs(poll_interval_secs),
            full_scan_interval: Duration::from_secs(full_scan_interval_secs),
        }
    }

    /// Run the announcer loop — fast polls + periodic full scans.
    ///
    /// Never returns; poll errors are logged and the loop keeps going.
    pub async fn run(self) {
        let mut fast_tick = tokio::time::interval(self.poll_interval);
        let mut full_tick = tokio::time::interval(self.full_scan_interval);
        // Skip the first immediate tick for full scan (let fast poll get first data).
        // tokio intervals fire immediately on the first tick, so consuming it here
        // delays the first full scan by one whole interval.
        full_tick.tick().await;
        info!(
            "Announcer started (fast poll: {}s, full scan: {}s)",
            self.poll_interval.as_secs(),
            self.full_scan_interval.as_secs(),
        );
        loop {
            tokio::select! {
                _ = fast_tick.tick() => {
                    if let Err(e) = self.fast_poll().await {
                        warn!("Fast poll error: {:#}", e);
                    }
                }
                _ = full_tick.tick() => {
                    if let Err(e) = self.full_scan().await {
                        warn!("Full scan error: {:#}", e);
                    }
                }
            }
        }
    }

    /// Fast poll: check for recently ingested assets.
    ///
    /// Uses the persisted `last_seen_timestamp` as a low-water mark so each
    /// poll only fetches assets newer than the previous poll saw.
    async fn fast_poll(&self) -> Result<()> {
        // Missing or unparsable state falls back to 0 (i.e. "from the beginning").
        let last_ts = self
            .state
            .get_state("last_seen_timestamp")?
            .and_then(|s| s.parse::<i64>().ok())
            .unwrap_or(0);
        // Get recent assets ordered newest first
        let resp = self.can.list(50, 0, "desc", Some(last_ts)).await?;
        if resp.items.is_empty() {
            return Ok(());
        }
        debug!("Fast poll found {} new assets since ts={}", resp.items.len(), last_ts);
        // Track the newest timestamp we see
        let mut max_ts = last_ts;
        for asset in &resp.items {
            if asset.timestamp > max_ts {
                max_ts = asset.timestamp;
            }
        }
        // Process assets against libraries: each asset is announced to every
        // library whose filter matches it.
        let libraries = self.state.list_libraries()?;
        for asset in &resp.items {
            for lib in &libraries {
                if lib.filter.matches(asset) {
                    self.announce_asset(lib, asset).await?;
                }
            }
        }
        // Update last seen timestamp only after all announcements succeeded,
        // so a failed batch is retried on the next poll.
        self.state.set_state("last_seen_timestamp", &max_ts.to_string())?;
        Ok(())
    }

    /// Full scan: paginate through all assets, checking for metadata changes.
    ///
    /// Includes trashed assets (so trash state propagates) and counts how many
    /// entries were newly announced for the summary log line.
    async fn full_scan(&self) -> Result<()> {
        info!("Starting full scan...");
        let libraries = self.state.list_libraries()?;
        if libraries.is_empty() {
            debug!("No libraries configured, skipping full scan");
            return Ok(());
        }
        let page_size = 100;
        let mut offset = 0;
        let mut total_scanned = 0;
        let mut total_announced = 0;
        loop {
            let resp = self.can.list_all(page_size, offset, true).await?;
            let count = resp.items.len();
            total_scanned += count;
            for asset in &resp.items {
                for lib in &libraries {
                    if lib.filter.matches(asset) {
                        let was_new = self.announce_asset(lib, asset).await?;
                        if was_new {
                            total_announced += 1;
                        }
                    }
                }
            }
            // A short page means we've reached the end of the listing.
            if (count as i64) < page_size {
                break;
            }
            offset += page_size;
        }
        info!(
            "Full scan complete: scanned {}, announced {} new/updated",
            total_scanned, total_announced
        );
        Ok(())
    }

    /// Announce a single asset to a library's iroh document.
    /// Returns true if the asset was newly announced or updated.
    ///
    /// NOTE(review): the log paths slice `&asset.hash[..12]` — this assumes
    /// hashes are always at least 12 characters; confirm against the CAN
    /// service's hash format.
    async fn announce_asset(
        &self,
        lib: &crate::library::Library,
        asset: &crate::can_client::AssetMeta,
    ) -> Result<bool> {
        let doc_id = match &lib.doc_id {
            Some(id) => id.clone(),
            None => {
                debug!("Library '{}' has no doc_id yet, skipping", lib.name);
                return Ok(false);
            }
        };
        // Check if already announced at current version
        if self.state.is_announced(&lib.id, &asset.hash)? {
            // Already announced — skip unless metadata changed
            // (full scan handles re-announcement on metadata change)
            return Ok(false);
        }
        // Download file content from CAN service and add as iroh blob.
        // Failures here are non-fatal: the entry is still written, just
        // without a blob hash (None).
        let iroh_blob_hash = match self.can.get_asset(&asset.hash).await {
            Ok(content) => {
                // Add to iroh blob store so remote peers can download it
                match self.node.blobs.add_bytes(content).await {
                    Ok(tag_info) => Some(tag_info.hash.to_string()),
                    Err(e) => {
                        warn!(
                            "Failed to add blob for asset {}: {:#}",
                            &asset.hash[..12],
                            e
                        );
                        None
                    }
                }
            }
            Err(e) => {
                warn!(
                    "Failed to download asset {} from CAN service: {:#}",
                    &asset.hash[..12],
                    e
                );
                None
            }
        };
        // Create sync entry with the iroh blob hash
        let mut entry = AssetSyncEntry::from_asset_meta(asset, &self.node.peer_id());
        entry.iroh_blob_hash = iroh_blob_hash;
        let entry_bytes = entry.to_bytes();
        // Write to iroh document (CRDT — concurrent writes merge automatically).
        // Write failures are logged and reported as "not announced" rather than
        // propagated, so one bad doc doesn't abort the whole poll.
        if let Err(e) = self
            .node
            .write_to_doc(&doc_id, asset.hash.as_bytes(), &entry_bytes)
            .await
        {
            error!(
                "Failed to write asset {} to doc {}: {:#}",
                &asset.hash[..12],
                &doc_id[..12],
                e
            );
            return Ok(false);
        }
        // Mark as announced in local state so future polls skip this asset.
        self.state.mark_announced(&lib.id, &asset.hash, entry.version)?;
        debug!(
            "Announced asset {} to library '{}' (doc {})",
            &asset.hash[..12],
            lib.name,
            &doc_id[..12]
        );
        Ok(true)
    }
}

View File

@ -1,291 +1,244 @@
//! HTTP client for CAN service's private sync API (protobuf-encoded).
//!
//! Includes SSE subscription for real-time ingest notifications and
//! incremental hash queries via `?since=` parameter.
use anyhow::{Context, Result};
use bytes::Bytes;
use reqwest::multipart;
use serde::{Deserialize, Serialize};
use futures_util::StreamExt;
use prost::Message;
use tokio::sync::mpsc;
use tracing::{debug, info, warn};
/// HTTP client for CAN service API
use crate::protocol::*;
/// Event received from the CAN service SSE stream.
#[derive(Debug, Clone)]
pub struct CanClient {
client: reqwest::Client,
pub struct SyncEvent {
pub hash: String,
pub timestamp: i64,
}
/// Client for CAN service's /sync/* endpoints.
#[derive(Clone)]
pub struct CanSyncClient {
http: reqwest::Client,
base_url: String,
sync_key: String,
}
// ── API response types (mirror CAN service) ──
#[derive(Debug, Deserialize)]
pub struct ApiResponse<T> {
pub status: String,
pub data: T,
}
#[derive(Debug, Deserialize)]
pub struct ErrorResponse {
pub status: String,
pub error: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssetMeta {
pub hash: String,
pub mime_type: String,
pub application: Option<String>,
pub user: Option<String>,
pub tags: Vec<String>,
pub description: Option<String>,
pub human_filename: Option<String>,
pub human_path: Option<String>,
pub timestamp: i64,
pub is_trashed: bool,
#[serde(default)]
pub is_corrupted: bool,
pub size: i64,
}
#[derive(Debug, Deserialize)]
pub struct ListResponse {
pub items: Vec<AssetMeta>,
pub pagination: Pagination,
}
#[derive(Debug, Deserialize)]
pub struct Pagination {
pub limit: i64,
pub offset: i64,
pub total: i64,
}
#[derive(Debug, Deserialize)]
pub struct IngestResult {
pub timestamp: i64,
pub hash: String,
pub filename: String,
}
// ── Search parameters ──
#[derive(Debug, Default, Serialize)]
pub struct SearchParams {
#[serde(skip_serializing_if = "Option::is_none")]
pub hash: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub start_time: Option<i64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub end_time: Option<i64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub tags: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub mime_type: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub user: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub application: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub limit: Option<i64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub offset: Option<i64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub order: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub include_trashed: Option<bool>,
}
// ── Ingest metadata ──
#[derive(Debug, Default)]
pub struct IngestMeta {
pub mime_type: Option<String>,
pub human_file_name: Option<String>,
pub human_readable_path: Option<String>,
pub application: Option<String>,
pub user: Option<String>,
pub tags: Option<String>,
pub description: Option<String>,
}
// ── Client implementation ──
impl CanClient {
pub fn new(base_url: &str) -> Self {
impl CanSyncClient {
/// Create a new client pointed at the given CAN service URL, authenticated with the sync API key.
pub fn new(base_url: &str, sync_key: &str) -> Self {
Self {
client: reqwest::Client::new(),
http: reqwest::Client::new(),
base_url: base_url.trim_end_matches('/').to_string(),
sync_key: sync_key.to_string(),
}
}
/// List assets with pagination and ordering
pub async fn list(
&self,
limit: i64,
offset: i64,
order: &str,
offset_time: Option<i64>,
) -> Result<ListResponse> {
let mut url = format!("{}/list?limit={}&offset={}&order={}", self.base_url, limit, offset, order);
if let Some(ts) = offset_time {
url.push_str(&format!("&offset_time={}", ts));
}
let resp = self.client.get(&url).send().await.context("list request failed")?;
let status = resp.status();
if !status.is_success() {
let text = resp.text().await.unwrap_or_default();
anyhow::bail!("CAN list failed ({}): {}", status, text);
}
let api: ApiResponse<ListResponse> = resp.json().await.context("parse list response")?;
Ok(api.data)
}
/// List all assets (paginated, including trashed for full sync)
pub async fn list_all(
&self,
limit: i64,
offset: i64,
include_trashed: bool,
) -> Result<ListResponse> {
let mut url = format!("{}/list?limit={}&offset={}&order=asc", self.base_url, limit, offset);
if include_trashed {
url.push_str("&include_trashed=true");
}
let resp = self.client.get(&url).send().await.context("list_all request failed")?;
let status = resp.status();
if !status.is_success() {
let text = resp.text().await.unwrap_or_default();
anyhow::bail!("CAN list_all failed ({}): {}", status, text);
}
let api: ApiResponse<ListResponse> = resp.json().await.context("parse list_all response")?;
Ok(api.data)
}
/// Search assets by filters
pub async fn search(&self, params: &SearchParams) -> Result<ListResponse> {
/// POST protobuf request, return protobuf response bytes
async fn post_proto(&self, path: &str, body: Vec<u8>) -> Result<bytes::Bytes> {
let url = format!("{}{}", self.base_url, path);
let resp = self
.client
.get(&format!("{}/search", self.base_url))
.query(params)
.http
.post(&url)
.header("X-Sync-Key", &self.sync_key)
.header("Content-Type", "application/x-protobuf")
.body(body)
.send()
.await
.context("search request failed")?;
.with_context(|| format!("POST {}", url))?;
if !resp.status().is_success() {
let status = resp.status();
if !status.is_success() {
let text = resp.text().await.unwrap_or_default();
anyhow::bail!("CAN search failed ({}): {}", status, text);
}
let api: ApiResponse<ListResponse> = resp.json().await.context("parse search response")?;
Ok(api.data)
anyhow::bail!("{} returned {}: {}", path, status, text);
}
/// Download asset content by hash
pub async fn get_asset(&self, hash: &str) -> Result<Bytes> {
resp.bytes().await.with_context(|| format!("reading body from {}", path))
}
/// Get all asset digests (full list — use for initial reconciliation only).
pub async fn get_hashes(&self) -> Result<HashListResponse> {
let req = HashListRequest {};
let mut buf = Vec::with_capacity(req.encoded_len());
req.encode(&mut buf)?;
let resp_bytes = self.post_proto("/sync/hashes", buf).await?;
HashListResponse::decode(resp_bytes).context("decode HashListResponse")
}
/// Get only asset digests newer than `since` timestamp (incremental query).
pub async fn get_hashes_since(&self, since: i64) -> Result<HashListResponse> {
let req = HashListRequest {};
let mut buf = Vec::with_capacity(req.encoded_len());
req.encode(&mut buf)?;
let url = format!("{}/sync/hashes?since={}", self.base_url, since);
let resp = self
.client
.get(&format!("{}/asset/{}", self.base_url, hash))
.http
.post(&url)
.header("X-Sync-Key", &self.sync_key)
.header("Content-Type", "application/x-protobuf")
.body(buf)
.send()
.await
.context("get_asset request failed")?;
.with_context(|| format!("POST {}", url))?;
if !resp.status().is_success() {
let status = resp.status();
if !status.is_success() {
let text = resp.text().await.unwrap_or_default();
anyhow::bail!("CAN get_asset failed ({}): {}", status, text);
}
resp.bytes().await.context("read asset bytes")
anyhow::bail!("/sync/hashes?since={} returned {}: {}", since, status, text);
}
/// Get asset metadata by hash
pub async fn get_meta(&self, hash: &str) -> Result<AssetMeta> {
let resp = self
.client
.get(&format!("{}/asset/{}/meta", self.base_url, hash))
.send()
.await
.context("get_meta request failed")?;
let status = resp.status();
if !status.is_success() {
let text = resp.text().await.unwrap_or_default();
anyhow::bail!("CAN get_meta failed ({}): {}", status, text);
}
let api: ApiResponse<AssetMeta> = resp.json().await.context("parse meta response")?;
Ok(api.data)
let resp_bytes = resp.bytes().await?;
HashListResponse::decode(resp_bytes).context("decode HashListResponse")
}
/// Ingest a file into CAN service via multipart upload
pub async fn ingest(&self, content: Bytes, meta: IngestMeta) -> Result<IngestResult> {
let file_part = multipart::Part::bytes(content.to_vec())
.file_name(meta.human_file_name.clone().unwrap_or_else(|| "file".to_string()))
.mime_str(meta.mime_type.as_deref().unwrap_or("application/octet-stream"))?;
/// Pull full assets by hash.
pub async fn pull(&self, hashes: Vec<String>) -> Result<PullResponse> {
let req = PullRequest { hashes };
let mut buf = Vec::with_capacity(req.encoded_len());
req.encode(&mut buf)?;
let mut form = multipart::Form::new().part("file", file_part);
if let Some(ref v) = meta.mime_type {
form = form.text("mime_type", v.clone());
}
if let Some(ref v) = meta.human_file_name {
form = form.text("human_file_name", v.clone());
}
if let Some(ref v) = meta.human_readable_path {
form = form.text("human_readable_path", v.clone());
}
if let Some(ref v) = meta.application {
form = form.text("application", v.clone());
}
if let Some(ref v) = meta.user {
form = form.text("user", v.clone());
}
if let Some(ref v) = meta.tags {
form = form.text("tags", v.clone());
}
if let Some(ref v) = meta.description {
form = form.text("description", v.clone());
let resp_bytes = self.post_proto("/sync/pull", buf).await?;
PullResponse::decode(resp_bytes).context("decode PullResponse")
}
let resp = self
.client
.post(&format!("{}/ingest", self.base_url))
.multipart(form)
.send()
.await
.context("ingest request failed")?;
let status = resp.status();
if !status.is_success() {
let text = resp.text().await.unwrap_or_default();
anyhow::bail!("CAN ingest failed ({}): {}", status, text);
}
let api: ApiResponse<IngestResult> = resp.json().await.context("parse ingest response")?;
Ok(api.data)
/// Push a single asset bundle.
pub async fn push(&self, bundle: AssetBundle) -> Result<PushResponse> {
let req = PushRequest {
bundle: Some(bundle),
};
let mut buf = Vec::with_capacity(req.encoded_len());
req.encode(&mut buf)?;
let resp_bytes = self.post_proto("/sync/push", buf).await?;
PushResponse::decode(resp_bytes).context("decode PushResponse")
}
/// Update asset metadata (tags, description)
/// Update metadata for an existing asset.
pub async fn update_meta(
&self,
hash: &str,
tags: Option<Vec<String>>,
hash: String,
description: Option<String>,
) -> Result<()> {
#[derive(Serialize)]
struct MetadataUpdate {
#[serde(skip_serializing_if = "Option::is_none")]
tags: Option<Vec<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
description: Option<String>,
}
let resp = self
.client
.patch(&format!("{}/asset/{}", self.base_url, hash))
.json(&MetadataUpdate { tags, description })
.send()
.await
.context("update_meta request failed")?;
let status = resp.status();
if !status.is_success() {
let text = resp.text().await.unwrap_or_default();
anyhow::bail!("CAN update_meta failed ({}): {}", status, text);
}
Ok(())
tags: Vec<String>,
is_trashed: bool,
) -> Result<MetaUpdateResponse> {
let req = MetaUpdateRequest {
hash,
description,
tags,
is_trashed,
};
let mut buf = Vec::with_capacity(req.encoded_len());
req.encode(&mut buf)?;
let resp_bytes = self.post_proto("/sync/meta", buf).await?;
MetaUpdateResponse::decode(resp_bytes).context("decode MetaUpdateResponse")
}
/// Check if CAN service is reachable
pub async fn health_check(&self) -> Result<bool> {
match self.list(1, 0, "desc", None).await {
Ok(_) => Ok(true),
Err(_) => Ok(false),
/// Health check: try to get hashes (will fail if sync API disabled).
pub async fn health_check(&self) -> bool {
self.get_hashes().await.is_ok()
}
/// Subscribe to SSE events from CAN service. Sends `SyncEvent` on the
/// returned channel whenever the CAN service ingests a new asset.
///
/// Automatically reconnects on disconnect (with incremental catch-up).
/// Returns a channel receiver that yields events.
pub fn subscribe_events(&self) -> mpsc::UnboundedReceiver<SyncEvent> {
let (tx, rx) = mpsc::unbounded_channel();
let url = format!(
"{}/sync/events?key={}",
self.base_url, self.sync_key
);
let http = self.http.clone();
tokio::spawn(async move {
loop {
info!("Connecting to SSE stream: {}", url.split('?').next().unwrap_or(&url));
match Self::run_sse_stream(&http, &url, &tx).await {
Ok(()) => {
info!("SSE stream ended cleanly");
}
Err(e) => {
warn!("SSE stream error: {:#}", e);
}
}
// If the receiver is dropped, stop reconnecting
if tx.is_closed() {
debug!("SSE subscriber dropped, stopping reconnect loop");
break;
}
// Reconnect after a short delay
info!("Reconnecting SSE in 2s...");
tokio::time::sleep(std::time::Duration::from_secs(2)).await;
}
});
rx
}
// Connect to the SSE endpoint and forward parsed events to the channel
// until the stream ends or an error occurs.
async fn run_sse_stream(
http: &reqwest::Client,
url: &str,
tx: &mpsc::UnboundedSender<SyncEvent>,
) -> Result<()> {
let resp = http
.get(url)
.header("Accept", "text/event-stream")
.send()
.await
.context("SSE connect")?;
if !resp.status().is_success() {
anyhow::bail!("SSE returned status {}", resp.status());
}
let mut stream = resp.bytes_stream();
let mut buffer = String::new();
while let Some(chunk) = stream.next().await {
let chunk = chunk.context("reading SSE chunk")?;
buffer.push_str(&String::from_utf8_lossy(&chunk));
// Process complete SSE messages (separated by double newlines)
while let Some(pos) = buffer.find("\n\n") {
let message = buffer[..pos].to_string();
buffer = buffer[pos + 2..].to_string();
// Parse SSE message: look for "data: {...}" lines
for line in message.lines() {
if let Some(data) = line.strip_prefix("data:") {
let data = data.trim();
if data == "ping" || data.is_empty() {
continue;
}
// Parse JSON: {"hash":"...","timestamp":...}
if let Ok(value) = serde_json::from_str::<serde_json::Value>(data) {
if let (Some(hash), Some(ts)) = (
value["hash"].as_str(),
value["timestamp"].as_i64(),
) {
debug!("SSE event: new_asset hash={}", &hash[..hash.len().min(12)]);
let _ = tx.send(SyncEvent {
hash: hash.to_string(),
timestamp: ts,
});
}
}
}
}
}
}
Ok(())
}
}

View File

@ -1,78 +1,42 @@
use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf};
use serde::Deserialize;
use std::path::Path;
#[derive(Debug, Clone, Serialize, Deserialize)]
/// All settings needed to run the sync agent, loaded from a YAML file.
#[derive(Debug, Clone, Deserialize)]
pub struct SyncConfig {
/// Base URL for the CAN service API (e.g. "http://127.0.0.1:3210/api/v1/can/0")
/// Base URL of the local CAN service (e.g. "http://127.0.0.1:3210")
pub can_service_url: String,
/// Address for the CAN Sync HTTP API (e.g. "127.0.0.1:3213")
pub listen_addr: String,
/// API key for CAN service's sync endpoints (must match config.sync_api_key)
pub sync_api_key: String,
/// Directory for persistent data (peer key, sync state DB)
pub data_dir: String,
/// Shared passphrase for peer discovery (all peers must use the same one)
pub sync_passphrase: String,
/// Optional custom relay URL; null uses iroh's public relay
pub relay_url: Option<String>,
/// Seconds between fast polls for new assets
/// Seconds between polls for new local assets
#[serde(default = "default_poll_interval")]
pub poll_interval_secs: u64,
/// Seconds between full scans of all assets
#[serde(default = "default_full_scan_interval")]
pub full_scan_interval_secs: u64,
/// Optional: path to write this node's ticket to (for direct connection)
#[serde(default)]
pub ticket_file: Option<String>,
/// Optional: path to a file containing a peer's node ticket (for direct connection).
/// If set, the agent will read this ticket and connect directly instead of waiting
/// for gossip discovery. The file is polled until it exists and is non-empty.
#[serde(default)]
pub connect_ticket_file: Option<String>,
}
fn default_poll_interval() -> u64 {
5
}
fn default_full_scan_interval() -> u64 {
300
3
}
impl SyncConfig {
/// Load config from a YAML file, falling back to defaults if not found
pub fn load(path: &Path) -> Result<Self> {
if path.exists() {
let contents =
std::fs::read_to_string(path).context("Failed to read config file")?;
let config: SyncConfig =
serde_yaml::from_str(&contents).context("Failed to parse config YAML")?;
/// Read a YAML config file from disk and parse it into a SyncConfig.
pub fn load(path: &Path) -> anyhow::Result<Self> {
let contents = std::fs::read_to_string(path)?;
let config: Self = serde_yaml::from_str(&contents)?;
Ok(config)
} else {
tracing::warn!("Config file not found at {}, using defaults", path.display());
Ok(Self::default())
}
}
/// Resolved data directory path
pub fn data_path(&self) -> PathBuf {
PathBuf::from(&self.data_dir)
}
/// Path to the peer keypair file
pub fn peer_key_path(&self) -> PathBuf {
self.data_path().join("peer_key")
}
/// Path to the sync state SQLite database
pub fn db_path(&self) -> PathBuf {
self.data_path().join("can_sync.db")
}
}
impl Default for SyncConfig {
fn default() -> Self {
Self {
can_service_url: "http://127.0.0.1:3210/api/v1/can/0".to_string(),
listen_addr: "127.0.0.1:3213".to_string(),
data_dir: "./can_sync_data".to_string(),
relay_url: None,
poll_interval_secs: default_poll_interval(),
full_scan_interval_secs: default_full_scan_interval(),
}
}
}

View File

@ -0,0 +1,110 @@
//! Peer discovery via iroh-gossip using a shared passphrase.
//!
//! All CAN sync agents with the same `sync_passphrase` derive the same
//! BLAKE3 gossip topic and discover each other automatically.
use std::collections::HashSet;
use anyhow::Result;
use iroh::{Endpoint, EndpointId};
use iroh_gossip::net::Gossip;
use iroh_gossip::proto::TopicId;
use n0_future::StreamExt;
use tokio::sync::mpsc;
use tracing::{debug, info, warn};
/// Derive a deterministic gossip TopicId from a shared passphrase.
///
/// Peers that agree on the passphrase compute the same topic, which is how
/// they find each other: the passphrase is prefixed with a protocol/version
/// tag and hashed with BLAKE3 into the 32-byte topic identifier.
pub fn derive_topic(passphrase: &str) -> TopicId {
    let seed = format!("can-sync-v1:{}", passphrase);
    let digest = blake3::hash(seed.as_bytes());
    TopicId::from_bytes(*digest.as_bytes())
}
/// Manages peer discovery via gossip announcements.
pub struct Discovery {
    /// Gossip protocol handle used to subscribe and broadcast.
    gossip: Gossip,
    /// Topic derived from the shared passphrase; all peers join the same one.
    topic: TopicId,
    /// Local iroh endpoint; used to learn our own EndpointId.
    endpoint: Endpoint,
}
impl Discovery {
    /// Create a new Discovery that listens on a gossip topic derived from the shared passphrase.
    pub fn new(endpoint: Endpoint, gossip: Gossip, passphrase: &str) -> Self {
        let topic = derive_topic(passphrase);
        info!("Gossip topic: {}", hex::encode(topic.as_bytes()));
        Self {
            gossip,
            topic,
            endpoint,
        }
    }

    /// Subscribe to the gossip topic and yield newly discovered peer EndpointIds.
    ///
    /// Sends discovered EndpointIds on the channel. Runs forever (returns only
    /// if the gossip receiver stream ends).
    ///
    /// NOTE(review): the spawned broadcast task loops forever and is never
    /// aborted — it outlives this function if the receiver stream ends;
    /// confirm that's acceptable for the process lifetime.
    pub async fn run(self, tx: mpsc::Sender<EndpointId>) -> Result<()> {
        info!("Joining gossip topic for peer discovery...");
        // Subscribe to the topic with no bootstrap peers (we discover via gossip)
        let mut topic = self
            .gossip
            .subscribe(self.topic, vec![])
            .await
            .map_err(|e| anyhow::anyhow!("gossip subscribe failed: {}", e))?;
        // NOTE(review): the log below says we wait for neighbors, but the code
        // proceeds immediately to broadcasting — there is no actual wait here.
        info!("Waiting for gossip neighbors...");
        // Broadcast our EndpointId periodically so peers that join later can
        // still discover us.
        let our_id = self.endpoint.id();
        let (sender, mut receiver) = topic.split();
        let sender_clone = sender.clone();
        tokio::spawn(async move {
            // Announcement payload is our raw 32-byte EndpointId.
            let msg = our_id.as_bytes().to_vec();
            loop {
                if let Err(e) = sender_clone.broadcast(msg.clone().into()).await {
                    warn!("Failed to broadcast discovery: {}", e);
                }
                tokio::time::sleep(std::time::Duration::from_secs(10)).await;
            }
        });
        // Listen for peer announcements. `known_peers` de-duplicates so each
        // peer is forwarded on the channel at most once while it stays known.
        let mut known_peers: HashSet<EndpointId> = HashSet::new();
        while let Some(event) = receiver.next().await {
            match event {
                Ok(iroh_gossip::api::Event::Received(msg)) => {
                    // Only 32-byte payloads can be EndpointId announcements;
                    // anything else on the topic is ignored.
                    if msg.content.len() == 32 {
                        if let Ok(bytes) = <[u8; 32]>::try_from(msg.content.as_ref()) {
                            if let Ok(peer_id) = EndpointId::from_bytes(&bytes) {
                                // Skip our own broadcasts; `insert` returning
                                // true means this peer is new to us.
                                if peer_id != our_id && known_peers.insert(peer_id) {
                                    info!("Discovered new peer: {}", peer_id.fmt_short());
                                    let _ = tx.send(peer_id).await;
                                }
                            }
                        }
                    }
                }
                Ok(iroh_gossip::api::Event::NeighborUp(peer_id)) => {
                    // Direct gossip neighbors count as discovered peers too.
                    if peer_id != our_id && known_peers.insert(peer_id) {
                        info!("Gossip neighbor up: {}", peer_id.fmt_short());
                        let _ = tx.send(peer_id).await;
                    }
                }
                Ok(iroh_gossip::api::Event::NeighborDown(peer_id)) => {
                    // Forget the peer so it is re-announced if it comes back.
                    info!("Gossip neighbor down: {}", peer_id.fmt_short());
                    known_peers.remove(&peer_id);
                }
                Ok(iroh_gossip::api::Event::Lagged) => {
                    warn!("Gossip receiver lagged, may have missed messages");
                }
                Err(e) => {
                    // Back off briefly on receive errors to avoid a hot loop.
                    warn!("Gossip receive error: {}", e);
                    tokio::time::sleep(std::time::Duration::from_secs(1)).await;
                }
            }
        }
        Ok(())
    }
}

View File

@ -1,352 +0,0 @@
use std::sync::Arc;
use anyhow::Result;
use futures_lite::StreamExt;
use sha2::{Digest, Sha256};
use tokio::io::AsyncReadExt;
use tracing::{debug, error, info, warn};
use crate::can_client::{CanClient, IngestMeta};
use crate::library::SyncState;
use crate::manifest::AssetSyncEntry;
use crate::node::SyncNode;
/// The fetcher receives remote asset entries from iroh documents
/// and ingests them into the local CAN service.
pub struct Fetcher {
    /// HTTP client for the local CAN service.
    can: CanClient,
    /// Local sync bookkeeping DB (libraries + announced-asset tracking).
    state: Arc<SyncState>,
    /// Shared iroh node (endpoint, blobs, docs).
    node: Arc<SyncNode>,
}
impl Fetcher {
    /// Build a fetcher from its three shared dependencies.
    pub fn new(can: CanClient, state: Arc<SyncState>, node: Arc<SyncNode>) -> Self {
        Self { can, state, node }
    }
    /// Run the fetcher — subscribes to library document events for real-time sync,
    /// falls back to periodic polling for documents without active subscriptions.
    /// Never returns.
    pub async fn run(self) {
        info!("Fetcher started — watching for remote asset entries");
        // Run two loops concurrently:
        // 1. Subscription watcher — subscribes to active library docs (every 5s)
        // 2. Periodic checker — catches anything missed (every 10s)
        let poll_interval = tokio::time::interval(std::time::Duration::from_secs(10));
        let sub_interval = tokio::time::interval(std::time::Duration::from_secs(5));
        tokio::pin!(poll_interval);
        tokio::pin!(sub_interval);
        loop {
            tokio::select! {
                _ = poll_interval.tick() => {
                    if let Err(e) = self.check_for_new_entries().await {
                        warn!("Fetcher poll error: {:#}", e);
                    }
                }
                _ = sub_interval.tick() => {
                    // Try to subscribe to any library docs that we haven't subscribed to yet
                    if let Err(e) = self.subscribe_to_libraries().await {
                        debug!("Fetcher subscription check: {:#}", e);
                    }
                }
            }
        }
    }
    /// Subscribe to document events for all libraries that have doc_ids.
    ///
    /// NOTE(review): this returns after handling the FIRST library with a
    /// doc_id, and nothing records which docs are already subscribed — so each
    /// 5s tick re-subscribes the same first doc and spawns a duplicate event
    /// task. Confirm whether dedup is handled elsewhere.
    async fn subscribe_to_libraries(&self) -> Result<()> {
        let libraries = self.state.list_libraries()?;
        for lib in &libraries {
            if let Some(ref doc_id_hex) = lib.doc_id {
                // Open the doc and subscribe to events
                let doc = match self.node.open_doc(doc_id_hex).await {
                    Ok(d) => d,
                    Err(_) => continue,
                };
                let mut events = match doc.subscribe().await {
                    Ok(e) => e,
                    Err(_) => continue,
                };
                // Spawn a task to process events from this doc
                let can = self.can.clone();
                let node_peer_id = self.node.peer_id();
                let node = self.node.clone();
                let lib_name = lib.name.clone();
                tokio::spawn(async move {
                    while let Some(event) = events.next().await {
                        match event {
                            Ok(iroh_docs::engine::LiveEvent::InsertRemote {
                                entry,
                                content_status,
                                ..
                            }) => {
                                // Document key is the CAN hash (UTF-8 text).
                                let key = entry.key().to_vec();
                                let can_hash = String::from_utf8_lossy(&key).to_string();
                                // Only act once the entry's content blob has
                                // fully arrived; incomplete entries are retried
                                // by the polling fallback.
                                if content_status == iroh_docs::ContentStatus::Complete {
                                    // The entry value (our AssetSyncEntry) is available
                                    // Read the entry content from the blob store
                                    let content_hash = entry.content_hash();
                                    let mut reader = node.blobs.reader(content_hash);
                                    let mut buf = Vec::new();
                                    if reader.read_to_end(&mut buf).await.is_ok() {
                                        if let Ok(sync_entry) = AssetSyncEntry::from_bytes(&buf) {
                                            if sync_entry.last_modified_by == node_peer_id {
                                                continue; // Skip our own entries
                                            }
                                            info!(
                                                "Received remote entry for {} in library '{}'",
                                                &can_hash[..can_hash.len().min(12)],
                                                lib_name
                                            );
                                            if let Err(e) = process_remote_entry(
                                                &can,
                                                &node,
                                                &node_peer_id,
                                                &can_hash,
                                                sync_entry,
                                            )
                                            .await
                                            {
                                                error!(
                                                    "Error processing remote entry {}: {:#}",
                                                    &can_hash[..can_hash.len().min(12)],
                                                    e
                                                );
                                            }
                                        }
                                    }
                                }
                            }
                            Ok(iroh_docs::engine::LiveEvent::NeighborUp(peer)) => {
                                info!("Peer connected: {}", peer.fmt_short());
                            }
                            Ok(iroh_docs::engine::LiveEvent::NeighborDown(peer)) => {
                                info!("Peer disconnected: {}", peer.fmt_short());
                            }
                            Ok(_) => {} // Ignore other events
                            Err(e) => {
                                // Stream error ends this subscription task; a
                                // later tick will re-subscribe.
                                warn!("Document event error: {:#}", e);
                                break;
                            }
                        }
                    }
                });
                // Only subscribe to one doc per tick to avoid overwhelming
                return Ok(());
            }
        }
        Ok(())
    }
    /// Check all library documents for entries we don't have locally (polling fallback).
    /// Entries processed here are marked announced so they are not reprocessed.
    async fn check_for_new_entries(&self) -> Result<()> {
        let libraries = self.state.list_libraries()?;
        for lib in &libraries {
            if let Some(ref doc_id_hex) = lib.doc_id {
                let doc = match self.node.open_doc(doc_id_hex).await {
                    Ok(d) => d,
                    Err(_) => continue,
                };
                // Query all entries (latest per key)
                let query = iroh_docs::store::Query::single_latest_per_key().build();
                let entries = match doc.get_many(query).await {
                    Ok(e) => e,
                    Err(_) => continue,
                };
                tokio::pin!(entries);
                while let Some(Ok(entry)) = entries.next().await {
                    // Key is the CAN hash; value is a serialized AssetSyncEntry.
                    let key = entry.key().to_vec();
                    let can_hash = String::from_utf8_lossy(&key).to_string();
                    // Read the entry value (AssetSyncEntry)
                    let content_hash = entry.content_hash();
                    let mut reader = self.node.blobs.reader(content_hash);
                    let mut buf = Vec::new();
                    if reader.read_to_end(&mut buf).await.is_err() {
                        continue;
                    }
                    let sync_entry = match AssetSyncEntry::from_bytes(&buf) {
                        Ok(e) => e,
                        Err(_) => continue,
                    };
                    // Skip our own entries
                    if sync_entry.last_modified_by == self.node.peer_id() {
                        continue;
                    }
                    // Check if already processed
                    if self.state.is_announced(&lib.id, &can_hash).unwrap_or(false) {
                        continue;
                    }
                    info!(
                        "Polling found remote entry for {} in library '{}'",
                        &can_hash[..can_hash.len().min(12)],
                        lib.name
                    );
                    if let Err(e) = process_remote_entry(
                        &self.can,
                        &self.node,
                        &self.node.peer_id(),
                        &can_hash,
                        sync_entry,
                    )
                    .await
                    {
                        error!(
                            "Error processing remote entry {}: {:#}",
                            &can_hash[..can_hash.len().min(12)],
                            e
                        );
                    }
                    // Mark as processed (even on error, so we don't retry forever)
                    let _ = self.state.mark_announced(&lib.id, &can_hash, 1);
                }
            }
        }
        Ok(())
    }
}
/// Process a remote asset entry — download blob and ingest into CAN service.
///
/// Behavior, in order:
/// 1. Skip entries authored by this node (`local_peer_id`).
/// 2. If the asset already exists locally, push tag/description updates only.
/// 3. Otherwise read the blob from the local iroh store, verify its CAN hash,
///    and ingest it via the CAN HTTP API.
///
/// Errors from ingest are logged, not returned; `Err` is reserved for the
/// blob-download path.
async fn process_remote_entry(
    can: &CanClient,
    node: &SyncNode,
    local_peer_id: &str,
    can_hash: &str,
    entry: AssetSyncEntry,
) -> Result<()> {
    // Skip if this is our own entry
    if entry.last_modified_by == local_peer_id {
        return Ok(());
    }
    // Check if already in local CAN service
    match can.get_meta(can_hash).await {
        Ok(existing) => {
            // Asset exists — check if metadata needs updating.
            // NOTE(review): is_trashed differences trigger an update here, but
            // update_meta only carries tags + description — confirm whether
            // trash state is meant to propagate by another path.
            if entry.tags != existing.tags
                || entry.description != existing.description
                || entry.is_trashed != existing.is_trashed
            {
                info!("Updating metadata for {} from remote peer", &can_hash[..12]);
                can.update_meta(
                    can_hash,
                    Some(entry.tags.clone()),
                    entry.description.clone(),
                )
                .await?;
            }
            return Ok(());
        }
        Err(_) => {
            // Asset not found locally — need to fetch and ingest
        }
    }
    info!(
        "Fetching remote asset {} ({}B) from peer {}",
        &can_hash[..12],
        entry.size,
        &entry.last_modified_by[..entry.last_modified_by.len().min(12)]
    );
    // Download blob via iroh (reads the local blob store — see download_blob)
    let content = download_blob(node, &entry).await?;
    if content.is_empty() {
        warn!("Downloaded empty blob for {} — skipping", &can_hash[..12]);
        return Ok(());
    }
    // Verify CAN hash: SHA256(timestamp_bytes + content)
    if !verify_can_hash(can_hash, entry.timestamp, &content) {
        error!(
            "CAN hash verification failed for {} — rejecting",
            &can_hash[..12]
        );
        return Ok(());
    }
    // Ingest into local CAN service
    let meta = IngestMeta {
        mime_type: Some(entry.mime_type.clone()),
        human_file_name: entry.human_filename.clone(),
        human_readable_path: entry.human_path.clone(),
        application: entry.application.clone(),
        user: entry.user.clone(),
        // The CAN API takes tags as one comma-joined string.
        tags: if entry.tags.is_empty() {
            None
        } else {
            Some(entry.tags.join(","))
        },
        description: entry.description.clone(),
    };
    match can.ingest(content.into(), meta).await {
        Ok(result) => {
            info!(
                "Ingested remote asset: hash={}, filename={}",
                &result.hash[..12],
                result.filename
            );
        }
        Err(e) => {
            // Logged but not propagated — the caller marks the entry handled.
            error!("Failed to ingest remote asset {}: {:#}", &can_hash[..12], e);
        }
    }
    Ok(())
}
/// Download a blob via iroh using the blob hash from the sync entry.
///
/// Despite the name, this reads from the LOCAL blob store — it relies on
/// iroh-docs having already replicated the content. Returns an empty Vec
/// (not an error) when the entry carries no blob hash.
async fn download_blob(node: &SyncNode, entry: &AssetSyncEntry) -> Result<Vec<u8>> {
    let blob_hash_str = match &entry.iroh_blob_hash {
        Some(h) => h,
        None => {
            warn!("No iroh blob hash in sync entry — cannot download");
            return Ok(Vec::new());
        }
    };
    // Parse the BLAKE3 hash
    let blob_hash: iroh_blobs::Hash = blob_hash_str
        .parse()
        .map_err(|_| anyhow::anyhow!("Invalid iroh blob hash: {}", &blob_hash_str[..12]))?;
    // Read from the local blob store (iroh-docs should have synced it).
    // Capacity hint comes from the advertised size; read_to_end still grows
    // the buffer if the actual blob is larger.
    let mut reader = node.blobs.reader(blob_hash);
    let mut buf = Vec::with_capacity(entry.size as usize);
    reader.read_to_end(&mut buf).await?;
    debug!(
        "Downloaded blob {} ({} bytes)",
        &blob_hash_str[..12],
        buf.len()
    );
    Ok(buf)
}
/// Verify CAN hash: SHA256(timestamp_string + content) matches expected hash.
///
/// The expected hash is hex-encoded; the digest input is the decimal string
/// form of the timestamp followed by the raw content bytes.
fn verify_can_hash(expected_hash: &str, timestamp: i64, content: &[u8]) -> bool {
    let digest = Sha256::new()
        .chain_update(timestamp.to_string().as_bytes())
        .chain_update(content)
        .finalize();
    hex::encode(digest) == expected_hash
}

View File

@ -1,288 +0,0 @@
use anyhow::{Context, Result};
use rusqlite::Connection;
use serde::{Deserialize, Serialize};
use crate::can_client::AssetMeta;
/// Filter criteria that determines which CAN assets belong to a library.
///
/// If `hashes` is set it takes precedence and every other field is ignored;
/// otherwise all set fields must match simultaneously (AND logic). See
/// `LibraryFilter::matches`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LibraryFilter {
    /// Match assets with this application tag
    #[serde(skip_serializing_if = "Option::is_none")]
    pub application: Option<String>,
    /// Match assets with any of these tags
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tags: Option<Vec<String>>,
    /// Match assets from this user
    #[serde(skip_serializing_if = "Option::is_none")]
    pub user: Option<String>,
    /// Match assets with MIME type prefix (e.g. "image/")
    #[serde(skip_serializing_if = "Option::is_none")]
    pub mime_prefix: Option<String>,
    /// Manual list of specific hashes to include
    #[serde(skip_serializing_if = "Option::is_none")]
    pub hashes: Option<Vec<String>>,
}
impl LibraryFilter {
    /// Check if an asset matches this filter.
    ///
    /// An explicit `hashes` list overrides every other criterion; otherwise
    /// each criterion that is set must hold (unset criteria match anything).
    pub fn matches(&self, asset: &AssetMeta) -> bool {
        // Explicit hash list: exact-membership test only.
        if let Some(ref hashes) = self.hashes {
            return hashes.contains(&asset.hash);
        }
        // AND logic: an unset criterion is vacuously true.
        let app_ok = self
            .application
            .as_ref()
            .map_or(true, |app| asset.application.as_deref() == Some(app.as_str()));
        // At least one of the required tags must be present on the asset.
        let tags_ok = self
            .tags
            .as_ref()
            .map_or(true, |required| required.iter().any(|t| asset.tags.contains(t)));
        let user_ok = self
            .user
            .as_ref()
            .map_or(true, |u| asset.user.as_deref() == Some(u.as_str()));
        let mime_ok = self
            .mime_prefix
            .as_ref()
            .map_or(true, |p| asset.mime_type.starts_with(p.as_str()));
        app_ok && tags_ok && user_ok && mime_ok
    }
}
/// A library definition stored locally.
///
/// Persisted in the `libraries` table by `SyncState` (the filter is stored
/// as JSON in `filter_json`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Library {
    /// Unique library ID (UUID)
    pub id: String,
    /// Human-readable name
    pub name: String,
    /// Filter criteria
    pub filter: LibraryFilter,
    /// iroh document ID (namespace) — set after creation; hex-encoded
    pub doc_id: Option<String>,
    /// Whether this library was created locally or joined from remote
    pub is_local: bool,
    /// Creation timestamp
    pub created_at: i64,
}
/// Tracks which assets have been announced to which libraries.
///
/// Backed by a small SQLite database (see `SyncState::open` for the schema).
/// Uses std::sync::Mutex because rusqlite::Connection is !Send,
/// so tokio::sync::RwLock won't work across .await points.
pub struct SyncState {
    /// Single serialized connection; all access goes through `lock_db`.
    db: std::sync::Mutex<Connection>,
}
impl SyncState {
/// Open or create the sync state database
pub fn open(path: &std::path::Path) -> Result<Self> {
let db = Connection::open(path).context("open sync state DB")?;
db.execute_batch(
"
CREATE TABLE IF NOT EXISTS libraries (
id TEXT PRIMARY KEY,
name TEXT NOT NULL,
filter_json TEXT NOT NULL,
doc_id TEXT,
is_local INTEGER NOT NULL DEFAULT 1,
created_at INTEGER NOT NULL
);
CREATE TABLE IF NOT EXISTS announced_assets (
library_id TEXT NOT NULL,
hash TEXT NOT NULL,
version INTEGER NOT NULL DEFAULT 1,
announced_at INTEGER NOT NULL,
PRIMARY KEY (library_id, hash),
FOREIGN KEY (library_id) REFERENCES libraries(id) ON DELETE CASCADE
);
CREATE TABLE IF NOT EXISTS sync_state (
key TEXT PRIMARY KEY,
value TEXT NOT NULL
);
",
)
.context("init sync state tables")?;
Ok(Self {
db: std::sync::Mutex::new(db),
})
}
fn lock_db(&self) -> std::sync::MutexGuard<'_, Connection> {
self.db.lock().expect("sync state DB lock poisoned")
}
// ── Library CRUD ──
pub fn save_library(&self, lib: &Library) -> Result<()> {
let db = self.lock_db();
let filter_json = serde_json::to_string(&lib.filter)?;
db.execute(
"INSERT OR REPLACE INTO libraries (id, name, filter_json, doc_id, is_local, created_at)
VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
rusqlite::params![
lib.id,
lib.name,
filter_json,
lib.doc_id,
lib.is_local as i32,
lib.created_at,
],
)?;
Ok(())
}
pub fn list_libraries(&self) -> Result<Vec<Library>> {
let db = self.lock_db();
let mut stmt =
db.prepare("SELECT id, name, filter_json, doc_id, is_local, created_at FROM libraries")?;
let libs = stmt
.query_map([], |row| {
let filter_json: String = row.get(2)?;
Ok(Library {
id: row.get(0)?,
name: row.get(1)?,
filter: serde_json::from_str(&filter_json).unwrap_or(LibraryFilter {
application: None,
tags: None,
user: None,
mime_prefix: None,
hashes: None,
}),
doc_id: row.get(3)?,
is_local: row.get::<_, i32>(4)? != 0,
created_at: row.get(5)?,
})
})?
.collect::<Result<Vec<_>, _>>()?;
Ok(libs)
}
pub fn get_library(&self, id: &str) -> Result<Option<Library>> {
let db = self.lock_db();
let mut stmt = db.prepare(
"SELECT id, name, filter_json, doc_id, is_local, created_at FROM libraries WHERE id = ?1",
)?;
let mut rows = stmt.query_map([id], |row| {
let filter_json: String = row.get(2)?;
Ok(Library {
id: row.get(0)?,
name: row.get(1)?,
filter: serde_json::from_str(&filter_json).unwrap_or(LibraryFilter {
application: None,
tags: None,
user: None,
mime_prefix: None,
hashes: None,
}),
doc_id: row.get(3)?,
is_local: row.get::<_, i32>(4)? != 0,
created_at: row.get(5)?,
})
})?;
match rows.next() {
Some(Ok(lib)) => Ok(Some(lib)),
Some(Err(e)) => Err(e.into()),
None => Ok(None),
}
}
pub fn delete_library(&self, id: &str) -> Result<()> {
let db = self.lock_db();
db.execute("DELETE FROM announced_assets WHERE library_id = ?1", [id])?;
db.execute("DELETE FROM libraries WHERE id = ?1", [id])?;
Ok(())
}
pub fn update_library_doc_id(&self, id: &str, doc_id: &str) -> Result<()> {
let db = self.lock_db();
db.execute(
"UPDATE libraries SET doc_id = ?1 WHERE id = ?2",
[doc_id, id],
)?;
Ok(())
}
// ── Asset announcement tracking ──
pub fn is_announced(&self, library_id: &str, hash: &str) -> Result<bool> {
let db = self.lock_db();
let count: i64 = db.query_row(
"SELECT COUNT(*) FROM announced_assets WHERE library_id = ?1 AND hash = ?2",
[library_id, hash],
|row| row.get(0),
)?;
Ok(count > 0)
}
pub fn get_announced_version(&self, library_id: &str, hash: &str) -> Result<Option<u64>> {
let db = self.lock_db();
let mut stmt = db.prepare(
"SELECT version FROM announced_assets WHERE library_id = ?1 AND hash = ?2",
)?;
let mut rows = stmt.query_map(rusqlite::params![library_id, hash], |row| {
row.get::<_, i64>(0)
})?;
match rows.next() {
Some(Ok(v)) => Ok(Some(v as u64)),
Some(Err(e)) => Err(e.into()),
None => Ok(None),
}
}
pub fn mark_announced(&self, library_id: &str, hash: &str, version: u64) -> Result<()> {
let db = self.lock_db();
let now = chrono::Utc::now().timestamp_millis();
db.execute(
"INSERT OR REPLACE INTO announced_assets (library_id, hash, version, announced_at)
VALUES (?1, ?2, ?3, ?4)",
rusqlite::params![library_id, hash, version as i64, now],
)?;
Ok(())
}
pub fn remove_announced(&self, library_id: &str, hash: &str) -> Result<()> {
let db = self.lock_db();
db.execute(
"DELETE FROM announced_assets WHERE library_id = ?1 AND hash = ?2",
[library_id, hash],
)?;
Ok(())
}
// ── General state ──
pub fn get_state(&self, key: &str) -> Result<Option<String>> {
let db = self.lock_db();
let mut stmt = db.prepare("SELECT value FROM sync_state WHERE key = ?1")?;
let mut rows = stmt.query_map([key], |row| row.get::<_, String>(0))?;
match rows.next() {
Some(Ok(v)) => Ok(Some(v)),
Some(Err(e)) => Err(e.into()),
None => Ok(None),
}
}
pub fn set_state(&self, key: &str, value: &str) -> Result<()> {
let db = self.lock_db();
db.execute(
"INSERT OR REPLACE INTO sync_state (key, value) VALUES (?1, ?2)",
[key, value],
)?;
Ok(())
}
}

View File

@ -1,121 +1,231 @@
#![allow(dead_code)]
//! CAN Sync — P2P full-mirror replication agent for CAN Service.
//!
//! Uses iroh for encrypted QUIC transport + NAT traversal,
//! and iroh-gossip for peer discovery via a shared passphrase.
//!
//! Each instance talks to its local CAN Service via the private
//! protobuf sync API (/sync/*), authenticated with an API key.
mod announcer;
mod can_client;
mod config;
mod fetcher;
mod library;
mod manifest;
mod node;
mod routes;
mod discovery;
mod peer;
mod protocol;
mod rendezvous;
use std::path::PathBuf;
use std::sync::Arc;
use std::path::Path;
use anyhow::{Context, Result};
use tracing::info;
use iroh::endpoint::presets::N0;
use iroh::{Endpoint, EndpointAddr, EndpointId};
use iroh_gossip::net::Gossip;
use tokio::sync::mpsc;
use tracing::{error, info, warn};
use crate::announcer::Announcer;
use crate::can_client::CanClient;
use crate::can_client::CanSyncClient;
use crate::config::SyncConfig;
use crate::fetcher::Fetcher;
use crate::library::SyncState;
use crate::node::SyncNode;
use crate::routes::AppState;
use crate::discovery::Discovery;
use crate::rendezvous::Rendezvous;
/// ALPN protocol identifier for CAN sync peer connections.
const SYNC_ALPN: &[u8] = b"can-sync/1";
/// Entry point: loads config, connects to the local CAN service, sets up
/// encrypted P2P networking (iroh), and discovers + syncs with peers.
#[tokio::main]
async fn main() -> Result<()> {
// Initialize tracing
// Initialize logging
tracing_subscriber::fmt()
.with_env_filter(
tracing_subscriber::EnvFilter::try_from_default_env()
.unwrap_or_else(|_| "can_sync=info,iroh=warn".parse().unwrap()),
.unwrap_or_else(|_| "can_sync=info,iroh=warn,iroh_gossip=warn".parse().unwrap()),
)
.init();
// Load config
let config_path = std::env::args()
.nth(1)
.map(PathBuf::from)
.unwrap_or_else(|| PathBuf::from("config.yaml"));
.unwrap_or_else(|| "config.yaml".to_string());
let config = SyncConfig::load(Path::new(&config_path))
.with_context(|| format!("loading config from {}", config_path))?;
let config = SyncConfig::load(&config_path)?;
info!("CAN Sync starting...");
info!(" CAN service: {}", config.can_service_url);
info!(" Listen addr: {}", config.listen_addr);
info!(" Data dir: {}", config.data_dir);
info!("CAN Sync v2 starting");
info!("CAN service: {}", config.can_service_url);
info!("Poll interval: {}s", config.poll_interval_secs);
// Ensure data directory exists
std::fs::create_dir_all(config.data_path())
.context("Failed to create data directory")?;
// Create HTTP client for local CAN service's sync API
let can = CanSyncClient::new(&config.can_service_url, &config.sync_api_key);
// Initialize CAN service client
let can = CanClient::new(&config.can_service_url);
// Check CAN service health
match can.health_check().await {
Ok(true) => info!("CAN service is reachable"),
Ok(false) | Err(_) => {
tracing::warn!(
"CAN service at {} is not reachable — will retry on each poll",
config.can_service_url
);
}
// Verify CAN service is reachable
if can.health_check().await {
info!("CAN service sync API is healthy");
} else {
warn!("CAN service sync API not reachable — will retry on sync");
}
// Open sync state database
let state = SyncState::open(&config.db_path())?;
let state = Arc::new(state);
info!("Sync state DB opened at {}", config.db_path().display());
// Start iroh P2P node
let node = SyncNode::spawn(&config).await?;
let node = Arc::new(node);
info!("iroh node ID: {}", node.peer_id());
// Build shared app state
let app_state = Arc::new(AppState {
node: node.clone(),
state: state.clone(),
can: can.clone(),
});
// Start the announcer (polls CAN service for new assets)
let announcer = Announcer::new(
can.clone(),
state.clone(),
node.clone(),
config.poll_interval_secs,
config.full_scan_interval_secs,
);
tokio::spawn(async move {
announcer.run().await;
});
// Start the fetcher (receives remote assets and ingests them)
let fetcher = Fetcher::new(can.clone(), state.clone(), node.clone());
tokio::spawn(async move {
fetcher.run().await;
});
// Build HTTP router
let router = routes::build_router(app_state);
// Start HTTP server
let listener = tokio::net::TcpListener::bind(&config.listen_addr)
// Create iroh endpoint for QUIC transport with n0 defaults (relay + discovery)
let endpoint = Endpoint::builder()
.preset(N0)
.alpns(vec![SYNC_ALPN.to_vec()])
.bind()
.await
.context("Failed to bind HTTP listener")?;
info!("CAN Sync API listening on http://{}", config.listen_addr);
.context("creating iroh endpoint")?;
// Open browser to status page
let status_url = format!("http://{}/status", config.listen_addr);
if open::that(&status_url).is_err() {
info!("Open {} in your browser to check status", status_url);
let node_id = endpoint.id();
info!("Node ID: {}", node_id);
let addrs = endpoint.bound_sockets();
if let Some(addr) = addrs.first() {
info!("Listening on {}", addr);
}
axum::serve(listener, router)
.await
.context("HTTP server error")?;
// Write our EndpointAddr to file if configured (for direct peer connection in tests)
if let Some(ref ticket_path) = config.ticket_file {
// Wait briefly for the endpoint to register with relay
tokio::time::sleep(std::time::Duration::from_millis(500)).await;
let addr = endpoint.addr();
let addr_json = serde_json::to_string(&addr)
.context("serializing EndpointAddr")?;
std::fs::write(ticket_path, &addr_json)
.with_context(|| format!("writing addr to {}", ticket_path))?;
info!("Wrote EndpointAddr to {}", ticket_path);
}
// Create gossip instance for peer discovery (not async — returns directly)
let gossip = Gossip::builder().spawn(endpoint.clone());
// Channel for discovered peers
let (peer_tx, mut peer_rx) = mpsc::channel::<EndpointId>(32);
// Spawn discovery via gossip (works once bootstrap peers are known)
let disc = Discovery::new(endpoint.clone(), gossip.clone(), &config.sync_passphrase);
let peer_tx_gossip = peer_tx.clone();
tokio::spawn(async move {
if let Err(e) = disc.run(peer_tx_gossip).await {
error!("Gossip discovery failed: {:#}", e);
}
});
// Spawn internet rendezvous via pkarr relay (discovers peers worldwide)
let rendezvous = Rendezvous::new(&config.sync_passphrase, node_id)
.context("creating rendezvous")?;
let peer_tx_rdv = peer_tx.clone();
tokio::spawn(async move {
if let Err(e) = rendezvous.run(peer_tx_rdv).await {
error!("Rendezvous discovery failed: {:#}", e);
}
});
// If a direct connect ticket file is specified, spawn a task to read it and connect
if let Some(ref ticket_path) = config.connect_ticket_file {
let ticket_path = ticket_path.clone();
let endpoint_direct = endpoint.clone();
let can_direct = can.clone();
tokio::spawn(async move {
info!("Waiting for peer addr file: {}", ticket_path);
// Poll until the file exists and is non-empty
let addr_json = loop {
match std::fs::read_to_string(&ticket_path) {
Ok(s) if !s.trim().is_empty() => break s.trim().to_string(),
_ => tokio::time::sleep(std::time::Duration::from_millis(200)).await,
}
};
info!("Read peer addr from {}", ticket_path);
let peer_addr: EndpointAddr = match serde_json::from_str(&addr_json) {
Ok(a) => a,
Err(e) => {
error!("Invalid EndpointAddr JSON: {:#}", e);
return;
}
};
let peer_id = peer_addr.id;
let short = peer_id.fmt_short().to_string();
info!("Direct connecting to peer: {} (from addr file)", short);
match endpoint_direct.connect(peer_addr, SYNC_ALPN).await {
Ok(conn) => {
info!("Direct connection to {} established!", short);
// Initial reconciliation
if let Err(e) = peer::run_sync_session(conn.clone(), can_direct.clone(), true).await {
error!("Initial sync with {} failed: {:#}", short, e);
return;
}
info!("Initial sync with {} complete, starting live sync", short);
// Live sync: SSE-driven push + accept incoming streams
peer::run_live_sync(conn, can_direct).await;
}
Err(e) => {
error!("Failed to connect to {}: {:#}", short, e);
}
}
});
}
// Spawn incoming connection handler
let endpoint_accept = endpoint.clone();
let can_accept = can.clone();
tokio::spawn(async move {
loop {
match endpoint_accept.accept().await {
Some(incoming) => {
let can_clone = can_accept.clone();
tokio::spawn(async move {
match incoming.await {
Ok(conn) => {
info!("Accepted incoming connection from {}", conn.remote_id().fmt_short());
peer::handle_incoming(conn, can_clone, std::time::Duration::from_secs(0)).await;
}
Err(e) => {
warn!("Failed to accept connection: {:#}", e);
}
}
});
}
None => {
info!("Endpoint closed, stopping accept loop");
break;
}
}
}
});
// Main loop: connect to discovered peers (from gossip) and sync
info!("Waiting for peers...");
while let Some(peer_id) = peer_rx.recv().await {
let short = peer_id.fmt_short();
info!("Connecting to discovered peer: {}", short);
let endpoint_clone = endpoint.clone();
let can_clone = can.clone();
tokio::spawn(async move {
let conn = match endpoint_clone.connect(peer_id, SYNC_ALPN).await {
Ok(c) => c,
Err(e) => {
error!("Failed to connect to {}: {:#}", short, e);
return;
}
};
if let Err(e) = peer::run_sync_session(conn.clone(), can_clone.clone(), true).await {
error!("Initial sync with {} failed: {:#}", short, e);
return;
}
peer::run_live_sync(conn, can_clone).await;
});
}
info!("CAN Sync shutting down");
Ok(())
}

View File

@ -1,75 +0,0 @@
use serde::{Deserialize, Serialize};
use crate::can_client::AssetMeta;
/// Entry stored in iroh documents for each synced asset.
/// Key = CAN hash, Value = serialized AssetSyncEntry
/// (postcard-encoded — see `to_bytes`/`from_bytes`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssetSyncEntry {
    /// CAN timestamp (milliseconds since epoch)
    pub timestamp: i64,
    /// MIME type
    pub mime_type: String,
    /// Application tag
    pub application: Option<String>,
    /// User identity
    pub user: Option<String>,
    /// Tags list
    pub tags: Vec<String>,
    /// Description
    pub description: Option<String>,
    /// Original human-readable filename
    pub human_filename: Option<String>,
    /// Original human-readable path
    pub human_path: Option<String>,
    /// File size in bytes
    pub size: i64,
    /// Whether the asset is trashed
    pub is_trashed: bool,
    /// iroh blob hash (BLAKE3) for downloading via iroh
    /// (None until the blob has been added to the blob store)
    pub iroh_blob_hash: Option<String>,
    /// Version counter for conflict resolution (higher wins)
    pub version: u64,
    /// Peer ID that last modified this entry
    pub last_modified_by: String,
}
impl AssetSyncEntry {
    /// Build a sync entry from CAN service asset metadata.
    ///
    /// The new entry starts at version 1, records `peer_id` as the writer,
    /// and leaves `iroh_blob_hash` unset.
    pub fn from_asset_meta(meta: &AssetMeta, peer_id: &str) -> Self {
        Self {
            timestamp: meta.timestamp,
            mime_type: meta.mime_type.clone(),
            application: meta.application.clone(),
            user: meta.user.clone(),
            tags: meta.tags.clone(),
            description: meta.description.clone(),
            human_filename: meta.human_filename.clone(),
            human_path: meta.human_path.clone(),
            size: meta.size,
            is_trashed: meta.is_trashed,
            iroh_blob_hash: None,
            version: 1,
            last_modified_by: peer_id.to_string(),
        }
    }

    /// Serialize to bytes (postcard) for storage in an iroh document.
    pub fn to_bytes(&self) -> Vec<u8> {
        postcard::to_allocvec(self).expect("serialize AssetSyncEntry")
    }

    /// Deserialize an entry previously produced by `to_bytes`.
    pub fn from_bytes(bytes: &[u8]) -> anyhow::Result<Self> {
        let entry = postcard::from_bytes(bytes)?;
        Ok(entry)
    }

    /// Check if metadata differs from a CAN asset (indicates update needed).
    pub fn metadata_differs(&self, meta: &AssetMeta) -> bool {
        let unchanged = self.tags == meta.tags
            && self.description == meta.description
            && self.is_trashed == meta.is_trashed
            && self.human_filename == meta.human_filename
            && self.human_path == meta.human_path;
        !unchanged
    }
}

View File

@ -1,150 +0,0 @@
use anyhow::{Context, Result};
use iroh::protocol::Router;
use iroh::Endpoint;
use iroh_blobs::store::mem::MemStore;
use iroh_blobs::{BlobsProtocol, ALPN as BLOBS_ALPN};
use iroh_docs::api::protocol::{AddrInfoOptions, ShareMode};
use iroh_docs::protocol::Docs;
use iroh_docs::{AuthorId, DocTicket, NamespaceId, ALPN as DOCS_ALPN};
use iroh_gossip::net::Gossip;
use iroh_gossip::ALPN as GOSSIP_ALPN;
use tokio::sync::OnceCell;
use crate::config::SyncConfig;
/// Holds all iroh subsystems for the P2P node.
pub struct SyncNode {
    /// QUIC endpoint (identity + transport).
    pub endpoint: Endpoint,
    /// Blob transfer protocol over the in-memory store.
    pub blobs: BlobsProtocol,
    /// CRDT-replicated key-value documents.
    pub docs: Docs,
    /// Gossip swarm used by docs and for peer communication.
    pub gossip: Gossip,
    /// Accepts incoming connections and dispatches by ALPN.
    pub router: Router,
    /// Cached default author ID (created once on startup)
    author_id: OnceCell<AuthorId>,
}
impl SyncNode {
    /// Start the iroh node with all protocol handlers
    /// (blobs, gossip, docs) mounted on one endpoint via a Router.
    /// The config argument is currently unused.
    pub async fn spawn(_config: &SyncConfig) -> Result<Self> {
        // Build endpoint (Ed25519 keypair auto-generated and cached)
        let endpoint = Endpoint::bind()
            .await
            .map_err(|e| anyhow::anyhow!("Failed to bind iroh endpoint: {}", e))?;
        tracing::info!(
            "iroh node started — EndpointID: {}",
            endpoint.id()
        );
        // Gossip for peer communication
        let gossip = Gossip::builder().spawn(endpoint.clone());
        // Blob store (in-memory — blobs are transient, CAN service is authoritative)
        let mem_store = MemStore::default();
        // Deref-coerce the MemStore into the generic blob Store handle.
        // NOTE(review): `mem_store` is dropped when this function returns;
        // this assumes BlobsProtocol/Docs clone the underlying store handle
        // internally — confirm against iroh-blobs docs.
        let blobs_store: &iroh_blobs::api::Store = &mem_store;
        let blobs = BlobsProtocol::new(blobs_store, None);
        // Document sync (CRDT-replicated key-value store)
        let docs = Docs::memory()
            .spawn(endpoint.clone(), blobs_store.clone(), gossip.clone())
            .await
            .context("Failed to spawn iroh-docs")?;
        // Router accepts incoming connections and dispatches to handlers by ALPN
        let router = Router::builder(endpoint.clone())
            .accept(BLOBS_ALPN, blobs.clone())
            .accept(GOSSIP_ALPN, gossip.clone())
            .accept(DOCS_ALPN, docs.clone())
            .spawn();
        Ok(Self {
            endpoint,
            blobs,
            docs,
            gossip,
            router,
            author_id: OnceCell::new(),
        })
    }
    /// Get this node's peer ID as a string
    pub fn peer_id(&self) -> String {
        self.endpoint.id().to_string()
    }
    /// Get the node's endpoint address info for sharing
    pub fn endpoint_addr(&self) -> iroh::EndpointAddr {
        self.endpoint.addr()
    }
    /// Get or create the default author for writing to documents.
    /// Cached in a OnceCell so the docs engine is consulted only once.
    pub async fn author(&self) -> Result<AuthorId> {
        self.author_id
            .get_or_try_init(|| async {
                self.docs.author_default().await
            })
            .await
            .copied()
    }
    /// Create a new iroh document and return its NamespaceId as a hex string
    pub async fn create_doc(&self) -> Result<String> {
        let doc = self.docs.create().await?;
        let ns_id = doc.id();
        Ok(hex::encode(ns_id.to_bytes()))
    }
    /// Open an existing document by its hex-encoded namespace ID
    pub async fn open_doc(&self, doc_id_hex: &str) -> Result<iroh_docs::api::Doc> {
        let ns_id = parse_namespace_id(doc_id_hex)?;
        self.docs
            .open(ns_id)
            .await?
            // Slicing [..12] is safe here: parse_namespace_id already
            // guaranteed the string is 64 hex chars.
            .ok_or_else(|| anyhow::anyhow!("Document {} not found", &doc_id_hex[..12]))
    }
    /// Write a key-value entry to a document
    /// (opens the doc and writes as the default author).
    pub async fn write_to_doc(
        &self,
        doc_id_hex: &str,
        key: &[u8],
        value: &[u8],
    ) -> Result<()> {
        let doc = self.open_doc(doc_id_hex).await?;
        let author = self.author().await?;
        doc.set_bytes(author, key.to_vec(), value.to_vec()).await?;
        Ok(())
    }
    /// Generate a share ticket (DocTicket) for a document.
    /// The ticket grants write access and embeds relay + direct addresses.
    pub async fn share_doc(&self, doc_id_hex: &str) -> Result<DocTicket> {
        let doc = self.open_doc(doc_id_hex).await?;
        let ticket = doc
            .share(ShareMode::Write, AddrInfoOptions::RelayAndAddresses)
            .await?;
        Ok(ticket)
    }
    /// Import a document from a DocTicket, returns the namespace ID as hex
    pub async fn import_doc(&self, ticket: DocTicket) -> Result<String> {
        let doc = self.docs.import(ticket).await?;
        let ns_id = doc.id();
        Ok(hex::encode(ns_id.to_bytes()))
    }
    /// Graceful shutdown (stops the router; protocol handlers go with it)
    pub async fn shutdown(self) -> Result<()> {
        tracing::info!("Shutting down iroh node...");
        self.router.shutdown().await?;
        Ok(())
    }
}
/// Parse a hex-encoded NamespaceId (must decode to exactly 32 bytes,
/// i.e. 64 hex characters).
pub fn parse_namespace_id(hex_str: &str) -> Result<NamespaceId> {
    let decoded = hex::decode(hex_str).context("Invalid hex in doc_id")?;
    match <[u8; 32]>::try_from(decoded.as_slice()) {
        Ok(bytes) => Ok(NamespaceId::from(bytes)),
        Err(_) => Err(anyhow::anyhow!("doc_id must be 32 bytes (64 hex chars)")),
    }
}

View File

@ -0,0 +1,469 @@
//! Per-peer sync: reconciliation and live bidirectional asset transfer.
//!
//! When two sync agents connect, they:
//! 1. Exchange hash lists (from their local CAN services)
//! 2. Compute the diff (what each side is missing)
//! 3. Send/receive missing assets concurrently (avoids deadlock)
//! 4. Subscribe to SSE events from local CAN for instant push on new assets
//!
//! The live sync uses:
//! - **SSE events** from local CAN service to detect new assets instantly
//! (replaces the old polling loop — no more wasted hash-list queries)
//! - An unbounded channel to share received hashes from the receive loop
//! to the push loop, preventing "echo" where an asset received from a
//! peer gets pushed right back to them.
//! - A fallback incremental poll on timeout for catch-up if SSE was briefly down.
use std::collections::{HashMap, HashSet};
use anyhow::{Context, Result};
use iroh::endpoint::Connection;
use prost::Message;
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use tokio::sync::mpsc;
use tracing::{debug, error, info, warn};
use crate::can_client::{CanSyncClient, SyncEvent};
use crate::protocol::*;
// Message type tags for QUIC stream framing
const MSG_HASH_SET: u8 = 0x01;
const MSG_ASSET_BUNDLE: u8 = 0x02;
const MSG_META_UPDATE: u8 = 0x03;
const MSG_DONE: u8 = 0x04;
/// Frame a protobuf message for the wire.
///
/// Layout: `[1-byte msg_type][4-byte big-endian payload length][payload]`.
fn encode_frame(msg_type: u8, payload: &[u8]) -> Vec<u8> {
    let mut frame = Vec::with_capacity(payload.len() + 5);
    frame.push(msg_type);
    frame.extend_from_slice(&(payload.len() as u32).to_be_bytes());
    frame.extend_from_slice(payload);
    frame
}
/// Read a single framed message from a QUIC recv stream.
/// Returns (msg_type, payload_bytes).
///
/// Frame layout mirrors `encode_frame`: a 1-byte type tag, a 4-byte
/// big-endian length (tokio's `read_u32` reads big-endian), then the payload.
async fn read_frame(recv: &mut iroh::endpoint::RecvStream) -> Result<(u8, Vec<u8>)> {
    let msg_type = recv.read_u8().await.context("reading message type")?;
    let len = recv.read_u32().await.context("reading message length")?;
    // Cap frames at 256 MiB so a corrupt or malicious peer cannot make us
    // allocate an unbounded buffer from a bogus length prefix.
    if len > 256 * 1024 * 1024 {
        anyhow::bail!("Message too large: {} bytes", len);
    }
    let mut payload = vec![0u8; len as usize];
    recv.read_exact(&mut payload)
        .await
        .context("reading message payload")?;
    Ok((msg_type, payload))
}
/// Run a full sync session with a connected peer.
///
/// This handles initial reconciliation: exchange hash lists, compute diffs,
/// then send/receive missing assets **concurrently** to avoid deadlock when
/// both sides have large amounts of data to transfer.
pub async fn run_sync_session(
    conn: Connection,
    can: CanSyncClient,
    is_initiator: bool,
) -> Result<()> {
    let peer_id = conn.remote_id();
    let short_id = peer_id.fmt_short().to_string();
    info!("Starting sync session with {} (initiator={})", short_id, is_initiator);
    // Initiator opens the stream, responder accepts it
    let (mut send, mut recv) = if is_initiator {
        conn.open_bi().await.context("opening bi stream")?
    } else {
        conn.accept_bi().await.context("accepting bi stream")?
    };
    // Step 1: Get our local hash list from CAN service
    let our_hashes = can.get_hashes().await.context("getting local hashes")?;
    let our_hash_map: HashMap<String, &AssetDigest> = our_hashes
        .assets
        .iter()
        .map(|a| (a.hash.clone(), a))
        .collect();
    info!(
        "Local state: {} assets, sending to peer {}",
        our_hashes.assets.len(),
        short_id
    );
    // Step 2: Send our hash set to peer
    let hash_set_msg = PeerHashSet {
        assets: our_hashes.assets.clone(),
    };
    let mut buf = Vec::with_capacity(hash_set_msg.encoded_len());
    hash_set_msg.encode(&mut buf)?;
    let frame = encode_frame(MSG_HASH_SET, &buf);
    send.write_all(&frame).await.context("sending hash set")?;
    send.flush().await?;
    // Step 3: Receive peer's hash set (must be the first frame they send)
    let (msg_type, payload) = read_frame(&mut recv).await.context("reading peer hash set")?;
    if msg_type != MSG_HASH_SET {
        anyhow::bail!("Expected hash set message, got type {}", msg_type);
    }
    let peer_hash_set = PeerHashSet::decode(payload.as_slice()).context("decoding peer hash set")?;
    let peer_hash_map: HashMap<String, &AssetDigest> = peer_hash_set
        .assets
        .iter()
        .map(|a| (a.hash.clone(), a))
        .collect();
    info!(
        "Peer {} has {} assets",
        short_id,
        peer_hash_set.assets.len()
    );
    // Step 4: Compute diffs — set difference in both directions
    let our_hashes_set: HashSet<&String> = our_hash_map.keys().collect();
    let peer_hashes_set: HashSet<&String> = peer_hash_map.keys().collect();
    let we_need: Vec<String> = peer_hashes_set
        .difference(&our_hashes_set)
        .map(|h| (*h).clone())
        .collect();
    let they_need: Vec<String> = our_hashes_set
        .difference(&peer_hashes_set)
        .map(|h| (*h).clone())
        .collect();
    info!(
        "Diff with {}: we need {}, they need {}",
        short_id,
        we_need.len(),
        they_need.len()
    );
    // Step 5+6: Send and receive assets CONCURRENTLY to avoid deadlock.
    let send_fut = async {
        if !they_need.is_empty() {
            send_assets(&can, &mut send, &they_need, &short_id).await?;
        }
        // Always send DONE (even with nothing to transfer) so the peer's
        // receive loop can terminate.
        let done_frame = encode_frame(MSG_DONE, &[]);
        send.write_all(&done_frame).await.context("sending DONE")?;
        send.flush().await.context("flushing after DONE")?;
        Ok::<_, anyhow::Error>(())
    };
    let recv_fut = receive_assets(&can, &mut recv, &short_id);
    // join! (not select!): both directions must run to completion.
    let (send_result, recv_result) = tokio::join!(send_fut, recv_fut);
    send_result.context("sending assets to peer")?;
    recv_result.context("receiving assets from peer")?;
    info!("Sync session with {} complete", short_id);
    Ok(())
}
/// Fetch the requested assets from the local CAN service in small batches
/// and stream each one to the peer as a framed `AssetBundle` message.
async fn send_assets(
    can: &CanSyncClient,
    send: &mut iroh::endpoint::SendStream,
    hashes: &[String],
    peer_short: &str,
) -> Result<()> {
    // Pull at most 10 assets per request to bound per-batch memory usage.
    for batch in hashes.chunks(10) {
        let response = can
            .pull(batch.to_vec())
            .await
            .context("pulling assets from CAN")?;
        for asset in response.bundles {
            let shown = &asset.hash[..asset.hash.len().min(12)];
            info!("Sending asset {} to peer {}", shown, peer_short);
            let mut encoded = Vec::with_capacity(asset.encoded_len());
            asset.encode(&mut encoded)?;
            send.write_all(&encode_frame(MSG_ASSET_BUNDLE, &encoded))
                .await?;
            send.flush().await?;
        }
    }
    Ok(())
}
/// Receive assets from peer and push them to local CAN service.
/// Returns the list of hashes that were successfully ingested.
///
/// Loops until a MSG_DONE frame arrives. Ingest failures are logged but do
/// not abort the session; the failed hash is simply not reported back.
async fn receive_assets(
    can: &CanSyncClient,
    recv: &mut iroh::endpoint::RecvStream,
    peer_short: &str,
) -> Result<Vec<String>> {
    let mut received = Vec::new();
    loop {
        let (msg_type, payload) = read_frame(recv).await.context("reading asset from peer")?;
        match msg_type {
            MSG_DONE => {
                // Peer finished its batch — stop reading this stream.
                debug!("Peer {} finished sending assets", peer_short);
                break;
            }
            MSG_ASSET_BUNDLE => {
                let bundle =
                    AssetBundle::decode(payload.as_slice()).context("decoding asset bundle")?;
                let hash = bundle.hash.clone();
                let hash_short = hash[..hash.len().min(12)].to_string();
                info!("Received asset {} from peer {}", hash_short, peer_short);
                match can.push(bundle).await {
                    Ok(resp) => {
                        if resp.already_existed {
                            debug!("Asset {} already existed locally", hash_short);
                        } else {
                            info!("Ingested asset {} from peer {}", resp.hash, peer_short);
                        }
                        // Report the hash either way so the caller can suppress
                        // echoing this asset back to the peer.
                        received.push(hash);
                    }
                    Err(e) => {
                        error!("Failed to push asset {} to CAN: {:#}", hash_short, e);
                    }
                }
            }
            MSG_META_UPDATE => {
                // Metadata-only change: description, tags, trash flag.
                let meta = MetaUpdateRequest::decode(payload.as_slice())
                    .context("decoding meta update")?;
                let hash_short = meta.hash[..meta.hash.len().min(12)].to_string();
                debug!(
                    "Received meta update for {} from peer {}",
                    hash_short, peer_short
                );
                if let Err(e) = can
                    .update_meta(
                        meta.hash.clone(),
                        meta.description.clone(),
                        meta.tags.clone(),
                        meta.is_trashed,
                    )
                    .await
                {
                    error!("Failed to update meta for {}: {:#}", hash_short, e);
                }
            }
            other => {
                // Unknown tag: log and keep the stream alive (forward
                // compatibility with peers speaking a newer protocol).
                warn!("Unknown message type {} from peer {}", other, peer_short);
            }
        }
    }
    Ok(received)
}
/// Handle an incoming connection from a peer who connected to us.
///
/// Runs initial reconciliation as the responder; only on success does it
/// transition into the long-lived live-sync loops.
pub async fn handle_incoming(
    conn: Connection,
    can: CanSyncClient,
    _poll_interval: std::time::Duration,
) {
    let short_id = conn.remote_id().fmt_short().to_string();
    info!("Incoming sync connection from {}", short_id);
    match run_sync_session(conn.clone(), can.clone(), false).await {
        Ok(()) => {
            info!("Initial sync with {} complete, starting live sync", short_id);
            run_live_sync(conn, can).await;
        }
        Err(e) => {
            error!("Sync session with {} failed: {:#}", short_id, e);
        }
    }
}
/// Run both live sync loops (push + receive) concurrently.
///
/// Uses SSE events from CAN service for instant push (no polling).
/// Uses an unbounded channel to prevent the "echo" problem.
///
/// `select!` (not `join!`) is deliberate: when either loop exits, the other
/// is cancelled and the whole live sync ends.
pub async fn run_live_sync(
    conn: Connection,
    can: CanSyncClient,
) {
    let short_id = conn.remote_id().fmt_short().to_string();
    // Channel for receive loop to notify push loop about received hashes
    let (received_tx, received_rx) = mpsc::unbounded_channel::<String>();
    // Subscribe to SSE events from local CAN service
    let sse_rx = can.subscribe_events();
    // Run push loop and receive loop concurrently — when either ends, we're done
    tokio::select! {
        result = live_push_loop(conn.clone(), can.clone(), received_rx, sse_rx) => {
            if let Err(e) = result {
                warn!("Live push loop with {} ended: {:#}", short_id, e);
            }
        }
        result = live_receive_loop(conn, can, received_tx) => {
            if let Err(e) = result {
                warn!("Live receive loop with {} ended: {:#}", short_id, e);
            }
        }
    }
}
/// Wait for SSE events from local CAN service and push new assets to the peer.
///
/// Drains the `received_rx` channel to learn about hashes that arrived from
/// the peer, so we don't echo them back.
///
/// Falls back to incremental poll if no SSE events arrive within 30s.
async fn live_push_loop(
    conn: Connection,
    can: CanSyncClient,
    mut received_rx: mpsc::UnboundedReceiver<String>,
    mut sse_rx: mpsc::UnboundedReceiver<SyncEvent>,
) -> Result<()> {
    let peer_id = conn.remote_id();
    let short_id = peer_id.fmt_short().to_string();
    info!("Starting live push loop with {} (SSE-driven)", short_id);
    // Track what we've already synced (local + received from peer).
    // max_timestamp is the high-water mark for the incremental fallback poll.
    let resp = can.get_hashes().await?;
    let mut max_timestamp: i64 = resp.assets.iter().map(|a| a.timestamp).max().unwrap_or(0);
    let mut known_hashes: HashSet<String> = resp.assets.into_iter().map(|a| a.hash).collect();
    // Fallback: if no SSE event in 30s, do an incremental poll to catch gaps
    let fallback_interval = std::time::Duration::from_secs(30);
    loop {
        // Wait for SSE event, or fallback timeout. Each select arm evaluates
        // to the batch of genuinely-new hashes to push this round.
        let new_hashes: Vec<String> = tokio::select! {
            event = sse_rx.recv() => {
                match event {
                    Some(evt) => {
                        // Drain any additional events that arrived at the same time
                        let mut batch = vec![evt];
                        while let Ok(more) = sse_rx.try_recv() {
                            batch.push(more);
                        }
                        // Drain received-from-peer hashes (echo prevention)
                        while let Ok(hash) = received_rx.try_recv() {
                            known_hashes.insert(hash);
                        }
                        // Filter to only truly new hashes, advancing the
                        // timestamp high-water mark as a side effect
                        batch
                            .into_iter()
                            .filter(|e| {
                                if e.timestamp > max_timestamp {
                                    max_timestamp = e.timestamp;
                                }
                                !known_hashes.contains(&e.hash)
                            })
                            .map(|e| e.hash)
                            .collect()
                    }
                    None => {
                        // Sender side dropped — no more local events will come.
                        warn!("SSE channel closed, stopping push loop");
                        break;
                    }
                }
            }
            // Fallback: periodic incremental poll
            _ = tokio::time::sleep(fallback_interval) => {
                debug!("Fallback incremental poll (no SSE events in {}s)", fallback_interval.as_secs());
                while let Ok(hash) = received_rx.try_recv() {
                    known_hashes.insert(hash);
                }
                // Only ask for assets newer than the high-water mark.
                match can.get_hashes_since(max_timestamp).await {
                    Ok(resp) => {
                        resp.assets
                            .into_iter()
                            .filter(|a| {
                                if a.timestamp > max_timestamp {
                                    max_timestamp = a.timestamp;
                                }
                                !known_hashes.contains(&a.hash)
                            })
                            .map(|a| a.hash)
                            .collect()
                    }
                    Err(e) => {
                        // Poll failure is transient — try again next round.
                        warn!("Fallback poll failed: {:#}", e);
                        continue;
                    }
                }
            }
        };
        if new_hashes.is_empty() {
            continue;
        }
        info!(
            "Pushing {} new assets to peer {}",
            new_hashes.len(),
            short_id
        );
        // Open a new QUIC stream for this batch
        match conn.open_bi().await {
            Ok((mut send, _recv)) => {
                if let Err(e) = send_assets(&can, &mut send, &new_hashes, &short_id).await {
                    error!("Failed to push new assets to {}: {:#}", short_id, e);
                }
                // Terminate the batch even after a partial failure so the
                // peer's receive loop doesn't hang waiting for DONE.
                let done_frame = encode_frame(MSG_DONE, &[]);
                let _ = send.write_all(&done_frame).await;
                let _ = send.flush().await;
                let _ = send.finish();
            }
            Err(e) => {
                warn!("Failed to open stream to {}: {:#}", short_id, e);
                break; // Connection probably dead
            }
        }
        // Update known set
        for h in new_hashes {
            known_hashes.insert(h);
        }
    }
    Ok(())
}
/// Accept incoming QUIC bi-streams from the peer and ingest the assets they
/// carry, forwarding every successfully received hash through `received_tx`
/// so the push loop can avoid echoing it back.
async fn live_receive_loop(
    conn: Connection,
    can: CanSyncClient,
    received_tx: mpsc::UnboundedSender<String>,
) -> Result<()> {
    let short_id = conn.remote_id().fmt_short().to_string();
    info!("Starting live receive loop with {}", short_id);
    loop {
        let (_send, mut recv) = match conn.accept_bi().await {
            Ok(streams) => streams,
            Err(e) => {
                // accept_bi failing means the connection itself is gone.
                info!("Live receive loop: connection to {} closed: {:#}", short_id, e);
                break;
            }
        };
        info!("Accepted live sync stream from peer {}", short_id);
        match receive_assets(&can, &mut recv, &short_id).await {
            Ok(hashes) => {
                // Push-loop may already be gone; a send failure is harmless.
                for hash in hashes {
                    let _ = received_tx.send(hash);
                }
            }
            Err(e) => {
                warn!("Error receiving live assets from {}: {:#}", short_id, e);
            }
        }
    }
    Ok(())
}

View File

@ -0,0 +1,123 @@
//! Protobuf message types for CAN sync API + peer-to-peer protocol.
//!
//! These match the types in CAN service's routes/sync.rs exactly.
use prost::Message;
// ── CAN Sync API messages (protobuf, same as CAN service) ───────────────
/// Request for the full hash list (no parameters).
#[derive(Clone, PartialEq, Message)]
pub struct HashListRequest {}
/// Full inventory of assets known to a CAN service.
#[derive(Clone, PartialEq, Message)]
pub struct HashListResponse {
    #[prost(message, repeated, tag = "1")]
    pub assets: Vec<AssetDigest>,
}
/// Compact per-asset summary used for reconciliation diffs.
#[derive(Clone, PartialEq, Message)]
pub struct AssetDigest {
    // Content hash identifying the asset.
    #[prost(string, tag = "1")]
    pub hash: String,
    // Asset timestamp; unit (ms vs s) is defined by the CAN service — TODO confirm.
    #[prost(int64, tag = "2")]
    pub timestamp: i64,
    // Asset size in bytes — presumably; verify against CAN service schema.
    #[prost(int64, tag = "3")]
    pub size: i64,
    #[prost(bool, tag = "4")]
    pub is_trashed: bool,
}
/// Request a set of assets by hash.
#[derive(Clone, PartialEq, Message)]
pub struct PullRequest {
    #[prost(string, repeated, tag = "1")]
    pub hashes: Vec<String>,
}
/// Response to a pull: the requested assets, metadata plus content.
#[derive(Clone, PartialEq, Message)]
pub struct PullResponse {
    #[prost(message, repeated, tag = "1")]
    pub bundles: Vec<AssetBundle>,
}
/// A complete asset: identifying hash, metadata fields, and raw content bytes.
/// Also sent directly between peers as the MSG_ASSET_BUNDLE frame payload.
#[derive(Clone, PartialEq, Message)]
pub struct AssetBundle {
    #[prost(string, tag = "1")]
    pub hash: String,
    #[prost(int64, tag = "2")]
    pub timestamp: i64,
    #[prost(string, tag = "3")]
    pub mime_type: String,
    // Optional provenance/describing metadata (tags 4-8).
    #[prost(string, optional, tag = "4")]
    pub application: Option<String>,
    #[prost(string, optional, tag = "5")]
    pub user_identity: Option<String>,
    #[prost(string, optional, tag = "6")]
    pub description: Option<String>,
    #[prost(string, optional, tag = "7")]
    pub human_filename: Option<String>,
    #[prost(string, optional, tag = "8")]
    pub human_path: Option<String>,
    #[prost(bool, tag = "9")]
    pub is_trashed: bool,
    #[prost(int64, tag = "10")]
    pub size: i64,
    #[prost(string, repeated, tag = "11")]
    pub tags: Vec<String>,
    // The asset's raw bytes.
    #[prost(bytes = "vec", tag = "12")]
    pub content: Vec<u8>,
}
/// Push one asset into a CAN service.
#[derive(Clone, PartialEq, Message)]
pub struct PushRequest {
    #[prost(message, optional, tag = "1")]
    pub bundle: Option<AssetBundle>,
}
/// Result of a push: the ingested hash and whether it was a duplicate.
#[derive(Clone, PartialEq, Message)]
pub struct PushResponse {
    #[prost(string, tag = "1")]
    pub hash: String,
    // True when the asset was already stored locally before this push.
    #[prost(bool, tag = "2")]
    pub already_existed: bool,
}
/// Metadata-only update for an existing asset (no content transfer).
#[derive(Clone, PartialEq, Message)]
pub struct MetaUpdateRequest {
    #[prost(string, tag = "1")]
    pub hash: String,
    #[prost(string, optional, tag = "2")]
    pub description: Option<String>,
    #[prost(string, repeated, tag = "3")]
    pub tags: Vec<String>,
    #[prost(bool, tag = "4")]
    pub is_trashed: bool,
}
/// Result of a metadata update.
#[derive(Clone, PartialEq, Message)]
pub struct MetaUpdateResponse {
    #[prost(bool, tag = "1")]
    pub success: bool,
}
// ── Peer-to-peer messages (sent over QUIC streams between sync agents) ──
/// Sent between peers during reconciliation: "here are all the hashes I have"
/// (payload of the MSG_HASH_SET frame).
#[derive(Clone, PartialEq, Message)]
pub struct PeerHashSet {
    #[prost(message, repeated, tag = "1")]
    pub assets: Vec<AssetDigest>,
}
/// Request from peer: "please send me these assets"
#[derive(Clone, PartialEq, Message)]
pub struct PeerPullRequest {
    #[prost(string, repeated, tag = "1")]
    pub hashes: Vec<String>,
}
/// A single asset being sent from one peer to another
#[derive(Clone, PartialEq, Message)]
pub struct PeerAssetTransfer {
    #[prost(message, optional, tag = "1")]
    pub bundle: Option<AssetBundle>,
}

View File

@ -0,0 +1,195 @@
//! Internet peer discovery via pkarr relay servers.
//!
//! Derives deterministic keypair "slots" from the shared passphrase.
//! Each peer claims a slot by publishing its EndpointId as a TXT record.
//! All peers scan all slots periodically to discover each other.
//!
//! This works over the internet — no LAN, no port forwarding needed.
//! Uses n0's public pkarr relay servers (same infrastructure as iroh).
use std::collections::HashSet;
use std::time::Duration;
use anyhow::{Context, Result};
use iroh::EndpointId;
use pkarr::{Client as PkarrClient, Keypair, SignedPacket};
use simple_dns::rdata::RData;
use tokio::sync::mpsc;
use tracing::{debug, info, warn};
/// Number of rendezvous "slots" (independent pkarr keypairs) peers can claim.
const NUM_SLOTS: usize = 8;
/// How often we re-publish our own slot record.
const PUBLISH_INTERVAL: Duration = Duration::from_secs(60);
/// How often we read every slot looking for new peers.
const SCAN_INTERVAL: Duration = Duration::from_secs(15);
/// DNS TXT record name under which each peer publishes its EndpointId.
const RECORD_NAME: &str = "_can_sync";
/// Derive a deterministic pkarr keypair for a given slot index.
///
/// The ed25519 secret is blake3("can-sync-rendezvous:<passphrase>:<slot>"),
/// so every peer sharing the passphrase derives the identical keypair for a
/// given slot — which is what lets peers read each other's slot records.
fn derive_slot_keypair(passphrase: &str, slot: usize) -> Keypair {
    let seed = blake3::hash(
        format!("can-sync-rendezvous:{}:{}", passphrase, slot).as_bytes(),
    );
    let seed_bytes: [u8; 32] = *seed.as_bytes();
    let secret = ed25519_dalek::SecretKey::from(seed_bytes);
    Keypair::from_secret_key(&secret)
}
/// Internet peer discovery via pkarr relay.
pub struct Rendezvous {
    // One deterministic keypair per slot (all derived from the passphrase).
    slots: Vec<Keypair>,
    // This node's own endpoint ID (published into a slot, skipped during scans).
    our_id: EndpointId,
    // pkarr relay client used for publish/resolve.
    client: PkarrClient,
}
impl Rendezvous {
    /// Create a new Rendezvous by deriving keypairs for all slots from the passphrase.
    pub fn new(passphrase: &str, our_id: EndpointId) -> Result<Self> {
        let slots: Vec<Keypair> = (0..NUM_SLOTS)
            .map(|i| derive_slot_keypair(passphrase, i))
            .collect();
        let client = PkarrClient::builder()
            .build()
            .context("creating pkarr client")?;
        Ok(Self {
            slots,
            our_id,
            client,
        })
    }
    /// Run the rendezvous loop: claim a slot, periodically re-publish and scan.
    ///
    /// Newly discovered peers are reported through `tx`. The loop has no exit
    /// path, so callers should spawn this on its own task.
    pub async fn run(self, tx: mpsc::Sender<EndpointId>) -> Result<()> {
        let our_id_hex = hex::encode(self.our_id.as_bytes());
        info!(
            "Rendezvous: starting internet discovery ({} slots, publish every {}s, scan every {}s)",
            NUM_SLOTS,
            PUBLISH_INTERVAL.as_secs(),
            SCAN_INTERVAL.as_secs(),
        );
        // Claim our slot (first empty, or hash-based fallback)
        let our_slot = self.claim_slot(&our_id_hex).await;
        info!("Rendezvous: claimed slot {}", our_slot);
        let mut known_peers: HashSet<EndpointId> = HashSet::new();
        let mut publish_tick = tokio::time::interval(PUBLISH_INTERVAL);
        let mut scan_tick = tokio::time::interval(SCAN_INTERVAL);
        // Do an initial scan immediately
        self.scan_all_slots(&mut known_peers, &tx).await;
        loop {
            tokio::select! {
                _ = publish_tick.tick() => {
                    // Keep our slot record alive on the relay.
                    if let Err(e) = self.publish_slot(our_slot, &our_id_hex).await {
                        warn!("Rendezvous: failed to re-publish slot {}: {:#}", our_slot, e);
                    }
                }
                _ = scan_tick.tick() => {
                    self.scan_all_slots(&mut known_peers, &tx).await;
                }
            }
        }
    }
    // Read every slot and report any newly discovered peer IDs.
    // `known_peers` deduplicates: a peer is sent through `tx` only on the
    // scan where `insert` first returns true.
    async fn scan_all_slots(
        &self,
        known_peers: &mut HashSet<EndpointId>,
        tx: &mpsc::Sender<EndpointId>,
    ) {
        for i in 0..NUM_SLOTS {
            match self.read_slot(i).await {
                // Skip our own record; `insert` is false for already-known peers.
                Some(peer_id) if peer_id != self.our_id && known_peers.insert(peer_id) => {
                    info!(
                        "Rendezvous: discovered peer {} in slot {}",
                        peer_id.fmt_short(),
                        i
                    );
                    // A closed receiver is not fatal to the scan itself.
                    let _ = tx.send(peer_id).await;
                }
                _ => {}
            }
        }
    }
    // Pick an available slot for this peer: reuse our old slot, take an empty one,
    // or fall back to a deterministic slot based on our ID.
    async fn claim_slot(&self, our_id_hex: &str) -> usize {
        // Check if we already own a slot (from a previous run)
        for i in 0..NUM_SLOTS {
            if let Some(peer_id) = self.read_slot(i).await {
                if peer_id == self.our_id {
                    debug!("Rendezvous: already own slot {}", i);
                    return i;
                }
            }
        }
        // Claim first empty slot
        for i in 0..NUM_SLOTS {
            if self.read_slot(i).await.is_none() {
                if let Err(e) = self.publish_slot(i, our_id_hex).await {
                    // Publish failed — try the next empty slot instead.
                    warn!("Rendezvous: failed to claim slot {}: {:#}", i, e);
                    continue;
                }
                return i;
            }
        }
        // All slots occupied — use deterministic slot based on our ID.
        // NOTE(review): this overwrites whoever currently holds that slot;
        // with more than NUM_SLOTS peers, slot churn is expected.
        let slot = {
            let h = blake3::hash(self.our_id.as_bytes());
            let bytes: [u8; 8] = h.as_bytes()[..8].try_into().unwrap();
            u64::from_le_bytes(bytes) as usize % NUM_SLOTS
        };
        let _ = self.publish_slot(slot, our_id_hex).await;
        slot
    }
    // Write our EndpointId into the given slot's DNS TXT record via the pkarr relay.
    async fn publish_slot(&self, slot: usize, our_id_hex: &str) -> Result<()> {
        let keypair = &self.slots[slot];
        let packet = SignedPacket::builder()
            .txt(
                RECORD_NAME.try_into().context("invalid record name")?,
                our_id_hex.try_into().context("invalid txt value")?,
                300, // 5 min TTL
            )
            .sign(keypair)
            .context("signing pkarr packet")?;
        self.client
            .publish(&packet, None)
            .await
            .context("publishing to pkarr relay")?;
        debug!("Rendezvous: published slot {}", slot);
        Ok(())
    }
    // Look up a slot's DNS TXT record and parse the EndpointId stored there, if any.
    // Any malformed record (non-UTF8, bad hex, wrong length) yields None.
    async fn read_slot(&self, slot: usize) -> Option<EndpointId> {
        let public_key = self.slots[slot].public_key();
        let packet = self.client.resolve(&public_key).await?;
        // Use pkarr's resource_records iterator to find our TXT record
        for record in packet.resource_records(RECORD_NAME) {
            if let RData::TXT(txt) = &record.rdata {
                // Try to extract the hex-encoded EndpointId from TXT attributes
                if let Ok(txt_string) = String::try_from(txt.clone()) {
                    let hex_str = txt_string.trim();
                    if let Ok(bytes) = hex::decode(hex_str) {
                        if bytes.len() == 32 {
                            if let Ok(arr) = <[u8; 32]>::try_from(bytes.as_slice()) {
                                return EndpointId::from_bytes(&arr).ok();
                            }
                        }
                    }
                }
            }
        }
        None
    }
}

View File

@ -1,430 +0,0 @@
use std::sync::Arc;
use axum::{
extract::{Path, State},
http::StatusCode,
response::IntoResponse,
routing::{get, post},
Json, Router,
};
use serde::{Deserialize, Serialize};
use crate::can_client::CanClient;
use crate::library::{Library, LibraryFilter, SyncState};
use crate::node::SyncNode;
/// Shared application state for route handlers
/// Shared application state for route handlers
pub struct AppState {
    /// Handle to the iroh node (peer identity, documents, tickets).
    pub node: Arc<SyncNode>,
    /// Library persistence (save/get/list/delete).
    pub state: Arc<SyncState>,
    /// Client for the local CAN service.
    pub can: CanClient,
}
// ── Request/Response types ──
/// Body of GET /status.
#[derive(Serialize)]
struct StatusResponse {
    peer_id: String,
    can_service_healthy: bool,
    library_count: usize,
}
/// One entry in the GET /peers listing.
#[derive(Serialize)]
struct PeerInfo {
    peer_id: String,
}
/// Body of POST /libraries.
#[derive(Deserialize)]
pub struct CreateLibraryRequest {
    pub name: String,
    pub filter: LibraryFilter,
}
/// JSON shape of a library in all library endpoints.
#[derive(Serialize)]
struct LibraryResponse {
    id: String,
    name: String,
    filter: LibraryFilter,
    // Hex NamespaceId of the backing iroh doc; None if doc creation failed.
    doc_id: Option<String>,
    // True for libraries created here, false for ones joined via ticket.
    is_local: bool,
    created_at: i64,
}
/// Body returned by POST /libraries/{id}/invite.
#[derive(Serialize)]
struct InviteResponse {
    // Base64-wrapped JSON envelope containing the DocTicket + library metadata.
    ticket: String,
}
/// Body of POST /join.
#[derive(Deserialize)]
pub struct JoinRequest {
    pub ticket: String,
}
/// Body returned by POST /join.
#[derive(Serialize)]
struct JoinResponse {
    library_id: String,
    message: String,
}
/// Uniform success envelope: `{"status":"success","data":...}`.
#[derive(Serialize)]
struct ApiResp<T: Serialize> {
    status: String,
    data: T,
}
/// Uniform error envelope: `{"status":"error","error":"..."}`.
#[derive(Serialize)]
struct ApiErr {
    status: String,
    error: String,
}
/// Wrap a payload in the success envelope.
fn ok_json<T: Serialize>(data: T) -> Json<ApiResp<T>> {
    Json(ApiResp {
        status: "success".to_string(),
        data,
    })
}
/// Build an error response with the given HTTP status code.
fn err_resp(status: StatusCode, msg: &str) -> (StatusCode, Json<ApiErr>) {
    (
        status,
        Json(ApiErr {
            status: "error".to_string(),
            error: msg.to_string(),
        }),
    )
}
// ── Routes ──
/// Build the axum router exposing the sync agent's HTTP API.
pub fn build_router(app_state: Arc<AppState>) -> Router {
    Router::new()
        .route("/status", get(get_status))
        .route("/peers", get(get_peers))
        .route("/libraries", post(create_library).get(list_libraries))
        .route(
            "/libraries/{id}",
            get(get_library).delete(delete_library),
        )
        .route("/libraries/{id}/invite", post(create_invite))
        .route("/join", post(join_library))
        .with_state(app_state)
}
// ── Handlers ──
/// GET /status — report peer ID, CAN-service health, and library count.
async fn get_status(State(app): State<Arc<AppState>>) -> impl IntoResponse {
    // A failed health check is reported as unhealthy, not as an HTTP error.
    let can_healthy = app.can.health_check().await.unwrap_or(false);
    let lib_count = app.state.list_libraries().unwrap_or_default().len();
    ok_json(StatusResponse {
        peer_id: app.node.peer_id(),
        can_service_healthy: can_healthy,
        library_count: lib_count,
    })
    .into_response()
}
/// GET /peers — currently returns only this node itself; no connected-peer
/// tracking is wired up here. NOTE(review): confirm whether this is intended.
async fn get_peers(State(app): State<Arc<AppState>>) -> impl IntoResponse {
    let peers: Vec<PeerInfo> = vec![PeerInfo {
        peer_id: app.node.peer_id(),
    }];
    ok_json(peers).into_response()
}
/// POST /libraries — create a new local library backed by a fresh iroh document.
async fn create_library(
    State(app): State<Arc<AppState>>,
    Json(req): Json<CreateLibraryRequest>,
) -> impl IntoResponse {
    // Create an iroh document for this library. Failure is non-fatal: the
    // library is still saved, just with doc_id = None (so it can't be shared).
    let doc_id = match app.node.create_doc().await {
        Ok(id) => Some(id),
        Err(e) => {
            tracing::warn!("Failed to create iroh document for library: {:#}", e);
            None
        }
    };
    let lib = Library {
        id: uuid::Uuid::new_v4().to_string(),
        name: req.name,
        filter: req.filter,
        doc_id,
        is_local: true,
        created_at: chrono::Utc::now().timestamp_millis(),
    };
    if let Err(e) = app.state.save_library(&lib) {
        return err_resp(
            StatusCode::INTERNAL_SERVER_ERROR,
            &format!("save failed: {}", e),
        )
        .into_response();
    }
    tracing::info!(
        "Created library '{}' (id={}, doc_id={:?})",
        lib.name,
        &lib.id[..8],
        lib.doc_id.as_deref().map(|d| &d[..12.min(d.len())])
    );
    ok_json(LibraryResponse {
        id: lib.id,
        name: lib.name,
        filter: lib.filter,
        doc_id: lib.doc_id,
        is_local: lib.is_local,
        created_at: lib.created_at,
    })
    .into_response()
}
/// GET /libraries — list every saved library as JSON.
async fn list_libraries(State(app): State<Arc<AppState>>) -> impl IntoResponse {
    match app.state.list_libraries() {
        Ok(libs) => {
            let body: Vec<LibraryResponse> = libs
                .into_iter()
                .map(|l| LibraryResponse {
                    id: l.id,
                    name: l.name,
                    filter: l.filter,
                    doc_id: l.doc_id,
                    is_local: l.is_local,
                    created_at: l.created_at,
                })
                .collect();
            ok_json(body).into_response()
        }
        Err(e) => err_resp(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()).into_response(),
    }
}
/// GET /libraries/{id} — fetch a single library, 404 if unknown.
async fn get_library(
    State(app): State<Arc<AppState>>,
    Path(id): Path<String>,
) -> impl IntoResponse {
    match app.state.get_library(&id) {
        Ok(Some(lib)) => ok_json(LibraryResponse {
            id: lib.id,
            name: lib.name,
            filter: lib.filter,
            doc_id: lib.doc_id,
            is_local: lib.is_local,
            created_at: lib.created_at,
        })
        .into_response(),
        Ok(None) => err_resp(StatusCode::NOT_FOUND, "Library not found").into_response(),
        Err(e) => {
            err_resp(StatusCode::INTERNAL_SERVER_ERROR, &format!("{}", e)).into_response()
        }
    }
}
/// DELETE /libraries/{id} — remove a library record.
/// Note: deleting an unknown id is not distinguished here — TODO confirm
/// whether `delete_library` errors or silently succeeds for missing ids.
async fn delete_library(
    State(app): State<Arc<AppState>>,
    Path(id): Path<String>,
) -> impl IntoResponse {
    match app.state.delete_library(&id) {
        Ok(()) => ok_json("deleted").into_response(),
        Err(e) => {
            err_resp(StatusCode::INTERNAL_SERVER_ERROR, &format!("{}", e)).into_response()
        }
    }
}
/// POST /libraries/{id}/invite — produce an invite string for a library.
///
/// The invite is a base64-encoded JSON envelope: the iroh DocTicket plus the
/// library's name and filter, so the joiner can recreate its local record.
async fn create_invite(
    State(app): State<Arc<AppState>>,
    Path(id): Path<String>,
) -> impl IntoResponse {
    match app.state.get_library(&id) {
        Ok(Some(lib)) => {
            // A library without a backing doc cannot be shared.
            let doc_id = match &lib.doc_id {
                Some(d) => d,
                None => {
                    return err_resp(
                        StatusCode::BAD_REQUEST,
                        "Library has no iroh document — cannot create invite",
                    )
                    .into_response()
                }
            };
            // Generate a real DocTicket via iroh
            match app.node.share_doc(doc_id).await {
                Ok(ticket) => {
                    // DocTicket implements Display via iroh's Ticket trait (base32 serialization)
                    let ticket_str = ticket.to_string();
                    // Wrap with library metadata so the joiner knows the name and filter
                    let invite_data = serde_json::json!({
                        "ticket": ticket_str,
                        "library_name": lib.name,
                        "filter": lib.filter,
                    });
                    let invite_b64 = base64_encode(
                        &serde_json::to_vec(&invite_data).unwrap(),
                    );
                    ok_json(InviteResponse { ticket: invite_b64 }).into_response()
                }
                Err(e) => err_resp(
                    StatusCode::INTERNAL_SERVER_ERROR,
                    &format!("Failed to create invite: {}", e),
                )
                .into_response(),
            }
        }
        Ok(None) => err_resp(StatusCode::NOT_FOUND, "Library not found").into_response(),
        Err(e) => {
            err_resp(StatusCode::INTERNAL_SERVER_ERROR, &format!("{}", e)).into_response()
        }
    }
}
/// POST /join — join a remote library from an invite string.
///
/// Unwraps the base64 JSON envelope produced by `create_invite`, imports the
/// embedded DocTicket via iroh, and saves a non-local library record.
async fn join_library(
    State(app): State<Arc<AppState>>,
    Json(req): Json<JoinRequest>,
) -> impl IntoResponse {
    // Decode our envelope
    let ticket_bytes = match base64_decode(&req.ticket) {
        Ok(b) => b,
        Err(_) => {
            return err_resp(StatusCode::BAD_REQUEST, "Invalid ticket encoding").into_response()
        }
    };
    let ticket_data: serde_json::Value = match serde_json::from_slice(&ticket_bytes) {
        Ok(v) => v,
        Err(_) => {
            return err_resp(StatusCode::BAD_REQUEST, "Invalid ticket data").into_response()
        }
    };
    // Extract the real DocTicket string
    let ticket_str = match ticket_data["ticket"].as_str() {
        Some(s) => s,
        None => {
            return err_resp(StatusCode::BAD_REQUEST, "Missing 'ticket' field in invite")
                .into_response()
        }
    };
    // Parse DocTicket from the serialized string
    let doc_ticket: iroh_docs::DocTicket = match ticket_str.parse() {
        Ok(t) => t,
        Err(e) => {
            return err_resp(
                StatusCode::BAD_REQUEST,
                &format!("Invalid DocTicket: {}", e),
            )
            .into_response()
        }
    };
    // Import the document via iroh (starts sync with remote peers)
    let doc_id_hex = match app.node.import_doc(doc_ticket).await {
        Ok(id) => id,
        Err(e) => {
            return err_resp(
                StatusCode::INTERNAL_SERVER_ERROR,
                &format!("Failed to join document: {}", e),
            )
            .into_response()
        }
    };
    // Library name/filter come from the envelope; both degrade gracefully
    // (fallback name, empty filter) if the fields are missing or malformed.
    let name = ticket_data["library_name"]
        .as_str()
        .unwrap_or("remote library")
        .to_string();
    let filter: LibraryFilter = serde_json::from_value(ticket_data["filter"].clone())
        .unwrap_or(LibraryFilter {
            application: None,
            tags: None,
            user: None,
            mime_prefix: None,
            hashes: None,
        });
    let lib = Library {
        id: uuid::Uuid::new_v4().to_string(),
        name: name.clone(),
        filter,
        doc_id: Some(doc_id_hex),
        is_local: false,
        created_at: chrono::Utc::now().timestamp_millis(),
    };
    if let Err(e) = app.state.save_library(&lib) {
        return err_resp(
            StatusCode::INTERNAL_SERVER_ERROR,
            &format!("save failed: {}", e),
        )
        .into_response();
    }
    tracing::info!(
        "Joined library '{}' (id={}, doc_id={:?})",
        name,
        &lib.id[..8],
        lib.doc_id.as_deref().map(|d| &d[..12.min(d.len())])
    );
    ok_json(JoinResponse {
        library_id: lib.id,
        message: "Joined library successfully".to_string(),
    })
    .into_response()
}
// ── Base64 helpers ──
/// Encode bytes as standard base64 (RFC 4648 alphabet, '=' padding).
fn base64_encode(data: &[u8]) -> String {
    const CHARS: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    let mut out = String::with_capacity((data.len() + 2) / 3 * 4);
    for group in data.chunks(3) {
        let n = group.len();
        // Pack up to 3 input bytes into one 24-bit value.
        let mut triple = (group[0] as u32) << 16;
        if n > 1 {
            triple |= (group[1] as u32) << 8;
        }
        if n > 2 {
            triple |= group[2] as u32;
        }
        // Emit four 6-bit symbols, padding the missing tail with '='.
        out.push(CHARS[((triple >> 18) & 0x3F) as usize] as char);
        out.push(CHARS[((triple >> 12) & 0x3F) as usize] as char);
        out.push(if n > 1 { CHARS[((triple >> 6) & 0x3F) as usize] as char } else { '=' });
        out.push(if n > 2 { CHARS[(triple & 0x3F) as usize] as char } else { '=' });
    }
    out
}
/// Decode standard base64 (RFC 4648 alphabet, optional '=' padding).
///
/// Fix over the previous version: characters outside the base64 alphabet
/// used to be silently dropped (`filter_map`), which could shift the 4-char
/// grouping and silently corrupt the decoded bytes. Invalid characters and
/// impossible lengths (4k+1 symbols) are now rejected with an Err, which
/// callers already surface as "Invalid ticket encoding".
fn base64_decode(input: &str) -> Result<Vec<u8>, &'static str> {
    const CHARS: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    // Padding carries no data; strip it before mapping symbols to sextets.
    let input = input.trim_end_matches('=');
    let mut sextets = Vec::with_capacity(input.len());
    for b in input.bytes() {
        match CHARS.iter().position(|&c| c == b) {
            Some(p) => sextets.push(p as u8),
            None => return Err("invalid base64 character"),
        }
    }
    // A final group of exactly 1 symbol (6 bits) cannot form a whole byte.
    if sextets.len() % 4 == 1 {
        return Err("invalid base64 length");
    }
    let mut buf = Vec::with_capacity(sextets.len() * 3 / 4);
    for chunk in sextets.chunks(4) {
        if chunk.len() >= 2 {
            buf.push((chunk[0] << 2) | (chunk[1] >> 4));
        }
        if chunk.len() >= 3 {
            buf.push((chunk[1] << 4) | (chunk[2] >> 2));
        }
        if chunk.len() == 4 {
            buf.push((chunk[2] << 6) | chunk[3]);
        }
    }
    Ok(buf)
}

File diff suppressed because it is too large Load Diff

View File

@ -49,6 +49,7 @@ pub struct CanClient {
}
impl CanClient {
/// Create a new client pointed at the given CAN service base URL.
pub fn new(base_url: &str) -> Self {
Self {
client: reqwest::blocking::Client::new(),

View File

@ -26,6 +26,7 @@ const FILE_ATTRIBUTE_DIRECTORY: u32 = 0x10;
const FILE_ATTRIBUTE_READONLY: u32 = 0x01;
const FILE_ATTRIBUTE_ARCHIVE: u32 = 0x20;
// Wrap a raw NTSTATUS error code into WinFSP's error type.
fn ntstatus(code: i32) -> FspError {
FspError::NTSTATUS(code)
}
@ -54,6 +55,7 @@ pub struct CanFileContext {
impl FileSystemContext for CanFs {
type FileContext = CanFileContext;
/// Called by Windows to check if a file/folder exists and get its basic attributes before opening it.
fn get_security_by_name(
&self,
file_name: &U16CStr,
@ -83,6 +85,7 @@ impl FileSystemContext for CanFs {
})
}
/// Called when a file or directory is opened; returns a context handle and fills in size/timestamps.
fn open(
&self,
file_name: &U16CStr,
@ -142,8 +145,10 @@ impl FileSystemContext for CanFs {
})
}
/// Called when a handle is closed; nothing to clean up since content is dropped automatically.
fn close(&self, _context: Self::FileContext) {}
/// Returns up-to-date size and attribute info for an already-opened file or directory.
fn get_file_info(
&self,
context: &Self::FileContext,
@ -194,6 +199,7 @@ impl FileSystemContext for CanFs {
Ok(())
}
/// Reads file bytes at the given offset; downloads the asset from the CAN service on first access.
fn read(
&self,
context: &Self::FileContext,
@ -233,6 +239,7 @@ impl FileSystemContext for CanFs {
Ok(count as u32)
}
/// Lists the contents of a directory, including "." and ".." entries, for Windows Explorer and dir commands.
fn read_directory(
&self,
context: &Self::FileContext,
@ -308,6 +315,7 @@ impl FileSystemContext for CanFs {
Ok(context.dir_buffer.read(marker, buffer))
}
/// Reports the virtual drive's total and free space (shows as a 1 GB read-only volume).
fn get_volume_info(&self, out_volume_info: &mut VolumeInfo) -> winfsp::Result<()> {
out_volume_info.total_size = 1024 * 1024 * 1024; // 1 GB
out_volume_info.free_size = 0;

View File

@ -17,6 +17,7 @@ use crate::api::CanClient;
use crate::fs::{CacheState, CanFs};
use crate::tree::VirtualTree;
/// Command-line arguments for mounting CAN service assets as a virtual Windows drive using WinFSP.
#[derive(Parser)]
#[command(name = "canfs", about = "Mount CAN service assets as a virtual drive")]
struct Args {
@ -33,6 +34,7 @@ struct Args {
refresh_secs: u64,
}
/// Entry point: connects to the CAN service, builds a virtual file tree, and mounts it as a read-only Windows drive.
fn main() {
tracing_subscriber::fmt()
.with_env_filter(

View File

@ -131,6 +131,7 @@ struct TreeBuilder {
}
impl TreeBuilder {
// Create a new tree builder with an empty root directory node.
fn new() -> Self {
let root = VNode {
name: String::new(),

View File

@ -531,6 +531,24 @@ function mimeToExt(mime) {
return map[mime] || mime.split('/').pop() || 'bin';
}
// Bucket a MIME type into one of the TYPE/ folder categories used by the
// virtual tree: 'images', 'pdf', 'video', 'audio', 'documents', or 'others'.
function mimeToTypeCategory(mime) {
  if (mime.startsWith('image/')) return 'images';
  if (mime === 'application/pdf') return 'pdf';
  if (mime.startsWith('video/')) return 'video';
  if (mime.startsWith('audio/')) return 'audio';
  // Everything text-like or office-suite-like counts as a document.
  const documentExact = [
    'application/json',
    'application/xml',
    'application/msword',
    'application/rtf',
    'application/vnd.oasis.opendocument.text',
    'application/vnd.oasis.opendocument.spreadsheet',
  ];
  const documentPrefixes = [
    'text/',
    'application/vnd.openxmlformats',
    'application/vnd.ms-',
  ];
  const isDocument = documentExact.includes(mime)
    || documentPrefixes.some((prefix) => mime.startsWith(prefix));
  return isDocument ? 'documents' : 'others';
}
function buildVirtualTree(assets) {
const root = { name: '', type: 'dir', children: {}, items: [] };
@ -591,6 +609,12 @@ function buildVirtualTree(assets) {
addFile(tagDir, friendlyName, asset);
}
}
// TYPE/
const typeRoot = ensureDir(root, 'TYPE');
const typeCat = mimeToTypeCategory(asset.mime_type);
const typeDir = ensureDir(typeRoot, typeCat);
addFile(typeDir, friendlyName, asset);
}
return root;

View File

@ -9,11 +9,13 @@ use std::collections::HashMap;
const CAN_API: &str = "http://127.0.0.1:3210/api/v1/can/0";
// Shared state passed to every request handler; holds a reusable HTTP client.
#[derive(Clone)]
struct AppState {
client: reqwest::Client,
}
/// Web-based file manager UI that proxies requests to the CAN service API.
#[tokio::main]
async fn main() {
tracing_subscriber::fmt()
@ -47,6 +49,7 @@ async fn main() {
axum::serve(listener, app).await.unwrap();
}
// Return the single-page HTML UI for the file manager.
async fn serve_index() -> Html<&'static str> {
Html(html::INDEX_HTML)
}
@ -86,6 +89,7 @@ fn build_qs(params: &HashMap<String, String>) -> String {
format!("?{}", qs.join("&"))
}
// Percent-encode a string for use in URL query parameters.
fn urlencoding(s: &str) -> String {
s.chars()
.map(|c| match c {

View File

@ -241,6 +241,23 @@ version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d"
[[package]]
name = "futures-io"
version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718"
[[package]]
name = "futures-macro"
version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "futures-sink"
version = "0.3.32"
@ -260,7 +277,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6"
dependencies = [
"futures-core",
"futures-io",
"futures-macro",
"futures-sink",
"futures-task",
"memchr",
"pin-project-lite",
"slab",
]
@ -850,11 +871,13 @@ name = "paste"
version = "0.1.0"
dependencies = [
"axum",
"futures-util",
"open",
"reqwest",
"serde",
"serde_json",
"tokio",
"tokio-stream",
"tracing",
"tracing-subscriber",
]
@ -991,12 +1014,14 @@ dependencies = [
"sync_wrapper",
"tokio",
"tokio-native-tls",
"tokio-util",
"tower",
"tower-http",
"tower-service",
"url",
"wasm-bindgen",
"wasm-bindgen-futures",
"wasm-streams",
"web-sys",
]
@ -1379,6 +1404,17 @@ dependencies = [
"tokio",
]
[[package]]
name = "tokio-stream"
version = "0.1.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70"
dependencies = [
"futures-core",
"pin-project-lite",
"tokio",
]
[[package]]
name = "tokio-util"
version = "0.7.18"
@ -1680,6 +1716,19 @@ dependencies = [
"wasmparser",
]
[[package]]
name = "wasm-streams"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65"
dependencies = [
"futures-util",
"js-sys",
"wasm-bindgen",
"wasm-bindgen-futures",
"web-sys",
]
[[package]]
name = "wasmparser"
version = "0.244.0"

View File

@ -12,9 +12,11 @@ path = "src/main.rs"
[dependencies]
axum = { version = "0.8", features = ["multipart"] }
tokio = { version = "1", features = ["full"] }
reqwest = { version = "0.12", features = ["multipart", "json"] }
reqwest = { version = "0.12", features = ["multipart", "json", "stream"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
open = "5"
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
tokio-stream = "0.1"
futures-util = "0.3"

View File

@ -376,6 +376,14 @@ fileInput.addEventListener('change', () => {
// Initial load
loadItems();
// Live updates via SSE — auto-refresh when assets arrive from sync or other sources
const evtSource = new EventSource('/paste/events');
evtSource.addEventListener('new_asset', () => loadItems());
evtSource.onerror = () => {
// EventSource auto-reconnects; just log for debugging
console.debug('SSE connection lost, reconnecting...');
};
</script>
</body>

View File

@ -2,19 +2,23 @@ mod html;
use axum::extract::{DefaultBodyLimit, Multipart, Path, State};
use axum::http::{header, StatusCode};
use axum::response::sse::{Event, Sse};
use axum::response::{Html, IntoResponse, Response};
use axum::routing::{get, post};
use axum::{Json, Router};
use serde::Deserialize;
use std::convert::Infallible;
use std::net::SocketAddr;
const CAN_API: &str = "http://127.0.0.1:3210/api/v1/can/0";
/// Shared HTTP client for talking to the CAN service backend.
#[derive(Clone)]
struct AppState {
client: reqwest::Client,
}
/// JSON body for the text paste endpoint.
#[derive(Deserialize)]
struct PasteTextRequest {
text: String,
@ -60,6 +64,7 @@ async fn forward(resp: Result<reqwest::Response, reqwest::Error>) -> Response {
// ── Handlers ─────────────────────────────────────────────────────────────
/// Serve the single-page HTML frontend.
async fn serve_index() -> Html<&'static str> {
Html(html::INDEX_HTML)
}
@ -225,8 +230,70 @@ async fn proxy_thumb(
forward(resp).await
}
/// Proxy SSE (Server-Sent Events) from the CAN service to the browser so
/// the frontend auto-refreshes when new pastes arrive.
///
/// A background task holds one long-lived `GET {CAN_API}/events` request,
/// re-parses the raw byte stream back into SSE frames, and forwards each
/// frame through an mpsc channel to the browser-facing stream. If the
/// upstream connection drops or fails, the task reconnects forever.
async fn paste_events(
State(state): State<AppState>,
) -> Sse<impl futures_util::Stream<Item = Result<Event, Infallible>>> {
// Bounded channel: once the browser disconnects, send() fails and the
// background task exits via the `return` below.
let (tx, rx) = tokio::sync::mpsc::channel::<Result<Event, Infallible>>(32);
let client = state.client.clone();
tokio::spawn(async move {
loop {
match client.get(format!("{CAN_API}/events")).send().await {
Ok(resp) => {
use futures_util::StreamExt;
let mut stream = resp.bytes_stream();
// Accumulates bytes across chunk boundaries until at least one
// complete frame (terminated by a blank line) is buffered.
let mut buf = String::new();
while let Some(chunk) = stream.next().await {
let Ok(bytes) = chunk else { break };
buf.push_str(&String::from_utf8_lossy(&bytes));
// Parse SSE frames (double-newline delimited).
// NOTE(review): assumes bare "\n" line endings; an upstream
// emitting "\r\n\r\n" would never match — confirm the CAN
// service's SSE framing.
while let Some(pos) = buf.find("\n\n") {
let frame = buf[..pos].to_string();
buf = buf[pos + 2..].to_string();
let mut event_type = None;
let mut data = None;
for line in frame.lines() {
if let Some(v) = line.strip_prefix("event: ") {
event_type = Some(v.to_string());
} else if let Some(v) = line.strip_prefix("data: ") {
data = Some(v.to_string());
}
// lines starting with ':' are SSE comments (keepalive) — skip
}
// Re-emit the frame downstream with the same event name
// and payload it arrived with; frames with no data line
// (pure keepalives) are dropped.
if let Some(d) = data {
let mut evt = Event::default().data(d);
if let Some(t) = event_type {
evt = evt.event(t);
}
if tx.send(Ok(evt)).await.is_err() {
return; // client disconnected
}
}
}
}
}
Err(e) => {
tracing::warn!("SSE proxy connect failed: {e}");
}
}
// Reconnect after a short delay
tokio::time::sleep(std::time::Duration::from_secs(2)).await;
}
});
let stream = tokio_stream::wrappers::ReceiverStream::new(rx);
Sse::new(stream).keep_alive(
axum::response::sse::KeepAlive::new()
.interval(std::time::Duration::from_secs(15))
.text("ping"),
)
}
// ── Main ─────────────────────────────────────────────────────────────────
/// Start the Paste web app: a simple pastebin that stores text and images in CAN service.
#[tokio::main]
async fn main() {
tracing_subscriber::fmt()
@ -247,6 +314,7 @@ async fn main() {
.route("/paste/list", get(paste_list))
.route("/paste/asset/{hash}", get(proxy_asset))
.route("/paste/thumb/{hash}", get(proxy_thumb))
.route("/paste/events", get(paste_events))
.layer(DefaultBodyLimit::max(100 * 1024 * 1024)) // 100 MB
.with_state(state);

View File

@ -1,4 +1,7 @@
# go_example_1.ps1 — Start CanService + Paste example, open browser
# go_example_1.ps1 — Start CanService + Paste + Sync agent, open browser
#
# Run on multiple machines (after git clone) and they will auto-sync
# all ingested assets via iroh's relay network. No port forwarding needed.
$ErrorActionPreference = "Stop"
$root = $PSScriptRoot
@ -11,13 +14,19 @@ Get-NetTCPConnection -LocalPort 3211 -ErrorAction SilentlyContinue |
ForEach-Object { Stop-Process -Id $_.OwningProcess -Force -ErrorAction SilentlyContinue }
Start-Sleep -Milliseconds 500
# --- Build everything ---
Write-Host "Building CanService..." -ForegroundColor Cyan
cargo build --manifest-path "$root\Cargo.toml"
Write-Host "Building Paste example..." -ForegroundColor Cyan
cargo build --manifest-path "$root\examples\paste\Cargo.toml"
# Start CanService in background
Write-Host "Building CAN Sync agent..." -ForegroundColor Cyan
cargo build --manifest-path "$root\examples\can-sync\Cargo.toml" --bin can-sync
# --- Start CanService ---
Write-Host "Starting CanService on port 3210..." -ForegroundColor Green
$canService = Start-Process -FilePath "cargo" `
-ArgumentList "run --manifest-path `"$root\Cargo.toml`"" `
@ -43,7 +52,20 @@ if (-not $ready) {
}
Write-Host "CanService ready." -ForegroundColor Green
# Start Paste example (it opens the browser itself)
# --- Start Sync agent ---
$syncConfig = "$root\examples\can-sync\config.yaml"
Write-Host "Starting CAN Sync agent (P2P replication)..." -ForegroundColor Green
$syncAgent = Start-Process -FilePath "cargo" `
-ArgumentList "run --manifest-path `"$root\examples\can-sync\Cargo.toml`" --bin can-sync -- `"$syncConfig`"" `
-WorkingDirectory $root `
-PassThru -NoNewWindow
# Give sync agent a moment to connect to CAN service
Start-Sleep -Seconds 2
# --- Start Paste example ---
Write-Host "Starting Paste on port 3211..." -ForegroundColor Green
$paste = Start-Process -FilePath "cargo" `
-ArgumentList "run --manifest-path `"$root\examples\paste\Cargo.toml`"" `
@ -54,16 +76,22 @@ Write-Host ""
Write-Host "Running:" -ForegroundColor Cyan
Write-Host " CanService -> http://127.0.0.1:3210"
Write-Host " Paste UI -> http://127.0.0.1:3211"
Write-Host " CAN Sync -> P2P replication active (iroh relay)" -ForegroundColor Magenta
Write-Host ""
Write-Host "Press Ctrl+C to stop both." -ForegroundColor Yellow
Write-Host "Sync passphrase: 'duke-canman-sync'" -ForegroundColor Magenta
Write-Host "Any other machine running this script with the same passphrase" -ForegroundColor Magenta
Write-Host "will automatically discover this instance and sync all assets." -ForegroundColor Magenta
Write-Host ""
Write-Host "Press Ctrl+C to stop all services." -ForegroundColor Yellow
# Wait for either process to exit, then clean up both
# Wait for any process to exit, then clean up all
try {
while (-not $canService.HasExited -and -not $paste.HasExited) {
while (-not $canService.HasExited -and -not $paste.HasExited -and -not $syncAgent.HasExited) {
Start-Sleep -Seconds 1
}
} finally {
Write-Host "Shutting down..." -ForegroundColor Yellow
Stop-Process -Id $canService.Id -Force -ErrorAction SilentlyContinue
Stop-Process -Id $paste.Id -Force -ErrorAction SilentlyContinue
Stop-Process -Id $syncAgent.Id -Force -ErrorAction SilentlyContinue
}

85
go_example_1.sh Normal file
View File

@ -0,0 +1,85 @@
#!/usr/bin/env bash
# go_example_1.sh — Start CanService + Paste + Sync agent
#
# Run on multiple machines (after git clone) and they will auto-sync
# all ingested assets via iroh's relay network. No port forwarding needed.
set -e

ROOT="$(cd "$(dirname "$0")" && pwd)"

# Kill all started services; runs on Ctrl+C, TERM, and normal exit.
cleanup() {
echo ""
echo "Shutting down..."
[ -n "$CAN_PID" ] && kill "$CAN_PID" 2>/dev/null
[ -n "$SYNC_PID" ] && kill "$SYNC_PID" 2>/dev/null
[ -n "$PASTE_PID" ] && kill "$PASTE_PID" 2>/dev/null
wait 2>/dev/null
exit 0
}
trap cleanup INT TERM EXIT

# Kill anything on our ports
echo "Cleaning up stale processes..."
lsof -ti:3210 2>/dev/null | xargs kill -9 2>/dev/null || true
lsof -ti:3211 2>/dev/null | xargs kill -9 2>/dev/null || true
sleep 0.5

# --- Build everything ---
echo "Building CanService..."
cargo build --manifest-path "$ROOT/Cargo.toml"
echo "Building Paste example..."
cargo build --manifest-path "$ROOT/examples/paste/Cargo.toml"
echo "Building CAN Sync agent..."
cargo build --manifest-path "$ROOT/examples/can-sync/Cargo.toml" --bin can-sync

# --- Start CanService ---
echo "Starting CanService on port 3210..."
cargo run --manifest-path "$ROOT/Cargo.toml" &
CAN_PID=$!

# Poll the HTTP API (up to 15 s) before starting services that depend on it.
echo "Waiting for CanService..."
for i in $(seq 1 30); do
if curl -sf http://127.0.0.1:3210/api/v1/can/0/list >/dev/null 2>&1; then
break
fi
if [ "$i" -eq 30 ]; then
echo "CanService failed to start within 15s"
exit 1
fi
sleep 0.5
done
echo "CanService ready."

# --- Start Sync agent ---
SYNC_CONFIG="$ROOT/examples/can-sync/config.yaml"
echo "Starting CAN Sync agent (P2P replication)..."
cargo run --manifest-path "$ROOT/examples/can-sync/Cargo.toml" --bin can-sync -- "$SYNC_CONFIG" &
SYNC_PID=$!
sleep 2

# --- Start Paste example ---
echo "Starting Paste on port 3211..."
cargo run --manifest-path "$ROOT/examples/paste/Cargo.toml" &
PASTE_PID=$!

echo ""
echo "Running:"
echo " CanService -> http://127.0.0.1:3210"
echo " Paste UI -> http://127.0.0.1:3211"
echo " CAN Sync -> P2P replication active (iroh relay)"
echo ""
echo "Sync passphrase: 'duke-canman-sync'"
echo "Any other machine running this script with the same passphrase"
echo "will automatically discover this instance and sync all assets."
echo ""
echo "Press Ctrl+C to stop all services."

# Block until any service exits; the EXIT trap then tears the rest down.
# A polling loop is used instead of `wait -n PID...` because `wait -n` only
# accepts PID arguments on bash >= 5.1 and doesn't exist at all on macOS's
# stock bash 3.2 — there the old form errored out immediately, the `|| true`
# swallowed it, and cleanup killed all three freshly-started services.
while kill -0 "$CAN_PID" 2>/dev/null \
&& kill -0 "$SYNC_PID" 2>/dev/null \
&& kill -0 "$PASTE_PID" 2>/dev/null; do
sleep 1
done

View File

@ -1,6 +1,7 @@
use serde::Deserialize;
use std::path::{Path, PathBuf};
/// Application settings loaded from config.yaml at startup.
#[derive(Debug, Clone, Deserialize)]
pub struct Config {
pub storage_root: PathBuf,
@ -12,8 +13,13 @@ pub struct Config {
pub rebuild_error_threshold: u32,
#[serde(default = "default_verify_interval")]
pub verify_interval_hours: u64,
/// Optional API key for the private sync endpoints (/sync/*).
/// If not set, sync endpoints are disabled (return 404).
#[serde(default)]
pub sync_api_key: Option<String>,
}
// Default values used when a field is missing from config.yaml.
fn default_admin_token() -> String {
"changeme".to_string()
}
@ -28,24 +34,29 @@ fn default_verify_interval() -> u64 {
}
impl Config {
/// Read and parse the YAML config file from disk.
pub fn load(path: &Path) -> anyhow::Result<Self> {
let contents = std::fs::read_to_string(path)?;
let config: Config = serde_yaml::from_str(&contents)?;
Ok(config)
}
/// Returns the path to the SQLite database file inside storage_root.
pub fn db_path(&self) -> PathBuf {
self.storage_root.join(".can.db")
}
/// Returns the path to the trash folder for soft-deleted files.
pub fn trash_dir(&self) -> PathBuf {
self.storage_root.join(".trash")
}
/// Returns the path to the cached thumbnail images folder.
pub fn thumbs_dir(&self) -> PathBuf {
self.storage_root.join(".thumbs")
}
/// Create the storage, trash, and thumbnail directories if they don't exist yet.
pub fn ensure_dirs(&self) -> anyhow::Result<()> {
std::fs::create_dir_all(&self.storage_root)?;
std::fs::create_dir_all(self.trash_dir())?;

View File

@ -4,8 +4,11 @@ use std::sync::{Arc, Mutex};
use crate::models::{Asset, AssetMeta, ListParams, SearchParams};
/// Thread-safe handle to the SQLite database (wrapped in Arc<Mutex> so multiple
/// threads can share it safely).
pub type Db = Arc<Mutex<Connection>>;
/// Open (or create) the SQLite database file and set up tables.
pub fn open(path: &Path) -> anyhow::Result<Db> {
let conn = Connection::open(path)?;
conn.execute_batch("PRAGMA journal_mode=WAL; PRAGMA foreign_keys=ON;")?;
@ -13,6 +16,7 @@ pub fn open(path: &Path) -> anyhow::Result<Db> {
Ok(Arc::new(Mutex::new(conn)))
}
/// Open a temporary in-memory database (used for tests).
pub fn open_in_memory() -> anyhow::Result<Db> {
let conn = Connection::open_in_memory()?;
conn.execute_batch("PRAGMA foreign_keys=ON;")?;
@ -20,6 +24,8 @@ pub fn open_in_memory() -> anyhow::Result<Db> {
Ok(Arc::new(Mutex::new(conn)))
}
/// Create the assets, tags, and asset_tags tables if they don't already exist,
/// and run any pending migrations.
fn init_schema(conn: &Connection) -> rusqlite::Result<()> {
conn.execute_batch(
"
@ -66,7 +72,7 @@ fn init_schema(conn: &Connection) -> rusqlite::Result<()> {
Ok(())
}
/// Insert a new asset. Returns the row id.
/// Save a new asset record to the database. Returns the auto-generated row id.
pub fn insert_asset(conn: &Connection, asset: &Asset) -> rusqlite::Result<i64> {
conn.execute(
"INSERT INTO assets (timestamp, hash, mime_type, application, user_identity, description, actual_filename, human_filename, human_path, size)
@ -87,7 +93,7 @@ pub fn insert_asset(conn: &Connection, asset: &Asset) -> rusqlite::Result<i64> {
Ok(conn.last_insert_rowid())
}
/// Look up an asset by its hash.
/// Find an asset by its unique SHA-256 hash. Returns None if not found.
pub fn get_asset_by_hash(conn: &Connection, hash: &str) -> rusqlite::Result<Option<Asset>> {
conn.query_row(
"SELECT id, timestamp, hash, mime_type, application, user_identity, description,
@ -115,7 +121,7 @@ pub fn get_asset_by_hash(conn: &Connection, hash: &str) -> rusqlite::Result<Opti
.optional()
}
/// Get tags for an asset.
/// Get the list of tag names attached to an asset.
pub fn get_asset_tags(conn: &Connection, asset_id: i64) -> rusqlite::Result<Vec<String>> {
let mut stmt = conn.prepare(
"SELECT t.name FROM tags t
@ -127,7 +133,7 @@ pub fn get_asset_tags(conn: &Connection, asset_id: i64) -> rusqlite::Result<Vec<
tags.collect()
}
/// Upsert a tag and return its id.
/// Insert a tag if it doesn't exist yet, then return its id.
pub fn upsert_tag(conn: &Connection, name: &str) -> rusqlite::Result<i64> {
conn.execute(
"INSERT OR IGNORE INTO tags (name) VALUES (?1)",
@ -138,7 +144,7 @@ pub fn upsert_tag(conn: &Connection, name: &str) -> rusqlite::Result<i64> {
})
}
/// Replace all tags for an asset within a transaction.
/// Remove all existing tags for an asset and assign the new ones.
pub fn set_asset_tags(conn: &Connection, asset_id: i64, tags: &[String]) -> rusqlite::Result<()> {
conn.execute(
"DELETE FROM asset_tags WHERE asset_id = ?1",
@ -154,7 +160,8 @@ pub fn set_asset_tags(conn: &Connection, asset_id: i64, tags: &[String]) -> rusq
Ok(())
}
/// Build an AssetMeta from an Asset row + tags.
/// Convert an internal Asset database row into the API-friendly AssetMeta format
/// (includes tags fetched from the join table).
pub fn asset_to_meta(conn: &Connection, asset: &Asset) -> rusqlite::Result<AssetMeta> {
let tags = get_asset_tags(conn, asset.id)?;
Ok(AssetMeta {
@ -173,7 +180,7 @@ pub fn asset_to_meta(conn: &Connection, asset: &Asset) -> rusqlite::Result<Asset
})
}
/// Update description and/or tags for an asset.
/// Update an asset's description and/or tags (only changes the fields you provide).
pub fn update_asset_metadata(
conn: &Connection,
hash: &str,
@ -195,7 +202,7 @@ pub fn update_asset_metadata(
Ok(())
}
/// Flag an asset as corrupted.
/// Mark or unmark an asset as corrupted (set by the background verifier).
pub fn flag_corrupted(conn: &Connection, hash: &str, corrupted: bool) -> rusqlite::Result<()> {
conn.execute(
"UPDATE assets SET is_corrupted = ?1 WHERE hash = ?2",
@ -204,7 +211,8 @@ pub fn flag_corrupted(conn: &Connection, hash: &str, corrupted: bool) -> rusqlit
Ok(())
}
/// Update file size for an asset (used by verifier to backfill).
/// Store the file size in bytes for an asset (used by the verifier to fill in
/// sizes for assets that were created before the size column existed).
pub fn update_asset_size(conn: &Connection, hash: &str, size: i64) -> rusqlite::Result<()> {
conn.execute(
"UPDATE assets SET size = ?1 WHERE hash = ?2",
@ -213,7 +221,7 @@ pub fn update_asset_size(conn: &Connection, hash: &str, size: i64) -> rusqlite::
Ok(())
}
/// Soft-delete: mark as trashed.
/// Soft-delete an asset by marking it as trashed (the file is moved to .trash/).
pub fn trash_asset(conn: &Connection, hash: &str) -> rusqlite::Result<()> {
conn.execute(
"UPDATE assets SET is_trashed = 1 WHERE hash = ?1",
@ -222,7 +230,8 @@ pub fn trash_asset(conn: &Connection, hash: &str) -> rusqlite::Result<()> {
Ok(())
}
/// List assets with pagination and filtering.
/// Fetch a page of assets with optional filters (application, trashed, etc.).
/// Returns the matching assets and the total count for pagination.
pub fn list_assets(conn: &Connection, params: &ListParams) -> rusqlite::Result<(Vec<Asset>, i64)> {
let limit = params.limit.unwrap_or(50);
let offset = params.offset.unwrap_or(0);
@ -301,7 +310,8 @@ pub fn list_assets(conn: &Connection, params: &ListParams) -> rusqlite::Result<(
Ok((assets, total))
}
/// Search assets with various filters.
/// Search assets with multiple filters (hash prefix, time range, MIME type, tags, etc.).
/// Returns matching assets and total count for pagination.
pub fn search_assets(
conn: &Connection,
params: &SearchParams,
@ -428,7 +438,69 @@ pub fn search_assets(
Ok((assets, total))
}
/// Get all non-trashed asset records (for verifier startup scan).
/// Get every asset record in the database, including trashed ones.
/// Used by the sync system to compare what two peers have.
pub fn get_all_assets(conn: &Connection) -> rusqlite::Result<Vec<Asset>> {
    // Convert one SELECT result row (columns in declaration order below)
    // into an Asset struct.
    fn row_to_asset(row: &rusqlite::Row<'_>) -> rusqlite::Result<Asset> {
        Ok(Asset {
            id: row.get(0)?,
            timestamp: row.get(1)?,
            hash: row.get(2)?,
            mime_type: row.get(3)?,
            application: row.get(4)?,
            user_identity: row.get(5)?,
            description: row.get(6)?,
            actual_filename: row.get(7)?,
            human_filename: row.get(8)?,
            human_path: row.get(9)?,
            is_trashed: row.get(10)?,
            is_corrupted: row.get(11)?,
            size: row.get(12)?,
        })
    }
    let mut stmt = conn.prepare(
        "SELECT id, timestamp, hash, mime_type, application, user_identity, description,
                actual_filename, human_filename, human_path, is_trashed, is_corrupted, size
         FROM assets",
    )?;
    let mut assets = Vec::new();
    for mapped in stmt.query_map([], |row| row_to_asset(row))? {
        assets.push(mapped?);
    }
    Ok(assets)
}
/// Get only assets added after a given timestamp (for incremental sync --
/// "what's new since last time I checked?").
pub fn get_assets_since(conn: &Connection, since: i64) -> rusqlite::Result<Vec<Asset>> {
    let mut stmt = conn.prepare(
        "SELECT id, timestamp, hash, mime_type, application, user_identity, description,
                actual_filename, human_filename, human_path, is_trashed, is_corrupted, size
         FROM assets WHERE timestamp > ?1
         ORDER BY timestamp ASC",
    )?;
    // Column indexes follow the SELECT list above.
    let to_asset = |row: &rusqlite::Row<'_>| -> rusqlite::Result<Asset> {
        Ok(Asset {
            id: row.get(0)?,
            timestamp: row.get(1)?,
            hash: row.get(2)?,
            mime_type: row.get(3)?,
            application: row.get(4)?,
            user_identity: row.get(5)?,
            description: row.get(6)?,
            actual_filename: row.get(7)?,
            human_filename: row.get(8)?,
            human_path: row.get(9)?,
            is_trashed: row.get(10)?,
            is_corrupted: row.get(11)?,
            size: row.get(12)?,
        })
    };
    // collect() short-circuits on the first row that fails to map.
    stmt.query_map([since], to_asset)?.collect()
}
/// Get all non-trashed assets (used by the background verifier to check
/// file integrity on startup).
pub fn get_all_active_assets(conn: &Connection) -> rusqlite::Result<Vec<Asset>> {
let mut stmt = conn.prepare(
"SELECT id, timestamp, hash, mime_type, application, user_identity, description,

View File

@ -2,6 +2,7 @@ use axum::http::StatusCode;
use axum::response::{IntoResponse, Response};
use crate::models::ErrorResponse;
/// All the error types the API can return. Each variant maps to an HTTP status code.
#[derive(Debug, thiserror::Error)]
pub enum AppError {
#[error("Not found: {0}")]
@ -23,6 +24,7 @@ pub enum AppError {
Internal(String),
}
/// Converts an AppError into an HTTP response with the right status code and a JSON body.
impl IntoResponse for AppError {
fn into_response(self) -> Response {
let (status, message) = match &self {

View File

@ -1,20 +1,26 @@
pub mod config;
pub mod db;
pub mod error;
pub mod hash;
pub mod models;
pub mod routes;
pub mod storage;
pub mod verifier;
pub mod xattr;
pub mod config; // Configuration loading from YAML
pub mod db; // SQLite database access (CRUD for assets and tags)
pub mod error; // Centralized error types and HTTP error responses
pub mod hash; // SHA-256 content hashing
pub mod models; // Data structures shared across the codebase
pub mod routes; // HTTP API route handlers
pub mod storage; // File I/O: reading, writing, and trashing asset files
pub mod verifier; // Background integrity checker and file-attribute syncer
pub mod xattr; // OS-level file metadata (xattr on Unix, NTFS ADS on Windows)
use std::sync::Arc;
use crate::config::Config;
use crate::db::Db;
/// Broadcast channel for notifying sync subscribers about new assets.
/// Each message is `"hash:timestamp"` (e.g. `"abc123def456:1710000000000"`).
pub type SyncEventSender = tokio::sync::broadcast::Sender<String>;
/// Shared application state passed to every HTTP handler.
#[derive(Clone)]
pub struct AppState {
pub config: Arc<Config>,
pub db: Db,
pub sync_events: SyncEventSender,
}

View File

@ -10,6 +10,8 @@ use tower_http::trace::TraceLayer;
use can_service::config::Config;
use can_service::{db, routes, verifier, AppState};
/// Entry point: loads config, opens the database, starts background services,
/// and launches the HTTP server.
#[tokio::main]
async fn main() -> anyhow::Result<()> {
// Initialize tracing
@ -41,9 +43,14 @@ async fn main() -> anyhow::Result<()> {
// Start background verifier
verifier::start((*config).clone(), db.clone());
// Broadcast channel for SSE sync events (capacity doesn't matter much —
// slow receivers just miss events and do a full reconciliation on reconnect)
let (sync_events, _) = tokio::sync::broadcast::channel::<String>(256);
let state = AppState {
config: config.clone(),
db,
sync_events,
};
// Build router
@ -54,7 +61,11 @@ async fn main() -> anyhow::Result<()> {
.layer(CorsLayer::permissive())
.with_state(state);
let addr = SocketAddr::from(([0, 0, 0, 0], 3210));
let port: u16 = std::env::var("CAN_PORT")
.ok()
.and_then(|p| p.parse().ok())
.unwrap_or(3210);
let addr = SocketAddr::from(([0, 0, 0, 0], port));
tracing::info!("CAN service listening on {}", addr);
let listener = tokio::net::TcpListener::bind(addr).await?;

View File

@ -1,6 +1,7 @@
use serde::{Deserialize, Serialize};
/// Database representation of an asset.
/// Internal database row for a stored file. Contains all metadata fields
/// that are persisted in SQLite.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Asset {
pub id: i64,
@ -18,7 +19,8 @@ pub struct Asset {
pub size: i64,
}
/// API-facing asset metadata response.
/// The public-facing version of an asset's metadata, returned by the API.
/// Includes resolved tags and omits internal fields like `id` and `actual_filename`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssetMeta {
pub hash: String,
@ -35,7 +37,7 @@ pub struct AssetMeta {
pub size: i64,
}
/// Standard API response wrapper.
/// Wraps every successful API response in `{ "status": "success", "data": ... }`.
#[derive(Debug, Serialize, Deserialize)]
pub struct ApiResponse<T: Serialize> {
pub status: String,
@ -43,6 +45,7 @@ pub struct ApiResponse<T: Serialize> {
}
impl<T: Serialize> ApiResponse<T> {
/// Create a success response wrapping the given data.
pub fn success(data: T) -> Self {
Self {
status: "success".to_string(),
@ -51,7 +54,7 @@ impl<T: Serialize> ApiResponse<T> {
}
}
/// Error response body.
/// JSON body for error responses: `{ "status": "error", "error": "..." }`.
#[derive(Debug, Serialize, Deserialize)]
pub struct ErrorResponse {
pub status: String,
@ -67,7 +70,7 @@ impl ErrorResponse {
}
}
/// Ingest success response data.
/// Returned after a successful file upload: the timestamp, hash, and on-disk filename.
#[derive(Debug, Serialize, Deserialize)]
pub struct IngestResult {
pub timestamp: i64,
@ -97,7 +100,9 @@ pub struct MetadataUpdate {
pub description: Option<String>,
}
/// OS-level file attribute metadata (for xattr / NTFS ADS).
/// Metadata stored directly on the file via OS-level attributes
/// (xattr on macOS/Linux, NTFS Alternate Data Streams on Windows).
/// This lets external tools read CAN metadata without hitting the database.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct FileAttributes {
pub mime_type: Option<String>,

View File

@ -17,7 +17,8 @@ pub fn router() -> Router<AppState> {
.route("/api/v1/can/0/asset/{hash}", patch(patch_asset))
}
/// GET /api/v1/can/0/asset/{hash} - Stream the physical file.
/// Download an asset's file by its hash. Streams the raw bytes back to the
/// client with the correct MIME type and a suggested filename.
async fn get_asset(
State(state): State<AppState>,
Path(hash): Path<String>,
@ -59,7 +60,8 @@ async fn get_asset(
.into_response())
}
/// PATCH /api/v1/can/0/asset/{hash} - Update metadata (tags, description).
/// Update an asset's tags and/or description. Saves changes to both the
/// database and the OS-level file attributes.
async fn patch_asset(
State(state): State<AppState>,
Path(hash): Path<String>,

39
src/routes/events.rs Normal file
View File

@ -0,0 +1,39 @@
//! Public SSE endpoint for real-time asset notifications.
//!
//! `GET /api/v1/can/0/events` — no authentication required.
//! Streams `new_asset` events whenever an asset is ingested or synced.
//! Used by frontends (e.g. Paste) to auto-refresh when content arrives.
use std::convert::Infallible;
use axum::extract::State;
use axum::response::sse::{Event, Sse};
use axum::routing::get;
use axum::Router;
use tokio_stream::wrappers::BroadcastStream;
use tokio_stream::StreamExt;
use crate::AppState;
/// Build the router exposing the public, unauthenticated SSE events endpoint.
pub fn router() -> Router<AppState> {
Router::new().route("/api/v1/can/0/events", get(asset_events))
}
/// Public SSE stream of new asset events.
///
/// Each event is `event: new_asset` with `data: {"hash":"...","timestamp":...}`.
async fn asset_events(
    State(state): State<AppState>,
) -> Sse<impl tokio_stream::Stream<Item = Result<Event, Infallible>>> {
    let receiver = state.sync_events.subscribe();
    // A lagged broadcast receiver yields Err; those frames are dropped —
    // the client simply sees the data on its next full loadItems().
    let events = BroadcastStream::new(receiver).filter_map(|msg| {
        msg.ok()
            .map(|payload| Ok(Event::default().event("new_asset").data(payload)))
    });
    let keep_alive = axum::response::sse::KeepAlive::new()
        .interval(std::time::Duration::from_secs(15))
        .text("ping");
    Sse::new(events).keep_alive(keep_alive)
}

View File

@ -7,6 +7,9 @@ use crate::error::AppError;
use crate::models::{ApiResponse, Asset, DataIngestRequest, FileAttributes, IngestResult};
use crate::{db, hash, storage, xattr, AppState};
/// Register the two upload endpoints:
/// - POST /ingest (multipart file upload)
/// - POST /ingest/data (JSON body upload, agent-friendly)
pub fn router() -> Router<AppState> {
Router::new()
.route("/api/v1/can/0/ingest", post(ingest_multipart))
@ -27,7 +30,9 @@ struct IngestInput {
description: Option<String>,
}
/// Common pipeline: timestamp → hash → write file → xattr → DB insert.
/// Core ingest pipeline shared by both upload endpoints.
/// Steps: generate timestamp -> hash content -> write file to disk ->
/// save OS-level metadata -> insert into database -> notify SSE subscribers.
fn do_ingest(state: &AppState, input: IngestInput) -> Result<IngestResult, AppError> {
let timestamp = SystemTime::now()
.duration_since(UNIX_EPOCH)
@ -85,6 +90,13 @@ fn do_ingest(state: &AppState, input: IngestInput) -> Result<IngestResult, AppEr
}
}
// Notify SSE subscribers about the new asset
let event_data = format!(
r#"{{"hash":"{}","timestamp":{}}}"#,
file_hash, timestamp
);
let _ = state.sync_events.send(event_data);
Ok(IngestResult {
timestamp,
hash: file_hash,
@ -92,7 +104,7 @@ fn do_ingest(state: &AppState, input: IngestInput) -> Result<IngestResult, AppEr
})
}
/// Parse a comma-separated tag string into a clean Vec.
/// Split a comma-separated tag string like "photo,vacation" into a clean list.
fn parse_tags(raw: Option<&str>) -> Vec<String> {
raw.unwrap_or("")
.split(',')
@ -103,6 +115,8 @@ fn parse_tags(raw: Option<&str>) -> Vec<String> {
// ── POST /api/v1/can/0/ingest (multipart — file uploads) ──────────────
/// Handle multipart file upload. Reads the "file" field plus optional metadata
/// fields (tags, application, user, etc.) and runs the ingest pipeline.
async fn ingest_multipart(
State(state): State<AppState>,
mut multipart: Multipart,

View File

@ -10,6 +10,8 @@ pub fn router() -> Router<AppState> {
Router::new().route("/api/v1/can/0/list", get(list_assets))
}
/// GET /api/v1/can/0/list - Return a paginated list of assets with their metadata.
/// Supports query params: limit, offset, order (asc/desc), application filter.
async fn list_assets(
State(state): State<AppState>,
Query(params): Query<ListParams>,

View File

@ -10,6 +10,9 @@ pub fn router() -> Router<AppState> {
Router::new().route("/api/v1/can/0/asset/{hash}/meta", get(get_meta))
}
/// GET /api/v1/can/0/asset/{hash}/meta - Return an asset's metadata as JSON
/// (hash, MIME type, tags, description, timestamps, etc.) without downloading
/// the actual file.
async fn get_meta(
State(state): State<AppState>,
Path(hash): Path<String>,

View File

@ -1,13 +1,16 @@
pub mod ingest;
pub mod asset;
pub mod meta;
pub mod list;
pub mod search;
pub mod thumb;
pub mod ingest; // POST endpoints for uploading files and JSON data
pub mod asset; // GET/PATCH endpoints for downloading files and updating metadata
pub mod meta; // GET endpoint for reading asset metadata as JSON
pub mod list; // GET endpoint for paginated asset listing
pub mod search; // GET endpoint for searching/filtering assets
pub mod thumb; // GET endpoint for generating resized thumbnail images
pub mod sync; // Private P2P sync endpoints (protobuf, requires API key)
pub mod events; // Public SSE endpoint for real-time "new asset" notifications
use axum::Router;
use crate::AppState;
/// Combine all route modules into one router. Called once at startup.
pub fn router() -> Router<AppState> {
Router::new()
.merge(ingest::router())
@ -16,4 +19,6 @@ pub fn router() -> Router<AppState> {
.merge(list::router())
.merge(search::router())
.merge(thumb::router())
.merge(sync::router())
.merge(events::router())
}

View File

@ -10,6 +10,8 @@ pub fn router() -> Router<AppState> {
Router::new().route("/api/v1/can/0/search", get(search_assets))
}
/// GET /api/v1/can/0/search - Search assets by hash prefix, time range,
/// MIME type, user, application, or tags. Returns paginated results.
async fn search_assets(
State(state): State<AppState>,
Query(params): Query<SearchParams>,

481
src/routes/sync.rs Normal file
View File

@ -0,0 +1,481 @@
//! Private sync API endpoints (protobuf-encoded).
//!
//! All endpoints require `X-Sync-Key` header matching `config.sync_api_key`.
//! If `sync_api_key` is not configured, all endpoints return 404.
//!
//! Includes an SSE endpoint (`GET /sync/events`) that streams real-time
//! notifications when new assets are ingested.
use std::convert::Infallible;
use axum::body::Bytes;
use axum::extract::{Query, State};
use axum::http::{HeaderMap, StatusCode};
use axum::response::sse::{Event, Sse};
use axum::response::IntoResponse;
use axum::routing::{get, post};
use axum::Router;
use prost::Message;
use tokio_stream::wrappers::BroadcastStream;
use tokio_stream::StreamExt;
use crate::models::{Asset, FileAttributes};
use crate::{db, hash, storage, xattr, AppState};
// ── Protobuf message types ───────────────────────────────────────────────
// These structs are serialized/deserialized as protobuf using the `prost` crate.
// They define the wire format for peer-to-peer sync communication.
// NOTE: the `tag` numbers ARE the wire contract — never renumber or reuse a
// tag; only add new fields with fresh tag numbers.

/// Request body for POST /sync/hashes. Intentionally empty — the optional
/// `since` filter travels as a query parameter, not in the body.
#[derive(Clone, PartialEq, Message)]
pub struct HashListRequest {}

/// Response for /sync/hashes: one compact digest per known asset.
#[derive(Clone, PartialEq, Message)]
pub struct HashListResponse {
    #[prost(message, repeated, tag = "1")]
    pub assets: Vec<AssetDigest>,
}

/// Minimal per-asset summary a peer uses to diff its inventory against ours.
#[derive(Clone, PartialEq, Message)]
pub struct AssetDigest {
    /// Content hash identifying the asset.
    #[prost(string, tag = "1")]
    pub hash: String,
    /// Asset timestamp (Unix epoch; milliseconds, per the SSE doc example).
    #[prost(int64, tag = "2")]
    pub timestamp: i64,
    /// File size in bytes.
    #[prost(int64, tag = "3")]
    pub size: i64,
    /// Whether the asset has been soft-deleted.
    #[prost(bool, tag = "4")]
    pub is_trashed: bool,
}

/// Request body for POST /sync/pull: the hashes the caller wants to download.
#[derive(Clone, PartialEq, Message)]
pub struct PullRequest {
    #[prost(string, repeated, tag = "1")]
    pub hashes: Vec<String>,
}

/// Response for /sync/pull. May contain fewer bundles than requested hashes
/// (unknown or unreadable assets are skipped server-side).
#[derive(Clone, PartialEq, Message)]
pub struct PullResponse {
    #[prost(message, repeated, tag = "1")]
    pub bundles: Vec<AssetBundle>,
}

/// A complete asset: all metadata plus the raw file content.
#[derive(Clone, PartialEq, Message)]
pub struct AssetBundle {
    #[prost(string, tag = "1")]
    pub hash: String,
    #[prost(int64, tag = "2")]
    pub timestamp: i64,
    #[prost(string, tag = "3")]
    pub mime_type: String,
    /// Application that produced the asset, if recorded.
    #[prost(string, optional, tag = "4")]
    pub application: Option<String>,
    /// Identity of the user who created the asset, if recorded.
    #[prost(string, optional, tag = "5")]
    pub user_identity: Option<String>,
    #[prost(string, optional, tag = "6")]
    pub description: Option<String>,
    /// Original (human-chosen) filename, if known.
    #[prost(string, optional, tag = "7")]
    pub human_filename: Option<String>,
    /// Original path on the source machine, if known.
    #[prost(string, optional, tag = "8")]
    pub human_path: Option<String>,
    #[prost(bool, tag = "9")]
    pub is_trashed: bool,
    #[prost(int64, tag = "10")]
    pub size: i64,
    #[prost(string, repeated, tag = "11")]
    pub tags: Vec<String>,
    /// Raw file bytes. Whole-file transfer — no chunking at this layer.
    #[prost(bytes = "vec", tag = "12")]
    pub content: Vec<u8>,
}

/// Request body for POST /sync/push: a single asset bundle to store.
/// `optional` is a protobuf-encoding requirement for nested messages; the
/// handler rejects a missing bundle with 400.
#[derive(Clone, PartialEq, Message)]
pub struct PushRequest {
    #[prost(message, optional, tag = "1")]
    pub bundle: Option<AssetBundle>,
}

/// Response for /sync/push. `already_existed` lets the peer distinguish a
/// fresh ingest from a dedupe hit.
#[derive(Clone, PartialEq, Message)]
pub struct PushResponse {
    #[prost(string, tag = "1")]
    pub hash: String,
    #[prost(bool, tag = "2")]
    pub already_existed: bool,
}

/// Request body for POST /sync/meta: metadata changes for one asset.
/// Absent/empty fields mean "leave unchanged" (see the handler doc).
#[derive(Clone, PartialEq, Message)]
pub struct MetaUpdateRequest {
    #[prost(string, tag = "1")]
    pub hash: String,
    #[prost(string, optional, tag = "2")]
    pub description: Option<String>,
    #[prost(string, repeated, tag = "3")]
    pub tags: Vec<String>,
    #[prost(bool, tag = "4")]
    pub is_trashed: bool,
}

/// Response for /sync/meta.
#[derive(Clone, PartialEq, Message)]
pub struct MetaUpdateResponse {
    #[prost(bool, tag = "1")]
    pub success: bool,
}
// ── Router ──────────────────────────────────────────────────────────────

/// Register all private sync endpoints. Every handler checks `X-Sync-Key`
/// itself (the SSE endpoint also accepts `?key=`).
pub fn router() -> Router<AppState> {
    let mut routes = Router::new();
    routes = routes.route("/sync/hashes", post(sync_hashes));
    routes = routes.route("/sync/pull", post(sync_pull));
    routes = routes.route("/sync/push", post(sync_push));
    routes = routes.route("/sync/meta", post(sync_meta));
    routes.route("/sync/events", get(sync_events))
}
/// Query params for /sync/hashes (optional `since` timestamp for incremental queries).
#[derive(serde::Deserialize, Default)]
struct HashesQuery {
    /// Only return assets with `timestamp > since`. Omit or 0 for full list.
    /// (Same millisecond timestamps as `AssetDigest.timestamp`.)
    since: Option<i64>,
}
// ── Auth ────────────────────────────────────────────────────────────────

/// Verify the X-Sync-Key header matches the configured API key.
/// Returns 404 if sync is not configured (the endpoints "don't exist"),
/// 401 if a key is configured but the caller's doesn't match.
fn check_sync_key(state: &AppState, headers: &HeaderMap) -> Result<(), (StatusCode, String)> {
    // An empty configured key counts as "not configured".
    let configured = state
        .config
        .sync_api_key
        .as_deref()
        .filter(|k| !k.is_empty());
    let Some(expected) = configured else {
        return Err((StatusCode::NOT_FOUND, "Sync API not enabled".into()));
    };
    let provided = headers
        .get("X-Sync-Key")
        .and_then(|v| v.to_str().ok())
        .unwrap_or_default();
    if provided == expected {
        Ok(())
    } else {
        Err((StatusCode::UNAUTHORIZED, "Invalid sync key".into()))
    }
}
// ── Helpers ─────────────────────────────────────────────────────────────

/// Serialize a protobuf message into bytes.
///
/// `prost`'s `Message::encode_to_vec` is infallible — encoding into a `Vec`
/// can always grow the buffer — so the previous manual-buffer version's
/// error-mapping path was dead code. The `Result` signature is kept so the
/// existing `?` call sites compile unchanged.
fn encode_proto<M: Message>(msg: &M) -> Result<Vec<u8>, (StatusCode, String)> {
    Ok(msg.encode_to_vec())
}
/// Wrap protobuf bytes into an HTTP 200 response with the right content type.
fn proto_response(buf: Vec<u8>) -> (StatusCode, [(&'static str, &'static str); 1], Vec<u8>) {
    let content_type = [("content-type", "application/x-protobuf")];
    (StatusCode::OK, content_type, buf)
}
// ── POST /sync/hashes ───────────────────────────────────────────────────
/// Return a compact list of all known asset hashes + timestamps.
/// A remote peer calls this first to figure out which assets it's missing.
/// Supports `?since=<timestamp>` for incremental queries.
async fn sync_hashes(
State(state): State<AppState>,
headers: HeaderMap,
query: Query<HashesQuery>,
_body: Bytes,
) -> Result<impl IntoResponse, (StatusCode, String)> {
check_sync_key(&state, &headers)?;
let since = query.since.unwrap_or(0);
let assets = {
let conn = state.db.lock().unwrap();
if since > 0 {
db::get_assets_since(&conn, since)
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("DB error: {}", e)))?
} else {
db::get_all_assets(&conn)
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("DB error: {}", e)))?
}
};
let resp = HashListResponse {
assets: assets
.iter()
.map(|a| AssetDigest {
hash: a.hash.clone(),
timestamp: a.timestamp,
size: a.size,
is_trashed: a.is_trashed,
})
.collect(),
};
Ok(proto_response(encode_proto(&resp)?))
}
// ── POST /sync/pull ─────────────────────────────────────────────────────

/// Download full asset bundles (metadata + file content) for a list of hashes.
/// A remote peer calls this to fetch assets it doesn't have yet.
///
/// Unknown hashes and unreadable files are skipped rather than failing the
/// whole request, so the response may contain fewer bundles than requested;
/// the caller must reconcile by hash.
async fn sync_pull(
    State(state): State<AppState>,
    headers: HeaderMap,
    body: Bytes,
) -> Result<impl IntoResponse, (StatusCode, String)> {
    check_sync_key(&state, &headers)?;
    let req = PullRequest::decode(body)
        .map_err(|e| (StatusCode::BAD_REQUEST, format!("Decode error: {}", e)))?;
    let mut bundles = Vec::new();
    for hash_str in &req.hashes {
        // Hold the DB lock only long enough to fetch the row and its tags;
        // the (potentially large) file read below happens after the guard
        // is dropped at the end of this block.
        let (asset, tags) = {
            let conn = state.db.lock().unwrap();
            let asset = match db::get_asset_by_hash(&conn, hash_str) {
                Ok(Some(a)) => a,
                // Unknown hash — or a DB error, which is also skipped silently.
                _ => continue,
            };
            let tags = db::get_asset_tags(&conn, asset.id).unwrap_or_default();
            (asset, tags)
        };
        let content =
            match storage::read_asset(&state.config.storage_root, &asset.actual_filename) {
                Ok(c) => c,
                Err(e) => {
                    // File missing/unreadable on disk: log and skip this asset.
                    tracing::warn!("Failed to read {}: {}", &asset.actual_filename, e);
                    continue;
                }
            };
        bundles.push(AssetBundle {
            hash: asset.hash,
            timestamp: asset.timestamp,
            mime_type: asset.mime_type,
            application: asset.application,
            user_identity: asset.user_identity,
            description: asset.description,
            human_filename: asset.human_filename,
            human_path: asset.human_path,
            is_trashed: asset.is_trashed,
            size: asset.size,
            tags,
            content,
        });
    }
    Ok(proto_response(encode_proto(&PullResponse { bundles })?))
}
// ── POST /sync/push ─────────────────────────────────────────────────────

/// Receive and store a new asset pushed from a remote peer.
/// Verifies the hash, writes the file, and inserts the DB record.
/// Returns early (with `already_existed: true`) if the asset already exists.
async fn sync_push(
    State(state): State<AppState>,
    headers: HeaderMap,
    body: Bytes,
) -> Result<impl IntoResponse, (StatusCode, String)> {
    check_sync_key(&state, &headers)?;
    let req = PushRequest::decode(body)
        .map_err(|e| (StatusCode::BAD_REQUEST, format!("Decode error: {}", e)))?;
    let bundle = req
        .bundle
        .ok_or_else(|| (StatusCode::BAD_REQUEST, "Missing bundle".into()))?;
    // 1. Verify hash: recompute from (timestamp, content) so a peer can't
    //    store arbitrary content under a forged identity.
    let computed = hash::compute_hash(bundle.timestamp, &bundle.content);
    if computed != bundle.hash {
        return Err((
            StatusCode::BAD_REQUEST,
            format!(
                "Hash mismatch: computed {} vs provided {}",
                // `computed` is a locally generated full-length hash, so a
                // plain [..12] slice is safe; the peer-supplied hash is
                // length-guarded.
                &computed[..12],
                &bundle.hash[..12.min(bundle.hash.len())]
            ),
        ));
    }
    // 2. Check if already exists (dedupe by hash).
    //    NOTE(review): the lock is released before the insert in step 5, so
    //    two concurrent pushes of the same bundle could both pass this check;
    //    confirm db::insert_asset tolerates that (e.g. UNIQUE constraint).
    {
        let conn = state.db.lock().unwrap();
        if let Ok(Some(_)) = db::get_asset_by_hash(&conn, &bundle.hash) {
            return Ok(proto_response(encode_proto(&PushResponse {
                hash: bundle.hash,
                already_existed: true,
            })?));
        }
    }
    // 3. Write file under the canonical CAN filename.
    let actual_filename =
        storage::build_filename(bundle.timestamp, &bundle.hash, &bundle.tags, &bundle.mime_type);
    let file_path =
        storage::write_asset(&state.config.storage_root, &actual_filename, &bundle.content)
            .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Write error: {}", e)))?;
    // 4. OS attributes (best-effort — a failure is logged, never fatal).
    let attrs = FileAttributes {
        mime_type: Some(bundle.mime_type.clone()),
        application: bundle.application.clone(),
        user: bundle.user_identity.clone(),
        // xattrs store tags as a single comma-joined string; omit when empty.
        tags: if bundle.tags.is_empty() {
            None
        } else {
            Some(bundle.tags.join(","))
        },
        description: bundle.description.clone(),
        human_filename: bundle.human_filename.clone(),
        human_path: bundle.human_path.clone(),
    };
    if let Err(e) = xattr::write_attributes(&file_path, &attrs) {
        tracing::warn!("Failed to write OS attributes: {}", e);
    }
    // 5. DB insert. The row is created un-trashed and then trashed in a
    //    separate step below if the peer marked it trashed.
    let asset = Asset {
        // Placeholder; db::insert_asset returns the real assigned id.
        id: 0,
        timestamp: bundle.timestamp,
        hash: bundle.hash.clone(),
        mime_type: bundle.mime_type,
        application: bundle.application,
        user_identity: bundle.user_identity,
        description: bundle.description,
        actual_filename,
        human_filename: bundle.human_filename,
        human_path: bundle.human_path,
        is_trashed: false,
        is_corrupted: false,
        size: bundle.content.len() as i64,
    };
    {
        let conn = state.db.lock().unwrap();
        let asset_id = db::insert_asset(&conn, &asset)
            .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("DB error: {}", e)))?;
        if !bundle.tags.is_empty() {
            db::set_asset_tags(&conn, asset_id, &bundle.tags)
                .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Tag error: {}", e)))?;
        }
        if bundle.is_trashed {
            // Best-effort: the asset is stored either way.
            let _ = db::trash_asset(&conn, &bundle.hash);
        }
    }
    tracing::info!("Sync push: ingested {} ({}B)", &bundle.hash[..12], bundle.content.len());
    // Notify SSE subscribers about the new asset
    let event_data = format!(
        r#"{{"hash":"{}","timestamp":{}}}"#,
        bundle.hash, bundle.timestamp
    );
    let _ = state.sync_events.send(event_data);
    Ok(proto_response(encode_proto(&PushResponse {
        hash: bundle.hash,
        already_existed: false,
    })?))
}
// ── POST /sync/meta ─────────────────────────────────────────────────────

/// Receive a metadata update from a remote peer (description, tags, trash status).
///
/// Update semantics are "absent means unchanged":
/// - `description: None` leaves the description as-is (a description cannot
///   be cleared over sync).
/// - an empty `tags` list leaves existing tags untouched (tags cannot be
///   removed over sync).
/// - `is_trashed: true` trashes the asset; `false` never un-trashes one.
async fn sync_meta(
    State(state): State<AppState>,
    headers: HeaderMap,
    body: Bytes,
) -> Result<impl IntoResponse, (StatusCode, String)> {
    check_sync_key(&state, &headers)?;
    let req = MetaUpdateRequest::decode(body)
        .map_err(|e| (StatusCode::BAD_REQUEST, format!("Decode error: {}", e)))?;
    // The DB lock stays held for the rest of the handler, including the file
    // move below.
    let conn = state.db.lock().unwrap();
    let asset = db::get_asset_by_hash(&conn, &req.hash)
        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("DB error: {}", e)))?
        .ok_or_else(|| (StatusCode::NOT_FOUND, "Asset not found".into()))?;
    if let Some(ref desc) = req.description {
        db::update_asset_metadata(&conn, &req.hash, Some(desc), None)
            .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Update error: {}", e)))?;
    }
    if !req.tags.is_empty() {
        db::set_asset_tags(&conn, asset.id, &req.tags)
            .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Tag error: {}", e)))?;
    }
    if req.is_trashed && !asset.is_trashed {
        db::trash_asset(&conn, &req.hash)
            .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Trash error: {}", e)))?;
        // Best-effort: move the physical file into .trash; the DB flag is the
        // source of truth if this fails.
        let _ = storage::trash_asset_file(&state.config.storage_root, &asset.actual_filename);
    }
    tracing::info!("Sync meta update for {}", &req.hash[..12.min(req.hash.len())]);
    Ok(proto_response(encode_proto(&MetaUpdateResponse {
        success: true,
    })?))
}
// ── GET /sync/events (SSE) ────────────────────────────────────────────

/// Server-Sent Events endpoint. Streams `new_asset` events whenever a file is
/// ingested (via public API or sync push). Accepts the sync key either as the
/// `X-Sync-Key` header or as a `?key=...` query param, because SSE/EventSource
/// clients can't set custom headers.
///
/// Each event is:
/// ```text
/// event: new_asset
/// data: {"hash":"abc...","timestamp":1710000000000}
/// ```
async fn sync_events(
    State(state): State<AppState>,
    headers: HeaderMap,
    query: Query<SseQuery>,
) -> Result<Sse<impl tokio_stream::Stream<Item = Result<Event, Infallible>>>, (StatusCode, String)>
{
    // Match the other sync endpoints: when no sync key is configured the
    // route "does not exist". (Previously an unconfigured server answered 401
    // here while every other /sync route answered 404 — inconsistent, and it
    // revealed the endpoint's existence.)
    let expected = match state.config.sync_api_key.as_deref() {
        Some(key) if !key.is_empty() => key,
        _ => return Err((StatusCode::NOT_FOUND, "Sync API not enabled".into())),
    };
    // Accept the key from either source; a wrong header does not disqualify a
    // correct query param (and vice versa), same as the original OR logic.
    let header_ok =
        headers.get("X-Sync-Key").and_then(|v| v.to_str().ok()) == Some(expected);
    let query_ok = query.key.as_deref() == Some(expected);
    if !header_ok && !query_ok {
        return Err((StatusCode::UNAUTHORIZED, "Invalid sync key".into()));
    }
    let rx = state.sync_events.subscribe();
    let stream = BroadcastStream::new(rx).filter_map(|result| match result {
        Ok(data) => Some(Ok(Event::default().event("new_asset").data(data))),
        Err(_) => None, // lagged — skip missed events, client will reconcile
    });
    Ok(Sse::new(stream).keep_alive(
        axum::response::sse::KeepAlive::new()
            .interval(std::time::Duration::from_secs(15))
            .text("ping"),
    ))
}
/// Query params for GET /sync/events.
#[derive(serde::Deserialize, Default)]
struct SseQuery {
    // Sync API key; accepted via query string because EventSource clients
    // cannot set custom request headers.
    key: Option<String>,
}

View File

@ -18,12 +18,15 @@ pub fn router() -> Router<AppState> {
)
}
/// Static fallback SVG icon for non-image assets.
/// A simple "?" placeholder icon returned when the asset isn't a resizable image.
const FALLBACK_SVG: &str = r##"<svg xmlns="http://www.w3.org/2000/svg" width="128" height="128" viewBox="0 0 128 128">
<rect width="128" height="128" rx="8" fill="#e0e0e0"/>
<text x="64" y="72" text-anchor="middle" font-family="sans-serif" font-size="40" fill="#888">?</text>
</svg>"##;
/// GET /api/v1/can/0/asset/{hash}/thumb/{width}/{height}
/// Generate (or serve from cache) a resized JPEG thumbnail for image assets.
/// Non-image assets get a placeholder SVG icon instead.
async fn get_thumb(
State(state): State<AppState>,
Path((hash, max_width, max_height)): Path<(String, u32, u32)>,

View File

@ -1,7 +1,8 @@
use std::path::{Path, PathBuf};
/// Build the physical filename per the spec:
/// `{timestamp}_{sha256}_{truncated_tags}.{extension}`
/// Build the on-disk filename for a new asset.
/// Format: `{timestamp}_{sha256hash}_{tags}.{extension}`
/// Tags are sanitized (alphanumeric only) and truncated to fit filesystem limits.
pub fn build_filename(
timestamp: i64,
hash: &str,
@ -41,7 +42,8 @@ pub fn build_filename(
.replace(". ", ".")
}
/// Derive file extension from MIME type.
/// Convert a MIME type string (like "image/png") into a file extension (like "png").
/// Falls back to "bin" for unknown types.
pub fn mime_to_extension(mime: &str) -> &str {
match mime {
"application/pdf" => "pdf",
@ -75,20 +77,20 @@ pub fn mime_to_extension(mime: &str) -> &str {
}
}
/// Save a file's raw bytes to the storage directory. Returns the full path on disk.
pub fn write_asset(root: &Path, filename: &str, data: &[u8]) -> std::io::Result<PathBuf> {
    let dest = root.join(filename);
    std::fs::write(&dest, data).map(|_| dest)
}
/// Load the raw bytes of a stored file from the storage directory.
pub fn read_asset(root: &Path, filename: &str) -> std::io::Result<Vec<u8>> {
    std::fs::read(root.join(filename))
}
/// Move an asset file to the .trash directory.
/// Move a file from the storage directory into the .trash/ folder (soft delete).
pub fn trash_asset_file(root: &Path, filename: &str) -> std::io::Result<()> {
let src = root.join(filename);
let trash_dir = root.join(".trash");
@ -98,8 +100,9 @@ pub fn trash_asset_file(root: &Path, filename: &str) -> std::io::Result<()> {
Ok(())
}
/// Parse a physical filename to extract the hash component.
/// Format: `{timestamp}_{sha256}_{tags}.{ext}` or `{timestamp}_{sha256}.{ext}`
/// Extract the SHA-256 hash from a CAN filename.
/// Expects format: `{timestamp}_{sha256hash}_{tags}.{ext}`
/// Returns None if the filename doesn't match the expected pattern.
pub fn parse_hash_from_filename(filename: &str) -> Option<String> {
// Remove extension
let stem = filename.rsplit_once('.')?.0;
@ -112,7 +115,8 @@ pub fn parse_hash_from_filename(filename: &str) -> Option<String> {
}
}
/// Parse a physical filename to extract the timestamp component.
/// Extract the millisecond timestamp from a CAN filename.
/// Returns None if the filename doesn't match the expected pattern.
pub fn parse_timestamp_from_filename(filename: &str) -> Option<i64> {
let stem = filename.rsplit_once('.')?.0;
let ts_str = stem.split('_').next()?;

View File

@ -11,10 +11,10 @@ use crate::models::FileAttributes;
use crate::storage::{parse_hash_from_filename, parse_timestamp_from_filename};
use crate::xattr;
/// Start the background verifier subsystem.
/// - Runs an initial full scrub
/// - Watches for filesystem changes
/// - Runs periodic scrubs
/// Launch the background integrity checker. It does three things:
/// 1. Immediately scans all files to detect corruption or missing data.
/// 2. Watches the storage folder for file changes and re-checks them in real time.
/// 3. Re-runs the full scan on a timer (configurable in config.yaml).
pub fn start(config: Config, db: Db) {
let config2 = config.clone();
let db2 = db.clone();
@ -58,6 +58,7 @@ fn config3_for_watcher(config: Config) -> Config {
config
}
/// Watch the storage directory for file changes and verify each changed file.
async fn run_watcher(config: Config, db: Db) -> anyhow::Result<()> {
let (tx, mut rx) = mpsc::channel::<PathBuf>(100);
let storage_root = config.storage_root.clone();
@ -114,7 +115,9 @@ async fn run_watcher(config: Config, db: Db) -> anyhow::Result<()> {
Ok(())
}
/// Run a full scrub: verify every active asset's hash.
/// Full integrity scan: re-hashes every active file on disk and compares it
/// to the expected hash in the database. Also syncs OS-level file attributes
/// and backfills missing file sizes.
async fn run_scrub(config: &Config, db: &Db) -> anyhow::Result<()> {
let assets = {
let conn = db.lock().unwrap();
@ -276,7 +279,8 @@ async fn run_scrub(config: &Config, db: &Db) -> anyhow::Result<()> {
Ok(())
}
/// Verify a single file by its physical filename.
/// Re-hash a single file and flag it as corrupted if the hash doesn't match.
/// Called when the filesystem watcher detects a change.
async fn verify_single_file(
config: &Config,
db: &Db,

View File

@ -27,7 +27,8 @@ pub fn read_attributes(path: &Path) -> std::io::Result<FileAttributes> {
}
}
// ── Unix implementation using xattr crate ──
// ── Unix implementation ──
// Stores each metadata field as an extended attribute (e.g. "user.can.mime_type").
#[cfg(unix)]
fn write_xattr(path: &Path, attrs: &FileAttributes) -> std::io::Result<()> {
@ -58,6 +59,7 @@ fn write_xattr(path: &Path, attrs: &FileAttributes) -> std::io::Result<()> {
Ok(())
}
/// Read all CAN metadata from Unix extended attributes on a file.
#[cfg(unix)]
fn read_xattr(path: &Path) -> std::io::Result<FileAttributes> {
use xattr::FileExt;
@ -81,8 +83,10 @@ fn read_xattr(path: &Path) -> std::io::Result<FileAttributes> {
})
}
// ── Windows implementation using NTFS Alternate Data Streams ──
// ── Windows implementation ──
// Stores each metadata field as an NTFS Alternate Data Stream (e.g. "file.txt:can.mime_type").
/// Write CAN metadata fields as NTFS Alternate Data Streams on a file.
#[cfg(windows)]
fn write_ntfs_ads(path: &Path, attrs: &FileAttributes) -> std::io::Result<()> {
let base = path.to_string_lossy();
@ -111,6 +115,7 @@ fn write_ntfs_ads(path: &Path, attrs: &FileAttributes) -> std::io::Result<()> {
Ok(())
}
/// Read all CAN metadata from NTFS Alternate Data Streams on a file.
#[cfg(windows)]
fn read_ntfs_ads(path: &Path) -> std::io::Result<FileAttributes> {
let base = path.to_string_lossy();