Jason Tudisco 7b54bbe892 Add SSE events and incremental hash queries for live sync
Replace polling-based sync detection with SSE (Server-Sent Events) from
CAN service for instant push notifications on new asset ingests. Add
incremental hash queries via ?since=timestamp parameter to avoid
transferring full hash lists on every sync cycle.

CAN service changes:
- Add broadcast channel (SyncEventSender) in AppState for SSE events
- Add GET /sync/events SSE endpoint with auth via header or query param
- Fire broadcast events on both ingest and sync push
- Add db::get_assets_since() for incremental queries
- Support ?since= parameter on POST /sync/hashes

can-sync agent changes:
- Add SSE subscription with auto-reconnect in can_client
- Add get_hashes_since() for incremental catch-up
- Rewrite live push loop: SSE-driven with 30s fallback poll
- Remove poll_interval parameter from live sync functions

All 6 stress tests pass (102 assets, 63 MB/s bidirectional).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-12 16:48:26 -06:00

217 lines
7.5 KiB
Rust

//! CAN Sync — P2P full-mirror replication agent for CAN Service.
//!
//! Uses iroh for encrypted QUIC transport + NAT traversal,
//! and iroh-gossip for peer discovery via a shared passphrase.
//!
//! Each instance talks to its local CAN Service via the private
//! protobuf sync API (/sync/*), authenticated with an API key.
mod can_client;
mod config;
mod discovery;
mod peer;
mod protocol;
use std::path::Path;
use anyhow::{Context, Result};
use iroh::endpoint::presets::N0;
use iroh::{Endpoint, EndpointAddr, EndpointId};
use iroh_gossip::net::Gossip;
use tokio::sync::mpsc;
use tracing::{error, info, warn};
use crate::can_client::CanSyncClient;
use crate::config::SyncConfig;
use crate::discovery::Discovery;
/// ALPN protocol identifier for CAN sync peer connections.
///
/// Registered on the local endpoint through `alpns(...)` when it is built in
/// `main`, and passed to every outgoing `connect` call made from `main`.
const SYNC_ALPN: &[u8] = b"can-sync/1";
#[tokio::main]
async fn main() -> Result<()> {
// Initialize logging
tracing_subscriber::fmt()
.with_env_filter(
tracing_subscriber::EnvFilter::try_from_default_env()
.unwrap_or_else(|_| "can_sync=info,iroh=warn,iroh_gossip=warn".parse().unwrap()),
)
.init();
// Load config
let config_path = std::env::args()
.nth(1)
.unwrap_or_else(|| "config.yaml".to_string());
let config = SyncConfig::load(Path::new(&config_path))
.with_context(|| format!("loading config from {}", config_path))?;
info!("CAN Sync v2 starting");
info!("CAN service: {}", config.can_service_url);
info!("Poll interval: {}s", config.poll_interval_secs);
// Create HTTP client for local CAN service's sync API
let can = CanSyncClient::new(&config.can_service_url, &config.sync_api_key);
// Verify CAN service is reachable
if can.health_check().await {
info!("CAN service sync API is healthy");
} else {
warn!("CAN service sync API not reachable — will retry on sync");
}
// Create iroh endpoint for QUIC transport with n0 defaults (relay + discovery)
let endpoint = Endpoint::builder()
.preset(N0)
.alpns(vec![SYNC_ALPN.to_vec()])
.bind()
.await
.context("creating iroh endpoint")?;
let node_id = endpoint.id();
info!("Node ID: {}", node_id);
let addrs = endpoint.bound_sockets();
if let Some(addr) = addrs.first() {
info!("Listening on {}", addr);
}
// Write our EndpointAddr to file if configured (for direct peer connection in tests)
if let Some(ref ticket_path) = config.ticket_file {
// Wait briefly for the endpoint to register with relay
tokio::time::sleep(std::time::Duration::from_millis(500)).await;
let addr = endpoint.addr();
let addr_json = serde_json::to_string(&addr)
.context("serializing EndpointAddr")?;
std::fs::write(ticket_path, &addr_json)
.with_context(|| format!("writing addr to {}", ticket_path))?;
info!("Wrote EndpointAddr to {}", ticket_path);
}
// Create gossip instance for peer discovery (not async — returns directly)
let gossip = Gossip::builder().spawn(endpoint.clone());
// Channel for discovered peers
let (peer_tx, mut peer_rx) = mpsc::channel::<EndpointId>(32);
// Spawn discovery via gossip
let disc = Discovery::new(endpoint.clone(), gossip.clone(), &config.sync_passphrase);
tokio::spawn(async move {
if let Err(e) = disc.run(peer_tx.clone()).await {
error!("Discovery failed: {:#}", e);
}
});
// If a direct connect ticket file is specified, spawn a task to read it and connect
if let Some(ref ticket_path) = config.connect_ticket_file {
let ticket_path = ticket_path.clone();
let endpoint_direct = endpoint.clone();
let can_direct = can.clone();
tokio::spawn(async move {
info!("Waiting for peer addr file: {}", ticket_path);
// Poll until the file exists and is non-empty
let addr_json = loop {
match std::fs::read_to_string(&ticket_path) {
Ok(s) if !s.trim().is_empty() => break s.trim().to_string(),
_ => tokio::time::sleep(std::time::Duration::from_millis(200)).await,
}
};
info!("Read peer addr from {}", ticket_path);
let peer_addr: EndpointAddr = match serde_json::from_str(&addr_json) {
Ok(a) => a,
Err(e) => {
error!("Invalid EndpointAddr JSON: {:#}", e);
return;
}
};
let peer_id = peer_addr.id;
let short = peer_id.fmt_short().to_string();
info!("Direct connecting to peer: {} (from addr file)", short);
match endpoint_direct.connect(peer_addr, SYNC_ALPN).await {
Ok(conn) => {
info!("Direct connection to {} established!", short);
// Initial reconciliation
if let Err(e) = peer::run_sync_session(conn.clone(), can_direct.clone(), true).await {
error!("Initial sync with {} failed: {:#}", short, e);
return;
}
info!("Initial sync with {} complete, starting live sync", short);
// Live sync: SSE-driven push + accept incoming streams
peer::run_live_sync(conn, can_direct).await;
}
Err(e) => {
error!("Failed to connect to {}: {:#}", short, e);
}
}
});
}
// Spawn incoming connection handler
let endpoint_accept = endpoint.clone();
let can_accept = can.clone();
tokio::spawn(async move {
loop {
match endpoint_accept.accept().await {
Some(incoming) => {
let can_clone = can_accept.clone();
tokio::spawn(async move {
match incoming.await {
Ok(conn) => {
info!("Accepted incoming connection from {}", conn.remote_id().fmt_short());
peer::handle_incoming(conn, can_clone, std::time::Duration::from_secs(0)).await;
}
Err(e) => {
warn!("Failed to accept connection: {:#}", e);
}
}
});
}
None => {
info!("Endpoint closed, stopping accept loop");
break;
}
}
}
});
// Main loop: connect to discovered peers (from gossip) and sync
info!("Waiting for peers...");
while let Some(peer_id) = peer_rx.recv().await {
let short = peer_id.fmt_short();
info!("Connecting to discovered peer: {}", short);
let endpoint_clone = endpoint.clone();
let can_clone = can.clone();
tokio::spawn(async move {
let conn = match endpoint_clone.connect(peer_id, SYNC_ALPN).await {
Ok(c) => c,
Err(e) => {
error!("Failed to connect to {}: {:#}", short, e);
return;
}
};
if let Err(e) = peer::run_sync_session(conn.clone(), can_clone.clone(), true).await {
error!("Initial sync with {} failed: {:#}", short, e);
return;
}
peer::run_live_sync(conn, can_clone).await;
});
}
info!("CAN Sync shutting down");
Ok(())
}