Replace polling-based sync detection with SSE (Server-Sent Events) from CAN service for instant push notifications on new asset ingests. Add incremental hash queries via ?since=timestamp parameter to avoid transferring full hash lists on every sync cycle. CAN service changes: - Add broadcast channel (SyncEventSender) in AppState for SSE events - Add GET /sync/events SSE endpoint with auth via header or query param - Fire broadcast events on both ingest and sync push - Add db::get_assets_since() for incremental queries - Support ?since= parameter on POST /sync/hashes can-sync agent changes: - Add SSE subscription with auto-reconnect in can_client - Add get_hashes_since() for incremental catch-up - Rewrite live push loop: SSE-driven with 30s fallback poll - Remove poll_interval parameter from live sync functions All 6 stress tests pass (102 assets, 63 MB/s bidirectional). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
217 lines · 7.5 KiB · Rust
//! CAN Sync — P2P full-mirror replication agent for CAN Service.
|
|
//!
|
|
//! Uses iroh for encrypted QUIC transport + NAT traversal,
|
|
//! and iroh-gossip for peer discovery via a shared passphrase.
|
|
//!
|
|
//! Each instance talks to its local CAN Service via the private
|
|
//! protobuf sync API (/sync/*), authenticated with an API key.
|
|
|
|
mod can_client;
|
|
mod config;
|
|
mod discovery;
|
|
mod peer;
|
|
mod protocol;
|
|
|
|
use std::path::Path;
|
|
|
|
use anyhow::{Context, Result};
|
|
use iroh::endpoint::presets::N0;
|
|
use iroh::{Endpoint, EndpointAddr, EndpointId};
|
|
use iroh_gossip::net::Gossip;
|
|
use tokio::sync::mpsc;
|
|
use tracing::{error, info, warn};
|
|
|
|
use crate::can_client::CanSyncClient;
|
|
use crate::config::SyncConfig;
|
|
use crate::discovery::Discovery;
|
|
|
|
/// ALPN protocol identifier for CAN sync peer connections.
|
|
const SYNC_ALPN: &[u8] = b"can-sync/1";
|
|
|
|
#[tokio::main]
|
|
async fn main() -> Result<()> {
|
|
// Initialize logging
|
|
tracing_subscriber::fmt()
|
|
.with_env_filter(
|
|
tracing_subscriber::EnvFilter::try_from_default_env()
|
|
.unwrap_or_else(|_| "can_sync=info,iroh=warn,iroh_gossip=warn".parse().unwrap()),
|
|
)
|
|
.init();
|
|
|
|
// Load config
|
|
let config_path = std::env::args()
|
|
.nth(1)
|
|
.unwrap_or_else(|| "config.yaml".to_string());
|
|
let config = SyncConfig::load(Path::new(&config_path))
|
|
.with_context(|| format!("loading config from {}", config_path))?;
|
|
|
|
info!("CAN Sync v2 starting");
|
|
info!("CAN service: {}", config.can_service_url);
|
|
info!("Poll interval: {}s", config.poll_interval_secs);
|
|
|
|
// Create HTTP client for local CAN service's sync API
|
|
let can = CanSyncClient::new(&config.can_service_url, &config.sync_api_key);
|
|
|
|
// Verify CAN service is reachable
|
|
if can.health_check().await {
|
|
info!("CAN service sync API is healthy");
|
|
} else {
|
|
warn!("CAN service sync API not reachable — will retry on sync");
|
|
}
|
|
|
|
// Create iroh endpoint for QUIC transport with n0 defaults (relay + discovery)
|
|
let endpoint = Endpoint::builder()
|
|
.preset(N0)
|
|
.alpns(vec![SYNC_ALPN.to_vec()])
|
|
.bind()
|
|
.await
|
|
.context("creating iroh endpoint")?;
|
|
|
|
let node_id = endpoint.id();
|
|
info!("Node ID: {}", node_id);
|
|
|
|
let addrs = endpoint.bound_sockets();
|
|
if let Some(addr) = addrs.first() {
|
|
info!("Listening on {}", addr);
|
|
}
|
|
|
|
// Write our EndpointAddr to file if configured (for direct peer connection in tests)
|
|
if let Some(ref ticket_path) = config.ticket_file {
|
|
// Wait briefly for the endpoint to register with relay
|
|
tokio::time::sleep(std::time::Duration::from_millis(500)).await;
|
|
|
|
let addr = endpoint.addr();
|
|
let addr_json = serde_json::to_string(&addr)
|
|
.context("serializing EndpointAddr")?;
|
|
std::fs::write(ticket_path, &addr_json)
|
|
.with_context(|| format!("writing addr to {}", ticket_path))?;
|
|
info!("Wrote EndpointAddr to {}", ticket_path);
|
|
}
|
|
|
|
// Create gossip instance for peer discovery (not async — returns directly)
|
|
let gossip = Gossip::builder().spawn(endpoint.clone());
|
|
|
|
// Channel for discovered peers
|
|
let (peer_tx, mut peer_rx) = mpsc::channel::<EndpointId>(32);
|
|
|
|
// Spawn discovery via gossip
|
|
let disc = Discovery::new(endpoint.clone(), gossip.clone(), &config.sync_passphrase);
|
|
tokio::spawn(async move {
|
|
if let Err(e) = disc.run(peer_tx.clone()).await {
|
|
error!("Discovery failed: {:#}", e);
|
|
}
|
|
});
|
|
|
|
// If a direct connect ticket file is specified, spawn a task to read it and connect
|
|
if let Some(ref ticket_path) = config.connect_ticket_file {
|
|
let ticket_path = ticket_path.clone();
|
|
let endpoint_direct = endpoint.clone();
|
|
let can_direct = can.clone();
|
|
|
|
tokio::spawn(async move {
|
|
info!("Waiting for peer addr file: {}", ticket_path);
|
|
|
|
// Poll until the file exists and is non-empty
|
|
let addr_json = loop {
|
|
match std::fs::read_to_string(&ticket_path) {
|
|
Ok(s) if !s.trim().is_empty() => break s.trim().to_string(),
|
|
_ => tokio::time::sleep(std::time::Duration::from_millis(200)).await,
|
|
}
|
|
};
|
|
|
|
info!("Read peer addr from {}", ticket_path);
|
|
|
|
let peer_addr: EndpointAddr = match serde_json::from_str(&addr_json) {
|
|
Ok(a) => a,
|
|
Err(e) => {
|
|
error!("Invalid EndpointAddr JSON: {:#}", e);
|
|
return;
|
|
}
|
|
};
|
|
|
|
let peer_id = peer_addr.id;
|
|
let short = peer_id.fmt_short().to_string();
|
|
info!("Direct connecting to peer: {} (from addr file)", short);
|
|
|
|
match endpoint_direct.connect(peer_addr, SYNC_ALPN).await {
|
|
Ok(conn) => {
|
|
info!("Direct connection to {} established!", short);
|
|
|
|
// Initial reconciliation
|
|
if let Err(e) = peer::run_sync_session(conn.clone(), can_direct.clone(), true).await {
|
|
error!("Initial sync with {} failed: {:#}", short, e);
|
|
return;
|
|
}
|
|
|
|
info!("Initial sync with {} complete, starting live sync", short);
|
|
|
|
// Live sync: SSE-driven push + accept incoming streams
|
|
peer::run_live_sync(conn, can_direct).await;
|
|
}
|
|
Err(e) => {
|
|
error!("Failed to connect to {}: {:#}", short, e);
|
|
}
|
|
}
|
|
});
|
|
}
|
|
|
|
// Spawn incoming connection handler
|
|
let endpoint_accept = endpoint.clone();
|
|
let can_accept = can.clone();
|
|
tokio::spawn(async move {
|
|
loop {
|
|
match endpoint_accept.accept().await {
|
|
Some(incoming) => {
|
|
let can_clone = can_accept.clone();
|
|
tokio::spawn(async move {
|
|
match incoming.await {
|
|
Ok(conn) => {
|
|
info!("Accepted incoming connection from {}", conn.remote_id().fmt_short());
|
|
peer::handle_incoming(conn, can_clone, std::time::Duration::from_secs(0)).await;
|
|
}
|
|
Err(e) => {
|
|
warn!("Failed to accept connection: {:#}", e);
|
|
}
|
|
}
|
|
});
|
|
}
|
|
None => {
|
|
info!("Endpoint closed, stopping accept loop");
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
});
|
|
|
|
// Main loop: connect to discovered peers (from gossip) and sync
|
|
info!("Waiting for peers...");
|
|
|
|
while let Some(peer_id) = peer_rx.recv().await {
|
|
let short = peer_id.fmt_short();
|
|
info!("Connecting to discovered peer: {}", short);
|
|
|
|
let endpoint_clone = endpoint.clone();
|
|
let can_clone = can.clone();
|
|
|
|
tokio::spawn(async move {
|
|
let conn = match endpoint_clone.connect(peer_id, SYNC_ALPN).await {
|
|
Ok(c) => c,
|
|
Err(e) => {
|
|
error!("Failed to connect to {}: {:#}", short, e);
|
|
return;
|
|
}
|
|
};
|
|
|
|
if let Err(e) = peer::run_sync_session(conn.clone(), can_clone.clone(), true).await {
|
|
error!("Initial sync with {} failed: {:#}", short, e);
|
|
return;
|
|
}
|
|
|
|
peer::run_live_sync(conn, can_clone).await;
|
|
});
|
|
}
|
|
|
|
info!("CAN Sync shutting down");
|
|
Ok(())
|
|
}
|