A lexicon-driven AppView for ATProto. happyview.dev
backfill firehose jetstream atproto appview oauth lexicon
8
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat: replace Jetstream and backfill with Tap

Trezy 3559b2bc a6483c3b

+732 -753
+21 -2
docker-compose.yml
··· 33 33 postgres: 34 34 condition: service_healthy 35 35 36 + tap: 37 + image: ghcr.io/bluesky-social/indigo/tap:latest 38 + ports: 39 + - "2480:2480" 40 + environment: 41 + TAP_DATABASE_URL: postgres://tap:tap@postgres/tap 42 + TAP_RELAY_URL: https://bsky.network 43 + TAP_PLC_URL: https://plc.directory 44 + TAP_ADMIN_PASSWORD: ${TAP_ADMIN_PASSWORD} 45 + TAP_COLLECTION_FILTERS: "" 46 + TAP_SIGNAL_COLLECTIONS: "" 47 + depends_on: 48 + postgres: 49 + condition: service_healthy 50 + 36 51 happyview: 37 52 image: rust:1.93 38 53 working_dir: /app ··· 47 62 environment: 48 63 DATABASE_URL: postgres://happyview:happyview@postgres/happyview 49 64 AIP_URL: https://aip.gamesgamesgamesgames.games 65 + TAP_URL: http://tap:2480 66 + TAP_ADMIN_PASSWORD: ${TAP_ADMIN_PASSWORD} 50 67 PORT: 3000 51 68 depends_on: 52 69 postgres: 53 70 condition: service_healthy 54 71 aip: 55 72 condition: service_started 73 + tap: 74 + condition: service_started 56 75 57 76 web: 58 77 image: node:24-alpine ··· 66 85 environment: 67 86 - HOSTNAME=0.0.0.0 68 87 - API_URL=http://happyview:3000 69 - - AIP_PROXY_URL=https://aip.gamesgamesgamesgames.games 70 - - NEXT_PUBLIC_AIP_URL=https://aip.gamesgamesgamesgames.games 88 + - AIP_PROXY_URL=http://aip:8080 89 + - NEXT_PUBLIC_AIP_URL=http://localhost:8080 71 90 72 91 volumes: 73 92 pgdata:
+2
docker/init-databases.sh
··· 4 4 psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-EOSQL 5 5 CREATE USER aip WITH PASSWORD 'aip'; 6 6 CREATE DATABASE aip OWNER aip; 7 + CREATE USER tap WITH PASSWORD 'tap'; 8 + CREATE DATABASE tap OWNER tap; 7 9 EOSQL
+188 -3
src/admin/backfill.rs
··· 1 1 use axum::Json; 2 2 use axum::extract::State; 3 3 use axum::http::StatusCode; 4 + use serde::Deserialize; 4 5 use serde_json::Value; 5 6 6 7 use crate::AppState; 7 8 use crate::error::AppError; 9 + use crate::tap; 8 10 9 11 use super::auth::AdminAuth; 10 12 use super::types::{BackfillJob, CreateBackfillBody}; 11 13 12 - /// POST /admin/backfill — create a new backfill job. 14 + // --------------------------------------------------------------------------- 15 + // Relay discovery (reused from old backfill module) 16 + // --------------------------------------------------------------------------- 17 + 18 + #[derive(Deserialize)] 19 + struct ListReposResponse { 20 + repos: Vec<RepoEntry>, 21 + cursor: Option<String>, 22 + } 23 + 24 + #[derive(Deserialize)] 25 + struct RepoEntry { 26 + did: String, 27 + } 28 + 29 + /// Discover all DIDs that have records in `collection` via the relay's 30 + /// `com.atproto.sync.listReposByCollection` endpoint. Paginates until done. 31 + async fn list_repos_by_collection( 32 + http: &reqwest::Client, 33 + relay_url: &str, 34 + collection: &str, 35 + ) -> Result<Vec<String>, String> { 36 + let base = relay_url.trim_end_matches('/'); 37 + let mut dids = Vec::new(); 38 + let mut cursor: Option<String> = None; 39 + 40 + loop { 41 + let mut url = format!( 42 + "{base}/xrpc/com.atproto.sync.listReposByCollection?collection={collection}&limit=1000" 43 + ); 44 + if let Some(ref c) = cursor { 45 + url.push_str(&format!("&cursor={c}")); 46 + } 47 + 48 + let resp = http 49 + .get(&url) 50 + .send() 51 + .await 52 + .map_err(|e| format!("relay request failed: {e}"))?; 53 + 54 + if !resp.status().is_success() { 55 + return Err(format!("relay returned {}", resp.status())); 56 + } 57 + 58 + let body: ListReposResponse = resp 59 + .json() 60 + .await 61 + .map_err(|e| format!("invalid relay response: {e}"))?; 62 + 63 + let page_count = body.repos.len(); 64 + for repo in body.repos { 65 + dids.push(repo.did); 66 + } 67 + 68 + match body.cursor { 69 + Some(c) if page_count > 0 => cursor = Some(c), 70 + _ => break, 71 + } 72 + } 73 + 74 + Ok(dids) 75 + } 76 + 77 + // --------------------------------------------------------------------------- 78 + // Admin handlers 79 + // --------------------------------------------------------------------------- 80 + 81 + /// POST /admin/backfill — create a backfill job, discover repos, and add them to Tap. 13 82 pub(super) async fn create_backfill( 14 83 State(state): State<AppState>, 15 84 _admin: AdminAuth, 16 85 Json(body): Json<CreateBackfillBody>, 17 86 ) -> Result<(StatusCode, Json<Value>), AppError> { 87 + // Create a backfill_jobs record for tracking/audit. 18 88 let row: (String,) = sqlx::query_as( 19 89 "INSERT INTO backfill_jobs (collection, did) VALUES ($1, $2) RETURNING id::text", 20 90 ) ··· 24 94 .await 25 95 .map_err(|e| AppError::Internal(format!("failed to create backfill job: {e}")))?; 26 96 97 + let job_id = row.0.clone(); 98 + 99 + // Mark as running. 100 + let _ = sqlx::query( 101 + "UPDATE backfill_jobs SET status = 'running', started_at = NOW() WHERE id::text = $1", 102 + ) 103 + .bind(&job_id) 104 + .execute(&state.db) 105 + .await; 106 + 107 + // Determine target collections. 108 + let collections: Vec<String> = if let Some(ref col) = body.collection { 109 + vec![col.clone()] 110 + } else { 111 + let rows: Vec<(String,)> = sqlx::query_as( 112 + "SELECT id FROM lexicons WHERE backfill = TRUE AND lexicon_json->'defs'->'main'->>'type' = 'record'", 113 + ) 114 + .fetch_all(&state.db) 115 + .await 116 + .map_err(|e| AppError::Internal(format!("failed to query backfill-eligible lexicons: {e}")))?; 117 + rows.into_iter().map(|(id,)| id).collect() 118 + }; 119 + 120 + if collections.is_empty() { 121 + let _ = sqlx::query( 122 + "UPDATE backfill_jobs SET status = 'completed', completed_at = NOW(), error = 'no backfill-eligible collections' WHERE id::text = $1", 123 + ) 124 + .bind(&job_id) 125 + .execute(&state.db) 126 + .await; 127 + 128 + return Ok(( 129 + StatusCode::CREATED, 130 + Json(serde_json::json!({ 131 + "id": job_id, 132 + "status": "completed", 133 + "error": "no backfill-eligible collections", 134 + })), 135 + )); 136 + } 137 + 138 + // Discover repos and add them to Tap. 139 + let mut all_dids = Vec::new(); 140 + 141 + for collection in &collections { 142 + let dids = if let Some(ref did) = body.did { 143 + vec![did.clone()] 144 + } else { 145 + match list_repos_by_collection(&state.http, &state.config.relay_url, collection).await { 146 + Ok(dids) => dids, 147 + Err(e) => { 148 + tracing::warn!(collection, error = %e, "failed to discover repos, skipping"); 149 + continue; 150 + } 151 + } 152 + }; 153 + 154 + all_dids.extend(dids); 155 + } 156 + 157 + // Deduplicate DIDs. 158 + all_dids.sort(); 159 + all_dids.dedup(); 160 + 161 + let total_repos = all_dids.len() as i32; 162 + 163 + // Update job with total repos. 164 + let _ = sqlx::query("UPDATE backfill_jobs SET total_repos = $2 WHERE id::text = $1") 165 + .bind(&job_id) 166 + .bind(total_repos) 167 + .execute(&state.db) 168 + .await; 169 + 170 + // Add repos to Tap in batches. 171 + if !all_dids.is_empty() { 172 + for chunk in all_dids.chunks(1000) { 173 + if let Err(e) = tap::add_repos( 174 + &state.http, 175 + &state.config.tap_url, 176 + state.config.tap_admin_password.as_deref(), 177 + chunk, 178 + ) 179 + .await 180 + { 181 + tracing::warn!(error = %e, "failed to add repos to tap"); 182 + let _ = sqlx::query( 183 + "UPDATE backfill_jobs SET status = 'failed', completed_at = NOW(), error = $2 WHERE id::text = $1", 184 + ) 185 + .bind(&job_id) 186 + .bind(&e) 187 + .execute(&state.db) 188 + .await; 189 + 190 + return Ok(( 191 + StatusCode::CREATED, 192 + Json(serde_json::json!({ 193 + "id": job_id, 194 + "status": "failed", 195 + "error": e, 196 + })), 197 + )); 198 + } 199 + } 200 + } 201 + 202 + // Mark as completed (Tap handles the actual backfill asynchronously). 203 + let _ = sqlx::query( 204 + "UPDATE backfill_jobs SET status = 'completed', completed_at = NOW(), processed_repos = $2 WHERE id::text = $1", 205 + ) 206 + .bind(&job_id) 207 + .bind(total_repos) 208 + .execute(&state.db) 209 + .await; 210 + 27 211 Ok(( 28 212 StatusCode::CREATED, 29 213 Json(serde_json::json!({ 30 - "id": row.0, 31 - "status": "pending", 214 + "id": job_id, 215 + "status": "completed", 216 + "total_repos": total_repos, 32 217 })), 33 218 )) 34 219 }
+5 -5
src/admin/lexicons.rs
··· 10 10 use super::auth::AdminAuth; 11 11 use super::types::{LexiconSummary, UploadLexiconBody}; 12 12 13 - /// Send the current record collection list to the Jetstream task so it 14 - /// reconnects with the updated filter. 15 - async fn notify_jetstream(state: &AppState) { 13 + /// Send the current record collection list to the Tap task so it 14 + /// syncs the updated filter. 15 + async fn notify_collections(state: &AppState) { 16 16 let collections = state.lexicons.get_record_collections().await; 17 17 let _ = state.collections_tx.send(collections); 18 18 } ··· 93 93 state.lexicons.upsert(parsed).await; 94 94 95 95 if is_record { 96 - notify_jetstream(&state).await; 96 + notify_collections(&state).await; 97 97 } 98 98 99 99 let status = if revision == 1 { ··· 197 197 } 198 198 199 199 state.lexicons.remove(&id).await; 200 - notify_jetstream(&state).await; 200 + notify_collections(&state).await; 201 201 202 202 Ok(StatusCode::NO_CONTENT) 203 203 }
+5 -5
src/admin/network_lexicons.rs
··· 11 11 use super::auth::AdminAuth; 12 12 use super::types::{AddNetworkLexiconBody, NetworkLexiconSummary}; 13 13 14 - /// Send the current record collection list to the Jetstream task so it 15 - /// reconnects with the updated filter. 16 - async fn notify_jetstream(state: &AppState) { 14 + /// Send the current record collection list to the Tap task so it 15 + /// syncs the updated filter. 16 + async fn notify_collections(state: &AppState) { 17 17 let collections = state.lexicons.get_record_collections().await; 18 18 let _ = state.collections_tx.send(collections); 19 19 } ··· 95 95 state.lexicons.upsert(parsed).await; 96 96 97 97 if is_record { 98 - notify_jetstream(&state).await; 98 + notify_collections(&state).await; 99 99 } 100 100 101 101 Ok(( ··· 165 165 .await; 166 166 167 167 state.lexicons.remove(&nsid).await; 168 - notify_jetstream(&state).await; 168 + notify_collections(&state).await; 169 169 170 170 Ok(StatusCode::NO_CONTENT) 171 171 }
-373
src/backfill.rs
··· 1 - use serde::Deserialize; 2 - use serde_json::Value; 3 - use sqlx::PgPool; 4 - use std::sync::Arc; 5 - use tokio::sync::Semaphore; 6 - use tracing::{debug, error, info, warn}; 7 - 8 - use crate::profile; 9 - 10 - // --------------------------------------------------------------------------- 11 - // Relay / PDS response types 12 - // --------------------------------------------------------------------------- 13 - 14 - #[derive(Deserialize)] 15 - struct ListReposResponse { 16 - repos: Vec<RepoEntry>, 17 - cursor: Option<String>, 18 - } 19 - 20 - #[derive(Deserialize)] 21 - struct RepoEntry { 22 - did: String, 23 - } 24 - 25 - #[derive(Deserialize)] 26 - struct ListRecordsResponse { 27 - records: Vec<RecordEntry>, 28 - cursor: Option<String>, 29 - } 30 - 31 - #[derive(Deserialize)] 32 - struct RecordEntry { 33 - uri: String, 34 - cid: String, 35 - value: Value, 36 - } 37 - 38 - // --------------------------------------------------------------------------- 39 - // Relay discovery 40 - // --------------------------------------------------------------------------- 41 - 42 - /// Discover all DIDs that have records in `collection` via the relay's 43 - /// `com.atproto.sync.listReposByCollection` endpoint. Paginates until done. 44 - async fn list_repos_by_collection( 45 - http: &reqwest::Client, 46 - relay_url: &str, 47 - collection: &str, 48 - ) -> Result<Vec<String>, String> { 49 - let base = relay_url.trim_end_matches('/'); 50 - let mut dids = Vec::new(); 51 - let mut cursor: Option<String> = None; 52 - 53 - loop { 54 - let mut url = format!( 55 - "{base}/xrpc/com.atproto.sync.listReposByCollection?collection={collection}&limit=1000" 56 - ); 57 - if let Some(ref c) = cursor { 58 - url.push_str(&format!("&cursor={c}")); 59 - } 60 - 61 - let resp = http 62 - .get(&url) 63 - .send() 64 - .await 65 - .map_err(|e| format!("relay request failed: {e}"))?; 66 - 67 - if !resp.status().is_success() { 68 - return Err(format!("relay returned {}", resp.status())); 69 - } 70 - 71 - let body: ListReposResponse = resp 72 - .json() 73 - .await 74 - .map_err(|e| format!("invalid relay response: {e}"))?; 75 - 76 - let page_count = body.repos.len(); 77 - for repo in body.repos { 78 - dids.push(repo.did); 79 - } 80 - 81 - match body.cursor { 82 - Some(c) if page_count > 0 => cursor = Some(c), 83 - _ => break, 84 - } 85 - } 86 - 87 - Ok(dids) 88 - } 89 - 90 - // --------------------------------------------------------------------------- 91 - // PDS record fetching 92 - // --------------------------------------------------------------------------- 93 - 94 - /// Fetch all records for a DID + collection from their PDS via 95 - /// `com.atproto.repo.listRecords`. Paginates until done. 96 - async fn fetch_records( 97 - http: &reqwest::Client, 98 - pds_url: &str, 99 - did: &str, 100 - collection: &str, 101 - ) -> Result<Vec<(String, String, String, Value)>, String> { 102 - let base = pds_url.trim_end_matches('/'); 103 - let mut records = Vec::new(); 104 - let mut cursor: Option<String> = None; 105 - 106 - loop { 107 - let mut url = format!( 108 - "{base}/xrpc/com.atproto.repo.listRecords?repo={did}&collection={collection}&limit=100" 109 - ); 110 - if let Some(ref c) = cursor { 111 - url.push_str(&format!("&cursor={c}")); 112 - } 113 - 114 - let resp = http 115 - .get(&url) 116 - .send() 117 - .await 118 - .map_err(|e| format!("PDS listRecords failed: {e}"))?; 119 - 120 - if !resp.status().is_success() { 121 - return Err(format!("PDS returned {} for {did}", resp.status())); 122 - } 123 - 124 - let body: ListRecordsResponse = resp 125 - .json() 126 - .await 127 - .map_err(|e| format!("invalid PDS listRecords response: {e}"))?; 128 - 129 - let page_count = body.records.len(); 130 - for entry in body.records { 131 - let rkey = entry 132 - .uri 133 - .split('/') 134 - .next_back() 135 - .unwrap_or_default() 136 - .to_string(); 137 - records.push((entry.uri, rkey, entry.cid, entry.value)); 138 - } 139 - 140 - match body.cursor { 141 - Some(c) if page_count > 0 => cursor = Some(c), 142 - _ => break, 143 - } 144 - } 145 - 146 - Ok(records) 147 - } 148 - 149 - // --------------------------------------------------------------------------- 150 - // Job runner 151 - // --------------------------------------------------------------------------- 152 - 153 - /// Run a single backfill job: discover repos, fetch records, upsert into DB. 154 - async fn run_job( 155 - db: &PgPool, 156 - http: &reqwest::Client, 157 - relay_url: &str, 158 - plc_url: &str, 159 - job_id: &str, 160 - ) -> Result<(), String> { 161 - // Fetch the job 162 - let job: (Option<String>, Option<String>) = 163 - sqlx::query_as("SELECT collection, did FROM backfill_jobs WHERE id::text = $1") 164 - .bind(job_id) 165 - .fetch_one(db) 166 - .await 167 - .map_err(|e| format!("failed to fetch job: {e}"))?; 168 - 169 - let (job_collection, job_did) = job; 170 - 171 - // Mark as running 172 - let _ = sqlx::query( 173 - "UPDATE backfill_jobs SET status = 'running', started_at = NOW() WHERE id::text = $1", 174 - ) 175 - .bind(job_id) 176 - .execute(db) 177 - .await; 178 - 179 - // Determine target collections 180 - let collections: Vec<String> = if let Some(ref col) = job_collection { 181 - vec![col.clone()] 182 - } else { 183 - // All backfill-eligible collections 184 - let rows: Vec<(String,)> = sqlx::query_as( 185 - "SELECT id FROM lexicons WHERE backfill = TRUE AND lexicon_json->'defs'->'main'->>'type' = 'record'", 186 - ) 187 - .fetch_all(db) 188 - .await 189 - .map_err(|e| format!("failed to query backfill-eligible lexicons: {e}"))?; 190 - rows.into_iter().map(|(id,)| id).collect() 191 - }; 192 - 193 - if collections.is_empty() { 194 - let _ = sqlx::query( 195 - "UPDATE backfill_jobs SET status = 'completed', completed_at = NOW(), error = 'no backfill-eligible collections' WHERE id::text = $1", 196 - ) 197 - .bind(job_id) 198 - .execute(db) 199 - .await; 200 - return Ok(()); 201 - } 202 - 203 - info!(job = job_id, ?collections, "starting backfill"); 204 - 205 - let semaphore = Arc::new(Semaphore::new(8)); 206 - let mut total_repos = 0i32; 207 - let mut processed_repos = 0i32; 208 - let mut total_records = 0i32; 209 - 210 - for collection in &collections { 211 - // Discover DIDs 212 - let dids = if let Some(ref did) = job_did { 213 - vec![did.clone()] 214 - } else { 215 - match list_repos_by_collection(http, relay_url, collection).await { 216 - Ok(dids) => dids, 217 - Err(e) => { 218 - warn!(collection, error = %e, "failed to discover repos, skipping"); 219 - continue; 220 - } 221 - } 222 - }; 223 - 224 - total_repos += dids.len() as i32; 225 - let _ = sqlx::query("UPDATE backfill_jobs SET total_repos = $2 WHERE id::text = $1") 226 - .bind(job_id) 227 - .bind(total_repos) 228 - .execute(db) 229 - .await; 230 - 231 - // Process each DID concurrently (bounded by semaphore) 232 - let mut tasks = Vec::new(); 233 - 234 - for did in dids { 235 - let permit = semaphore.clone().acquire_owned().await.unwrap(); 236 - let http = http.clone(); 237 - let db = db.clone(); 238 - let collection = collection.clone(); 239 - 240 - let plc_url = plc_url.to_string(); 241 - let task = tokio::spawn(async move { 242 - let _permit = permit; 243 - backfill_repo(&db, &http, &plc_url, &did, &collection).await 244 - }); 245 - tasks.push(task); 246 - } 247 - 248 - for task in tasks { 249 - match task.await { 250 - Ok(Ok(count)) => { 251 - total_records += count; 252 - processed_repos += 1; 253 - } 254 - Ok(Err(e)) => { 255 - warn!(error = %e, "repo backfill failed"); 256 - processed_repos += 1; 257 - } 258 - Err(e) => { 259 - warn!(error = %e, "repo backfill task panicked"); 260 - processed_repos += 1; 261 - } 262 - } 263 - 264 - // Update progress periodically 265 - let _ = sqlx::query( 266 - "UPDATE backfill_jobs SET processed_repos = $2, total_records = $3 WHERE id::text = $1", 267 - ) 268 - .bind(job_id) 269 - .bind(processed_repos) 270 - .bind(total_records) 271 - .execute(db) 272 - .await; 273 - } 274 - } 275 - 276 - // Mark completed 277 - let _ = sqlx::query( 278 - "UPDATE backfill_jobs SET status = 'completed', completed_at = NOW(), processed_repos = $2, total_records = $3 WHERE id::text = $1", 279 - ) 280 - .bind(job_id) 281 - .bind(processed_repos) 282 - .bind(total_records) 283 - .execute(db) 284 - .await; 285 - 286 - info!( 287 - job = job_id, 288 - processed_repos, total_records, "backfill completed" 289 - ); 290 - Ok(()) 291 - } 292 - 293 - /// Backfill a single repo's records for a collection. Returns the number of 294 - /// records upserted. 295 - async fn backfill_repo( 296 - db: &PgPool, 297 - http: &reqwest::Client, 298 - plc_url: &str, 299 - did: &str, 300 - collection: &str, 301 - ) -> Result<i32, String> { 302 - // Resolve PDS 303 - let pds = profile::resolve_pds_endpoint(http, plc_url, did) 304 - .await 305 - .map_err(|e| format!("PDS resolution failed for {did}: {e}"))?; 306 - 307 - // Fetch records 308 - let records = fetch_records(http, &pds, did, collection).await?; 309 - let count = records.len() as i32; 310 - 311 - debug!(did, collection, count, "fetched records from PDS"); 312 - 313 - // Upsert into DB 314 - for (uri, rkey, cid, value) in records { 315 - let _ = sqlx::query( 316 - r#" 317 - INSERT INTO records (uri, did, collection, rkey, record, cid) 318 - VALUES ($1, $2, $3, $4, $5, $6) 319 - ON CONFLICT (uri) DO UPDATE 320 - SET record = EXCLUDED.record, 321 - cid = EXCLUDED.cid 322 - "#, 323 - ) 324 - .bind(&uri) 325 - .bind(did) 326 - .bind(collection) 327 - .bind(&rkey) 328 - .bind(&value) 329 - .bind(&cid) 330 - .execute(db) 331 - .await 332 - .map_err(|e| format!("DB upsert failed for {uri}: {e}"))?; 333 - } 334 - 335 - Ok(count) 336 - } 337 - 338 - // --------------------------------------------------------------------------- 339 - // Background worker 340 - // --------------------------------------------------------------------------- 341 - 342 - /// Spawn a background task that polls for pending backfill jobs and runs them. 343 - pub fn spawn_worker(db: PgPool, http: reqwest::Client, relay_url: String, plc_url: String) { 344 - tokio::spawn(async move { 345 - info!("backfill worker started"); 346 - loop { 347 - // Poll for a pending job 348 - let job: Option<(String,)> = sqlx::query_as( 349 - "SELECT id::text FROM backfill_jobs WHERE status = 'pending' ORDER BY created_at ASC LIMIT 1", 350 - ) 351 - .fetch_optional(&db) 352 - .await 353 - .unwrap_or(None); 354 - 355 - if let Some((job_id,)) = job { 356 - info!(job = %job_id, "picked up backfill job"); 357 - if let Err(e) = run_job(&db, &http, &relay_url, &plc_url, &job_id).await { 358 - error!(job = %job_id, error = %e, "backfill job failed"); 359 - let _ = sqlx::query( 360 - "UPDATE backfill_jobs SET status = 'failed', completed_at = NOW(), error = $2 WHERE id::text = $1", 361 - ) 362 - .bind(&job_id) 363 - .bind(&e) 364 - .execute(&db) 365 - .await; 366 - } 367 - } else { 368 - // No pending jobs, wait before polling again 369 - tokio::time::sleep(std::time::Duration::from_secs(5)).await; 370 - } 371 - } 372 - }); 373 - }
+10 -6
src/config.rs
··· 7 7 pub port: u16, 8 8 pub database_url: String, 9 9 pub aip_url: String, 10 - pub jetstream_url: String, 10 + pub tap_url: String, 11 + pub tap_admin_password: Option<String>, 11 12 pub relay_url: String, 12 13 pub plc_url: String, 13 14 pub static_dir: String, ··· 23 24 .unwrap_or(3000), 24 25 database_url: env::var("DATABASE_URL").expect("DATABASE_URL must be set"), 25 26 aip_url: env::var("AIP_URL").expect("AIP_URL must be set"), 26 - jetstream_url: env::var("JETSTREAM_URL") 27 - .unwrap_or_else(|_| "wss://jetstream2.us-west.bsky.network/subscribe".into()), 27 + tap_url: env::var("TAP_URL").unwrap_or_else(|_| "http://localhost:2480".into()), 28 + tap_admin_password: env::var("TAP_ADMIN_PASSWORD").ok(), 28 29 relay_url: env::var("RELAY_URL").unwrap_or_else(|_| "https://bsky.network".into()), 29 30 plc_url: env::var("PLC_URL").unwrap_or_else(|_| "https://plc.directory".into()), 30 31 static_dir: env::var("STATIC_DIR").unwrap_or_else(|_| "./web/out".into()), ··· 49 50 "PORT", 50 51 "DATABASE_URL", 51 52 "AIP_URL", 52 - "JETSTREAM_URL", 53 + "TAP_URL", 54 + "TAP_ADMIN_PASSWORD", 53 55 "RELAY_URL", 54 56 "PLC_URL", 55 57 ] { ··· 73 75 port: 8080, 74 76 database_url: String::new(), 75 77 aip_url: String::new(), 76 - jetstream_url: String::new(), 78 + tap_url: String::new(), 79 + tap_admin_password: None, 77 80 relay_url: String::new(), 78 81 plc_url: String::new(), 79 82 static_dir: String::new(), ··· 106 109 let config = Config::from_env(); 107 110 assert_eq!(config.host, "0.0.0.0"); 108 111 assert_eq!(config.port, 3000); 109 - assert!(config.jetstream_url.contains("jetstream")); 112 + assert_eq!(config.tap_url, "http://localhost:2480"); 113 + assert!(config.tap_admin_password.is_none()); 110 114 assert_eq!(config.relay_url, "https://bsky.network"); 111 115 assert_eq!(config.plc_url, "https://plc.directory"); 112 116 }
-347
src/jetstream.rs
··· 1 - use futures_util::StreamExt; 2 - use serde::Deserialize; 3 - use serde_json::Value; 4 - use sqlx::PgPool; 5 - use std::sync::Arc; 6 - use std::sync::atomic::{AtomicI64, Ordering}; 7 - use tokio::sync::watch; 8 - use tokio_tungstenite::tungstenite::Message; 9 - 10 - use crate::lexicon::{LexiconRegistry, ParsedLexicon, ProcedureAction}; 11 - 12 - // --------------------------------------------------------------------------- 13 - // Jetstream event types 14 - // --------------------------------------------------------------------------- 15 - 16 - #[derive(Deserialize)] 17 - struct JetstreamEvent { 18 - did: String, 19 - time_us: i64, 20 - kind: String, 21 - commit: Option<JetstreamCommit>, 22 - } 23 - 24 - #[derive(Deserialize)] 25 - struct JetstreamCommit { 26 - operation: String, 27 - collection: String, 28 - rkey: String, 29 - record: Option<Value>, 30 - cid: Option<String>, 31 - } 32 - 33 - // --------------------------------------------------------------------------- 34 - // Public API 35 - // --------------------------------------------------------------------------- 36 - 37 - /// The static collection we always watch for lexicon schema updates. 38 - const LEXICON_SCHEMA_COLLECTION: &str = "com.atproto.lexicon.schema"; 39 - 40 - /// Spawn a background task that subscribes to the Jetstream firehose and 41 - /// indexes records for collections specified by the watch channel. 42 - /// 43 - /// When the collection list is empty, the task idles without connecting. 44 - /// When collections change, it disconnects and reconnects with the new filter. 45 - pub fn spawn( 46 - db: PgPool, 47 - jetstream_url: String, 48 - mut collections_rx: watch::Receiver<Vec<String>>, 49 - lexicons: LexiconRegistry, 50 - collections_tx: watch::Sender<Vec<String>>, 51 - ) { 52 - tokio::spawn(async move { 53 - let cursor: Arc<AtomicI64> = Arc::new(AtomicI64::new(0)); 54 - 55 - loop { 56 - // Wait until we have at least one collection to subscribe to. 57 - let collections = collections_rx.borrow_and_update().clone(); 58 - if collections.is_empty() { 59 - tracing::info!("no collections configured, jetstream idle"); 60 - // Block until the collection list changes. 61 - if collections_rx.changed().await.is_err() { 62 - // Sender dropped — shut down. 63 - tracing::info!("jetstream watch channel closed, shutting down"); 64 - return; 65 - } 66 - continue; 67 - } 68 - 69 - // Always include the lexicon schema collection alongside the dynamic ones. 70 - let mut wanted = collections.clone(); 71 - if !wanted.contains(&LEXICON_SCHEMA_COLLECTION.to_string()) { 72 - wanted.push(LEXICON_SCHEMA_COLLECTION.to_string()); 73 - } 74 - 75 - // Connect and process events. If the collection list changes 76 - // mid-stream, `run` returns so we can reconnect with new filters. 77 - match run( 78 - &db, 79 - &jetstream_url, 80 - &cursor, 81 - &wanted, 82 - &mut collections_rx, 83 - &lexicons, 84 - &collections_tx, 85 - ) 86 - .await 87 - { 88 - Ok(()) => { 89 - tracing::info!("jetstream reconnecting due to collection change"); 90 - } 91 - Err(e) => { 92 - tracing::warn!("jetstream disconnected: {e}"); 93 - tokio::time::sleep(std::time::Duration::from_secs(2)).await; 94 - tracing::info!("reconnecting to jetstream..."); 95 - } 96 - } 97 - } 98 - }); 99 - } 100 - 101 - // --------------------------------------------------------------------------- 102 - // Connection loop 103 - // --------------------------------------------------------------------------- 104 - 105 - async fn run( 106 - db: &PgPool, 107 - jetstream_url: &str, 108 - cursor: &Arc<AtomicI64>, 109 - collections: &[String], 110 - collections_rx: &mut watch::Receiver<Vec<String>>, 111 - lexicons: &LexiconRegistry, 112 - collections_tx: &watch::Sender<Vec<String>>, 113 - ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> { 114 - let wanted: String = collections 115 - .iter() 116 - .map(|c| format!("wantedCollections={c}")) 117 - .collect::<Vec<_>>() 118 - .join("&"); 119 - 120 - let mut url = format!("{jetstream_url}?{wanted}"); 121 - 122 - let last = cursor.load(Ordering::Relaxed); 123 - if last > 0 { 124 - // Rewind 5 seconds for gapless playback. 125 - let rewound = last - 5_000_000; 126 - url.push_str(&format!("&cursor={rewound}")); 127 - tracing::info!(cursor = rewound, "resuming jetstream with cursor"); 128 - } 129 - 130 - tracing::info!(collections = ?collections, "connecting to jetstream"); 131 - 132 - let (ws, _) = tokio_tungstenite::connect_async(&url).await?; 133 - tracing::info!("connected to jetstream"); 134 - 135 - let (_, mut read) = ws.split(); 136 - 137 - loop { 138 - tokio::select! { 139 - msg = read.next() => { 140 - let msg = match msg { 141 - Some(Ok(m)) => m, 142 - Some(Err(e)) => return Err(e.into()), 143 - None => break, 144 - }; 145 - 146 - let text = match msg { 147 - Message::Text(t) => t, 148 - Message::Close(_) => break, 149 - _ => continue, 150 - }; 151 - 152 - let event: JetstreamEvent = match serde_json::from_str(&text) { 153 - Ok(e) => e, 154 - Err(e) => { 155 - tracing::debug!("skipping unparseable event: {e}"); 156 - continue; 157 - } 158 - }; 159 - 160 - // Update cursor. 161 - cursor.store(event.time_us, Ordering::Relaxed); 162 - 163 - if event.kind != "commit" { 164 - continue; 165 - } 166 - 167 - let commit = match event.commit { 168 - Some(c) => c, 169 - None => continue, 170 - }; 171 - 172 - let uri = format!( 173 - "at://{}/{}/{}", 174 - event.did, commit.collection, commit.rkey, 175 - ); 176 - 177 - // Handle lexicon schema events for tracked network lexicons. 178 - if commit.collection == LEXICON_SCHEMA_COLLECTION { 179 - handle_lexicon_schema_event( 180 - db, 181 - lexicons, 182 - collections_tx, 183 - &event.did, 184 - &commit, 185 - ) 186 - .await; 187 - continue; 188 - } 189 - 190 - match commit.operation.as_str() { 191 - "create" | "update" => { 192 - let record = match commit.record { 193 - Some(r) => r, 194 - None => continue, 195 - }; 196 - let cid = commit.cid.unwrap_or_default(); 197 - 198 - if let Err(e) = sqlx::query( 199 - r#" 200 - INSERT INTO records (uri, did, collection, rkey, record, cid, indexed_at) 201 - VALUES ($1, $2, $3, $4, $5, $6, NOW()) 202 - ON CONFLICT (uri) DO UPDATE 203 - SET record = EXCLUDED.record, 204 - cid = EXCLUDED.cid, 205 - indexed_at = NOW() 206 - "#, 207 - ) 208 - .bind(&uri) 209 - .bind(&event.did) 210 - .bind(&commit.collection) 211 - .bind(&commit.rkey) 212 - .bind(&record) 213 - .bind(&cid) 214 - .execute(db) 215 - .await 216 - { 217 - tracing::warn!(uri = %uri, "failed to upsert record: {e}"); 218 - } 219 - } 220 - "delete" => { 221 - if let Err(e) = sqlx::query("DELETE FROM records WHERE uri = $1") 222 - .bind(&uri) 223 - .execute(db) 224 - .await 225 - { 226 - tracing::warn!(uri = %uri, "failed to delete record: {e}"); 227 - } 228 - } 229 - _ => {} 230 - } 231 - } 232 - // If the collection list changes, break out to reconnect. 233 - _ = collections_rx.changed() => { 234 - tracing::info!("collection filter changed, will reconnect"); 235 - return Ok(()); 236 - } 237 - } 238 - } 239 - 240 - Ok(()) 241 - } 242 - 243 - // --------------------------------------------------------------------------- 244 - // Lexicon schema event handler 245 - // --------------------------------------------------------------------------- 246 - 247 - /// Handle a `com.atproto.lexicon.schema` commit event for tracked network lexicons. 248 - async fn handle_lexicon_schema_event( 249 - db: &PgPool, 250 - lexicons: &LexiconRegistry, 251 - collections_tx: &watch::Sender<Vec<String>>, 252 - did: &str, 253 - commit: &JetstreamCommit, 254 - ) { 255 - let nsid = &commit.rkey; 256 - 257 - // Check if this NSID is one we're tracking and the DID matches the authority. 258 - let tracked: Option<(Option<String>,)> = sqlx::query_as( 259 - "SELECT target_collection FROM network_lexicons WHERE nsid = $1 AND authority_did = $2", 260 - ) 261 - .bind(nsid) 262 - .bind(did) 263 - .fetch_optional(db) 264 - .await 265 - .unwrap_or(None); 266 - 267 - let target_collection = match tracked { 268 - Some((tc,)) => tc, 269 - None => return, // Not a tracked network lexicon. 270 - }; 271 - 272 - match commit.operation.as_str() { 273 - "create" | "update" => { 274 - let record = match &commit.record { 275 - Some(r) => r, 276 - None => return, 277 - }; 278 - 279 - let parsed = match ParsedLexicon::parse( 280 - record.clone(), 281 - 1, 282 - target_collection.clone(), 283 - ProcedureAction::Upsert, 284 - ) { 285 - Ok(p) => p, 286 - Err(e) => { 287 - tracing::warn!(nsid, "failed to parse lexicon schema event: {e}"); 288 - return; 289 - } 290 - }; 291 - 292 - let is_record = parsed.lexicon_type == crate::lexicon::LexiconType::Record; 293 - 294 - // Upsert into lexicons table. 295 - if let Err(e) = sqlx::query( 296 - r#" 297 - INSERT INTO lexicons (id, lexicon_json, backfill, target_collection) 298 - VALUES ($1, $2, false, $3) 299 - ON CONFLICT (id) DO UPDATE SET 300 - lexicon_json = EXCLUDED.lexicon_json, 301 - target_collection = EXCLUDED.target_collection, 302 - revision = lexicons.revision + 1, 303 - updated_at = NOW() 304 - "#, 305 - ) 306 - .bind(nsid) 307 - .bind(record) 308 - .bind(&target_collection) 309 - .execute(db) 310 - .await 311 - { 312 - tracing::warn!(nsid, "failed to upsert lexicon from event: {e}"); 313 - return; 314 - } 315 - 316 - // Update last_fetched_at. 317 - let _ = 318 - sqlx::query("UPDATE network_lexicons SET last_fetched_at = NOW() WHERE nsid = $1") 319 - .bind(nsid) 320 - .execute(db) 321 - .await; 322 - 323 - lexicons.upsert(parsed).await; 324 - tracing::info!(nsid, "updated network lexicon from jetstream event"); 325 - 326 - if is_record { 327 - let collections = lexicons.get_record_collections().await; 328 - let _ = collections_tx.send(collections); 329 - } 330 - } 331 - "delete" => { 332 - // Remove from lexicons table and registry. 333 - let _ = sqlx::query("DELETE FROM lexicons WHERE id = $1") 334 - .bind(nsid) 335 - .execute(db) 336 - .await; 337 - 338 - let was_present = lexicons.remove(nsid).await; 339 - if was_present { 340 - tracing::info!(nsid, "removed network lexicon from jetstream delete event"); 341 - let collections = lexicons.get_record_collections().await; 342 - let _ = collections_tx.send(collections); 343 - } 344 - } 345 - _ => {} 346 - } 347 - }
+1 -2
src/lib.rs
··· 1 1 pub mod admin; 2 2 pub mod auth; 3 - pub mod backfill; 4 3 pub mod config; 5 4 pub mod error; 6 - pub mod jetstream; 7 5 pub mod lexicon; 8 6 pub mod profile; 9 7 pub mod repo; 10 8 pub mod resolve; 11 9 pub mod server; 10 + pub mod tap; 12 11 pub mod xrpc; 13 12 14 13 use config::Config;
+23 -9
src/main.rs
··· 1 1 use happyview::config::Config; 2 2 use happyview::lexicon::{LexiconRegistry, ParsedLexicon, ProcedureAction}; 3 3 use happyview::resolve::{fetch_lexicon_from_pds, resolve_nsid_authority}; 4 - use happyview::{AppState, backfill, jetstream, server}; 4 + use happyview::{AppState, server, tap}; 5 5 use tokio::sync::watch; 6 6 use tracing::{info, warn}; 7 7 ··· 107 107 } 108 108 109 109 let initial_collections = lexicons.get_record_collections().await; 110 + let initial_collections_for_sync = initial_collections.clone(); 110 111 let (collections_tx, collections_rx) = watch::channel(initial_collections); 111 112 112 113 let state = AppState { ··· 117 118 collections_tx, 118 119 }; 119 120 120 - jetstream::spawn( 121 + // Sync initial collections to Tap on startup. 122 + { 123 + let mut wanted = initial_collections_for_sync; 124 + if !wanted.contains(&"com.atproto.lexicon.schema".to_string()) { 125 + wanted.push("com.atproto.lexicon.schema".to_string()); 126 + } 127 + if let Err(e) = tap::sync_collections( 128 + &state.http, 129 + &config.tap_url, 130 + config.tap_admin_password.as_deref(), 131 + &wanted, 132 + ) 133 + .await 134 + { 135 + warn!("failed to sync initial collections to tap: {e}"); 136 + } 137 + } 138 + 139 + tap::spawn( 121 140 state.db.clone(), 122 - config.jetstream_url.clone(), 141 + config.tap_url.clone(), 142 + config.tap_admin_password.clone(), 123 143 collections_rx, 124 144 state.lexicons.clone(), 125 145 state.collections_tx.clone(), 126 - ); 127 - backfill::spawn_worker( 128 - state.db.clone(), 129 - state.http.clone(), 130 - config.relay_url.clone(), 131 - config.plc_url.clone(), 132 146 ); 133 147 134 148 let app = server::router(state);
+475
src/tap.rs
··· 1 + use futures_util::{SinkExt, StreamExt}; 2 + use serde::Deserialize; 3 + use serde_json::Value; 4 + use sqlx::PgPool; 5 + use tokio::sync::watch; 6 + use tokio_tungstenite::tungstenite::Message; 7 + use tokio_tungstenite::tungstenite::client::IntoClientRequest; 8 + 9 + use crate::lexicon::{LexiconRegistry, ParsedLexicon, ProcedureAction}; 10 + 11 + // --------------------------------------------------------------------------- 12 + // Tap event types (matches Tap's outbox JSON format) 13 + // --------------------------------------------------------------------------- 14 + 15 + #[derive(Deserialize)] 16 + struct TapEvent { 17 + id: u64, 18 + #[serde(rename = "type")] 19 + event_type: String, 20 + record: Option<TapRecordEvent>, 21 + identity: Option<TapIdentityEvent>, 22 + } 23 + 24 + #[derive(Deserialize)] 25 + struct TapRecordEvent { 26 + did: String, 27 + collection: String, 28 + rkey: String, 29 + action: String, 30 + record: Option<Value>, 31 + cid: Option<String>, 32 + #[allow(dead_code)] 33 + live: Option<bool>, 34 + } 35 + 36 + #[derive(Deserialize)] 37 + #[allow(dead_code)] 38 + struct TapIdentityEvent { 39 + did: String, 40 + handle: Option<String>, 41 + #[serde(rename = "isActive")] 42 + is_active: Option<bool>, 43 + status: Option<String>, 44 + } 45 + 46 + // --------------------------------------------------------------------------- 47 + // Tap HTTP client helpers 48 + // --------------------------------------------------------------------------- 49 + 50 + async fn tap_put( 51 + http: &reqwest::Client, 52 + tap_url: &str, 53 + path: &str, 54 + password: Option<&str>, 55 + body: &Value, 56 + ) -> Result<(), String> { 57 + let url = format!("{}{}", tap_url.trim_end_matches('/'), path); 58 + let mut req = http.put(&url).json(body); 59 + if let Some(pw) = password { 60 + req = req.basic_auth("admin", Some(pw)); 61 + } 62 + let resp = req 63 + .send() 64 + .await 65 + .map_err(|e| format!("tap HTTP request failed: {e}"))?; 66 + if !resp.status().is_success() { 67 + let status = resp.status(); 68 + let body = resp.text().await.unwrap_or_default(); 69 + return Err(format!("tap returned {status}: {body}")); 70 + } 71 + Ok(()) 72 + } 73 + 74 + async fn tap_post( 75 + http: &reqwest::Client, 76 + tap_url: &str, 77 + path: &str, 78 + password: Option<&str>, 79 + body: &Value, 80 + ) -> Result<(), String> { 81 + let url = format!("{}{}", tap_url.trim_end_matches('/'), path); 82 + let mut req = http.post(&url).json(body); 83 + if let Some(pw) = password { 84 + req = req.basic_auth("admin", Some(pw)); 85 + } 86 + let resp = req 87 + .send() 88 + .await 89 + .map_err(|e| format!("tap HTTP request failed: {e}"))?; 90 + if !resp.status().is_success() { 91 + let status = resp.status(); 92 + let body = resp.text().await.unwrap_or_default(); 93 + return Err(format!("tap returned {status}: {body}")); 94 + } 95 + Ok(()) 96 + } 97 + 98 + /// Sync Tap's collection filters and signal collections with HappyView's 99 + /// current record collections. 100 + pub async fn sync_collections( 101 + http: &reqwest::Client, 102 + tap_url: &str, 103 + tap_admin_password: Option<&str>, 104 + collections: &[String], 105 + ) -> Result<(), String> { 106 + let body = serde_json::json!({ "collections": collections }); 107 + tap_put( 108 + http, 109 + tap_url, 110 + "/collection-filters", 111 + tap_admin_password, 112 + &body, 113 + ) 114 + .await?; 115 + tap_put( 116 + http, 117 + tap_url, 118 + "/signal-collections", 119 + tap_admin_password, 120 + &body, 121 + ) 122 + .await?; 123 + Ok(()) 124 + } 125 + 126 + /// Add repos to Tap for backfill via POST /repos/add. 127 + pub async fn add_repos( 128 + http: &reqwest::Client, 129 + tap_url: &str, 130 + tap_admin_password: Option<&str>, 131 + dids: &[String], 132 + ) -> Result<(), String> { 133 + let body = serde_json::json!({ "dids": dids }); 134 + tap_post(http, tap_url, "/repos/add", tap_admin_password, &body).await 135 + } 136 + 137 + // --------------------------------------------------------------------------- 138 + // Public API 139 + // --------------------------------------------------------------------------- 140 + 141 + /// The static collection we always include for lexicon schema updates. 142 + const LEXICON_SCHEMA_COLLECTION: &str = "com.atproto.lexicon.schema"; 143 + 144 + /// Spawn a background task that connects to Tap's WebSocket channel and 145 + /// processes record + identity events. Replaces both jetstream and backfill. 146 + /// 147 + /// When the collection list changes (via `collections_rx`), the task syncs 148 + /// the updated filters to Tap's HTTP API. 149 + pub fn spawn( 150 + db: PgPool, 151 + tap_url: String, 152 + tap_admin_password: Option<String>, 153 + mut collections_rx: watch::Receiver<Vec<String>>, 154 + lexicons: LexiconRegistry, 155 + collections_tx: watch::Sender<Vec<String>>, 156 + ) { 157 + let http = reqwest::Client::new(); 158 + 159 + tokio::spawn(async move { 160 + loop { 161 + // Build WebSocket URL from HTTP URL. 162 + let ws_url = build_ws_url(&tap_url); 163 + 164 + match run( 165 + &db, 166 + &http, 167 + &tap_url, 168 + tap_admin_password.as_deref(), 169 + &ws_url, 170 + &mut collections_rx, 171 + &lexicons, 172 + &collections_tx, 173 + ) 174 + .await 175 + { 176 + Ok(()) => { 177 + tracing::info!("tap reconnecting due to collection change"); 178 + } 179 + Err(e) => { 180 + tracing::warn!("tap disconnected: {e}"); 181 + tokio::time::sleep(std::time::Duration::from_secs(2)).await; 182 + tracing::info!("reconnecting to tap..."); 183 + } 184 + } 185 + } 186 + }); 187 + } 188 + 189 + fn build_ws_url(tap_url: &str) -> String { 190 + let base = tap_url.trim_end_matches('/'); 191 + let ws_base = if let Some(rest) = base.strip_prefix("https://") { 192 + format!("wss://{rest}") 193 + } else if let Some(rest) = base.strip_prefix("http://") { 194 + format!("ws://{rest}") 195 + } else { 196 + format!("ws://{base}") 197 + }; 198 + format!("{ws_base}/channel") 199 + } 200 + 201 + // --------------------------------------------------------------------------- 202 + // Connection loop 203 + // --------------------------------------------------------------------------- 204 + 205 + #[allow(clippy::too_many_arguments)] 206 + async fn run( 207 + db: &PgPool, 208 + http: &reqwest::Client, 209 + tap_url: &str, 210 + tap_admin_password: Option<&str>, 211 + ws_url: &str, 212 + collections_rx: &mut watch::Receiver<Vec<String>>, 213 + lexicons: &LexiconRegistry, 214 + collections_tx: &watch::Sender<Vec<String>>, 215 + ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> { 216 + tracing::info!(url = %ws_url, "connecting to tap"); 217 + 218 + let mut request = ws_url.to_string().into_client_request()?; 219 + if let Some(pw) = tap_admin_password { 220 + use base64::Engine; 221 + let encoded = base64::engine::general_purpose::STANDARD.encode(format!("admin:{pw}")); 222 + request 223 + .headers_mut() 224 + .insert("Authorization", format!("Basic {encoded}").parse().unwrap()); 225 + } 226 + 227 + let (ws, _): ( 228 + tokio_tungstenite::WebSocketStream< 229 + tokio_tungstenite::MaybeTlsStream<tokio::net::TcpStream>, 230 + >, 231 + _, 232 + ) = tokio_tungstenite::connect_async(request).await?; 233 + tracing::info!("connected to tap"); 234 + 235 + let (mut write, mut read) = ws.split(); 236 + 237 + loop { 238 + tokio::select! { 239 + msg = read.next() => { 240 + let msg = match msg { 241 + Some(Ok(m)) => m, 242 + Some(Err(e)) => return Err(e.into()), 243 + None => break, 244 + }; 245 + 246 + let text = match msg { 247 + Message::Text(t) => t, 248 + Message::Close(_) => break, 249 + _ => continue, 250 + }; 251 + 252 + let event: TapEvent = match serde_json::from_str(&text) { 253 + Ok(e) => e, 254 + Err(e) => { 255 + tracing::debug!("skipping unparseable tap event: {e}"); 256 + continue; 257 + } 258 + }; 259 + 260 + let event_id = event.id; 261 + 262 + match event.event_type.as_str() { 263 + "record" => { 264 + if let Some(record) = event.record { 265 + handle_record_event(db, lexicons, collections_tx, &record).await; 266 + } 267 + } 268 + "identity" => { 269 + if let Some(identity) = event.identity { 270 + tracing::debug!( 271 + did = %identity.did, 272 + handle = ?identity.handle, 273 + "received identity event from tap" 274 + ); 275 + } 276 + } 277 + other => { 278 + tracing::debug!(event_type = %other, "unknown tap event type"); 279 + } 280 + } 281 + 282 + // Ack the event. 283 + let ack = serde_json::json!({ "type": "ack", "id": event_id }); 284 + if let Err(e) = write.send(Message::Text(ack.to_string().into())).await { 285 + tracing::warn!("failed to send ack: {e}"); 286 + return Err(e.into()); 287 + } 288 + } 289 + // If the collection list changes, sync to Tap and continue. 290 + _ = collections_rx.changed() => { 291 + let collections = collections_rx.borrow_and_update().clone(); 292 + tracing::info!(?collections, "collection filter changed, syncing to tap"); 293 + 294 + // Always include the lexicon schema collection. 295 + let mut wanted = collections; 296 + if !wanted.contains(&LEXICON_SCHEMA_COLLECTION.to_string()) { 297 + wanted.push(LEXICON_SCHEMA_COLLECTION.to_string()); 298 + } 299 + 300 + if let Err(e) = sync_collections(http, tap_url, tap_admin_password, &wanted).await { 301 + tracing::warn!("failed to sync collections to tap: {e}"); 302 + } 303 + } 304 + } 305 + } 306 + 307 + Ok(()) 308 + } 309 + 310 + // --------------------------------------------------------------------------- 311 + // Record event handler 312 + // --------------------------------------------------------------------------- 313 + 314 + async fn handle_record_event( 315 + db: &PgPool, 316 + lexicons: &LexiconRegistry, 317 + collections_tx: &watch::Sender<Vec<String>>, 318 + record: &TapRecordEvent, 319 + ) { 320 + let uri = format!("at://{}/{}/{}", record.did, record.collection, record.rkey,); 321 + 322 + // Handle lexicon schema events for tracked network lexicons. 323 + if record.collection == LEXICON_SCHEMA_COLLECTION { 324 + handle_lexicon_schema_event(db, lexicons, collections_tx, &record.did, record).await; 325 + return; 326 + } 327 + 328 + match record.action.as_str() { 329 + "create" | "update" => { 330 + let rec = match &record.record { 331 + Some(r) => r, 332 + None => return, 333 + }; 334 + let cid = record.cid.as_deref().unwrap_or_default(); 335 + 336 + if let Err(e) = sqlx::query( 337 + r#" 338 + INSERT INTO records (uri, did, collection, rkey, record, cid, indexed_at) 339 + VALUES ($1, $2, $3, $4, $5, $6, NOW()) 340 + ON CONFLICT (uri) DO UPDATE 341 + SET record = EXCLUDED.record, 342 + cid = EXCLUDED.cid, 343 + indexed_at = NOW() 344 + "#, 345 + ) 346 + .bind(&uri) 347 + .bind(&record.did) 348 + .bind(&record.collection) 349 + .bind(&record.rkey) 350 + .bind(rec) 351 + .bind(cid) 352 + .execute(db) 353 + .await 354 + { 355 + tracing::warn!(uri = %uri, "failed to upsert record: {e}"); 356 + } 357 + } 358 + "delete" => { 359 + if let Err(e) = sqlx::query("DELETE FROM records WHERE uri = $1") 360 + .bind(&uri) 361 + .execute(db) 362 + .await 363 + { 364 + tracing::warn!(uri = %uri, "failed to delete record: {e}"); 365 + } 366 + } 367 + _ => {} 368 + } 369 + } 370 + 371 + // --------------------------------------------------------------------------- 372 + // Lexicon schema event handler 373 + // --------------------------------------------------------------------------- 374 + 375 + /// Handle a `com.atproto.lexicon.schema` record event for tracked network lexicons. 376 + async fn handle_lexicon_schema_event( 377 + db: &PgPool, 378 + lexicons: &LexiconRegistry, 379 + collections_tx: &watch::Sender<Vec<String>>, 380 + did: &str, 381 + record: &TapRecordEvent, 382 + ) { 383 + let nsid = &record.rkey; 384 + 385 + // Check if this NSID is one we're tracking and the DID matches the authority. 386 + let tracked: Option<(Option<String>,)> = sqlx::query_as( 387 + "SELECT target_collection FROM network_lexicons WHERE nsid = $1 AND authority_did = $2", 388 + ) 389 + .bind(nsid) 390 + .bind(did) 391 + .fetch_optional(db) 392 + .await 393 + .unwrap_or(None); 394 + 395 + let target_collection = match tracked { 396 + Some((tc,)) => tc, 397 + None => return, // Not a tracked network lexicon. 398 + }; 399 + 400 + match record.action.as_str() { 401 + "create" | "update" => { 402 + let rec = match &record.record { 403 + Some(r) => r, 404 + None => return, 405 + }; 406 + 407 + let parsed = match ParsedLexicon::parse( 408 + rec.clone(), 409 + 1, 410 + target_collection.clone(), 411 + ProcedureAction::Upsert, 412 + ) { 413 + Ok(p) => p, 414 + Err(e) => { 415 + tracing::warn!(nsid, "failed to parse lexicon schema event: {e}"); 416 + return; 417 + } 418 + }; 419 + 420 + let is_record = parsed.lexicon_type == crate::lexicon::LexiconType::Record; 421 + 422 + // Upsert into lexicons table. 423 + if let Err(e) = sqlx::query( 424 + r#" 425 + INSERT INTO lexicons (id, lexicon_json, backfill, target_collection) 426 + VALUES ($1, $2, false, $3) 427 + ON CONFLICT (id) DO UPDATE SET 428 + lexicon_json = EXCLUDED.lexicon_json, 429 + target_collection = EXCLUDED.target_collection, 430 + revision = lexicons.revision + 1, 431 + updated_at = NOW() 432 + "#, 433 + ) 434 + .bind(nsid) 435 + .bind(rec) 436 + .bind(&target_collection) 437 + .execute(db) 438 + .await 439 + { 440 + tracing::warn!(nsid, "failed to upsert lexicon from event: {e}"); 441 + return; 442 + } 443 + 444 + // Update last_fetched_at. 445 + let _ = 446 + sqlx::query("UPDATE network_lexicons SET last_fetched_at = NOW() WHERE nsid = $1") 447 + .bind(nsid) 448 + .execute(db) 449 + .await; 450 + 451 + lexicons.upsert(parsed).await; 452 + tracing::info!(nsid, "updated network lexicon from tap event"); 453 + 454 + if is_record { 455 + let collections = lexicons.get_record_collections().await; 456 + let _ = collections_tx.send(collections); 457 + } 458 + } 459 + "delete" => { 460 + // Remove from lexicons table and registry. 461 + let _ = sqlx::query("DELETE FROM lexicons WHERE id = $1") 462 + .bind(nsid) 463 + .execute(db) 464 + .await; 465 + 466 + let was_present = lexicons.remove(nsid).await; 467 + if was_present { 468 + tracing::info!(nsid, "removed network lexicon from tap delete event"); 469 + let collections = lexicons.get_record_collections().await; 470 + let _ = collections_tx.send(collections); 471 + } 472 + } 473 + _ => {} 474 + } 475 + }
+2 -1
tests/common/app.rs
··· 35 35 port: 0, 36 36 database_url: String::new(), // not used — pool is already connected 37 37 aip_url: mock_url.clone(), 38 - jetstream_url: String::new(), 38 + tap_url: "http://localhost:2480".into(), 39 + tap_admin_password: None, 39 40 relay_url: mock_url.clone(), 40 41 plc_url: mock_url.clone(), 41 42 static_dir: "./web/out".into(),