Constellation, Spacedust, Slingshot, UFOs: atproto crates and services for microcosm
75
fork

Configure Feed

Select the types of activity you want to include in your feed.

event shape and link filtering and stuff

phil ace206fc 9a09802e

+394 -42
+4
Cargo.lock
··· 3381 3381 name = "spacedust" 3382 3382 version = "0.1.0" 3383 3383 dependencies = [ 3384 + "async-trait", 3384 3385 "clap", 3385 3386 "dropshot", 3386 3387 "futures", 3388 + "http", 3387 3389 "jetstream", 3388 3390 "links", 3389 3391 "metrics", 3390 3392 "schemars", 3393 + "semver", 3391 3394 "serde", 3392 3395 "serde_json", 3396 + "serde_qs", 3393 3397 "tinyjson", 3394 3398 "tokio", 3395 3399 "tokio-tungstenite 0.27.0",
+4
spacedust/Cargo.toml
··· 4 4 edition = "2024" 5 5 6 6 [dependencies] 7 + async-trait = "0.1.88" 7 8 clap = { version = "4.5.40", features = ["derive"] } 8 9 dropshot = "0.16.2" 9 10 futures = "0.3.31" 11 + http = "1.3.1" 10 12 jetstream = { path = "../jetstream", features = ["metrics"] } 11 13 links = { path = "../links" } 12 14 metrics = "0.24.2" 13 15 schemars = "0.8.22" 16 + semver = "1.0.26" 14 17 serde = { version = "1.0.219", features = ["derive"] } 15 18 serde_json = "1.0.140" 19 + serde_qs = "1.0.0-rc.3" 16 20 tinyjson = "2.5.1" 17 21 tokio = { version = "1.45.1", features = ["full"] } 18 22 tokio-tungstenite = "0.27.0"
+1
spacedust/src/consumer.rs
··· 72 72 &*commit.collection, 73 73 &*commit.rkey, 74 74 ), 75 + rev: commit.rev.to_string(), 75 76 target: link.target.into_string(), 76 77 }; 77 78 let _ = b.send(link_ev); // only errors if no subscribers are connected, which is just fine.
+11
spacedust/src/lib.rs
··· 1 1 pub mod consumer; 2 2 pub mod server; 3 + pub mod subscriber; 3 4 4 5 use serde::Serialize; 5 6 ··· 9 10 path: String, 10 11 origin: String, 11 12 target: String, 13 + rev: String, 14 + } 15 + 16 + #[derive(Debug, Serialize)] 17 + #[serde(rename_all="snake_case")] 18 + pub struct ClientEvent { 19 + kind: String, 20 + link: ClientLinkEvent, 12 21 } 13 22 14 23 #[derive(Debug, Serialize)] ··· 16 25 operation: String, 17 26 source: String, 18 27 source_record: String, 28 + source_rev: String, 19 29 subject: String, 20 30 // TODO: include the record too? would save clients a level of hydration 21 31 } ··· 30 40 operation: "create".to_string(), 31 41 source: format!("{}:{undotted}", link.collection), 32 42 source_record: link.origin, 43 + source_rev: link.rev, 33 44 subject: link.target, 34 45 } 35 46 }
+244 -41
spacedust/src/server.rs
··· 1 - use crate::{ClientLinkEvent, LinkEvent}; 1 + use crate::subscriber; 2 + use metrics::{histogram, counter}; 3 + use std::sync::Arc; 4 + use crate::LinkEvent; 5 + use http::{ 6 + header::{ORIGIN, USER_AGENT}, 7 + Response, StatusCode, 8 + }; 2 9 use dropshot::{ 10 + Body, 3 11 ApiDescription, ConfigDropshot, ConfigLogging, ConfigLoggingLevel, Query, RequestContext, 4 - ServerBuilder, WebsocketConnection, channel, 12 + ServerBuilder, WebsocketConnection, channel, endpoint, HttpResponse, 13 + ApiEndpointBodyContentType, ExtractorMetadata, HttpError, ServerContext, 14 + SharedExtractor, 5 15 }; 6 - use futures::SinkExt; 16 + 7 17 use schemars::JsonSchema; 8 18 use serde::{Deserialize, Serialize}; 9 19 use tokio::sync::broadcast; 10 - use tokio_tungstenite::tungstenite::Message; 20 + use tokio::time::Instant; 11 21 use tokio_tungstenite::tungstenite::protocol::Role; 22 + use async_trait::async_trait; 23 + use std::collections::HashSet; 24 + 25 + const INDEX_HTML: &str = include_str!("../static/index.html"); 26 + const FAVICON: &[u8] = include_bytes!("../static/favicon.ico"); 12 27 13 28 pub async fn serve(b: broadcast::Sender<LinkEvent>) -> Result<(), String> { 14 29 let config_logging = ConfigLogging::StderrTerminal { ··· 20 35 .map_err(|error| format!("failed to create logger: {}", error))?; 21 36 22 37 let mut api = ApiDescription::new(); 38 + api.register(index).unwrap(); 39 + api.register(favicon).unwrap(); 40 + api.register(openapi).unwrap(); 23 41 api.register(subscribe).unwrap(); 24 42 25 - let server = ServerBuilder::new(api, b, log) 43 + // TODO: put spec in a once cell / lazy lock thing? 
44 + let spec = Arc::new( 45 + api.openapi( 46 + "Spacedust", 47 + env!("CARGO_PKG_VERSION") 48 + .parse() 49 + .inspect_err(|e| { 50 + eprintln!("failed to parse cargo package version for openapi: {e:?}") 51 + }) 52 + .unwrap_or(semver::Version::new(0, 0, 1)), 53 + ) 54 + .description("A configurable ATProto notifications firehose.") 55 + .contact_name("part of @microcosm.blue") 56 + .contact_url("https://microcosm.blue") 57 + .json() 58 + .map_err(|e| e.to_string())?, 59 + ); 60 + 61 + let ctx = Context { spec, b }; 62 + 63 + let server = ServerBuilder::new(api, ctx, log) 26 64 .config(ConfigDropshot { 27 65 bind_address: "0.0.0.0:9998".parse().unwrap(), 28 66 ..Default::default() ··· 33 71 server.await 34 72 } 35 73 36 - #[derive(Debug, Serialize)] 37 - #[serde(rename_all="snake_case")] 38 - struct ClientEvent { 39 - r#type: String, 40 - link: ClientLinkEvent, 74 + #[derive(Debug, Clone)] 75 + struct Context { 76 + pub spec: Arc<serde_json::Value>, 77 + pub b: broadcast::Sender<LinkEvent>, 78 + } 79 + 80 + async fn instrument_handler<T, H, R>(ctx: &RequestContext<T>, handler: H) -> Result<R, HttpError> 81 + where 82 + R: HttpResponse, 83 + H: Future<Output = Result<R, HttpError>>, 84 + T: ServerContext, 85 + { 86 + let start = Instant::now(); 87 + let result = handler.await; 88 + let latency = start.elapsed(); 89 + let status_code = match &result { 90 + Ok(response) => response.status_code(), 91 + Err(e) => e.status_code.as_status(), 92 + } 93 + .as_str() // just the number (.to_string()'s Display does eg `200 OK`) 94 + .to_string(); 95 + let endpoint = ctx.endpoint.operation_id.clone(); 96 + let headers = ctx.request.headers(); 97 + let origin = headers 98 + .get(ORIGIN) 99 + .and_then(|v| v.to_str().ok()) 100 + .unwrap_or("") 101 + .to_string(); 102 + let ua = headers 103 + .get(USER_AGENT) 104 + .and_then(|v| v.to_str().ok()) 105 + .map(|ua| { 106 + if ua.starts_with("Mozilla/5.0 ") { 107 + "browser" 108 + } else { 109 + ua 110 + } 111 + }) 112 + 
.unwrap_or("") 113 + .to_string(); 114 + counter!("server_requests_total", 115 + "endpoint" => endpoint.clone(), 116 + "origin" => origin, 117 + "ua" => ua, 118 + "status_code" => status_code, 119 + ) 120 + .increment(1); 121 + histogram!("server_handler_latency", "endpoint" => endpoint).record(latency.as_micros() as f64); 122 + result 123 + } 124 + 125 + use dropshot::{HttpResponseHeaders, HttpResponseOk}; 126 + 127 + pub type OkCorsResponse<T> = Result<HttpResponseHeaders<HttpResponseOk<T>>, HttpError>; 128 + 129 + /// Helper for constructing Ok responses: return OkCors(T).into() 130 + /// (not happy with this yet) 131 + pub struct OkCors<T: Serialize + JsonSchema + Send + Sync>(pub T); 132 + 133 + impl<T> From<OkCors<T>> for OkCorsResponse<T> 134 + where 135 + T: Serialize + JsonSchema + Send + Sync, 136 + { 137 + fn from(ok: OkCors<T>) -> OkCorsResponse<T> { 138 + let mut res = HttpResponseHeaders::new_unnamed(HttpResponseOk(ok.0)); 139 + res.headers_mut() 140 + .insert("access-control-allow-origin", "*".parse().unwrap()); 141 + Ok(res) 142 + } 143 + } 144 + 145 + // TODO: cors for HttpError 146 + 147 + 148 + /// Serve index page as html 149 + #[endpoint { 150 + method = GET, 151 + path = "/", 152 + /* 153 + * not useful to have this in openapi 154 + */ 155 + unpublished = true, 156 + }] 157 + async fn index(ctx: RequestContext<Context>) -> Result<Response<Body>, HttpError> { 158 + instrument_handler(&ctx, async { 159 + Ok(Response::builder() 160 + .status(StatusCode::OK) 161 + .header(http::header::CONTENT_TYPE, "text/html") 162 + .body(INDEX_HTML.into())?) 
163 + }) 164 + .await 165 + } 166 + 167 + /// Serve index page as html 168 + #[endpoint { 169 + method = GET, 170 + path = "/favicon.ico", 171 + /* 172 + * not useful to have this in openapi 173 + */ 174 + unpublished = true, 175 + }] 176 + async fn favicon(ctx: RequestContext<Context>) -> Result<Response<Body>, HttpError> { 177 + instrument_handler(&ctx, async { 178 + Ok(Response::builder() 179 + .status(StatusCode::OK) 180 + .header(http::header::CONTENT_TYPE, "image/x-icon") 181 + .body(FAVICON.to_vec().into())?) 182 + }) 183 + .await 184 + } 185 + 186 + /// Meta: get the openapi spec for this api 187 + #[endpoint { 188 + method = GET, 189 + path = "/openapi", 190 + /* 191 + * not useful to have this in openapi 192 + */ 193 + unpublished = true, 194 + }] 195 + async fn openapi(ctx: RequestContext<Context>) -> OkCorsResponse<serde_json::Value> { 196 + instrument_handler(&ctx, async { 197 + let spec = (*ctx.context().spec).clone(); 198 + OkCors(spec).into() 199 + }) 200 + .await 201 + } 202 + 203 + /// The real type that gets deserialized 204 + #[derive(Debug, Deserialize, JsonSchema)] 205 + #[serde(rename_all = "camelCase")] 206 + pub struct MultiSubscribeQuery { 207 + #[serde(default)] 208 + pub wanted_subjects: HashSet<String>, 209 + #[serde(default)] 210 + pub wanted_subject_dids: HashSet<String>, 211 + #[serde(default)] 212 + pub wanted_sources: HashSet<String>, 213 + } 214 + /// The fake corresponding type for docs that dropshot won't freak out about a 215 + /// vec for 216 + #[derive(Deserialize, JsonSchema)] 217 + #[allow(dead_code)] 218 + #[serde(rename_all = "camelCase")] 219 + struct MultiSubscribeQueryForDocs { 220 + /// One or more at-uris to receive links about 221 + /// 222 + /// The at-uri must be url-encoded 223 + /// 224 + /// Pass this parameter multiple times to specify multiple collections, like 225 + /// `wantedSubjects=[...]&wantedSubjects=[...]` 226 + pub wanted_subjects: String, 227 + /// One or more DIDs to receive links about 228 + /// 
229 + /// Pass this parameter multiple times to specify multiple collections 230 + pub wanted_subject_dids: String, 231 + /// One or more link sources to receive links about 232 + /// 233 + /// TODO: docs about link sources 234 + /// 235 + /// eg, a bluesky like's link source: `app.bsky.feed.like:subject.uri` 236 + /// 237 + /// Pass this parameter multiple times to specify multiple sources 238 + pub wanted_sources: String, 239 + } 240 + 241 + // The `SharedExtractor` implementation for Query<QueryType> describes how to 242 + // construct an instance of `Query<QueryType>` from an HTTP request: namely, by 243 + // parsing the query string to an instance of `QueryType`. 244 + #[async_trait] 245 + impl SharedExtractor for MultiSubscribeQuery { 246 + async fn from_request<Context: ServerContext>( 247 + ctx: &RequestContext<Context>, 248 + ) -> Result<MultiSubscribeQuery, HttpError> { 249 + let raw_query = ctx.request.uri().query().unwrap_or(""); 250 + let q = serde_qs::from_str(raw_query).map_err(|e| { 251 + HttpError::for_bad_request(None, format!("unable to parse query string: {}", e)) 252 + })?; 253 + Ok(q) 254 + } 255 + 256 + fn metadata(body_content_type: ApiEndpointBodyContentType) -> ExtractorMetadata { 257 + // HACK: query type switcheroo: passing MultiSubscribeQuery to 258 + // `metadata` would "helpfully" panic because dropshot believes we can 259 + // only have scalar types in a query. 260 + // 261 + // so instead we have a fake second type whose only job is to look the 262 + // same as MultiSubscribeQuery exept that it has `String` instead of 263 + // `Vec<String>`, which dropshot will accept, and generate ~close-enough 264 + // docs for. 
265 + <Query<MultiSubscribeQueryForDocs> as SharedExtractor>::metadata(body_content_type) 266 + } 41 267 } 42 268 43 269 #[derive(Deserialize, JsonSchema)] ··· 50 276 path = "/subscribe", 51 277 }] 52 278 async fn subscribe( 53 - ctx: RequestContext<broadcast::Sender<LinkEvent>>, 54 - _qp: Query<QueryParams>, 279 + ctx: RequestContext<Context>, 280 + query: MultiSubscribeQuery, 55 281 upgraded: WebsocketConnection, 56 282 ) -> dropshot::WebsocketChannelResult { 57 - let mut ws = tokio_tungstenite::WebSocketStream::from_raw_socket( 283 + let ws = tokio_tungstenite::WebSocketStream::from_raw_socket( 58 284 upgraded.into_inner(), 59 285 Role::Server, 60 286 None, 61 287 ) 62 288 .await; 63 - let mut sub = ctx.context().subscribe(); 64 289 65 - // TODO: pingpong 66 - // TODO: filtering subscription 290 + let b = ctx.context().b.subscribe(); 67 291 68 - loop { 69 - match sub.recv().await { 70 - Ok(link) => { 71 - let ev = ClientEvent { 72 - r#type: "link".to_string(), 73 - link: link.into(), 74 - }; 75 - let json = serde_json::to_string(&ev)?; 76 - if let Err(e) = ws.send(Message::Text(json.into())).await { 77 - eprintln!("client: failed to send event: {e:?}"); 78 - ws.close(None).await?; // TODO: do we need this one?? 79 - break; 80 - } 81 - } 82 - Err(broadcast::error::RecvError::Closed) => { 83 - ws.close(None).await?; // TODO: send reason 84 - break; 85 - } 86 - Err(broadcast::error::RecvError::Lagged(_n_missed)) => { 87 - eprintln!("client lagged, closing"); 88 - ws.close(None).await?; // TODO: send reason 89 - break; 90 - } 91 - } 92 - } 292 + subscriber::subscribe(b, ws, query) 293 + .await 294 + .map_err(|e| format!("boo: {e:?}"))?; 295 + 93 296 Ok(()) 94 297 }
+75
spacedust/src/subscriber.rs
··· 1 + use crate::ClientEvent; 2 + use crate::LinkEvent; 3 + use crate::server::MultiSubscribeQuery; 4 + use futures::SinkExt; 5 + use std::error::Error; 6 + use tokio::sync::broadcast; 7 + use tokio_tungstenite::{WebSocketStream, tungstenite::Message}; 8 + use dropshot::WebsocketConnectionRaw; 9 + 10 + pub async fn subscribe( 11 + mut sub: broadcast::Receiver<LinkEvent>, 12 + mut ws: WebSocketStream<WebsocketConnectionRaw>, 13 + query: MultiSubscribeQuery, 14 + ) -> Result<(), Box<dyn Error>> { 15 + // TODO: pingpong 16 + 17 + loop { 18 + match sub.recv().await { 19 + Ok(link) => { 20 + 21 + // subject + subject DIDs are logical OR 22 + let target_did = if link.target.starts_with("did:") { 23 + link.target.clone() 24 + } else { 25 + let Some(rest) = link.target.strip_prefix("at://") else { 26 + continue; 27 + }; 28 + if let Some((did, _)) = rest.split_once("/") { 29 + did 30 + } else { 31 + rest 32 + }.to_string() 33 + }; 34 + if !(query.wanted_subjects.contains(&link.target) || query.wanted_subject_dids.contains(&target_did) || query.wanted_subjects.is_empty() && query.wanted_subject_dids.is_empty()) { 35 + // wowwww ^^ fix that 36 + continue; 37 + } 38 + 39 + // subjects together with sources are logical AND 40 + 41 + if !query.wanted_sources.is_empty() { 42 + let undotted = link.path.strip_prefix('.').unwrap_or_else(|| { 43 + eprintln!("link path did not have expected '.' prefix: {}", link.path); 44 + "" 45 + }); 46 + let source = format!("{}:{undotted}", link.collection); 47 + if !query.wanted_sources.contains(&source) { 48 + continue; 49 + } 50 + } 51 + 52 + let ev = ClientEvent { 53 + kind: "link".to_string(), 54 + link: link.into(), 55 + }; 56 + let json = serde_json::to_string(&ev)?; 57 + if let Err(e) = ws.send(Message::Text(json.into())).await { 58 + eprintln!("client: failed to send event: {e:?}"); 59 + ws.close(None).await?; // TODO: do we need this one?? 
60 + break; 61 + } 62 + } 63 + Err(broadcast::error::RecvError::Closed) => { 64 + ws.close(None).await?; // TODO: send reason 65 + break; 66 + } 67 + Err(broadcast::error::RecvError::Lagged(_n_missed)) => { 68 + eprintln!("client lagged, closing"); 69 + ws.close(None).await?; // TODO: send reason 70 + break; 71 + } 72 + } 73 + } 74 + Ok(()) 75 + }
spacedust/static/favicon.ico

This is a binary file and will not be displayed.

+54
spacedust/static/index.html
<!doctype html>
<html lang="en">
  <head>
    <meta charset="utf-8" />
    <title>Spacedust documentation</title>
    <meta name="viewport" content="width=device-width, initial-scale=1" />
    <meta name="description" content="API Documentation for Spacedust, a configurable ATProto notifications firehose" />
    <style>
      .custom-header {
        height: 42px;
        background-color: #221828;
        box-shadow: inset 0 -1px 0 var(--scalar-border-color);
        color: var(--scalar-color-1);
        font-size: var(--scalar-font-size-3);
        font-family: 'Iowan Old Style', 'Palatino Linotype', 'URW Palladio L', P052, serif;
        padding: 0 18px;
        justify-content: space-between;
      }
      .custom-header,
      .custom-header nav {
        display: flex;
        align-items: center;
        gap: 18px;
      }
      .custom-header a:hover {
        color: var(--scalar-color-2);
      }
    </style>
  </head>
  <body>
    <header class="custom-header scalar-app">
      <p>
        TODO: pdsls jetstream link
        <a href="https://ufos.microcosm.blue">Launch 🛸 UFOs app</a>: Explore lexicons
      </p>
      <nav>
        <b>a <a href="https://microcosm.blue">microcosm</a> project</b>
        <a href="https://bsky.app/profile/microcosm.blue">@microcosm.blue</a>
        <a href="https://github.com/at-microcosm">github</a>
      </nav>
    </header>

    <!-- The API reference script reads the spec location from data-url on this element -->
    <script id="api-reference" type="application/json" data-url="/openapi"></script>

    <script>
      var configuration = {
        theme: 'purple',
      }
      document.getElementById('api-reference').dataset.configuration = JSON.stringify(configuration)
    </script>

    <script src="https://cdn.jsdelivr.net/npm/@scalar/api-reference"></script>
  </body>
</html>
+1 -1
ufos/src/index_html.rs
··· 2 2 <html lang="en"> 3 3 <head> 4 4 <meta charset="utf-8" /> 5 - <title>UFOs API Documentation</title> 5 + <title>UFOs API documentation</title> 6 6 <meta name="viewport" content="width=device-width, initial-scale=1" /> 7 7 <meta name="description" content="API Documentation for UFOs: Samples and stats for all atproto lexicons." /> 8 8 <style>