···11use clap::Parser;
22use jetstream::events::Cursor;
33use std::path::PathBuf;
44+use std::time::{Duration, SystemTime};
45use ufos::consumer;
55-use ufos::error::StorageError;
66use ufos::file_consumer;
77use ufos::server;
88-use ufos::storage::{StorageWhatever, StoreReader, StoreWriter};
88+use ufos::storage::{StorageWhatever, StoreBackground, StoreReader, StoreWriter};
99use ufos::storage_fjall::FjallStorage;
1010-use ufos::storage_mem::MemStorage;
1010+use ufos::store_types::SketchSecretPrefix;
1111+use ufos::{nice_duration, ConsumerInfo};
11121213#[cfg(not(target_env = "msvc"))]
1314use tikv_jemallocator::Jemalloc;
···1718static GLOBAL: Jemalloc = Jemalloc;
18191920/// Aggregate links in the at-mosphere
2020-#[derive(Parser, Debug)]
2121+#[derive(Parser, Debug, Clone)]
2122#[command(version, about, long_about = None)]
2223struct Args {
2324 /// Jetstream server to connect to (exclusive with --fixture). Provide either a wss:// URL, or a shorthand value:
···3637 #[arg(long)]
3738 data: PathBuf,
3839 /// DEBUG: don't start the jetstream consumer or its write loop
3939- /// todo: restore this
4040 #[arg(long, action)]
4141 pause_writer: bool,
4242+ /// Adjust runtime settings like background task intervals for efficient backfill
4343+ #[arg(long, action)]
4444+ backfill: bool,
4245 /// DEBUG: force the rw loop to fall behind by pausing it
4346 /// todo: restore this
4447 #[arg(long, action)]
4548 pause_rw: bool,
4646- /// DEBUG: use an in-memory store instead of fjall
4949+ /// reset the rollup cursor, scrape through missed things in the past (backfill)
4750 #[arg(long, action)]
4848- in_mem: bool,
5151+ reroll: bool,
4952 /// DEBUG: interpret jetstream as a file fixture
5053 #[arg(long, action)]
5154 jetstream_fixture: bool,
5255}
53565454-// #[tokio::main(flavor = "current_thread")] // TODO: move this to config via args
5557#[tokio::main]
5658async fn main() -> anyhow::Result<()> {
5759 env_logger::init();
58605961 let args = Args::parse();
6062 let jetstream = args.jetstream.clone();
6161- if args.in_mem {
6262- let (read_store, write_store, cursor) = MemStorage::init(
6363- args.data,
6464- jetstream,
6565- args.jetstream_force,
6666- Default::default(),
6767- )?;
6868- go(
6969- args.jetstream,
7070- args.jetstream_fixture,
7171- args.pause_writer,
7272- read_store,
7373- write_store,
7474- cursor,
7575- )
7676- .await?;
7777- } else {
7878- let (read_store, write_store, cursor) = FjallStorage::init(
7979- args.data,
8080- jetstream,
8181- args.jetstream_force,
8282- Default::default(),
8383- )?;
8484- go(
8585- args.jetstream,
8686- args.jetstream_fixture,
8787- args.pause_writer,
8888- read_store,
8989- write_store,
9090- cursor,
9191- )
9292- .await?;
9393- }
9494-6363+ let (read_store, write_store, cursor, sketch_secret) = FjallStorage::init(
6464+ args.data.clone(),
6565+ jetstream,
6666+ args.jetstream_force,
6767+ Default::default(),
6868+ )?;
6969+ go(args, read_store, write_store, cursor, sketch_secret).await?;
9570 Ok(())
9671}
97729898-async fn go(
9999- jetstream: String,
100100- jetstream_fixture: bool,
101101- pause_writer: bool,
102102- read_store: impl StoreReader + 'static,
103103- mut write_store: impl StoreWriter + 'static,
7373+async fn go<B: StoreBackground>(
7474+ args: Args,
7575+ read_store: impl StoreReader + 'static + Clone,
7676+ mut write_store: impl StoreWriter<B> + 'static,
10477 cursor: Option<Cursor>,
7878+ sketch_secret: SketchSecretPrefix,
10579) -> anyhow::Result<()> {
10680 println!("starting server with storage...");
107107- let serving = server::serve(read_store);
8181+ let serving = server::serve(read_store.clone());
10882109109- let t1 = tokio::task::spawn(async {
110110- let r = serving.await;
111111- log::warn!("serving ended with: {r:?}");
112112- });
8383+ if args.pause_writer {
8484+ log::info!("not starting jetstream or the write loop.");
8585+ serving.await.map_err(|e| anyhow::anyhow!(e))?;
8686+ return Ok(());
8787+ }
11388114114- let t2: tokio::task::JoinHandle<anyhow::Result<()>> = tokio::task::spawn({
115115- async move {
116116- if !pause_writer {
117117- println!(
118118- "starting consumer with cursor: {cursor:?} from {:?} ago",
119119- cursor.map(|c| c.elapsed())
120120- );
121121- let mut batches = if jetstream_fixture {
122122- file_consumer::consume(jetstream.into()).await?
123123- } else {
124124- consumer::consume(&jetstream, cursor, false).await?
125125- };
8989+ let batches = if args.jetstream_fixture {
9090+ log::info!("starting with jestream file fixture: {:?}", args.jetstream);
9191+ file_consumer::consume(args.jetstream.into(), sketch_secret, cursor).await?
9292+ } else {
9393+ log::info!(
9494+ "starting consumer with cursor: {cursor:?} from {:?} ago",
9595+ cursor.map(|c| c.elapsed())
9696+ );
9797+ consumer::consume(&args.jetstream, cursor, false, sketch_secret).await?
9898+ };
12699127127- tokio::task::spawn_blocking(move || {
128128- while let Some(event_batch) = batches.blocking_recv() {
129129- write_store.insert_batch(event_batch)?;
130130- write_store
131131- .step_rollup()
132132- .inspect_err(|e| log::error!("laksjdfl: {e:?}"))?;
133133- }
134134- Ok::<(), StorageError>(())
135135- })
136136- .await??;
100100+ let rolling = write_store
101101+ .background_tasks(args.reroll)?
102102+ .run(args.backfill);
103103+ let consuming = write_store.receive_batches(batches);
137104138138- log::warn!("storage.receive ended with");
139139- } else {
140140- log::info!("not starting jetstream or the write loop.");
141141- }
142142- Ok(())
143143- }
144144- });
105105+ let stating = do_update_stuff(read_store);
145106146107 tokio::select! {
147147- z = t1 => log::warn!("serve task ended: {z:?}"),
148148- z = t2 => log::warn!("storage task ended: {z:?}"),
108108+ z = serving => log::warn!("serve task ended: {z:?}"),
109109+ z = rolling => log::warn!("rollup task ended: {z:?}"),
110110+ z = consuming => log::warn!("consuming task ended: {z:?}"),
111111+ z = stating => log::warn!("status task ended: {z:?}"),
149112 };
150113151114 println!("bye!");
152115153116 Ok(())
154117}
118118+119119+async fn do_update_stuff(read_store: impl StoreReader) {
120120+ let started_at = std::time::SystemTime::now();
121121+ let mut first_cursor = None;
122122+ let mut first_rollup = None;
123123+ let mut last_at = std::time::SystemTime::now();
124124+ let mut last_cursor = None;
125125+ let mut last_rollup = None;
126126+ let mut interval = tokio::time::interval(std::time::Duration::from_secs(4));
127127+ interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
128128+ loop {
129129+ interval.tick().await;
130130+ match read_store.get_consumer_info().await {
131131+ Err(e) => log::warn!("failed to get jetstream consumer info: {e:?}"),
132132+ Ok(ConsumerInfo::Jetstream {
133133+ latest_cursor,
134134+ rollup_cursor,
135135+ ..
136136+ }) => {
137137+ let now = std::time::SystemTime::now();
138138+ let latest_cursor = latest_cursor.map(Cursor::from_raw_u64);
139139+ let rollup_cursor = rollup_cursor.map(Cursor::from_raw_u64);
140140+ backfill_info(
141141+ latest_cursor,
142142+ rollup_cursor,
143143+ last_cursor,
144144+ last_rollup,
145145+ last_at,
146146+ first_cursor,
147147+ first_rollup,
148148+ started_at,
149149+ now,
150150+ );
151151+ first_cursor = first_cursor.or(latest_cursor);
152152+ first_rollup = first_rollup.or(rollup_cursor);
153153+ last_cursor = latest_cursor;
154154+ last_rollup = rollup_cursor;
155155+ last_at = now;
156156+ }
157157+ }
158158+ }
159159+}
160160+161161+#[allow(clippy::too_many_arguments)]
162162+fn backfill_info(
163163+ latest_cursor: Option<Cursor>,
164164+ rollup_cursor: Option<Cursor>,
165165+ last_cursor: Option<Cursor>,
166166+ last_rollup: Option<Cursor>,
167167+ last_at: SystemTime,
168168+ first_cursor: Option<Cursor>,
169169+ first_rollup: Option<Cursor>,
170170+ started_at: SystemTime,
171171+ now: SystemTime,
172172+) {
173173+ let nice_dt_two_maybes = |earlier: Option<Cursor>, later: Option<Cursor>| match (earlier, later)
174174+ {
175175+ (Some(earlier), Some(later)) => match later.duration_since(&earlier) {
176176+ Ok(dt) => nice_duration(dt),
177177+ Err(e) => {
178178+ let rev_dt = e.duration();
179179+ format!("+{}", nice_duration(rev_dt))
180180+ }
181181+ },
182182+ _ => "unknown".to_string(),
183183+ };
184184+185185+ let rate = |mlatest: Option<Cursor>, msince: Option<Cursor>, real: Duration| {
186186+ mlatest
187187+ .zip(msince)
188188+ .map(|(latest, since)| {
189189+ latest
190190+ .duration_since(&since)
191191+ .unwrap_or(Duration::from_millis(1))
192192+ })
193193+ .map(|dtc| format!("{:.2}", dtc.as_secs_f64() / real.as_secs_f64()))
194194+ .unwrap_or("??".into())
195195+ };
196196+197197+ let dt_real = now
198198+ .duration_since(last_at)
199199+ .unwrap_or(Duration::from_millis(1));
200200+201201+ let dt_real_total = now
202202+ .duration_since(started_at)
203203+ .unwrap_or(Duration::from_millis(1));
204204+205205+ let cursor_rate = rate(latest_cursor, last_cursor, dt_real);
206206+ let cursor_avg = rate(latest_cursor, first_cursor, dt_real_total);
207207+208208+ let rollup_rate = rate(rollup_cursor, last_rollup, dt_real);
209209+ let rollup_avg = rate(rollup_cursor, first_rollup, dt_real_total);
210210+211211+ log::info!(
212212+ "cursor: {} behind (→{}, {cursor_rate}x, {cursor_avg}x avg). rollup: {} behind (→{}, {rollup_rate}x, {rollup_avg}x avg).",
213213+ latest_cursor.map(|c| c.elapsed().map(nice_duration).unwrap_or("++".to_string())).unwrap_or("?".to_string()),
214214+ nice_dt_two_maybes(last_cursor, latest_cursor),
215215+ rollup_cursor.map(|c| c.elapsed().map(nice_duration).unwrap_or("++".to_string())).unwrap_or("?".to_string()),
216216+ nice_dt_two_maybes(last_rollup, rollup_cursor),
217217+ );
218218+}
-249
ufos/src/server.rs
···11-use crate::storage::StoreReader;
22-use crate::{ConsumerInfo, Nsid, TopCollections, UFOsRecord};
33-use dropshot::endpoint;
44-use dropshot::ApiDescription;
55-use dropshot::ConfigDropshot;
66-use dropshot::ConfigLogging;
77-use dropshot::ConfigLoggingLevel;
88-use dropshot::HttpError;
99-use dropshot::HttpResponseHeaders;
1010-use dropshot::HttpResponseOk;
1111-use dropshot::Query;
1212-use dropshot::RequestContext;
1313-use dropshot::ServerBuilder;
1414-use schemars::JsonSchema;
1515-use serde::{Deserialize, Serialize};
1616-use std::collections::HashMap;
1717-use std::sync::Arc;
1818-1919-struct Context {
2020- pub spec: Arc<serde_json::Value>,
2121- storage: Box<dyn StoreReader>,
2222-}
2323-2424-/// Meta: get the openapi spec for this api
2525-#[endpoint {
2626- method = GET,
2727- path = "/openapi",
2828-}]
2929-async fn get_openapi(ctx: RequestContext<Context>) -> OkCorsResponse<serde_json::Value> {
3030- let spec = (*ctx.context().spec).clone();
3131- ok_cors(spec)
3232-}
3333-3434-#[derive(Debug, Serialize, JsonSchema)]
3535-struct MetaInfo {
3636- storage_name: String,
3737- storage: serde_json::Value,
3838- consumer: ConsumerInfo,
3939-}
4040-/// Get meta information about UFOs itself
4141-#[endpoint {
4242- method = GET,
4343- path = "/meta"
4444-}]
4545-async fn get_meta_info(ctx: RequestContext<Context>) -> OkCorsResponse<MetaInfo> {
4646- let Context { storage, .. } = ctx.context();
4747- let failed_to_get =
4848- |what| move |e| HttpError::for_internal_error(format!("failed to get {what}: {e:?}"));
4949-5050- let storage_info = storage
5151- .get_storage_stats()
5252- .await
5353- .map_err(failed_to_get("storage info"))?;
5454-5555- let consumer = storage
5656- .get_consumer_info()
5757- .await
5858- .map_err(failed_to_get("consumer info"))?;
5959-6060- ok_cors(MetaInfo {
6161- storage_name: storage.name(),
6262- storage: storage_info,
6363- consumer,
6464- })
6565-}
6666-fn to_multiple_nsids(s: &str) -> Result<Vec<Nsid>, String> {
6767- let mut out = Vec::new();
6868- for collection in s.split(',') {
6969- let Ok(nsid) = Nsid::new(collection.to_string()) else {
7070- return Err(format!("collection {collection:?} was not a valid NSID"));
7171- };
7272- out.push(nsid);
7373- }
7474- Ok(out)
7575-}
7676-7777-#[derive(Debug, Deserialize, JsonSchema)]
7878-struct RecordsCollectionsQuery {
7979- collection: Option<String>, // JsonSchema not implemented for Nsid :(
8080-}
8181-#[derive(Debug, Serialize, JsonSchema)]
8282-struct ApiRecord {
8383- did: String,
8484- collection: String,
8585- rkey: String,
8686- record: Box<serde_json::value::RawValue>,
8787- time_us: u64,
8888-}
8989-impl From<UFOsRecord> for ApiRecord {
9090- fn from(ufo: UFOsRecord) -> Self {
9191- Self {
9292- did: ufo.did.to_string(),
9393- collection: ufo.collection.to_string(),
9494- rkey: ufo.rkey.to_string(),
9595- record: ufo.record,
9696- time_us: ufo.cursor.to_raw_u64(),
9797- }
9898- }
9999-}
100100-/// Get recent records by collection
101101-///
102102-/// Multiple collections are supported. they will be delivered in one big array with no
103103-/// specified order.
104104-#[endpoint {
105105- method = GET,
106106- path = "/records",
107107-}]
108108-async fn get_records_by_collections(
109109- ctx: RequestContext<Context>,
110110- collection_query: Query<RecordsCollectionsQuery>,
111111-) -> OkCorsResponse<Vec<ApiRecord>> {
112112- let Context { storage, .. } = ctx.context();
113113- let mut limit = 42;
114114- let query = collection_query.into_inner();
115115- let collections = if let Some(provided_collection) = query.collection {
116116- to_multiple_nsids(&provided_collection)
117117- .map_err(|reason| HttpError::for_bad_request(None, reason))?
118118- } else {
119119- let all_collections_should_be_nsids: Vec<String> = storage
120120- .get_top_collections()
121121- .await
122122- .map_err(|e| {
123123- HttpError::for_internal_error(format!("failed to get top collections: {e:?}"))
124124- })?
125125- .into();
126126- let mut all_collections = Vec::with_capacity(all_collections_should_be_nsids.len());
127127- for raw_nsid in all_collections_should_be_nsids {
128128- let nsid = Nsid::new(raw_nsid).map_err(|e| {
129129- HttpError::for_internal_error(format!("failed to parse nsid: {e:?}"))
130130- })?;
131131- all_collections.push(nsid);
132132- }
133133-134134- limit = 12;
135135- all_collections
136136- };
137137-138138- let records = storage
139139- .get_records_by_collections(&collections, limit, true)
140140- .await
141141- .map_err(|e| HttpError::for_internal_error(e.to_string()))?
142142- .into_iter()
143143- .map(|r| r.into())
144144- .collect();
145145-146146- ok_cors(records)
147147-}
148148-149149-#[derive(Debug, Deserialize, JsonSchema)]
150150-struct TotalSeenCollectionsQuery {
151151- collection: String, // JsonSchema not implemented for Nsid :(
152152-}
153153-#[derive(Debug, Serialize, JsonSchema)]
154154-struct TotalCounts {
155155- total_records: u64,
156156- dids_estimate: u64,
157157-}
158158-/// Get total records seen by collection
159159-#[endpoint {
160160- method = GET,
161161- path = "/records/total-seen"
162162-}]
163163-async fn get_records_total_seen(
164164- ctx: RequestContext<Context>,
165165- collection_query: Query<TotalSeenCollectionsQuery>,
166166-) -> OkCorsResponse<HashMap<String, TotalCounts>> {
167167- let Context { storage, .. } = ctx.context();
168168-169169- let query = collection_query.into_inner();
170170- let collections = to_multiple_nsids(&query.collection)
171171- .map_err(|reason| HttpError::for_bad_request(None, reason))?;
172172-173173- let mut seen_by_collection = HashMap::with_capacity(collections.len());
174174-175175- for collection in &collections {
176176- let (total_records, dids_estimate) = storage
177177- .get_counts_by_collection(collection)
178178- .await
179179- .map_err(|e| HttpError::for_internal_error(format!("boooo: {e:?}")))?;
180180-181181- seen_by_collection.insert(
182182- collection.to_string(),
183183- TotalCounts {
184184- total_records,
185185- dids_estimate,
186186- },
187187- );
188188- }
189189-190190- ok_cors(seen_by_collection)
191191-}
192192-193193-/// Get top collections
194194-#[endpoint {
195195- method = GET,
196196- path = "/collections"
197197-}]
198198-async fn get_top_collections(ctx: RequestContext<Context>) -> OkCorsResponse<TopCollections> {
199199- let Context { storage, .. } = ctx.context();
200200- let collections = storage
201201- .get_top_collections()
202202- .await
203203- .map_err(|e| HttpError::for_internal_error(format!("boooo: {e:?}")))?;
204204-205205- ok_cors(collections)
206206-}
207207-208208-pub async fn serve(storage: impl StoreReader + 'static) -> Result<(), String> {
209209- let log = ConfigLogging::StderrTerminal {
210210- level: ConfigLoggingLevel::Info,
211211- }
212212- .to_logger("hello-ufos")
213213- .map_err(|e| e.to_string())?;
214214-215215- let mut api = ApiDescription::new();
216216-217217- api.register(get_openapi).unwrap();
218218- api.register(get_meta_info).unwrap();
219219- api.register(get_records_by_collections).unwrap();
220220- api.register(get_records_total_seen).unwrap();
221221- api.register(get_top_collections).unwrap();
222222-223223- let context = Context {
224224- spec: Arc::new(
225225- api.openapi("UFOs", semver::Version::new(0, 0, 0))
226226- .json()
227227- .map_err(|e| e.to_string())?,
228228- ),
229229- storage: Box::new(storage),
230230- };
231231-232232- ServerBuilder::new(api, context, log)
233233- .config(ConfigDropshot {
234234- bind_address: "0.0.0.0:9999".parse().unwrap(),
235235- ..Default::default()
236236- })
237237- .start()
238238- .map_err(|error| format!("failed to start server: {}", error))?
239239- .await
240240-}
241241-242242-/// awkward helpers
243243-type OkCorsResponse<T> = Result<HttpResponseHeaders<HttpResponseOk<T>>, HttpError>;
244244-fn ok_cors<T: Send + Sync + Serialize + JsonSchema>(t: T) -> OkCorsResponse<T> {
245245- let mut res = HttpResponseHeaders::new_unnamed(HttpResponseOk(t));
246246- res.headers_mut()
247247- .insert("access-control-allow-origin", "*".parse().unwrap());
248248- Ok(res)
249249-}
+72
ufos/src/server/collections_query.rs
···11+use crate::Nsid;
22+use async_trait::async_trait;
33+use dropshot::{
44+ ApiEndpointBodyContentType, ExtractorMetadata, HttpError, Query, RequestContext, ServerContext,
55+ SharedExtractor,
66+};
77+use schemars::JsonSchema;
88+use serde::Deserialize;
99+use std::collections::HashSet;
1010+1111+/// The real type that gets deserialized
1212+#[derive(Debug, Deserialize, JsonSchema)]
1313+pub struct MultiCollectionQuery {
1414+ pub collection: Vec<String>,
1515+}
1616+1717+/// The fake corresponding type for docs that dropshot won't freak out about a
1818+/// vec for
1919+#[derive(Deserialize, JsonSchema)]
2020+#[allow(dead_code)]
2121+struct MultiCollectionQueryForDocs {
2222+ /// One or more collection [NSID](https://atproto.com/specs/nsid)s
2323+ ///
2424+ /// Pass this parameter multiple times to specify multiple collections, like
2525+ /// `collection=app.bsky.feed.like&collection=app.bsky.feed.post`
2626+ collection: String,
2727+}
2828+2929+impl TryFrom<MultiCollectionQuery> for HashSet<Nsid> {
3030+ type Error = HttpError;
3131+ fn try_from(mcq: MultiCollectionQuery) -> Result<Self, Self::Error> {
3232+ let mut out = HashSet::with_capacity(mcq.collection.len());
3333+ for c in mcq.collection {
3434+ let nsid = Nsid::new(c).map_err(|e| {
3535+ HttpError::for_bad_request(
3636+ None,
3737+ format!("failed to convert collection to an NSID: {e:?}"),
3838+ )
3939+ })?;
4040+ out.insert(nsid);
4141+ }
4242+ Ok(out)
4343+ }
4444+}
4545+4646+// The `SharedExtractor` implementation for Query<QueryType> describes how to
4747+// construct an instance of `Query<QueryType>` from an HTTP request: namely, by
4848+// parsing the query string to an instance of `QueryType`.
4949+#[async_trait]
5050+impl SharedExtractor for MultiCollectionQuery {
5151+ async fn from_request<Context: ServerContext>(
5252+ ctx: &RequestContext<Context>,
5353+ ) -> Result<MultiCollectionQuery, HttpError> {
5454+ let raw_query = ctx.request.uri().query().unwrap_or("");
5555+ let q = serde_qs::from_str(raw_query).map_err(|e| {
5656+ HttpError::for_bad_request(None, format!("unable to parse query string: {}", e))
5757+ })?;
5858+ Ok(q)
5959+ }
6060+6161+ fn metadata(body_content_type: ApiEndpointBodyContentType) -> ExtractorMetadata {
6262+ // HACK: query type switcheroo: passing MultiCollectionQuery to
6363+ // `metadata` would "helpfully" panic because dropshot believes we can
6464+ // only have scalar types in a query.
6565+ //
6666+ // so instead we have a fake second type whose only job is to look the
6767+ // same as MultiCollectionQuery exept that it has `String` instead of
6868+ // `Vec<String>`, which dropshot will accept, and generate ~close-enough
6969+ // docs for.
7070+ <Query<MultiCollectionQueryForDocs> as SharedExtractor>::metadata(body_content_type)
7171+ }
7272+}
+23
ufos/src/server/cors.rs
···11+use dropshot::{HttpError, HttpResponseHeaders, HttpResponseOk};
22+use schemars::JsonSchema;
33+use serde::Serialize;
44+55+pub type OkCorsResponse<T> = Result<HttpResponseHeaders<HttpResponseOk<T>>, HttpError>;
66+77+/// Helper for constructing Ok responses: return OkCors(T).into()
88+/// (not happy with this yet)
99+pub struct OkCors<T: Serialize + JsonSchema + Send + Sync>(pub T);
1010+1111+impl<T> From<OkCors<T>> for OkCorsResponse<T>
1212+where
1313+ T: Serialize + JsonSchema + Send + Sync,
1414+{
1515+ fn from(ok: OkCors<T>) -> OkCorsResponse<T> {
1616+ let mut res = HttpResponseHeaders::new_unnamed(HttpResponseOk(ok.0));
1717+ res.headers_mut()
1818+ .insert("access-control-allow-origin", "*".parse().unwrap());
1919+ Ok(res)
2020+ }
2121+}
2222+2323+// TODO: cors for HttpError
+638
ufos/src/server/mod.rs
···11+mod collections_query;
22+mod cors;
33+44+use crate::index_html::INDEX_HTML;
55+use crate::storage::StoreReader;
66+use crate::store_types::{HourTruncatedCursor, WeekTruncatedCursor};
77+use crate::{
88+ ConsumerInfo, Cursor, JustCount, Nsid, NsidCount, NsidPrefix, OrderCollectionsBy, PrefixChild,
99+ UFOsRecord,
1010+};
1111+use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine as _};
1212+use chrono::{DateTime, Utc};
1313+use collections_query::MultiCollectionQuery;
1414+use cors::{OkCors, OkCorsResponse};
1515+use dropshot::endpoint;
1616+use dropshot::ApiDescription;
1717+use dropshot::Body;
1818+use dropshot::ConfigDropshot;
1919+use dropshot::ConfigLogging;
2020+use dropshot::ConfigLoggingLevel;
2121+use dropshot::HttpError;
2222+use dropshot::Query;
2323+use dropshot::RequestContext;
2424+use dropshot::ServerBuilder;
2525+2626+use http::{Response, StatusCode};
2727+use schemars::JsonSchema;
2828+use serde::{Deserialize, Serialize};
2929+use std::collections::{HashMap, HashSet};
3030+use std::sync::Arc;
3131+use std::time::{Duration, SystemTime, UNIX_EPOCH};
3232+3333+struct Context {
3434+ pub spec: Arc<serde_json::Value>,
3535+ storage: Box<dyn StoreReader>,
3636+}
3737+3838+fn dt_to_cursor(dt: DateTime<Utc>) -> Result<HourTruncatedCursor, HttpError> {
3939+ let t = dt.timestamp_micros();
4040+ if t < 0 {
4141+ Err(HttpError::for_bad_request(None, "timestamp too old".into()))
4242+ } else {
4343+ let t = t as u64;
4444+ let t_now = SystemTime::now()
4545+ .duration_since(UNIX_EPOCH)
4646+ .unwrap()
4747+ .as_micros() as u64;
4848+ const ONE_HOUR: u64 = 60 * 60 * 1_000_000;
4949+ if t > t_now && (t - t_now > 2 * ONE_HOUR) {
5050+ Err(HttpError::for_bad_request(None, "future timestamp".into()))
5151+ } else {
5252+ Ok(HourTruncatedCursor::truncate_raw_u64(t))
5353+ }
5454+ }
5555+}
5656+5757+/// Serve index page as html
5858+#[endpoint {
5959+ method = GET,
6060+ path = "/",
6161+ /*
6262+ * not useful to have this in openapi
6363+ */
6464+ unpublished = true,
6565+}]
6666+async fn index(_ctx: RequestContext<Context>) -> Result<Response<Body>, HttpError> {
6767+ Ok(Response::builder()
6868+ .status(StatusCode::OK)
6969+ .header(http::header::CONTENT_TYPE, "text/html")
7070+ .body(INDEX_HTML.into())?)
7171+}
7272+7373+/// Meta: get the openapi spec for this api
7474+#[endpoint {
7575+ method = GET,
7676+ path = "/openapi",
7777+ /*
7878+ * not useful to have this in openapi
7979+ */
8080+ unpublished = true,
8181+}]
8282+async fn get_openapi(ctx: RequestContext<Context>) -> OkCorsResponse<serde_json::Value> {
8383+ let spec = (*ctx.context().spec).clone();
8484+ OkCors(spec).into()
8585+}
8686+8787+#[derive(Debug, Serialize, JsonSchema)]
8888+struct MetaInfo {
8989+ storage_name: String,
9090+ storage: serde_json::Value,
9191+ consumer: ConsumerInfo,
9292+}
9393+/// UFOs meta-info
9494+#[endpoint {
9595+ method = GET,
9696+ path = "/meta"
9797+}]
9898+async fn get_meta_info(ctx: RequestContext<Context>) -> OkCorsResponse<MetaInfo> {
9999+ let Context { storage, .. } = ctx.context();
100100+ let failed_to_get =
101101+ |what| move |e| HttpError::for_internal_error(format!("failed to get {what}: {e:?}"));
102102+103103+ let storage_info = storage
104104+ .get_storage_stats()
105105+ .await
106106+ .map_err(failed_to_get("storage info"))?;
107107+108108+ let consumer = storage
109109+ .get_consumer_info()
110110+ .await
111111+ .map_err(failed_to_get("consumer info"))?;
112112+113113+ OkCors(MetaInfo {
114114+ storage_name: storage.name(),
115115+ storage: storage_info,
116116+ consumer,
117117+ })
118118+ .into()
119119+}
120120+121121+// TODO: replace with normal (🙃) multi-qs value somehow
122122+fn to_multiple_nsids(s: &str) -> Result<HashSet<Nsid>, String> {
123123+ let mut out = HashSet::new();
124124+ for collection in s.split(',') {
125125+ let Ok(nsid) = Nsid::new(collection.to_string()) else {
126126+ return Err(format!("collection {collection:?} was not a valid NSID"));
127127+ };
128128+ out.insert(nsid);
129129+ }
130130+ Ok(out)
131131+}
132132+133133+#[derive(Debug, Deserialize, JsonSchema)]
134134+struct RecordsCollectionsQuery {
135135+ collection: Option<String>, // JsonSchema not implemented for Nsid :(
136136+}
137137+#[derive(Debug, Serialize, JsonSchema)]
138138+struct ApiRecord {
139139+ did: String,
140140+ collection: String,
141141+ rkey: String,
142142+ record: Box<serde_json::value::RawValue>,
143143+ time_us: u64,
144144+}
145145+impl From<UFOsRecord> for ApiRecord {
146146+ fn from(ufo: UFOsRecord) -> Self {
147147+ Self {
148148+ did: ufo.did.to_string(),
149149+ collection: ufo.collection.to_string(),
150150+ rkey: ufo.rkey.to_string(),
151151+ record: ufo.record,
152152+ time_us: ufo.cursor.to_raw_u64(),
153153+ }
154154+ }
155155+}
156156+/// Record samples
157157+///
158158+/// Get most recent records seen in the firehose, by collection NSID
159159+///
160160+/// Multiple collections are supported. They will be delivered in one big array with no
161161+/// specified order.
162162+#[endpoint {
163163+ method = GET,
164164+ path = "/records",
165165+}]
166166+async fn get_records_by_collections(
167167+ ctx: RequestContext<Context>,
168168+ collection_query: Query<RecordsCollectionsQuery>,
169169+) -> OkCorsResponse<Vec<ApiRecord>> {
170170+ let Context { storage, .. } = ctx.context();
171171+ let mut limit = 42;
172172+ let query = collection_query.into_inner();
173173+ let collections = if let Some(provided_collection) = query.collection {
174174+ to_multiple_nsids(&provided_collection)
175175+ .map_err(|reason| HttpError::for_bad_request(None, reason))?
176176+ } else {
177177+ limit = 12;
178178+ let min_time_ago = SystemTime::now() - Duration::from_secs(86_400 * 3); // we want at least 3 days of data
179179+ let since: WeekTruncatedCursor = Cursor::at(min_time_ago).into();
180180+ let (collections, _) = storage
181181+ .get_collections(
182182+ 1000,
183183+ Default::default(),
184184+ Some(since.try_as().unwrap()),
185185+ None,
186186+ )
187187+ .await
188188+ .map_err(|e| HttpError::for_internal_error(e.to_string()))?;
189189+ collections
190190+ .into_iter()
191191+ .map(|c| Nsid::new(c.nsid).unwrap())
192192+ .collect()
193193+ };
194194+195195+ let records = storage
196196+ .get_records_by_collections(collections, limit, true)
197197+ .await
198198+ .map_err(|e| HttpError::for_internal_error(e.to_string()))?
199199+ .into_iter()
200200+ .map(|r| r.into())
201201+ .collect();
202202+203203+ OkCors(records).into()
204204+}
205205+206206+#[derive(Debug, Deserialize, JsonSchema)]
207207+struct CollectionsStatsQuery {
208208+ /// Limit stats to those seen after this UTC datetime
209209+ ///
210210+ /// default: 1 week ago
211211+ since: Option<DateTime<Utc>>,
212212+ /// Limit stats to those seen before this UTC datetime
213213+ ///
214214+ /// default: now
215215+ until: Option<DateTime<Utc>>,
216216+}
217217+/// Collection stats
218218+///
219219+/// Get record statistics for collections during a specific time period.
220220+///
221221+/// Note: the statistics are "rolled up" into hourly buckets in the background,
222222+/// so the data here can be as stale as that background task is behind. See the
223223+/// meta info endpoint to find out how up-to-date the rollup currently is. (In
224224+/// general it should be pretty close to live)
225225+#[endpoint {
226226+ method = GET,
227227+ path = "/collections/stats"
228228+}]
229229+async fn get_collection_stats(
230230+ ctx: RequestContext<Context>,
231231+ collections_query: MultiCollectionQuery,
232232+ query: Query<CollectionsStatsQuery>,
233233+) -> OkCorsResponse<HashMap<String, JustCount>> {
234234+ let Context { storage, .. } = ctx.context();
235235+ let q = query.into_inner();
236236+ let collections: HashSet<Nsid> = collections_query.try_into()?;
237237+238238+ let since = q.since.map(dt_to_cursor).transpose()?.unwrap_or_else(|| {
239239+ let week_ago_secs = 7 * 86_400;
240240+ let week_ago = SystemTime::now() - Duration::from_secs(week_ago_secs);
241241+ Cursor::at(week_ago).into()
242242+ });
243243+244244+ let until = q.until.map(dt_to_cursor).transpose()?;
245245+246246+ let mut seen_by_collection = HashMap::with_capacity(collections.len());
247247+248248+ for collection in &collections {
249249+ let counts = storage
250250+ .get_collection_counts(collection, since, until)
251251+ .await
252252+ .map_err(|e| HttpError::for_internal_error(format!("boooo: {e:?}")))?;
253253+254254+ seen_by_collection.insert(collection.to_string(), counts);
255255+ }
256256+257257+ OkCors(seen_by_collection).into()
258258+}
259259+260260+#[derive(Debug, Serialize, JsonSchema)]
261261+struct CollectionsResponse {
262262+ /// Each known collection and its associated statistics
263263+ ///
264264+ /// The order is unspecified.
265265+ collections: Vec<NsidCount>,
266266+ /// Include in a follow-up request to get the next page of results, if more are available
267267+ cursor: Option<String>,
268268+}
269269+#[derive(Debug, Deserialize, JsonSchema)]
270270+#[serde(rename_all = "kebab-case")]
271271+pub enum CollectionsQueryOrder {
272272+ RecordsCreated,
273273+ DidsEstimate,
274274+}
275275+impl From<&CollectionsQueryOrder> for OrderCollectionsBy {
276276+ fn from(q: &CollectionsQueryOrder) -> Self {
277277+ match q {
278278+ CollectionsQueryOrder::RecordsCreated => OrderCollectionsBy::RecordsCreated,
279279+ CollectionsQueryOrder::DidsEstimate => OrderCollectionsBy::DidsEstimate,
280280+ }
281281+ }
282282+}
283283+#[derive(Debug, Deserialize, JsonSchema)]
284284+struct CollectionsQuery {
285285+ /// The maximum number of collections to return in one request.
286286+ ///
287287+ /// Default: `100` normally, `32` if `order` is specified.
288288+ #[schemars(range(min = 1, max = 200))]
289289+ limit: Option<usize>,
290290+ /// Get a paginated response with more collections.
291291+ ///
292292+ /// Always omit the cursor for the first request. If more collections than the limit are available, the response will contain a non-null `cursor` to include with the next request.
293293+ ///
294294+ /// `cursor` is mutually exclusive with `order`.
295295+ cursor: Option<String>,
296296+ /// Limit collections and statistics to those seen after this UTC datetime
297297+ since: Option<DateTime<Utc>>,
298298+ /// Limit collections and statistics to those seen before this UTC datetime
299299+ until: Option<DateTime<Utc>>,
300300+ /// Get a limited, sorted list
301301+ ///
302302+ /// Mutually exclusive with `cursor` -- sorted results cannot be paged.
303303+ order: Option<CollectionsQueryOrder>,
304304+}
305305+306306+/// List collections
307307+///
308308+/// With statistics.
309309+///
310310+/// ## To fetch a full list:
311311+///
312312+/// Omit the `order` parameter and page through the results using the `cursor`. There have been a lot of collections seen in the ATmosphere, well over 400 at time of writing, so you *will* need to make a series of paginaged requests with `cursor`s to get them all.
313313+///
314314+/// The set of collections across multiple requests is not guaranteed to be a perfectly consistent snapshot:
315315+///
316316+/// - all collection NSIDs observed before the first request will be included in the results
317317+///
318318+/// - *new* NSIDs observed in the firehose *while paging* might be included or excluded from the final set
319319+///
320320+/// - no duplicate NSIDs will occur in the combined results
321321+///
322322+/// In practice this is close enough for most use-cases to not worry about.
323323+///
324324+/// ## To fetch the top collection NSIDs:
325325+///
326326+/// Specify the `order` parameter (must be either `records-created` or `did-estimate`). Note that ordered results cannot be paged.
327327+///
328328+/// All statistics are bucketed hourly, so the most granular effecitve time boundary for `since` and `until` is one hour.
329329+#[endpoint {
330330+ method = GET,
331331+ path = "/collections"
332332+}]
333333+async fn get_collections(
334334+ ctx: RequestContext<Context>,
335335+ query: Query<CollectionsQuery>,
336336+) -> OkCorsResponse<CollectionsResponse> {
337337+ let Context { storage, .. } = ctx.context();
338338+ let q = query.into_inner();
339339+340340+ if q.cursor.is_some() && q.order.is_some() {
341341+ let msg = "`cursor` is mutually exclusive with `order`. ordered results cannot be paged.";
342342+ return Err(HttpError::for_bad_request(None, msg.to_string()));
343343+ }
344344+345345+ let order = if let Some(ref o) = q.order {
346346+ o.into()
347347+ } else {
348348+ let cursor = q
349349+ .cursor
350350+ .and_then(|c| if c.is_empty() { None } else { Some(c) })
351351+ .map(|c| URL_SAFE_NO_PAD.decode(&c))
352352+ .transpose()
353353+ .map_err(|e| HttpError::for_bad_request(None, format!("invalid cursor: {e:?}")))?;
354354+ OrderCollectionsBy::Lexi { cursor }
355355+ };
356356+357357+ let limit = match (q.limit, q.order) {
358358+ (Some(limit), _) => limit,
359359+ (None, Some(_)) => 32,
360360+ (None, None) => 100,
361361+ };
362362+363363+ if !(1..=200).contains(&limit) {
364364+ let msg = format!("limit not in 1..=200: {}", limit);
365365+ return Err(HttpError::for_bad_request(None, msg));
366366+ }
367367+368368+ let since = q.since.map(dt_to_cursor).transpose()?;
369369+ let until = q.until.map(dt_to_cursor).transpose()?;
370370+371371+ let (collections, next_cursor) = storage
372372+ .get_collections(limit, order, since, until)
373373+ .await
374374+ .map_err(|e| HttpError::for_internal_error(format!("oh shoot: {e:?}")))?;
375375+376376+ let next_cursor = next_cursor.map(|c| URL_SAFE_NO_PAD.encode(c));
377377+378378+ OkCors(CollectionsResponse {
379379+ collections,
380380+ cursor: next_cursor,
381381+ })
382382+ .into()
383383+}
384384+385385+#[derive(Debug, Serialize, JsonSchema)]
386386+struct PrefixResponse {
387387+ /// Note that total may not include counts beyond the current page (TODO)
388388+ total: JustCount,
389389+ children: Vec<PrefixChild>,
390390+ /// Include in a follow-up request to get the next page of results, if more are available
391391+ cursor: Option<String>,
392392+}
393393+#[derive(Debug, Deserialize, JsonSchema)]
394394+struct PrefixQuery {
395395+ ///
396396+ /// The final segment of a collection NSID is the `name`, and everything before it is called its `group`. eg:
397397+ ///
398398+ /// - `app.bsky.feed.post` and `app.bsky.feed.like` are both in the _lexicon group_ "`app.bsky.feed`".
399399+ ///
400400+ prefix: String,
401401+ /// The maximum number of collections to return in one request.
402402+ ///
403403+ /// The number of items actually returned may be less than the limit. If paginating, this does **not** indicate that no
404404+ /// more items are available! Check if the `cursor` in the response is `null` to determine the end of items.
405405+ ///
406406+ /// Default: `100` normally, `32` if `order` is specified.
407407+ #[schemars(range(min = 1, max = 200))]
408408+ limit: Option<usize>,
409409+ /// Get a paginated response with more collections.
410410+ ///
411411+ /// Always omit the cursor for the first request. If more collections than the limit are available, the response will contain a non-null `cursor` to include with the next request.
412412+ ///
413413+ /// `cursor` is mutually exclusive with `order`.
414414+ cursor: Option<String>,
415415+ /// Limit collections and statistics to those seen after this UTC datetime
416416+ ///
417417+ /// Default: all-time
418418+ since: Option<DateTime<Utc>>,
419419+ /// Limit collections and statistics to those seen before this UTC datetime
420420+ ///
421421+ /// Default: now
422422+ until: Option<DateTime<Utc>>,
423423+ /// Get a limited, sorted list
424424+ ///
425425+ /// Mutually exclusive with `cursor` -- sorted results cannot be paged.
426426+ order: Option<CollectionsQueryOrder>,
427427+}
428428+/// Prefix-filter collections list
429429+///
430430+/// This endpoint enumerates all collection NSIDs for a lexicon group.
431431+///
432432+/// ## To fetch a full list:
433433+///
434434+/// Omit the `order` parameter and page through the results using the `cursor`. There have been a lot of collections seen in the ATmosphere, well over 400 at time of writing, so you *will* need to make a series of paginaged requests with `cursor`s to get them all.
435435+///
436436+/// The set of collections across multiple requests is not guaranteed to be a perfectly consistent snapshot:
437437+///
438438+/// - all collection NSIDs observed before the first request will be included in the results
439439+///
440440+/// - *new* NSIDs observed in the firehose *while paging* might be included or excluded from the final set
441441+///
442442+/// - no duplicate NSIDs will occur in the combined results
443443+///
444444+/// In practice this is close enough for most use-cases to not worry about.
445445+///
446446+/// ## To fetch the top collection NSIDs:
447447+///
448448+/// Specify the `order` parameter (must be either `records-created` or `did-estimate`). Note that ordered results cannot be paged.
449449+///
450450+/// All statistics are bucketed hourly, so the most granular effecitve time boundary for `since` and `until` is one hour.
451451+#[endpoint {
452452+ method = GET,
453453+ path = "/prefix"
454454+}]
455455+async fn get_prefix(
456456+ ctx: RequestContext<Context>,
457457+ query: Query<PrefixQuery>,
458458+) -> OkCorsResponse<PrefixResponse> {
459459+ let Context { storage, .. } = ctx.context();
460460+ let q = query.into_inner();
461461+462462+ let prefix = NsidPrefix::new(&q.prefix).map_err(|e| {
463463+ HttpError::for_bad_request(
464464+ None,
465465+ format!("{:?} was not a valid NSID prefix: {e:?}", q.prefix),
466466+ )
467467+ })?;
468468+469469+ if q.cursor.is_some() && q.order.is_some() {
470470+ let msg = "`cursor` is mutually exclusive with `order`. ordered results cannot be paged.";
471471+ return Err(HttpError::for_bad_request(None, msg.to_string()));
472472+ }
473473+474474+ let order = if let Some(ref o) = q.order {
475475+ o.into()
476476+ } else {
477477+ let cursor = q
478478+ .cursor
479479+ .and_then(|c| if c.is_empty() { None } else { Some(c) })
480480+ .map(|c| URL_SAFE_NO_PAD.decode(&c))
481481+ .transpose()
482482+ .map_err(|e| HttpError::for_bad_request(None, format!("invalid cursor: {e:?}")))?;
483483+ OrderCollectionsBy::Lexi { cursor }
484484+ };
485485+486486+ let limit = match (q.limit, q.order) {
487487+ (Some(limit), _) => limit,
488488+ (None, Some(_)) => 32,
489489+ (None, None) => 100,
490490+ };
491491+492492+ if !(1..=200).contains(&limit) {
493493+ let msg = format!("limit not in 1..=200: {}", limit);
494494+ return Err(HttpError::for_bad_request(None, msg));
495495+ }
496496+497497+ let since = q.since.map(dt_to_cursor).transpose()?;
498498+ let until = q.until.map(dt_to_cursor).transpose()?;
499499+500500+ let (total, children, next_cursor) = storage
501501+ .get_prefix(prefix, limit, order, since, until)
502502+ .await
503503+ .map_err(|e| HttpError::for_internal_error(format!("oh shoot: {e:?}")))?;
504504+505505+ let next_cursor = next_cursor.map(|c| URL_SAFE_NO_PAD.encode(c));
506506+507507+ OkCors(PrefixResponse {
508508+ total,
509509+ children,
510510+ cursor: next_cursor,
511511+ })
512512+ .into()
513513+}
514514+515515+#[derive(Debug, Deserialize, JsonSchema)]
516516+struct CollectionTimeseriesQuery {
517517+ collection: String, // JsonSchema not implemented for Nsid :(
518518+ /// Limit collections and statistics to those seen after this UTC datetime
519519+ ///
520520+ /// default: 1 week ago
521521+ since: Option<DateTime<Utc>>,
522522+ /// Limit collections and statistics to those seen before this UTC datetime
523523+ ///
524524+ /// default: now
525525+ until: Option<DateTime<Utc>>,
526526+ /// time steps between data, in seconds
527527+ ///
528528+ /// the step will be rounded down to the nearest hour
529529+ ///
530530+ /// default: 86400 (24hrs)
531531+ #[schemars(range(min = 3600))]
532532+ step: Option<u64>,
533533+ // todo: rolling averages
534534+}
535535+#[derive(Debug, Serialize, JsonSchema)]
536536+struct CollectionTimeseriesResponse {
537537+ range: Vec<DateTime<Utc>>,
538538+ series: HashMap<String, Vec<JustCount>>,
539539+}
540540+/// Collection timeseries stats
541541+#[endpoint {
542542+ method = GET,
543543+ path = "/timeseries"
544544+}]
545545+async fn get_timeseries(
546546+ ctx: RequestContext<Context>,
547547+ query: Query<CollectionTimeseriesQuery>,
548548+) -> OkCorsResponse<CollectionTimeseriesResponse> {
549549+ let Context { storage, .. } = ctx.context();
550550+ let q = query.into_inner();
551551+552552+ let since = q.since.map(dt_to_cursor).transpose()?.unwrap_or_else(|| {
553553+ let week_ago_secs = 7 * 86_400;
554554+ let week_ago = SystemTime::now() - Duration::from_secs(week_ago_secs);
555555+ Cursor::at(week_ago).into()
556556+ });
557557+558558+ let until = q.until.map(dt_to_cursor).transpose()?;
559559+560560+ let step = if let Some(secs) = q.step {
561561+ if secs < 3600 {
562562+ let msg = format!("step is too small: {}", secs);
563563+ Err(HttpError::for_bad_request(None, msg))?;
564564+ }
565565+ (secs / 3600) * 3600 // trucate to hour
566566+ } else {
567567+ 86_400
568568+ };
569569+570570+ let nsid = Nsid::new(q.collection).map_err(|e| {
571571+ HttpError::for_bad_request(None, format!("collection was not a valid NSID: {:?}", e))
572572+ })?;
573573+574574+ let (range_cursors, series) = storage
575575+ .get_timeseries(vec![nsid], since, until, step)
576576+ .await
577577+ .map_err(|e| HttpError::for_internal_error(format!("oh shoot: {e:?}")))?;
578578+579579+ let range = range_cursors
580580+ .into_iter()
581581+ .map(|c| DateTime::<Utc>::from_timestamp_micros(c.to_raw_u64() as i64).unwrap())
582582+ .collect();
583583+584584+ let series = series
585585+ .into_iter()
586586+ .map(|(k, v)| (k.to_string(), v.iter().map(Into::into).collect()))
587587+ .collect();
588588+589589+ OkCors(CollectionTimeseriesResponse { range, series }).into()
590590+}
591591+592592+pub async fn serve(storage: impl StoreReader + 'static) -> Result<(), String> {
593593+ let log = ConfigLogging::StderrTerminal {
594594+ level: ConfigLoggingLevel::Info,
595595+ }
596596+ .to_logger("hello-ufos")
597597+ .map_err(|e| e.to_string())?;
598598+599599+ let mut api = ApiDescription::new();
600600+601601+ api.register(index).unwrap();
602602+ api.register(get_openapi).unwrap();
603603+ api.register(get_meta_info).unwrap();
604604+ api.register(get_records_by_collections).unwrap();
605605+ api.register(get_collection_stats).unwrap();
606606+ api.register(get_collections).unwrap();
607607+ api.register(get_prefix).unwrap();
608608+ api.register(get_timeseries).unwrap();
609609+610610+ let context = Context {
611611+ spec: Arc::new(
612612+ api.openapi(
613613+ "UFOs: Every lexicon in the ATmosphere",
614614+ env!("CARGO_PKG_VERSION")
615615+ .parse()
616616+ .inspect_err(|e| {
617617+ log::warn!("failed to parse cargo package version for openapi: {e:?}")
618618+ })
619619+ .unwrap_or(semver::Version::new(0, 0, 1)),
620620+ )
621621+ .description("Samples and statistics of atproto records by their collection NSID")
622622+ .contact_name("part of @microcosm.blue")
623623+ .contact_url("https://microcosm.blue")
624624+ .json()
625625+ .map_err(|e| e.to_string())?,
626626+ ),
627627+ storage: Box::new(storage),
628628+ };
629629+630630+ ServerBuilder::new(api, context, log)
631631+ .config(ConfigDropshot {
632632+ bind_address: "0.0.0.0:9999".parse().unwrap(),
633633+ ..Default::default()
634634+ })
635635+ .start()
636636+ .map_err(|error| format!("failed to start server: {}", error))?
637637+ .await
638638+}