Constellation, Spacedust, Slingshot, UFOs: atproto crates and services for microcosm
75
fork

Configure Feed

Select the types of activity you want to include in your feed.

deserialize multiple collections from query

blehhhhhh

phil 7da961f4 f3579c5a

+121 -120
-1
ufos/src/lib.rs
··· 3 3 pub mod error; 4 4 pub mod file_consumer; 5 5 pub mod index_html; 6 - pub mod qs_query; 7 6 pub mod server; 8 7 pub mod storage; 9 8 pub mod storage_fjall;
-73
ufos/src/qs_query.rs
··· 1 - use async_trait::async_trait; 2 - use dropshot::{ 3 - ApiEndpointBodyContentType, ExclusiveExtractor, ExtractorMetadata, HttpError, RequestContext, 4 - RequestInfo, ServerContext, SharedExtractor, 5 - }; 6 - /// copied from https://github.com/oxidecomputer/dropshot/blob/695e1d8872c988c43066eb0848c87c127eeda361/dropshot/src/extractor/query.rs 7 - /// Apache 2.0: https://github.com/oxidecomputer/dropshot/blob/695e1d8872c988c43066eb0848c87c127eeda361/LICENSE 8 - use schemars::JsonSchema; 9 - use serde::de::DeserializeOwned; 10 - 11 - /// `VecsAllowedQuery<QueryType>` is an extractor used to deserialize an 12 - /// instance of `QueryType` from an HTTP request's query string. `QueryType` 13 - /// is any structure of yours that implements [serde::Deserialize] and 14 - /// [schemars::JsonSchema]. See the crate documentation for more information. 15 - #[derive(Debug)] 16 - pub struct VecsAllowedQuery<QueryType: DeserializeOwned + JsonSchema + Send + Sync> { 17 - inner: QueryType, 18 - } 19 - impl<QueryType: DeserializeOwned + JsonSchema + Send + Sync> VecsAllowedQuery<QueryType> { 20 - // TODO drop this in favor of Deref? + Display and Debug for convenience? 21 - pub fn into_inner(self) -> QueryType { 22 - self.inner 23 - } 24 - } 25 - 26 - /// Given an HTTP request, pull out the query string and attempt to deserialize 27 - /// it as an instance of `QueryType`. 28 - fn http_request_load_query<QueryType>( 29 - request: &RequestInfo, 30 - ) -> Result<VecsAllowedQuery<QueryType>, HttpError> 31 - where 32 - QueryType: DeserializeOwned + JsonSchema + Send + Sync, 33 - { 34 - let raw_query_string = request.uri().query().unwrap_or(""); 35 - // TODO-correctness: are query strings defined to be urlencoded in this way? 
36 - match serde_qs::from_str(raw_query_string) { 37 - Ok(q) => Ok(VecsAllowedQuery { inner: q }), 38 - Err(e) => Err(HttpError::for_bad_request( 39 - None, 40 - format!("unable to parse query string: {}", e), 41 - )), 42 - } 43 - } 44 - 45 - // The `SharedExtractor` implementation for Query<QueryType> describes how to 46 - // construct an instance of `Query<QueryType>` from an HTTP request: namely, by 47 - // parsing the query string to an instance of `QueryType`. 48 - // TODO-cleanup We shouldn't have to use the "'static" bound on `QueryType` 49 - // here. It seems like we ought to be able to use 'async_trait, but that 50 - // doesn't seem to be defined. 51 - #[async_trait] 52 - impl<QueryType> SharedExtractor for VecsAllowedQuery<QueryType> 53 - where 54 - QueryType: JsonSchema + DeserializeOwned + Send + Sync + 'static, 55 - { 56 - async fn from_request<Context: ServerContext>( 57 - rqctx: &RequestContext<Context>, 58 - ) -> Result<VecsAllowedQuery<QueryType>, HttpError> { 59 - http_request_load_query(&rqctx.request) 60 - } 61 - 62 - fn metadata(body_content_type: ApiEndpointBodyContentType) -> ExtractorMetadata { 63 - // HACK: would love to use Query here but it "helpfully" panics when it sees a Vec. 64 - // we can't really get at enough of Query's logic to use it directly, sadly, so the 65 - // resulting openapi docs suck (query params are listed as body payload, example 66 - // codes make no sense, etc.) 67 - // 68 - // trying to hack the resulting ExtractorMetadata to look like Query's is a pain: 69 - // things almost work out but then something in dropshot won't be `pub` and it falls 70 - // apart. maybe it's possible, i didn't get it in the time i had. 71 - dropshot::TypedBody::<QueryType>::metadata(body_content_type) 72 - } 73 - }
+26 -46
ufos/src/server.rs ufos/src/server/mod.rs
··· 1 + mod collections_query; 2 + mod cors; 3 + 1 4 use crate::index_html::INDEX_HTML; 2 - use crate::qs_query::VecsAllowedQuery; 3 5 use crate::storage::StoreReader; 4 6 use crate::store_types::{HourTruncatedCursor, WeekTruncatedCursor}; 5 7 use crate::{ConsumerInfo, Cursor, JustCount, Nsid, NsidCount, OrderCollectionsBy, UFOsRecord}; 6 8 use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine as _}; 7 9 use chrono::{DateTime, Utc}; 10 + use collections_query::MultiCollectionQuery; 11 + use cors::{OkCors, OkCorsResponse}; 8 12 use dropshot::endpoint; 9 13 use dropshot::ApiDescription; 10 14 use dropshot::Body; ··· 12 16 use dropshot::ConfigLogging; 13 17 use dropshot::ConfigLoggingLevel; 14 18 use dropshot::HttpError; 15 - use dropshot::HttpResponseHeaders; 16 - use dropshot::HttpResponseOk; 17 19 use dropshot::Query; 18 20 use dropshot::RequestContext; 19 21 use dropshot::ServerBuilder; ··· 76 78 }] 77 79 async fn get_openapi(ctx: RequestContext<Context>) -> OkCorsResponse<serde_json::Value> { 78 80 let spec = (*ctx.context().spec).clone(); 79 - ok_cors(spec) 81 + OkCors(spec).into() 80 82 } 81 83 82 84 #[derive(Debug, Serialize, JsonSchema)] ··· 105 107 .await 106 108 .map_err(failed_to_get("consumer info"))?; 107 109 108 - ok_cors(MetaInfo { 110 + OkCors(MetaInfo { 109 111 storage_name: storage.name(), 110 112 storage: storage_info, 111 113 consumer, 112 114 }) 115 + .into() 113 116 } 114 117 115 118 // TODO: replace with normal (🙃) multi-qs value somehow ··· 194 197 .map(|r| r.into()) 195 198 .collect(); 196 199 197 - ok_cors(records) 200 + OkCors(records).into() 198 201 } 199 202 200 203 #[derive(Debug, Deserialize, JsonSchema)] 201 - struct TotalSeenCollectionsQuery { 202 - collection: Vec<String>, // JsonSchema not implemented for Nsid :( 204 + struct CollectionsStatsQuery { 203 205 /// Limit stats to those seen after this UTC datetime 204 206 /// 205 207 /// default: 1 week ago ··· 216 218 } 217 219 /// Collection stats 218 220 /// 219 - /// Get 
stats for a collection over a specific time period 220 - /// 221 - /// API docs note: the **Body** fields here are actually query parameters!! 222 - /// 223 - /// Due to limitations with dropshot's query parsing (no support for sequences), 224 - /// this is kind of the best i could do for now. sadly. 221 + /// Get record statistics for collections during a specific time period 225 222 #[endpoint { 226 223 method = GET, 227 224 path = "/collections/stats" 228 225 }] 229 - async fn get_records_total_seen( 226 + async fn get_collection_stats( 230 227 ctx: RequestContext<Context>, 231 - query: VecsAllowedQuery<TotalSeenCollectionsQuery>, 228 + collections_query: MultiCollectionQuery, 229 + query: Query<CollectionsStatsQuery>, 232 230 ) -> OkCorsResponse<HashMap<String, TotalCounts>> { 233 231 let Context { storage, .. } = ctx.context(); 234 232 let q = query.into_inner(); 235 - 236 - log::warn!("collection: {:?}", q.collection); 237 - 238 - let mut collections = Vec::with_capacity(q.collection.len()); 239 - for c in q.collection { 240 - let Ok(nsid) = Nsid::new(c.clone()) else { 241 - return Err(HttpError::for_bad_request( 242 - None, 243 - format!("could not parse collection to nsid: {c}"), 244 - )); 245 - }; 246 - collections.push(nsid); 247 - } 233 + let collections: HashSet<Nsid> = collections_query.try_into()?; 248 234 249 - let since = q.since.map(dt_to_cursor).transpose()?; 250 - let until = q.until.map(dt_to_cursor).transpose()?; 235 + let _since = q.since.map(dt_to_cursor).transpose()?; 236 + let _until = q.until.map(dt_to_cursor).transpose()?; 251 237 252 238 let mut seen_by_collection = HashMap::with_capacity(collections.len()); 253 239 ··· 266 252 ); 267 253 } 268 254 269 - ok_cors(seen_by_collection) 255 + OkCors(seen_by_collection).into() 270 256 } 271 257 272 258 #[derive(Debug, Serialize, JsonSchema)] ··· 315 301 order: Option<CollectionsQueryOrder>, 316 302 } 317 303 318 - /// List collections (with stats) 304 + /// List collections 305 + /// 306 + /// 
With statistics. 319 307 /// 320 308 /// ## To fetch a full list: 321 309 /// ··· 385 373 386 374 let next_cursor = next_cursor.map(|c| URL_SAFE_NO_PAD.encode(c)); 387 375 388 - ok_cors(CollectionsResponse { 376 + OkCors(CollectionsResponse { 389 377 collections, 390 378 cursor: next_cursor, 391 379 }) 380 + .into() 392 381 } 393 382 394 383 #[derive(Debug, Deserialize, JsonSchema)] ··· 439 428 let step = if let Some(secs) = q.step { 440 429 if secs < 3600 { 441 430 let msg = format!("step is too small: {}", secs); 442 - return Err(HttpError::for_bad_request(None, msg)); 431 + Err(HttpError::for_bad_request(None, msg))?; 443 432 } 444 433 (secs / 3600) * 3600 // truncate to hour 445 434 } else { ··· 465 454 .map(|(k, v)| (k.to_string(), v.iter().map(Into::into).collect())) 466 455 .collect(); 467 456 468 - ok_cors(CollectionTimeseriesResponse { range, series }) 457 + OkCors(CollectionTimeseriesResponse { range, series }).into() 469 458 } 470 459 471 460 pub async fn serve(storage: impl StoreReader + 'static) -> Result<(), String> { ··· 481 470 api.register(get_openapi).unwrap(); 482 471 api.register(get_meta_info).unwrap(); 483 472 api.register(get_records_by_collections).unwrap(); 484 - api.register(get_records_total_seen).unwrap(); 473 + api.register(get_collection_stats).unwrap(); 485 474 api.register(get_collections).unwrap(); 486 475 api.register(get_timeseries).unwrap(); 487 476 ··· 514 503 .map_err(|error| format!("failed to start server: {}", error))? 515 504 .await 516 505 } 517 - 518 - /// awkward helpers 519 - type OkCorsResponse<T> = Result<HttpResponseHeaders<HttpResponseOk<T>>, HttpError>; 520 - fn ok_cors<T: Send + Sync + Serialize + JsonSchema>(t: T) -> OkCorsResponse<T> { 521 - let mut res = HttpResponseHeaders::new_unnamed(HttpResponseOk(t)); 522 - res.headers_mut() 523 - .insert("access-control-allow-origin", "*".parse().unwrap()); 524 - Ok(res) 525 - }
+72
ufos/src/server/collections_query.rs
··· 1 + use crate::Nsid; 2 + use async_trait::async_trait; 3 + use dropshot::{ 4 + ApiEndpointBodyContentType, ExtractorMetadata, HttpError, Query, RequestContext, ServerContext, 5 + SharedExtractor, 6 + }; 7 + use schemars::JsonSchema; 8 + use serde::Deserialize; 9 + use std::collections::HashSet; 10 + 11 + /// The real type that gets deserialized 12 + #[derive(Debug, Deserialize, JsonSchema)] 13 + pub struct MultiCollectionQuery { 14 + pub collection: Vec<String>, 15 + } 16 + 17 + /// The fake corresponding type for docs that dropshot won't freak out about a 18 + /// vec for 19 + #[derive(Deserialize, JsonSchema)] 20 + #[allow(dead_code)] 21 + struct MultiCollectionQueryForDocs { 22 + /// One or more collection [NSID](https://atproto.com/specs/nsid)s 23 + /// 24 + /// Pass this parameter multiple times to specify multiple collections, like 25 + /// `collection=app.bsky.feed.like&collection=app.bsky.feed.post` 26 + collection: String, 27 + } 28 + 29 + impl TryFrom<MultiCollectionQuery> for HashSet<Nsid> { 30 + type Error = HttpError; 31 + fn try_from(mcq: MultiCollectionQuery) -> Result<Self, Self::Error> { 32 + let mut out = HashSet::with_capacity(mcq.collection.len()); 33 + for c in mcq.collection { 34 + let nsid = Nsid::new(c).map_err(|e| { 35 + HttpError::for_bad_request( 36 + None, 37 + format!("failed to convert collection to an NSID: {e:?}"), 38 + ) 39 + })?; 40 + out.insert(nsid); 41 + } 42 + Ok(out) 43 + } 44 + } 45 + 46 + // The `SharedExtractor` implementation for Query<QueryType> describes how to 47 + // construct an instance of `Query<QueryType>` from an HTTP request: namely, by 48 + // parsing the query string to an instance of `QueryType`. 
49 + #[async_trait] 50 + impl SharedExtractor for MultiCollectionQuery { 51 + async fn from_request<Context: ServerContext>( 52 + ctx: &RequestContext<Context>, 53 + ) -> Result<MultiCollectionQuery, HttpError> { 54 + let raw_query = ctx.request.uri().query().unwrap_or(""); 55 + let q = serde_qs::from_str(raw_query).map_err(|e| { 56 + HttpError::for_bad_request(None, format!("unable to parse query string: {}", e)) 57 + })?; 58 + Ok(q) 59 + } 60 + 61 + fn metadata(body_content_type: ApiEndpointBodyContentType) -> ExtractorMetadata { 62 + // HACK: query type switcheroo: passing MultiCollectionQuery to 63 + // `metadata` would "helpfully" panic because dropshot believes we can 64 + // only have scalar types in a query. 65 + // 66 + // so instead we have a fake second type whose only job is to look the 67 + // same as MultiCollectionQuery except that it has `String` instead of 68 + // `Vec<String>`, which dropshot will accept, and generate ~close-enough 69 + // docs for. 70 + <Query<MultiCollectionQueryForDocs> as SharedExtractor>::metadata(body_content_type) 71 + } 72 + }
+23
ufos/src/server/cors.rs
··· 1 + use dropshot::{HttpError, HttpResponseHeaders, HttpResponseOk}; 2 + use schemars::JsonSchema; 3 + use serde::Serialize; 4 + 5 + pub type OkCorsResponse<T> = Result<HttpResponseHeaders<HttpResponseOk<T>>, HttpError>; 6 + 7 + /// Helper for constructing Ok responses: return OkCors(T).into() 8 + /// (not happy with this yet) 9 + pub struct OkCors<T: Serialize + JsonSchema + Send + Sync>(pub T); 10 + 11 + impl<T> From<OkCors<T>> for OkCorsResponse<T> 12 + where 13 + T: Serialize + JsonSchema + Send + Sync, 14 + { 15 + fn from(ok: OkCors<T>) -> OkCorsResponse<T> { 16 + let mut res = HttpResponseHeaders::new_unnamed(HttpResponseOk(ok.0)); 17 + res.headers_mut() 18 + .insert("access-control-allow-origin", "*".parse().unwrap()); 19 + Ok(res) 20 + } 21 + } 22 + 23 + // TODO: cors for HttpError