Constellation, Spacedust, Slingshot, UFOs: atproto crates and services for microcosm
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

deserialize multiple collections from query

blehhhhhh

phil 7da961f4 f3579c5a

+121 -120
-1
ufos/src/lib.rs
··· 3 3 pub mod error; 4 4 pub mod file_consumer; 5 5 pub mod index_html; 6 - pub mod qs_query; 7 6 pub mod server; 8 7 pub mod storage; 9 8 pub mod storage_fjall;
-73
ufos/src/qs_query.rs
··· 1 - use async_trait::async_trait; 2 - use dropshot::{ 3 - ApiEndpointBodyContentType, ExclusiveExtractor, ExtractorMetadata, HttpError, RequestContext, 4 - RequestInfo, ServerContext, SharedExtractor, 5 - }; 6 - /// copied from https://github.com/oxidecomputer/dropshot/blob/695e1d8872c988c43066eb0848c87c127eeda361/dropshot/src/extractor/query.rs 7 - /// Apache 2.0: https://github.com/oxidecomputer/dropshot/blob/695e1d8872c988c43066eb0848c87c127eeda361/LICENSE 8 - use schemars::JsonSchema; 9 - use serde::de::DeserializeOwned; 10 - 11 - /// `VecsAllowedQuery<QueryType>` is an extractor used to deserialize an 12 - /// instance of `QueryType` from an HTTP request's query string. `QueryType` 13 - /// is any structure of yours that implements [serde::Deserialize] and 14 - /// [schemars::JsonSchema]. See the crate documentation for more information. 15 - #[derive(Debug)] 16 - pub struct VecsAllowedQuery<QueryType: DeserializeOwned + JsonSchema + Send + Sync> { 17 - inner: QueryType, 18 - } 19 - impl<QueryType: DeserializeOwned + JsonSchema + Send + Sync> VecsAllowedQuery<QueryType> { 20 - // TODO drop this in favor of Deref? + Display and Debug for convenience? 21 - pub fn into_inner(self) -> QueryType { 22 - self.inner 23 - } 24 - } 25 - 26 - /// Given an HTTP request, pull out the query string and attempt to deserialize 27 - /// it as an instance of `QueryType`. 28 - fn http_request_load_query<QueryType>( 29 - request: &RequestInfo, 30 - ) -> Result<VecsAllowedQuery<QueryType>, HttpError> 31 - where 32 - QueryType: DeserializeOwned + JsonSchema + Send + Sync, 33 - { 34 - let raw_query_string = request.uri().query().unwrap_or(""); 35 - // TODO-correctness: are query strings defined to be urlencoded in this way? 
36 - match serde_qs::from_str(raw_query_string) { 37 - Ok(q) => Ok(VecsAllowedQuery { inner: q }), 38 - Err(e) => Err(HttpError::for_bad_request( 39 - None, 40 - format!("unable to parse query string: {}", e), 41 - )), 42 - } 43 - } 44 - 45 - // The `SharedExtractor` implementation for Query<QueryType> describes how to 46 - // construct an instance of `Query<QueryType>` from an HTTP request: namely, by 47 - // parsing the query string to an instance of `QueryType`. 48 - // TODO-cleanup We shouldn't have to use the "'static" bound on `QueryType` 49 - // here. It seems like we ought to be able to use 'async_trait, but that 50 - // doesn't seem to be defined. 51 - #[async_trait] 52 - impl<QueryType> SharedExtractor for VecsAllowedQuery<QueryType> 53 - where 54 - QueryType: JsonSchema + DeserializeOwned + Send + Sync + 'static, 55 - { 56 - async fn from_request<Context: ServerContext>( 57 - rqctx: &RequestContext<Context>, 58 - ) -> Result<VecsAllowedQuery<QueryType>, HttpError> { 59 - http_request_load_query(&rqctx.request) 60 - } 61 - 62 - fn metadata(body_content_type: ApiEndpointBodyContentType) -> ExtractorMetadata { 63 - // HACK: would love to use Query here but it "helpfully" panics when it sees a Vec. 64 - // we can't really get at enough of Query's logic to use it directly, sadly, so the 65 - // resulting openapi docs suck (query params are listed as body payload, example 66 - // codes make no sense, etc.) 67 - // 68 - // trying to hack the resulting ExtractorMetadata to look like Query's is a pain: 69 - // things almost work out but then something in dropshot won't be `pub` and it falls 70 - // apart. maybe it's possible, i didn't get it in the time i had. 71 - dropshot::TypedBody::<QueryType>::metadata(body_content_type) 72 - } 73 - }
+26 -46
ufos/src/server.rs ufos/src/server/mod.rs
··· 1 + mod collections_query; 2 + mod cors; 3 + 1 4 use crate::index_html::INDEX_HTML; 2 - use crate::qs_query::VecsAllowedQuery; 3 5 use crate::storage::StoreReader; 4 6 use crate::store_types::{HourTruncatedCursor, WeekTruncatedCursor}; 5 7 use crate::{ConsumerInfo, Cursor, JustCount, Nsid, NsidCount, OrderCollectionsBy, UFOsRecord}; 6 8 use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine as _}; 7 9 use chrono::{DateTime, Utc}; 10 + use collections_query::MultiCollectionQuery; 11 + use cors::{OkCors, OkCorsResponse}; 8 12 use dropshot::endpoint; 9 13 use dropshot::ApiDescription; 10 14 use dropshot::Body; ··· 12 16 use dropshot::ConfigLogging; 13 17 use dropshot::ConfigLoggingLevel; 14 18 use dropshot::HttpError; 15 - use dropshot::HttpResponseHeaders; 16 - use dropshot::HttpResponseOk; 17 19 use dropshot::Query; 18 20 use dropshot::RequestContext; 19 21 use dropshot::ServerBuilder; ··· 76 78 }] 77 79 async fn get_openapi(ctx: RequestContext<Context>) -> OkCorsResponse<serde_json::Value> { 78 80 let spec = (*ctx.context().spec).clone(); 79 - ok_cors(spec) 81 + OkCors(spec).into() 80 82 } 81 83 82 84 #[derive(Debug, Serialize, JsonSchema)] ··· 105 107 .await 106 108 .map_err(failed_to_get("consumer info"))?; 107 109 108 - ok_cors(MetaInfo { 110 + OkCors(MetaInfo { 109 111 storage_name: storage.name(), 110 112 storage: storage_info, 111 113 consumer, 112 114 }) 115 + .into() 113 116 } 114 117 115 118 // TODO: replace with normal (🙃) multi-qs value somehow ··· 194 197 .map(|r| r.into()) 195 198 .collect(); 196 199 197 - ok_cors(records) 200 + OkCors(records).into() 198 201 } 199 202 200 203 #[derive(Debug, Deserialize, JsonSchema)] 201 - struct TotalSeenCollectionsQuery { 202 - collection: Vec<String>, // JsonSchema not implemented for Nsid :( 204 + struct CollectionsStatsQuery { 203 205 /// Limit stats to those seen after this UTC datetime 204 206 /// 205 207 /// default: 1 week ago ··· 216 218 } 217 219 /// Collection stats 218 220 /// 219 - /// Get 
stats for a collection over a specific time period 220 - /// 221 - /// API docs note: the **Body** fields here are actually query parameters!! 222 - /// 223 - /// Due to limitations with dropshot's query parsing (no support for sequences), 224 - /// this is kind of the best i could do for now. sadly. 221 + /// Get record statistics for collections during a specific time period 225 222 #[endpoint { 226 223 method = GET, 227 224 path = "/collections/stats" 228 225 }] 229 - async fn get_records_total_seen( 226 + async fn get_collection_stats( 230 227 ctx: RequestContext<Context>, 231 - query: VecsAllowedQuery<TotalSeenCollectionsQuery>, 228 + collections_query: MultiCollectionQuery, 229 + query: Query<CollectionsStatsQuery>, 232 230 ) -> OkCorsResponse<HashMap<String, TotalCounts>> { 233 231 let Context { storage, .. } = ctx.context(); 234 232 let q = query.into_inner(); 235 - 236 - log::warn!("collection: {:?}", q.collection); 237 - 238 - let mut collections = Vec::with_capacity(q.collection.len()); 239 - for c in q.collection { 240 - let Ok(nsid) = Nsid::new(c.clone()) else { 241 - return Err(HttpError::for_bad_request( 242 - None, 243 - format!("could not parse collection to nsid: {c}"), 244 - )); 245 - }; 246 - collections.push(nsid); 247 - } 233 + let collections: HashSet<Nsid> = collections_query.try_into()?; 248 234 249 - let since = q.since.map(dt_to_cursor).transpose()?; 250 - let until = q.until.map(dt_to_cursor).transpose()?; 235 + let _since = q.since.map(dt_to_cursor).transpose()?; 236 + let _until = q.until.map(dt_to_cursor).transpose()?; 251 237 252 238 let mut seen_by_collection = HashMap::with_capacity(collections.len()); 253 239 ··· 266 252 ); 267 253 } 268 254 269 - ok_cors(seen_by_collection) 255 + OkCors(seen_by_collection).into() 270 256 } 271 257 272 258 #[derive(Debug, Serialize, JsonSchema)] ··· 315 301 order: Option<CollectionsQueryOrder>, 316 302 } 317 303 318 - /// List collections (with stats) 304 + /// List collections 305 + /// 306 + /// 
With statistics. 319 307 /// 320 308 /// ## To fetch a full list: 321 309 /// ··· 385 373 386 374 let next_cursor = next_cursor.map(|c| URL_SAFE_NO_PAD.encode(c)); 387 375 388 - ok_cors(CollectionsResponse { 376 + OkCors(CollectionsResponse { 389 377 collections, 390 378 cursor: next_cursor, 391 379 }) 380 + .into() 392 381 } 393 382 394 383 #[derive(Debug, Deserialize, JsonSchema)] ··· 439 428 let step = if let Some(secs) = q.step { 440 429 if secs < 3600 { 441 430 let msg = format!("step is too small: {}", secs); 442 - return Err(HttpError::for_bad_request(None, msg)); 431 + Err(HttpError::for_bad_request(None, msg))?; 443 432 } 444 433 (secs / 3600) * 3600 // truncate to hour 445 434 } else { ··· 465 454 .map(|(k, v)| (k.to_string(), v.iter().map(Into::into).collect())) 466 455 .collect(); 467 456 468 - ok_cors(CollectionTimeseriesResponse { range, series }) 457 + OkCors(CollectionTimeseriesResponse { range, series }).into() 469 458 } 470 459 471 460 pub async fn serve(storage: impl StoreReader + 'static) -> Result<(), String> { ··· 481 470 api.register(get_openapi).unwrap(); 482 471 api.register(get_meta_info).unwrap(); 483 472 api.register(get_records_by_collections).unwrap(); 484 - api.register(get_records_total_seen).unwrap(); 473 + api.register(get_collection_stats).unwrap(); 485 474 api.register(get_collections).unwrap(); 486 475 api.register(get_timeseries).unwrap(); 487 476 ··· 514 503 .map_err(|error| format!("failed to start server: {}", error))? 515 504 .await 516 505 } 517 - 518 - /// awkward helpers 519 - type OkCorsResponse<T> = Result<HttpResponseHeaders<HttpResponseOk<T>>, HttpError>; 520 - fn ok_cors<T: Send + Sync + Serialize + JsonSchema>(t: T) -> OkCorsResponse<T> { 521 - let mut res = HttpResponseHeaders::new_unnamed(HttpResponseOk(t)); 522 - res.headers_mut() 523 - .insert("access-control-allow-origin", "*".parse().unwrap()); 524 - Ok(res) 525 - }
+72
ufos/src/server/collections_query.rs
··· 1 + use crate::Nsid; 2 + use async_trait::async_trait; 3 + use dropshot::{ 4 + ApiEndpointBodyContentType, ExtractorMetadata, HttpError, Query, RequestContext, ServerContext, 5 + SharedExtractor, 6 + }; 7 + use schemars::JsonSchema; 8 + use serde::Deserialize; 9 + use std::collections::HashSet; 10 + 11 + /// The real type that gets deserialized 12 + #[derive(Debug, Deserialize, JsonSchema)] 13 + pub struct MultiCollectionQuery { 14 + pub collection: Vec<String>, 15 + } 16 + 17 + /// The fake corresponding type for docs that dropshot won't freak out about a 18 + /// vec for 19 + #[derive(Deserialize, JsonSchema)] 20 + #[allow(dead_code)] 21 + struct MultiCollectionQueryForDocs { 22 + /// One or more collection [NSID](https://atproto.com/specs/nsid)s 23 + /// 24 + /// Pass this parameter multiple times to specify multiple collections, like 25 + /// `collection=app.bsky.feed.like&collection=app.bsky.feed.post` 26 + collection: String, 27 + } 28 + 29 + impl TryFrom<MultiCollectionQuery> for HashSet<Nsid> { 30 + type Error = HttpError; 31 + fn try_from(mcq: MultiCollectionQuery) -> Result<Self, Self::Error> { 32 + let mut out = HashSet::with_capacity(mcq.collection.len()); 33 + for c in mcq.collection { 34 + let nsid = Nsid::new(c).map_err(|e| { 35 + HttpError::for_bad_request( 36 + None, 37 + format!("failed to convert collection to an NSID: {e:?}"), 38 + ) 39 + })?; 40 + out.insert(nsid); 41 + } 42 + Ok(out) 43 + } 44 + } 45 + 46 + // The `SharedExtractor` implementation for `MultiCollectionQuery` describes how 47 + // to construct an instance of `MultiCollectionQuery` from an HTTP request: 48 + // namely, by parsing the query string with `serde_qs`.
49 + #[async_trait] 50 + impl SharedExtractor for MultiCollectionQuery { 51 + async fn from_request<Context: ServerContext>( 52 + ctx: &RequestContext<Context>, 53 + ) -> Result<MultiCollectionQuery, HttpError> { 54 + let raw_query = ctx.request.uri().query().unwrap_or(""); 55 + let q = serde_qs::from_str(raw_query).map_err(|e| { 56 + HttpError::for_bad_request(None, format!("unable to parse query string: {}", e)) 57 + })?; 58 + Ok(q) 59 + } 60 + 61 + fn metadata(body_content_type: ApiEndpointBodyContentType) -> ExtractorMetadata { 62 + // HACK: query type switcheroo: passing MultiCollectionQuery to 63 + // `metadata` would "helpfully" panic because dropshot believes we can 64 + // only have scalar types in a query. 65 + // 66 + // so instead we have a fake second type whose only job is to look the 67 + // same as MultiCollectionQuery except that it has `String` instead of 68 + // `Vec<String>`, which dropshot will accept, and generate ~close-enough 69 + // docs for. 70 + <Query<MultiCollectionQueryForDocs> as SharedExtractor>::metadata(body_content_type) 71 + } 72 + }
+23
ufos/src/server/cors.rs
··· 1 + use dropshot::{HttpError, HttpResponseHeaders, HttpResponseOk}; 2 + use schemars::JsonSchema; 3 + use serde::Serialize; 4 + 5 + pub type OkCorsResponse<T> = Result<HttpResponseHeaders<HttpResponseOk<T>>, HttpError>; 6 + 7 + /// Helper for constructing Ok responses: return OkCors(T).into() 8 + /// (not happy with this yet) 9 + pub struct OkCors<T: Serialize + JsonSchema + Send + Sync>(pub T); 10 + 11 + impl<T> From<OkCors<T>> for OkCorsResponse<T> 12 + where 13 + T: Serialize + JsonSchema + Send + Sync, 14 + { 15 + fn from(ok: OkCors<T>) -> OkCorsResponse<T> { 16 + let mut res = HttpResponseHeaders::new_unnamed(HttpResponseOk(ok.0)); 17 + res.headers_mut() 18 + .insert("access-control-allow-origin", "*".parse().unwrap()); 19 + Ok(res) 20 + } 21 + } 22 + 23 + // TODO: cors for HttpError