···11use crate::index_html::INDEX_HTML;
22use crate::storage::StoreReader;
33-use crate::{ConsumerInfo, Count, Nsid, QueryPeriod, TopCollections, UFOsRecord};
33+use crate::{ConsumerInfo, Nsid, NsidCount, QueryPeriod, TopCollections, UFOsRecord};
44+use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine as _};
45use dropshot::endpoint;
56use dropshot::ApiDescription;
67use dropshot::Body;
···213214 ok_cors(seen_by_collection)
214215}
215216216216-/// Get all collections
217217-///
218218-/// TODO: paginate
219219-///
220220-/// WARNING: this endpoint will return an object instead of array when pagination is added
217217+#[derive(Debug, Serialize, JsonSchema)]
218218+struct CollectionsResponse {
219219+ /// Each known collection and its associated statistics
220220+ ///
221221+ /// The order is unspecified.
222222+ collections: Vec<NsidCount>,
223223+ /// Include in a follow-up request to get the next page of results, if more are available
224224+ cursor: Option<String>,
225225+}
226226+#[derive(Debug, Deserialize, JsonSchema)]
227227+struct AllCollectionsQuery {
228228+ /// The maximum number of collections to return in one request.
229229+ #[schemars(range(min = 1, max = 200), default = "all_collections_default_limit")]
230230+ limit: usize,
231231+ /// Always omit the cursor for the first request. If more collections than the limit are available, the response will contain a non-null `cursor` to include with the next request.
232232+ cursor: Option<String>,
233233+}
234234+fn all_collections_default_limit() -> usize {
235235+ 100
236236+}
221237#[endpoint {
222238 method = GET,
223239 path = "/collections/all"
224240}]
225225-async fn get_all_collections(ctx: RequestContext<Context>) -> OkCorsResponse<Vec<Count>> {
241241+/// Get all collections
242242+///
243243+/// There have been a lot of collections seen in the ATmosphere, well over 400 at time of writing, so you *will* need to make a series of paginated requests using the `cursor` response property and request parameter to get them all.
244244+///
245245+/// The set of collections across multiple requests is not guaranteed to be a perfectly consistent snapshot:
246246+///
247247+/// - all collection NSIDs observed before the first request will be included in the results
248248+///
249249+/// - *new* NSIDs observed in the firehose *while paging* might be included or excluded from the final set
250250+///
251251+/// - no duplicate NSIDs will occur in the combined results
252252+///
253253+/// In practice this is close enough for most use-cases to not worry about.
254254+async fn get_all_collections(
255255+ ctx: RequestContext<Context>,
256256+ query: Query<AllCollectionsQuery>,
257257+) -> OkCorsResponse<CollectionsResponse> {
226258 let Context { storage, .. } = ctx.context();
227227- let collections = storage
228228- .get_all_collections(QueryPeriod::all_time())
259259+ let q = query.into_inner();
260260+261261+ if !(1..=200).contains(&q.limit) {
262262+ let msg = format!("limit not in 1..=200: {}", q.limit);
263263+ return Err(HttpError::for_bad_request(None, msg));
264264+ }
265265+266266+ let cursor = q
267267+ .cursor
268268+ .and_then(|c| if c.is_empty() { None } else { Some(c) })
269269+ .map(|c| URL_SAFE_NO_PAD.decode(&c))
270270+ .transpose()
271271+ .map_err(|e| HttpError::for_bad_request(None, format!("invalid cursor: {e:?}")))?;
272272+273273+ let (collections, next_cursor) = storage
274274+ .get_all_collections(QueryPeriod::all_time(), q.limit, cursor)
229275 .await
230276 .map_err(|e| HttpError::for_internal_error(format!("oh shoot: {e:?}")))?;
231277232232- ok_cors(collections)
278278+ let next_cursor = next_cursor.map(|c| URL_SAFE_NO_PAD.encode(c));
279279+280280+ ok_cors(CollectionsResponse {
281281+ collections,
282282+ cursor: next_cursor,
283283+ })
233284}
234285235286/// Get top collections by record count
···237288 method = GET,
238289 path = "/collections/by-count"
239290}]
240240-async fn get_top_collections_by_count(ctx: RequestContext<Context>) -> OkCorsResponse<Vec<Count>> {
291291+async fn get_top_collections_by_count(
292292+ ctx: RequestContext<Context>,
293293+) -> OkCorsResponse<Vec<NsidCount>> {
241294 let Context { storage, .. } = ctx.context();
242295 let collections = storage
243296 .get_top_collections_by_count(100, QueryPeriod::all_time())
···252305 method = GET,
253306 path = "/collections/by-dids"
254307}]
255255-async fn get_top_collections_by_dids(ctx: RequestContext<Context>) -> OkCorsResponse<Vec<Count>> {
308308+async fn get_top_collections_by_dids(
309309+ ctx: RequestContext<Context>,
310310+) -> OkCorsResponse<Vec<NsidCount>> {
256311 let Context { storage, .. } = ctx.context();
257312 let collections = storage
258313 .get_top_collections_by_dids(100, QueryPeriod::all_time())