very fast at protocol indexer with flexible filtering, xrpc queries, cursor-backed event stream, and more, built on fjall
rust fjall at-protocol atproto indexer
60
fork

Configure Feed

Select the types of activity you want to include in your feed.

[lib,api] implement getting mini doc, blue.microcosm.identity.resolveMiniDoc and custom describeRepo

dawn 6624ccf7 b71d5223

+278 -19
+22 -1
README.md
··· 4 4 -> [vs tap](#vs-tap) | [stream](#stream-behavior) | [multi-relay](#multiple-relay-support) | [crawler sources](#crawler-sources)</br> 5 5 -> [configuration](#configuration)</br> 6 6 -> [rest api](#rest-api) | [filter](#filter-management) | [ingestion](#ingestion-control) | [crawler](#crawler-management) | [firehose](#firehose-management) | [repos](#repository-management)</br> 7 - -> [xrpc api](#data-access-xrpc) | [backlinks](#bluemicrocosmlinks) | [atproto](#comatproto) | [custom](#systemsgazehydrant) 7 + -> [xrpc api](#data-access-xrpc) | [backlinks](#bluemicrocosmlinks) | [identity](#bluemicrocosmidentity) | [atproto](#comatproto) | [custom](#systemsgazehydrant) 8 8 9 9 # hydrant 10 10 ··· 328 328 329 329 returns `{ count }`. 330 330 331 + #### systems.gaze.hydrant.describeRepo 332 + 333 + returns account and identity information about this repo. 334 + this is equal to `com.atproto.repo.describeRepo`, except we don't return the full DID document. 335 + the handle is bi-directionally verified; if it's invalid or the handle does not exist we return 336 + "handle.invalid". 337 + 338 + | param | required | description | 339 + | :--- | :--- | :--- | 340 + | `identifier` | yes | DID or handle of the repository. | 341 + 342 + returns `{ did, handle, signing_key, pds, collections }`. 343 + 331 344 ### blue.microcosm.links.* 332 345 333 346 <small>[<- back to toc](#table-of-contents)</small> ··· 365 378 | `source` | no | filter by source collection (same format as `getBacklinks`). | 366 379 367 380 returns `{ count }`. 381 + 382 + ### blue.microcosm.identity.* 383 + 384 + <small>[<- back to toc](#table-of-contents)</small> 385 + 386 + #### blue.microcosm.identity.resolveMiniDoc 387 + 388 + see [here](https://slingshot.microcosm.blue/#tag/slingshot-specific-queries/GET/xrpc/blue.microcosm.identity.resolveMiniDoc) for this XRPC's documentation.
+85
src/api/xrpc/describe_repo.rs
··· 1 + use std::collections::HashSet; 2 + 3 + use futures::TryFutureExt; 4 + use jacquard_common::types::{did::Did, nsid::Nsid, string::Handle}; 5 + use smol_str::SmolStr; 6 + 7 + use crate::control::repos::MiniDocError; 8 + use crate::db::types::DidKey; 9 + 10 + use super::*; 11 + 12 + #[derive(Serialize, Deserialize, jacquard_derive::IntoStatic)] 13 + pub struct DescribeRepoOutput<'d> { 14 + #[serde(borrow)] 15 + pub did: Did<'d>, 16 + #[serde(borrow)] 17 + pub handle: Handle<'d>, 18 + #[serde(serialize_with = "crate::util::did_key_serialize_str")] 19 + #[serde(borrow)] 20 + pub signing_key: DidKey<'d>, 21 + pub pds: SmolStr, 22 + #[serde(borrow)] 23 + pub collections: HashSet<Nsid<'d>>, 24 + } 25 + 26 + pub struct DescribeRepoResponse; 27 + impl jacquard_common::xrpc::XrpcResp for DescribeRepoResponse { 28 + const NSID: &'static str = "systems.gaze.hydrant.describeRepo"; 29 + const ENCODING: &'static str = "application/json"; 30 + type Output<'de> = DescribeRepoOutput<'de>; 31 + type Err<'de> = GenericXrpcError; 32 + } 33 + 34 + #[derive(Serialize, Deserialize, jacquard_derive::IntoStatic)] 35 + pub struct DescribeRepoRequestData<'i> { 36 + #[serde(borrow)] 37 + pub identifier: AtIdentifier<'i>, 38 + } 39 + 40 + impl<'a> jacquard_common::xrpc::XrpcRequest for DescribeRepoRequestData<'a> { 41 + type Response = DescribeRepoResponse; 42 + const NSID: &'static str = Self::Response::NSID; 43 + const METHOD: jacquard_common::xrpc::XrpcMethod = jacquard_common::xrpc::XrpcMethod::Query; 44 + } 45 + 46 + pub struct DescribeRepo; 47 + impl jacquard_common::xrpc::XrpcEndpoint for DescribeRepo { 48 + const PATH: &'static str = "/xrpc/systems.gaze.hydrant.describeRepo"; 49 + const METHOD: jacquard_common::xrpc::XrpcMethod = jacquard_common::xrpc::XrpcMethod::Query; 50 + type Request<'de> = DescribeRepoRequestData<'de>; 51 + type Response = DescribeRepoResponse; 52 + } 53 + 54 + pub async fn handle( 55 + State(hydrant): State<Hydrant>, 56 + ExtractXrpc(req): 
ExtractXrpc<DescribeRepo>, 57 + ) -> XrpcResult<Json<DescribeRepoOutput<'static>>> { 58 + let nsid = DescribeRepoResponse::NSID; 59 + let did = hydrant 60 + .state 61 + .resolver 62 + .resolve_did(&req.identifier) 63 + .await 64 + .map_err(|e| internal_error(nsid, format!("can't resolve identifier: {e}")))?; 65 + 66 + let repo = hydrant.repos.get(&did); 67 + let doc = repo.mini_doc().map_err(|e| match e { 68 + MiniDocError::NotSynced => bad_request(nsid, "repo not synced"), 69 + MiniDocError::RepoNotFound => bad_request(nsid, "repo not found"), 70 + MiniDocError::CouldNotResolveIdentity => { 71 + upstream_error(nsid, "identity could not be resolved") 72 + } 73 + MiniDocError::Other(e) => internal_error(nsid, e), 74 + }); 75 + let collections = repo.collections().map_err(|e| internal_error(nsid, e)); 76 + let (doc, collections) = tokio::try_join!(doc, collections)?; 77 + 78 + Ok(Json(DescribeRepoOutput { 79 + did: doc.did, 80 + handle: doc.handle, 81 + pds: doc.pds.to_smolstr(), 82 + signing_key: doc.signing_key, 83 + collections: collections.into_iter().map(|(k, _)| k).collect(), 84 + })) 85 + }
+20
src/api/xrpc/mod.rs
··· 1 1 use crate::api::xrpc::count_records::CountRecords; 2 + use crate::api::xrpc::describe_repo::DescribeRepo; 2 3 use crate::control::Hydrant; 3 4 use axum::extract::FromRequest; 4 5 use axum::response::IntoResponse; ··· 9 10 list_records::{ListRecordsOutput, ListRecordsRequest, Record as RepoRecord}, 10 11 }; 11 12 use jacquard_common::types::ident::AtIdentifier; 13 + use jacquard_common::xrpc::XrpcResp; 12 14 use jacquard_common::xrpc::{XrpcEndpoint, XrpcMethod}; 13 15 use jacquard_common::{IntoStatic, xrpc::XrpcRequest}; 14 16 use jacquard_common::{ ··· 20 22 use std::fmt::Display; 21 23 22 24 mod count_records; 25 + mod describe_repo; 23 26 mod get_record; 24 27 mod list_records; 25 28 ··· 28 31 .route(GetRecordRequest::PATH, get(get_record::handle)) 29 32 .route(ListRecordsRequest::PATH, get(list_records::handle)) 30 33 .route(CountRecords::PATH, get(count_records::handle)) 34 + .route(DescribeRepo::PATH, get(describe_repo::handle)) 31 35 } 32 36 33 37 #[derive(Debug)] ··· 110 114 }), 111 115 } 112 116 } 117 + 118 + fn upstream_error<E: std::error::Error + IntoStatic>( 119 + nsid: &'static str, 120 + message: impl Display, 121 + ) -> XrpcErrorResponse<E> { 122 + XrpcErrorResponse { 123 + status: StatusCode::BAD_GATEWAY, 124 + error: XrpcError::Generic(GenericXrpcError { 125 + error: "UpstreamError".into(), 126 + message: Some(message.to_smolstr()), 127 + nsid, 128 + method: "GET", 129 + http_status: StatusCode::BAD_GATEWAY, 130 + }), 131 + } 132 + }
+5 -5
src/control/mod.rs
··· 1 - mod crawler; 2 - mod filter; 3 - mod firehose; 4 - mod repos; 5 - mod stream; 1 + pub(crate) mod crawler; 2 + pub(crate) mod filter; 3 + pub(crate) mod firehose; 4 + pub(crate) mod repos; 5 + pub(crate) mod stream; 6 6 7 7 pub use crawler::{CrawlerHandle, CrawlerSourceInfo}; 8 8 pub use filter::{FilterControl, FilterPatch, FilterSnapshot};
+123 -8
src/control/repos.rs
··· 1 + use std::collections::HashMap; 1 2 use std::sync::Arc; 2 3 3 4 use chrono::{DateTime, Utc}; ··· 5 6 use jacquard_common::cowstr::ToCowStr; 6 7 use jacquard_common::types::cid::{Cid, IpldCid}; 7 8 use jacquard_common::types::ident::AtIdentifier; 9 + use jacquard_common::types::nsid::Nsid; 8 10 use jacquard_common::types::string::{Did, Handle, Rkey}; 9 11 use jacquard_common::types::tid::Tid; 10 12 use jacquard_common::{CowStr, Data, IntoStatic}; ··· 13 15 use smol_str::ToSmolStr; 14 16 use url::Url; 15 17 16 - use crate::db::types::{DbRkey, TrimmedDid}; 18 + use crate::db::types::{DbRkey, DidKey, TrimmedDid}; 17 19 use crate::db::{self, Db, keys, ser_repo_state}; 18 20 use crate::state::AppState; 19 21 use crate::types::{GaugeState, RepoState, RepoStatus}; ··· 37 39 #[serde(skip_serializing_if = "Option::is_none")] 38 40 pub data: Option<IpldCid>, 39 41 /// the handle for the DID of this repository. 42 + /// 43 + /// note that this handle is not bi-directionally verified. 40 44 #[serde(skip_serializing_if = "Option::is_none")] 41 45 pub handle: Option<Handle<'static>>, 42 46 /// the URL for the PDS in which this repository is hosted on. 43 47 #[serde(skip_serializing_if = "Option::is_none")] 44 48 pub pds: Option<Url>, 45 49 /// ATProto signing key of this repository. 50 + #[serde(serialize_with = "crate::util::opt_did_key_serialize_str")] 46 51 #[serde(skip_serializing_if = "Option::is_none")] 47 - pub signing_key: Option<String>, 52 + pub signing_key: Option<DidKey<'static>>, 48 53 /// when this repository was last touched (status update, commit ingested, etc.). 49 54 #[serde(skip_serializing_if = "Option::is_none")] 50 55 pub last_updated_at: Option<DateTime<Utc>>, ··· 154 159 } 155 160 156 161 /// gets a handle for a repository to read from it. 
157 - pub fn get<'i>(&self, did: &Did<'i>) -> Result<RepoHandle<'i>> { 158 - Ok(RepoHandle { 162 + pub fn get<'i>(&self, did: &Did<'i>) -> RepoHandle<'i> { 163 + RepoHandle { 159 164 state: self.0.clone(), 160 165 did: did.clone(), 161 - }) 166 + } 162 167 } 163 168 164 169 /// same as [`ReposControl::get`] but allows you to pass in an identifier that can be ··· 171 176 }) 172 177 } 173 178 174 - /// fetch the current state of repository. 179 + /// fetch the current state of a repository. 175 180 /// returns `None` if hydrant has never seen this repository. 176 181 pub async fn info(&self, did: &Did<'_>) -> Result<Option<RepoInfo>> { 177 - self.get(did)?.info().await 182 + self.get(did).info().await 178 183 } 179 184 180 185 fn _resync( ··· 384 389 data: s.data, 385 390 handle: s.handle.map(|h| h.into_static()), 386 391 pds: s.pds.and_then(|p| p.parse().ok()), 387 - signing_key: s.signing_key.map(|k| k.encode()), 392 + signing_key: s.signing_key.map(|k| k.into_static()), 388 393 last_updated_at: DateTime::from_timestamp_secs(s.last_updated_at), 389 394 last_message_at: s.last_message_time.and_then(DateTime::from_timestamp_secs), 390 395 } ··· 407 412 pub cursor: Option<Rkey<'static>>, 408 413 } 409 414 415 + #[derive(Debug, thiserror::Error)] 416 + pub enum MiniDocError { 417 + #[error("repo is not synced yet")] 418 + NotSynced, 419 + #[error("repo not found")] 420 + RepoNotFound, 421 + #[error("could not resolve identity")] 422 + CouldNotResolveIdentity, 423 + #[error("{0}")] 424 + Other(miette::Error), 425 + } 426 + 427 + /// a mini doc with a bi-directionally verified handle. 428 + pub struct MiniDoc<'i> { 429 + /// the did. 430 + pub did: Did<'i>, 431 + /// the handle. if verification fails or no handle is found, 432 + /// this will be "handle.invalid". 433 + pub handle: Handle<'i>, 434 + /// the url of the PDS of this repo. 435 + pub pds: Url, 436 + /// the atproto signing key of this repo. 
437 + pub signing_key: DidKey<'i>, 438 + } 439 + 410 440 /// handle to access data related to this repository. 411 441 #[derive(Clone)] 412 442 pub struct RepoHandle<'i> { ··· 415 445 } 416 446 417 447 impl<'i> RepoHandle<'i> { 448 + /// fetch the current state of this repository. 449 + /// returns `None` if hydrant has never seen this repository. 418 450 pub async fn info(&self) -> Result<Option<RepoInfo>> { 419 451 let did_key = keys::repo_key(&self.did); 420 452 let state = self.state.clone(); ··· 429 461 .into_diagnostic()? 430 462 } 431 463 464 + /// returns the collections of this repository and the number of records it has in each. 465 + pub async fn collections(&self) -> Result<HashMap<Nsid<'static>, u64>> { 466 + let did = self.did.clone().into_static(); 467 + let state = self.state.clone(); 468 + 469 + tokio::task::spawn_blocking(move || { 470 + let prefix = keys::did_collection_prefix(&did); 471 + let mut res = HashMap::new(); 472 + for item in state.db.counts.prefix(&prefix) { 473 + let (k, v) = item.into_inner().into_diagnostic()?; 474 + let col = k 475 + .strip_prefix(prefix.as_slice()) 476 + .ok_or_else(|| miette::miette!("invalid collection count key: {k:?}")) 477 + .and_then(|r| std::str::from_utf8(r).into_diagnostic()) 478 + .and_then(|n| Nsid::new(n).into_diagnostic())? 479 + .into_static(); 480 + let count = u64::from_be_bytes( 481 + v.as_ref() 482 + .try_into() 483 + .into_diagnostic() 484 + .wrap_err("expected to be count (8 bytes)")?, 485 + ); 486 + res.insert(col, count); 487 + } 488 + Ok(res) 489 + }) 490 + .await 491 + .into_diagnostic()? 492 + } 493 + 494 + /// returns a bi-directionally validated mini doc. 495 + pub async fn mini_doc(&self) -> Result<MiniDoc<'static>, MiniDocError> { 496 + fn invalid_handle() -> Handle<'static> { 497 + unsafe { Handle::unchecked("handle.invalid") } 498 + } 499 + 500 + let Some(info) = self.info().await.map_err(MiniDocError::Other)? 
else { 501 + return Err(MiniDocError::RepoNotFound); 502 + }; 503 + 504 + if info.status == RepoStatus::Backfilling { 505 + return Err(MiniDocError::NotSynced); 506 + } 507 + 508 + let pds = info 509 + .pds 510 + .ok_or_else(|| MiniDocError::CouldNotResolveIdentity)?; 511 + let signing_key = info 512 + .signing_key 513 + .ok_or_else(|| MiniDocError::CouldNotResolveIdentity)? 514 + .into_static(); 515 + 516 + let handle = if let Some(handle_unverified) = info.handle { 517 + let id = AtIdentifier::Handle(handle_unverified); 518 + let handle_did = self 519 + .state 520 + .resolver 521 + .resolve_did(&id) 522 + .await 523 + .into_diagnostic() 524 + .map_err(MiniDocError::Other)?; 525 + 526 + (handle_did == self.did) 527 + .then(|| match id { 528 + AtIdentifier::Handle(h) => h, 529 + _ => unreachable!("can only be handle"), 530 + }) 531 + .unwrap_or_else(invalid_handle) 532 + } else { 533 + invalid_handle() 534 + }; 535 + 536 + Ok(MiniDoc { 537 + did: self.did.clone().into_static(), 538 + handle, 539 + pds, 540 + signing_key, 541 + }) 542 + } 543 + 544 + /// gets a record from this repository. 432 545 pub async fn get_record(&self, collection: &str, rkey: &str) -> Result<Option<Record>> { 433 546 let did = self.did.clone().into_static(); 434 547 let db_key = keys::record_key(&did, collection, &DbRkey::new(rkey)); ··· 464 577 .into_diagnostic()? 465 578 } 466 579 580 + /// lists records from this repository. 467 581 pub async fn list_records( 468 582 &self, 469 583 collection: &str, ··· 559 673 }) 560 674 } 561 675 676 + /// gets how many records of a collection this repository has. 562 677 pub async fn count_records(&self, collection: &str) -> Result<u64> { 563 678 let did = self.did.clone().into_static(); 564 679 let state = self.state.clone();
+8 -4
src/db/keys.rs
··· 110 110 111 111 pub const COUNT_COLLECTION_PREFIX: &[u8] = &[b'r', SEP]; 112 112 113 - // key format: r|{DID}|{collection} (DID trimmed) 114 - pub fn count_collection_key(did: &Did, collection: &str) -> Vec<u8> { 113 + pub fn did_collection_prefix(did: &Did) -> Vec<u8> { 115 114 let repo = TrimmedDid::from(did); 116 - let mut key = 117 - Vec::with_capacity(COUNT_COLLECTION_PREFIX.len() + repo.len() + 1 + collection.len()); 115 + let mut key = Vec::with_capacity(COUNT_COLLECTION_PREFIX.len() + repo.len() + 1); 118 116 key.extend_from_slice(COUNT_COLLECTION_PREFIX); 119 117 repo.write_to_vec(&mut key); 120 118 key.push(SEP); 119 + key 120 + } 121 + 122 + // key format: r|{DID}|{collection} (DID trimmed) 123 + pub fn count_collection_key(did: &Did, collection: &str) -> Vec<u8> { 124 + let mut key = did_collection_prefix(did); 121 125 key.extend_from_slice(collection.as_bytes()); 122 126 key 123 127 }
+15 -1
src/util.rs
··· 8 8 use tracing::info; 9 9 use url::Url; 10 10 11 - use crate::types::RepoStatus; 11 + use crate::{db::types::DidKey, types::RepoStatus}; 12 12 13 13 /// outcome of [`RetryWithBackoff::retry`] when the operation does not succeed. 14 14 pub enum RetryOutcome<E> { ··· 149 149 pub fn opt_cid_serialize_str<S: Serializer>(v: &Option<cid::Cid>, s: S) -> Result<S::Ok, S::Error> { 150 150 match v { 151 151 Some(cid) => s.serialize_some(cid.to_string().as_str()), 152 + None => s.serialize_none(), 153 + } 154 + } 155 + 156 + pub fn did_key_serialize_str<S: Serializer>(v: &DidKey<'_>, s: S) -> Result<S::Ok, S::Error> { 157 + s.serialize_str(&v.encode()) 158 + } 159 + 160 + pub fn opt_did_key_serialize_str<S: Serializer>( 161 + v: &Option<DidKey<'_>>, 162 + s: S, 163 + ) -> Result<S::Ok, S::Error> { 164 + match v { 165 + Some(k) => s.serialize_some(k.encode().as_str()), 152 166 None => s.serialize_none(), 153 167 } 154 168 }