···11+use crate::{
22+ http::{BytesStreamCappedError, PORXIE_USER_AGENT, bytes_stream_capped},
33+ identity_service::IdentityService,
44+ mime::{is_mime_allowed, sniff_mime},
55+ types::blob_cid::BlobCid,
66+};
77+use bytes::Bytes;
88+use cid::Cid;
99+use jacquard_common::types::did::Did;
1010+use mime::Mime;
1111+use moka::{future::Cache as MokaCache, policy::EvictionPolicy};
1212+use multihash_codetable::{Code, MultihashDigest};
1313+use reqwest::{StatusCode, header, header::HeaderValue};
1414+use std::{num::NonZeroU64, sync::Arc, time::Duration};
1515+use thiserror::Error;
1616+use tracing::instrument;
1717+1818+#[derive(Debug, Error)]
1919+pub enum CreateBlobServiceError {
2020+ /// An internal http client error occurred, see [`reqwest::Error`].
2121+ #[error(transparent)]
2222+ HttpClient(#[from] reqwest::Error),
2323+}
2424+2525+#[derive(Debug, Error)]
2626+pub enum BlobDownloadError {
2727+ /// The blob resolver returned an error.
2828+ #[error("blob resolver returned an error")]
2929+ BlobResolutionFailure,
3030+ /// The blob's computed CID does not match the requested CID.
3131+ #[error("blob's computed CID does not match the requested CID")]
3232+ CidMismatch,
3333+ /// The requested CID uses an unsupported multihash algorithm.
3434+ #[error("requested CID uses an unsupported multihash algorithm")]
3535+ CidUnsupportedMultihash,
3636+ /// The blob could not be found at the requested address.
3737+ #[error("blob could not be found at the requested address")]
3838+ NotFound,
3939+ /// The blob exceeds the maximum size permitted by this server.
4040+ #[error("blob exceeded the maximum size")]
4141+ TooLarge,
4242+ /// The origin returned a non-successful status code while fetching the blob,
4343+ /// excluding 404 which is handled by [`Self::NotFound`].
4444+ #[error("origin returned an unsuccessful status code")]
4545+ ErrorStatusCode,
4646+ /// The request to the origin failed.
4747+ #[error("the request to the origin failed")]
4848+ FetchFailure,
4949+ /// The blob stream was interrupted before it could be fully downloaded,
5050+ /// for example due to the connection being unexpectedly reset.
5151+ #[error("the blob stream was interrupted before completion")]
5252+ StreamFailed,
5353+ /// The blob's detected MIME type is not permitted by this server.
5454+ #[error("blob's mimetype was not in the allowlist")]
5555+ ForbiddenMimeType,
5656+}
5757+5858+#[derive(Debug, Error)]
5959+pub enum BlobOwnershipError {
6060+ /// The blob resolver returned an error.
6161+ #[error("blob resolver returned an error")]
6262+ BlobResolutionFailure,
6363+ /// The blob could not be found in the user's repository.
6464+ #[error("blob could not be found at the requested address")]
6565+ NotFound,
6666+ /// The origin returned a non-successful status code while fetching the blob,
6767+ /// excluding 404 which is handled by [`Self::NotFound`].
6868+ #[error("origin returned an unsuccessful status code")]
6969+ ErrorStatusCode,
7070+ /// The request to the origin failed.
7171+ #[error("the request to the origin failed")]
7272+ FetchFailure,
7373+}
7474+7575+#[derive(Clone)]
7676+pub struct BlobData {
7777+ pub bytes: Bytes,
7878+ pub mime_type: Mime,
7979+}
8080+8181+pub enum BlobUrlResolver<'a> {
8282+ Pds {
8383+ identity_service: &'a IdentityService,
8484+ },
8585+}
/// Tunables consumed by [`BlobService::new`].
#[derive(Debug, Clone, Copy)]
pub struct BlobServiceOptions {
    /// Maximum total weight of the blob data cache; each entry weighs its byte length.
    pub data_cache_max_capacity: u64,
    /// Time-to-idle applied to entries of the blob data cache.
    pub data_cache_tti: Duration,
    /// Maximum total weight of the ownership cache; each entry weighs the
    /// encoded CID length plus the DID length.
    pub ownership_cache_max_capacity: u64,
    /// Time-to-live applied to entries of the ownership cache.
    pub ownership_cache_ttl: Duration,
    /// Overall timeout applied to every HTTP request made by the service.
    pub http_timeout: Duration,
    /// Connection-phase timeout applied to every HTTP request made by the service.
    pub http_connect_timeout: Duration,
}
9696+9797+pub struct BlobService {
9898+ data_cache: MokaCache<BlobCid, BlobData>,
9999+ ownership_cache: MokaCache<(BlobCid, Did<'static>), ()>,
100100+ http_client: reqwest::Client,
101101+}
102102+103103+impl BlobService {
104104+ pub fn new(options: BlobServiceOptions) -> Result<Self, CreateBlobServiceError> {
105105+ tracing::debug!("creating blob service with options: {options:?}");
106106+ Ok(Self {
107107+ data_cache: MokaCache::<BlobCid, BlobData>::builder()
108108+ .name("blob-content")
109109+ .weigher(|_key, value| value.bytes.len().try_into().unwrap_or(u32::MAX))
110110+ .eviction_policy(EvictionPolicy::tiny_lfu())
111111+ .max_capacity(options.data_cache_max_capacity)
112112+ .time_to_idle(options.data_cache_tti)
113113+ .build(),
114114+ ownership_cache: MokaCache::<(BlobCid, Did<'static>), ()>::builder()
115115+ .name("blob-ownership")
116116+ .weigher(|key, _value| {
117117+ (key.0.encoded_len() + key.1.len())
118118+ .try_into()
119119+ .unwrap_or(u32::MAX)
120120+ })
121121+ .eviction_policy(EvictionPolicy::tiny_lfu())
122122+ .max_capacity(options.ownership_cache_max_capacity)
123123+ .time_to_live(options.ownership_cache_ttl)
124124+ .support_invalidation_closures()
125125+ .build(),
126126+ http_client: reqwest::Client::builder()
127127+ .user_agent(PORXIE_USER_AGENT)
128128+ .https_only(cfg!(debug_assertions))
129129+ .redirect(reqwest::redirect::Policy::limited(3))
130130+ .gzip(true)
131131+ .brotli(true)
132132+ .zstd(true)
133133+ .deflate(true)
134134+ .connect_timeout(options.http_connect_timeout)
135135+ .timeout(options.http_timeout)
136136+ .build()
137137+ .map_err(CreateBlobServiceError::HttpClient)?,
138138+ })
139139+ }
140140+141141+ /// Fetch the given blob either from the cache if available or from the upstream source.
142142+ ///
143143+ /// Concurrent requests for the same blob are coalesced.
144144+ /// If the initial fetch fails, the next pending request will
145145+ /// try instead, continuing until one succeeds or all have failed.
146146+ #[instrument(skip_all, fields(did = %did, cid = %cid))]
147147+ pub async fn fetch_blob(
148148+ &self,
149149+ did: &Did<'static>,
150150+ cid: &BlobCid,
151151+ url_resolver: BlobUrlResolver<'_>,
152152+ max_blob_size: NonZeroU64,
153153+ allowed_mimetypes: &[Mime],
154154+ ) -> Result<BlobData, Arc<BlobDownloadError>> {
155155+ tracing::debug!("fetching blob from origin");
156156+157157+ self.data_cache
158158+ .try_get_with_by_ref(cid, async {
159159+ let blob_url = match url_resolver {
160160+ BlobUrlResolver::Pds {
161161+ identity_service: identity_resolver,
162162+ } => {
163163+ let mut url = identity_resolver
164164+ .pds_for_did(did)
165165+ .await
166166+ .map_err(|_| BlobDownloadError::BlobResolutionFailure)?;
167167+ url.set_path("/xrpc/com.atproto.sync.getBlob");
168168+ url.query_pairs_mut()
169169+ .append_pair("did", did.as_str())
170170+ .append_pair("cid", &cid.to_string());
171171+ url
172172+ }
173173+ };
174174+175175+ let validated_bytes = {
176176+ let response = self.http_client.get(blob_url).send().await.map_err(|err| {
177177+ tracing::warn!("failed to request blob from origin: {err:?}");
178178+ BlobDownloadError::FetchFailure
179179+ })?;
180180+181181+ // Gracefully handle & abort if we do not receive a successful status code.
182182+ if !response.status().is_success() {
183183+ return Err(match response.status() {
184184+ StatusCode::NOT_FOUND => {
185185+ tracing::debug!("origin returned 404 for blob");
186186+ BlobDownloadError::NotFound
187187+ }
188188+ status => {
189189+ tracing::debug!("origin returned error status for blob: {status}");
190190+ BlobDownloadError::ErrorStatusCode
191191+ }
192192+ });
193193+ }
194194+195195+ // Download bytes as a stream, enforcing a max size limit
196196+ // and aborting if it's crossed.
197197+ let bytes = bytes_stream_capped(response, max_blob_size).await.map_err(
198198+ |err| match err {
199199+ BytesStreamCappedError::TooLarge => {
200200+ tracing::debug!("blob exceeds max size of {} bytes", max_blob_size);
201201+ BlobDownloadError::TooLarge
202202+ }
203203+ BytesStreamCappedError::ClientError(err) => {
204204+ tracing::warn!("error reading blob stream: {err:?}");
205205+ BlobDownloadError::StreamFailed
206206+ }
207207+ },
208208+ )?;
209209+210210+ // Verify request CID matches the blob's computed CID.
211211+ //
212212+ // This operation is done via spawn_blocking as creating the digest will block
213213+ // this task's executor from switching to other tasks for as long it runs.
214214+ tokio::task::spawn_blocking({
215215+ let bytes = bytes.clone();
216216+ let cid = *cid;
217217+ move || {
218218+ // Enabled Multihashes are set in the multihash-codetable crate features.
219219+ let computed_cid = match Code::try_from(cid.hash().code()) {
220220+ Ok(code) => Ok(Cid::new_v1(
221221+ 0x55, // RaW codec
222222+ code.digest(&bytes),
223223+ )),
224224+ Err(err) => {
225225+ tracing::warn!("failed to compute CID: {err:?}");
226226+ Err(BlobDownloadError::CidUnsupportedMultihash)
227227+ }
228228+ }?;
229229+230230+ if computed_cid != *cid {
231231+ tracing::warn!(
232232+ "cid mismatch: computed {computed_cid} expected {cid}"
233233+ );
234234+ return Err(BlobDownloadError::CidMismatch);
235235+ }
236236+237237+ Ok(())
238238+ }
239239+ })
240240+ .await
241241+ .expect("CID computing task should not panic")?;
242242+243243+ bytes
244244+ };
245245+246246+ // Infer MIME type from content bytes rather than headers; this is fallible
247247+ // and falls back to application/octet-stream if the type is unrecognised.
248248+ let mime_type = sniff_mime(&validated_bytes);
249249+ if !is_mime_allowed(&mime_type, allowed_mimetypes) {
250250+ tracing::debug!("blob was inferred to be a disallowed mime type: {mime_type}");
251251+ return Err(BlobDownloadError::ForbiddenMimeType);
252252+ }
253253+254254+ // Mark this DID+CID pair as ownership-verified since we just fetched it from the origin.
255255+ self.ownership_cache.insert((*cid, did.clone()), ()).await;
256256+257257+ Ok(BlobData {
258258+ bytes: validated_bytes,
259259+ mime_type,
260260+ })
261261+ })
262262+ .await
263263+ }
264264+265265+ pub async fn invalidate_blob(&self, cid: &BlobCid) {
266266+ self.data_cache.invalidate(cid).await
267267+ }
268268+269269+ /// Fetch whether the user owns the given blob either from the cache if available or the upstream source.
270270+ ///
271271+ /// The internal cache will be automatically populated if the blob was previously fetched from the same user.
272272+ #[instrument(skip_all, fields(did = %did, cid = %cid))]
273273+ pub async fn fetch_blob_ownership(
274274+ &self,
275275+ did: &Did<'static>,
276276+ cid: BlobCid,
277277+ url_resolver: BlobUrlResolver<'_>,
278278+ ) -> Result<(), Arc<BlobOwnershipError>> {
279279+ tracing::debug!("verifying ownership of blob");
280280+281281+ self.ownership_cache
282282+ // TODO: Remove clone on DID.
283283+ .try_get_with((cid, did.clone()), async {
284284+ let blob_url = match url_resolver {
285285+ BlobUrlResolver::Pds {
286286+ identity_service: identity_resolver,
287287+ } => {
288288+ let mut url = identity_resolver
289289+ .pds_for_did(did)
290290+ .await
291291+ .map_err(|_| BlobOwnershipError::BlobResolutionFailure)?;
292292+ url.set_path("/xrpc/com.atproto.sync.getBlob");
293293+ url.query_pairs_mut()
294294+ .append_pair("did", did.as_str())
295295+ .append_pair("cid", &cid.to_string());
296296+ url
297297+ }
298298+ };
299299+300300+ // Request the blob with as little of the actual body as we can.
301301+ //
302302+ // While some origins (bsky pds, tranquil pds) may support HTTP HEAD, it is not
303303+ // actually a part of the XRPC specification and we cannot rely on it (for now).
304304+ // Use a range request to avoid downloading the full body on servers that support it instead.
305305+ match self
306306+ .http_client
307307+ .get(blob_url)
308308+ .header(
309309+ header::RANGE,
310310+ const { HeaderValue::from_static("bytes=0-1023") },
311311+ )
312312+ .send()
313313+ .await
314314+ .map_err(|err| {
315315+ tracing::warn!("failed to request blob from origin: {err:?}");
316316+ BlobOwnershipError::FetchFailure
317317+ })?
318318+ .status()
319319+ {
320320+ status if status.is_success() => {
321321+ tracing::debug!("verified ownership of blob");
322322+ Ok(())
323323+ }
324324+ StatusCode::NOT_FOUND => {
325325+ tracing::debug!("origin returned 404 for blob");
326326+ Err(BlobOwnershipError::NotFound)
327327+ }
328328+ status => {
329329+ tracing::debug!("origin returned error status for blob: {}", status);
330330+ Err(BlobOwnershipError::ErrorStatusCode)
331331+ }
332332+ }
333333+ })
334334+ .await
335335+ }
336336+337337+ pub fn invalidate_blob_ownership<
338338+ F: Fn(&(BlobCid, Did<'static>), &()) -> bool + Send + Sync + 'static,
339339+ >(
340340+ &self,
341341+ predicate: F,
342342+ ) {
343343+ if let Err(err) = self.ownership_cache.invalidate_entries_if(predicate) {
344344+ tracing::error!(
345345+ "blob service has not enabled support for invalidation closures: {err:?}"
346346+ );
347347+ }
348348+ }
349349+}
···11use mime::Mime;
2233-/// Sniff the MIME type from the given bytes, returning `application/octet-stream` if unknown.
33+/// Sniff the MIME type from the given bytes.
44+///
55+/// Returns [`mime::APPLICATION_OCTET_STREAM`] when unknown.
46#[must_use]
57pub fn sniff_mime(buf: &[u8]) -> Mime {
66- // WORKAROUND: infer does not correctly detect SVG.
77- // I have created PR to fix this at https://github.com/bojand/infer/pull/119
88- // Until that is merged, this case will work around that limitation.
99- const SVG_MARKER: &[u8; 4] = b"<svg";
1010- const XML_MARKER: &[u8; 5] = b"<?xml";
1111- const XML_SNIFFAHEAD: usize = 256; // How far after the XML marker to sniff ahead for the SVG marker.
1212- if buf.starts_with(SVG_MARKER)
1313- || (buf.starts_with(XML_MARKER)
1414- && buf
1515- .get(..XML_SNIFFAHEAD)
1616- .unwrap_or(buf)
1717- .windows(SVG_MARKER.len())
1818- .any(|w| w == SVG_MARKER))
1919- {
2020- return mime::IMAGE_SVG;
2121- }
2222-238 match infer::get(buf) {
249 Some(m) => m
2510 .mime_type()
2611 .parse()
2712 .expect("infer mimetype should always be valid"),
2828- None => mime::APPLICATION_OCTET_STREAM,
1313+ None => {
1414+ // WORKAROUND: infer does not correctly detect SVG.
1515+ // I have created PR to fix this at https://github.com/bojand/infer/pull/119
1616+ // Until that is merged, this case will work around that limitation.
1717+ const SVG_MARKER: &[u8; 4] = b"<svg";
1818+ const XML_MARKER: &[u8; 5] = b"<?xml";
1919+ const XML_SNIFFAHEAD: usize = 256; // How far after the XML marker to sniff ahead for the SVG marker.
2020+ if buf.len() >= 4 && buf.starts_with(SVG_MARKER)
2121+ || (buf.starts_with(XML_MARKER)
2222+ && buf
2323+ .get(..XML_SNIFFAHEAD)
2424+ .unwrap_or(buf)
2525+ .windows(SVG_MARKER.len())
2626+ .any(|w| w == SVG_MARKER))
2727+ {
2828+ tracing::debug!("used svg workaround instead of regular inference");
2929+ return mime::IMAGE_SVG;
3030+ }
3131+ tracing::debug!("infer was unable to determine mimetype, using fallback value");
3232+ mime::APPLICATION_OCTET_STREAM
3333+ }
2934 }
3035}
31363737+/// Whether the given [`Mime`] is apart of the allowed array by
3838+/// checking if it matches directly or by wildcard.
3239#[must_use]
3340pub fn is_mime_allowed(mime: &Mime, allowed: &[Mime]) -> bool {
3441 const STAR: &str = "*";
···6370 use std::str::FromStr;
64716572 #[test]
6666- fn test_is_mime_allowed() {
6767- // Test PNG when nothing is allowed.
6868- assert_eq!(
6969- super::is_mime_allowed(&Mime::from_str("image/png").unwrap(), &[]),
7070- false
7171- );
7373+ fn no_match() {
7474+ // PNG when nothing is allowed.
7575+ assert!(!super::is_mime_allowed(
7676+ &Mime::from_str("image/png").unwrap(),
7777+ &[]
7878+ ));
7979+ }
8080+8181+ #[test]
8282+ fn exact_match() {
8383+ // PNG when PNG is allowed.
8484+ assert!(super::is_mime_allowed(
8585+ &Mime::from_str("image/png").unwrap(),
8686+ &[mime::IMAGE_PNG],
8787+ ));
72887373- // Test PNG when PNG is allowed.
7474- assert_eq!(
7575- super::is_mime_allowed(&Mime::from_str("image/png").unwrap(), &[mime::IMAGE_PNG],),
7676- true
7777- );
8989+ // PNG when only JPG is allowed.
9090+ assert!(!super::is_mime_allowed(
9191+ &Mime::from_str("image/png").unwrap(),
9292+ &[mime::IMAGE_JPEG],
9393+ ));
9494+ }
78957979- // Test PNG when only JPG is allowed.
8080- assert_eq!(
8181- super::is_mime_allowed(&Mime::from_str("image/png").unwrap(), &[mime::IMAGE_JPEG],),
8282- false
8383- );
9696+ #[test]
9797+ fn full_wildcard() {
9898+ // PNG when anything is allowed.
9999+ assert!(super::is_mime_allowed(
100100+ &Mime::from_str("image/png").unwrap(),
101101+ &[mime::STAR_STAR],
102102+ ));
103103+ }
841048585- // Test PNG when any image subtype is allowed.
8686- assert_eq!(
8787- super::is_mime_allowed(&Mime::from_str("image/png").unwrap(), &[mime::IMAGE_STAR],),
8888- true
8989- );
105105+ #[test]
106106+ fn subtype_wildcard() {
107107+ // PNG when any image subtype is allowed.
108108+ assert!(super::is_mime_allowed(
109109+ &Mime::from_str("image/png").unwrap(),
110110+ &[mime::IMAGE_STAR]
111111+ ));
901129191- // Test PNG when anything is allowed.
9292- assert_eq!(
9393- super::is_mime_allowed(&Mime::from_str("image/png").unwrap(), &[mime::STAR_STAR],),
9494- true
9595- );
113113+ // PNG when images and text are enabled.
114114+ assert!(super::is_mime_allowed(
115115+ &Mime::from_str("image/png").unwrap(),
116116+ &[mime::TEXT_STAR, mime::IMAGE_STAR],
117117+ ));
9611897119 // Test HTML when any image subtype is enabled.
9898- assert_eq!(
9999- super::is_mime_allowed(&Mime::from_str("text/html").unwrap(), &[mime::IMAGE_STAR],),
100100- false
101101- );
102102-103103- // Test PNG when images and text are enabled.
104104- assert_eq!(
105105- super::is_mime_allowed(
106106- &Mime::from_str("image/png").unwrap(),
107107- &[mime::TEXT_STAR, mime::IMAGE_STAR],
108108- ),
109109- true
110110- );
120120+ assert!(!super::is_mime_allowed(
121121+ &Mime::from_str("text/html").unwrap(),
122122+ &[mime::IMAGE_STAR],
123123+ ));
111124 }
112125}
+161
src/policy_client.rs
···11+use crate::{http::PORXIE_USER_AGENT, types::blob_cid::BlobCid};
22+use jacquard_common::types::did::Did;
33+use moka::{future::Cache as MokaCache, policy::EvictionPolicy};
44+use reqwest::{
55+ StatusCode, Url,
66+ header::{HeaderName, HeaderValue},
77+};
88+use std::{sync::Arc, time::Duration};
99+use thiserror::Error;
1010+use tracing::instrument;
/// Outcome of a policy service lookup for a single blob.
#[derive(Debug, Clone)]
pub struct PolicyDecision {
    /// Whether the policy service allows this blob to be served.
    pub can_serve: bool,
}
1717+1818+#[derive(Debug, Error)]
1919+#[non_exhaustive]
2020+pub enum CreatePolicyClientError {
2121+ /// An internal http client error occurred, see [`reqwest::Error`].
2222+ #[error(transparent)]
2323+ HttpClient(#[from] reqwest::Error),
2424+}
2525+2626+#[derive(Debug, Error)]
2727+#[non_exhaustive]
2828+pub enum GetBlobPolicyError {
2929+ /// Policy service returned an unhandled status code (Not 200 OK or 410 GONE).
3030+ #[error("received an unhandled status code from the policy service: {0}")]
3131+ UnhandledStatusCode(StatusCode),
3232+ /// An internal http client error occurred, see [`reqwest::Error`].
3333+ #[error(transparent)]
3434+ HttpClient(#[from] reqwest::Error),
3535+}
3636+3737+#[derive(Debug, Clone)]
3838+pub struct PolicyClientOptions {
3939+ /// Maximum size in memory this cache is permitted to grow to.
4040+ pub cache_max_memory_allocation: u64,
4141+ /// Time-to-live duration of items in the cache.
4242+ pub cache_ttl: Duration,
4343+ /// HTTP timeout to apply to all identity requests.
4444+ pub http_timeout: Duration,
4545+ /// HTTP connection-phase timeout to apply to all policy requests.
4646+ pub http_connect_timeout: Duration,
4747+ /// URL to the policy service to query.
4848+ pub policy_service_url: Url,
4949+ /// Additional request headers to append to each policy service request.
5050+ pub policy_service_req_headers: Vec<(HeaderName, HeaderValue)>,
5151+}
5252+5353+pub struct PolicyClient {
5454+ cache: MokaCache<(Did<'static>, BlobCid), PolicyDecision>,
5555+ http_client: reqwest::Client,
5656+ policy_service_req_headers: Vec<(HeaderName, HeaderValue)>,
5757+ policy_service_url: Url,
5858+}
5959+6060+impl PolicyClient {
6161+ /// Create a new policy client.
6262+ pub fn new(options: PolicyClientOptions) -> Result<Self, CreatePolicyClientError> {
6363+ tracing::debug!("creating policy service client with options: {options:?}");
6464+ Ok(Self {
6565+ cache: MokaCache::<(Did<'static>, BlobCid), PolicyDecision>::builder()
6666+ .name("blob-policy")
6767+ .weigher(|key, _value| {
6868+ (key.0.len() + key.1.encoded_len())
6969+ .try_into()
7070+ .unwrap_or(u32::MAX)
7171+ })
7272+ .eviction_policy(EvictionPolicy::tiny_lfu())
7373+ .max_capacity(options.cache_max_memory_allocation)
7474+ .time_to_live(options.cache_ttl)
7575+ .support_invalidation_closures()
7676+ .build(),
7777+ http_client: reqwest::Client::builder()
7878+ .user_agent(PORXIE_USER_AGENT)
7979+ .https_only(false)
8080+ .redirect(reqwest::redirect::Policy::limited(2))
8181+ .gzip(true)
8282+ .brotli(true)
8383+ .zstd(true)
8484+ .deflate(true)
8585+ .connect_timeout(options.http_connect_timeout)
8686+ .timeout(options.http_timeout)
8787+ .build()
8888+ .map_err(CreatePolicyClientError::HttpClient)?,
8989+ policy_service_url: options.policy_service_url,
9090+ policy_service_req_headers: options.policy_service_req_headers,
9191+ })
9292+ }
9393+9494+ /// Query the policy service for the policy decision of this blob.
9595+ ///
9696+ /// Concurrent requests for the same policy are coalesced.
9797+ #[instrument(skip_all, fields(did = %did, cid = %cid))]
9898+ pub async fn get_policy_for_blob(
9999+ &self,
100100+ did: &Did<'static>,
101101+ cid: BlobCid,
102102+ ) -> Result<PolicyDecision, Arc<GetBlobPolicyError>> {
103103+ self.cache
104104+ .try_get_with_by_ref(&(did.clone(), cid), async {
105105+ tracing::debug!("querying policy service for the status");
106106+107107+ let mut policy_service_url = self.policy_service_url.clone();
108108+ policy_service_url
109109+ .path_segments_mut()
110110+ .expect("policy service URL should not be cannot-be-a-base")
111111+ .push(did.as_str())
112112+ .push(&cid.to_string());
113113+114114+ let mut request = self.http_client.get(policy_service_url);
115115+ for (name, value) in &self.policy_service_req_headers {
116116+ request = request.header(name, value);
117117+ }
118118+119119+ match request.send().await {
120120+ Ok(response) => match response.status() {
121121+ StatusCode::OK => {
122122+ tracing::debug!("policy service allowed blob serving");
123123+ Ok(PolicyDecision { can_serve: true })
124124+ }
125125+ StatusCode::GONE => {
126126+ tracing::debug!("policy service forbids blob serving");
127127+ Ok(PolicyDecision { can_serve: false })
128128+ }
129129+ status => {
130130+ tracing::error!("policy service returned unexpected status: {status}");
131131+ Err(GetBlobPolicyError::UnhandledStatusCode(status))
132132+ }
133133+ },
134134+ Err(err) => {
135135+ tracing::error!("error occurred contacting the policy service: {err:?}");
136136+ Err(GetBlobPolicyError::HttpClient(err))
137137+ }
138138+ }
139139+ })
140140+ .await
141141+ }
142142+143143+ /// Invalidate cached policy decisions with the given predicate.
144144+ pub fn invalidate_policies<
145145+ F: Fn(&(Did<'static>, BlobCid), &PolicyDecision) -> bool + Send + Sync + 'static,
146146+ >(
147147+ &self,
148148+ predicate: F,
149149+ ) {
150150+ if let Err(err) = self.cache.invalidate_entries_if(predicate) {
151151+ tracing::error!(
152152+ "policy client cache has not enabled support for invalidation closures: {err:?}"
153153+ );
154154+ }
155155+ }
156156+}
157157+158158+#[cfg(test)]
159159+mod tests {
160160+ // TODO: Create an in-process mock policy service to write tests against.
161161+}
+83-321
src/routes/blob/get.rs
···11-use crate::http::{BytesStreamCappedError, bytes_stream_capped};
22-use crate::routes::ErrorResponse;
33-use crate::types::blob_cid::BlobCid;
41use crate::{
52 AppState,
66- cache::{CachedBlobData, CachedBlobPolicy},
77- mime::{is_mime_allowed, sniff_mime},
33+ blob_service::{BlobDownloadError, BlobOwnershipError, BlobUrlResolver},
44+ routes::{CACHE_CONTROL_NOCACHE_VALUE, ErrorResponse},
55+ types::blob_cid::BlobCid,
86};
99-use axum::Json;
107use axum::{
88+ Json,
119 body::Body,
1210 extract::{Path, State},
1313- http::{HeaderMap, HeaderValue, Response, StatusCode, header},
1111+ http::{HeaderName, HeaderValue, StatusCode, header},
1212+ response::Response,
1413};
1515-use cid::Cid;
1614use jacquard_common::types::did::Did;
1717-use jacquard_identity::resolver::IdentityResolver;
1818-use multihash_codetable::{Code, MultihashDigest};
1919-use reqwest::Url;
2015use std::sync::Arc;
21162222-enum BlobPolicyError {
2323- /// The policy service returned an unexpected status code.
2424- UnhandledStatusCode,
2525- /// The request to the policy service failed, for example due to the server being unavailable.
2626- FetchFailed,
2727-}
2828-2929-enum BlobDownloadError {
3030- /// Failed to resolve the PDS for the given DID. The DID may be invalid or the
3131- /// resolver may be unavailable.
3232- DidPdsResolutionFailure,
3333- /// The blob's computed CID does not match the requested CID.
3434- CidMismatch,
3535- /// The requested CID uses a multihash algorithm unsupported by this server.
3636- CidUnsupportedMultihash,
3737- /// The blob could not be found in the user's repository.
3838- NotFound,
3939- /// The blob exceeds the maximum size permitted by this server.
4040- TooLarge,
4141- /// The PDS returned a non-successful status code while fetching the blob,
4242- /// excluding 404 which is handled by [`Self::NotFound`].
4343- ErrorStatusCode,
4444- /// The request to the PDS failed, for example due to the server being unavailable.
4545- FetchFailure,
4646- /// The blob stream was interrupted before it could be fully downloaded,
4747- /// for example due to the connection being unexpectedly reset.
4848- StreamFailed,
4949- /// The blob's detected MIME type is not permitted by this server.
5050- ForbiddenMimeType,
5151-}
5252-5353-enum BlobOwnershipError {
5454- /// Failed to resolve the PDS for the given DID. The DID may be invalid or the
5555- /// resolver may be unavailable.
5656- DidPdsResolutionFailure,
5757- /// The blob could not be found in the user's repository.
5858- NotFound,
5959- /// The PDS returned a non-successful status code while fetching the blob,
6060- /// excluding 404 which is handled by [`Self::NotFound`].
6161- ErrorStatusCode,
6262- /// The request to the PDS failed, for example due to the server being unavailable.
6363- FetchFailure,
6464-}
6565-6666-/// Create a `/xrpc/com.atproto.sync.getBlob` Url for the DID+CID.
6767-#[inline]
6868-#[must_use]
6969-fn to_pds_blob_url(mut pds_url: Url, did: &Did<'_>, cid: &BlobCid) -> Url {
7070- pds_url.set_path("/xrpc/com.atproto.sync.getBlob");
7171- pds_url
7272- .query_pairs_mut()
7373- .append_pair("did", did.as_str())
7474- .append_pair("cid", &cid.to_string());
7575- pds_url
7676-}
7777-1717+/// Fetch a blob from a given upstream and return it.
7818pub async fn get_blob_handler(
7919 Path((raw_did, raw_cid)): Path<(String, String)>,
8020 State(state): State<Arc<AppState>>,
8181-) -> Result<axum::response::Response, (StatusCode, Json<ErrorResponse>)> {
2121+) -> Result<
2222+ Response,
2323+ (
2424+ StatusCode,
2525+ [(HeaderName, &'static str); 1],
2626+ Json<ErrorResponse>,
2727+ ),
2828+> {
8229 let (did, cid) = (
8330 match Did::new_owned(raw_did.as_str()) {
8431 Ok(did) => did,
8532 Err(_) => {
8633 return Err((
8734 StatusCode::UNPROCESSABLE_ENTITY,
3535+ [(header::CACHE_CONTROL, CACHE_CONTROL_NOCACHE_VALUE)],
8836 Json(ErrorResponse {
8937 error: "MalformedDid",
9038 message: Some("Invalid or unprocessable DID"),
···9745 Err(_) => {
9846 return Err((
9947 StatusCode::UNPROCESSABLE_ENTITY,
4848+ [(header::CACHE_CONTROL, CACHE_CONTROL_NOCACHE_VALUE)],
10049 Json(ErrorResponse {
10150 error: "MalformedCid",
10251 message: Some("Invalid or unprocessable CID"),
···10655 },
10756 );
10857109109- // Check policy for this DID+CID; concurrent requests for the same key are coalesced.
110110- if let Some(ref policy_service_url) = state.policy_service_url {
111111- match state
112112- .cache
113113- .blob_policy
114114- .try_get_with_by_ref(&(did.clone(), cid), async {
115115- tracing::debug!("querying policy service for the status of blob");
116116-117117- let mut policy_service_url = policy_service_url.clone();
118118- policy_service_url
119119- .path_segments_mut()
120120- .expect("policy service URL should not be a base")
121121- .push(did.as_str())
122122- .push(raw_cid.as_str());
123123-124124- let mut request = state.policy_http_client.get(policy_service_url);
125125- for (name, value) in &state.policy_service_headers {
126126- request = request.header(name, value);
127127- }
128128-129129- match request.send().await {
130130- Ok(response) => match response.status() {
131131- StatusCode::OK => {
132132- tracing::debug!("policy service returned 200 status, can serve blob");
133133- Ok(CachedBlobPolicy { can_serve: true })
134134- }
135135- StatusCode::GONE => {
136136- tracing::debug!(
137137- "policy service returned 410 status, cannot serve blob"
138138- );
139139- Ok(CachedBlobPolicy { can_serve: false })
140140- }
141141- status => {
142142- tracing::error!("policy service returned unexpected status: {status}");
143143- Err(BlobPolicyError::UnhandledStatusCode)
144144- }
145145- },
146146- Err(err) => {
147147- tracing::error!("error occurred contacting the policy service: {err:?}");
148148- Err(BlobPolicyError::FetchFailed)
149149- }
150150- }
151151- })
152152- .await
153153- {
5858+ // Check the policy status of the blob.
5959+ if let Some(ref policy_client) = state.policy_client {
6060+ match policy_client.get_policy_for_blob(&did, cid).await {
15461 Ok(policy) => {
15562 if !policy.can_serve {
15663 return Err((
15764 StatusCode::GONE,
6565+ [(header::CACHE_CONTROL, CACHE_CONTROL_NOCACHE_VALUE)],
15866 Json(ErrorResponse {
159159- error: "BlobUnavailable",
160160- message: Some("Blob is not available through this service"),
6767+ error: "PolicyForbidden",
6868+ message: Some("Requested blob cannot be served by this service"),
16169 }),
16270 ));
16371 }
16472 }
16573 Err(_) => {
166166- if !state.policy_service_fail_open {
7474+ if !state.policy_fail_open {
7575+ // TODO: Maybe give a more precise error?
16776 return Err((
16877 StatusCode::INTERNAL_SERVER_ERROR,
7878+ [(header::CACHE_CONTROL, CACHE_CONTROL_NOCACHE_VALUE)],
16979 Json(ErrorResponse {
17080 error: "InternalServerError",
171171- message: Some("Internal Server Error"),
8181+ message: Some("An internal server error occured."),
17282 }),
17383 ));
17484 }
···17686 }
17787 }
17888179179- // Serve from cache, or fetch from upstream. Concurrent requests for the same key are
180180- // coalesced — if the initial fetch fails, the next pending request will try instead,
181181- // continuing until one succeeds or all have failed.
8989+ // Fetch the blob from cache/origin.
18290 let blob = match state
183183- .cache
184184- .blob_content
185185- .try_get_with_by_ref(&cid, async {
186186- tracing::debug!("fetching blob from PDS");
187187- let blob_url = to_pds_blob_url(
188188- state
189189- .cache
190190- .identity
191191- .try_get_with_by_ref(&did, state.identity_resolver.pds_for_did(&did))
192192- .await
193193- .map_err(|err| {
194194- tracing::debug!("failed to resolve PDS: {:?}", *err);
195195- BlobDownloadError::DidPdsResolutionFailure
196196- })?,
197197- &did,
198198- &cid,
199199- );
200200-201201- let validated_bytes = {
202202- let response = state
203203- .blob_fetch_http_client
204204- .get(blob_url)
205205- .send()
206206- .await
207207- .map_err(|err| {
208208- tracing::warn!("failed to request blob from PDS: {err:?}");
209209- BlobDownloadError::FetchFailure
210210- })?;
211211-212212- // Gracefully handle & abort if we do not receive a successful status code.
213213- if !response.status().is_success() {
214214- // Note: Bluesky's PDS implementation sends 400 instead of 404 when a blob is
215215- // not found. This will skip the 404 handler and instead count as an error.
216216- // This is not our responsibility to work around as other implementations do it right.
217217- return Err(match response.status() {
218218- StatusCode::NOT_FOUND => {
219219- tracing::debug!("pds returned 404 for blob");
220220- BlobDownloadError::NotFound
221221- }
222222- status => {
223223- tracing::debug!("pds returned error status for blob: {status}");
224224- BlobDownloadError::ErrorStatusCode
225225- }
226226- });
227227- }
228228-229229- // Download bytes as a stream, enforcing a max size limit
230230- // and aborting if it's crossed.
231231- let bytes = bytes_stream_capped(response, state.max_blob_size)
232232- .await
233233- .map_err(|err| match err {
234234- BytesStreamCappedError::TooLarge => {
235235- tracing::debug!(
236236- "blob exceeds max size of {} bytes",
237237- state.max_blob_size
238238- );
239239- BlobDownloadError::TooLarge
240240- }
241241- BytesStreamCappedError::ClientError(err) => {
242242- tracing::warn!("error reading blob stream: {err:?}");
243243- BlobDownloadError::StreamFailed
244244- }
245245- })?;
246246-247247- // Verify request CID matches the blob's computed CID.
248248- //
249249- // This operation is done via spawn_blocking as creating the digest will block
250250- // this task's executor from switching to other tasks for as long it runs.
251251- tokio::task::spawn_blocking({
252252- let bytes = bytes.clone();
253253- move || {
254254- // Enabled Multihashes are set in the multihash-codetable crate features.
255255- let computed_cid = match Code::try_from(cid.hash().code()) {
256256- Ok(code) => Ok(Cid::new_v1(0x55, code.digest(&bytes))),
257257- Err(err) => {
258258- tracing::warn!("failed to compute CID: {err:?}");
259259- Err(BlobDownloadError::CidUnsupportedMultihash)
260260- }
261261- }?;
262262-263263- if computed_cid != *cid {
264264- tracing::warn!("cid mismatch: computed {computed_cid} expected {cid}");
265265- return Err(BlobDownloadError::CidMismatch);
266266- }
267267-268268- Ok(())
269269- }
270270- })
271271- .await
272272- .expect("CID computing task should not panic")?;
273273-274274- bytes
275275- };
276276-277277- // Infer MIME type from content bytes rather than headers; this is fallible
278278- // and falls back to application/octet-stream if the type is unrecognised.
279279- let mime_type = sniff_mime(&validated_bytes);
280280- if !is_mime_allowed(&mime_type, &state.allowed_mimetypes) {
281281- tracing::debug!("blob was inferred to be a disallowed mime type: {mime_type}");
282282- return Err(BlobDownloadError::ForbiddenMimeType);
283283- }
284284-285285- // Build reusable cached headers.
286286- let mut headers = HeaderMap::new();
287287- headers.insert(
288288- header::CONTENT_TYPE,
289289- mime_type
290290- .essence_str()
291291- .parse()
292292- .expect("should parse mime type as header value"),
293293- );
294294- headers.insert(header::CACHE_CONTROL, state.cache_control_header.clone());
295295- headers.insert(
296296- header::CONTENT_SECURITY_POLICY,
297297- const { HeaderValue::from_static("default-src 'none'; sandbox") },
298298- );
299299- headers.insert(
300300- header::X_CONTENT_TYPE_OPTIONS,
301301- const { HeaderValue::from_static("nosniff") },
302302- );
303303- headers.insert(
304304- header::CONTENT_DISPOSITION,
305305- const { HeaderValue::from_static("attachment") },
306306- );
307307-308308- // Mark this key as verified in the ownership cache.
309309- state
310310- .cache
311311- .blob_ownership
312312- .insert((cid, did.clone()), ())
313313- .await;
314314-315315- Ok(CachedBlobData {
316316- bytes: validated_bytes,
317317- headers,
318318- })
319319- })
9191+ .blob_service
9292+ .fetch_blob(
9393+ &did,
9494+ &cid,
9595+ BlobUrlResolver::Pds {
9696+ identity_service: &state.identity_service,
9797+ },
9898+ state.max_blob_size,
9999+ &state.allowed_mimetypes,
100100+ )
320101 .await
321102 {
322103 Ok(blob) => blob,
···324105 return Err(match *err {
325106 BlobDownloadError::NotFound => (
326107 StatusCode::NOT_FOUND,
108108+ [(header::CACHE_CONTROL, CACHE_CONTROL_NOCACHE_VALUE)],
327109 Json(ErrorResponse {
328110 error: "BlobNotFound",
329111 message: Some("Blob not found"),
···331113 ),
332114 BlobDownloadError::TooLarge => (
333115 StatusCode::PAYLOAD_TOO_LARGE,
116116+ [(header::CACHE_CONTROL, CACHE_CONTROL_NOCACHE_VALUE)],
334117 Json(ErrorResponse {
335118 error: "BlobTooLarge",
336119 message: Some("Blob exceeds maximum allowed size"),
···338121 ),
339122 BlobDownloadError::ForbiddenMimeType => (
340123 StatusCode::FORBIDDEN,
124124+ [(header::CACHE_CONTROL, CACHE_CONTROL_NOCACHE_VALUE)],
341125 Json(ErrorResponse {
342126 error: "BlobForbiddenType",
343127 message: Some("Content type is not allowed"),
···345129 ),
346130 BlobDownloadError::CidMismatch => (
347131 StatusCode::BAD_GATEWAY,
132132+ [(header::CACHE_CONTROL, CACHE_CONTROL_NOCACHE_VALUE)],
348133 Json(ErrorResponse {
349134 error: "BlobCidMismatch",
350135 message: Some("Blob content does not match CID"),
···352137 ),
353138 BlobDownloadError::CidUnsupportedMultihash => (
354139 StatusCode::NOT_IMPLEMENTED,
140140+ [(header::CACHE_CONTROL, CACHE_CONTROL_NOCACHE_VALUE)],
355141 Json(ErrorResponse {
356142 error: "CidUnsupported",
357143 message: Some("Unsupported CID multihash"),
358144 }),
359145 ),
360360- BlobDownloadError::DidPdsResolutionFailure => (
146146+ BlobDownloadError::BlobResolutionFailure => (
361147 StatusCode::BAD_GATEWAY,
148148+ [(header::CACHE_CONTROL, CACHE_CONTROL_NOCACHE_VALUE)],
362149 Json(ErrorResponse {
363363- error: "CannotResolvePds",
364364- message: Some("Failed to resolve PDS for DID"),
150150+ error: "CannotResolve",
151151+ message: Some("Failed to resolve source of blob"),
365152 }),
366153 ),
367154 BlobDownloadError::FetchFailure
368155 | BlobDownloadError::ErrorStatusCode
369156 | BlobDownloadError::StreamFailed => (
370157 StatusCode::BAD_GATEWAY,
158158+ [(header::CACHE_CONTROL, CACHE_CONTROL_NOCACHE_VALUE)],
371159 Json(ErrorResponse {
372160 error: "BlobFetchFailed",
373373- message: Some("Failed to fetch blob from PDS"),
161161+ message: Some("Failed to fetch blob from origin"),
374162 }),
375163 ),
376164 });
377165 }
378166 };
379167380380- // Verify this DID owns the blob; will skip if we just fetched the blob from the same DID+CID pair.
381381- // Concurrent requests for the same key are coalesced.
168168+ // Check if the user has a copy of this blob via cache/origin.
169169+ //
170170+ // Note: This will just return from cache if the blob was just fetched
171171+ // using the same key. This check does not validate the blob cid matches,
172172+ // just that the blob is reported to exist.
382173 if let Err(err) = state
383383- .cache
384384- .blob_ownership
385385- .try_get_with((cid, did.clone()), async {
386386- tracing::debug!("verifying ownership of blob");
387387- let blob_url = to_pds_blob_url(
388388- state
389389- .cache
390390- .identity
391391- .try_get_with_by_ref(&did, state.identity_resolver.pds_for_did(&did))
392392- .await
393393- .map_err(|err| {
394394- tracing::debug!("failed to resolve PDS: {:?}", *err);
395395- BlobOwnershipError::DidPdsResolutionFailure
396396- })?,
397397- &did,
398398- &cid,
399399- );
400400-401401- // Request the blob with as little of the actual body as we can.
402402- //
403403- // While some PDS implementations (bsky, tranquil) support HTTP HEAD, it is not
404404- // actually a part of the XRPC specification and we cannot rely on it (for now).
405405- // Use a range request to avoid downloading the full body on servers that support it instead.
406406- match state
407407- .blob_fetch_http_client
408408- .get(blob_url)
409409- .header(
410410- header::RANGE,
411411- const { HeaderValue::from_static("bytes=0-1023") },
412412- )
413413- .send()
414414- .await
415415- .map_err(|err| {
416416- tracing::warn!("failed to request blob from PDS: {err:?}");
417417- BlobOwnershipError::FetchFailure
418418- })?
419419- .status()
420420- {
421421- status if status.is_success() => {
422422- tracing::debug!("verified ownership of blob");
423423- Ok(())
424424- }
425425- StatusCode::NOT_FOUND | StatusCode::BAD_REQUEST => {
426426- tracing::debug!("pds returned 404 for blob");
427427- Err(BlobOwnershipError::NotFound)
428428- }
429429- status => {
430430- tracing::debug!("pds returned error status for blob: {}", status);
431431- Err(BlobOwnershipError::ErrorStatusCode)
432432- }
433433- }
434434- })
174174+ .blob_service
175175+ .fetch_blob_ownership(
176176+ &did,
177177+ cid,
178178+ BlobUrlResolver::Pds {
179179+ identity_service: &state.identity_service,
180180+ },
181181+ )
435182 .await
436183 {
437184 return Err(match *err {
438185 BlobOwnershipError::NotFound => (
439186 StatusCode::NOT_FOUND,
187187+ [(header::CACHE_CONTROL, CACHE_CONTROL_NOCACHE_VALUE)],
440188 Json(ErrorResponse {
441189 error: "BlobNotFound",
442190 message: Some("Blob not found"),
443191 }),
444192 ),
445445- BlobOwnershipError::DidPdsResolutionFailure => (
193193+ BlobOwnershipError::BlobResolutionFailure => (
446194 StatusCode::BAD_GATEWAY,
195195+ [(header::CACHE_CONTROL, CACHE_CONTROL_NOCACHE_VALUE)],
447196 Json(ErrorResponse {
448197 error: "CannotResolvePds",
449198 message: Some("Failed to resolve PDS for DID"),
···451200 ),
452201 BlobOwnershipError::ErrorStatusCode | BlobOwnershipError::FetchFailure => (
453202 StatusCode::BAD_GATEWAY,
203203+ [(header::CACHE_CONTROL, CACHE_CONTROL_NOCACHE_VALUE)],
454204 Json(ErrorResponse {
455205 error: "BlobFetchFailed",
456456- message: Some("Failed to fetch blob from PDS"),
206206+ message: Some("Failed to fetch blob from origin"),
457207 }),
458208 ),
459209 });
460210 }
461211462462- let mut response = Response::builder()
212212+ Ok(Response::builder()
463213 .status(StatusCode::OK)
214214+ .header(header::CONTENT_TYPE, blob.mime_type.essence_str())
215215+ .header(header::CACHE_CONTROL, &state.cache_control_header)
216216+ .header(
217217+ header::CONTENT_SECURITY_POLICY,
218218+ const { HeaderValue::from_static("default-src 'none'; sandbox") },
219219+ )
220220+ .header(
221221+ header::X_CONTENT_TYPE_OPTIONS,
222222+ const { HeaderValue::from_static("nosniff") },
223223+ )
224224+ .header(
225225+ header::CONTENT_DISPOSITION,
226226+ const { HeaderValue::from_static("attachment") },
227227+ )
464228 .body(Body::from(blob.bytes))
465465- .expect("response should always build successfully");
466466- response.headers_mut().extend(blob.headers);
467467- Ok(response)
229229+ .expect("response should always build successfully"))
468230}
···11+pub mod get;
22+33+pub use get::get_health_handler;
+9-13
src/routes/mod.rs
···11mod blob;
22mod cache;
33+mod health;
3445pub use blob::get_blob_handler;
56pub use cache::delete_cache_handler;
77+pub use health::get_health_handler;
6877-use axum::http::{HeaderName, HeaderValue, header};
88-use serde::Serialize;
99+/// A header value for [`header::CACHE_CONTROL`] indicating the response cannot be cached at all.
1010+pub const CACHE_CONTROL_NOCACHE_VALUE: &str = "must-understand, no-store";
9111010-#[derive(Serialize)]
1212+#[derive(serde::Serialize)]
1113pub struct ErrorResponse {
1214 error: &'static str,
1315 message: Option<&'static str>,
1416}
15171616-pub async fn get_index_handler() -> ([(HeaderName, HeaderValue); 1], &'static str) {
1717- (
1818- [(
1919- header::CACHE_CONTROL,
2020- const { HeaderValue::from_static("public, max-age=31536000, immutable") },
2121- )],
2222- r#"
1818+pub async fn get_index_handler() -> &'static str {
1919+ r#"
2320 _____ _
2421| __ \ (_)
2522| |__) |__ _ ____ ___ ___
···36333734Routes:
3835 - HTTP GET /{did}/{cid} - Resolve and fetch a blob from its origin.
3939- - HTTP DELETE /cache/{cid or did} - Invalidate cache for either a CID (blob, policy, ownership) or for a DID (ownerships and policies). Requires configured bearer auth token.
4040-"#,
4141- )
3636+ - HTTP DELETE /cache/{cid or did} - Invalidate cache for either a CID (blob, policy, ownership) or for a DID (ownerships and policies). Requires auth.
3737+"#
4238}
+39-17
src/types/blob_cid.rs
···11// TODO: Transfer this implementation to a standalone ATProto types crate in the future.
2233+use cid::Version;
34use serde::Serialize;
45use thiserror::Error;
5666-pub mod codecs {
77- pub const RAW: u64 = 0x55;
88-}
99-107#[derive(Debug, Error)]
118pub enum BlobCidError {
1212- /// The CID uses a codec other than raw (`0x55`), which is the only codec
1313- /// permitted for ATProto blobs.
99+ /// The CID uses an invalid codec type.
1410 #[error("invalid blob codec 0x{0:x}, the only supported codec is raw (0x55)")]
1511 InvalidBlobCodec(u64),
1616-1717- /// The underlying CID could not be parsed.
1212+ /// The CID uses an invalid version.
1313+ #[error("invalid blob version {0:?}, the only supported version is v1")]
1414+ InvalidBlobVersion(Version),
1515+ /// The CID uses an invalid multihash.
1616+ #[error("invalid multihash {0:?}, the only supported multihash is sha256")]
1717+ InvalidMultihash(multihash_codetable::Multihash),
1818+ /// An error from the CID crate.
1819 #[error(transparent)]
1920 CidError(#[from] cid::Error),
2021}
21222222-/// A [`cid::Cid`] wrapper that guarantees the codec is raw (`0x55`), conforming
2323-/// to the ATProto blob CID specification.
2323+/// A [`cid::Cid`] wrapper that guarantees that data conforms to the
2424+/// ATProto blob CID specification where possible.
2425///
2525-/// Specification: <https://atproto.com/specs/blob> (Conformant as of **13/03/26**).
2626+/// Note: BlobCid does not currently attempt to validate the
2727+/// encoding representation of the given value.
2828+///
2929+/// Specification: <https://atproto.com/specs/blob>.
2630#[derive(Copy, PartialEq, Eq, Clone, PartialOrd, Ord, Hash, Debug, Serialize)]
2731pub struct BlobCid(cid::Cid);
28322933impl BlobCid {
3030- pub fn new(cid: cid::Cid) -> Result<Self, BlobCidError> {
3131- if cid.codec() != codecs::RAW {
3434+ pub fn try_from_cid(cid: cid::Cid) -> Result<Self, BlobCidError> {
3535+ // Ensure the cid uses an accepted codec.
3636+ if !matches!(
3737+ cid.codec(),
3838+ 0x55 // Raw
3939+ ) {
3240 return Err(BlobCidError::InvalidBlobCodec(cid.codec()));
3341 }
4242+4343+ // Ensure the cid uses an accepted version.
4444+ if !matches!(cid.version(), Version::V1) {
4545+ return Err(BlobCidError::InvalidBlobVersion(cid.version()));
4646+ }
4747+4848+ // Ensure the cid uses an accepted multihash.
4949+ if !matches!(
5050+ multihash_codetable::Code::try_from(cid.hash().code()),
5151+ Ok(multihash_codetable::Code::Sha2_256)
5252+ ) {
5353+ return Err(BlobCidError::InvalidMultihash(*cid.hash()));
5454+ }
5555+3456 Ok(Self(cid))
3557 }
3658}
···3860impl<'de> serde::Deserialize<'de> for BlobCid {
3961 fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
4062 let cid = cid::Cid::deserialize(deserializer)?;
4141- Self::new(cid).map_err(serde::de::Error::custom)
6363+ Self::try_from_cid(cid).map_err(serde::de::Error::custom)
4264 }
4365}
44664567impl core::convert::TryFrom<&str> for BlobCid {
4668 type Error = BlobCidError;
4769 fn try_from(value: &str) -> Result<Self, Self::Error> {
4848- Self::new(cid::Cid::try_from(value)?)
7070+ Self::try_from_cid(cid::Cid::try_from(value)?)
4971 }
5072}
51735274impl core::convert::TryFrom<String> for BlobCid {
5375 type Error = BlobCidError;
5476 fn try_from(value: String) -> Result<Self, Self::Error> {
5555- Self::new(cid::Cid::try_from(value)?)
7777+ Self::try_from_cid(cid::Cid::try_from(value)?)
5678 }
5779}
58805981impl core::convert::TryFrom<Vec<u8>> for BlobCid {
6082 type Error = BlobCidError;
6183 fn try_from(value: Vec<u8>) -> Result<Self, Self::Error> {
6262- Self::new(cid::Cid::try_from(value)?)
8484+ Self::try_from_cid(cid::Cid::try_from(value)?)
6385 }
6486}
6587
-1
src/types/mod.rs
···11pub mod blob_cid;
22-// pub mod validated_blob;s
-80
src/types/validated_blob.rs
···11-// // TODO: Consider transferring this implementation to a standalone ATProto crate in the future.
22-33-// use crate::types::blob_cid::{self};
44-// use multihash_codetable::{Code, MultihashDigest};
55-// use thiserror::Error;
66-77-// #[derive(Debug, Error)]
88-// pub enum ValidatedBlobError {
99-// /// The CID's multihash codec is not supported by the codetable.
1010-// #[error("unsupported multihash codec 0x{0:x}")]
1111-// CidUnsupportedMultihash(u64),
1212-// /// The computed CID of the blob content does not match the expected CID.
1313-// #[error("CID mismatch: computed {computed} but expected {expected}")]
1414-// CidMismatch {
1515-// computed: blob_cid::BlobCid,
1616-// expected: blob_cid::BlobCid,
1717-// },
1818-// }
1919-2020-// /// Blob content whose integrity has been verified against a [`blob_cid::BlobCid`].
2121-// #[derive(Debug, PartialEq, Eq, Clone, PartialOrd, Ord, Hash)]
2222-// pub struct ValidatedBlob(bytes::Bytes);
2323-2424-// impl ValidatedBlob {
2525-// /// Verify that `bytes` matches the expected `checksum` CID.
2626-// pub fn new<B: Into<bytes::Bytes>>(
2727-// bytes: B,
2828-// checksum: blob_cid::BlobCid,
2929-// ) -> Result<Self, ValidatedBlobError> {
3030-// let bytes = bytes.into();
3131-3232-// // Enabled Multihashes are set in the multihash-codetable crate features.
3333-// let hash_code = checksum.hash().code();
3434-// let computed_cid = match Code::try_from(hash_code) {
3535-// Ok(code) => Ok(blob_cid::BlobCid::new(cid::Cid::new_v1(
3636-// blob_cid::codecs::RAW,
3737-// code.digest(&bytes),
3838-// ))
3939-// .expect("computed CID with raw codec should always be a valid BlobCid")),
4040-// Err(err) => {
4141-// tracing::warn!("failed to compute CID: {err:?}");
4242-// Err(ValidatedBlobError::CidUnsupportedMultihash(hash_code))
4343-// }
4444-// }?;
4545-4646-// if computed_cid != checksum {
4747-// tracing::warn!("cid mismatch: computed {computed_cid} expected {checksum}");
4848-// return Err(ValidatedBlobError::CidMismatch {
4949-// computed: computed_cid,
5050-// expected: checksum,
5151-// });
5252-// }
5353-5454-// Ok(Self(bytes))
5555-// }
5656-5757-// #[must_use]
5858-// pub fn into_inner(self) -> bytes::Bytes {
5959-// self.0
6060-// }
6161-// }
6262-6363-// impl core::convert::AsRef<bytes::Bytes> for ValidatedBlob {
6464-// fn as_ref(&self) -> &bytes::Bytes {
6565-// &self.0
6666-// }
6767-// }
6868-6969-// impl core::ops::Deref for ValidatedBlob {
7070-// type Target = bytes::Bytes;
7171-// fn deref(&self) -> &Self::Target {
7272-// &self.0
7373-// }
7474-// }
7575-7676-// impl core::borrow::Borrow<bytes::Bytes> for ValidatedBlob {
7777-// fn borrow(&self) -> &bytes::Bytes {
7878-// &self.0
7979-// }
8080-// }