···11-[package]
22-name = "porxie"
33-description = "A correct and efficient ATProto blob proxy for secure content delivery."
44-authors = ["Blooym"]
55-repository = "https://codeberg.org/Blooym/porxie"
66-homepage = "https://codeberg.org/Blooym/porxie/src/branch/main/README.md"
77-documentation = "https://codeberg.org/Blooym/porxie/src/branch/main/README.md"
88-license = "AGPL-3.0-or-later"
99-version = "0.1.2"
1010-edition = "2024"
11+[workspace]
22+members = ["crates/porxie", "crates/lexgen"]
33+resolver = "3"
114125[profile.release]
136lto = true
147codegen-units = 1
158opt-level = 3
169strip = "debuginfo"
1717-1818-[dependencies]
1919-anyhow = { version = "1.0.102", features = ["std"], default-features = false }
2020-axum = { version = "0.8.8", features = [
2121- "http1",
2222- "http2",
2323- "json",
2424- "matched-path",
2525- "tokio",
2626- "tower-log",
2727- "tracing",
2828-], default-features = false }
2929-axum-extra = { version = "0.12.5", features = [
3030- "typed-header",
3131- "tracing",
3232-], default-features = false }
3333-bytes = { version = "1.11.1", features = ["std"], default-features = false }
3434-bytesize = { version = "2.3.1", features = ["std"], default-features = false }
3535-cid = { version = "0.11.1", features = ["std"], default-features = false }
3636-clap = { version = "4.5.60", features = [
3737- "color",
3838- "derive",
3939- "env",
4040- "error-context",
4141- "help",
4242- "std",
4343- "suggestions",
4444- "usage",
4545-], default-features = false }
4646-dotenvy = { version = "0.15.7", default-features = false }
4747-futures-util = { version = "0.3.32", default-features = false }
4848-humantime = { version = "2.3.0", default-features = false }
4949-infer = { version = "0.19.0", default-features = false, features = ["std"] }
5050-jacquard-common = { version = "0.9.5", default-features = false }
5151-jacquard-identity = { version = "0.9.5", features = ["tracing"] }
5252-jemallocator = "0.5.4"
5353-json-subscriber = { version = "0.2.8", default-features = false, features = [
5454- "tracing-log",
5555- "env-filter",
5656-] }
5757-mime = { version = "0.3.17", default-features = false }
5858-moka = { version = "0.12.14", features = [
5959- "future",
6060- "logging",
6161-], default-features = false }
6262-multihash-codetable = { version = "0.2.1", features = [
6363- "sha2",
6464- # "blake3", # if it ever gets added to the spec.
6565- "std",
6666-], default-features = false }
6767-reqwest = { version = "0.12.28", default-features = false, features = [
6868- "http2",
6969- "system-proxy",
7070- "stream",
7171- "socks",
7272- "rustls-tls",
7373- "gzip",
7474- "brotli",
7575- "zstd",
7676- "deflate",
7777-] }
7878-serde = { version = "1.0.228", features = [
7979- "derive",
8080- "std",
8181-], default-features = false }
8282-subtle = { version = "2.6", default-features = false, features = ["std"] }
8383-sysinfo = { version = "0.38.4", default-features = false, features = [
8484- "system",
8585-] }
8686-thiserror = { version = "2.0.18", default-features = false, features = ["std"] }
8787-tokio = { version = "1.50.0", default-features = false, features = [
8888- "macros",
8989- "rt-multi-thread",
9090- "signal",
9191- "net",
9292-] }
9393-tower-http = { version = "0.6.8", features = [
9494- "catch-panic",
9595- "normalize-path",
9696- "trace",
9797- "timeout",
9898- "tracing",
9999-], default-features = false }
100100-tracing = { version = "0.1.44", features = [
101101- "attributes",
102102- "std",
103103-], default-features = false }
104104-tracing-subscriber = { version = "0.3.22", features = [
105105- "ansi",
106106- "env-filter",
107107- "fmt",
108108- "parking_lot",
109109- "smallvec",
110110- "std",
111111- "tracing",
112112- "tracing-log",
113113-], default-features = false }
+34-38
README.md
···10101111## Features
12121313-- Blob validation - verifies blob content matches its CID and rejects invalid/tampered content.
1414-- Secure serving - blobs are always served with secure headers to help improve end-user security.
1515-- MIME filtering - detects blob content MIME-types and enforces an optional allowlist of permitted types.
1616-- Policy enforcement - optionally integrate with an external policy service (like an AppView) to control which blobs can be served.
1717-- In-memory cache - configurable in-memory caching for fast repeat access with support for manual cache purging via authenticated HTTP DELETE.
1818-1919-## Routes
2020-2121-- [GET] `/{did}/{cid}`: Resolve and fetch a blob from its origin.
2222-- [DELETE] `/cache/{cid or did}`: Invalidate all valid cache items for a specific blob CID or for an entire user DID. Requires configured bearer auth token.
1313+- Blob validation: verifies blob content matches its CID and rejects invalid/tampered content.
1414+- Secure serving: blobs are always served with secure headers to help improve end-user security.
1515+- MIME filtering: detects blob content MIME-types and enforces an optional allowlist of permitted types.
1616+- Policy enforcement: optionally integrate with an external policy service (like an AppView) to control which blobs can be served.
1717+- In-memory cache: configurable in-memory caching for fast repeat access with support for manual cache purging via authenticated HTTP DELETE.
23182419## Usage
25202621> [!NOTE]
2727-> Porxie does not handle TLS, so it should be placed behind a reverse proxy like [Caddy](https://caddyserver.com), [Traefik](https://traefik.io/traefik), or [NGINX](https://nginx.org). Ensure that any intermediaries between Porxie and the client pass through the `Cache-Control`, `Content-Security-Policy` and `Content-Disposition` headers, or otherwise set them securely.
2828->
2929-> Putting a CDN in front of Porxie is also recommended for better long-term caching and worldwide latency.
2222+> Porxie does not handle TLS, so it should be placed behind a reverse proxy like [Caddy](https://caddyserver.com), [Traefik](https://traefik.io/traefik), or [NGINX](https://nginx.org). It is also recommended to place a dedicated caching layer (such as Varnish, Cloudflare, or similar) between Porxie and your clients.
2323+>
2424+> Please ensure that any intermediary services between Porxie and the client pass through the following headers or set them the same as Porxie does:
2525+> - `Content-Type` (if unmodified by the service)
2626+> - `Cache-Control`
2727+> - `Content-Security-Policy`
2828+> - `Content-Disposition`
2929+> - `X-Content-Type-Options`
30303131### Run: Binary
3232···4444 porxie
4545 ```
46464747-### Run: Docker
4848-4949-To run Porxie with the Docker CLI and default settings, use the following command:
5050-5151-```sh
5252-docker run -d \
5353- --name porxie \
5454- --restart unless-stopped \
5555- -p 6314:6314 \
5656- blooym/porxie:latest
5757-```
5858-5947### Run: Docker Compose
60486149To run Porxie with Docker Compose, you can start with the following `compose.yml` template:
···78667967To run Porxie with Nix, you can use the [package](https://search.nixos.org/packages?channel=unstable&query=porxie) or [NixOS module](https://search.nixos.org/options?channel=unstable&query=porxie) provided directly in nixpkgs.
80686969+## Routes
7070+7171+- [GET] `/{did}/{cid}`: Fetch a blob either from cache or origin.
7272+- [GET] `/xrpc/net.dollware.porxie.getBlob?did=<did>&cid=<cid>`: Compatibility alias of the fetch blob endpoint.
7373+- [POST] `/xrpc/net.dollware.porxie.clearActorCache?did=<did>`: Clear all cached items relating to an actor DID.
7474+- [POST] `/xrpc/net.dollware.porxie.clearBlobCache?cid=<cid>`: Clear all cached items relating to a blob CID.
7575+7676+8177## Policy Service
82788379Porxie can optionally check with an external HTTP service before serving any blob. You build and run this service yourself - Porxie just calls it and acts on the response. This is useful for things like content takedowns or blob allow lists.
···111107 [env: PORXIE_SERVER_ADDRESS=]
112108 [default: ip:127.0.0.1:6314]
113109114114---server-auth-token <SA_SERVER_AUTH_TOKEN>
115115- Bearer token for authenticating admin requests.
110110+--server-admin-password <SA_SERVER_ADMIN_PASSWORD>
111111+ Admin password for authenticating privileged requests.
116112117113 When unset, all authenticated endpoints will reject requests with HTTP 401.
118114119119- [env: PORXIE_SERVER_AUTH_TOKEN=]
115115+ Authenticated requests always expect the username `admin` as per specification.
116116+117117+ [env: PORXIE_SERVER_ADMIN_TOKEN=]
120118```
121119122120### Blob
···139137140138--blob-max-size <BA_BLOB_MAX_SIZE>
141139 Maximum blob size that can be fetched and served.
142142-140140+143141 Blobs that exceed this limit will return HTTP 413.
144144-142142+145143 The minimum value is 512kb and the maximum is the system's total memory.
146144147145 [env: PORXIE_BLOB_MAX_SIZE=]
···155153 cleared manually for changes to take effect quickly.
156154157155 [env: PORXIE_BLOB_CACHE_HEADER=]
158158- [default: "public, max-age=604800, must-revalidate, immutable"]
156156+ [default: "public, max-age=604800, immutable"]
159157160158--blob-processing-timeout <BA_BLOB_PROCESSING_TIMEOUT>
161159 Maximum duration a blob can be processed by this server before aborting
···209207```
210208--cache-allocation <CA_CACHE_ALLOCATION>
211209 Total memory allocation for the internal cache.
212212-210210+213211 Blobs are cached using an LFU policy. The most frequently requested blobs are kept longest when the cache approaches its limit.
214214-212212+215213 For production deployments, a CDN or caching layer in front of this server is recommended for lower latency and better global availability.
216216-214214+217215 The minimum value is 8mb and the maximum is the system's total memory.
218216219217 [env: PORXIE_CACHE_ALLOCATION=]
···266264267265 As pipes are used as a delimiter, they cannot be contained in headers.
268266269269- Example (cli): '--policy-request-headers "Authorization: Bearer token"
270270- --policy-request-headers "X-Api-Key: your-key"'
267267+ Example (cli): '--policy-request-headers "X-Hello: world" --policy-request-headers "X-Foo: bar"'
271268272272- Example (env): 'PORXIE_POLICY_REQUEST_HEADERS="Authorization: Bearer
273273- token|X-Api-Key: your-key"'
269269+ Example (env): 'PORXIE_POLICY_REQUEST_HEADERS="X-Hello: world|X-Foo: bar"'
274270275271 [env: PORXIE_POLICY_REQUEST_HEADERS=]
276272
···11+{
22+ "lexicon": 1,
33+ "id": "net.dollware.porxie.getBlobPolicy",
44+ "defs": {
55+ "main": {
66+ "type": "query",
77+ "parameters": {
88+ "type": "params",
99+ "required": [
1010+ "did",
1111+ "cid"
1212+ ],
1313+ "properties": {
1414+ "did": {
1515+ "type": "string",
1616+ "format": "did",
1717+ "description": "The DID of the account."
1818+ },
1919+ "cid": {
2020+ "type": "string",
2121+ "format": "cid",
2222+ "description": "The CID of the blob."
2323+ }
2424+ }
2525+ },
2626+ "output": {
2727+ "encoding": "application/json",
2828+ "schema": {
2929+ "type": "object",
3030+ "required": [
3131+ "policy"
3232+ ],
3333+ "properties": {
3434+ "policy": {
3535+ "type": "union",
3636+ "refs": [
3737+ "#allowed",
3838+ "#restricted",
3939+ "#unlisted"
4040+ ]
4141+ }
4242+ }
4343+ }
4444+ }
4545+ },
4646+ "allowed": {
4747+ "type": "object",
4848+ "description": "Blob is allowed to be served.",
4949+ "properties": {}
5050+ },
5151+ "restricted": {
5252+ "type": "object",
5353+ "description": "Blob is explicitly restricted. It may have been removed due to moderation reasons.",
5454+ "properties": {
5555+ "reason": {
5656+ "type": "string",
5757+ "description": "An optional reason provided for this policy being applied to provide context to the requesting service."
5858+ }
5959+ }
6060+ },
6161+ "unlisted": {
6262+ "type": "object",
6363+ "description": "Blob is not being served at operator discretion. It may not meet the requirements for the service.",
6464+ "properties": {
6565+ "reason": {
6666+ "type": "string",
6767+ "description": "An optional reason provided for this policy being applied to provide context to the requesting service."
6868+ }
6969+ }
7070+ }
7171+ }
7272+}
+45
crates/lexgen/src/builder_types.rs
···11+// @generated by jacquard-lexicon. DO NOT EDIT.
22+//
33+// This file was automatically generated from Lexicon schemas.
44+// Any manual changes will be overwritten on the next regeneration.
55+66+/// Marker type indicating a builder field has been set
77+pub struct Set<T>(pub T);
88+impl<T> Set<T> {
99+ /// Extract the inner value
1010+ #[inline]
1111+ pub fn into_inner(self) -> T {
1212+ self.0
1313+ }
1414+}
1515+1616+/// Marker type indicating a builder field has not been set
1717+pub struct Unset;
1818+/// Trait indicating a builder field is set (has a value)
1919+2020+#[jacquard_common::deps::codegen::rustversion::attr(
2121+ since(1.78.0),
2222+ diagnostic::on_unimplemented(
2323+ message = "the field `{Self}` was not set, but this method requires it to be set",
2424+ label = "the field `{Self}` was not set"
2525+ )
2626+)]
2727+pub trait IsSet: private::Sealed {}
2828+/// Trait indicating a builder field is unset (no value yet)
2929+3030+#[jacquard_common::deps::codegen::rustversion::attr(
3131+ since(1.78.0),
3232+ diagnostic::on_unimplemented(
3333+ message = "the field `{Self}` was already set, but this method requires it to be unset",
3434+ label = "the field `{Self}` was already set"
3535+ )
3636+)]
3737+pub trait IsUnset: private::Sealed {}
3838+impl<T> IsSet for Set<T> {}
3939+impl IsUnset for Unset {}
4040+mod private {
4141+ /// Sealed trait to prevent external implementations
4242+ pub trait Sealed {}
4343+ impl<T> Sealed for super::Set<T> {}
4444+ impl Sealed for super::Unset {}
4545+}
+11
crates/lexgen/src/lib.rs
···11+// @generated by jacquard-lexicon. DO NOT EDIT.
22+//
33+// This file was automatically generated from Lexicon schemas.
44+// Any manual changes will be overwritten on the next regeneration.
55+66+extern crate alloc;
77+pub mod builder_types;
88+99+1010+#[cfg(feature = "net_dollware")]
1111+pub mod net_dollware;
+6
crates/lexgen/src/net_dollware.rs
···11+// @generated by jacquard-lexicon. DO NOT EDIT.
22+//
33+// This file was automatically generated from Lexicon schemas.
44+// Any manual changes will be overwritten on the next regeneration.
55+66+pub mod porxie;
+9
crates/lexgen/src/net_dollware/porxie.rs
···11+// @generated by jacquard-lexicon. DO NOT EDIT.
22+//
33+// This file was automatically generated from Lexicon schemas.
44+// Any manual changes will be overwritten on the next regeneration.
55+66+pub mod clear_actor_cache;
77+pub mod clear_blob_cache;
88+pub mod get_blob;
99+pub mod get_blob_policy;
···11+mod blob;
22+mod index;
33+pub mod xrpc;
44+55+pub use blob::get_blob_handler;
66+pub use index::get_index_handler;
77+88+/// A header value for [`header::CACHE_CONTROL`] indicating the response cannot be cached at all.
99+const CACHE_CONTROL_NOCACHE_VALUE: &str = "must-understand, no-store";
1010+1111+#[derive(serde::Serialize)]
1212+pub struct ErrorResponse {
1313+ error: &'static str,
1414+ message: Option<&'static str>,
1515+}
+4
crates/porxie/src/routes/xrpc/mod.rs
···11+mod health;
22+pub mod net_dollware;
33+44+pub use health::get_health_handler;
···11+mod clear_actor_cache;
22+mod clear_blob_cache;
33+mod get_blob;
44+55+pub use clear_actor_cache::clear_actor_cache_handler;
66+pub use clear_blob_cache::clear_blob_cache_handler;
77+pub use get_blob::get_blob_handler_xrpc_compat;
···172172 }
173173 };
174174175175- let validated_bytes = {
175175+ let bytes = {
176176 let response = self.http_client.get(blob_url).send().await.map_err(|err| {
177177 tracing::warn!("failed to request blob from origin: {err:?}");
178178 BlobDownloadError::FetchFailure
···211211 //
212212 // This operation is done via spawn_blocking as creating the digest will block
213213 // this task's executor from switching to other tasks for as long it runs.
214214+ //
215215+ // Passes the bytes as a return value instead of incrementing the reference count.
214216 tokio::task::spawn_blocking({
215215- let bytes = bytes.clone();
216217 let cid = *cid;
217218 move || {
218219 // Enabled Multihashes are set in the multihash-codetable crate features.
···234235 return Err(BlobDownloadError::CidMismatch);
235236 }
236237237237- Ok(())
238238+ Ok(bytes)
238239 }
239240 })
240241 .await
241241- .expect("CID computing task should not panic")?;
242242-243243- bytes
242242+ .expect("CID computing task should not panic")?
244243 };
245244246245 // Infer MIME type from content bytes rather than headers; this is fallible
···248247 //
249248 // TODO: Merge this with the download stream process to reject bad MIMEs
250249 // early?
251251- let mime_type = sniff_mime(&validated_bytes);
250250+ let mime_type = sniff_mime(&bytes);
252251 if !is_mime_allowed(&mime_type, allowed_mimetypes) {
253252 tracing::debug!("blob was inferred to be a disallowed mime type: {mime_type}");
254253 return Err(BlobDownloadError::ForbiddenMimeType);
···257256 // Mark this DID+CID pair as ownership-verified since we just fetched it from the origin.
258257 self.ownership_cache.insert((*cid, did.clone()), ()).await;
259258260260- Ok(BlobData {
261261- bytes: validated_bytes,
262262- mime_type,
263263- })
259259+ Ok(BlobData { bytes, mime_type })
264260 })
265261 .await
266262 }