A human-friendly DSL for ATProto Lexicons
27
fork

Configure Feed

Select the types of activity you want to include in your feed.

Define lol.mlf.package in MLF + mlf-generated-lexicon crate

Dogfood: describe the publish manifest record type using MLF itself.
lexicons/lol/mlf/package.mlf defines a `record package` with a
`self {}` doc block carrying the lexicon-level description, plus two
supporting object types (PublishedItem and ResolvedDependency) that
keep the record's array shapes readable.

Repo-root mlf.toml declares [package].name = "lol.mlf", points
[source] at ./lexicons, and configures a single [[output]] for rust
writing to ./mlf-generated-lexicon/src/generated. We deliberately
don't emit lexicon JSON to the repo — the publish flow produces that
shape when it actually ships records to a PDS; keeping a static copy
would just be a stale duplicate.

Generated code lives in its own workspace crate, mlf-generated-lexicon.
Its src/lib.rs stitches each generated file under a module path that
mirrors the lexicon NSID (so lol.mlf.package shows up as
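mlf_generated_lexicon::lol::mlf::package). Crates that want typed
access depend on it like any other workspace member — no build-script
magic, and no `include!` in the consuming crate reaching into the
generated directory; the only `include!` lives in this crate's lib.rs.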

mlf-publish::manifest imports Package / PublishedItem /
ResolvedDependency from the new crate and builds the instance payload
via serde instead of hand-rolled JSON macros. Any drift between the
.mlf schema and the code that builds manifests now surfaces at
compile time. The `com.atproto.lexicon.schema` envelope fields
($type, lexicon, id, description, defs) stay hand-assembled: the
atproto meta-schema only requires `lexicon: integer`, so there's
nothing useful to generate a type from, and the envelope shape is
guarded by the check_meta_schema validator.

authored by stavola.xyz and committed by

Tangled 229ecc80 b92d8371

+227 -44
+8
Cargo.lock
··· 2424 2424 ] 2425 2425 2426 2426 [[package]] 2427 + name = "mlf-generated-lexicon" 2428 + version = "0.1.0" 2429 + dependencies = [ 2430 + "serde", 2431 + ] 2432 + 2433 + [[package]] 2427 2434 name = "mlf-integration-tests" 2428 2435 version = "0.1.0" 2429 2436 dependencies = [ ··· 2512 2519 dependencies = [ 2513 2520 "chrono", 2514 2521 "mlf-atproto", 2522 + "mlf-generated-lexicon", 2515 2523 "serde", 2516 2524 "serde_json", 2517 2525 "thiserror 2.0.17",
+1
Cargo.toml
··· 7 7 "dns-plugins/mlf-dns-cloudflare", 8 8 "mlf-atproto", 9 9 "mlf-cli", 10 + "mlf-generated-lexicon", 10 11 "dns-plugins/mlf-dns-godaddy", 11 12 "dns-plugins/mlf-dns-google", 12 13 "dns-plugins/mlf-dns-namecheap",
+39
lexicons/lol/mlf/package.mlf
··· 1 + /// MLF publish manifest — one record per publish event. 2 + /// 3 + /// Written by `mlf publish` into the author's PDS alongside the 4 + /// lexicons themselves. The record's own CID is the durable identifier 5 + /// for "this specific publish" — a content-addressed release pointer 6 + /// that binds the set of published items and the dependency versions 7 + /// they were resolved against. 8 + self {} 9 + 10 + /// One lexicon record included in a publish. 11 + def type PublishedItem = { 12 + /// NSID of the published lexicon. 13 + nsid!: Nsid, 14 + /// CID of the record as stored on the PDS. 15 + cid!: Cid, 16 + }; 17 + 18 + /// One external dependency pinned at publish time. 19 + def type ResolvedDependency = { 20 + /// NSID of the external lexicon the package linked against. 21 + nsid!: Nsid, 22 + /// CID of the record at the moment of the publish. 23 + cid!: Cid, 24 + }; 25 + 26 + record package { 27 + /// ISO 8601 timestamp of when the publish completed. 28 + publishedAt!: Datetime, 29 + /// Version string of the MLF tool that produced the publish 30 + /// (e.g. `"mlf@0.1.0"`). 31 + tool!: string, 32 + /// Every lexicon record the publish wrote to the PDS in this event, 33 + /// sorted lexicographically by NSID. 34 + published!: PublishedItem[], 35 + /// Every external lexicon the workspace resolved against at publish 36 + /// time, pinned to the remote CID observed during the publish. 37 + /// Empty list when the package has no external dependencies. 38 + resolvedDependencies: ResolvedDependency[], 39 + }
+9
mlf-generated-lexicon/Cargo.toml
··· 1 + [package] 2 + name = "mlf-generated-lexicon" 3 + version = "0.1.0" 4 + edition = "2024" 5 + license = "MIT" 6 + description = "Typed Rust helpers generated from this workspace's MLF lexicons" 7 + 8 + [dependencies] 9 + serde = { version = "1", features = ["derive"] }
+38
mlf-generated-lexicon/src/generated/lol/mlf/package.rs
··· 1 + // Generated from lol.mlf.package 2 + // Do not edit manually 3 + 4 + use serde::{Deserialize, Serialize}; 5 + 6 + /// One lexicon record included in a publish. 7 + #[derive(Debug, Clone, Serialize, Deserialize)] 8 + pub struct PublishedItem { 9 + /// NSID of the published lexicon. 10 + pub nsid: String, 11 + /// CID of the record as stored on the PDS. 12 + pub cid: String, 13 + } 14 + 15 + /// One external dependency pinned at publish time. 16 + #[derive(Debug, Clone, Serialize, Deserialize)] 17 + pub struct ResolvedDependency { 18 + /// NSID of the external lexicon the package linked against. 19 + pub nsid: String, 20 + /// CID of the record at the moment of the publish. 21 + pub cid: String, 22 + } 23 + 24 + #[derive(Debug, Clone, Serialize, Deserialize)] 25 + pub struct Package { 26 + /// ISO 8601 timestamp of when the publish completed. 27 + #[serde(rename = "publishedAt")] 28 + pub published_at: String, 29 + /// Version string of the MLF tool that produced the publish 30 + pub tool: String, 31 + /// Every lexicon record the publish wrote to the PDS in this event, 32 + pub published: Vec<PublishedItem>, 33 + /// Every external lexicon the workspace resolved against at publish 34 + #[serde(rename = "resolvedDependencies")] 35 + #[serde(skip_serializing_if = "Option::is_none")] 36 + pub resolved_dependencies: Option<Vec<ResolvedDependency>>, 37 + } 38 +
+18
mlf-generated-lexicon/src/lib.rs
··· 1 + //! Typed Rust helpers generated from this workspace's MLF lexicons. 2 + //! 3 + //! `mlf generate` writes into `src/generated/`; this crate stitches 4 + //! those files into a module tree that mirrors each lexicon's NSID. 5 + //! Consumers import from the NSID path — e.g. 6 + //! `use mlf_generated_lexicon::lol::mlf::package::Package;`. 7 + //! 8 + //! Source of truth is always the `.mlf` files in the workspace root's 9 + //! `lexicons/` directory. When the schema changes, rerun 10 + //! `mlf generate` and commit the refreshed output. 11 + 12 + pub mod lol { 13 + pub mod mlf { 14 + pub mod package { 15 + include!("generated/lol/mlf/package.rs"); 16 + } 17 + } 18 + }
+1
mlf-publish/Cargo.toml
··· 7 7 8 8 [dependencies] 9 9 mlf-atproto = { path = "../mlf-atproto" } 10 + mlf-generated-lexicon = { path = "../mlf-generated-lexicon" } 10 11 chrono = { version = "0.4", features = ["serde"] } 11 12 serde = { version = "1", features = ["derive"] } 12 13 serde_json = "1"
+106 -44
mlf-publish/src/manifest.rs
··· 8 8 //! The record's own CID (computed by the PDS on `putRecord`) is the 9 9 //! deterministic identifier for the publish — "this version of this 10 10 //! package." There's deliberately no semver; mutation isn't supported. 11 + //! 12 + //! Implementation note: we assemble the instance data (publishedAt, 13 + //! tool, published[], resolvedDependencies[]) using the typed 14 + //! [`mlf_generated_lexicon::lol::mlf::package::Package`] struct emitted by 15 + //! `mlf generate` from `lexicons/lol/mlf/package.mlf` and owned by the 16 + //! `mlf-generated-lexicon` crate. The `com.atproto.lexicon.schema` 17 + //! envelope fields (`$type`, `lexicon`, `id`, `description`, `defs`) 18 + //! are still hand-assembled: the atproto meta-schema only requires 19 + //! `lexicon: integer`, so it's not worth generating a type for — the 20 + //! envelope is stable and trivially verified by the 21 + //! `check_meta_schema` validator. 11 22 12 - use serde_json::{Map, Value, json}; 23 + use mlf_generated_lexicon::lol::mlf::package::{Package, PublishedItem, ResolvedDependency}; 24 + use serde_json::{Map, Value, json, to_value}; 13 25 14 26 /// The NSID the manifest is published under (rkey == NSID, per the 15 27 /// ATProto lexicon spec). ··· 40 52 let mut deps: Vec<(String, String)> = inputs.resolved_deps.to_vec(); 41 53 deps.sort(); 42 54 55 + // Build the instance payload using the typed struct emitted by 56 + // `mlf generate` — any schema change surfaces here as a compile error. 
57 + let instance = Package { 58 + published_at: inputs.published_at.to_string(), 59 + tool: inputs.tool.to_string(), 60 + published: published 61 + .into_iter() 62 + .map(|(nsid, cid)| PublishedItem { nsid, cid }) 63 + .collect(), 64 + resolved_dependencies: if deps.is_empty() { 65 + None 66 + } else { 67 + Some( 68 + deps.into_iter() 69 + .map(|(nsid, cid)| ResolvedDependency { nsid, cid }) 70 + .collect(), 71 + ) 72 + }, 73 + }; 74 + 75 + // Merge the instance fields into a `com.atproto.lexicon.schema` 76 + // record envelope. The envelope is hand-assembled because the 77 + // atproto meta-schema is too permissive to generate useful types 78 + // from (it only requires `lexicon: integer`). 79 + let instance_value = to_value(&instance).expect("Package serialises"); 80 + let instance_obj = instance_value 81 + .as_object() 82 + .expect("Package always serialises to a JSON object") 83 + .clone(); 84 + 43 85 let mut obj = Map::new(); 44 86 obj.insert( 45 87 "$type".into(), ··· 51 93 "description".into(), 52 94 Value::String("MLF publish manifest".into()), 53 95 ); 54 - // Minimal valid `defs` so the meta-schema is satisfied. The record 55 - // is a regular lexicon-schema record whose "main" def describes its 56 - // own custom fields. v1 keeps this terse; downstream readers use 57 - // `items`/`resolvedDependencies` directly. 58 - obj.insert( 59 - "defs".into(), 60 - json!({ 61 - "main": { 62 - "type": "record", 63 - "key": "nsid", 64 - "record": { 65 - "type": "object", 66 - "required": ["publishedAt", "tool", "published"], 67 - "properties": { 68 - "publishedAt": {"type": "string"}, 69 - "tool": {"type": "string"}, 70 - "published": {"type": "array"}, 71 - "resolvedDependencies": {"type": "array"} 96 + obj.insert("defs".into(), manifest_defs()); 97 + for (k, v) in instance_obj { 98 + obj.insert(k, v); 99 + } 100 + Value::Object(obj) 101 + } 102 + 103 + /// The `defs` block describing the instance shape. 
Kept in one place 104 + /// so it's easy to bump if the on-PDS shape ever needs tightening. 105 + /// The shape here matches what `mlf generate lexicon` would emit from 106 + /// our `.mlf` source; keeping it in Rust avoids needing a second 107 + /// generated artifact in the repo. 108 + fn manifest_defs() -> Value { 109 + json!({ 110 + "main": { 111 + "type": "record", 112 + "key": "nsid", 113 + "record": { 114 + "type": "object", 115 + "required": ["publishedAt", "tool", "published"], 116 + "properties": { 117 + "publishedAt": {"type": "string", "format": "datetime"}, 118 + "tool": {"type": "string"}, 119 + "published": { 120 + "type": "array", 121 + "items": {"type": "ref", "ref": "#PublishedItem"} 122 + }, 123 + "resolvedDependencies": { 124 + "type": "array", 125 + "items": {"type": "ref", "ref": "#ResolvedDependency"} 72 126 } 73 127 } 74 128 } 75 - }), 76 - ); 77 - obj.insert( 78 - "publishedAt".into(), 79 - Value::String(inputs.published_at.to_string()), 80 - ); 81 - obj.insert("tool".into(), Value::String(inputs.tool.to_string())); 82 - obj.insert( 83 - "published".into(), 84 - Value::Array( 85 - published 86 - .into_iter() 87 - .map(|(nsid, cid)| json!({"nsid": nsid, "cid": cid})) 88 - .collect(), 89 - ), 90 - ); 91 - obj.insert( 92 - "resolvedDependencies".into(), 93 - Value::Array( 94 - deps.into_iter() 95 - .map(|(nsid, cid)| json!({"nsid": nsid, "cid": cid})) 96 - .collect(), 97 - ), 98 - ); 99 - Value::Object(obj) 129 + }, 130 + "PublishedItem": { 131 + "type": "object", 132 + "required": ["nsid", "cid"], 133 + "properties": { 134 + "nsid": {"type": "string", "format": "nsid"}, 135 + "cid": {"type": "string", "format": "cid"} 136 + } 137 + }, 138 + "ResolvedDependency": { 139 + "type": "object", 140 + "required": ["nsid", "cid"], 141 + "properties": { 142 + "nsid": {"type": "string", "format": "nsid"}, 143 + "cid": {"type": "string", "format": "cid"} 144 + } 145 + } 146 + }) 100 147 } 101 148 102 149 #[cfg(test)] ··· 116 163 }); 117 164 
assert_eq!(v["$type"], "com.atproto.lexicon.schema"); 118 165 assert_eq!(v["id"], NSID); 166 + assert_eq!(v["lexicon"], 1); 119 167 // published list is sorted 120 168 assert_eq!(v["published"][0]["nsid"], "com.example.other"); 121 169 assert_eq!(v["published"][1]["nsid"], "com.example.thing"); 122 170 assert_eq!(v["resolvedDependencies"][0]["cid"], "bafy3"); 171 + } 172 + 173 + #[test] 174 + fn empty_deps_omits_resolved_dependencies_field() { 175 + // The generated struct marks `resolved_dependencies` Option with 176 + // skip_serializing_if, so an empty manifest doesn't carry a 177 + // confusing "[]" on-wire. 178 + let v = build(&ManifestInputs { 179 + tool: "mlf@x", 180 + published_at: "t", 181 + published: &[("a".into(), "bafy1".into())], 182 + resolved_deps: &[], 183 + }); 184 + assert!(v.get("resolvedDependencies").is_none()); 123 185 } 124 186 125 187 #[test]
+7
mlf.toml
··· 1 + [package] 2 + name = "lol.mlf" 3 + 1 4 [source] 2 5 directory = "./lexicons" 3 6 4 7 [dependencies] 5 8 dependencies = [] 9 + 10 + [[output]] 11 + type = "rust" 12 + directory = "./mlf-generated-lexicon/src/generated"