A human-friendly DSL for ATProto Lexicons
27
fork

Configure Feed

Select the types of activity you want to include in your feed.

Split manifest into its own collection with TID rkeys

Each publish event now writes a fresh `lol.mlf.package/<tid>` record
instead of clobbering a single `com.atproto.lexicon.schema/lol.mlf.package`.
The schema definition and the manifest instance are different things
in different collections, so they stop colliding when an mlf project
dogfoods `lol.mlf.package` as a user lexicon.

- mlf-atproto: new tid module (sortable base32 TID generator, CAS
loop for monotonicity)
- mlf-publish/manifest: drop the schema envelope; instance carries
`$type = "lol.mlf.package"` and only instance fields
- mlf-cli/publish: write manifest to `lol.mlf.package/<tid>`
- mlf-cli/unpublish: list the manifest collection, pick the newest
by TID, delete everything it lists + the full publish log

authored by stavola.xyz and committed by

Tangled de4c5373 59f4fdf4

+230 -152
+2
mlf-atproto/src/lib.rs
··· 9 9 //! - [`records`] — typed wrappers for `getRecord` / `putRecord` / 10 10 //! `listRecords` / `deleteRecord` 11 11 //! - [`cid`] — client-side CID computation (DAG-CBOR + SHA-256 multihash) 12 + //! - [`tid`] — sortable timestamp identifier generation (rkey) 12 13 //! 13 14 //! This crate is plumbing only — no MLF-specific domain logic lives here. 14 15 //! Consumers (`mlf-lexicon-fetcher`, `mlf-publish`) build domain operations ··· 18 19 pub mod identity; 19 20 pub mod records; 20 21 pub mod session; 22 + pub mod tid; 21 23 pub mod xrpc; 22 24 23 25 pub use identity::{DnsResolver, MockDnsResolver, RealDnsResolver};
+99
mlf-atproto/src/tid.rs
//! ATProto TID (Timestamp Identifier) generation.
//!
//! A TID is a 64-bit integer encoded as 13 characters of a sortable
//! base32 alphabet. The layout (per https://atproto.com/specs/tid):
//!
//! - bit 63: always 0 (reserved)
//! - bits 62..10: 53-bit microseconds since the Unix epoch
//! - bits 9..0: 10-bit clock identifier (mixed from the process ID and
//!   the sub-second clock on every call)
//!
//! Sortable base32 alphabet: `234567abcdefghijklmnopqrstuvwxyz`.
//! TIDs sort lexicographically in the same order as chronologically.

use std::sync::atomic::{AtomicU64, Ordering};
use std::time::{SystemTime, UNIX_EPOCH};

const ALPHABET: &[u8] = b"234567abcdefghijklmnopqrstuvwxyz";

/// Number of base32 characters in an encoded TID (13 * 5 = 65 bits, so
/// the leading character only ever carries the top 4 bits of the `u64`).
const TID_LEN: usize = 13;

/// Mask selecting the 53-bit microsecond timestamp.
const MICROS_MASK: u64 = (1 << 53) - 1;

/// Mask selecting the 10-bit clock identifier.
const CLOCK_ID_MASK: u64 = 0x3FF;

/// Track the last microsecond we emitted so that two TIDs generated
/// in the same microsecond still sort in emission order.
static LAST_US: AtomicU64 = AtomicU64::new(0);

/// Generate a fresh TID. Monotonically increasing within a process.
///
/// The timestamp half is `max(now, last_emitted + 1)`, so every call in
/// this process gets a strictly larger microsecond value than the one
/// before it, even if the wall clock stalls or steps backwards. If the
/// system clock reads earlier than the Unix epoch the current time is
/// treated as zero and the counter alone drives the timestamp.
pub fn generate() -> String {
    let now_us = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        // `as_micros` is a u128; microseconds since 1970 fit in a u64 for
        // several hundred thousand years, so the narrowing cast is lossless.
        .map(|d| d.as_micros() as u64)
        .unwrap_or(0);
    // CAS loop to install `max(now_us, prev + 1)` and read back the
    // value we actually wrote. `fetch_update` returns the *previous*
    // value, which is the wrong half of the trade for us.
    let micros = loop {
        let prev = LAST_US.load(Ordering::Relaxed);
        let next = now_us.max(prev.saturating_add(1));
        if LAST_US
            .compare_exchange_weak(prev, next, Ordering::SeqCst, Ordering::Relaxed)
            .is_ok()
        {
            break next;
        }
    };
    let clock_id = rand_10_bits();
    let raw = ((micros & MICROS_MASK) << 10) | (clock_id & CLOCK_ID_MASK);
    encode_base32_sortable(raw)
}

/// Produce the 10-bit clock identifier for one TID.
fn rand_10_bits() -> u64 {
    // A PID+nanos mix is good enough — the clock ID just needs to
    // differ across concurrent processes, not be cryptographic.
    let nanos = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map(|d| u64::from(d.subsec_nanos()))
        .unwrap_or(0);
    mix_clock_id(nanos, u64::from(std::process::id()))
}

/// Combine a sub-second nanosecond reading and a process ID into a value
/// in `0..=0x3FF`.
///
/// Both inputs are folded in full before being combined. Shifting the PID
/// and then masking to 10 bits would keep only a few of its low bits, so
/// PIDs that differ only in higher bits would contribute identically.
fn mix_clock_id(nanos: u64, pid: u64) -> u64 {
    fold_to_10_bits(nanos) ^ fold_to_10_bits(pid)
}

/// XOR together every 10-bit chunk of `x`, so all 64 input bits influence
/// the 10-bit result.
fn fold_to_10_bits(mut x: u64) -> u64 {
    let mut acc = 0;
    while x != 0 {
        acc ^= x & CLOCK_ID_MASK;
        x >>= 10;
    }
    acc
}

/// Encode `n` as 13 characters of the sortable base32 alphabet, most
/// significant 5-bit group first.
fn encode_base32_sortable(n: u64) -> String {
    (0..TID_LEN)
        .rev()
        // The `& 0x1F` keeps the index in 0..32, the length of ALPHABET.
        .map(|group| char::from(ALPHABET[((n >> (5 * group)) & 0x1F) as usize]))
        .collect()
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn tid_is_13_chars_sortable_alphabet() {
        let t = generate();
        assert_eq!(t.len(), 13);
        assert!(t.bytes().all(|b| ALPHABET.contains(&b)));
    }

    #[test]
    fn tids_are_monotonic() {
        let a = generate();
        let b = generate();
        let c = generate();
        assert!(a < b, "{a} < {b}");
        assert!(b < c, "{b} < {c}");
    }

    #[test]
    fn tid_encodes_current_time_not_zero() {
        // Regression: fetch_update returned the *previous* value, so
        // the first call encoded zero micros (= "22222222222xx"). A
        // correctly-populated TID has non-zero bits well above the
        // bottom 10 (which hold the random clock id).
        let tid = generate();
        let leading_zero_chars = tid.bytes().take_while(|&b| b == b'2').count();
        assert!(
            leading_zero_chars < 5,
            "{tid} has {leading_zero_chars} leading zero chars — time component wasn't populated",
        );
    }

    #[test]
    fn clock_id_reflects_high_pid_bits() {
        // Regression: only the low 3 bits of the PID used to reach the
        // clock id, so PIDs 8 and 16 were indistinguishable.
        assert_ne!(mix_clock_id(0, 8), mix_clock_id(0, 16));
        assert!(mix_clock_id(u64::MAX, 12345) <= CLOCK_ID_MASK);
    }

    #[test]
    fn encoding_is_big_endian_base32() {
        assert_eq!(encode_base32_sortable(0), "2".repeat(13));
        assert_eq!(encode_base32_sortable(32), format!("{}32", "2".repeat(11)));
    }
}
+12 -5
mlf-cli/src/publish.rs
··· 20 20 use crate::credentials::{CredentialsFile, Scope}; 21 21 use crate::remote_state::{RemoteState, RemoteStateError}; 22 22 use miette::Diagnostic; 23 - use mlf_atproto::{records, session}; 23 + use mlf_atproto::{records, session, tid}; 24 24 use mlf_plugin_host::discovery; 25 25 use mlf_plugin_host::host::{HostError, PluginHandle}; 26 26 use mlf_plugin_host::ui::{DenyInteractiveUi, TerminalUi, UiHandler}; ··· 250 250 251 251 println!("\nPlan:"); 252 252 print!("{}", plan.format_summary()); 253 - if plan.is_empty() && !publish_cfg.manifest { 253 + if plan.is_empty() { 254 + // Nothing to publish → nothing worth recording in the publish log. 255 + // The manifest represents a *publish event*; a no-op isn't one. 254 256 println!("\n(no changes; nothing to publish)"); 255 257 return Ok(()); 256 258 } ··· 329 331 println!(" ✓ {} {}", action.verb(), action.nsid()); 330 332 } 331 333 332 - // 7. Manifest. 334 + // 7. Manifest. Each publish event writes a new instance at 335 + // `lol.mlf.package/<tid>` so the PDS retains the full publish 336 + // history. The schema definition for `lol.mlf.package` itself 337 + // lives separately under `com.atproto.lexicon.schema` (published 338 + // by the mlf project's own repo, not every caller's). 333 339 if publish_cfg.manifest { 334 - println!("Writing manifest (lol.mlf.package)..."); 340 + let tid = tid::generate(); 341 + println!("Writing manifest ({}/{tid})...", manifest::NSID); 335 342 let manifest_record = build_manifest_record(&state, &plan); 336 343 let _ = records::put_record( 337 344 &http, 338 345 &pds_url, 339 346 &sess.access_jwt, 340 347 &sess.did, 341 - "com.atproto.lexicon.schema", 342 348 manifest::NSID, 349 + &tid, 343 350 &manifest_record, 344 351 ) 345 352 .await
+86 -59
mlf-cli/src/unpublish.rs
··· 1 - //! `mlf unpublish` — delete every lexicon in the package's manifest, 2 - //! plus the manifest record itself. Read the published manifest to 3 - //! figure out what was published; if it's missing, refuse (we'd be 4 - //! guessing which records belong to this workspace otherwise). 1 + //! `mlf unpublish` — delete every lexicon in the package's latest 2 + //! manifest, plus every manifest record in the publish log. Manifests 3 + //! live in collection `lol.mlf.package` with TID rkeys; we read the 4 + //! most-recent one to learn what NSIDs belong to us. If no manifest 5 + //! exists we refuse — we'd be guessing which records to delete. 5 6 6 7 use crate::config::{ConfigError, MlfConfig, find_project_root}; 7 8 use crate::credentials::{CredentialsFile, Scope}; 8 - use crate::remote_state::{RemoteState, RemoteStateError}; 9 + use crate::remote_state::{RemoteStateError}; 9 10 use dialoguer::{Confirm, theme::ColorfulTheme}; 10 11 use miette::Diagnostic; 11 12 use mlf_atproto::records::{self, RecordError}; 12 - use mlf_atproto::session; 13 + use mlf_atproto::{identity, session}; 13 14 use mlf_publish::manifest; 14 15 use std::collections::BTreeSet; 15 16 use thiserror::Error; ··· 29 30 NotPublishable, 30 31 31 32 #[error( 32 - "No manifest (`lol.mlf.package`) found on the PDS — refusing to guess which records belong to this workspace" 33 + "No manifest records found in `lol.mlf.package` collection — refusing to guess which records belong to this workspace" 33 34 )] 34 35 #[diagnostic( 35 36 code(mlf::unpublish::no_manifest), 36 - help( 37 - "Delete records manually via `goat lex unpublish`, or publish once first to create a manifest we can read back." 
38 - ) 37 + help("Publish once first to create a manifest we can read back.") 39 38 )] 40 39 NoManifest, 41 40 ··· 79 78 if config.publish.is_none() { 80 79 return Err(UnpublishError::NotPublishable); 81 80 } 82 - 83 - println!("Loading remote state..."); 84 - let state = RemoteState::load().await?; 85 - 86 - // The manifest is what tells us "here's what this workspace published." 87 - // If it's missing we don't know which records belong to us, so refuse. 88 - let manifest_record = state 89 - .remote 90 - .get(manifest::NSID) 91 - .ok_or(UnpublishError::NoManifest)? 92 - .record_json 93 - .clone(); 94 - let to_delete = manifest_items(&manifest_record); 81 + let package = config.package.clone(); 95 82 96 - if to_delete.is_empty() { 97 - println!("Manifest lists zero records. Nothing to delete except the manifest itself."); 98 - } else { 99 - println!( 100 - "Manifest lists {} record(s) published under `{}`:", 101 - to_delete.len(), 102 - state.package.name 103 - ); 104 - for nsid in &to_delete { 105 - println!(" - {nsid}"); 106 - } 107 - } 108 - 109 - if !opts.yes && !confirm()? { 110 - return Err(UnpublishError::Cancelled); 111 - } 112 - 113 - // Credentials + session. 83 + // Credentials + session. We need the session before we can read 84 + // the publish log, which lives in the authed repo. 
114 85 let creds = load_credentials(&project_root)?; 115 86 let pds_creds = creds.pds.ok_or(UnpublishError::NoPdsCreds)?; 116 87 let handle = pds_creds.handle.clone().ok_or(UnpublishError::NoPdsCreds)?; ··· 122 93 let pds_url = match pds_creds.extra.get("pds").and_then(|v| v.as_str()) { 123 94 Some(url) => url.to_string(), 124 95 None => { 125 - let did = mlf_atproto::identity::resolve_handle_to_did(&http, &handle) 96 + let did = identity::resolve_handle_to_did(&http, &handle) 126 97 .await 127 98 .map_err(|e| UnpublishError::Session(e.to_string()))?; 128 - mlf_atproto::identity::resolve_did_to_pds(&http, &did) 99 + identity::resolve_did_to_pds(&http, &did) 129 100 .await 130 101 .map_err(|e| UnpublishError::Session(e.to_string()))? 131 102 } ··· 134 105 .await 135 106 .map_err(|e| UnpublishError::Session(e.to_string()))?; 136 107 137 - let collection = "com.atproto.lexicon.schema"; 108 + // List every manifest record (sorted newest-first by TID rkey). 109 + println!("Reading publish log..."); 110 + let manifest_records = records::list_all_records(&http, &pds_url, &sess.did, manifest::NSID) 111 + .await 112 + .map_err(|e| UnpublishError::Session(e.to_string()))?; 113 + if manifest_records.is_empty() { 114 + return Err(UnpublishError::NoManifest); 115 + } 116 + let (latest_rkey, latest) = latest_manifest(&manifest_records); 117 + let to_delete = manifest_items(&latest.value); 118 + 119 + if to_delete.is_empty() { 120 + println!("Latest manifest lists zero records."); 121 + } else { 122 + println!( 123 + "Latest manifest ({latest_rkey}) lists {} record(s) published under `{}`:", 124 + to_delete.len(), 125 + package.name 126 + ); 127 + for nsid in &to_delete { 128 + println!(" - {nsid}"); 129 + } 130 + } 131 + if manifest_records.len() > 1 { 132 + println!( 133 + "Publish log has {} manifest record(s) total — all will be removed.", 134 + manifest_records.len() 135 + ); 136 + } 137 + 138 + if !opts.yes && !confirm()? 
{ 139 + return Err(UnpublishError::Cancelled); 140 + } 138 141 142 + // Delete the schema records named in the latest manifest. 139 143 for nsid in &to_delete { 140 - // Only ever delete records in scope of the package — defence in depth 141 - // against a corrupted or hand-edited manifest naming foreign NSIDs. 142 - if !state.package.namespace_is_in_scope(nsid) && nsid != manifest::NSID { 144 + // Defence in depth: only ever delete records inside this 145 + // package's scope. A corrupted or hand-edited manifest pointing 146 + // at foreign NSIDs gets ignored. 147 + if !package.namespace_is_in_scope(nsid) { 143 148 eprintln!("Skipping out-of-scope record `{nsid}`"); 144 149 continue; 145 150 } ··· 148 153 &pds_url, 149 154 &sess.access_jwt, 150 155 &sess.did, 151 - collection, 156 + "com.atproto.lexicon.schema", 152 157 nsid, 153 158 ) 154 159 .await?; 155 160 println!(" ✓ deleted {nsid}"); 156 161 } 157 162 158 - // Finally, the manifest itself. 159 - delete_one( 160 - &http, 161 - &pds_url, 162 - &sess.access_jwt, 163 - &sess.did, 164 - collection, 165 - manifest::NSID, 166 - ) 167 - .await?; 168 - println!(" ✓ deleted {} (manifest)", manifest::NSID); 163 + // Delete every manifest record — the entire publish log for this repo. 164 + for r in &manifest_records { 165 + let rkey = rkey_from_uri(&r.uri); 166 + delete_one( 167 + &http, 168 + &pds_url, 169 + &sess.access_jwt, 170 + &sess.did, 171 + manifest::NSID, 172 + &rkey, 173 + ) 174 + .await?; 175 + println!(" ✓ deleted {}/{rkey} (manifest)", manifest::NSID); 176 + } 169 177 170 178 println!("\n✓ Unpublish complete"); 171 179 Ok(()) 180 + } 181 + 182 + /// Pick the newest manifest. Manifest rkeys are TIDs, which sort 183 + /// lexicographically in chronological order — so the highest rkey wins. 
184 + fn latest_manifest(records: &[records::Record]) -> (String, &records::Record) { 185 + let mut best_idx = 0usize; 186 + let mut best_rkey = rkey_from_uri(&records[0].uri); 187 + for (i, r) in records.iter().enumerate().skip(1) { 188 + let rkey = rkey_from_uri(&r.uri); 189 + if rkey > best_rkey { 190 + best_rkey = rkey; 191 + best_idx = i; 192 + } 193 + } 194 + (best_rkey, &records[best_idx]) 195 + } 196 + 197 + fn rkey_from_uri(uri: &str) -> String { 198 + uri.rsplit('/').next().unwrap_or(uri).to_string() 172 199 } 173 200 174 201 fn manifest_items(record: &serde_json::Value) -> BTreeSet<String> {
+27 -88
mlf-publish/src/manifest.rs
··· 1 1 //! `lol.mlf.package` manifest record construction. 2 2 //! 3 - //! The manifest is a single `com.atproto.lexicon.schema` record (rkey 4 - //! `lol.mlf.package`) that acts as the durable pointer for a publish 5 - //! event: a sorted list of `(nsid, cid)` pairs covering every lexicon 6 - //! published in the release, plus a resolved-dependencies list. 3 + //! The manifest is an **instance** of the `lol.mlf.package` record type, 4 + //! written to collection `lol.mlf.package` with a TID rkey. Each publish 5 + //! event gets its own immutable manifest record; the PDS retains the 6 + //! full publish history. 7 7 //! 8 8 //! The record's own CID (computed by the PDS on `putRecord`) is the 9 - //! deterministic identifier for the publish — "this version of this 10 - //! package." There's deliberately no semver; mutation isn't supported. 9 + //! deterministic identifier for this publish event — "this version of 10 + //! this package." There's deliberately no semver; mutation isn't 11 + //! supported. 12 + //! 13 + //! The schema *definition* for `lol.mlf.package` lives separately at 14 + //! `com.atproto.lexicon.schema/lol.mlf.package` — generated from the 15 + //! `.mlf` source like any other lexicon, published by the mlf project 16 + //! itself to its PDS. Third-party mlf users never write that record; 17 + //! they only write manifest instances. 11 18 //! 12 - //! Implementation note: we assemble the instance data (publishedAt, 13 - //! tool, published[], resolvedDependencies[]) using the typed 14 - //! [`mlf_generated_lexicon::lol::mlf::package::Package`] struct emitted by 15 - //! `mlf generate` from `lexicons/lol/mlf/package.mlf` and owned by the 16 - //! `mlf-generated-lexicon` crate. The `com.atproto.lexicon.schema` 17 - //! envelope fields (`$type`, `lexicon`, `id`, `description`, `defs`) 18 - //! are still hand-assembled: the atproto meta-schema only requires 19 - //! `lexicon: integer`, so it's not worth generating a type for — the 20 - //! 
envelope is stable and trivially verified by the 21 - //! `check_meta_schema` validator. 19 + //! Implementation note: the instance shape is driven by the typed 20 + //! [`mlf_generated_lexicon::lol::mlf::package::Package`] struct emitted 21 + //! from `lexicons/lol/mlf/package.mlf` — any schema change surfaces 22 + //! here as a compile error. 22 23 23 24 use mlf_generated_lexicon::lol::mlf::package::{Package, PublishedItem, ResolvedDependency}; 24 - use serde_json::{Map, Value, json, to_value}; 25 + use serde_json::{Map, Value, to_value}; 25 26 26 - /// The NSID the manifest is published under (rkey == NSID, per the 27 - /// ATProto lexicon spec). 27 + /// The NSID for the manifest record type — also the collection name 28 + /// where instances are written. 28 29 pub const NSID: &str = "lol.mlf.package"; 29 30 30 31 pub struct ManifestInputs<'a> { ··· 43 44 pub resolved_deps: &'a [(String, String)], 44 45 } 45 46 46 - /// Build the manifest record ready to be pushed via `putRecord`. The 47 - /// return value includes the `$type` so the CID compute matches what the 48 - /// PDS will compute on write. 47 + /// Build the manifest instance record ready to be pushed via 48 + /// `putRecord` to collection `lol.mlf.package`. The `$type` matches the 49 + /// collection so the PDS can validate the instance against the schema. 49 50 pub fn build(inputs: &ManifestInputs<'_>) -> Value { 50 51 let mut published: Vec<(String, String)> = inputs.published.to_vec(); 51 52 published.sort(); 52 53 let mut deps: Vec<(String, String)> = inputs.resolved_deps.to_vec(); 53 54 deps.sort(); 54 55 55 - // Build the instance payload using the typed struct emitted by 56 - // `mlf generate` — any schema change surfaces here as a compile error. 57 56 let instance = Package { 58 57 published_at: inputs.published_at.to_string(), 59 58 tool: inputs.tool.to_string(), ··· 72 71 }, 73 72 }; 74 73 75 - // Merge the instance fields into a `com.atproto.lexicon.schema` 76 - // record envelope. 
The envelope is hand-assembled because the 77 - // atproto meta-schema is too permissive to generate useful types 78 - // from (it only requires `lexicon: integer`). 79 74 let instance_value = to_value(&instance).expect("Package serialises"); 80 75 let instance_obj = instance_value 81 76 .as_object() ··· 83 78 .clone(); 84 79 85 80 let mut obj = Map::new(); 86 - obj.insert( 87 - "$type".into(), 88 - Value::String("com.atproto.lexicon.schema".into()), 89 - ); 90 - obj.insert("lexicon".into(), Value::Number(1.into())); 91 - obj.insert("id".into(), Value::String(NSID.into())); 92 - obj.insert( 93 - "description".into(), 94 - Value::String("MLF publish manifest".into()), 95 - ); 96 - obj.insert("defs".into(), manifest_defs()); 81 + obj.insert("$type".into(), Value::String(NSID.into())); 97 82 for (k, v) in instance_obj { 98 83 obj.insert(k, v); 99 84 } 100 85 Value::Object(obj) 101 86 } 102 87 103 - /// The `defs` block describing the instance shape. Kept in one place 104 - /// so it's easy to bump if the on-PDS shape ever needs tightening. 105 - /// The shape here matches what `mlf generate lexicon` would emit from 106 - /// our `.mlf` source; keeping it in Rust avoids needing a second 107 - /// generated artifact in the repo. 
108 - fn manifest_defs() -> Value { 109 - json!({ 110 - "main": { 111 - "type": "record", 112 - "key": "nsid", 113 - "record": { 114 - "type": "object", 115 - "required": ["publishedAt", "tool", "published"], 116 - "properties": { 117 - "publishedAt": {"type": "string", "format": "datetime"}, 118 - "tool": {"type": "string"}, 119 - "published": { 120 - "type": "array", 121 - "items": {"type": "ref", "ref": "#PublishedItem"} 122 - }, 123 - "resolvedDependencies": { 124 - "type": "array", 125 - "items": {"type": "ref", "ref": "#ResolvedDependency"} 126 - } 127 - } 128 - } 129 - }, 130 - "PublishedItem": { 131 - "type": "object", 132 - "required": ["nsid", "cid"], 133 - "properties": { 134 - "nsid": {"type": "string", "format": "nsid"}, 135 - "cid": {"type": "string", "format": "cid"} 136 - } 137 - }, 138 - "ResolvedDependency": { 139 - "type": "object", 140 - "required": ["nsid", "cid"], 141 - "properties": { 142 - "nsid": {"type": "string", "format": "nsid"}, 143 - "cid": {"type": "string", "format": "cid"} 144 - } 145 - } 146 - }) 147 - } 148 - 149 88 #[cfg(test)] 150 89 mod tests { 151 90 use super::*; ··· 161 100 ], 162 101 resolved_deps: &[("com.atproto.repo.strongRef".into(), "bafy3".into())], 163 102 }); 164 - assert_eq!(v["$type"], "com.atproto.lexicon.schema"); 165 - assert_eq!(v["id"], NSID); 166 - assert_eq!(v["lexicon"], 1); 103 + assert_eq!(v["$type"], NSID); 104 + assert!(v.get("lexicon").is_none()); 105 + assert!(v.get("defs").is_none()); 167 106 // published list is sorted 168 107 assert_eq!(v["published"][0]["nsid"], "com.example.other"); 169 108 assert_eq!(v["published"][1]["nsid"], "com.example.thing");
+4
mlf.toml
··· 1 1 [package] 2 2 name = "lol.mlf" 3 3 4 + [publish] 5 + pds = "default" 6 + dns = "cloudflare" 7 + 4 8 [source] 5 9 directory = "./lexicons" 6 10