A better Rust ATProto crate


Value serde impl, some bug fixes, attribute macros, a bunch of tests, lexicon codegen planning/partial skeleton

Orual 429fb2f9 83d7296b

+4054 -380
+12
.claude/settings.local.json
```json
{
  "permissions": {
    "allow": [
      "WebSearch",
      "WebFetch(domain:atproto.com)",
      "WebFetch(domain:github.com)",
      "WebFetch(domain:raw.githubusercontent.com)"
    ],
    "deny": [],
    "ask": []
  }
}
```
+2 -1
.gitignore
```diff
 /result
 /result-lib
 .direnv
-
+.claude
 /.pre-commit-config.yaml
+CLAUDE.md
```
+115
CLAUDE.md
# CLAUDE.md

This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.

## Project Overview

Jacquard is a suite of Rust crates for the AT Protocol (atproto/Bluesky). The project emphasizes spec-compliant, validated, performant baseline types with minimal boilerplate. Key design goals:

- Validated AT Protocol types including typed at:// URIs
- Custom lexicon extension support
- Lexicon `Value` type for working with unknown atproto data (dag-cbor or json)
- Using as much or as little of the crates as needed

## Workspace Structure

This is a Cargo workspace with several crates:

- **jacquard-common**: Core AT Protocol types (DIDs, handles, at-URIs, NSIDs, TIDs, CIDs, etc.) and the `CowStr` type for efficient string handling
- **jacquard-lexicon**: Lexicon parsing and Rust code generation from lexicon schemas
- **jacquard-api**: Generated API bindings (currently empty/in development)
- **jacquard-derive**: Derive macros for lexicon structures
- **jacquard**: Main binary (currently minimal)

## Development Commands

### Using Nix (preferred)
```bash
# Enter dev shell
nix develop

# Build
nix build

# Run
nix develop -c cargo run
```

### Using Cargo/Just
```bash
# Build
cargo build

# Run tests
cargo test

# Run specific test
cargo test <test_name>

# Run specific package tests
cargo test -p <package_name>

# Run
cargo run

# Auto-recompile and run
just watch [ARGS]

# Format and lint all
just pre-commit-all
```

## String Type Pattern

The codebase uses a consistent pattern for validated string types.
Each type should have:

### Constructors
- `new()`: Construct from a string slice with appropriate lifetime (borrows)
- `new_owned()`: Construct from `impl AsRef<str>`, taking ownership
- `new_static()`: Construct from `&'static str` using `SmolStr`/`CowStr`'s static constructor (no allocation)
- `raw()`: Same as `new()` but panics instead of returning `Result`
- `unchecked()`: Same as `new()` but doesn't validate (marked `unsafe`)
- `as_str()`: Return string slice

### Traits
All string types should implement:
- `Serialize` + `Deserialize` (custom impl for latter, sometimes for former)
- `FromStr`, `Display`
- `Debug`, `PartialEq`, `Eq`, `Hash`, `Clone`
- `From<T> for String`, `CowStr`, `SmolStr`
- `From<String>`, `From<CowStr>`, `From<SmolStr>`, or `TryFrom` if likely to fail
- `AsRef<str>`
- `Deref` with `Target = str` (usually)

### Implementation Details
- Use `#[repr(transparent)]` when possible (exception: at-uri type and components)
- Use `SmolStr` directly as inner type if most instances will be under 24 bytes
- Use `CowStr` for longer strings to allow borrowing from input
- Implement `IntoStatic` trait to take ownership of string types

## Code Style

- Avoid comments for self-documenting code
- Comments should not detail fixes when refactoring
- Professional writing within source code and comments only
- Prioritize long-term maintainability over implementation speed

## Testing

- Write test cases for all critical code
- Tests can be run per-package or workspace-wide
- Use `cargo test <name>` to run specific tests
- Current test coverage: 89 tests in jacquard-common

## Current State & Next Steps

### Completed
- ✅ Comprehensive validation tests for all core string types (handle, DID, NSID, TID, record key, AT-URI, datetime, language,
identifier)
- ✅ Validated implementations against AT Protocol specs and TypeScript reference implementation
- ✅ String type interface standardization (Language now has `new_static()`, Datetime has full conversion traits)
- ✅ Data serialization: Full serialize/deserialize for `Data<'_>`, `Array`, `Object` with format-specific handling (JSON vs CBOR)
- ✅ CidLink wrapper type with automatic `{"$link": "cid"}` serialization in JSON
- ✅ Integration test with real Bluesky thread data validates round-trip correctness

### Next Steps
1. **Lexicon Code Generation**: Begin work on lexicon-to-Rust code generation now that core types are stable
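The constructor/trait pattern described in CLAUDE.md can be sketched for a hypothetical validated type. This is a std-only illustration: the real crate types wrap `SmolStr`/`CowStr` and implement serde traits, and the validation rule below is a stand-in, not the actual handle grammar.

```rust
use std::fmt;
use std::str::FromStr;

/// Hypothetical validated string type following the pattern above.
/// Simplified: owns a String instead of SmolStr/CowStr, and the
/// validation rule here is illustrative only.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[repr(transparent)]
pub struct Handle(String);

#[derive(Debug)]
pub struct InvalidHandle;

impl Handle {
    /// Validating constructor (placeholder check, not the real grammar).
    pub fn new(s: &str) -> Result<Self, InvalidHandle> {
        if !s.is_empty() && s.is_ascii() && s.contains('.') {
            Ok(Handle(s.to_owned()))
        } else {
            Err(InvalidHandle)
        }
    }

    /// Panicking variant of `new()`.
    pub fn raw(s: &str) -> Self {
        Self::new(s).expect("invalid handle")
    }

    pub fn as_str(&self) -> &str {
        &self.0
    }
}

impl FromStr for Handle {
    type Err = InvalidHandle;
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Self::new(s)
    }
}

impl fmt::Display for Handle {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(&self.0)
    }
}

impl AsRef<str> for Handle {
    fn as_ref(&self) -> &str {
        &self.0
    }
}

fn main() {
    let h = Handle::new("alice.bsky.social").unwrap();
    assert_eq!(h.as_str(), "alice.bsky.social");
    assert!(Handle::new("").is_err());
    println!("{h}");
}
```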
+80 -8
Cargo.lock
```diff
···
 checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555"

 [[package]]
+name = "either"
+version = "1.15.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
+
+[[package]]
 name = "enum_dispatch"
 version = "0.3.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
···
 checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"

 [[package]]
+name = "itertools"
+version = "0.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285"
+dependencies = [
+ "either",
+]
+
+[[package]]
 name = "itoa"
 version = "1.0.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
···
 ]

 [[package]]
+name = "jacquard-api"
+version = "0.1.0"
+
+[[package]]
 name = "jacquard-common"
 version = "0.1.0"
 dependencies = [
···
 "miette",
 "multibase",
 "multihash",
+"num-traits",
 "ouroboros",
 "rand",
 "regex",
···
 ]

 [[package]]
+name = "jacquard-derive"
+version = "0.1.0"
+dependencies = [
+ "heck 0.5.0",
+ "itertools",
+ "jacquard-common",
+ "jacquard-lexicon",
+ "prettyplease",
+ "proc-macro2",
+ "quote",
+ "serde",
+ "serde_json",
+ "serde_repr",
+ "serde_with",
+ "syn 2.0.106",
+]
+
+[[package]]
 name = "jacquard-lexicon"
 version = "0.1.0"
+dependencies = [
+ "heck 0.5.0",
+ "itertools",
+ "jacquard-common",
+ "prettyplease",
+ "proc-macro2",
+ "quote",
+ "serde",
+ "serde_json",
+ "serde_repr",
+ "serde_with",
+ "syn 2.0.106",
+]

 [[package]]
 name = "js-sys"
···
 ]

 [[package]]
+name = "prettyplease"
+version = "0.2.37"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b"
+dependencies = [
+ "proc-macro2",
+ "syn 2.0.106",
+]
+
+[[package]]
 name = "proc-macro-error"
 version = "1.0.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
···

 [[package]]
 name = "quote"
-version = "1.0.40"
+version = "1.0.41"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
+checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1"
 dependencies = [
  "proc-macro2",
 ]
···
 [[package]]
 name = "serde"
-version = "1.0.227"
+version = "1.0.228"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "80ece43fc6fbed4eb5392ab50c07334d3e577cbf40997ee896fe7af40bba4245"
+checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
 dependencies = [
  "serde_core",
  "serde_derive",
···
 [[package]]
 name = "serde_core"
-version = "1.0.227"
+version = "1.0.228"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7a576275b607a2c86ea29e410193df32bc680303c82f31e275bbfcafe8b33be5"
+checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
 dependencies = [
  "serde_derive",
 ]

 [[package]]
 name = "serde_derive"
-version = "1.0.227"
+version = "1.0.228"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "51e694923b8824cf0e9b382adf0f60d4e05f348f357b38833a3fa5ed7c2ede04"
+checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
 dependencies = [
  "proc-macro2",
  "quote",
···
 "ryu",
 "serde",
 "serde_core",
+]
+
+[[package]]
+name = "serde_repr"
+version = "0.1.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.106",
 ]

 [[package]]
```
+356
codegen_plan.md
# Lexicon Codegen Plan

## Goal
Generate idiomatic Rust types from AT Protocol lexicon schemas with minimal nesting/indirection.

## Existing Infrastructure

### Already Implemented
- **lexicon.rs**: Complete lexicon parsing types (`LexiconDoc`, `LexUserType`, `LexObject`, etc)
- **fs.rs**: Directory walking for finding `.json` lexicon files
- **schema.rs**: `find_ref_unions()` - collects union fields from a single lexicon
- **output.rs**: Partial - has string type mapping and doc comment generation

### Attribute Macros
- `#[lexicon]` - adds `extra_data` field to structs
- `#[open_union]` - adds `Unknown(Data<'s>)` variant to enums

## Design Decisions

### Module/File Structure
- NSID `app.bsky.feed.post` → `app_bsky/feed/post.rs`
- Flat module names (no `app::bsky`, just `app_bsky`)
- Parent modules: `app_bsky/feed.rs` with `pub mod post;`

### Type Naming
- **Main def**: Use last segment of NSID
  - `app.bsky.feed.post#main` → `Post`
- **Other defs**: Pascal-case the def name
  - `replyRef` → `ReplyRef`
- **Union variants**: Use last segment of ref NSID
  - `app.bsky.embed.images` → `Images`
  - Collisions resolved by module path, not type name
- **No proliferation of `Main` types** like atrium has

### Type Generation

#### Records (lexRecord)
```rust
#[lexicon]
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct Post<'s> {
    /// Client-declared timestamp...
    pub created_at: Datetime,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub embed: Option<RecordEmbed<'s>>,
    pub text: CowStr<'s>,
}
```

#### Objects (lexObject)
Same as records but without `#[lexicon]` if inline/not a top-level def.
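The module/file-structure and naming rules above can be sketched as plain string manipulation. These helpers are hypothetical illustrations of the mapping, not the crate's actual codegen API, and the pascal-casing is simplified (it only uppercases the first character, which suffices for camelCase NSID segments).

```rust
/// Hypothetical helper: maps an NSID like "app.bsky.feed.post" to a
/// file path, fusing the first two segments into a flat module (app_bsky).
fn nsid_to_module_path(nsid: &str) -> String {
    let segs: Vec<&str> = nsid.split('.').collect();
    let top = format!("{}_{}", segs[0], segs[1]);
    let rest = segs[2..].join("/");
    if rest.is_empty() {
        format!("{top}.rs")
    } else {
        format!("{top}/{rest}.rs")
    }
}

/// Hypothetical helper: derives the main def's type name from the last
/// NSID segment (simplified pascal-casing: uppercase the first char).
fn main_type_name(nsid: &str) -> String {
    let last = nsid.rsplit('.').next().unwrap();
    let mut out = String::new();
    let mut first = true;
    for c in last.chars() {
        if first {
            out.extend(c.to_uppercase());
            first = false;
        } else {
            out.push(c);
        }
    }
    out
}

fn main() {
    assert_eq!(nsid_to_module_path("app.bsky.feed.post"), "app_bsky/feed/post.rs");
    assert_eq!(main_type_name("app.bsky.feed.post"), "Post");
    assert_eq!(main_type_name("app.bsky.feed.getAuthorFeed"), "GetAuthorFeed");
    println!("ok");
}
```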
#### Unions (lexRefUnion)
```rust
#[open_union]
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(tag = "$type")]
pub enum RecordEmbed<'s> {
    #[serde(rename = "app.bsky.embed.images")]
    Images(Box<jacquard_api::app_bsky::embed::Images<'s>>),
    #[serde(rename = "app.bsky.embed.video")]
    Video(Box<jacquard_api::app_bsky::embed::Video<'s>>),
}
```

- Use `Box<T>` for all variants (handles circular refs)
- `#[open_union]` adds `Unknown(Data<'s>)` catch-all

#### Queries (lexXrpcQuery)
```rust
pub struct GetAuthorFeedParams<'s> {
    pub actor: AtIdentifier<'s>,
    pub limit: Option<i64>,
    pub cursor: Option<CowStr<'s>>,
}

pub struct GetAuthorFeedOutput<'s> {
    pub cursor: Option<CowStr<'s>>,
    pub feed: Vec<FeedViewPost<'s>>,
}
```

- Flat params/output structs
- No nesting like `Input { params: {...} }`

#### Procedures (lexXrpcProcedure)
Same as queries but with both `Input` and `Output` structs.
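To show where flat params end up at runtime, here is a std-only sketch of how a client might assemble an XRPC query URL from such a params struct. The `/xrpc/{nsid}` path shape is the standard atproto convention; the helper itself is hypothetical (no percent-encoding, no real serde), not the crate's client code.

```rust
/// Hypothetical sketch: flat query params become query-string pairs on
/// the standard /xrpc/{nsid} path. Optional (None) params are omitted.
/// Real code would percent-encode values and serialize via serde.
fn xrpc_query_url(base: &str, nsid: &str, params: &[(&str, Option<String>)]) -> String {
    let mut url = format!("{base}/xrpc/{nsid}");
    let mut sep = '?';
    for (key, value) in params {
        if let Some(v) = value {
            url.push(sep);
            url.push_str(key);
            url.push('=');
            url.push_str(v);
            sep = '&';
        }
    }
    url
}

fn main() {
    let url = xrpc_query_url(
        "https://public.api.bsky.app",
        "app.bsky.feed.getAuthorFeed",
        &[
            ("actor", Some("alice.test".to_string())),
            ("limit", Some("10".to_string())),
            ("cursor", None), // optional params are simply omitted
        ],
    );
    assert_eq!(
        url,
        "https://public.api.bsky.app/xrpc/app.bsky.feed.getAuthorFeed?actor=alice.test&limit=10"
    );
    println!("ok");
}
```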
### Field Handling

#### Optional Fields
- Fields not in `required: []` → `Option<T>`
- Add `#[serde(skip_serializing_if = "Option::is_none")]`

#### Lifetimes
- All types have `'a` lifetime for borrowing from input
- `#[serde(borrow)]` where needed for zero-copy

#### Type Mapping
- `LexString` with format → specific types (`Datetime`, `Did`, etc)
- `LexString` without format → `CowStr<'a>`
- `LexInteger` → `i64`
- `LexBoolean` → `bool`
- `LexBytes` → `Bytes`
- `LexCidLink` → `CidLink<'a>`
- `LexBlob` → `Blob<'a>`
- `LexRef` → resolve to actual type path
- `LexRefUnion` → generate enum
- `LexArray` → `Vec<T>`
- `LexUnknown` → `Data<'a>`

### Reference Resolution

#### Known Refs
- Check corpus for ref existence
- `#ref: "app.bsky.embed.images"` → `jacquard_api::app_bsky::embed::Images<'a>`
- Handle fragments: `#ref: "com.example.foo#bar"` → `jacquard_api::com_example::foo::Bar<'a>`

#### Unknown Refs
- **In struct fields**: use `Data<'a>` as fallback type
- **In union variants**: handled by `Unknown(Data<'a>)` variant from `#[open_union]`
- Optional: log warnings for missing refs

## Implementation Phases

### Phase 1: Corpus Loading & Registry
**Goal**: Load all lexicons into memory for ref resolution

**Tasks**:
1. Create `LexiconCorpus` struct
   - `HashMap<SmolStr, LexiconDoc<'static>>` - NSID → doc
   - Methods: `load_from_dir()`, `get()`, `resolve_ref()`
2. Load all `.json` files from lexicon directory
3. Parse into `LexiconDoc` and insert into registry
4. Handle fragments in refs (`nsid#def`)

**Output**: Corpus registry that can resolve any ref

### Phase 2: Ref Analysis & Union Collection
**Goal**: Build complete picture of what refs exist and what unions need

**Tasks**:
1. Extend `find_ref_unions()` to work across entire corpus
2. For each union, collect all refs and check existence
3. Build `UnionRegistry`:
   - Union name → list of (known refs, unknown refs)
4. Detect circular refs (optional - or just Box everything)

**Output**: Complete list of unions to generate with their variants

### Phase 3: Code Generation - Core Types
**Goal**: Generate Rust code for individual types

**Tasks**:
1. Implement type generators:
   - `generate_struct()` for records/objects
   - `generate_enum()` for unions
   - `generate_field()` for object properties
   - `generate_type()` for primitives/refs
2. Handle optional fields (`required` list)
3. Add doc comments from `description`
4. Apply `#[lexicon]` / `#[open_union]` macros
5. Add serde attributes

**Output**: `TokenStream` for each type

### Phase 4: Module Organization
**Goal**: Organize generated types into module hierarchy

**Tasks**:
1. Parse NSID into components: `["app", "bsky", "feed", "post"]`
2. Determine file paths: `app_bsky/feed/post.rs`
3. Generate module files: `app_bsky/feed.rs` with `pub mod post;`
4. Generate root module: `app_bsky.rs`
5. Handle re-exports if needed

**Output**: File path → generated code mapping

### Phase 5: File Writing
**Goal**: Write generated code to filesystem

**Tasks**:
1. Format code with `prettyplease`
2. Create directory structure
3. Write module files
4. Write type files
5. Optional: run `rustfmt`

**Output**: Generated code on disk

### Phase 6: Testing & Validation
**Goal**: Ensure generated code compiles and works

**Tasks**:
1. Generate code for test lexicons
2. Compile generated code
3. Test serialization/deserialization
4. Test union variant matching
5. Test extra_data capture

## Edge Cases & Considerations

### Circular References
- **Simple approach**: Union variants always use `Box<T>` → handles all circular refs
- **Alternative**: DFS cycle detection to only Box when needed
  - Track visited refs and recursion stack
  - If ref appears in rec_stack → cycle detected
  - Algorithm:

```rust
fn has_cycle(corpus, start_ref, visited, rec_stack) -> bool {
    visited.insert(start_ref);
    rec_stack.insert(start_ref);

    for child_ref in collect_refs_from_def(resolve(start_ref)) {
        if !visited.contains(child_ref) {
            if has_cycle(corpus, child_ref, visited, rec_stack) {
                return true;
            }
        } else if rec_stack.contains(child_ref) {
            return true; // back edge = cycle
        }
    }

    rec_stack.remove(start_ref);
    false
}
```

- Only box variants that participate in cycles
- **Recommendation**: Start with simple (always Box), optimize later if needed

### Name Collisions
- Multiple types with same name in different lexicons
- Module path disambiguates: `app_bsky::feed::Post` vs `com_example::feed::Post`

### Unknown Refs
- Fallback to `Data<'s>` in struct fields
- Caught by `Unknown` variant in unions
- Warn during generation

### Inline Defs
- Nested objects/unions in same lexicon
- Generate as separate types in same file
- Keep names scoped to parent (e.g., `PostReplyRef`)

### Arrays
- `Vec<T>` for arrays
- Handle nested unions in arrays

### Tokens
- Simple marker types
- Generate as unit structs or type aliases?

## Traits for Generated Types

### Collection Trait (Records)
Records implement the existing `Collection` trait from jacquard-common:

```rust
pub struct Post<'a> {
    // ... fields
}

impl Collection for Post<'_> {
    const NSID: &'static str = "app.bsky.feed.post";
    type Record = Post<'static>;
}
```

### XrpcRequest Trait (Queries/Procedures)
New trait for XRPC endpoints:

```rust
pub trait XrpcRequest<'x> {
    /// The NSID for this XRPC method
    const NSID: &'static str;

    /// HTTP method (GET for queries, POST for procedures)
    const METHOD: XrpcMethod;

    /// Input encoding (MIME type, e.g., "application/json")
    /// None for queries (no body)
    const INPUT_ENCODING: Option<&'static str>;

    /// Output encoding (MIME type)
    const OUTPUT_ENCODING: &'static str;

    /// Request parameters type (query params or body)
    type Params: Serialize;

    /// Response output type
    type Output: Deserialize<'x>;
}

pub enum XrpcMethod {
    Query,     // GET
    Procedure, // POST
}
```

**Generated implementation:**
```rust
pub struct GetAuthorFeedParams<'a> {
    pub actor: AtIdentifier<'a>,
    pub limit: Option<i64>,
    pub cursor: Option<CowStr<'a>>,
}

pub struct GetAuthorFeedOutput<'a> {
    pub cursor: Option<CowStr<'a>>,
    pub feed: Vec<FeedViewPost<'a>>,
}

impl XrpcRequest for GetAuthorFeedParams<'_> {
    const NSID: &'static str = "app.bsky.feed.getAuthorFeed";
    const METHOD: XrpcMethod = XrpcMethod::Query;
    const INPUT_ENCODING: Option<&'static str> = None; // queries have no body
    const OUTPUT_ENCODING: &'static str = "application/json";

    type Params = Self;
    type Output = GetAuthorFeedOutput<'static>;
}
```

**Encoding variations:**
- Most procedures: `"application/json"` for input/output
- Blob uploads: `"*/*"` or specific MIME type for input
- CAR files: `"application/vnd.ipld.car"` for repo operations
- Read from lexicon's `input.encoding` and `output.encoding` fields

**Trait benefits:**
- Allows monomorphization (static dispatch) for performance
- Also supports `dyn XrpcRequest` for dynamic dispatch if needed
- Client code can be generic over `impl XrpcRequest`

### Subscriptions
WebSocket streams - defer for now. Will need separate trait with message types.

## Open Questions

1. **Validation**: Generate runtime validation (min/max length, regex, etc)?
2. **Tokens**: How to represent token types?
3. **Errors**: How to handle codegen errors (missing refs, invalid schemas)?
4. **Incremental**: Support incremental codegen (only changed lexicons)?
5. **Formatting**: Always run rustfmt or rely on prettyplease?
6. **XrpcRequest location**: Should trait live in jacquard-common or separate jacquard-xrpc crate?

## Success Criteria

- [ ] Generates code for all official AT Protocol lexicons
- [ ] Generated code compiles without errors
- [ ] No `Main` proliferation
- [ ] Union variants have readable names
- [ ] Unknown refs handled gracefully
- [ ] `#[lexicon]` and `#[open_union]` applied correctly
- [ ] Serialization round-trips correctly
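The `has_cycle` pseudocode in the plan above can be made concrete over a plain adjacency map. This std-only sketch elides the corpus/`resolve` machinery and uses string ref names directly; it is an illustration of the DFS, not the crate's implementation.

```rust
use std::collections::{HashMap, HashSet};

/// Std-only version of the has_cycle sketch: the lexicon corpus is
/// reduced to an adjacency map from ref name to child refs. A back
/// edge into the recursion stack signals a cycle.
fn has_cycle<'a>(
    graph: &HashMap<&'a str, Vec<&'a str>>,
    node: &'a str,
    visited: &mut HashSet<&'a str>,
    rec_stack: &mut HashSet<&'a str>,
) -> bool {
    visited.insert(node);
    rec_stack.insert(node);
    for &child in graph.get(node).into_iter().flatten() {
        if !visited.contains(child) {
            if has_cycle(graph, child, visited, rec_stack) {
                return true;
            }
        } else if rec_stack.contains(child) {
            return true; // back edge = cycle
        }
    }
    rec_stack.remove(node);
    false
}

fn main() {
    // post -> replyRef -> post forms a cycle; images is acyclic.
    let mut graph = HashMap::new();
    graph.insert("post", vec!["replyRef", "images"]);
    graph.insert("replyRef", vec!["post"]);
    graph.insert("images", vec![]);

    let (mut v, mut s) = (HashSet::new(), HashSet::new());
    assert!(has_cycle(&graph, "post", &mut v, &mut s));

    let (mut v, mut s) = (HashSet::new(), HashSet::new());
    assert!(!has_cycle(&graph, "images", &mut v, &mut s));
    println!("ok");
}
```

Variants found to participate in a cycle would get `Box<T>`; everything else could stay unboxed, as the plan's alternative suggests.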
+14
crates/jacquard-api/Cargo.toml
```toml
[package]
name = "jacquard-api"
edition.workspace = true
version.workspace = true
authors.workspace = true
repository.workspace = true
keywords.workspace = true
categories.workspace = true
readme.workspace = true
documentation.workspace = true
exclude.workspace = true
description.workspace = true

[dependencies]
```
+16
crates/jacquard-api/src/lib.rs
```rust
// Placeholder for codegen API output

pub fn add(left: u64, right: u64) -> u64 {
    left + right
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn it_works() {
        let result = add(2, 2);
        assert_eq!(result, 4);
    }
}
```
+1
crates/jacquard-common/Cargo.toml
```diff
 miette = "7.6.0"
 multibase = "0.9.1"
 multihash = "0.19.3"
+num-traits = "0.2.19"
 ouroboros = "0.18.5"
 rand = "0.9.2"
 regex = "1.11.3"
```
+22
crates/jacquard-common/src/cowstr.rs
```diff
     }
 }

+impl PartialOrd for CowStr<'_> {
+    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Ord for CowStr<'_> {
+    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
+        // All four Borrowed/Owned pairings reduce to comparing the
+        // underlying string slices.
+        self.as_ref().cmp(other.as_ref())
+    }
+}
+
 impl Eq for CowStr<'_> {}

 impl Hash for CowStr<'_> {
```
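A std-only illustration of why the Borrowed/Owned pairings collapse to a single slice comparison, and why `partial_cmp` should delegate to `cmp`. `MiniCow` is a stand-in for the real `CowStr`, which lives in jacquard-common.

```rust
use std::cmp::Ordering;

/// Minimal stand-in for CowStr: ordering must depend only on the text,
/// never on whether the string happens to be borrowed or owned.
#[derive(Debug, PartialEq, Eq)]
enum MiniCow<'a> {
    Borrowed(&'a str),
    Owned(String),
}

impl MiniCow<'_> {
    fn as_str(&self) -> &str {
        match self {
            MiniCow::Borrowed(s) => s,
            MiniCow::Owned(s) => s.as_str(),
        }
    }
}

impl Ord for MiniCow<'_> {
    fn cmp(&self, other: &Self) -> Ordering {
        self.as_str().cmp(other.as_str())
    }
}

impl PartialOrd for MiniCow<'_> {
    // Delegating keeps PartialOrd and Ord trivially consistent.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

fn main() {
    let a = MiniCow::Borrowed("apple");
    let b = MiniCow::Owned("banana".to_string());
    assert!(a < b); // ordered by content, not by variant
    assert_eq!(
        MiniCow::Borrowed("x").cmp(&MiniCow::Owned("x".to_string())),
        Ordering::Equal
    );
    println!("ok");
}
```

With a total order in place, `CowStr` keys can be used in `BTreeMap`/`BTreeSet` and sorted collections regardless of variant.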
-2
crates/jacquard-common/src/types.rs
```diff
 use serde::{Deserialize, Serialize};

-use crate::types::nsid::Nsid;
-
 pub mod aturi;
 pub mod blob;
 pub mod cid;
```
+77 -1
crates/jacquard-common/src/types/aturi.rs
```diff
 pub type UriPathBuf = UriPath<'static>;

 pub static ATURI_REGEX: LazyLock<Regex> = LazyLock::new(|| {
-    Regex::new(r##"^at://(?<authority>[a-zA-Z0-9._:%-]+)(/(?<collection>[a-zA-Z0-9-.]+)(/(?<rkey>[a-zA-Z0-9._~:@!$&%')(*+,;=-]+))?)?(#(?<fragment>/[a-zA-Z0-9._~:@!$&%')(*+,;=-[]/\]*))?$"##).unwrap()
+    // Fragment allows: / and \ and other special chars. In raw strings, backslashes are literal.
+    Regex::new(r##"^at://(?<authority>[a-zA-Z0-9._:%-]+)(/(?<collection>[a-zA-Z0-9-.]+)(/(?<rkey>[a-zA-Z0-9._~:@!$&%')(*+,;=-]+))?)?(#(?<fragment>/[a-zA-Z0-9._~:@!$&%')(*+,;=\-\[\]/\\]*))?$"##).unwrap()
 });

 impl<'u> AtUri<'u> {
···
         self.inner.borrow_uri().as_ref()
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn valid_at_uris() {
+        assert!(AtUri::new("at://did:plc:foo").is_ok());
+        assert!(AtUri::new("at://alice.bsky.social").is_ok());
+        assert!(AtUri::new("at://did:plc:foo/com.example.post").is_ok());
+        assert!(AtUri::new("at://did:plc:foo/com.example.post/123").is_ok());
+    }
+
+    #[test]
+    fn authority_only() {
+        let uri = AtUri::new("at://alice.test").unwrap();
+        assert_eq!(uri.authority().as_str(), "alice.test");
+        assert!(uri.collection().is_none());
+        assert!(uri.rkey().is_none());
+    }
+
+    #[test]
+    fn authority_and_collection() {
+        let uri = AtUri::new("at://alice.test/com.example.foo").unwrap();
+        assert_eq!(uri.authority().as_str(), "alice.test");
+        assert_eq!(uri.collection().unwrap().as_str(), "com.example.foo");
+        assert!(uri.rkey().is_none());
+    }
+
+    #[test]
+    fn full_uri() {
+        let uri = AtUri::new("at://alice.test/com.example.foo/123").unwrap();
+        assert_eq!(uri.authority().as_str(), "alice.test");
+        assert_eq!(uri.collection().unwrap().as_str(), "com.example.foo");
+        assert_eq!(uri.rkey().unwrap().as_ref(), "123");
+    }
+
+    #[test]
+    fn with_fragment() {
+        let uri = AtUri::new("at://alice.test/com.example.foo/123#/path").unwrap();
+        assert_eq!(uri.fragment().as_ref().unwrap().as_ref(), "/path");
+
+        // Fragment must start with /
+        assert!(AtUri::new("at://alice.test#path").is_err());
+        assert!(AtUri::new("at://alice.test#/foo/bar").is_ok());
+    }
+
+    #[test]
+    fn no_trailing_slash() {
+        assert!(AtUri::new("at://alice.test/").is_err());
+        assert!(AtUri::new("at://alice.test/com.example.foo/").is_err());
+    }
+
+    #[test]
+    fn must_have_authority() {
+        assert!(AtUri::new("at://").is_err());
+        assert!(AtUri::new("at:///com.example.foo").is_err());
+    }
+
+    #[test]
+    fn must_start_with_at_scheme() {
+        assert!(AtUri::new("alice.test").is_err());
+        assert!(AtUri::new("https://alice.test").is_err());
+    }
+
+    #[test]
+    fn max_length() {
+        // Spec says 8KB max
+        let long_did = format!("did:plc:{}", "a".repeat(8000));
+        let uri = format!("at://{}", long_did);
+        assert!(uri.len() < 8192);
+        // Should work if components are valid
+        // (our DID will fail at 2048 chars, but this tests the URI doesn't impose extra limits)
+    }
+}
```
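The shape those tests exercise (authority, optional collection, optional rkey, optional `#fragment`) can be sketched without the regex. This is a simplified, std-only splitter for illustration only, not the crate's parser: per-component validation (DID/handle grammar, rkey charset, fragment must start with `/`) is elided.

```rust
/// Illustrative AT-URI splitter (not the crate's regex-based parser):
/// returns (authority, collection, rkey, fragment). Component-level
/// validation is deliberately elided.
fn split_at_uri(uri: &str) -> Option<(&str, Option<&str>, Option<&str>, Option<&str>)> {
    let rest = uri.strip_prefix("at://")?;
    let (rest, fragment) = match rest.split_once('#') {
        Some((r, f)) => (r, Some(f)),
        None => (rest, None),
    };
    let mut parts = rest.splitn(3, '/');
    let authority = parts.next().filter(|a| !a.is_empty())?;
    let collection = parts.next();
    let rkey = parts.next();
    // Reject empty trailing segments like "at://host/coll/"
    if collection == Some("") || rkey == Some("") {
        return None;
    }
    Some((authority, collection, rkey, fragment))
}

fn main() {
    let (auth, coll, rkey, frag) =
        split_at_uri("at://alice.test/com.example.foo/123#/path").unwrap();
    assert_eq!(auth, "alice.test");
    assert_eq!(coll, Some("com.example.foo"));
    assert_eq!(rkey, Some("123"));
    assert_eq!(frag, Some("/path"));

    assert!(split_at_uri("https://alice.test").is_none());
    assert!(split_at_uri("at://alice.test/com.example.foo/").is_none());
    println!("ok");
}
```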
+33 -1
crates/jacquard-common/src/types/blob.rs
```diff
     str::FromStr,
 };

-#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash)]
+#[derive(Deserialize, Debug, Clone, PartialEq, Eq, Hash)]
 #[serde(rename_all = "camelCase")]
 pub struct Blob<'b> {
     pub r#ref: Cid<'b>,
     #[serde(borrow)]
     pub mime_type: MimeType<'b>,
     pub size: usize,
 }
+
+impl Serialize for Blob<'_> {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        use serde::ser::SerializeMap;
+
+        if serializer.is_human_readable() {
+            // JSON: ref needs to be {"$link": "cid"}
+            let mut map = serializer.serialize_map(Some(4))?;
+            map.serialize_entry("$type", "blob")?;
+
+            // Serialize ref as {"$link": "cid_string"}
+            let mut ref_map = std::collections::BTreeMap::new();
+            ref_map.insert("$link", self.r#ref.as_str());
+            map.serialize_entry("ref", &ref_map)?;
+
+            map.serialize_entry("mimeType", &self.mime_type)?;
+            map.serialize_entry("size", &self.size)?;
+            map.end()
+        } else {
+            // CBOR: ref is just the CID directly
+            let mut map = serializer.serialize_map(Some(4))?;
+            map.serialize_entry("$type", "blob")?;
+            map.serialize_entry("ref", &self.r#ref)?;
+            map.serialize_entry("mimeType", &self.mime_type)?;
+            map.serialize_entry("size", &self.size)?;
+            map.end()
+        }
+    }
+}

 impl IntoStatic for Blob<'_> {
```
+266
crates/jacquard-common/src/types/cid.rs
```diff
         self.as_str()
     }
 }
+
+/// CID link wrapper that serializes as {"$link": "cid"} in JSON
+/// and as raw CID in CBOR
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[repr(transparent)]
+pub struct CidLink<'c>(pub Cid<'c>);
+
+impl<'c> CidLink<'c> {
+    pub fn new(cid: &'c [u8]) -> Result<Self, Error> {
+        Ok(Self(Cid::new(cid)?))
+    }
+
+    pub fn new_owned(cid: &[u8]) -> Result<CidLink<'static>, Error> {
+        Ok(CidLink(Cid::new_owned(cid)?))
+    }
+
+    pub fn new_static(cid: &'static str) -> Self {
+        Self(Cid::str(cid))
+    }
+
+    pub fn ipld(cid: IpldCid) -> CidLink<'static> {
+        CidLink(Cid::ipld(cid))
+    }
+
+    pub fn str(cid: &'c str) -> Self {
+        Self(Cid::str(cid))
+    }
+
+    pub fn cow_str(cid: CowStr<'c>) -> Self {
+        Self(Cid::cow_str(cid))
+    }
+
+    pub fn as_str(&self) -> &str {
+        self.0.as_str()
+    }
+
+    pub fn to_ipld(&self) -> Result<IpldCid, cid::Error> {
+        self.0.to_ipld()
+    }
+
+    pub fn into_inner(self) -> Cid<'c> {
+        self.0
+    }
+}
+
+impl fmt::Display for CidLink<'_> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        self.0.fmt(f)
+    }
+}
+
+impl FromStr for CidLink<'_> {
+    type Err = Infallible;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        Ok(CidLink(Cid::from_str(s)?))
+    }
+}
+
+impl IntoStatic for CidLink<'_> {
+    type Output = CidLink<'static>;
+
+    fn into_static(self) -> Self::Output {
+        CidLink(self.0.into_static())
+    }
+}
+
+impl Serialize for CidLink<'_> {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        if serializer.is_human_readable() {
+            // JSON: {"$link": "cid_string"}
+            use serde::ser::SerializeMap;
+            let mut map = serializer.serialize_map(Some(1))?;
+            map.serialize_entry("$link", self.0.as_str())?;
+            map.end()
+        } else {
+            // CBOR: raw CID
+            self.0.serialize(serializer)
+        }
+    }
+}
+
+impl<'de> Deserialize<'de> for CidLink<'_> {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        if deserializer.is_human_readable() {
+            // JSON: expect {"$link": "cid_string"}
+            struct LinkVisitor;
+
+            impl<'de> Visitor<'de> for LinkVisitor {
+                type Value = CidLink<'static>;
+
+                fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+                    formatter.write_str("a CID link object with $link field")
+                }
+
+                fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error>
+                where
+                    A: serde::de::MapAccess<'de>,
+                {
+                    use serde::de::Error;
+
+                    let mut link: Option<String> = None;
+
+                    while let Some(key) = map.next_key::<String>()? {
+                        if key == "$link" {
+                            link = Some(map.next_value()?);
+                        } else {
+                            // Skip unknown fields
+                            let _: serde::de::IgnoredAny = map.next_value()?;
+                        }
+                    }
+
+                    if let Some(cid_str) = link {
+                        Ok(CidLink(Cid::from(cid_str)))
+                    } else {
+                        Err(A::Error::missing_field("$link"))
+                    }
+                }
+            }
+
+            deserializer.deserialize_map(LinkVisitor)
+        } else {
+            // CBOR: raw CID
+            Ok(CidLink(Cid::deserialize(deserializer)?))
+        }
+    }
+}
+
+impl From<CidLink<'_>> for String {
+    fn from(value: CidLink) -> Self {
+        value.0.into()
+    }
+}
+
+impl<'c> From<CidLink<'c>> for CowStr<'c> {
+    fn from(value: CidLink<'c>) -> Self {
+        value.0.into()
+    }
+}
+
+impl From<String> for CidLink<'_> {
+    fn from(value: String) -> Self {
+        CidLink(Cid::from(value))
+    }
+}
+
+impl<'c> From<CowStr<'c>> for CidLink<'c> {
+    fn from(value: CowStr<'c>) -> Self {
+        CidLink(Cid::from(value))
+    }
+}
+
+impl From<IpldCid> for CidLink<'_> {
+    fn from(value: IpldCid) -> Self {
+        CidLink(Cid::from(value))
+    }
+}
+
+impl<'c> From<Cid<'c>> for CidLink<'c> {
+    fn from(value: Cid<'c>) -> Self {
+        CidLink(value)
+    }
+}
+
+impl<'c> From<CidLink<'c>> for Cid<'c> {
+    fn from(value: CidLink<'c>) -> Self {
+        value.0
+    }
+}
+
+impl AsRef<str> for CidLink<'_> {
+    fn as_ref(&self) -> &str {
+        self.0.as_ref()
+    }
+}
+
+impl Deref for CidLink<'_> {
+    type Target = str;
+
+    fn deref(&self) -> &Self::Target {
+        self.0.deref()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    const TEST_CID: &str = "bafyreih4g7bvo6hdq2juolev5bfzpbo4ewkxh5mzxwgvkjp3kitc6hqkha";
+
+    #[test]
+    fn cidlink_serialize_json() {
+        let link = CidLink::str(TEST_CID);
+        let json = serde_json::to_string(&link).unwrap();
+        assert_eq!(json, r#"{"$link":"bafyreih4g7bvo6hdq2juolev5bfzpbo4ewkxh5mzxwgvkjp3kitc6hqkha"}"#);
+    }
+
+    #[test]
+    fn cidlink_deserialize_json() {
+        let json = r#"{"$link":"bafyreih4g7bvo6hdq2juolev5bfzpbo4ewkxh5mzxwgvkjp3kitc6hqkha"}"#;
+        let link: CidLink = serde_json::from_str(json).unwrap();
+        assert_eq!(link.as_str(), TEST_CID);
+    }
+
+    #[test]
+    fn cidlink_roundtrip_json() {
+        let link = CidLink::str(TEST_CID);
+        let json = serde_json::to_string(&link).unwrap();
+        let parsed: CidLink = serde_json::from_str(&json).unwrap();
+        assert_eq!(link, parsed);
+        assert_eq!(link.as_str(), TEST_CID);
+    }
+
+    #[test]
+    fn cidlink_constructors() {
+        let link1 = CidLink::str(TEST_CID);
+        let link2 = CidLink::cow_str(CowStr::Borrowed(TEST_CID));
+        let link3 = CidLink::from(TEST_CID.to_string());
+        let link4 = CidLink::new_static(TEST_CID);
+
+        assert_eq!(link1.as_str(),
```
TEST_CID); 444 + assert_eq!(link2.as_str(), TEST_CID); 445 + assert_eq!(link3.as_str(), TEST_CID); 446 + assert_eq!(link4.as_str(), TEST_CID); 447 + } 448 + 449 + #[test] 450 + fn cidlink_conversions() { 451 + let link = CidLink::str(TEST_CID); 452 + 453 + // CidLink -> Cid 454 + let cid: Cid = link.clone().into(); 455 + assert_eq!(cid.as_str(), TEST_CID); 456 + 457 + // Cid -> CidLink 458 + let link2: CidLink = cid.into(); 459 + assert_eq!(link2.as_str(), TEST_CID); 460 + 461 + // CidLink -> String 462 + let s: String = link.clone().into(); 463 + assert_eq!(s, TEST_CID); 464 + 465 + // CidLink -> CowStr 466 + let cow: CowStr = link.into(); 467 + assert_eq!(cow.as_ref(), TEST_CID); 468 + } 469 + 470 + #[test] 471 + fn cidlink_display() { 472 + let link = CidLink::str(TEST_CID); 473 + assert_eq!(format!("{}", link), TEST_CID); 474 + } 475 + 476 + #[test] 477 + fn cidlink_deref() { 478 + let link = CidLink::str(TEST_CID); 479 + assert_eq!(&*link, TEST_CID); 480 + assert_eq!(link.as_ref(), TEST_CID); 481 + } 482 + }
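For reference, the two wire shapes that the `Serialize` impl above dispatches between via `is_human_readable()` can be sketched without serde. This is illustration only; `Format` and `encode_cid_link` are stand-ins, not crate APIs, and the CBOR arm stands in for the raw CID bytes (tag 42 in dag-cbor):

```rust
// The two wire shapes for a CID link (sketch, not the crate's code):
// human-readable (JSON) wraps the CID in {"$link": "<cid>"}, while the
// binary format (dag-cbor) carries the CID itself.
enum Format {
    Json, // human-readable path: is_human_readable() == true
    Cbor, // binary path: raw CID (string stands in for bytes here)
}

fn encode_cid_link(cid: &str, format: Format) -> String {
    match format {
        Format::Json => format!(r#"{{"$link":"{}"}}"#, cid),
        Format::Cbor => cid.to_string(),
    }
}

fn main() {
    let cid = "bafyreih4g7bvo6hdq2juolev5bfzpbo4ewkxh5mzxwgvkjp3kitc6hqkha";
    assert_eq!(
        encode_cid_link(cid, Format::Json),
        r#"{"$link":"bafyreih4g7bvo6hdq2juolev5bfzpbo4ewkxh5mzxwgvkjp3kitc6hqkha"}"#
    );
    assert_eq!(encode_cid_link(cid, Format::Cbor), cid);
}
```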
+73 -1
crates/jacquard-common/src/types/datetime.rs
··· 1 1 use chrono::DurationRound; 2 2 use serde::Serializer; 3 3 use serde::{Deserialize, Deserializer, Serialize, de::Error}; 4 - use smol_str::ToSmolStr; 4 + use smol_str::{SmolStr, ToSmolStr}; 5 + use std::fmt; 5 6 use std::sync::LazyLock; 6 7 use std::{cmp, str::FromStr}; 7 8 ··· 163 164 } 164 165 } 165 166 } 167 + 168 + impl From<chrono::DateTime<chrono::FixedOffset>> for Datetime { 169 + fn from(dt: chrono::DateTime<chrono::FixedOffset>) -> Self { 170 + Self::new(dt) 171 + } 172 + } 173 + 174 + impl From<Datetime> for String { 175 + fn from(value: Datetime) -> Self { 176 + value.serialized.to_string() 177 + } 178 + } 179 + 180 + impl From<Datetime> for SmolStr { 181 + fn from(value: Datetime) -> Self { 182 + match value.serialized { 183 + CowStr::Borrowed(s) => SmolStr::new(s), 184 + CowStr::Owned(s) => s, 185 + } 186 + } 187 + } 188 + 189 + impl From<Datetime> for CowStr<'static> { 190 + fn from(value: Datetime) -> Self { 191 + value.serialized 192 + } 193 + } 194 + 195 + impl AsRef<str> for Datetime { 196 + fn as_ref(&self) -> &str { 197 + self.as_str() 198 + } 199 + } 200 + 201 + impl fmt::Display for Datetime { 202 + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 203 + f.write_str(self.as_str()) 204 + } 205 + } 206 + 207 + #[cfg(test)] 208 + mod tests { 209 + use super::*; 210 + 211 + #[test] 212 + fn valid_datetimes() { 213 + assert!(Datetime::from_str("2023-01-15T12:30:45.123456Z").is_ok()); 214 + assert!(Datetime::from_str("2023-01-15T12:30:45Z").is_ok()); 215 + assert!(Datetime::from_str("2023-01-15T12:30:45+00:00").is_ok()); 216 + assert!(Datetime::from_str("2023-01-15T12:30:45-05:00").is_ok()); 217 + } 218 + 219 + #[test] 220 + fn microsecond_precision() { 221 + let dt = Datetime::from_str("2023-01-15T12:30:45.123456Z").unwrap(); 222 + assert!(dt.as_str().contains(".123456")); 223 + } 224 + 225 + #[test] 226 + fn requires_timezone() { 227 + // Missing timezone should fail 228 + 
assert!(Datetime::from_str("2023-01-15T12:30:45").is_err()); 229 + } 230 + 231 + #[test] 232 + fn round_trip() { 233 + let original = "2023-01-15T12:30:45.123456Z"; 234 + let dt = Datetime::from_str(original).unwrap(); 235 + assert_eq!(dt.as_str(), original); 236 + } 237 + }
+97
crates/jacquard-common/src/types/did.rs
··· 12 12 #[repr(transparent)] 13 13 pub struct Did<'d>(CowStr<'d>); 14 14 15 + /// Regex for DID validation per AT Protocol spec. 16 + /// 17 + /// Note: This regex allows `%` in the identifier but prevents DIDs from ending with `:` or `%`. 18 + /// It does NOT validate that percent-encoding is well-formed (i.e., `%XX` where XX are hex digits). 19 + /// This matches the behavior of the official TypeScript implementation, which also does not 20 + /// enforce percent-encoding validity at validation time. While the spec states "percent sign 21 + /// must be followed by two hex characters," this is treated as a best practice rather than 22 + /// a hard validation requirement. 15 23 pub static DID_REGEX: LazyLock<Regex> = 16 24 LazyLock::new(|| Regex::new(r"^did:[a-z]+:[a-zA-Z0-9._:%-]*[a-zA-Z0-9._-]$").unwrap()); 17 25 ··· 193 201 self.as_str() 194 202 } 195 203 } 204 + 205 + #[cfg(test)] 206 + mod tests { 207 + use super::*; 208 + 209 + #[test] 210 + fn valid_dids() { 211 + assert!(Did::new("did:plc:abc123").is_ok()); 212 + assert!(Did::new("did:web:example.com").is_ok()); 213 + assert!(Did::new("did:method:val_ue").is_ok()); 214 + assert!(Did::new("did:method:val-ue").is_ok()); 215 + assert!(Did::new("did:method:val.ue").is_ok()); 216 + assert!(Did::new("did:method:val%20ue").is_ok()); 217 + } 218 + 219 + #[test] 220 + fn prefix_stripping() { 221 + assert_eq!(Did::new("at://did:plc:foo").unwrap().as_str(), "did:plc:foo"); 222 + assert_eq!(Did::new("did:plc:foo").unwrap().as_str(), "did:plc:foo"); 223 + } 224 + 225 + #[test] 226 + fn must_start_with_did() { 227 + assert!(Did::new("DID:plc:foo").is_err()); 228 + assert!(Did::new("plc:foo").is_err()); 229 + assert!(Did::new("foo").is_err()); 230 + } 231 + 232 + #[test] 233 + fn method_must_be_lowercase() { 234 + assert!(Did::new("did:plc:foo").is_ok()); 235 + assert!(Did::new("did:PLC:foo").is_err()); 236 + assert!(Did::new("did:Plc:foo").is_err()); 237 + } 238 + 239 + #[test] 240 + fn 
cannot_end_with_colon_or_percent() { 241 + assert!(Did::new("did:plc:foo:").is_err()); 242 + assert!(Did::new("did:plc:foo%").is_err()); 243 + assert!(Did::new("did:plc:foo:bar").is_ok()); 244 + } 245 + 246 + #[test] 247 + fn max_length() { 248 + let valid_2048 = format!("did:plc:{}", "a".repeat(2048 - 8)); 249 + assert_eq!(valid_2048.len(), 2048); 250 + assert!(Did::new(&valid_2048).is_ok()); 251 + 252 + let too_long_2049 = format!("did:plc:{}", "a".repeat(2049 - 8)); 253 + assert_eq!(too_long_2049.len(), 2049); 254 + assert!(Did::new(&too_long_2049).is_err()); 255 + } 256 + 257 + #[test] 258 + fn allowed_characters() { 259 + assert!(Did::new("did:method:abc123").is_ok()); 260 + assert!(Did::new("did:method:ABC123").is_ok()); 261 + assert!(Did::new("did:method:a_b_c").is_ok()); 262 + assert!(Did::new("did:method:a-b-c").is_ok()); 263 + assert!(Did::new("did:method:a.b.c").is_ok()); 264 + assert!(Did::new("did:method:a:b:c").is_ok()); 265 + } 266 + 267 + #[test] 268 + fn disallowed_characters() { 269 + assert!(Did::new("did:method:a b").is_err()); 270 + assert!(Did::new("did:method:a@b").is_err()); 271 + assert!(Did::new("did:method:a#b").is_err()); 272 + assert!(Did::new("did:method:a?b").is_err()); 273 + } 274 + 275 + #[test] 276 + fn percent_encoding() { 277 + // Valid percent encoding 278 + assert!(Did::new("did:method:foo%20bar").is_ok()); 279 + assert!(Did::new("did:method:foo%2Fbar").is_ok()); 280 + 281 + // DIDs cannot end with % 282 + assert!(Did::new("did:method:foo%").is_err()); 283 + 284 + // IMPORTANT: The regex does NOT validate that percent-encoding is well-formed. 285 + // This matches the TypeScript reference implementation's behavior. 286 + // While the spec says "percent sign must be followed by two hex characters", 287 + // implementations treat this as a best practice, not a hard validation requirement. 288 + // Thus, malformed percent encoding like %2x is accepted by the regex. 
289 + assert!(Did::new("did:method:foo%2x").is_ok()); 290 + assert!(Did::new("did:method:foo%ZZ").is_ok()); 291 + } 292 + }
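As the comments above note, the regex deliberately does not check that percent-encoding is well-formed. A caller who wants the stricter best-practice check the spec describes could layer one on; a stdlib-only sketch (hypothetical helper, not part of jacquard):

```rust
// Hypothetical stricter check: every '%' must be followed by two hex digits.
// The crate's DID_REGEX intentionally does NOT enforce this, matching the
// TypeScript reference implementation.
fn percent_encoding_well_formed(s: &str) -> bool {
    let bytes = s.as_bytes();
    let mut i = 0;
    while i < bytes.len() {
        if bytes[i] == b'%' {
            // need two more bytes, both hex digits
            if i + 2 >= bytes.len()
                || !bytes[i + 1].is_ascii_hexdigit()
                || !bytes[i + 2].is_ascii_hexdigit()
            {
                return false;
            }
            i += 3;
        } else {
            i += 1;
        }
    }
    true
}

fn main() {
    assert!(percent_encoding_well_formed("did:method:foo%20bar"));
    assert!(!percent_encoding_well_formed("did:method:foo%2x")); // regex accepts this
    assert!(!percent_encoding_well_formed("did:method:foo%"));
}
```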
+120 -26
crates/jacquard-common/src/types/handle.rs
··· 21 21 impl<'h> Handle<'h> { 22 22 /// Fallible constructor, validates, borrows from input 23 23 /// 24 - /// Accepts (and strips) preceding '@' if present 24 + /// Accepts (and strips) preceding '@' or 'at://' if present 25 25 pub fn new(handle: &'h str) -> Result<Self, AtStrError> { 26 - let handle = handle 26 + let stripped = handle 27 27 .strip_prefix("at://") 28 - .unwrap_or(handle) 29 - .strip_prefix('@') 28 + .or_else(|| handle.strip_prefix('@')) 30 29 .unwrap_or(handle); 31 - if handle.len() > 253 { 32 - Err(AtStrError::too_long("handle", handle, 253, handle.len())) 33 - } else if !HANDLE_REGEX.is_match(handle) { 30 + 31 + if stripped.len() > 253 { 32 + Err(AtStrError::too_long("handle", stripped, 253, stripped.len())) 33 + } else if !HANDLE_REGEX.is_match(stripped) { 34 34 Err(AtStrError::regex( 35 35 "handle", 36 - handle, 36 + stripped, 37 37 SmolStr::new_static("invalid"), 38 38 )) 39 - } else if ends_with(handle, DISALLOWED_TLDS) { 40 - Err(AtStrError::disallowed("handle", handle, DISALLOWED_TLDS)) 39 + } else if ends_with(stripped, DISALLOWED_TLDS) { 40 + Err(AtStrError::disallowed("handle", stripped, DISALLOWED_TLDS)) 41 41 } else { 42 - Ok(Self(CowStr::Borrowed(handle))) 42 + Ok(Self(CowStr::Borrowed(stripped))) 43 43 } 44 44 } 45 45 46 46 /// Fallible constructor, validates, takes ownership 47 47 pub fn new_owned(handle: impl AsRef<str>) -> Result<Self, AtStrError> { 48 48 let handle = handle.as_ref(); 49 - let handle = handle 49 + let stripped = handle 50 50 .strip_prefix("at://") 51 - .unwrap_or(handle) 52 - .strip_prefix('@') 51 + .or_else(|| handle.strip_prefix('@')) 53 52 .unwrap_or(handle); 53 + let handle = stripped; 54 54 if handle.len() > 253 { 55 55 Err(AtStrError::too_long("handle", handle, 253, handle.len())) 56 56 } else if !HANDLE_REGEX.is_match(handle) { ··· 68 68 69 69 /// Fallible constructor, validates, doesn't allocate 70 70 pub fn new_static(handle: &'static str) -> Result<Self, AtStrError> { 71 - let handle = handle 71 + let 
stripped = handle 72 72 .strip_prefix("at://") 73 - .unwrap_or(handle) 74 - .strip_prefix('@') 73 + .or_else(|| handle.strip_prefix('@')) 75 74 .unwrap_or(handle); 75 + let handle = stripped; 76 76 if handle.len() > 253 { 77 77 Err(AtStrError::too_long("handle", handle, 253, handle.len())) 78 78 } else if !HANDLE_REGEX.is_match(handle) { ··· 92 92 /// or API values you know are valid (rather than using serde), this is the one to use. 93 93 /// The From<String> and From<CowStr> impls use the same logic. 94 94 /// 95 - /// Accepts (and strips) preceding '@' if present 95 + /// Accepts (and strips) preceding '@' or 'at://' if present 96 96 pub fn raw(handle: &'h str) -> Self { 97 - let handle = handle 97 + let stripped = handle 98 98 .strip_prefix("at://") 99 - .unwrap_or(handle) 100 - .strip_prefix('@') 99 + .or_else(|| handle.strip_prefix('@')) 101 100 .unwrap_or(handle); 101 + let handle = stripped; 102 102 if handle.len() > 253 { 103 103 panic!("handle too long") 104 104 } else if !HANDLE_REGEX.is_match(handle) { ··· 113 113 /// Infallible constructor for when you *know* the string is a valid handle. 114 114 /// Marked unsafe because responsibility for upholding the invariant is on the developer. 
115 115 /// 116 - /// Accepts (and strips) preceding '@' if present 116 + /// Accepts (and strips) preceding '@' or 'at://' if present 117 117 pub unsafe fn unchecked(handle: &'h str) -> Self { 118 - let handle = handle 118 + let stripped = handle 119 119 .strip_prefix("at://") 120 - .unwrap_or(handle) 121 - .strip_prefix('@') 120 + .or_else(|| handle.strip_prefix('@')) 122 121 .unwrap_or(handle); 123 - Self(CowStr::Borrowed(handle)) 122 + Self(CowStr::Borrowed(stripped)) 124 123 } 125 124 126 125 pub fn as_str(&self) -> &str { ··· 208 207 self.as_str() 209 208 } 210 209 } 210 + 211 + #[cfg(test)] 212 + mod tests { 213 + use super::*; 214 + 215 + #[test] 216 + fn valid_handles() { 217 + assert!(Handle::new("alice.test").is_ok()); 218 + assert!(Handle::new("foo.bsky.social").is_ok()); 219 + assert!(Handle::new("a.b.c.d.e").is_ok()); 220 + assert!(Handle::new("a1.b2.c3").is_ok()); 221 + assert!(Handle::new("name-with-dash.com").is_ok()); 222 + } 223 + 224 + #[test] 225 + fn prefix_stripping() { 226 + assert_eq!(Handle::new("@alice.test").unwrap().as_str(), "alice.test"); 227 + assert_eq!(Handle::new("at://alice.test").unwrap().as_str(), "alice.test"); 228 + assert_eq!(Handle::new("alice.test").unwrap().as_str(), "alice.test"); 229 + } 230 + 231 + #[test] 232 + fn max_length() { 233 + // 253 chars: three 63-char segments + one 61-char segment + 3 dots = 253 234 + let s1 = format!("a{}a", "b".repeat(61)); // 63 235 + let s2 = format!("c{}c", "d".repeat(61)); // 63 236 + let s3 = format!("e{}e", "f".repeat(61)); // 63 237 + let s4 = format!("g{}g", "h".repeat(59)); // 61 238 + let valid_253 = format!("{}.{}.{}.{}", s1, s2, s3, s4); 239 + assert_eq!(valid_253.len(), 253); 240 + assert!(Handle::new(&valid_253).is_ok()); 241 + 242 + // 254 chars: make last segment 62 chars 243 + let s4_long = format!("g{}g", "h".repeat(60)); // 62 244 + let too_long_254 = format!("{}.{}.{}.{}", s1, s2, s3, s4_long); 245 + assert_eq!(too_long_254.len(), 254); 246 + 
assert!(Handle::new(&too_long_254).is_err()); 247 + } 248 + 249 + #[test] 250 + fn segment_length_constraints() { 251 + let valid_63_char_segment = format!("{}.com", "a".repeat(63)); 252 + assert!(Handle::new(&valid_63_char_segment).is_ok()); 253 + 254 + let too_long_64_char_segment = format!("{}.com", "a".repeat(64)); 255 + assert!(Handle::new(&too_long_64_char_segment).is_err()); 256 + } 257 + 258 + #[test] 259 + fn hyphen_placement() { 260 + assert!(Handle::new("valid-label.com").is_ok()); 261 + assert!(Handle::new("-nope.com").is_err()); 262 + assert!(Handle::new("nope-.com").is_err()); 263 + } 264 + 265 + #[test] 266 + fn tld_must_start_with_letter() { 267 + assert!(Handle::new("foo.bar").is_ok()); 268 + assert!(Handle::new("foo.9bar").is_err()); 269 + } 270 + 271 + #[test] 272 + fn disallowed_tlds() { 273 + assert!(Handle::new("foo.local").is_err()); 274 + assert!(Handle::new("foo.localhost").is_err()); 275 + assert!(Handle::new("foo.arpa").is_err()); 276 + assert!(Handle::new("foo.invalid").is_err()); 277 + assert!(Handle::new("foo.internal").is_err()); 278 + assert!(Handle::new("foo.example").is_err()); 279 + assert!(Handle::new("foo.alt").is_err()); 280 + assert!(Handle::new("foo.onion").is_err()); 281 + } 282 + 283 + #[test] 284 + fn minimum_segments() { 285 + assert!(Handle::new("a.b").is_ok()); 286 + assert!(Handle::new("a").is_err()); 287 + assert!(Handle::new("com").is_err()); 288 + } 289 + 290 + #[test] 291 + fn invalid_characters() { 292 + assert!(Handle::new("foo!bar.com").is_err()); 293 + assert!(Handle::new("foo_bar.com").is_err()); 294 + assert!(Handle::new("foo bar.com").is_err()); 295 + assert!(Handle::new("foo@bar.com").is_err()); 296 + } 297 + 298 + #[test] 299 + fn empty_segments() { 300 + assert!(Handle::new("foo..com").is_err()); 301 + assert!(Handle::new(".foo.com").is_err()); 302 + assert!(Handle::new("foo.com.").is_err()); 303 + } 304 + }
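The prefix-stripping change above fixes a real bug: the old chained form's final `unwrap_or(handle)` fell back to the *original* string whenever the `'@'` strip returned `None`, silently undoing a successful `at://` strip. A stdlib-only reproduction of both versions (`strip_old`/`strip_new` are illustrative free functions, not the crate's API):

```rust
// Old behavior: the second strip_prefix's fallback discards the result of
// the first strip, so "at://alice.test" comes back unchanged.
fn strip_old(handle: &str) -> &str {
    handle
        .strip_prefix("at://")
        .unwrap_or(handle)
        .strip_prefix('@')
        .unwrap_or(handle) // bug: falls back to the ORIGINAL input
}

// Fixed behavior from this commit: try "at://" first, else try '@',
// else keep the input as-is.
fn strip_new(handle: &str) -> &str {
    handle
        .strip_prefix("at://")
        .or_else(|| handle.strip_prefix('@'))
        .unwrap_or(handle)
}

fn main() {
    assert_eq!(strip_old("at://alice.test"), "at://alice.test"); // wrong
    assert_eq!(strip_new("at://alice.test"), "alice.test"); // fixed
    assert_eq!(strip_new("@alice.test"), "alice.test");
    assert_eq!(strip_new("alice.test"), "alice.test");
}
```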
+37
crates/jacquard-common/src/types/ident.rs
··· 168 168 } 169 169 } 170 170 } 171 + 172 + #[cfg(test)] 173 + mod tests { 174 + use super::*; 175 + 176 + #[test] 177 + fn parses_did() { 178 + let ident = AtIdentifier::new("did:plc:foo").unwrap(); 179 + assert!(matches!(ident, AtIdentifier::Did(_))); 180 + assert_eq!(ident.as_str(), "did:plc:foo"); 181 + } 182 + 183 + #[test] 184 + fn parses_handle() { 185 + let ident = AtIdentifier::new("alice.test").unwrap(); 186 + assert!(matches!(ident, AtIdentifier::Handle(_))); 187 + assert_eq!(ident.as_str(), "alice.test"); 188 + } 189 + 190 + #[test] 191 + fn did_takes_precedence() { 192 + // DID is tried first, so valid DIDs are parsed as DIDs 193 + let ident = AtIdentifier::new("did:web:alice.test").unwrap(); 194 + assert!(matches!(ident, AtIdentifier::Did(_))); 195 + } 196 + 197 + #[test] 198 + fn from_types() { 199 + let did = Did::new("did:plc:foo").unwrap(); 200 + let ident: AtIdentifier = did.into(); 201 + assert!(matches!(ident, AtIdentifier::Did(_))); 202 + 203 + let handle = Handle::new("alice.test").unwrap(); 204 + let ident: AtIdentifier = handle.into(); 205 + assert!(matches!(ident, AtIdentifier::Handle(_))); 206 + } 207 + }
+35 -3
crates/jacquard-common/src/types/language.rs
··· 22 22 T: AsRef<str> + ?Sized, 23 23 { 24 24 let tag = langtag::LangTag::new(lang)?; 25 - Ok(Language(SmolStr::new_inline(tag.as_str()))) 25 + Ok(Language(SmolStr::new(tag.as_str()))) 26 + } 27 + 28 + /// Parses an IETF language tag from a static string. 29 + pub fn new_static(lang: &'static str) -> Result<Self, langtag::InvalidLangTag<&'static str>> { 30 + let tag = langtag::LangTag::new(lang)?; 31 + Ok(Language(SmolStr::new_static(tag.as_str()))) 26 32 } 27 33 28 34 /// Infallible constructor for when you *know* the string is a valid IETF language tag. ··· 32 38 pub fn raw(lang: impl AsRef<str>) -> Self { 33 39 let lang = lang.as_ref(); 34 40 let tag = langtag::LangTag::new(lang).expect("valid IETF language tag"); 35 - Language(SmolStr::new_inline(tag.as_str())) 41 + Language(SmolStr::new(tag.as_str())) 36 42 } 37 43 38 44 /// Infallible constructor for when you *know* the string is a valid IETF language tag. 39 45 /// Marked unsafe because responsibility for upholding the invariant is on the developer. 40 46 pub unsafe fn unchecked(lang: impl AsRef<str>) -> Self { 41 47 let lang = lang.as_ref(); 42 - Self(SmolStr::new_inline(lang)) 48 + Self(SmolStr::new(lang)) 43 49 } 44 50 45 51 /// Returns the LANG as a string slice. ··· 112 118 self.as_str() 113 119 } 114 120 } 121 + 122 + #[cfg(test)] 123 + mod tests { 124 + use super::*; 125 + 126 + #[test] 127 + fn valid_language_tags() { 128 + assert!(Language::new("en").is_ok()); 129 + assert!(Language::new("en-US").is_ok()); 130 + assert!(Language::new("zh-Hans").is_ok()); 131 + assert!(Language::new("es-419").is_ok()); 132 + } 133 + 134 + #[test] 135 + fn case_insensitive_but_preserves() { 136 + let lang = Language::new("en-US").unwrap(); 137 + assert_eq!(lang.as_str(), "en-US"); 138 + } 139 + 140 + #[test] 141 + fn invalid_tags() { 142 + assert!(Language::new("").is_err()); 143 + assert!(Language::new("not_a_tag").is_err()); 144 + assert!(Language::new("123").is_err()); 145 + } 146 + }
+94
crates/jacquard-common/src/types/nsid.rs
··· 221 221 self.as_str() 222 222 } 223 223 } 224 + 225 + #[cfg(test)] 226 + mod tests { 227 + use super::*; 228 + 229 + #[test] 230 + fn valid_nsids() { 231 + assert!(Nsid::new("com.example.foo").is_ok()); 232 + assert!(Nsid::new("com.example.fooBar").is_ok()); 233 + assert!(Nsid::new("com.long-domain.foo").is_ok()); 234 + assert!(Nsid::new("a.b.c").is_ok()); 235 + assert!(Nsid::new("a1.b2.c3").is_ok()); 236 + } 237 + 238 + #[test] 239 + fn minimum_segments() { 240 + assert!(Nsid::new("a.b.c").is_ok()); // 3 segments minimum 241 + assert!(Nsid::new("a.b").is_err()); 242 + assert!(Nsid::new("a").is_err()); 243 + } 244 + 245 + #[test] 246 + fn domain_and_name_parsing() { 247 + let nsid = Nsid::new("com.example.fooBar").unwrap(); 248 + assert_eq!(nsid.domain_authority(), "com.example"); 249 + assert_eq!(nsid.name(), "fooBar"); 250 + } 251 + 252 + #[test] 253 + fn max_length() { 254 + // spec max is 317: five 63-char segments = 315, plus 4 dots = 319, too long 255 + // four 63-char segments plus one 62 = 314, plus 4 dots = 318, still too long 256 + // four 63-char segments plus one 61 = 313, plus 4 dots = 317, exactly the max 257 + let s1 = format!("a{}a", "b".repeat(61)); 258 + let s2 = format!("c{}c", "d".repeat(61)); 259 + let s3 = format!("e{}e", "f".repeat(61)); 260 + let s4 = format!("g{}g", "h".repeat(61)); 261 + let s5 = format!("i{}i", "j".repeat(59)); 262 + let valid_317 = format!("{}.{}.{}.{}.{}", s1, s2, s3, s4, s5); 263 + assert_eq!(valid_317.len(), 317); 264 + assert!(Nsid::new(&valid_317).is_ok()); 265 + 266 + let s5_long = format!("i{}i", "j".repeat(60)); 267 + let too_long_318 = format!("{}.{}.{}.{}.{}", s1, s2, s3, s4, s5_long); 268 + assert_eq!(too_long_318.len(), 318); 269 + assert!(Nsid::new(&too_long_318).is_err()); 270 + } 271 + 272 + #[test] 273 + fn segment_length() { 274 + let valid_63 = format!("{}.{}.foo", "a".repeat(63), "b".repeat(63)); 275 + assert!(Nsid::new(&valid_63).is_ok()); 276 + 277 + let too_long_64 = format!("{}.b.foo", "a".repeat(64)); 278 + 
assert!(Nsid::new(&too_long_64).is_err()); 279 + } 280 + 281 + #[test] 282 + fn first_segment_cannot_start_with_digit() { 283 + assert!(Nsid::new("com.example.foo").is_ok()); 284 + assert!(Nsid::new("9com.example.foo").is_err()); 285 + } 286 + 287 + #[test] 288 + fn name_segment_rules() { 289 + assert!(Nsid::new("com.example.foo").is_ok()); 290 + assert!(Nsid::new("com.example.fooBar123").is_ok()); 291 + assert!(Nsid::new("com.example.9foo").is_err()); // can't start with digit 292 + assert!(Nsid::new("com.example.foo-bar").is_err()); // no hyphens in name 293 + } 294 + 295 + #[test] 296 + fn domain_segment_rules() { 297 + assert!(Nsid::new("foo-bar.example.baz").is_ok()); 298 + assert!(Nsid::new("foo.bar-baz.qux").is_ok()); 299 + assert!(Nsid::new("-foo.bar.baz").is_err()); // can't start with hyphen 300 + assert!(Nsid::new("foo-.bar.baz").is_err()); // can't end with hyphen 301 + } 302 + 303 + #[test] 304 + fn case_sensitivity() { 305 + // Domain should be case-insensitive per spec (but not enforced in validation) 306 + // Name is case-sensitive 307 + assert!(Nsid::new("com.example.fooBar").is_ok()); 308 + assert!(Nsid::new("com.example.FooBar").is_ok()); 309 + } 310 + 311 + #[test] 312 + fn no_hyphens_in_name() { 313 + assert!(Nsid::new("com.example.foo").is_ok()); 314 + assert!(Nsid::new("com.example.foo-bar").is_err()); 315 + assert!(Nsid::new("com.example.fooBar").is_ok()); 316 + } 317 + }
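The `domain_and_name_parsing` test above relies on an NSID splitting into a domain authority and a final name segment. A stdlib-only sketch of that split (hypothetical `split_nsid`, not the crate's implementation):

```rust
// Split an NSID at its last dot: everything before is the (reversed-DNS)
// domain authority, the final segment is the name. Requires at least two
// authority segments, since a valid NSID has three or more segments total.
fn split_nsid(nsid: &str) -> Option<(&str, &str)> {
    let idx = nsid.rfind('.')?;
    let authority = &nsid[..idx];
    if !authority.contains('.') {
        return None; // fewer than 3 segments overall
    }
    Some((authority, &nsid[idx + 1..]))
}

fn main() {
    assert_eq!(
        split_nsid("com.example.fooBar"),
        Some(("com.example", "fooBar"))
    );
    assert_eq!(split_nsid("a.b"), None); // 3 segments minimum
}
```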
+69
crates/jacquard-common/src/types/recordkey.rs
··· 417 417 self.as_str() 418 418 } 419 419 } 420 + 421 + #[cfg(test)] 422 + mod tests { 423 + use super::*; 424 + 425 + #[test] 426 + fn valid_rkeys() { 427 + assert!(Rkey::new("3jzfcijpj2z2a").is_ok()); // TID format 428 + assert!(Rkey::new("self").is_ok()); // literal 429 + assert!(Rkey::new("com.example.foo").is_ok()); // NSID format 430 + assert!(Rkey::new("foo-bar_baz").is_ok()); 431 + assert!(Rkey::new("foo:bar").is_ok()); 432 + assert!(Rkey::new("foo~bar").is_ok()); 433 + } 434 + 435 + #[test] 436 + fn length_constraints() { 437 + assert!(Rkey::new("a").is_ok()); // min 1 438 + let valid_512 = "a".repeat(512); 439 + assert_eq!(valid_512.len(), 512); 440 + assert!(Rkey::new(&valid_512).is_ok()); 441 + 442 + let too_long_513 = "a".repeat(513); 443 + assert_eq!(too_long_513.len(), 513); 444 + assert!(Rkey::new(&too_long_513).is_err()); 445 + } 446 + 447 + #[test] 448 + fn disallowed_literals() { 449 + assert!(Rkey::new(".").is_err()); 450 + assert!(Rkey::new("..").is_err()); 451 + assert!(Rkey::new("...").is_ok()); // 3+ dots is fine 452 + } 453 + 454 + #[test] 455 + fn allowed_characters() { 456 + assert!(Rkey::new("abc123").is_ok()); 457 + assert!(Rkey::new("ABC123").is_ok()); 458 + assert!(Rkey::new("foo-bar").is_ok()); 459 + assert!(Rkey::new("foo_bar").is_ok()); 460 + assert!(Rkey::new("foo.bar").is_ok()); 461 + assert!(Rkey::new("foo:bar").is_ok()); 462 + assert!(Rkey::new("foo~bar").is_ok()); 463 + } 464 + 465 + #[test] 466 + fn disallowed_characters() { 467 + assert!(Rkey::new("foo bar").is_err()); 468 + assert!(Rkey::new("foo@bar").is_err()); 469 + assert!(Rkey::new("foo#bar").is_err()); 470 + assert!(Rkey::new("foo/bar").is_err()); 471 + assert!(Rkey::new("foo\\bar").is_err()); 472 + } 473 + 474 + #[test] 475 + fn literal_key_self() { 476 + let key = LiteralKey::<SelfRecord>::new("self").unwrap(); 477 + assert_eq!(key.as_str(), "self"); 478 + 479 + assert!(LiteralKey::<SelfRecord>::new("Self").is_ok()); // case insensitive 480 + 
assert!(LiteralKey::<SelfRecord>::new("other").is_err()); 481 + } 482 + 483 + #[test] 484 + fn literal_key_disallowed() { 485 + assert!(LiteralKey::<SelfRecord>::new(".").is_err()); 486 + assert!(LiteralKey::<SelfRecord>::new("..").is_err()); 487 + } 488 + }
+1 -1
crates/jacquard-common/src/types/string.rs
··· 8 8 CowStr, 9 9 types::{ 10 10 aturi::AtUri, 11 - cid::Cid, 11 + cid::{Cid, CidLink}, 12 12 datetime::Datetime, 13 13 did::Did, 14 14 handle::Handle,
+70
crates/jacquard-common/src/types/tid.rs
··· 307 307 Self::new() 308 308 } 309 309 } 310 + 311 + #[cfg(test)] 312 + mod tests { 313 + use super::*; 314 + 315 + #[test] 316 + fn valid_tids() { 317 + assert!(Tid::new("3jzfcijpj2z2a").is_ok()); 318 + assert!(Tid::new("2222222222222").is_ok()); 319 + assert!(Tid::new("j7777777777777").is_err()); // 14 chars: 'j' is a valid first char, but this fails the length check 320 + } 321 + 322 + #[test] 323 + fn exact_length() { 324 + assert!(Tid::new("3jzfcijpj2z2a").is_ok()); 325 + assert!(Tid::new("3jzfcijpj2z2").is_err()); // 12 chars 326 + assert!(Tid::new("3jzfcijpj2z2aa").is_err()); // 14 chars 327 + } 328 + 329 + #[test] 330 + fn first_char_constraint() { 331 + // First char must be 2-7 or a-j (not k-z) 332 + assert!(Tid::new("2222222222222").is_ok()); 333 + assert!(Tid::new("7777777777777").is_ok()); 334 + assert!(Tid::new("a222222222222").is_ok()); 335 + assert!(Tid::new("j222222222222").is_ok()); 336 + assert!(Tid::new("k222222222222").is_err()); 337 + assert!(Tid::new("z222222222222").is_err()); 338 + } 339 + 340 + #[test] 341 + fn remaining_chars_constraint() { 342 + // Remaining 12 chars must be 2-7 or a-z 343 + assert!(Tid::new("3abcdefghijkl").is_ok()); 344 + assert!(Tid::new("3zzzzzzzzzzzz").is_ok()); 345 + assert!(Tid::new("3222222222222").is_ok()); 346 + assert!(Tid::new("3777777777777").is_ok()); 347 + } 348 + 349 + #[test] 350 + fn disallowed_characters() { 351 + assert!(Tid::new("3jzfcijpj2z2A").is_err()); // uppercase 352 + assert!(Tid::new("3jzfcijpj2z21").is_err()); // 1 not allowed 353 + assert!(Tid::new("3jzfcijpj2z28").is_err()); // 8 not allowed 354 + assert!(Tid::new("3jzfcijpj2z2-").is_err()); // special char 355 + } 356 + 357 + #[test] 358 + fn generation_and_comparison() { 359 + let tid1 = Tid::now_0(); 360 + std::thread::sleep(std::time::Duration::from_micros(10)); 361 + let tid2 = Tid::now_0(); 362 + 363 + assert!(tid1.as_str().len() == 13); 364 + assert!(tid2.as_str().len() == 13); 365 + assert!(tid2.newer_than(&tid1)); 366 + 
assert!(tid1.older_than(&tid2)); 367 + } 368 + 369 + #[test] 370 + fn ticker_monotonic() { 371 + let mut ticker = Ticker::new(); 372 + let tid1 = ticker.next(None); 373 + let tid2 = ticker.next(Some(tid1.clone())); 374 + let tid3 = ticker.next(Some(tid2.clone())); 375 + 376 + assert!(tid2.newer_than(&tid1)); 377 + assert!(tid3.newer_than(&tid2)); 378 + } 379 + }
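The first-char constraint tested above falls out of the encoding arithmetic: 13 base32-sortable characters carry 65 bits, one more than the 64-bit TID value, so the leading character's extra bit must be zero, capping its value at 15 (`'j'`). A stdlib-only sketch of the charset math and of decoding a TID into its timestamp and clock-id fields (this is not the crate's implementation):

```rust
// base32-sortable charset used by TIDs: '2' encodes 0, 'z' encodes 31.
const CHARSET: &str = "234567abcdefghijklmnopqrstuvwxyz";

fn char_value(c: char) -> Option<u64> {
    CHARSET.find(c).map(|i| i as u64)
}

fn main() {
    assert_eq!(char_value('2'), Some(0));
    assert_eq!(char_value('j'), Some(15)); // largest value allowed in position 0
    assert_eq!(char_value('k'), Some(16)); // would overflow the 64-bit value

    // Fold a 13-char TID into its 64-bit value; per the atproto spec that
    // value is 53 bits of microseconds since the epoch plus a 10-bit clock id.
    let tid = "3jzfcijpj2z2a";
    let value = tid
        .chars()
        .fold(0u64, |acc, c| (acc << 5) | char_value(c).unwrap());
    let micros = value >> 10;
    let clock_id = value & 0x3ff;
    println!("timestamp: {micros} us, clock id: {clock_id}");
}
```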
+21 -324
crates/jacquard-common/src/types/value.rs
··· 1 - use base64::{ 2 - Engine, 3 - prelude::{BASE64_STANDARD, BASE64_STANDARD_NO_PAD, BASE64_URL_SAFE, BASE64_URL_SAFE_NO_PAD}, 4 - }; 1 + use crate::types::{DataModelType, blob::Blob, string::*}; 5 2 use bytes::Bytes; 6 3 use ipld_core::ipld::Ipld; 7 - use serde::{Deserialize, Deserializer, Serialize, Serializer}; 8 4 use smol_str::{SmolStr, ToSmolStr}; 9 - use std::{collections::BTreeMap, str::FromStr}; 10 - use url::Url; 5 + use std::collections::BTreeMap; 11 6 12 - use crate::types::{ 13 - DataModelType, LexiconStringType, 14 - blob::{Blob, MimeType}, 15 - string::*, 16 - }; 7 + pub mod parsing; 8 + pub mod serde_impl; 9 + 10 + #[cfg(test)] 11 + mod tests; 17 12 18 13 #[derive(Debug, Clone, PartialEq, Eq)] 19 14 pub enum Data<'s> { ··· 21 16 Boolean(bool), 22 17 Integer(i64), 23 18 String(AtprotoStr<'s>), 24 - Bytes(Bytes), // maybe need custom type for serialization 25 - CidLink(Cid<'s>), // maybe need custom type for serialization 19 + Bytes(Bytes), 20 + CidLink(Cid<'s>), 26 21 Array(Array<'s>), 27 22 Object(Object<'s>), 28 23 Blob(Blob<'s>), ··· 98 93 json: &'s serde_json::Map<String, serde_json::Value>, 99 94 ) -> Result<Data<'s>, AtDataError> { 100 95 if let Some(type_field) = json.get("$type").and_then(|v| v.as_str()) { 101 - if infer_from_type(type_field) == DataModelType::Blob { 102 - if let Some(blob) = json_to_blob(json) { 96 + if parsing::infer_from_type(type_field) == DataModelType::Blob { 97 + if let Some(blob) = parsing::json_to_blob(json) { 103 98 return Ok(Data::Blob(blob)); 104 99 } 105 100 } ··· 110 105 if key == "$type" { 111 106 map.insert(key.to_smolstr(), Data::from_json(value)?); 112 107 } 113 - match string_key_type_guess(key) { 108 + match parsing::string_key_type_guess(key) { 114 109 DataModelType::Null if value.is_null() => { 115 110 map.insert(key.to_smolstr(), Data::Null); 116 111 } ··· 121 116 map.insert(key.to_smolstr(), Data::Integer(value.as_i64().unwrap())); 122 117 } 123 118 DataModelType::Bytes if value.is_string() => { 
124 - map.insert(key.to_smolstr(), decode_bytes(value.as_str().unwrap())); 119 + map.insert( 120 + key.to_smolstr(), 121 + parsing::decode_bytes(value.as_str().unwrap()), 122 + ); 125 123 } 126 124 DataModelType::CidLink => { 127 125 if let Some(value) = value.as_object() { ··· 153 151 ); 154 152 } 155 153 DataModelType::String(string_type) if value.is_string() => { 156 - insert_string(&mut map, key, value.as_str().unwrap(), string_type); 154 + parsing::insert_string(&mut map, key, value.as_str().unwrap(), string_type)?; 157 155 } 158 156 _ => { 159 157 map.insert(key.to_smolstr(), Data::from_json(value)?); ··· 166 164 167 165 pub fn from_cbor(cbor: &'s BTreeMap<String, Ipld>) -> Result<Data<'s>, AtDataError> { 168 166 if let Some(Ipld::String(type_field)) = cbor.get("$type") { 169 - if infer_from_type(type_field) == DataModelType::Blob { 170 - if let Some(blob) = cbor_to_blob(cbor) { 167 + if parsing::infer_from_type(type_field) == DataModelType::Blob { 168 + if let Some(blob) = parsing::cbor_to_blob(cbor) { 171 169 return Ok(Data::Blob(blob)); 172 170 } 173 171 } ··· 178 176 if key == "$type" { 179 177 map.insert(key.to_smolstr(), Data::from_cbor(value)?); 180 178 } 181 - match (string_key_type_guess(key), value) { 179 + match (parsing::string_key_type_guess(key), value) { 182 180 (DataModelType::Null, Ipld::Null) => { 183 181 map.insert(key.to_smolstr(), Data::Null); 184 182 } ··· 201 199 map.insert(key.to_smolstr(), Object::from_cbor(value)?); 202 200 } 203 201 (DataModelType::String(string_type), Ipld::String(value)) => { 204 - insert_string(&mut map, key, value, string_type); 202 + parsing::insert_string(&mut map, key, value, string_type)?; 205 203 } 206 204 _ => { 207 205 map.insert(key.to_smolstr(), Data::from_cbor(value)?); ··· 212 210 Ok(Data::Object(Object(map))) 213 211 } 214 212 } 215 - 216 - pub fn insert_string<'s>( 217 - map: &mut BTreeMap<SmolStr, Data<'s>>, 218 - key: &'s str, 219 - value: &'s str, 220 - string_type: LexiconStringType, 221 - ) -> 
Result<(), AtDataError> { 222 - match string_type { 223 - LexiconStringType::Datetime => { 224 - if let Ok(datetime) = Datetime::from_str(value) { 225 - map.insert( 226 - key.to_smolstr(), 227 - Data::String(AtprotoStr::Datetime(datetime)), 228 - ); 229 - } else { 230 - map.insert( 231 - key.to_smolstr(), 232 - Data::String(AtprotoStr::String(value.into())), 233 - ); 234 - } 235 - } 236 - LexiconStringType::AtUri => { 237 - if let Ok(value) = AtUri::new(value) { 238 - map.insert(key.to_smolstr(), Data::String(AtprotoStr::AtUri(value))); 239 - } else { 240 - map.insert( 241 - key.to_smolstr(), 242 - Data::String(AtprotoStr::String(value.into())), 243 - ); 244 - } 245 - } 246 - LexiconStringType::Did => { 247 - if let Ok(value) = Did::new(value) { 248 - map.insert(key.to_smolstr(), Data::String(AtprotoStr::Did(value))); 249 - } else { 250 - map.insert( 251 - key.to_smolstr(), 252 - Data::String(AtprotoStr::String(value.into())), 253 - ); 254 - } 255 - } 256 - LexiconStringType::Handle => { 257 - if let Ok(value) = Handle::new(value) { 258 - map.insert(key.to_smolstr(), Data::String(AtprotoStr::Handle(value))); 259 - } else { 260 - map.insert( 261 - key.to_smolstr(), 262 - Data::String(AtprotoStr::String(value.into())), 263 - ); 264 - } 265 - } 266 - LexiconStringType::AtIdentifier => { 267 - if let Ok(value) = AtIdentifier::new(value) { 268 - map.insert( 269 - key.to_smolstr(), 270 - Data::String(AtprotoStr::AtIdentifier(value)), 271 - ); 272 - } else { 273 - map.insert( 274 - key.to_smolstr(), 275 - Data::String(AtprotoStr::String(value.into())), 276 - ); 277 - } 278 - } 279 - LexiconStringType::Nsid => { 280 - if let Ok(value) = Nsid::new(value) { 281 - map.insert(key.to_smolstr(), Data::String(AtprotoStr::Nsid(value))); 282 - } else { 283 - map.insert( 284 - key.to_smolstr(), 285 - Data::String(AtprotoStr::String(value.into())), 286 - ); 287 - } 288 - } 289 - LexiconStringType::Cid => { 290 - if let Ok(value) = Cid::new(value.as_bytes()) { 291 - 
map.insert(key.to_smolstr(), Data::String(AtprotoStr::Cid(value))); 292 - } else { 293 - map.insert( 294 - key.to_smolstr(), 295 - Data::String(AtprotoStr::String(value.into())), 296 - ); 297 - } 298 - } 299 - LexiconStringType::Language => { 300 - if let Ok(value) = Language::new(value) { 301 - map.insert(key.to_smolstr(), Data::String(AtprotoStr::Language(value))); 302 - } else { 303 - map.insert( 304 - key.to_smolstr(), 305 - Data::String(AtprotoStr::String(value.into())), 306 - ); 307 - } 308 - } 309 - LexiconStringType::Tid => { 310 - if let Ok(value) = Tid::new(value) { 311 - map.insert(key.to_smolstr(), Data::String(AtprotoStr::Tid(value))); 312 - } else { 313 - map.insert( 314 - key.to_smolstr(), 315 - Data::String(AtprotoStr::String(value.into())), 316 - ); 317 - } 318 - } 319 - LexiconStringType::RecordKey => { 320 - if let Ok(value) = Rkey::new(value) { 321 - map.insert( 322 - key.to_smolstr(), 323 - Data::String(AtprotoStr::RecordKey(RecordKey::from(value))), 324 - ); 325 - } else { 326 - map.insert( 327 - key.to_smolstr(), 328 - Data::String(AtprotoStr::String(value.into())), 329 - ); 330 - } 331 - } 332 - LexiconStringType::Uri(_) => { 333 - if let Ok(uri) = Uri::new(value) { 334 - map.insert(key.to_smolstr(), Data::String(AtprotoStr::Uri(uri))); 335 - } else { 336 - map.insert( 337 - key.to_smolstr(), 338 - Data::String(AtprotoStr::String(value.into())), 339 - ); 340 - } 341 - } 342 - LexiconStringType::String => { 343 - map.insert(key.to_smolstr(), Data::String(parse_string(value))); 344 - } 345 - } 346 - Ok(()) 347 - } 348 - 349 - /// smarter parsing to avoid trying as many posibilities. 
350 - pub fn parse_string<'s>(string: &'s str) -> AtprotoStr<'s> { 351 - if string.len() < 2048 && string.starts_with("did:") { 352 - if let Ok(did) = Did::new(string) { 353 - return AtprotoStr::Did(did); 354 - } 355 - } else if string.starts_with("20") && string.ends_with("Z") { 356 - // probably a date (for the next 75 years) 357 - if let Ok(datetime) = Datetime::from_str(string) { 358 - return AtprotoStr::Datetime(datetime); 359 - } 360 - } else if string.starts_with("at://") { 361 - if let Ok(uri) = AtUri::new(string) { 362 - return AtprotoStr::AtUri(uri); 363 - } 364 - } else if string.starts_with("https://") { 365 - if let Ok(uri) = Url::parse(string) { 366 - return AtprotoStr::Uri(Uri::Https(uri)); 367 - } 368 - } else if string.starts_with("wss://") { 369 - if let Ok(uri) = Url::parse(string) { 370 - return AtprotoStr::Uri(Uri::Https(uri)); 371 - } 372 - } else if string.starts_with("ipfs://") { 373 - return AtprotoStr::Uri(Uri::Cid(Cid::str(string))); 374 - } else if string.contains('.') && !string.contains([' ', '\n']) { 375 - if string.len() < 253 && Url::parse(string).is_ok() { 376 - // probably a handle 377 - if let Ok(handle) = AtIdentifier::new(string) { 378 - return AtprotoStr::AtIdentifier(handle); 379 - } else { 380 - return AtprotoStr::Uri(Uri::Any(string.into())); 381 - } 382 - } else if let Ok(nsid) = Nsid::new(string) { 383 - return AtprotoStr::Nsid(nsid); 384 - } 385 - } else if string.len() == 13 { 386 - if let Ok(tid) = Tid::new(string) { 387 - return AtprotoStr::Tid(tid); 388 - } 389 - } else if !string.contains([' ', '\n']) { 390 - // cid? 391 - if let Ok(cid) = Cid::new(string.as_bytes()) { 392 - return AtprotoStr::Cid(cid); 393 - } 394 - } 395 - 396 - AtprotoStr::String(string.into()) 397 - } 398 - 399 - /// First-level guess at what we should parse the corresponding value as 400 - /// Helps speed up parsing, avoids some ambiguities. 
401 - pub fn string_key_type_guess(key: &str) -> DataModelType { 402 - match key { 403 - "cid" => DataModelType::String(LexiconStringType::Cid), 404 - "uri" => DataModelType::String(LexiconStringType::Uri(super::UriType::Any)), 405 - "did" => DataModelType::String(LexiconStringType::Did), 406 - "handle" => DataModelType::String(LexiconStringType::AtIdentifier), 407 - "ref" => DataModelType::CidLink, 408 - "list" => DataModelType::String(LexiconStringType::AtUri), 409 - "blobref" => DataModelType::Blob, 410 - "createdAt" | "created" | "indexedAt" | "issuedAt" | "updatedAt" | "playedTime" => { 411 - DataModelType::String(LexiconStringType::Datetime) 412 - } 413 - "size" | "width" | "height" => DataModelType::Integer, 414 - "value" | "record" | "embed" => DataModelType::Object, 415 - "text" | "displayName" | "alt" | "name" | "description" => { 416 - DataModelType::String(LexiconStringType::String) 417 - } 418 - "langs" | "blobs" | "images" | "labels" => DataModelType::Array, 419 - "$bytes" => DataModelType::Bytes, 420 - "$link" => DataModelType::String(LexiconStringType::Cid), 421 - "$type" => DataModelType::String(LexiconStringType::String), 422 - 423 - // we assume others are strings speficially because it's easy to check if a serde_json::Value 424 - // or Ipld value is at least a string, and then we fall back to Object/Map. 
425 - _ => DataModelType::String(LexiconStringType::String), 426 - } 427 - } 428 - 429 - pub fn cbor_to_blob<'b>(blob: &'b BTreeMap<String, Ipld>) -> Option<Blob<'b>> { 430 - let mime_type = blob.get("mimeType").and_then(|o| { 431 - if let Ipld::String(string) = o { 432 - Some(string) 433 - } else { 434 - None 435 - } 436 - }); 437 - if let Some(Ipld::Link(value)) = blob.get("ref") { 438 - let size = blob.get("size").and_then(|o| { 439 - if let Ipld::Integer(i) = o { 440 - Some(*i as i64) 441 - } else { 442 - None 443 - } 444 - }); 445 - if let (Some(mime_type), Some(size)) = (mime_type, size) { 446 - return Some(Blob { 447 - r#ref: Cid::ipld(*value), 448 - mime_type: MimeType::raw(mime_type), 449 - size: size as usize, 450 - }); 451 - } 452 - } else if let Some(Ipld::String(value)) = blob.get("cid") { 453 - if let Some(mime_type) = mime_type { 454 - return Some(Blob { 455 - r#ref: Cid::str(value), 456 - mime_type: MimeType::raw(mime_type), 457 - size: 0, 458 - }); 459 - } 460 - } 461 - 462 - None 463 - } 464 - 465 - pub fn json_to_blob<'b>(blob: &'b serde_json::Map<String, serde_json::Value>) -> Option<Blob<'b>> { 466 - let mime_type = blob.get("mimeType").and_then(|v| v.as_str()); 467 - if let Some(value) = blob.get("ref") { 468 - if let Some(value) = value 469 - .as_object() 470 - .and_then(|o| o.get("$link")) 471 - .and_then(|v| v.as_str()) 472 - { 473 - let size = blob.get("size").and_then(|v| v.as_u64()); 474 - if let (Some(mime_type), Some(size)) = (mime_type, size) { 475 - return Some(Blob { 476 - r#ref: Cid::str(value), 477 - mime_type: MimeType::raw(mime_type), 478 - size: size as usize, 479 - }); 480 - } 481 - } 482 - } else if let Some(value) = blob.get("cid").and_then(|v| v.as_str()) { 483 - if let Some(mime_type) = mime_type { 484 - return Some(Blob { 485 - r#ref: Cid::str(value), 486 - mime_type: MimeType::raw(mime_type), 487 - size: 0, 488 - }); 489 - } 490 - } 491 - 492 - None 493 - } 494 - 495 - pub fn infer_from_type(type_field: &str) -> 
DataModelType { 496 - match type_field { 497 - "blob" => DataModelType::Blob, 498 - _ => DataModelType::Object, 499 - } 500 - } 501 - 502 - pub fn decode_bytes<'s>(bytes: &'s str) -> Data<'s> { 503 - // First one should just work. rest are insurance. 504 - if let Ok(bytes) = BASE64_STANDARD.decode(bytes) { 505 - Data::Bytes(Bytes::from_owner(bytes)) 506 - } else if let Ok(bytes) = BASE64_STANDARD_NO_PAD.decode(bytes) { 507 - Data::Bytes(Bytes::from_owner(bytes)) 508 - } else if let Ok(bytes) = BASE64_URL_SAFE.decode(bytes) { 509 - Data::Bytes(Bytes::from_owner(bytes)) 510 - } else if let Ok(bytes) = BASE64_URL_SAFE_NO_PAD.decode(bytes) { 511 - Data::Bytes(Bytes::from_owner(bytes)) 512 - } else { 513 - Data::String(AtprotoStr::String(bytes.into())) 514 - } 515 - }
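The hunk above shows `from_json`/`from_cbor` dispatching on `string_key_type_guess(key)`: the map key suggests how to parse the value, with a plain-string fallback. A minimal, self-contained sketch of that idea, using a stand-in `Hint` enum rather than the crate's real `DataModelType`:

```rust
// Sketch of the key-name type-hint dispatch used by `from_json`/`from_cbor`.
// `Hint` and `guess_hint` are simplified stand-ins, not the crate's real types.

#[derive(Debug, PartialEq)]
enum Hint {
    Datetime,
    Cid,
    Did,
    Integer,
    PlainString,
}

fn guess_hint(key: &str) -> Hint {
    match key {
        "cid" | "$link" => Hint::Cid,
        "did" => Hint::Did,
        "createdAt" | "indexedAt" | "updatedAt" => Hint::Datetime,
        "size" | "width" | "height" => Hint::Integer,
        // Anything else: assume plain string and let later parsing refine it.
        _ => Hint::PlainString,
    }
}

fn main() {
    assert_eq!(guess_hint("createdAt"), Hint::Datetime);
    assert_eq!(guess_hint("did"), Hint::Did);
    assert_eq!(guess_hint("text"), Hint::PlainString);
    println!("ok");
}
```

The hint is only a first guess; the real code still validates the value and falls back to a plain string (or `Object`/`Map`) when the guess does not pan out.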
+320
crates/jacquard-common/src/types/value/parsing.rs
··· 1 + use crate::{ 2 + IntoStatic, 3 + types::{ 4 + DataModelType, LexiconStringType, UriType, 5 + blob::{Blob, MimeType}, 6 + string::*, 7 + value::{AtDataError, Data}, 8 + }, 9 + }; 10 + use base64::{ 11 + Engine, 12 + prelude::{BASE64_STANDARD, BASE64_STANDARD_NO_PAD, BASE64_URL_SAFE, BASE64_URL_SAFE_NO_PAD}, 13 + }; 14 + use bytes::Bytes; 15 + use ipld_core::ipld::Ipld; 16 + use smol_str::{SmolStr, ToSmolStr}; 17 + use std::{collections::BTreeMap, str::FromStr}; 18 + use url::Url; 19 + 20 + pub fn insert_string<'s>( 21 + map: &mut BTreeMap<SmolStr, Data<'s>>, 22 + key: &'s str, 23 + value: &'s str, 24 + string_type: LexiconStringType, 25 + ) -> Result<(), AtDataError> { 26 + match string_type { 27 + LexiconStringType::Datetime => { 28 + if let Ok(datetime) = Datetime::from_str(value) { 29 + map.insert( 30 + key.to_smolstr(), 31 + Data::String(AtprotoStr::Datetime(datetime)), 32 + ); 33 + } else { 34 + map.insert( 35 + key.to_smolstr(), 36 + Data::String(AtprotoStr::String(value.into())), 37 + ); 38 + } 39 + } 40 + LexiconStringType::AtUri => { 41 + if let Ok(value) = AtUri::new(value) { 42 + map.insert(key.to_smolstr(), Data::String(AtprotoStr::AtUri(value))); 43 + } else { 44 + map.insert( 45 + key.to_smolstr(), 46 + Data::String(AtprotoStr::String(value.into())), 47 + ); 48 + } 49 + } 50 + LexiconStringType::Did => { 51 + if let Ok(value) = Did::new(value) { 52 + map.insert(key.to_smolstr(), Data::String(AtprotoStr::Did(value))); 53 + } else { 54 + map.insert( 55 + key.to_smolstr(), 56 + Data::String(AtprotoStr::String(value.into())), 57 + ); 58 + } 59 + } 60 + LexiconStringType::Handle => { 61 + if let Ok(value) = Handle::new(value) { 62 + map.insert(key.to_smolstr(), Data::String(AtprotoStr::Handle(value))); 63 + } else { 64 + map.insert( 65 + key.to_smolstr(), 66 + Data::String(AtprotoStr::String(value.into())), 67 + ); 68 + } 69 + } 70 + LexiconStringType::AtIdentifier => { 71 + if let Ok(value) = AtIdentifier::new(value) { 72 + map.insert( 73 + 
key.to_smolstr(), 74 + Data::String(AtprotoStr::AtIdentifier(value)), 75 + ); 76 + } else { 77 + map.insert( 78 + key.to_smolstr(), 79 + Data::String(AtprotoStr::String(value.into())), 80 + ); 81 + } 82 + } 83 + LexiconStringType::Nsid => { 84 + if let Ok(value) = Nsid::new(value) { 85 + map.insert(key.to_smolstr(), Data::String(AtprotoStr::Nsid(value))); 86 + } else { 87 + map.insert( 88 + key.to_smolstr(), 89 + Data::String(AtprotoStr::String(value.into())), 90 + ); 91 + } 92 + } 93 + LexiconStringType::Cid => { 94 + if let Ok(value) = Cid::new(value.as_bytes()) { 95 + map.insert(key.to_smolstr(), Data::String(AtprotoStr::Cid(value))); 96 + } else { 97 + map.insert( 98 + key.to_smolstr(), 99 + Data::String(AtprotoStr::String(value.into())), 100 + ); 101 + } 102 + } 103 + LexiconStringType::Language => { 104 + if let Ok(value) = Language::new(value) { 105 + map.insert(key.to_smolstr(), Data::String(AtprotoStr::Language(value))); 106 + } else { 107 + map.insert( 108 + key.to_smolstr(), 109 + Data::String(AtprotoStr::String(value.into())), 110 + ); 111 + } 112 + } 113 + LexiconStringType::Tid => { 114 + if let Ok(value) = Tid::new(value) { 115 + map.insert(key.to_smolstr(), Data::String(AtprotoStr::Tid(value))); 116 + } else { 117 + map.insert( 118 + key.to_smolstr(), 119 + Data::String(AtprotoStr::String(value.into())), 120 + ); 121 + } 122 + } 123 + LexiconStringType::RecordKey => { 124 + if let Ok(value) = Rkey::new(value) { 125 + map.insert( 126 + key.to_smolstr(), 127 + Data::String(AtprotoStr::RecordKey(RecordKey::from(value))), 128 + ); 129 + } else { 130 + map.insert( 131 + key.to_smolstr(), 132 + Data::String(AtprotoStr::String(value.into())), 133 + ); 134 + } 135 + } 136 + LexiconStringType::Uri(_) => { 137 + if let Ok(uri) = Uri::new(value) { 138 + map.insert(key.to_smolstr(), Data::String(AtprotoStr::Uri(uri))); 139 + } else { 140 + map.insert( 141 + key.to_smolstr(), 142 + Data::String(AtprotoStr::String(value.into())), 143 + ); 144 + } 145 + } 146 + 
LexiconStringType::String => { 147 + map.insert(key.to_smolstr(), Data::String(parse_string(value))); 148 + } 149 + } 150 + Ok(()) 151 + } 152 + 153 + /// smarter parsing to avoid trying as many possibilities. 154 + pub fn parse_string<'s>(string: &'s str) -> AtprotoStr<'s> { 155 + if string.len() < 2048 && string.starts_with("did:") { 156 + if let Ok(did) = Did::new(string) { 157 + return AtprotoStr::Did(did); 158 + } 159 + } else if string.starts_with("20") && string.ends_with("Z") { 160 + // probably a date (for the next 75 years) 161 + if let Ok(datetime) = Datetime::from_str(string) { 162 + return AtprotoStr::Datetime(datetime); 163 + } 164 + } else if string.starts_with("at://") { 165 + if let Ok(uri) = AtUri::new(string) { 166 + return AtprotoStr::AtUri(uri); 167 + } 168 + } else if string.starts_with("https://") { 169 + if let Ok(uri) = Url::parse(string) { 170 + return AtprotoStr::Uri(Uri::Https(uri)); 171 + } 172 + } else if string.starts_with("wss://") { 173 + if let Ok(uri) = Url::parse(string) { 174 + return AtprotoStr::Uri(Uri::Https(uri)); 175 + } 176 + } else if string.starts_with("ipfs://") { 177 + return AtprotoStr::Uri(Uri::Cid(Cid::str(string))); 178 + } else if string.contains('.') && !string.contains([' ', '\n']) { 179 + if string.len() < 253 && Url::parse(string).is_ok() { 180 + // probably a handle 181 + if let Ok(handle) = AtIdentifier::new(string) { 182 + return AtprotoStr::AtIdentifier(handle); 183 + } else { 184 + return AtprotoStr::Uri(Uri::Any(string.into())); 185 + } 186 + } else if let Ok(nsid) = Nsid::new(string) { 187 + return AtprotoStr::Nsid(nsid); 188 + } 189 + } else if string.len() == 13 { 190 + if let Ok(tid) = Tid::new(string) { 191 + return AtprotoStr::Tid(tid); 192 + } 193 + } else if !string.contains([' ', '\n']) && string.len() > 20 { 194 + // CID: must be longer than typical short strings to avoid false positives 195 + // Most CIDs are 46+ chars (base32 encoded), minimum realistic is around 30 196 + if let Ok(cid) = 
Cid::new(string.as_bytes()) { 197 + return AtprotoStr::Cid(cid); 198 + } 199 + } 200 + 201 + AtprotoStr::String(string.into()) 202 + } 203 + 204 + /// First-level guess at what we should parse the corresponding value as. 205 + /// Helps speed up parsing, avoids some ambiguities. 206 + pub fn string_key_type_guess(key: &str) -> DataModelType { 207 + match key { 208 + "cid" => DataModelType::String(LexiconStringType::Cid), 209 + "uri" => DataModelType::String(LexiconStringType::Uri(UriType::Any)), 210 + "did" => DataModelType::String(LexiconStringType::Did), 211 + "handle" => DataModelType::String(LexiconStringType::AtIdentifier), 212 + "ref" => DataModelType::CidLink, 213 + "list" => DataModelType::String(LexiconStringType::AtUri), 214 + "blobref" => DataModelType::Blob, 215 + "createdAt" | "created" | "indexedAt" | "issuedAt" | "updatedAt" | "playedTime" => { 216 + DataModelType::String(LexiconStringType::Datetime) 217 + } 218 + "size" | "width" | "height" => DataModelType::Integer, 219 + "value" | "record" | "embed" => DataModelType::Object, 220 + "text" | "displayName" | "alt" | "name" | "description" => { 221 + DataModelType::String(LexiconStringType::String) 222 + } 223 + "langs" | "blobs" | "images" | "labels" => DataModelType::Array, 224 + "$bytes" => DataModelType::Bytes, 225 + "$link" => DataModelType::String(LexiconStringType::Cid), 226 + "$type" => DataModelType::String(LexiconStringType::String), 227 + 228 + // we assume others are strings specifically because it's easy to check if a serde_json::Value 229 + // or Ipld value is at least a string, and then we fall back to Object/Map. 
230 + _ => DataModelType::String(LexiconStringType::String), 231 + } 232 + } 233 + 234 + pub fn cbor_to_blob<'b>(blob: &'b BTreeMap<String, Ipld>) -> Option<Blob<'b>> { 235 + let mime_type = blob.get("mimeType").and_then(|o| { 236 + if let Ipld::String(string) = o { 237 + Some(string) 238 + } else { 239 + None 240 + } 241 + }); 242 + if let Some(Ipld::Link(value)) = blob.get("ref") { 243 + let size = blob.get("size").and_then(|o| { 244 + if let Ipld::Integer(i) = o { 245 + Some(*i as i64) 246 + } else { 247 + None 248 + } 249 + }); 250 + if let (Some(mime_type), Some(size)) = (mime_type, size) { 251 + return Some(Blob { 252 + r#ref: Cid::ipld(*value), 253 + mime_type: MimeType::raw(mime_type), 254 + size: size as usize, 255 + }); 256 + } 257 + } else if let Some(Ipld::String(value)) = blob.get("cid") { 258 + if let Some(mime_type) = mime_type { 259 + return Some(Blob { 260 + r#ref: Cid::str(value), 261 + mime_type: MimeType::raw(mime_type), 262 + size: 0, 263 + }); 264 + } 265 + } 266 + 267 + None 268 + } 269 + 270 + pub fn json_to_blob<'b>(blob: &'b serde_json::Map<String, serde_json::Value>) -> Option<Blob<'b>> { 271 + let mime_type = blob.get("mimeType").and_then(|v| v.as_str()); 272 + if let Some(value) = blob.get("ref") { 273 + if let Some(value) = value 274 + .as_object() 275 + .and_then(|o| o.get("$link")) 276 + .and_then(|v| v.as_str()) 277 + { 278 + let size = blob.get("size").and_then(|v| v.as_u64()); 279 + if let (Some(mime_type), Some(size)) = (mime_type, size) { 280 + return Some(Blob { 281 + r#ref: Cid::str(value), 282 + mime_type: MimeType::raw(mime_type), 283 + size: size as usize, 284 + }); 285 + } 286 + } 287 + } else if let Some(value) = blob.get("cid").and_then(|v| v.as_str()) { 288 + if let Some(mime_type) = mime_type { 289 + return Some(Blob { 290 + r#ref: Cid::str(value), 291 + mime_type: MimeType::raw(mime_type), 292 + size: 0, 293 + }); 294 + } 295 + } 296 + 297 + None 298 + } 299 + 300 + pub fn infer_from_type(type_field: &str) -> 
DataModelType { 301 + match type_field { 302 + "blob" => DataModelType::Blob, 303 + _ => DataModelType::Object, 304 + } 305 + } 306 + 307 + pub fn decode_bytes<'s>(bytes: &str) -> Data<'s> { 308 + // The first engine should just work; the rest are insurance. 309 + if let Ok(bytes) = BASE64_STANDARD.decode(bytes) { 310 + Data::Bytes(Bytes::from_owner(bytes)) 311 + } else if let Ok(bytes) = BASE64_STANDARD_NO_PAD.decode(bytes) { 312 + Data::Bytes(Bytes::from_owner(bytes)) 313 + } else if let Ok(bytes) = BASE64_URL_SAFE.decode(bytes) { 314 + Data::Bytes(Bytes::from_owner(bytes)) 315 + } else if let Ok(bytes) = BASE64_URL_SAFE_NO_PAD.decode(bytes) { 316 + Data::Bytes(Bytes::from_owner(bytes)) 317 + } else { 318 + Data::String(AtprotoStr::String(CowStr::Borrowed(bytes).into_static())) 319 + } 320 + }
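The core of `parse_string` above is prefix dispatch: cheap `starts_with` checks narrow down which expensive validation to attempt. A standalone sketch of just that dispatch, with a stand-in `Classified` enum instead of `AtprotoStr` and the real validators (`Did::new`, `AtUri::new`, ...) replaced by the prefix checks alone:

```rust
// Simplified sketch of the prefix-dispatch heuristic in `parse_string`.
// Only the cheap prefix checks are modeled; real code validates each candidate.

#[derive(Debug, PartialEq)]
enum Classified {
    Did,
    Datetime,
    AtUri,
    HttpsUri,
    Plain,
}

fn classify(s: &str) -> Classified {
    if s.len() < 2048 && s.starts_with("did:") {
        Classified::Did
    } else if s.starts_with("20") && s.ends_with('Z') {
        // Probably an RFC 3339 timestamp (for the next 75 years).
        Classified::Datetime
    } else if s.starts_with("at://") {
        Classified::AtUri
    } else if s.starts_with("https://") {
        Classified::HttpsUri
    } else {
        Classified::Plain
    }
}

fn main() {
    assert_eq!(classify("did:plc:abc123"), Classified::Did);
    assert_eq!(classify("2025-10-01T17:15:19.282Z"), Classified::Datetime);
    assert_eq!(classify("at://did:plc:abc/app.bsky.feed.post/3m2"), Classified::AtUri);
    assert_eq!(classify("hello world"), Classified::Plain);
    println!("ok");
}
```

Because each branch is mutually exclusive, a string is validated against at most one candidate type before falling through to plain-string handling, which is what keeps the real `parse_string` cheap.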
+390
crates/jacquard-common/src/types/value/serde_impl.rs
··· 1 + use core::fmt; 2 + use std::{collections::BTreeMap, str::FromStr}; 3 + 4 + use base64::{Engine, prelude::BASE64_STANDARD}; 5 + use bytes::Bytes; 6 + use serde::{Deserialize, Deserializer, Serialize, Serializer}; 7 + use smol_str::SmolStr; 8 + 9 + use crate::{ 10 + IntoStatic, 11 + types::{ 12 + DataModelType, LexiconStringType, 13 + blob::{Blob, MimeType}, 14 + string::*, 15 + value::{ 16 + Array, AtDataError, Data, Object, 17 + parsing::{decode_bytes, infer_from_type, parse_string, string_key_type_guess}, 18 + }, 19 + }, 20 + }; 21 + 22 + impl Serialize for Data<'_> { 23 + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> 24 + where 25 + S: Serializer, 26 + { 27 + match self { 28 + Data::Null => serializer.serialize_none(), 29 + Data::Boolean(b) => serializer.serialize_bool(*b), 30 + Data::Integer(i) => serializer.serialize_i64(*i), 31 + Data::String(s) => s.serialize(serializer), 32 + Data::Bytes(bytes) => { 33 + if serializer.is_human_readable() { 34 + // JSON: {"$bytes": "base64 string"} 35 + use serde::ser::SerializeMap; 36 + let mut map = serializer.serialize_map(Some(1))?; 37 + map.serialize_entry("$bytes", &BASE64_STANDARD.encode(bytes))?; 38 + map.end() 39 + } else { 40 + // CBOR: raw bytes 41 + serializer.serialize_bytes(bytes) 42 + } 43 + } 44 + Data::CidLink(cid) => { 45 + if serializer.is_human_readable() { 46 + // JSON: {"$link": "cid_string"} 47 + use serde::ser::SerializeMap; 48 + let mut map = serializer.serialize_map(Some(1))?; 49 + map.serialize_entry("$link", cid.as_str())?; 50 + map.end() 51 + } else { 52 + // CBOR: raw cid (Cid's serialize handles this) 53 + cid.serialize(serializer) 54 + } 55 + } 56 + Data::Array(arr) => arr.serialize(serializer), 57 + Data::Object(obj) => obj.serialize(serializer), 58 + Data::Blob(blob) => blob.serialize(serializer), 59 + } 60 + } 61 + } 62 + 63 + impl<'de> Deserialize<'de> for Data<'de> { 64 + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> 65 + where 66 + D: 
Deserializer<'de>, 67 + { 68 + deserializer.deserialize_any(DataVisitor) 69 + } 70 + } 71 + 72 + struct DataVisitor; 73 + 74 + impl<'de: 'v, 'v> serde::de::Visitor<'v> for DataVisitor { 75 + type Value = Data<'v>; 76 + 77 + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { 78 + formatter.write_str("any valid AT Protocol data value") 79 + } 80 + 81 + fn visit_none<E>(self) -> Result<Self::Value, E> 82 + where 83 + E: serde::de::Error, 84 + { 85 + Ok(Data::Null) 86 + } 87 + 88 + fn visit_unit<E>(self) -> Result<Self::Value, E> 89 + where 90 + E: serde::de::Error, 91 + { 92 + Ok(Data::Null) 93 + } 94 + 95 + fn visit_bool<E>(self, v: bool) -> Result<Self::Value, E> 96 + where 97 + E: serde::de::Error, 98 + { 99 + Ok(Data::Boolean(v)) 100 + } 101 + 102 + fn visit_i64<E>(self, v: i64) -> Result<Self::Value, E> 103 + where 104 + E: serde::de::Error, 105 + { 106 + Ok(Data::Integer(v)) 107 + } 108 + 109 + fn visit_u64<E>(self, v: u64) -> Result<Self::Value, E> 110 + where 111 + E: serde::de::Error, 112 + { 113 + Ok(Data::Integer(v as i64)) 114 + } 115 + 116 + fn visit_f64<E>(self, _v: f64) -> Result<Self::Value, E> 117 + where 118 + E: serde::de::Error, 119 + { 120 + Err(E::custom( 121 + "floating point numbers not allowed in AT protocol data", 122 + )) 123 + } 124 + 125 + fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> 126 + where 127 + E: serde::de::Error, 128 + { 129 + Ok(Data::String(AtprotoStr::String( 130 + CowStr::Borrowed(v).into_static(), 131 + ))) 132 + } 133 + 134 + fn visit_borrowed_str<E>(self, v: &'v str) -> Result<Self::Value, E> 135 + where 136 + E: serde::de::Error, 137 + { 138 + // Don't infer type here - just store as plain string 139 + // Type inference happens in apply_type_inference based on field names 140 + Ok(Data::String(AtprotoStr::String(v.into()))) 141 + } 142 + 143 + fn visit_string<E>(self, v: String) -> Result<Self::Value, E> 144 + where 145 + E: serde::de::Error, 146 + { 147 + 
Ok(Data::String(AtprotoStr::String(v.into()))) 148 + } 149 + 150 + fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E> 151 + where 152 + E: serde::de::Error, 153 + { 154 + Ok(Data::Bytes(Bytes::copy_from_slice(v))) 155 + } 156 + 157 + fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error> 158 + where 159 + A: serde::de::SeqAccess<'v>, 160 + { 161 + let mut array = Vec::new(); 162 + while let Some(elem) = seq.next_element()? { 163 + array.push(elem); 164 + } 165 + Ok(Data::Array(Array(array))) 166 + } 167 + 168 + fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error> 169 + where 170 + A: serde::de::MapAccess<'v>, 171 + { 172 + use serde::de::Error; 173 + 174 + // Peek at first key to check for special single-key patterns 175 + let mut temp_map: BTreeMap<SmolStr, Data<'v>> = BTreeMap::new(); 176 + 177 + while let Some(key) = map.next_key::<SmolStr>()? { 178 + // Check for special patterns on single-key maps 179 + if temp_map.is_empty() { 180 + if key.as_str() == "$link" { 181 + // {"$link": "cid_string"} pattern 182 + let cid_str: String = map.next_value()?; 183 + // Check if there are more keys 184 + if let Some(next_key) = map.next_key::<SmolStr>()? 
{ 185 + // More keys, treat as regular object 186 + temp_map.insert(key, Data::String(AtprotoStr::String(cid_str.into()))); 187 + let next_value: Data = map.next_value()?; 188 + temp_map.insert(next_key, next_value); 189 + continue; 190 + } else { 191 + // Only key, return CidLink 192 + return Ok(Data::CidLink(Cid::from(cid_str))); 193 + } 194 + } else if key.as_str() == "$bytes" { 195 + // {"$bytes": "base64_string"} pattern 196 + let bytes_str: String = map.next_value()?; 197 + // Check if there are more keys 198 + if map.next_key::<SmolStr>()?.is_some() { 199 + // More keys, treat as regular object - shouldn't happen but handle it 200 + temp_map.insert(key, Data::String(AtprotoStr::String(bytes_str.into()))); 201 + continue; 202 + } else { 203 + // Only key, decode and return bytes 204 + return Ok(decode_bytes(&bytes_str)); 205 + } 206 + } 207 + } 208 + 209 + let value: Data = map.next_value()?; 210 + temp_map.insert(key, value); 211 + } 212 + 213 + // Second pass: apply type inference and check for special patterns 214 + apply_type_inference(temp_map).map_err(A::Error::custom) 215 + } 216 + } 217 + 218 + fn apply_type_inference<'s>(mut map: BTreeMap<SmolStr, Data<'s>>) -> Result<Data<'s>, AtDataError> { 219 + // Check for CID link pattern first: {"$link": "cid_string"} 220 + if map.len() == 1 { 221 + if let Some(Data::String(AtprotoStr::String(link))) = map.get("$link") { 222 + // Need to extract ownership, can't borrow from map we're about to consume 223 + let link_owned = link.clone(); 224 + return Ok(Data::CidLink(Cid::cow_str(link_owned))); 225 + } 226 + } 227 + 228 + // Check for $type field to detect special structures 229 + let type_field = map.get("$type").and_then(|v| { 230 + if let Data::String(AtprotoStr::String(s)) = v { 231 + Some(s.as_ref()) 232 + } else { 233 + None 234 + } 235 + }); 236 + 237 + // Check for blob 238 + if let Some(type_str) = type_field { 239 + if type_str == "blob" && infer_from_type(type_str) == DataModelType::Blob { 240 + // 
Try to construct blob from the collected data 241 + let ref_cid = map.get("ref").and_then(|v| { 242 + if let Data::CidLink(cid) = v { 243 + Some(cid.clone()) 244 + } else { 245 + None 246 + } 247 + }); 248 + 249 + let mime_type = map.get("mimeType").and_then(|v| { 250 + if let Data::String(AtprotoStr::String(s)) = v { 251 + Some(s.clone()) 252 + } else { 253 + None 254 + } 255 + }); 256 + 257 + let size = map.get("size").and_then(|v| { 258 + if let Data::Integer(i) = v { 259 + Some(*i as usize) 260 + } else { 261 + None 262 + } 263 + }); 264 + 265 + if let (Some(ref_cid), Some(mime_cowstr), Some(size)) = (ref_cid, mime_type, size) { 266 + return Ok(Data::Blob(Blob { 267 + r#ref: ref_cid, 268 + mime_type: MimeType::from(mime_cowstr), 269 + size, 270 + })); 271 + } 272 + } 273 + } 274 + 275 + // Apply type inference for string fields based on key names (mutate in place) 276 + for (key, value) in map.iter_mut() { 277 + if let Data::String(AtprotoStr::String(s)) = value.to_owned() { 278 + let type_hint = string_key_type_guess(key.as_str()); 279 + let refined = match type_hint { 280 + DataModelType::String(string_type) => refine_string_by_type(s, string_type), 281 + DataModelType::Bytes => { 282 + // Decode base64 283 + decode_bytes(&s) 284 + } 285 + DataModelType::CidLink if key.as_str() == "$link" => { 286 + Data::CidLink(Cid::from_str(&s).unwrap()) 287 + } 288 + _ => continue, // no refinement needed 289 + }; 290 + *value = refined; 291 + } 292 + } 293 + 294 + Ok(Data::Object(Object(map))) 295 + } 296 + 297 + fn refine_string_by_type<'s>(s: CowStr<'s>, string_type: LexiconStringType) -> Data<'s> { 298 + match string_type { 299 + LexiconStringType::Datetime => Datetime::from_str(&s) 300 + .map(|dt| Data::String(AtprotoStr::Datetime(dt))) 301 + .unwrap_or_else(|_| Data::String(AtprotoStr::String(s.clone()))), 302 + LexiconStringType::AtUri => AtUri::new_owned(s.clone()) 303 + .map(|uri| Data::String(AtprotoStr::AtUri(uri))) 304 + .unwrap_or_else(|_| 
Data::String(AtprotoStr::String(s.clone()))), 305 + LexiconStringType::Did => Did::new_owned(s.clone()) 306 + .map(|did| Data::String(AtprotoStr::Did(did))) 307 + .unwrap_or_else(|_| Data::String(AtprotoStr::String(s.clone()))), 308 + LexiconStringType::Handle => Handle::new_owned(s.clone()) 309 + .map(|handle| Data::String(AtprotoStr::Handle(handle))) 310 + .unwrap_or_else(|_| Data::String(AtprotoStr::String(s.clone()))), 311 + LexiconStringType::AtIdentifier => AtIdentifier::new_owned(s.clone()) 312 + .map(|ident| Data::String(AtprotoStr::AtIdentifier(ident))) 313 + .unwrap_or_else(|_| Data::String(AtprotoStr::String(s.clone()))), 314 + LexiconStringType::Nsid => Nsid::new_owned(s.clone()) 315 + .map(|nsid| Data::String(AtprotoStr::Nsid(nsid))) 316 + .unwrap_or_else(|_| Data::String(AtprotoStr::String(s.clone()))), 317 + LexiconStringType::Cid => Cid::new_owned(s.as_bytes()) 318 + .map(|cid| Data::String(AtprotoStr::Cid(cid))) 319 + .unwrap_or_else(|_| Data::String(AtprotoStr::String(s.into()))), 320 + LexiconStringType::Language => Language::new(&s) 321 + .map(|lang| Data::String(AtprotoStr::Language(lang))) 322 + .unwrap_or_else(|_| Data::String(AtprotoStr::String(s.clone()))), 323 + LexiconStringType::Tid => Tid::new(s.clone()) 324 + .map(|tid| Data::String(AtprotoStr::Tid(tid))) 325 + .unwrap_or_else(|_| Data::String(AtprotoStr::String(s.clone()))), 326 + LexiconStringType::RecordKey => Rkey::new_owned(s.clone()) 327 + .map(|rkey| Data::String(AtprotoStr::RecordKey(RecordKey::from(rkey)))) 328 + .unwrap_or_else(|_| Data::String(AtprotoStr::String(s.clone()))), 329 + LexiconStringType::Uri(_) => Uri::new_owned(s.clone()) 330 + .map(|uri| Data::String(AtprotoStr::Uri(uri))) 331 + .unwrap_or_else(|_| Data::String(AtprotoStr::String(s.clone()))), 332 + LexiconStringType::String => Data::String(parse_string(&s).into_static()), 333 + } 334 + } 335 + 336 + impl Serialize for Array<'_> { 337 + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> 338 + 
where 339 + S: Serializer, 340 + { 341 + use serde::ser::SerializeSeq; 342 + let mut seq = serializer.serialize_seq(Some(self.0.len()))?; 343 + for item in &self.0 { 344 + seq.serialize_element(item)?; 345 + } 346 + seq.end() 347 + } 348 + } 349 + 350 + impl<'de> Deserialize<'de> for Array<'de> { 351 + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> 352 + where 353 + D: Deserializer<'de>, 354 + { 355 + // Just deserialize as Vec<Data> directly - the Data visitor handles everything 356 + let vec: Vec<Data<'de>> = Deserialize::deserialize(deserializer)?; 357 + Ok(Array(vec)) 358 + } 359 + } 360 + 361 + impl Serialize for Object<'_> { 362 + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> 363 + where 364 + S: Serializer, 365 + { 366 + use serde::ser::SerializeMap; 367 + let mut map = serializer.serialize_map(Some(self.0.len()))?; 368 + for (key, value) in &self.0 { 369 + map.serialize_entry(key.as_str(), value)?; 370 + } 371 + map.end() 372 + } 373 + } 374 + 375 + impl<'de> Deserialize<'de> for Object<'de> { 376 + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> 377 + where 378 + D: Deserializer<'de>, 379 + { 380 + use serde::de::Error; 381 + 382 + // Deserialize via Data, then extract the Object 383 + // The Data visitor handles all the type inference and special cases 384 + let data: Data<'de> = Data::deserialize(deserializer)?; 385 + match data { 386 + Data::Object(obj) => Ok(obj), 387 + _ => Err(D::Error::custom("expected object, got something else")), 388 + } 389 + } 390 + }
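The `visit_map` implementation above special-cases the atproto JSON envelope convention: a single-key object `{"$link": ...}` encodes a CID link and `{"$bytes": ...}` encodes base64 bytes; anything else is an ordinary object. A minimal sketch of that detection, with a stand-in `Shape` enum rather than the crate's `Data` variants:

```rust
use std::collections::BTreeMap;

// Sketch of the single-key envelope detection in `visit_map`.
// `Shape` is a simplified stand-in; the real code returns Data::CidLink/Data::Bytes.

#[derive(Debug, PartialEq)]
enum Shape {
    CidLink(String),
    Bytes(String),
    Object,
}

fn detect(map: &BTreeMap<String, String>) -> Shape {
    // The envelope forms are only recognized when the key is the sole entry.
    if map.len() == 1 {
        if let Some(v) = map.get("$link") {
            return Shape::CidLink(v.clone());
        }
        if let Some(v) = map.get("$bytes") {
            return Shape::Bytes(v.clone());
        }
    }
    Shape::Object
}

fn main() {
    let mut link = BTreeMap::new();
    link.insert("$link".to_string(), "bafyexample".to_string());
    assert_eq!(detect(&link), Shape::CidLink("bafyexample".to_string()));

    // Extra keys demote the map back to an ordinary object.
    link.insert("other".to_string(), "x".to_string());
    assert_eq!(detect(&link), Shape::Object);
    println!("ok");
}
```

The length check matters: the actual visitor also peeks for a second key before committing to the envelope interpretation, for the same reason.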
+364
crates/jacquard-common/src/types/value/test_thread.json
··· 1 + { 2 + "hasOtherReplies": false, 3 + "thread": [ 4 + { 5 + "uri": "at://did:plc:hbpefio3f5csc44msmbgioxz/app.bsky.feed.post/3m25k3p7lek2k", 6 + "depth": 0, 7 + "value": { 8 + "$type": "app.bsky.unspecced.defs#threadItemPost", 9 + "post": { 10 + "uri": "at://did:plc:hbpefio3f5csc44msmbgioxz/app.bsky.feed.post/3m25k3p7lek2k", 11 + "cid": "bafyreicvplbzmlrbwdxv2zpbhibziexxnwmiskvzbapjtnidofzlh4yk64", 12 + "author": { 13 + "did": "did:plc:hbpefio3f5csc44msmbgioxz", 14 + "handle": "sharonk.bsky.social", 15 + "displayName": "Sharon", 16 + "avatar": "https://cdn.bsky.app/img/avatar/plain/did:plc:hbpefio3f5csc44msmbgioxz/bafkreia7dcruptjvvv7t46322zqsuqukkwblihzrm3f45r246o5zjulyn4@jpeg", 17 + "associated": { 18 + "chat": { "allowIncoming": "following" }, 19 + "activitySubscription": { "allowSubscriptions": "mutuals" } 20 + }, 21 + "viewer": { 22 + "muted": false, 23 + "blockedBy": false, 24 + "following": "at://did:plc:yfvwmnlztr4dwkb7hwz55r2g/app.bsky.graph.follow/3l6aft4mu2324", 25 + "followedBy": "at://did:plc:hbpefio3f5csc44msmbgioxz/app.bsky.graph.follow/3kygkag25zo2v" 26 + }, 27 + "labels": [ 28 + { 29 + "cts": "2024-05-11T03:48:55.341Z", 30 + "src": "did:plc:e4elbtctnfqocyfcml6h2lf7", 31 + "uri": "did:plc:hbpefio3f5csc44msmbgioxz", 32 + "val": "bluesky-elder", 33 + "ver": 1 34 + }, 35 + { 36 + "src": "did:plc:hbpefio3f5csc44msmbgioxz", 37 + "uri": "at://did:plc:hbpefio3f5csc44msmbgioxz/app.bsky.actor.profile/self", 38 + "cid": "bafyreihtzylnytd2224tatvvpktt5rwvdwppkyhlsrweshlrmnqcq32vsy", 39 + "val": "!no-unauthenticated", 40 + "cts": "1970-01-01T00:00:00.000Z" 41 + } 42 + ], 43 + "createdAt": "2023-04-13T22:29:27.076Z" 44 + }, 45 + "record": { 46 + "$type": "app.bsky.feed.post", 47 + "createdAt": "2025-10-01T17:15:19.282Z", 48 + "embed": { 49 + "$type": "app.bsky.embed.record", 50 + "record": { 51 + "cid": "bafyreidmo5ot3qoctmgw2vcckrqzy5hexocp5vto554a5kprwxhbsy3oqi", 52 + "uri": "at://did:plc:2whlowi5jjjqrdrrj4lxh2lx/app.bsky.feed.post/3m25ixj2fec2a" 53 + } 
54 + }, 55 + "langs": ["en"], 56 + "text": "Sora 2 going to hit boomer epistemology like a hurricane" 57 + }, 58 + "embed": { 59 + "$type": "app.bsky.embed.record#view", 60 + "record": { 61 + "$type": "app.bsky.embed.record#viewRecord", 62 + "uri": "at://did:plc:2whlowi5jjjqrdrrj4lxh2lx/app.bsky.feed.post/3m25ixj2fec2a", 63 + "cid": "bafyreidmo5ot3qoctmgw2vcckrqzy5hexocp5vto554a5kprwxhbsy3oqi", 64 + "author": { 65 + "did": "did:plc:2whlowi5jjjqrdrrj4lxh2lx", 66 + "handle": "eliothiggins.bsky.social", 67 + "displayName": "Eliot Higgins", 68 + "avatar": "https://cdn.bsky.app/img/avatar/plain/did:plc:2whlowi5jjjqrdrrj4lxh2lx/bafkreiarcjakxx7hkgtfocqilj22vxgmyskl43blurk6vwu2nfvd5ihueu@jpeg", 69 + "associated": { "activitySubscription": { "allowSubscriptions": "followers" } }, 70 + "viewer": { "muted": false, "blockedBy": false }, 71 + "labels": [], 72 + "createdAt": "2024-11-06T08:20:47.084Z", 73 + "verification": { 74 + "verifications": [ 75 + { 76 + "issuer": "did:plc:z72i7hdynmk6r22z27h6tvur", 77 + "uri": "at://did:plc:z72i7hdynmk6r22z27h6tvur/app.bsky.graph.verification/3lv44jce72j2q", 78 + "isValid": true, 79 + "createdAt": "2025-07-29T12:33:45.033Z" 80 + } 81 + ], 82 + "verifiedStatus": "valid", 83 + "trustedVerifierStatus": "none" 84 + } 85 + }, 86 + "value": { 87 + "$type": "app.bsky.feed.post", 88 + "createdAt": "2025-10-01T16:55:04.861Z", 89 + "embed": { 90 + "$type": "app.bsky.embed.video", 91 + "aspectRatio": { "height": 720, "width": 1280 }, 92 + "video": { 93 + "$type": "blob", 94 + "ref": { "$link": "bafkreid7ybejd5s2vv2j7d4aajjlmdgazguemcnuliiyfn6coxpwp2mi6y" }, 95 + "mimeType": "video/mp4", 96 + "size": 2244592 97 + } 98 + }, 99 + "langs": ["en"], 100 + "text": "Really good news for fans of garbage in their timelines, Sora 2 allows anyone to use copyrighted characters to sell you cryptocurrency." 
101 + }, 102 + "labels": [], 103 + "likeCount": 248, 104 + "replyCount": 18, 105 + "repostCount": 67, 106 + "quoteCount": 52, 107 + "indexedAt": "2025-10-01T16:55:06.653Z", 108 + "embeds": [ 109 + { 110 + "$type": "app.bsky.embed.video#view", 111 + "cid": "bafkreid7ybejd5s2vv2j7d4aajjlmdgazguemcnuliiyfn6coxpwp2mi6y", 112 + "playlist": "https://video.bsky.app/watch/did%3Aplc%3A2whlowi5jjjqrdrrj4lxh2lx/bafkreid7ybejd5s2vv2j7d4aajjlmdgazguemcnuliiyfn6coxpwp2mi6y/playlist.m3u8", 113 + "thumbnail": "https://video.bsky.app/watch/did%3Aplc%3A2whlowi5jjjqrdrrj4lxh2lx/bafkreid7ybejd5s2vv2j7d4aajjlmdgazguemcnuliiyfn6coxpwp2mi6y/thumbnail.jpg", 114 + "aspectRatio": { "height": 720, "width": 1280 } 115 + } 116 + ] 117 + } 118 + }, 119 + "bookmarkCount": 3, 120 + "replyCount": 1, 121 + "repostCount": 7, 122 + "likeCount": 76, 123 + "quoteCount": 1, 124 + "indexedAt": "2025-10-01T17:15:19.523Z", 125 + "viewer": { "bookmarked": false, "threadMuted": false, "replyDisabled": false, "embeddingDisabled": false }, 126 + "labels": [], 127 + "threadgate": { 128 + "uri": "at://did:plc:hbpefio3f5csc44msmbgioxz/app.bsky.feed.threadgate/3m25k3p7lek2k", 129 + "cid": "bafyreiclfmvpqfsfhpl4cffaw6elfdusx5wosinugljpwy2u3ckjse3rse", 130 + "record": { 131 + "$type": "app.bsky.feed.threadgate", 132 + "allow": [ 133 + { "$type": "app.bsky.feed.threadgate#followingRule" }, 134 + { "$type": "app.bsky.feed.threadgate#mentionRule" }, 135 + { "$type": "app.bsky.feed.threadgate#followerRule" } 136 + ], 137 + "createdAt": "2025-10-01T17:15:19.285Z", 138 + "hiddenReplies": [], 139 + "post": "at://did:plc:hbpefio3f5csc44msmbgioxz/app.bsky.feed.post/3m25k3p7lek2k" 140 + }, 141 + "lists": [] 142 + } 143 + }, 144 + "moreParents": false, 145 + "moreReplies": 0, 146 + "opThread": true, 147 + "hiddenByThreadgate": false, 148 + "mutedByViewer": false 149 + } 150 + }, 151 + { 152 + "uri": "at://did:plc:hbpefio3f5csc44msmbgioxz/app.bsky.feed.post/3m25k6zjhps2k", 153 + "depth": 1, 154 + "value": { 155 + "$type": 
"app.bsky.unspecced.defs#threadItemPost", 156 + "post": { 157 + "uri": "at://did:plc:hbpefio3f5csc44msmbgioxz/app.bsky.feed.post/3m25k6zjhps2k", 158 + "cid": "bafyreieqxxi7nwep5nuhogkv3tgub4rk4pv5tbh3m6yyf66nqdmmrknwsa", 159 + "author": { 160 + "did": "did:plc:hbpefio3f5csc44msmbgioxz", 161 + "handle": "sharonk.bsky.social", 162 + "displayName": "Sharon", 163 + "avatar": "https://cdn.bsky.app/img/avatar/plain/did:plc:hbpefio3f5csc44msmbgioxz/bafkreia7dcruptjvvv7t46322zqsuqukkwblihzrm3f45r246o5zjulyn4@jpeg", 164 + "associated": { 165 + "chat": { "allowIncoming": "following" }, 166 + "activitySubscription": { "allowSubscriptions": "mutuals" } 167 + }, 168 + "viewer": { 169 + "muted": false, 170 + "blockedBy": false, 171 + "following": "at://did:plc:yfvwmnlztr4dwkb7hwz55r2g/app.bsky.graph.follow/3l6aft4mu2324", 172 + "followedBy": "at://did:plc:hbpefio3f5csc44msmbgioxz/app.bsky.graph.follow/3kygkag25zo2v" 173 + }, 174 + "labels": [ 175 + { 176 + "cts": "2024-05-11T03:48:55.341Z", 177 + "src": "did:plc:e4elbtctnfqocyfcml6h2lf7", 178 + "uri": "did:plc:hbpefio3f5csc44msmbgioxz", 179 + "val": "bluesky-elder", 180 + "ver": 1 181 + }, 182 + { 183 + "src": "did:plc:hbpefio3f5csc44msmbgioxz", 184 + "uri": "at://did:plc:hbpefio3f5csc44msmbgioxz/app.bsky.actor.profile/self", 185 + "cid": "bafyreihtzylnytd2224tatvvpktt5rwvdwppkyhlsrweshlrmnqcq32vsy", 186 + "val": "!no-unauthenticated", 187 + "cts": "1970-01-01T00:00:00.000Z" 188 + } 189 + ], 190 + "createdAt": "2023-04-13T22:29:27.076Z" 191 + }, 192 + "record": { 193 + "$type": "app.bsky.feed.post", 194 + "createdAt": "2025-10-01T17:17:10.755Z", 195 + "langs": ["en"], 196 + "reply": { 197 + "parent": { 198 + "cid": "bafyreicvplbzmlrbwdxv2zpbhibziexxnwmiskvzbapjtnidofzlh4yk64", 199 + "uri": "at://did:plc:hbpefio3f5csc44msmbgioxz/app.bsky.feed.post/3m25k3p7lek2k" 200 + }, 201 + "root": { 202 + "cid": "bafyreicvplbzmlrbwdxv2zpbhibziexxnwmiskvzbapjtnidofzlh4yk64", 203 + "uri": 
"at://did:plc:hbpefio3f5csc44msmbgioxz/app.bsky.feed.post/3m25k3p7lek2k" 204 + } 205 + }, 206 + "text": "it's funny how these all feel like stuff that would be playing on the Cyberpunk 2077 mediafeeds" 207 + }, 208 + "bookmarkCount": 0, 209 + "replyCount": 2, 210 + "repostCount": 1, 211 + "likeCount": 28, 212 + "quoteCount": 0, 213 + "indexedAt": "2025-10-01T17:17:11.164Z", 214 + "viewer": { "bookmarked": false, "threadMuted": false, "replyDisabled": false, "embeddingDisabled": false }, 215 + "labels": [] 216 + }, 217 + "moreParents": false, 218 + "moreReplies": 0, 219 + "opThread": true, 220 + "hiddenByThreadgate": false, 221 + "mutedByViewer": false 222 + } 223 + }, 224 + { 225 + "uri": "at://did:plc:hbpefio3f5csc44msmbgioxz/app.bsky.feed.post/3m25l6doydc26", 226 + "depth": 2, 227 + "value": { 228 + "$type": "app.bsky.unspecced.defs#threadItemPost", 229 + "post": { 230 + "uri": "at://did:plc:hbpefio3f5csc44msmbgioxz/app.bsky.feed.post/3m25l6doydc26", 231 + "cid": "bafyreia7yd5gbxdw4djkmocr6sbg2bgczup6d6pdmctsspdrne3vhcll6q", 232 + "author": { 233 + "did": "did:plc:hbpefio3f5csc44msmbgioxz", 234 + "handle": "sharonk.bsky.social", 235 + "displayName": "Sharon", 236 + "avatar": "https://cdn.bsky.app/img/avatar/plain/did:plc:hbpefio3f5csc44msmbgioxz/bafkreia7dcruptjvvv7t46322zqsuqukkwblihzrm3f45r246o5zjulyn4@jpeg", 237 + "associated": { 238 + "chat": { "allowIncoming": "following" }, 239 + "activitySubscription": { "allowSubscriptions": "mutuals" } 240 + }, 241 + "viewer": { 242 + "muted": false, 243 + "blockedBy": false, 244 + "following": "at://did:plc:yfvwmnlztr4dwkb7hwz55r2g/app.bsky.graph.follow/3l6aft4mu2324", 245 + "followedBy": "at://did:plc:hbpefio3f5csc44msmbgioxz/app.bsky.graph.follow/3kygkag25zo2v" 246 + }, 247 + "labels": [ 248 + { 249 + "cts": "2024-05-11T03:48:55.341Z", 250 + "src": "did:plc:e4elbtctnfqocyfcml6h2lf7", 251 + "uri": "did:plc:hbpefio3f5csc44msmbgioxz", 252 + "val": "bluesky-elder", 253 + "ver": 1 254 + }, 255 + { 256 + "src": 
"did:plc:hbpefio3f5csc44msmbgioxz", 257 + "uri": "at://did:plc:hbpefio3f5csc44msmbgioxz/app.bsky.actor.profile/self", 258 + "cid": "bafyreihtzylnytd2224tatvvpktt5rwvdwppkyhlsrweshlrmnqcq32vsy", 259 + "val": "!no-unauthenticated", 260 + "cts": "1970-01-01T00:00:00.000Z" 261 + } 262 + ], 263 + "createdAt": "2023-04-13T22:29:27.076Z" 264 + }, 265 + "record": { 266 + "$type": "app.bsky.feed.post", 267 + "createdAt": "2025-10-01T17:34:41.609Z", 268 + "langs": ["en"], 269 + "reply": { 270 + "parent": { 271 + "cid": "bafyreieqxxi7nwep5nuhogkv3tgub4rk4pv5tbh3m6yyf66nqdmmrknwsa", 272 + "uri": "at://did:plc:hbpefio3f5csc44msmbgioxz/app.bsky.feed.post/3m25k6zjhps2k" 273 + }, 274 + "root": { 275 + "cid": "bafyreicvplbzmlrbwdxv2zpbhibziexxnwmiskvzbapjtnidofzlh4yk64", 276 + "uri": "at://did:plc:hbpefio3f5csc44msmbgioxz/app.bsky.feed.post/3m25k3p7lek2k" 277 + } 278 + }, 279 + "text": "\"Morty, I've turned myself into a rugpull, Morty!\"" 280 + }, 281 + "bookmarkCount": 0, 282 + "replyCount": 0, 283 + "repostCount": 0, 284 + "likeCount": 3, 285 + "quoteCount": 0, 286 + "indexedAt": "2025-10-01T17:34:41.858Z", 287 + "viewer": { "bookmarked": false, "threadMuted": false, "replyDisabled": false, "embeddingDisabled": false }, 288 + "labels": [] 289 + }, 290 + "moreParents": false, 291 + "moreReplies": 0, 292 + "opThread": true, 293 + "hiddenByThreadgate": false, 294 + "mutedByViewer": false 295 + } 296 + }, 297 + { 298 + "uri": "at://did:plc:duaatzbzy7qm4ppl2hluilpg/app.bsky.feed.post/3m25knxro3s2t", 299 + "depth": 2, 300 + "value": { 301 + "$type": "app.bsky.unspecced.defs#threadItemPost", 302 + "post": { 303 + "uri": "at://did:plc:duaatzbzy7qm4ppl2hluilpg/app.bsky.feed.post/3m25knxro3s2t", 304 + "cid": "bafyreidppmo5vaepnafjlwc5doxmk7n3hrctve6cmcz67hhkcuxf5cr6tm", 305 + "author": { 306 + "did": "did:plc:duaatzbzy7qm4ppl2hluilpg", 307 + "handle": "gentlemanengineer.bsky.social", 308 + "displayName": "Chris Magerkurth", 309 + "avatar": 
"https://cdn.bsky.app/img/avatar/plain/did:plc:duaatzbzy7qm4ppl2hluilpg/bafkreifqxgriyccwjt5pwahsx6sju2bdmtoytqyubxoxlfwk4rv3eedkia@jpeg", 310 + "associated": { "activitySubscription": { "allowSubscriptions": "followers" } }, 311 + "viewer": { "muted": false, "blockedBy": false }, 312 + "labels": [], 313 + "createdAt": "2025-02-22T02:03:41.644Z" 314 + }, 315 + "record": { 316 + "$type": "app.bsky.feed.post", 317 + "createdAt": "2025-10-01T17:25:32.243Z", 318 + "langs": ["en"], 319 + "reply": { 320 + "parent": { 321 + "cid": "bafyreieqxxi7nwep5nuhogkv3tgub4rk4pv5tbh3m6yyf66nqdmmrknwsa", 322 + "uri": "at://did:plc:hbpefio3f5csc44msmbgioxz/app.bsky.feed.post/3m25k6zjhps2k" 323 + }, 324 + "root": { 325 + "cid": "bafyreicvplbzmlrbwdxv2zpbhibziexxnwmiskvzbapjtnidofzlh4yk64", 326 + "uri": "at://did:plc:hbpefio3f5csc44msmbgioxz/app.bsky.feed.post/3m25k3p7lek2k" 327 + } 328 + }, 329 + "text": "Or on Interdimensional Cable from Rick and Morty." 330 + }, 331 + "bookmarkCount": 0, 332 + "replyCount": 0, 333 + "repostCount": 0, 334 + "likeCount": 1, 335 + "quoteCount": 0, 336 + "indexedAt": "2025-10-01T17:25:25.858Z", 337 + "viewer": { "bookmarked": false, "threadMuted": false, "replyDisabled": false, "embeddingDisabled": false }, 338 + "labels": [] 339 + }, 340 + "moreParents": false, 341 + "moreReplies": 0, 342 + "opThread": false, 343 + "hiddenByThreadgate": false, 344 + "mutedByViewer": false 345 + } 346 + } 347 + ], 348 + "threadgate": { 349 + "uri": "at://did:plc:hbpefio3f5csc44msmbgioxz/app.bsky.feed.threadgate/3m25k3p7lek2k", 350 + "cid": "bafyreiclfmvpqfsfhpl4cffaw6elfdusx5wosinugljpwy2u3ckjse3rse", 351 + "record": { 352 + "$type": "app.bsky.feed.threadgate", 353 + "allow": [ 354 + { "$type": "app.bsky.feed.threadgate#followingRule" }, 355 + { "$type": "app.bsky.feed.threadgate#mentionRule" }, 356 + { "$type": "app.bsky.feed.threadgate#followerRule" } 357 + ], 358 + "createdAt": "2025-10-01T17:15:19.285Z", 359 + "hiddenReplies": [], 360 + "post": 
"at://did:plc:hbpefio3f5csc44msmbgioxz/app.bsky.feed.post/3m25k3p7lek2k" 361 + }, 362 + "lists": [] 363 + } 364 + }
+305
crates/jacquard-common/src/types/value/tests.rs
··· 1 + use super::*; 2 + 3 + /// Canonicalize JSON by sorting object keys recursively 4 + fn canonicalize_json(value: &serde_json::Value) -> serde_json::Value { 5 + match value { 6 + serde_json::Value::Object(map) => { 7 + let mut sorted_map = serde_json::Map::new(); 8 + let mut keys: Vec<_> = map.keys().collect(); 9 + keys.sort(); 10 + for key in keys { 11 + sorted_map.insert(key.clone(), canonicalize_json(&map[key])); 12 + } 13 + serde_json::Value::Object(sorted_map) 14 + } 15 + serde_json::Value::Array(arr) => { 16 + serde_json::Value::Array(arr.iter().map(canonicalize_json).collect()) 17 + } 18 + other => other.clone(), 19 + } 20 + } 21 + 22 + #[test] 23 + fn serialize_deserialize_null() { 24 + let data = Data::Null; 25 + 26 + // JSON roundtrip 27 + let json = serde_json::to_string(&data).unwrap(); 28 + assert_eq!(json, "null"); 29 + let parsed: Data = serde_json::from_str(&json).unwrap(); 30 + assert_eq!(data, parsed); 31 + assert!(matches!(parsed, Data::Null)); 32 + } 33 + 34 + #[test] 35 + fn serialize_deserialize_boolean() { 36 + let data = Data::Boolean(true); 37 + 38 + let json = serde_json::to_string(&data).unwrap(); 39 + assert_eq!(json, "true"); 40 + let parsed: Data = serde_json::from_str(&json).unwrap(); 41 + assert_eq!(data, parsed); 42 + } 43 + 44 + #[test] 45 + fn serialize_deserialize_integer() { 46 + let data = Data::Integer(42); 47 + 48 + let json = serde_json::to_string(&data).unwrap(); 49 + assert_eq!(json, "42"); 50 + let parsed: Data = serde_json::from_str(&json).unwrap(); 51 + assert_eq!(data, parsed); 52 + } 53 + 54 + #[test] 55 + fn serialize_deserialize_string() { 56 + let data = Data::String(AtprotoStr::String("hello world".into())); 57 + 58 + let json = serde_json::to_string(&data).unwrap(); 59 + assert_eq!(json, r#""hello world""#); 60 + let parsed: Data = serde_json::from_str(&json).unwrap(); 61 + assert_eq!(data, parsed); 62 + } 63 + 64 + #[test] 65 + fn serialize_deserialize_bytes_json() { 66 + let data = 
Data::Bytes(Bytes::from_static(b"hello")); 67 + 68 + // JSON: should be {"$bytes": "base64"} 69 + let json = serde_json::to_string(&data).unwrap(); 70 + assert!(json.contains("$bytes")); 71 + assert!(json.contains("aGVsbG8=")); // base64("hello") 72 + 73 + let parsed: Data = serde_json::from_str(&json).unwrap(); 74 + assert_eq!(data, parsed); 75 + } 76 + 77 + #[test] 78 + fn serialize_deserialize_cid_link_json() { 79 + let data = Data::CidLink(Cid::str("bafyreih4g7bvo6hdq2juolev5bfzpbo4ewkxh5mzxwgvkjp3kitc6hqkha")); 80 + 81 + // JSON: should be {"$link": "cid_string"} 82 + let json = serde_json::to_string(&data).unwrap(); 83 + assert!(json.contains("$link")); 84 + assert!(json.contains("bafyreih4g7bvo6hdq2juolev5bfzpbo4ewkxh5mzxwgvkjp3kitc6hqkha")); 85 + 86 + let parsed: Data = serde_json::from_str(&json).unwrap(); 87 + match parsed { 88 + Data::CidLink(cid) => assert_eq!(cid.as_str(), "bafyreih4g7bvo6hdq2juolev5bfzpbo4ewkxh5mzxwgvkjp3kitc6hqkha"), 89 + _ => panic!("expected CidLink"), 90 + } 91 + } 92 + 93 + #[test] 94 + fn serialize_deserialize_array() { 95 + let data = Data::Array(Array(vec![ 96 + Data::Null, 97 + Data::Boolean(true), 98 + Data::Integer(42), 99 + Data::String(AtprotoStr::String("test".into())), 100 + ])); 101 + 102 + let json = serde_json::to_string(&data).unwrap(); 103 + let parsed: Data = serde_json::from_str(&json).unwrap(); 104 + assert_eq!(data, parsed); 105 + 106 + // Verify structure 107 + if let Data::Array(Array(items)) = parsed { 108 + assert_eq!(items.len(), 4); 109 + assert!(matches!(items[0], Data::Null)); 110 + assert!(matches!(items[1], Data::Boolean(true))); 111 + assert!(matches!(items[2], Data::Integer(42))); 112 + if let Data::String(AtprotoStr::String(s)) = &items[3] { 113 + assert_eq!(s.as_ref(), "test"); 114 + } else { 115 + panic!("expected plain string"); 116 + } 117 + } else { 118 + panic!("expected array"); 119 + } 120 + } 121 + 122 + #[test] 123 + fn serialize_deserialize_object() { 124 + let mut map = BTreeMap::new(); 
125 + map.insert("name".to_smolstr(), Data::String(AtprotoStr::String("alice".into()))); 126 + map.insert("age".to_smolstr(), Data::Integer(30)); 127 + map.insert("active".to_smolstr(), Data::Boolean(true)); 128 + 129 + let data = Data::Object(Object(map)); 130 + 131 + let json = serde_json::to_string(&data).unwrap(); 132 + let parsed: Data = serde_json::from_str(&json).unwrap(); 133 + assert_eq!(data, parsed); 134 + } 135 + 136 + #[test] 137 + fn type_inference_datetime() { 138 + // Field name "createdAt" should infer datetime type 139 + let json = r#"{"createdAt": "2023-01-15T12:30:45.123456Z"}"#; 140 + let data: Data = serde_json::from_str(json).unwrap(); 141 + 142 + if let Data::Object(obj) = data { 143 + if let Some(Data::String(AtprotoStr::Datetime(dt))) = obj.0.get("createdAt") { 144 + // Verify it's actually parsed correctly 145 + assert_eq!(dt.as_str(), "2023-01-15T12:30:45.123456Z"); 146 + } else { 147 + panic!("createdAt should be parsed as Datetime"); 148 + } 149 + } else { 150 + panic!("expected object"); 151 + } 152 + } 153 + 154 + #[test] 155 + fn type_inference_did() { 156 + let json = r#"{"did": "did:plc:abc123"}"#; 157 + let data: Data = serde_json::from_str(json).unwrap(); 158 + 159 + if let Data::Object(obj) = data { 160 + if let Some(Data::String(AtprotoStr::Did(did))) = obj.0.get("did") { 161 + assert_eq!(did.as_str(), "did:plc:abc123"); 162 + } else { 163 + panic!("did should be parsed as Did"); 164 + } 165 + } else { 166 + panic!("expected object"); 167 + } 168 + } 169 + 170 + #[test] 171 + fn type_inference_uri() { 172 + let json = r#"{"uri": "at://alice.test/com.example.foo/123"}"#; 173 + let data: Data = serde_json::from_str(json).unwrap(); 174 + 175 + if let Data::Object(obj) = data { 176 + // "uri" field gets inferred as Uri type, but at:// should parse to AtUri 177 + match obj.0.get("uri") { 178 + Some(Data::String(AtprotoStr::AtUri(_))) | Some(Data::String(AtprotoStr::Uri(_))) => { 179 + // Success 180 + } 181 + _ => panic!("uri 
should be parsed as Uri or AtUri"), 182 + } 183 + } else { 184 + panic!("expected object"); 185 + } 186 + } 187 + 188 + #[test] 189 + fn blob_deserialization() { 190 + let json = r#"{ 191 + "$type": "blob", 192 + "ref": {"$link": "bafyreih4g7bvo6hdq2juolev5bfzpbo4ewkxh5mzxwgvkjp3kitc6hqkha"}, 193 + "mimeType": "image/png", 194 + "size": 12345 195 + }"#; 196 + 197 + let data: Data = serde_json::from_str(json).unwrap(); 198 + 199 + if let Data::Blob(blob) = data { 200 + assert_eq!(blob.mime_type.as_str(), "image/png"); 201 + assert_eq!(blob.size, 12345); 202 + } else { 203 + panic!("expected blob"); 204 + } 205 + } 206 + 207 + #[test] 208 + fn reject_floats() { 209 + let json = "42.5"; // float literal 210 + 211 + let result: Result<Data, _> = serde_json::from_str(json); 212 + assert!(result.is_err()); 213 + } 214 + 215 + #[test] 216 + fn nested_objects() { 217 + let json = r#"{ 218 + "user": { 219 + "name": "alice", 220 + "profile": { 221 + "bio": "test bio", 222 + "createdAt": "2023-01-15T12:30:45Z" 223 + } 224 + } 225 + }"#; 226 + 227 + let data: Data = serde_json::from_str(json).unwrap(); 228 + 229 + // Should successfully parse with nested type inference 230 + if let Data::Object(obj) = data { 231 + assert!(obj.0.contains_key("user")); 232 + } else { 233 + panic!("expected object"); 234 + } 235 + } 236 + 237 + #[test] 238 + fn integration_bluesky_thread() { 239 + // Real bluesky thread data with complex nested structures 240 + let json = include_str!("test_thread.json"); 241 + let data: Data = serde_json::from_str(json).unwrap(); 242 + 243 + // Verify top-level structure 244 + if let Data::Object(obj) = data { 245 + // Should have "thread" array 246 + assert!(obj.0.contains_key("thread")); 247 + 248 + // Verify thread is an array 249 + if let Some(Data::Array(thread)) = obj.0.get("thread") { 250 + assert!(!thread.0.is_empty()); 251 + 252 + // Check first thread item 253 + if let Some(Data::Object(item)) = thread.0.first() { 254 + // Should have "uri" field 
parsed as AtUri 255 + if let Some(Data::String(AtprotoStr::AtUri(uri))) = item.0.get("uri") { 256 + assert!(uri.as_str().starts_with("at://did:plc:")); 257 + } 258 + 259 + // Should have "value" object 260 + if let Some(Data::Object(value)) = item.0.get("value") { 261 + // Should have post object 262 + if let Some(Data::Object(post)) = value.0.get("post") { 263 + // CID should be parsed as Cid 264 + if let Some(Data::String(AtprotoStr::Cid(cid))) = post.0.get("cid") { 265 + assert!(cid.as_str().starts_with("bafy")); 266 + } 267 + 268 + // Author should have DID 269 + if let Some(Data::Object(author)) = post.0.get("author") { 270 + if let Some(Data::String(AtprotoStr::Did(did))) = author.0.get("did") { 271 + assert!(did.as_str().starts_with("did:plc:")); 272 + } 273 + 274 + // createdAt should be parsed as Datetime 275 + if let Some(Data::String(AtprotoStr::Datetime(_))) = 276 + author.0.get("createdAt") 277 + { 278 + // Success 279 + } else { 280 + panic!("author.createdAt should be Datetime"); 281 + } 282 + } 283 + } 284 + } 285 + } 286 + } else { 287 + panic!("thread should be an array"); 288 + } 289 + 290 + // Verify serialization produces same JSON structure 291 + let serialized = serde_json::to_string(&obj).unwrap(); 292 + 293 + // Parse both as generic serde_json::Value to compare structure 294 + let original_value: serde_json::Value = serde_json::from_str(json).unwrap(); 295 + let serialized_value: serde_json::Value = serde_json::from_str(&serialized).unwrap(); 296 + 297 + // Canonicalize by sorting keys 298 + let original_canonical = canonicalize_json(&original_value); 299 + let serialized_canonical = canonicalize_json(&serialized_value); 300 + 301 + assert_eq!(original_canonical, serialized_canonical, "Serialized JSON should match original structure") 302 + } else { 303 + panic!("expected top-level object"); 304 + } 305 + }
+29
crates/jacquard-derive/Cargo.toml
··· 1 + [package] 2 + name = "jacquard-derive" 3 + edition.workspace = true 4 + version.workspace = true 5 + authors.workspace = true 6 + repository.workspace = true 7 + keywords.workspace = true 8 + categories.workspace = true 9 + readme.workspace = true 10 + documentation.workspace = true 11 + exclude.workspace = true 12 + description.workspace = true 13 + 14 + [lib] 15 + proc-macro = true 16 + 17 + [dependencies] 18 + heck = "0.5.0" 19 + itertools = "0.14.0" 20 + jacquard-common = { version = "0.1.0", path = "../jacquard-common" } 21 + jacquard-lexicon = { version = "0.1.0", path = "../jacquard-lexicon" } 22 + prettyplease = "0.2.37" 23 + proc-macro2 = "1.0.101" 24 + quote = "1.0.41" 25 + serde = { version = "1.0.228", features = ["derive"] } 26 + serde_json = "1.0.145" 27 + serde_repr = "0.1.20" 28 + serde_with = "3.14.1" 29 + syn = "2.0.106"
+117
crates/jacquard-derive/src/lib.rs
··· 1 + use proc_macro::TokenStream;
2 + use quote::quote;
3 + use syn::{Data, DeriveInput, Fields, parse_macro_input};
4 +
5 + /// Attribute macro that adds an `extra_data` field to structs to capture unknown fields
6 + /// during deserialization.
7 + ///
8 + /// # Example
9 + /// ```ignore
10 + /// #[lexicon]
11 + /// struct Post<'s> {
12 + ///     text: &'s str,
13 + /// }
14 + /// // Expands to:
15 + /// // struct Post<'s> {
16 + /// //     text: &'s str,
17 + /// //     #[serde(flatten)]
18 + /// //     pub extra_data: BTreeMap<SmolStr, Data<'s>>,
19 + /// // }
20 + /// ```
21 + #[proc_macro_attribute]
22 + pub fn lexicon(_attr: TokenStream, item: TokenStream) -> TokenStream {
23 +     let mut input = parse_macro_input!(item as DeriveInput);
24 +
25 +     match &mut input.data {
26 +         Data::Struct(data_struct) => {
27 +             if let Fields::Named(fields) = &mut data_struct.fields {
28 +                 // Check if extra_data field already exists
29 +                 let has_extra_data = fields
30 +                     .named
31 +                     .iter()
32 +                     .any(|f| f.ident.as_ref().map(|i| i == "extra_data").unwrap_or(false));
33 +
34 +                 if !has_extra_data {
35 +                     // Determine the lifetime to use (the lifetime itself, not its bounds)
36 +                     let lifetime = if let Some(lt) = input.generics.lifetimes().next() {
37 +                         let lt = &lt.lifetime; quote! { #lt }
38 +                     } else {
39 +                         quote! { 'static }
40 +                     };
41 +
42 +                     // Add the extra_data field
43 +                     let new_field: syn::Field = syn::parse_quote! {
44 +                         #[serde(flatten)]
45 +                         pub extra_data: ::std::collections::BTreeMap<
46 +                             ::jacquard_common::smol_str::SmolStr,
47 +                             ::jacquard_common::types::value::Data<#lifetime>
48 +                         >
49 +                     };
50 +                     fields.named.push(new_field);
51 +                 }
52 +             } else {
53 +                 return syn::Error::new_spanned(
54 +                     input,
55 +                     "lexicon attribute can only be used on structs with named fields",
56 +                 )
57 +                 .to_compile_error()
58 +                 .into();
59 +             }
60 +
61 +             quote!
{ #input }.into()
62 +         }
63 +         _ => syn::Error::new_spanned(input, "lexicon attribute can only be used on structs")
64 +             .to_compile_error()
65 +             .into(),
66 +     }
67 + }
68 +
69 + /// Attribute macro that adds an `Unknown(Data)` variant to enums to make them open unions.
70 + ///
71 + /// # Example
72 + /// ```ignore
73 + /// #[open_union]
74 + /// enum RecordEmbed<'s> {
75 + ///     #[serde(rename = "app.bsky.embed.images")]
76 + ///     Images(Images),
77 + /// }
78 + /// // Expands to:
79 + /// // enum RecordEmbed<'s> {
80 + /// //     #[serde(rename = "app.bsky.embed.images")]
81 + /// //     Images(Images),
82 + /// //     #[serde(untagged)]
83 + /// //     Unknown(Data<'s>),
84 + /// // }
85 + /// ```
86 + #[proc_macro_attribute]
87 + pub fn open_union(_attr: TokenStream, item: TokenStream) -> TokenStream {
88 +     let mut input = parse_macro_input!(item as DeriveInput);
89 +
90 +     match &mut input.data {
91 +         Data::Enum(data_enum) => {
92 +             // Check if an Unknown variant already exists
93 +             let has_unknown = data_enum.variants.iter().any(|v| v.ident == "Unknown");
94 +
95 +             if !has_unknown {
96 +                 // Determine the lifetime to use (the lifetime itself, not its bounds)
97 +                 let lifetime = if let Some(lt) = input.generics.lifetimes().next() {
98 +                     let lt = &lt.lifetime; quote! { #lt }
99 +                 } else {
100 +                     quote! { 'static }
101 +                 };
102 +
103 +                 // Add the Unknown variant
104 +                 let new_variant: syn::Variant = syn::parse_quote! {
105 +                     #[serde(untagged)]
106 +                     Unknown(::jacquard_common::types::value::Data<#lifetime>)
107 +                 };
108 +                 data_enum.variants.push(new_variant);
109 +             }
110 +
111 +             quote! { #input }.into()
112 +         }
113 +         _ => syn::Error::new_spanned(input, "open_union attribute can only be used on enums")
114 +             .to_compile_error()
115 +             .into(),
116 +     }
117 + }
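Semantically, the generated open union is a `$type` dispatch with a catch-all: known tags map to typed variants, and anything else is preserved in `Unknown` instead of failing deserialization (via serde's untagged fallback). A plain-Rust sketch of that dispatch — the names here are illustrative, not the macro's actual output:

```rust
// Conceptual model of the open-union dispatch: recognized $type strings map
// to typed variants, anything else is kept verbatim in a catch-all variant.

#[derive(Debug, PartialEq)]
enum Embed {
    Images,          // "app.bsky.embed.images"
    External,        // "app.bsky.embed.external"
    Unknown(String), // any unrecognized $type, preserved rather than rejected
}

fn dispatch(type_tag: &str) -> Embed {
    match type_tag {
        "app.bsky.embed.images" => Embed::Images,
        "app.bsky.embed.external" => Embed::External,
        other => Embed::Unknown(other.to_owned()),
    }
}
```

The payoff is forward compatibility: records using lexicons the consumer doesn't know about still roundtrip losslessly, as exercised by `test_open_union_unknown_roundtrip` below.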
+89
crates/jacquard-derive/tests/lexicon.rs
··· 1 + use jacquard_derive::lexicon; 2 + use serde::{Deserialize, Serialize}; 3 + 4 + #[lexicon] 5 + #[derive(Serialize, Deserialize, Debug, PartialEq)] 6 + #[serde(rename_all = "camelCase")] 7 + struct TestRecord<'s> { 8 + text: &'s str, 9 + count: i64, 10 + } 11 + 12 + #[test] 13 + fn test_lexicon_adds_extra_data_field() { 14 + let json = r#"{"text":"hello","count":42,"unknown":"field","another":123}"#; 15 + 16 + let record: TestRecord = serde_json::from_str(json).unwrap(); 17 + 18 + assert_eq!(record.text, "hello"); 19 + assert_eq!(record.count, 42); 20 + assert_eq!(record.extra_data.len(), 2); 21 + assert!(record.extra_data.contains_key("unknown")); 22 + assert!(record.extra_data.contains_key("another")); 23 + } 24 + 25 + #[test] 26 + fn test_lexicon_roundtrip() { 27 + use jacquard_common::CowStr; 28 + use jacquard_common::types::value::Data; 29 + use std::collections::BTreeMap; 30 + 31 + let mut extra = BTreeMap::new(); 32 + extra.insert( 33 + "custom".into(), 34 + Data::String(jacquard_common::types::string::AtprotoStr::String( 35 + CowStr::Borrowed("value"), 36 + )), 37 + ); 38 + extra.insert( 39 + "number".into(), 40 + Data::Integer(42), 41 + ); 42 + extra.insert( 43 + "nested".into(), 44 + Data::Object(jacquard_common::types::value::Object({ 45 + let mut nested_map = BTreeMap::new(); 46 + nested_map.insert( 47 + "inner".into(), 48 + Data::Boolean(true), 49 + ); 50 + nested_map 51 + })), 52 + ); 53 + 54 + let record = TestRecord { 55 + text: "test", 56 + count: 100, 57 + extra_data: extra, 58 + }; 59 + 60 + let json = serde_json::to_string(&record).unwrap(); 61 + let parsed: TestRecord = serde_json::from_str(&json).unwrap(); 62 + 63 + assert_eq!(record, parsed); 64 + assert_eq!(parsed.extra_data.len(), 3); 65 + 66 + // Verify the extra fields were preserved 67 + assert!(parsed.extra_data.contains_key("custom")); 68 + assert!(parsed.extra_data.contains_key("number")); 69 + assert!(parsed.extra_data.contains_key("nested")); 70 + 71 + // Verify the values 72 
+ if let Some(Data::String(s)) = parsed.extra_data.get("custom") { 73 + assert_eq!(s.as_str(), "value"); 74 + } else { 75 + panic!("expected custom field to be a string"); 76 + } 77 + 78 + if let Some(Data::Integer(n)) = parsed.extra_data.get("number") { 79 + assert_eq!(*n, 42); 80 + } else { 81 + panic!("expected number field to be an integer"); 82 + } 83 + 84 + if let Some(Data::Object(obj)) = parsed.extra_data.get("nested") { 85 + assert!(obj.0.contains_key("inner")); 86 + } else { 87 + panic!("expected nested field to be an object"); 88 + } 89 + }
+117
crates/jacquard-derive/tests/open_union.rs
··· 1 + use jacquard_derive::open_union; 2 + use serde::{Deserialize, Serialize}; 3 + 4 + #[open_union] 5 + #[derive(Serialize, Deserialize, Debug, PartialEq)] 6 + #[serde(tag = "$type")] 7 + enum TestUnion<'s> { 8 + #[serde(rename = "com.example.typeA")] 9 + TypeA { value: &'s str }, 10 + #[serde(rename = "com.example.typeB")] 11 + TypeB { count: i64 }, 12 + } 13 + 14 + #[test] 15 + fn test_open_union_known_variant() { 16 + let json = r#"{"$type":"com.example.typeA","value":"hello"}"#; 17 + let union: TestUnion = serde_json::from_str(json).unwrap(); 18 + 19 + match union { 20 + TestUnion::TypeA { value } => assert_eq!(value, "hello"), 21 + _ => panic!("expected TypeA"), 22 + } 23 + } 24 + 25 + #[test] 26 + fn test_open_union_unknown_variant() { 27 + use jacquard_common::types::value::{Data, Object}; 28 + 29 + let json = r#"{"$type":"com.example.unknown","data":"something"}"#; 30 + let union: TestUnion = serde_json::from_str(json).unwrap(); 31 + 32 + match union { 33 + TestUnion::Unknown(Data::Object(obj)) => { 34 + // Verify the captured data contains the expected fields 35 + assert!(obj.0.contains_key("$type")); 36 + assert!(obj.0.contains_key("data")); 37 + 38 + // Check the actual values 39 + if let Some(Data::String(type_str)) = obj.0.get("$type") { 40 + assert_eq!(type_str.as_str(), "com.example.unknown"); 41 + } else { 42 + panic!("expected $type field to be a string"); 43 + } 44 + 45 + if let Some(Data::String(data_str)) = obj.0.get("data") { 46 + assert_eq!(data_str.as_str(), "something"); 47 + } else { 48 + panic!("expected data field to be a string"); 49 + } 50 + } 51 + _ => panic!("expected Unknown variant with Object data"), 52 + } 53 + } 54 + 55 + #[test] 56 + fn test_open_union_roundtrip() { 57 + let union = TestUnion::TypeB { count: 42 }; 58 + let json = serde_json::to_string(&union).unwrap(); 59 + let parsed: TestUnion = serde_json::from_str(&json).unwrap(); 60 + 61 + assert_eq!(union, parsed); 62 + 63 + // Verify the $type field is present 64 + 
assert!(json.contains(r#""$type":"com.example.typeB""#)); 65 + } 66 + 67 + #[test] 68 + fn test_open_union_unknown_roundtrip() { 69 + use jacquard_common::types::value::{Data, Object}; 70 + use std::collections::BTreeMap; 71 + 72 + // Create an Unknown variant with complex data 73 + let mut map = BTreeMap::new(); 74 + map.insert( 75 + "$type".into(), 76 + Data::String(jacquard_common::types::string::AtprotoStr::String( 77 + "com.example.custom".into(), 78 + )), 79 + ); 80 + map.insert("field1".into(), Data::Integer(123)); 81 + map.insert("field2".into(), Data::Boolean(false)); 82 + 83 + let union = TestUnion::Unknown(Data::Object(Object(map))); 84 + 85 + let json = serde_json::to_string(&union).unwrap(); 86 + let parsed: TestUnion = serde_json::from_str(&json).unwrap(); 87 + 88 + // Should deserialize back as Unknown since the type is not recognized 89 + match parsed { 90 + TestUnion::Unknown(Data::Object(obj)) => { 91 + assert_eq!(obj.0.len(), 3); 92 + assert!(obj.0.contains_key("$type")); 93 + assert!(obj.0.contains_key("field1")); 94 + assert!(obj.0.contains_key("field2")); 95 + 96 + // Verify values 97 + if let Some(Data::String(s)) = obj.0.get("$type") { 98 + assert_eq!(s.as_str(), "com.example.custom"); 99 + } else { 100 + panic!("expected $type to be a string"); 101 + } 102 + 103 + if let Some(Data::Integer(n)) = obj.0.get("field1") { 104 + assert_eq!(*n, 123); 105 + } else { 106 + panic!("expected field1 to be an integer"); 107 + } 108 + 109 + if let Some(Data::Boolean(b)) = obj.0.get("field2") { 110 + assert_eq!(*b, false); 111 + } else { 112 + panic!("expected field2 to be a boolean"); 113 + } 114 + } 115 + _ => panic!("expected Unknown variant"), 116 + } 117 + }
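The contract these tests pin down — known `$type` tags map to typed variants, any other tag is captured losslessly as `Unknown` instead of failing deserialization — can be sketched without the macro or serde. All names below are illustrative, not the macro's actual expansion:

```rust
// Illustrative open-union dispatch: a known tag yields a typed variant,
// an unknown tag is preserved rather than rejected.
#[derive(Debug, PartialEq)]
enum TestUnion {
    TypeA { value: String },
    TypeB { count: i64 },
    Unknown { type_tag: String, raw: String },
}

fn dispatch(type_tag: &str, payload: &str) -> TestUnion {
    match type_tag {
        // In the real macro these arms come from #[serde(rename = "...")].
        "com.example.typeA" => TestUnion::TypeA { value: payload.to_owned() },
        "com.example.typeB" => TestUnion::TypeB { count: payload.parse().unwrap_or(0) },
        // The #[open_union] catch-all: keep the tag and body instead of erroring.
        other => TestUnion::Unknown { type_tag: other.to_owned(), raw: payload.to_owned() },
    }
}

fn main() {
    assert_eq!(
        dispatch("com.example.typeA", "hello"),
        TestUnion::TypeA { value: "hello".into() }
    );
    match dispatch("com.example.unknown", "something") {
        TestUnion::Unknown { type_tag, .. } => assert_eq!(type_tag, "com.example.unknown"),
        _ => panic!("expected Unknown"),
    }
}
```

The real implementation stores the unrecognized payload as a `jacquard_common` `Data` value rather than a raw string, which is what makes the unknown roundtrip test above possible.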
+11
crates/jacquard-lexicon/Cargo.toml
··· 12 12 description.workspace = true 13 13 14 14 [dependencies] 15 + heck = "0.5.0" 16 + itertools = "0.14.0" 17 + jacquard-common = { version = "0.1.0", path = "../jacquard-common" } 18 + prettyplease = "0.2.37" 19 + proc-macro2 = "1.0.101" 20 + quote = "1.0.41" 21 + serde = { version = "1.0.228", features = ["derive"] } 22 + serde_json = "1.0.145" 23 + serde_repr = "0.1.20" 24 + serde_with = "3.14.1" 25 + syn = "2.0.106"
+36
crates/jacquard-lexicon/src/fs.rs
··· 1 + // Forked from atrium-codegen 2 + // https://github.com/sugyan/atrium/blob/main/lexicon/atrium-codegen/src/fs.rs 3 + 4 + use std::ffi::OsStr; 5 + use std::fs::read_dir; 6 + use std::io::Result; 7 + use std::path::{Path, PathBuf}; 8 + 9 + fn walk<F>(path: &Path, results: &mut Vec<PathBuf>, f: &mut F) -> Result<()> 10 + where 11 + F: FnMut(&Path) -> bool, 12 + { 13 + if f(path) { 14 + results.push(path.into()); 15 + } 16 + if path.is_dir() { 17 + for entry in read_dir(path)? { 18 + walk(&entry?.path(), results, f)?; 19 + } 20 + } 21 + Ok(()) 22 + } 23 + 24 + pub(crate) fn find_schemas(path: &Path) -> Result<Vec<impl AsRef<Path>>> { 25 + let mut results = Vec::new(); 26 + walk(path, &mut results, &mut |path| { 27 + path.extension().and_then(OsStr::to_str) == Some("json") 28 + })?; 29 + Ok(results) 30 + } 31 + 32 + pub(crate) fn find_dirs(path: &Path) -> Result<Vec<impl AsRef<Path>>> { 33 + let mut results = Vec::new(); 34 + walk(path, &mut results, &mut |path| path.is_dir())?; 35 + Ok(results) 36 + }
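The recursive walk in `fs.rs` can be exercised standalone; a minimal std-only sketch, with the temp-dir layout and file names invented for the demo:

```rust
use std::ffi::OsStr;
use std::fs::{self, read_dir};
use std::io::Result;
use std::path::{Path, PathBuf};

// Same shape as the walk above: test `path`, then recurse into directories.
fn walk<F>(path: &Path, results: &mut Vec<PathBuf>, f: &mut F) -> Result<()>
where
    F: FnMut(&Path) -> bool,
{
    if f(path) {
        results.push(path.into());
    }
    if path.is_dir() {
        for entry in read_dir(path)? {
            walk(&entry?.path(), results, f)?;
        }
    }
    Ok(())
}

fn main() -> Result<()> {
    // Build a tiny tree under the system temp dir (layout is illustrative).
    let root = std::env::temp_dir().join("walk_demo");
    let _ = fs::remove_dir_all(&root); // clean slate between runs
    fs::create_dir_all(root.join("app/bsky"))?;
    fs::write(root.join("app/bsky/post.json"), "{}")?;
    fs::write(root.join("README.md"), "")?;

    // Collect only `.json` files, as find_schemas does.
    let mut schemas = Vec::new();
    walk(&root, &mut schemas, &mut |p| {
        p.extension().and_then(OsStr::to_str) == Some("json")
    })?;
    assert_eq!(schemas.len(), 1);
    assert!(schemas[0].ends_with("post.json"));
    Ok(())
}
```

Note the predicate is applied to directories as well as files, which is how the same `walk` also backs `find_dirs`.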
+433
crates/jacquard-lexicon/src/lexicon.rs
··· 1 + // Forked from atrium-lexicon 2 + // https://github.com/atrium-rs/atrium/blob/main/lexicon/atrium-lex/src/lexicon.rs 3 + // https://github.com/atrium-rs/atrium/blob/main/lexicon/atrium-lex/src/lib.rs 4 + 5 + use jacquard_common::{CowStr, smol_str::SmolStr, types::blob::MimeType}; 6 + use serde::{Deserialize, Serialize}; 7 + use serde_repr::{Deserialize_repr, Serialize_repr}; 8 + use serde_with::skip_serializing_none; 9 + use std::collections::BTreeMap; 10 + 11 + #[derive(Debug, Serialize_repr, Deserialize_repr, PartialEq, Eq, Clone, Copy)] 12 + #[repr(u8)] 13 + pub enum Lexicon { 14 + Lexicon1 = 1, 15 + } 16 + #[skip_serializing_none] 17 + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] 18 + pub struct LexiconDoc<'s> { 19 + pub lexicon: Lexicon, 20 + #[serde(borrow)] 21 + pub id: CowStr<'s>, 22 + pub revision: Option<u32>, 23 + pub description: Option<CowStr<'s>>, 24 + pub defs: BTreeMap<SmolStr, LexUserType<'s>>, 25 + } 26 + 27 + // primitives 28 + 29 + #[skip_serializing_none] 30 + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] 31 + pub struct LexBoolean<'s> { 32 + #[serde(borrow)] 33 + pub description: Option<CowStr<'s>>, 34 + pub default: Option<bool>, 35 + pub r#const: Option<bool>, 36 + } 37 + 38 + /// The Lexicon type `integer`. 39 + /// 40 + /// Lexicon integers are [specified] as signed and 64-bit, which means that values will 41 + /// always fit in an `i64`. 
42 + /// 43 + /// [specified]: https://atproto.com/specs/data-model#data-types 44 + #[skip_serializing_none] 45 + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] 46 + pub struct LexInteger<'s> { 47 + #[serde(borrow)] 48 + pub description: Option<CowStr<'s>>, 49 + pub default: Option<i64>, 50 + pub minimum: Option<i64>, 51 + pub maximum: Option<i64>, 52 + pub r#enum: Option<Vec<i64>>, 53 + pub r#const: Option<i64>, 54 + } 55 + 56 + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone, Copy)] 57 + #[serde(rename_all = "kebab-case")] 58 + pub enum LexStringFormat { 59 + Datetime, 60 + Uri, 61 + AtUri, 62 + Did, 63 + Handle, 64 + AtIdentifier, 65 + Nsid, 66 + Cid, 67 + Language, 68 + Tid, 69 + RecordKey, 70 + } 71 + #[skip_serializing_none] 72 + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] 73 + #[serde(rename_all = "camelCase")] 74 + pub struct LexString<'s> { 75 + #[serde(borrow)] 76 + pub description: Option<CowStr<'s>>, 77 + pub format: Option<LexStringFormat>, 78 + pub default: Option<CowStr<'s>>, 79 + pub min_length: Option<usize>, 80 + pub max_length: Option<usize>, 81 + pub min_graphemes: Option<usize>, 82 + pub max_graphemes: Option<usize>, 83 + pub r#enum: Option<Vec<CowStr<'s>>>, 84 + pub r#const: Option<CowStr<'s>>, 85 + pub known_values: Option<Vec<CowStr<'s>>>, 86 + } 87 + 88 + #[skip_serializing_none] 89 + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] 90 + pub struct LexUnknown<'s> { 91 + #[serde(borrow)] 92 + pub description: Option<CowStr<'s>>, 93 + } 94 + // ipld types 95 + 96 + #[skip_serializing_none] 97 + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] 98 + #[serde(rename_all = "camelCase")] 99 + pub struct LexBytes<'s> { 100 + #[serde(borrow)] 101 + pub description: Option<CowStr<'s>>, 102 + pub max_length: Option<usize>, 103 + pub min_length: Option<usize>, 104 + } 105 + 106 + #[skip_serializing_none] 107 + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] 108 + 
pub struct LexCidLink<'s> { 109 + #[serde(borrow)] 110 + pub description: Option<CowStr<'s>>, 111 + } 112 + 113 + // references 114 + 115 + #[skip_serializing_none] 116 + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] 117 + pub struct LexRef<'s> { 118 + #[serde(borrow)] 119 + pub description: Option<CowStr<'s>>, 120 + pub r#ref: CowStr<'s>, 121 + } 122 + 123 + #[skip_serializing_none] 124 + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] 125 + pub struct LexRefUnion<'s> { 126 + #[serde(borrow)] 127 + pub description: Option<CowStr<'s>>, 128 + pub refs: Vec<CowStr<'s>>, 129 + pub closed: Option<bool>, 130 + } 131 + 132 + // blobs 133 + 134 + #[skip_serializing_none] 135 + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] 136 + #[serde(rename_all = "camelCase")] 137 + pub struct LexBlob<'s> { 138 + #[serde(borrow)] 139 + pub description: Option<CowStr<'s>>, 140 + pub accept: Option<Vec<MimeType<'s>>>, 141 + pub max_size: Option<usize>, 142 + } 143 + 144 + // complex types 145 + 146 + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] 147 + #[serde(tag = "type", rename_all = "kebab-case")] 148 + pub enum LexArrayItem<'s> { 149 + // lexPrimitive 150 + Boolean(LexBoolean<'s>), 151 + Integer(LexInteger<'s>), 152 + String(LexString<'s>), 153 + Unknown(LexUnknown<'s>), 154 + // lexIpldType 155 + Bytes(LexBytes<'s>), 156 + CidLink(LexCidLink<'s>), 157 + // lexBlob 158 + #[serde(borrow)] 159 + Blob(LexBlob<'s>), 160 + // lexRefVariant 161 + Ref(LexRef<'s>), 162 + Union(LexRefUnion<'s>), 163 + } 164 + #[skip_serializing_none] 165 + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] 166 + #[serde(rename_all = "camelCase")] 167 + pub struct LexArray<'s> { 168 + #[serde(borrow)] 169 + pub description: Option<CowStr<'s>>, 170 + pub items: LexArrayItem<'s>, 171 + pub min_length: Option<usize>, 172 + pub max_length: Option<usize>, 173 + } 174 + 175 + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] 
176 + #[serde(tag = "type", rename_all = "lowercase")] 177 + pub enum LexPrimitiveArrayItem<'s> { 178 + // lexPrimitive 179 + #[serde(borrow)] 180 + Boolean(LexBoolean<'s>), 181 + Integer(LexInteger<'s>), 182 + String(LexString<'s>), 183 + Unknown(LexUnknown<'s>), 184 + } 185 + #[skip_serializing_none] 186 + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] 187 + #[serde(rename_all = "camelCase")] 188 + pub struct LexPrimitiveArray<'s> { 189 + #[serde(borrow)] 190 + pub description: Option<CowStr<'s>>, 191 + pub items: LexPrimitiveArrayItem<'s>, 192 + pub min_length: Option<usize>, 193 + pub max_length: Option<usize>, 194 + } 195 + 196 + #[skip_serializing_none] 197 + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] 198 + pub struct LexToken<'s> { 199 + #[serde(borrow)] 200 + pub description: Option<CowStr<'s>>, 201 + } 202 + 203 + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] 204 + #[serde(tag = "type", rename_all = "kebab-case")] 205 + pub enum LexObjectProperty<'s> { 206 + // lexRefVariant 207 + #[serde(borrow)] 208 + Ref(LexRef<'s>), 209 + Union(LexRefUnion<'s>), 210 + // lexIpldType 211 + Bytes(LexBytes<'s>), 212 + CidLink(LexCidLink<'s>), 213 + // lexArray 214 + Array(LexArray<'s>), 215 + // lexBlob 216 + Blob(LexBlob<'s>), 217 + // lexPrimitive 218 + Boolean(LexBoolean<'s>), 219 + Integer(LexInteger<'s>), 220 + String(LexString<'s>), 221 + Unknown(LexUnknown<'s>), 222 + } 223 + #[skip_serializing_none] 224 + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] 225 + pub struct LexObject<'s> { 226 + #[serde(borrow)] 227 + pub description: Option<CowStr<'s>>, 228 + pub required: Option<Vec<SmolStr>>, 229 + pub nullable: Option<Vec<SmolStr>>, 230 + pub properties: BTreeMap<SmolStr, LexObjectProperty<'s>>, 231 + } 232 + 233 + // xrpc 234 + 235 + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] 236 + #[serde(tag = "type", rename_all = "lowercase")] 237 + pub enum LexXrpcParametersProperty<'s> { 
238 + // lexPrimitive 239 + #[serde(borrow)] 240 + Boolean(LexBoolean<'s>), 241 + Integer(LexInteger<'s>), 242 + String(LexString<'s>), 243 + Unknown(LexUnknown<'s>), 244 + // lexPrimitiveArray 245 + Array(LexPrimitiveArray<'s>), 246 + } 247 + #[skip_serializing_none] 248 + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] 249 + pub struct LexXrpcParameters<'s> { 250 + #[serde(borrow)] 251 + pub description: Option<CowStr<'s>>, 252 + pub required: Option<Vec<SmolStr>>, 253 + pub properties: BTreeMap<SmolStr, LexXrpcParametersProperty<'s>>, 254 + } 255 + 256 + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] 257 + #[serde(tag = "type", rename_all = "lowercase")] 258 + pub enum LexXrpcBodySchema<'s> { 259 + // lexRefVariant 260 + #[serde(borrow)] 261 + Ref(LexRef<'s>), 262 + Union(LexRefUnion<'s>), 263 + // lexObject 264 + Object(LexObject<'s>), 265 + } 266 + #[skip_serializing_none] 267 + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] 268 + pub struct LexXrpcBody<'s> { 269 + #[serde(borrow)] 270 + pub description: Option<CowStr<'s>>, 271 + pub encoding: CowStr<'s>, 272 + pub schema: Option<LexXrpcBodySchema<'s>>, 273 + } 274 + 275 + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] 276 + #[serde(tag = "type", rename_all = "lowercase")] 277 + pub enum LexXrpcSubscriptionMessageSchema<'s> { 278 + // lexRefVariant 279 + #[serde(borrow)] 280 + Ref(LexRef<'s>), 281 + Union(LexRefUnion<'s>), 282 + // lexObject 283 + Object(LexObject<'s>), 284 + } 285 + #[skip_serializing_none] 286 + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] 287 + pub struct LexXrpcSubscriptionMessage<'s> { 288 + #[serde(borrow)] 289 + pub description: Option<CowStr<'s>>, 290 + pub schema: Option<LexXrpcSubscriptionMessageSchema<'s>>, 291 + } 292 + 293 + #[skip_serializing_none] 294 + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] 295 + pub struct LexXrpcError<'s> { 296 + #[serde(borrow)] 297 + pub description: 
Option<CowStr<'s>>, 298 + pub name: CowStr<'s>, 299 + } 300 + 301 + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] 302 + #[serde(tag = "type", rename_all = "lowercase")] 303 + pub enum LexXrpcQueryParameter<'s> { 304 + #[serde(borrow)] 305 + Params(LexXrpcParameters<'s>), 306 + } 307 + #[skip_serializing_none] 308 + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] 309 + pub struct LexXrpcQuery<'s> { 310 + #[serde(borrow)] 311 + pub description: Option<CowStr<'s>>, 312 + pub parameters: Option<LexXrpcQueryParameter<'s>>, 313 + pub output: Option<LexXrpcBody<'s>>, 314 + pub errors: Option<Vec<LexXrpcError<'s>>>, 315 + } 316 + 317 + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] 318 + #[serde(tag = "type", rename_all = "lowercase")] 319 + pub enum LexXrpcProcedureParameter<'s> { 320 + #[serde(borrow)] 321 + Params(LexXrpcParameters<'s>), 322 + } 323 + #[skip_serializing_none] 324 + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] 325 + pub struct LexXrpcProcedure<'s> { 326 + #[serde(borrow)] 327 + pub description: Option<CowStr<'s>>, 328 + pub parameters: Option<LexXrpcProcedureParameter<'s>>, 329 + pub input: Option<LexXrpcBody<'s>>, 330 + pub output: Option<LexXrpcBody<'s>>, 331 + pub errors: Option<Vec<LexXrpcError<'s>>>, 332 + } 333 + 334 + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] 335 + #[serde(tag = "type", rename_all = "lowercase")] 336 + pub enum LexXrpcSubscriptionParameter<'s> { 337 + #[serde(borrow)] 338 + Params(LexXrpcParameters<'s>), 339 + } 340 + #[skip_serializing_none] 341 + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] 342 + pub struct LexXrpcSubscription<'s> { 343 + #[serde(borrow)] 344 + pub description: Option<CowStr<'s>>, 345 + pub parameters: Option<LexXrpcSubscriptionParameter<'s>>, 346 + pub message: Option<LexXrpcSubscriptionMessage<'s>>, 347 + pub infos: Option<Vec<LexXrpcError<'s>>>, 348 + pub errors: Option<Vec<LexXrpcError<'s>>>, 349 + } 350 
+ 351 + // database 352 + 353 + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] 354 + #[serde(tag = "type", rename_all = "lowercase")] 355 + pub enum LexRecordRecord<'s> { 356 + #[serde(borrow)] 357 + Object(LexObject<'s>), 358 + } 359 + #[skip_serializing_none] 360 + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] 361 + pub struct LexRecord<'s> { 362 + #[serde(borrow)] 363 + pub description: Option<CowStr<'s>>, 364 + pub key: Option<CowStr<'s>>, 365 + pub record: LexRecordRecord<'s>, 366 + } 367 + 368 + // core 369 + 370 + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] 371 + #[serde(tag = "type", rename_all = "kebab-case")] 372 + pub enum LexUserType<'s> { 373 + // lexRecord 374 + #[serde(borrow)] 375 + Record(LexRecord<'s>), 376 + // lexXrpcQuery 377 + #[serde(rename = "query")] 378 + XrpcQuery(LexXrpcQuery<'s>), 379 + // lexXrpcProcedure 380 + #[serde(rename = "procedure")] 381 + XrpcProcedure(LexXrpcProcedure<'s>), 382 + // lexXrpcSubscription 383 + #[serde(rename = "subscription")] 384 + XrpcSubscription(LexXrpcSubscription<'s>), 385 + // lexBlob 386 + Blob(LexBlob<'s>), 387 + // lexArray 388 + Array(LexArray<'s>), 389 + // lexToken 390 + Token(LexToken<'s>), 391 + // lexObject 392 + Object(LexObject<'s>), 393 + // lexBoolean, 394 + Boolean(LexBoolean<'s>), 395 + // lexInteger, 396 + Integer(LexInteger<'s>), 397 + // lexString, 398 + String(LexString<'s>), 399 + // lexBytes 400 + Bytes(LexBytes<'s>), 401 + // lexCidLink 402 + CidLink(LexCidLink<'s>), 403 + // lexUnknown 404 + Unknown(LexUnknown<'s>), 405 + } 406 + 407 + #[cfg(test)] 408 + mod tests { 409 + use super::*; 410 + 411 + const LEXICON_EXAMPLE_TOKEN: &str = r#" 412 + { 413 + "lexicon": 1, 414 + "id": "com.socialapp.actorUser", 415 + "defs": { 416 + "main": { 417 + "type": "token", 418 + "description": "Actor type of 'User'" 419 + } 420 + } 421 + }"#; 422 + 423 + #[test] 424 + fn parse() { 425 + let doc = 
serde_json::from_str::<LexiconDoc>(LEXICON_EXAMPLE_TOKEN) 426 + .expect("failed to deserialize"); 427 + assert_eq!(doc.lexicon, Lexicon::Lexicon1); 428 + assert_eq!(doc.id, "com.socialapp.actorUser"); 429 + assert_eq!(doc.revision, None); 430 + assert_eq!(doc.description, None); 431 + assert_eq!(doc.defs.len(), 1); 432 + } 433 + }
+49 -12
crates/jacquard-lexicon/src/lib.rs
··· 1 - pub fn add(left: u64, right: u64) -> u64 { 2 - left + right 3 - } 1 + pub mod fs; 2 + pub mod lexicon; 3 + pub mod output; 4 + pub mod schema; 4 5 5 - #[cfg(test)] 6 - mod tests { 7 - use super::*; 6 + // #[lexicon] 7 + // #[derive(serde::Serialize, serde::Deserialize, Debug, Clone, PartialEq, Eq)] 8 + // #[serde(rename_all = "camelCase")] 9 + // pub struct Post<'s> { 10 + // ///Client-declared timestamp when this post was originally created. 11 + // pub created_at: jacquard_common::types::string::Datetime, 12 + // #[serde(skip_serializing_if = "core::option::Option::is_none")] 13 + // pub embed: core::option::Option<RecordEmbed<'s>>, 14 + // ///DEPRECATED: replaced by app.bsky.richtext.facet. 15 + // #[serde(skip_serializing_if = "core::option::Option::is_none")] 16 + // pub entities: core::option::Option<Vec<Entity<'s>>>, 17 + // ///Annotations of text (mentions, URLs, hashtags, etc) 18 + // #[serde(skip_serializing_if = "core::option::Option::is_none")] 19 + // pub facets: core::option::Option<Vec<jacquard_api::app_bsky::richtext::Facet<'s>>>, 20 + // ///Self-label values for this post. Effectively content warnings. 21 + // #[serde(skip_serializing_if = "core::option::Option::is_none")] 22 + // pub labels: core::option::Option<RecordLabels<'s>>, 23 + // ///Indicates human language of post primary text content. 24 + // #[serde(skip_serializing_if = "core::option::Option::is_none")] 25 + // pub langs: core::option::Option<Vec<jacquard_common::types::string::Language>>, 26 + // #[serde(skip_serializing_if = "core::option::Option::is_none")] 27 + // pub reply: core::option::Option<ReplyRef<'s>>, 28 + // ///Additional hashtags, in addition to any included in post text and facets. 29 + // #[serde(skip_serializing_if = "core::option::Option::is_none")] 30 + // pub tags: core::option::Option<Vec<jacquard_common::CowStr<'s>>>, 31 + // ///The primary post content. May be an empty string, if there are embeds. 
32 + // #[serde(borrow)] 33 + // pub text: jacquard_common::CowStr<'s>, 34 + // } 8 35 9 - #[test] 10 - fn it_works() { 11 - let result = add(2, 2); 12 - assert_eq!(result, 4); 13 - } 14 - } 36 + // #[open_union] 37 + // #[derive(serde::Serialize, serde::Deserialize, Debug, Clone, PartialEq, Eq)] 38 + // #[serde(tag = "$type")] 39 + // pub enum RecordEmbed<'s> { 40 + // #[serde(borrow)] 41 + // #[serde(rename = "app.bsky.embed.images")] 42 + // EmbedImages(Box<jacquard_api::app_bsky::embed::Images<'s>>), 43 + // #[serde(rename = "app.bsky.embed.video")] 44 + // EmbedVideo(Box<jacquard_api::app_bsky::embed::Video<'s>>), 45 + // #[serde(rename = "app.bsky.embed.external")] 46 + // EmbedExternal(Box<jacquard_api::app_bsky::embed::External<'s>>), 47 + // #[serde(rename = "app.bsky.embed.record")] 48 + // EmbedRecord(Box<jacquard_api::app_bsky::embed::Record<'s>>), 49 + // #[serde(rename = "app.bsky.embed.recordWithMedia")] 50 + // EmbedRecordWithMedia(Box<jacquard_api::app_bsky::embed::RecordWithMedia<'s>>), 51 + // }
+41
crates/jacquard-lexicon/src/output.rs
··· 1 + use crate::lexicon::*; 2 + use heck::{ToPascalCase, ToShoutySnakeCase, ToSnakeCase}; 3 + use itertools::Itertools; 4 + use jacquard_common::CowStr; 5 + use proc_macro2::TokenStream; 6 + use quote::{format_ident, quote}; 7 + use std::collections::{HashMap, HashSet}; 8 + use syn::{Path, Result}; 9 + 10 + fn string_type<'s>(string: &'s LexString<'s>) -> Result<(TokenStream, TokenStream)> { 11 + let description = description(&string.description); 12 + let typ = match string.format { 13 + Some(LexStringFormat::AtIdentifier) => { 14 + quote!(jacquard_common::types::string::AtIdentifier<'s>) 15 + } 16 + Some(LexStringFormat::Cid) => quote!(jacquard_common::types::string::Cid<'s>), 17 + Some(LexStringFormat::Datetime) => quote!(jacquard_common::types::string::Datetime), 18 + Some(LexStringFormat::Did) => quote!(jacquard_common::types::string::Did<'s>), 19 + Some(LexStringFormat::Handle) => quote!(jacquard_common::types::string::Handle<'s>), 20 + Some(LexStringFormat::Nsid) => quote!(jacquard_common::types::string::Nsid<'s>), 21 + Some(LexStringFormat::Language) => quote!(jacquard_common::types::string::Language), 22 + Some(LexStringFormat::Tid) => quote!(jacquard_common::types::string::Tid), 23 + Some(LexStringFormat::RecordKey) => quote!( 24 + jacquard_common::types::string::RecordKey<jacquard_common::types::string::Rkey<'s>> 25 + ), 26 + Some(LexStringFormat::Uri) => quote!(jacquard_common::types::string::Uri<'s>), 27 + Some(LexStringFormat::AtUri) => quote!(jacquard_common::types::string::AtUri<'s>), 28 + // no format specified: fall back to a plain string 29 + _ => quote!(CowStr<'s>), 30 + }; 31 + Ok((description, typ)) 32 + } 33 + 34 + fn description<'s>(description: &Option<CowStr<'s>>) -> TokenStream { 35 + if let Some(description) = description { 36 + let description = description.as_ref(); 37 + quote!(#[doc = #description]) 38 + } else { 39 + quote!() 40 + } 41 + }
+142
crates/jacquard-lexicon/src/schema.rs
··· 1 + // Forked from atrium-codegen 2 + // https://github.com/sugyan/atrium/blob/main/lexicon/atrium-codegen/src/schema.rs 3 + 4 + use crate::lexicon::*; 5 + use heck::ToPascalCase; 6 + use jacquard_common::{ 7 + CowStr, IntoStatic, 8 + smol_str::{self, SmolStr, ToSmolStr}, 9 + }; 10 + use std::collections::BTreeMap; 11 + 12 + pub(crate) fn find_ref_unions<'s>( 13 + defs: &'s BTreeMap<SmolStr, LexUserType<'s>>, 14 + ) -> Vec<(SmolStr, LexRefUnion<'s>)> { 15 + let mut unions = Vec::new(); 16 + for (key, def) in defs { 17 + match def { 18 + LexUserType::Record(record) => { 19 + let LexRecordRecord::Object(object) = &record.record; 20 + find_ref_unions_in_object(object, SmolStr::new_static("Record"), &mut unions); 21 + } 22 + LexUserType::XrpcQuery(query) => { 23 + if let Some(output) = &query.output { 24 + if let Some(schema) = &output.schema { 25 + find_ref_unions_in_body_schema( 26 + schema, 27 + SmolStr::new_static("Output"), 28 + &mut unions, 29 + ); 30 + } 31 + } 32 + } 33 + LexUserType::XrpcProcedure(procedure) => { 34 + if let Some(input) = &procedure.input { 35 + if let Some(schema) = &input.schema { 36 + find_ref_unions_in_body_schema( 37 + schema, 38 + SmolStr::new_static("Input"), 39 + &mut unions, 40 + ); 41 + } 42 + } 43 + if let Some(output) = &procedure.output { 44 + if let Some(schema) = &output.schema { 45 + find_ref_unions_in_body_schema( 46 + schema, 47 + SmolStr::new_static("Output"), 48 + &mut unions, 49 + ); 50 + } 51 + } 52 + } 53 + LexUserType::XrpcSubscription(subscription) => { 54 + if let Some(message) = &subscription.message { 55 + if let Some(schema) = &message.schema { 56 + find_ref_unions_in_subscription_message_schema( 57 + schema, 58 + SmolStr::new_static("Message"), 59 + &mut unions, 60 + ); 61 + } 62 + } 63 + } 64 + LexUserType::Array(array) => { 65 + find_ref_unions_in_array( 66 + array, 67 + CowStr::Borrowed(&key.to_pascal_case()).into_static(), 68 + &mut unions, 69 + ); 70 + } 71 + LexUserType::Object(object) => { 72 + 
find_ref_unions_in_object(object, key.to_pascal_case().to_smolstr(), &mut unions); 73 + } 74 + _ => {} 75 + } 76 + } 77 + unions.sort_by_cached_key(|(name, _)| name.clone()); 78 + unions 79 + } 80 + 81 + fn find_ref_unions_in_body_schema<'s>( 82 + schema: &'s LexXrpcBodySchema, 83 + name: SmolStr, 84 + unions: &mut Vec<(SmolStr, LexRefUnion<'s>)>, 85 + ) { 86 + match schema { 87 + LexXrpcBodySchema::Union(_) => unimplemented!(), 88 + LexXrpcBodySchema::Object(object) => find_ref_unions_in_object(object, name, unions), 89 + _ => {} 90 + } 91 + } 92 + 93 + fn find_ref_unions_in_subscription_message_schema<'s>( 94 + schema: &'s LexXrpcSubscriptionMessageSchema, 95 + name: SmolStr, 96 + unions: &mut Vec<(SmolStr, LexRefUnion<'s>)>, 97 + ) { 98 + match schema { 99 + LexXrpcSubscriptionMessageSchema::Union(union) => { 100 + unions.push((name.into(), union.clone())); 101 + } 102 + LexXrpcSubscriptionMessageSchema::Object(object) => { 103 + find_ref_unions_in_object(object, name, unions) 104 + } 105 + _ => {} 106 + } 107 + } 108 + 109 + fn find_ref_unions_in_array<'s>( 110 + array: &'s LexArray, 111 + name: CowStr<'s>, 112 + unions: &mut Vec<(SmolStr, LexRefUnion<'s>)>, 113 + ) { 114 + if let LexArrayItem::Union(union) = &array.items { 115 + unions.push((smol_str::format_smolstr!("{}", name), union.clone())); 116 + } 117 + } 118 + 119 + fn find_ref_unions_in_object<'s>( 120 + object: &'s LexObject, 121 + name: SmolStr, 122 + unions: &mut Vec<(SmolStr, LexRefUnion<'s>)>, 123 + ) { 124 + for (k, property) in &object.properties { 125 + match property { 126 + LexObjectProperty::Union(union) => { 127 + unions.push(( 128 + smol_str::format_smolstr!("{name}{}", k.to_pascal_case()), 129 + union.clone(), 130 + )); 131 + } 132 + LexObjectProperty::Array(array) => { 133 + find_ref_unions_in_array( 134 + array, 135 + CowStr::Borrowed(&(name.to_string() + &k.to_pascal_case())).into_static(), 136 + unions, 137 + ); 138 + } 139 + _ => {} 140 + } 141 + } 142 + }
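The union-naming scheme in `schema.rs` — container name concatenated with the PascalCased property key — can be sketched without the `heck` crate. The `to_pascal_case` below is a simplified stand-in for `heck::ToPascalCase`, not its full implementation:

```rust
// Simplified stand-in for heck::ToPascalCase: split on '_'/'-' and
// uppercase the first letter of each segment.
fn to_pascal_case(s: &str) -> String {
    s.split(|c| c == '_' || c == '-')
        .flat_map(|seg| {
            let mut chars = seg.chars();
            chars
                .next()
                .map(|f| f.to_uppercase().collect::<String>() + chars.as_str())
        })
        .collect()
}

fn main() {
    // A union under the `embed` property of a record's object gets the
    // name "RecordEmbed", matching find_ref_unions_in_object's scheme.
    let container = "Record";
    let property = "embed";
    let union_name = format!("{container}{}", to_pascal_case(property));
    assert_eq!(union_name, "RecordEmbed");

    // camelCase keys pass through with the first letter raised.
    assert_eq!(to_pascal_case("createdAt"), "CreatedAt");
}
```

These synthesized names are what the codegen later uses as Rust enum identifiers for inline `union` properties, which is why `find_ref_unions` sorts them for deterministic output.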