Constellation, Spacedust, Slingshot, UFOs: atproto crates and services for microcosm
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

parse dids

"links" covers all references, so blocks, follows, etc. (which are just bare DIDs) also need to be detected.

phil 1a7278a2 283754e2

+166 -14
+2 -6
src/at_uri.rs
··· 17 17 if !s.is_ascii() { 18 18 return None; 19 19 } 20 - // // A-Za-z0-9 . - _ ~ 21 - // if !s.chars().all(|c| matches!(c, 'A'..='Z' | 'a'..='z' | '0'..='9' | '.' | '-' | '_' | '~')) { 22 - // return None 23 - // } 24 20 25 21 // Maximum overall length is 8 kilobytes (which may be shortened in the future) 26 22 if s.len() > (8 * 2_usize.pow(10)) { ··· 59 55 60 56 // The URI scheme is `at`, and an authority part preceded with double slashes is always 61 57 // required, so the URI always starts at:// 62 - // -> the spec doesn't explicitly say, but it seems like uri schemes are case-insensitive 58 + // -> the spec doesn't explicitly say, but uri schemes can be case-insensitive? 63 59 let (proto, rest) = s.split_at_checked(5)?; 64 60 if !proto.eq_ignore_ascii_case("at://") { 65 61 return None; ··· 225 221 ( 226 222 "at://bad-example.com/a/../b", 227 223 Some("at://bad-example.com/b"), 228 - "paths have traversals resolved (oof)", 224 + "paths have traversals resolved (oof)", // reminder to self: we are normalizing, not sanitizing 229 225 ), 230 226 ( 231 227 "at://bad-example.com/../",
+152
src/did.rs
/// Parse and validate an atproto DID such as `did:plc:z72i7hdynmk6r22z27h6tvur`.
///
/// See <https://atproto.com/specs/did#at-protocol-did-identifier-syntax>.
///
/// This parser is intentionally lax: it should accept all valid DIDs, and may
/// accept some invalid DIDs. In particular, percent-encoding inside the
/// identifier is not validated here.
///
/// Returns `Some` holding an owned copy of the input (no normalization is
/// applied) when `s` passes the checks below, and `None` otherwise.
pub fn parse_did(s: &str) -> Option<String> {
    // DID identifiers do not generally have a maximum length restriction, but in
    // the context of atproto there is an initial hard limit of 2 KB.
    // -> checked first so oversized input is rejected without scanning it.
    const MAX_DID_LEN: usize = 2 * 1024;
    if s.len() > MAX_DID_LEN {
        return None;
    }

    // The entire URI is made up of a subset of ASCII, containing letters (A-Z, a-z),
    // digits (0-9), period, underscore, colon, percent sign, or hyphen (._:%-)
    // -> every byte of a non-ASCII char is >= 0x80, so a byte scan rejects those too.
    let allowed = |b: u8| b.is_ascii_alphanumeric() || matches!(b, b'.' | b'_' | b':' | b'%' | b'-');
    if !s.bytes().all(allowed) {
        return None;
    }

    // The URI is case-sensitive
    // -> (nothing to check)

    // The URI starts with lowercase `did:`
    let unprefixed = s.strip_prefix("did:")?;

    // The method segment is one or more lowercase letters (a-z), followed by :
    // -> `all` is vacuously true on an empty string, so emptiness needs its own check
    //    (otherwise `did::z` would slip through).
    let (method, identifier) = unprefixed.split_once(':')?;
    if method.is_empty() || !method.bytes().all(|b| b.is_ascii_lowercase()) {
        return None;
    }

    // The remainder of the URI (the identifier) may contain any of the above-allowed
    // ASCII characters, except for percent-sign (%)
    // -> ok, ugh, gotta know our encoding context for this

    // The URI (and thus the remaining identifier) may not end in ':'.
    // -> it's not actually written in the spec, but by example in the spec, the
    // -> identifier cannot be empty either
    if identifier.is_empty() || identifier.ends_with(':') {
        return None;
    }

    // Percent-sign (%) is used for "percent encoding" in the identifier section, and
    // must always be followed by two hex characters
    // -> again encoding context (bleh)

    // Query (?) and fragment (#) sections are allowed in DID URIs, but not in DID
    // identifiers. In the context of atproto, the query and fragment parts are not
    // allowed.
    // -> disallowed here ('?' and '#' fail the character check above) -- the uri
    // -> decoder should already split them out first.

    // the only normalization we might want would be percent-decoding, but we
    // probably leave that to the uri decoder
    Some(s.to_string())
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_did_parse() {
        for (case, expected, detail) in [
            ("", None, "empty str"),
            (" ", None, "whitespace str"),
            ("z", None, "not a did"),
            ("did:plc", None, "no identifier separator colon"),
            ("did:plc:", None, "missing identifier"),
            (
                "did:web:bad-example.com",
                Some("did:web:bad-example.com"),
                "web did",
            ),
            (
                "did:plc:hdhoaan3xa3jiuq4fg4mefid",
                Some("did:plc:hdhoaan3xa3jiuq4fg4mefid"),
                "plc did",
            ),
            (
                "DID:plc:hdhoaan3xa3jiuq4fg4mefid",
                None,
                "'did:' prefix must be lowercase",
            ),
            (
                "did:ok:z",
                Some("did:ok:z"),
                "unknown did methods are allowed",
            ),
            ("did:BAD:z", None, "non-lowercase methods are not allowed"),
            ("did::z", None, "empty methods are not allowed"),
            ("did:bad:z$z", None, "invalid chars are not allowed"),
            (
                "did:ok:z:z",
                Some("did:ok:z:z"),
                "colons are allowed in identifier",
            ),
            ("did:bad:z:", None, "colons not are allowed at the end"),
            ("did:bad:z?q=y", None, "queries are not allowed in atproto"),
            ("did:bad:z#a", None, "anchors are not allowed in atproto"),
        ] {
            assert_eq!(parse_did(case), expected.map(|s| s.to_string()), "{detail}");
        }
    }

    #[test]
    fn test_doc_examples_atproto() {
        // https://atproto.com/specs/did#at-protocol-did-identifier-syntax
        for case in ["did:plc:z72i7hdynmk6r22z27h6tvur", "did:web:blueskyweb.xyz"] {
            assert!(parse_did(case).is_some(), "should pass: {case}")
        }
    }

    #[test]
    fn test_doc_examples_lexicon() {
        // https://atproto.com/specs/did#at-protocol-did-identifier-syntax
        for case in [
            "did:method:val:two",
            "did:m:v",
            "did:method::::val",
            "did:method:-:_:.",
            "did:key:zQ3shZc2QzApp2oymGvQbzP8eKheVshBHbU4ZYjeXqwSKEn6N",
        ] {
            assert!(parse_did(case).is_some(), "should pass: {case}")
        }
    }

    #[test]
    fn test_doc_examples_invalid() {
        // https://atproto.com/specs/did#at-protocol-did-identifier-syntax
        for case in [
            "did:METHOD:val",
            "did:m123:val",
            "DID:method:val",
            "did:method:",
            "did:method:val/two",
            "did:method:val?two",
            "did:method:val#two",
        ] {
            assert!(parse_did(case).is_none(), "should fail: {case}")
        }
    }
}
+12 -8
src/lib.rs
··· 1 1 use fluent_uri::Uri; 2 2 3 3 pub mod at_uri; 4 + pub mod did; 4 5 5 6 #[derive(Debug, PartialEq)] 6 7 pub enum Link { 7 8 AtUri(String), 8 9 Uri(String), 9 - } 10 - 11 - // normalizing is a bit opinionated but ehhh 12 - pub fn parse_at_uri(s: &str) -> Option<String> { 13 - at_uri::parse_at_uri(s) 10 + Did(String), 14 11 } 15 12 16 13 // normalizing is a bit opinionated but eh ··· 19 16 } 20 17 21 18 pub fn parse_any(s: &str) -> Option<Link> { 22 - parse_at_uri(s) 23 - .map(Link::AtUri) 24 - .or_else(|| parse_uri(s).map(Link::Uri)) 19 + at_uri::parse_at_uri(s).map(Link::AtUri).or_else(|| { 20 + did::parse_did(s) 21 + .map(Link::Did) 22 + .or_else(|| parse_uri(s).map(Link::Uri)) 23 + }) 25 24 } 26 25 27 26 #[cfg(test)] ··· 60 59 "at://did:plc:44ybard66vv44zksje25o7dz/app.bsky.feed.post/3jwdwj2ctlk26".into() 61 60 )), 62 61 ); 62 + 63 + assert_eq!( 64 + parse_any("did:plc:44ybard66vv44zksje25o7dz"), 65 + Some(Link::Did("did:plc:44ybard66vv44zksje25o7dz".into())) 66 + ) 63 67 } 64 68 }