Constellation, Spacedust, Slingshot, UFOs: atproto crates and services for microcosm
75
fork

Configure Feed

Select the types of activity you want to include in your feed.

at main 306 lines 11 kB view raw
1use fluent_uri::{Uri, UriRef}; 2use std::sync::LazyLock; 3 4static BASE: LazyLock<Uri<&str>> = LazyLock::new(|| Uri::parse("https://example.com").unwrap()); 5 6// normalizing is a bit opinionated but eh 7/// see "Full AT URI Syntax" at https://atproto.com/specs/at-uri-scheme 8/// this parser is intentinonally lax: it should accept all valid at-uris, and 9/// may accept some invalid at-uris. 10/// 11/// at the moment this implementation is quite bad and incomplete 12pub fn parse_at_uri(s: &str) -> Option<String> { 13 // for now, just working through the rules laid out in the docs in order, 14 // without much regard for efficiency for now. 15 16 // The overall URI is restricted to a subset of ASCII characters 17 if !s.is_ascii() { 18 return None; 19 } 20 21 // Maximum overall length is 8 kilobytes (which may be shortened in the future) 22 if s.len() > (8 * 2_usize.pow(10)) { 23 return None; 24 } 25 26 // Hex-encoding of characters is permitted (but in practice not necessary) 27 // -> decode any unreserved characters. from rfc 3986: 28 // -> For consistency, percent-encoded octets in the ranges of ALPHA 29 // -> (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D), period (%2E), 30 // -> underscore (%5F), or tilde (%7E) should not be created by URI 31 // -> producers and, when found in a URI, should be decoded to their 32 // -> corresponding unreserved characters by URI normalizers. 33 let s = if let Some((unencoded_prefix, rest)) = s.split_once('%') { 34 let mut out = String::with_capacity(s.len()); 35 out.push_str(unencoded_prefix); 36 for segment in rest.split('%') { 37 let Some((hex2, unencoded_suffix)) = segment.split_at_checked(2) else { 38 return None; // bail: % must always be followed by 2 hex digits 39 }; 40 let Ok(decoded) = u8::from_str_radix(hex2, 16).map(char::from) else { 41 return None; // bail: % must be followed by decodable hex 42 }; 43 if matches!(decoded, 'A'..='Z' | 'a'..='z' | '0'..='9' | '.' | '-' | '_' | '~') { 44 out.push(decoded); 45 } else { 46 out.push('%'); 47 out.push_str(&hex2.to_ascii_uppercase()); // norm 48 } 49 out.push_str(unencoded_suffix); 50 } 51 out 52 } else { 53 s.to_string() 54 }; 55 56 // The URI scheme is `at`, and an authority part preceded with double slashes is always 57 // required, so the URI always starts at:// 58 // -> the spec doesn't explicitly say, but uri schemes can be case-insensitive? 59 let (proto, rest) = s.split_at_checked(5)?; 60 if !proto.eq_ignore_ascii_case("at://") { 61 return None; 62 } 63 64 // An authority section is required and must be non-empty. the authority can be either an 65 // atproto Handle, or a DID meeting the restrictions for use with atproto. note that the 66 // authority part can not be interpreted as a host:port pair, because of the use of colon 67 // characters (:) in DIDs. Colons and unreserved characters should not be escaped in DIDs, 68 // but other reserved characters (including #, /, $, &, @) must be escaped. 69 // Note that none of the current "blessed" DID methods for atproto allow these 70 // characters in DID identifiers 71 72 // An optional path section may follow the authority. The path may contain multiple segments 73 // separated by a single slash (/). Generic URI path normalization rules may be used. 74 75 // An optional query part is allowed, following generic URI syntax restrictions 76 77 // An optional fragment part is allowed, using JSON Path syntax 78 79 // -> work backwards from fragment, query, path -> authority 80 let mut base = rest; 81 let (mut fragment, mut query, mut path) = (None, None, None); 82 if let Some((pre, f)) = base.split_once('#') { 83 base = pre; 84 fragment = Some(f); 85 } 86 if let Some((pre, q)) = base.split_once('?') { 87 base = pre; 88 query = Some(q); 89 } 90 if let Some((pre, p)) = base.split_once('/') { 91 base = pre; 92 path = Some(p); 93 } 94 let mut authority = base.to_string(); 95 96 if authority.is_empty() { 97 return None; 98 } 99 100 // Normalization: Authority as handle: lowercased 101 if !authority.starts_with("did:") { 102 // lowercase handles 103 authority.make_ascii_lowercase(); 104 } 105 106 // Normalization: No trailing slashes in path part 107 // Normalization: No duplicate slashes or "dot" sections in path part (/./ or /abc/../ for example) 108 // -> be so lazy 109 let path = match path { 110 Some(p) => { 111 let p = p.trim_end_matches('/'); 112 let uri_ref = UriRef::parse(p).ok()?; // fully bail if we can't parse path 113 let resolved = uri_ref.resolve_against(&*BASE).unwrap(); // both fail conditions are specific to BASE 114 let normalized = resolved.normalize().path().to_string(); 115 let without_trailing_slashes = normalized.trim_end_matches('/'); 116 Some(without_trailing_slashes.to_string()) 117 } 118 None => None, 119 }; 120 121 let mut out = format!("at://{authority}"); 122 if let Some(p) = path { 123 // no need for `/` -- it's added by fluent_uri normalization 124 out.push_str(&p); 125 } 126 if let Some(q) = query { 127 out.push('?'); 128 out.push_str(q); 129 } 130 if let Some(f) = fragment { 131 out.push('#'); 132 out.push_str(f); 133 } 134 135 Some(out) 136 137 // there's a more normalization to do still. ugh. 138} 139 140pub fn at_uri_collection(at_uri: &str) -> Option<String> { 141 let (proto, rest) = at_uri.split_at_checked(5)?; 142 if !proto.eq_ignore_ascii_case("at://") { 143 return None; 144 } 145 let (_did, rest) = rest.split_once('/')?; 146 if let Some((collection, _path_rest)) = rest.split_once('/') { 147 return Some(collection.to_string()); 148 } 149 if let Some((collection, _query_rest)) = rest.split_once('?') { 150 return Some(collection.to_string()); 151 } 152 if let Some((collection, _hash_rest)) = rest.split_once('#') { 153 return Some(collection.to_string()); 154 } 155 Some(rest.to_string()) 156} 157 158#[cfg(test)] 159mod tests { 160 use super::*; 161 162 #[test] 163 fn test_at_uri_parse() { 164 for (case, expected, detail) in vec![ 165 ("", None, "empty"), 166 (" ", None, "whitespace"), 167 ("https://bad-example.com", None, "not at scheme"), 168 ("at://µcosm.bad-example.com", None, "not ascii"), 169 ( 170 "at://bad-example.com", 171 Some("at://bad-example.com"), 172 "handle, authority-only", 173 ), 174 ( 175 "at://did:plc:hdhoaan3xa3jiuq4fg4mefid", 176 Some("at://did:plc:hdhoaan3xa3jiuq4fg4mefid"), 177 "DID, authority-only", 178 ), 179 ( 180 "at://bad-example.com/app.bsky.feed.post/3jwdwj2ctlk26", 181 Some("at://bad-example.com/app.bsky.feed.post/3jwdwj2ctlk26"), 182 "bsky post (handle)", 183 ), 184 ( 185 "at://did:plc:hdhoaan3xa3jiuq4fg4mefid/app.bsky.feed.post/3ldqksainxc27", 186 Some("at://did:plc:hdhoaan3xa3jiuq4fg4mefid/app.bsky.feed.post/3ldqksainxc27"), 187 "bsky post (DID)", 188 ), 189 ( 190 "AT://bad-example.com", 191 Some("at://bad-example.com"), 192 "scheme case is normalized", 193 ), 194 ( 195 "at://bad-example.com", 196 Some("at://bad-example.com"), 197 "scheme case is normalized", 198 ), 199 ( 200 "at://bad-example.com?q=z", 201 Some("at://bad-example.com?q=z"), 202 "query is allowed", 203 ), 204 ( 205 "at://bad-example.com#a", 206 Some("at://bad-example.com#a"), 207 "fragment is allowed", 208 ), 209 ( 210 "at://bad-example.com/%", 211 None, 212 "invalid percent-encoding: ends with %", 213 ), 214 ( 215 "at://bad-example.com/%2", 216 None, 217 "invalid percent-encoding: ends with only one digit after %", 218 ), 219 ( 220 "at://bad-example.com/%ZZ", 221 None, 222 "invalid percent-encoding: non-hex after %", 223 ), 224 ( 225 "at://bad-example.com/%3A", 226 Some("at://bad-example.com/%3A"), 227 "valid percent-encoding is left", 228 ), 229 ( 230 "at://bad-example.com/%3a", 231 Some("at://bad-example.com/%3A"), 232 "valid percent-encoding is hex-uppercased", 233 ), 234 ( 235 "at://bad-example.com/%61/%62", 236 Some("at://bad-example.com/a/b"), 237 "unreserved characters are percent-decoded", 238 ), 239 ( 240 "at://bad-example.com/a/../b", 241 Some("at://bad-example.com/b"), 242 "paths have traversals resolved (oof)", // reminder to self: we are normalizing, not sanitizing 243 ), 244 ( 245 "at://bad-example.com/../", 246 Some("at://bad-example.com"), 247 "paths always have trailing slashes removed", 248 ), 249 ] { 250 assert_eq!( 251 parse_at_uri(case), 252 expected.map(|s| s.to_string()), 253 "{detail}" 254 ); 255 } 256 } 257 258 #[test] 259 fn test_at_uri_collection() { 260 for (case, expected, detail) in vec![ 261 ("", None, "empty"), 262 ("at://did:plc:vc7f4oafdgxsihk4cry2xpze", None, "did only"), 263 ( 264 "at://did:plc:vc7f4oafdgxsihk4cry2xpze/collec.tion", 265 Some("collec.tion"), 266 "no path (weird)", 267 ), 268 ( 269 "at://did:plc:vc7f4oafdgxsihk4cry2xpze/collec.tion/path", 270 Some("collec.tion"), 271 "normal at-uri", 272 ), 273 ( 274 "at://did:plc:vc7f4oafdgxsihk4cry2xpze/collec.tion?query", 275 Some("collec.tion"), 276 "colleciton with query", 277 ), 278 ( 279 "at://did:plc:vc7f4oafdgxsihk4cry2xpze/collec.tion#hash", 280 Some("collec.tion"), 281 "colleciton with hash", 282 ), 283 ( 284 "at://did:plc:vc7f4oafdgxsihk4cry2xpze/collec.tion/path?query#hash", 285 Some("collec.tion"), 286 "colleciton with everything", 287 ), 288 ( 289 "at://did:web:example.com/collec.tion/path", 290 Some("collec.tion"), 291 "did:web", 292 ), 293 ( 294 "at://did:web:example.com/col.lec.tio.ns.so.long.going.on.and.on", 295 Some("col.lec.tio.ns.so.long.going.on.and.on"), 296 "long collection", 297 ), 298 ] { 299 assert_eq!( 300 at_uri_collection(case), 301 expected.map(|s| s.to_string()), 302 "{detail}" 303 ); 304 } 305 } 306}