Constellation, Spacedust, Slingshot, UFOs: atproto crates and services for microcosm
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

hello

phil 760e7808

+336
+1
.gitignore
··· 1 + /target
+139
Cargo.lock
··· 1 + # This file is automatically @generated by Cargo. 2 + # It is not intended for manual editing. 3 + version = 3 4 + 5 + [[package]] 6 + name = "anyhow" 7 + version = "1.0.95" 8 + source = "registry+https://github.com/rust-lang/crates.io-index" 9 + checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04" 10 + 11 + [[package]] 12 + name = "borrow-or-share" 13 + version = "0.2.2" 14 + source = "registry+https://github.com/rust-lang/crates.io-index" 15 + checksum = "3eeab4423108c5d7c744f4d234de88d18d636100093ae04caf4825134b9c3a32" 16 + 17 + [[package]] 18 + name = "fluent-uri" 19 + version = "0.3.2" 20 + source = "registry+https://github.com/rust-lang/crates.io-index" 21 + checksum = "1918b65d96df47d3591bed19c5cca17e3fa5d0707318e4b5ef2eae01764df7e5" 22 + dependencies = [ 23 + "borrow-or-share", 24 + "ref-cast", 25 + ] 26 + 27 + [[package]] 28 + name = "links" 29 + version = "0.1.0" 30 + dependencies = [ 31 + "anyhow", 32 + "fluent-uri", 33 + "nom", 34 + "thiserror", 35 + "tinyjson", 36 + ] 37 + 38 + [[package]] 39 + name = "memchr" 40 + version = "2.7.4" 41 + source = "registry+https://github.com/rust-lang/crates.io-index" 42 + checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" 43 + 44 + [[package]] 45 + name = "minimal-lexical" 46 + version = "0.2.1" 47 + source = "registry+https://github.com/rust-lang/crates.io-index" 48 + checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" 49 + 50 + [[package]] 51 + name = "nom" 52 + version = "7.1.3" 53 + source = "registry+https://github.com/rust-lang/crates.io-index" 54 + checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" 55 + dependencies = [ 56 + "memchr", 57 + "minimal-lexical", 58 + ] 59 + 60 + [[package]] 61 + name = "proc-macro2" 62 + version = "1.0.92" 63 + source = "registry+https://github.com/rust-lang/crates.io-index" 64 + checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" 65 + 
dependencies = [ 66 + "unicode-ident", 67 + ] 68 + 69 + [[package]] 70 + name = "quote" 71 + version = "1.0.38" 72 + source = "registry+https://github.com/rust-lang/crates.io-index" 73 + checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" 74 + dependencies = [ 75 + "proc-macro2", 76 + ] 77 + 78 + [[package]] 79 + name = "ref-cast" 80 + version = "1.0.23" 81 + source = "registry+https://github.com/rust-lang/crates.io-index" 82 + checksum = "ccf0a6f84d5f1d581da8b41b47ec8600871962f2a528115b542b362d4b744931" 83 + dependencies = [ 84 + "ref-cast-impl", 85 + ] 86 + 87 + [[package]] 88 + name = "ref-cast-impl" 89 + version = "1.0.23" 90 + source = "registry+https://github.com/rust-lang/crates.io-index" 91 + checksum = "bcc303e793d3734489387d205e9b186fac9c6cfacedd98cbb2e8a5943595f3e6" 92 + dependencies = [ 93 + "proc-macro2", 94 + "quote", 95 + "syn", 96 + ] 97 + 98 + [[package]] 99 + name = "syn" 100 + version = "2.0.95" 101 + source = "registry+https://github.com/rust-lang/crates.io-index" 102 + checksum = "46f71c0377baf4ef1cc3e3402ded576dccc315800fbc62dfc7fe04b009773b4a" 103 + dependencies = [ 104 + "proc-macro2", 105 + "quote", 106 + "unicode-ident", 107 + ] 108 + 109 + [[package]] 110 + name = "thiserror" 111 + version = "2.0.9" 112 + source = "registry+https://github.com/rust-lang/crates.io-index" 113 + checksum = "f072643fd0190df67a8bab670c20ef5d8737177d6ac6b2e9a236cb096206b2cc" 114 + dependencies = [ 115 + "thiserror-impl", 116 + ] 117 + 118 + [[package]] 119 + name = "thiserror-impl" 120 + version = "2.0.9" 121 + source = "registry+https://github.com/rust-lang/crates.io-index" 122 + checksum = "7b50fa271071aae2e6ee85f842e2e28ba8cd2c5fb67f11fcb1fd70b276f9e7d4" 123 + dependencies = [ 124 + "proc-macro2", 125 + "quote", 126 + "syn", 127 + ] 128 + 129 + [[package]] 130 + name = "tinyjson" 131 + version = "2.5.1" 132 + source = "registry+https://github.com/rust-lang/crates.io-index" 133 + checksum = 
"9ab95735ea2c8fd51154d01e39cf13912a78071c2d89abc49a7ef102a7dd725a" 134 + 135 + [[package]] 136 + name = "unicode-ident" 137 + version = "1.0.14" 138 + source = "registry+https://github.com/rust-lang/crates.io-index" 139 + checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83"
+11
Cargo.toml
··· 1 + [package] 2 + name = "links" 3 + version = "0.1.0" 4 + edition = "2021" 5 + 6 + [dependencies] 7 + anyhow = "1.0.95" 8 + fluent-uri = "0.3.2" 9 + nom = "7.1.3" 10 + thiserror = "2.0.9" 11 + tinyjson = "2.5.1"
+122
readme.md
··· 1 + µcosm links 2 + =========== 3 + 4 + optimistically extract links from arbitrary atproto records, optionally resolving canonical representations and possibly validating StrongRefs. 5 + 6 + 7 + status 8 + ------ 9 + 10 + not at all ready (yet) 11 + 12 + --- 13 + 14 + as far as i can tell, atproto lexicons today don't follow much of a convention for referencing across documents: sometimes it's a StrongRef, sometimes it's a DID, sometimes it's a bare at-uri. lexicon authors choose any old link-sounding key name for the key in their document. 15 + 16 + it's pretty messy so embrace the mess: atproto wants to be part of the web, so this library will also extract URLs and other URIs if you want it to. all the links. 17 + 18 + 19 + why 20 + --- 21 + 22 + the atproto firehose that bluesky sprays at you will contain raw _contents_ from people's pdses. these are isolated, decontextualized updates. it's very easy to build some kinds of interesting downstream apps off of this feed. 23 + 24 + - bluesky posts (firesky, deletions, ) 25 + - bluesky post stats (emojis, ) 26 + - trending keywords () 27 + 28 + but bringing almost any kind of _context_ into your project requires a big step up in complexity and potentially cost: you're entering "appview" territory. _how many likes does a post have? who follows this account?_ 29 + 30 + you own your atproto data: it's kept in your personal data repository (PDS) and no one else can write to it. when someone likes your post, they create a "like" record in their _own_ pds, and that like belongs to _them_, not to you/your post. 31 + 32 + in the firehose you'll see an `app.bsky.feed.post` record created, with no details about who has liked it. then you'll see separate `app.bsky.feed.like` records show up for each like that comes in on that post, with no context about the post except a random-looking reference to it. storing these in order to reconstruct that context is up to you! 
33 + 34 + **so, why** 35 + 36 + everything is links, and they're a mess, but they all kinda work the same, so maybe some tooling can bring down that big step in complexity from firehose raw-content apps -> apps requiring any social context. 37 + 38 + everything is links: 39 + 40 + - likes 41 + - follows 42 + - blocks 43 + - reposts 44 + - quotes 45 + 46 + some low-level things you could make from links: 47 + 48 + - notification streams (part of ucosm) 49 + - a global reverse index (part of ucosm) 50 + 51 + i think that making these low-level services as easy to use as jetstream could open up pathways for building more atproto apps that operate at full scale with interesting features for reasonable effort at low cost to operate. 52 + 53 + 54 + extracting links 55 + --------------- 56 + 57 + 58 + - low-level: pass a &str of a field value and get a parsed link back 59 + 60 + - med-level: pass a &str of record in json form and get a list of parsed links + json paths back. (todo: should also handle dag-cbor prob?) 61 + 62 + - high-ish level: pass the json record and maybe apply some pre-loaded rules based on known lexicons to get the best result. 63 + 64 + for now, a link is only considered if it matches for the entire value of the record's field -- links embedded in text content are not included. note that urls in bluesky posts _will_ still be extracted, since they are broken out into facets. 65 + 66 + 67 + resolving / canonicalizing links 68 + -------------------------------- 69 + 70 + 71 + ### at-uris 72 + 73 + every at-uri has at least two equivalent forms, one with a `DID`, and one with an account handle. the at-uri spec [illustrates this by example](https://atproto.com/specs/at-uri-scheme): 74 + 75 + - `at://did:plc:44ybard66vv44zksje25o7dz/app.bsky.feed.post/3jwdwj2ctlk26` 76 + - `at://bnewbold.bsky.team/app.bsky.feed.post/3jwdwj2ctlk26` 77 + 78 + some applications, like a reverse link index, may wish to canonicalize at-uris to a single form. 
the `DID`-form is stable as an account changes its handle and is probably the right choice to canonicalize to, but maybe some apps would actually prefer to canonicalize to handles? 79 + 80 + hopefully atrium will make it easy to resolve at-uris. 81 + 82 + 83 + ### urls 84 + 85 + canonicalizing URLs is more annoying but also a bit more established. lots of details. 86 + 87 + - do we have to deal with punycode? 88 + - follow redirects (todo: only permanent ones, or all?) 89 + - check for rel=canonical http header and possibly follow it 90 + - check link rel=canonical meta tag and possibly follow it 91 + - do we need to check site maps?? 92 + - do we have to care at all about AMP? 93 + - do we want anything to do with url shorteners?? 94 + - how do multilingual sites affect this? 95 + - do we have to care about `script type="application/ld+json"` ??? 96 + 97 + ugh. is there a crate for this? 98 + 99 + 100 + ### relative uris? 101 + 102 + links might be relative, in which case they might need to be made absolute before being useful. is that a concern for this library, or up to the user? (seems like we might not have context here to determine its absolute form) 103 + 104 + 105 + ### canonicalizing 106 + 107 + there should be a few async functions available to canonicalize already-parsed links. 108 + 109 + - what happens if a link can't be resolved? 110 + 111 + 112 + --- 113 + 114 + - using `tinyjson` because it's nice -- maybe should switch to serde_json to share deps with atrium? 115 + 116 + - would use atrium for parsing at-uris, but it's not in there. there's a did-only version in the non-lib commands.rs. its identifier parser is strict to did + handle, which makes sense, but for our purposes we might want to allow unknown methods too? 117 + 118 + - rsky-syntax has an aturi 119 + - adenosine also 120 + - might come back to these 121 + 122 +
+63
src/lib.rs
··· 1 + use fluent_uri::Uri; 2 + 3 + #[derive(Debug, PartialEq)] 4 + pub enum Link { 5 + AtUri(String), 6 + Uri(String), 7 + } 8 + 9 + // normalizing is a bit opinionated 10 + pub fn parse_at_uri(_s: &str) -> Option<String> { 11 + // TODO 12 + None 13 + } 14 + 15 + // normalizing is a bit opinionated 16 + pub fn parse_uri(s: &str) -> Option<String> { 17 + Uri::parse(s).map(|u| u.normalize().into_string()).ok() 18 + } 19 + 20 + pub fn parse_any(s: &str) -> Option<Link> { 21 + parse_at_uri(s) 22 + .map(Link::AtUri) 23 + .or_else(|| parse_uri(s).map(Link::Uri)) 24 + } 25 + 26 + #[cfg(test)] 27 + mod tests { 28 + use super::*; 29 + 30 + #[test] 31 + fn test_uri_parse() { 32 + let s = "https://example.com"; 33 + let uri = parse_uri(s).unwrap(); 34 + assert_eq!(uri.as_str(), s); 35 + } 36 + 37 + #[test] 38 + fn test_uri_normalizes() { 39 + let s = "HTTPS://example.com/../"; 40 + let uri = parse_uri(s).unwrap(); 41 + assert_eq!(uri.as_str(), "https://example.com/"); 42 + } 43 + 44 + #[test] 45 + fn test_uri_invalid() { 46 + assert!(parse_uri("https:\\bad-example.com").is_none()); 47 + } 48 + 49 + #[test] 50 + fn test_any_parse() { 51 + assert_eq!( 52 + parse_any("https://example.com"), 53 + Some(Link::Uri("https://example.com".into())) 54 + ); 55 + 56 + assert_eq!( 57 + parse_any("at://did:plc:44ybard66vv44zksje25o7dz/app.bsky.feed.post/3jwdwj2ctlk26"), 58 + Some(Link::AtUri( 59 + "at://did:plc:44ybard66vv44zksje25o7dz/app.bsky.feed.post/3jwdwj2ctlk26".into() 60 + )), 61 + ); 62 + } 63 + }