Parakeet is a Rust-based Bluesky AppServer that aims to implement most of the functionality required to support the Bluesky client.
appview atproto bluesky rust appserver
66
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'post-tweaks' into 'main'

Post Tweaks

See merge request parakeet-social/parakeet!23

Mia 59cee77e 1e908e61

+757 -224
+6 -1
consumer/src/backfill/mod.rs
··· 275 275 follows: Vec<(String, String, DateTime<Utc>)>, 276 276 list_items: Vec<(String, records::AppBskyGraphListItem)>, 277 277 verifications: Vec<(String, Cid, records::AppBskyGraphVerification)>, 278 + threadgates: Vec<(String, Cid, records::AppBskyFeedThreadgate)>, // not COPY'd but needs to be kept until last. 278 279 records: Vec<(String, Cid)>, 279 280 } 280 281 281 282 impl CopyStore { 282 283 async fn submit(self, t: &mut Transaction<'_>, did: &str) -> Result<(), tokio_postgres::Error> { 283 284 db::copy::copy_likes(t, did, self.likes).await?; 284 - db::copy::copy_posts(t, did, self.posts).await?; 285 285 db::copy::copy_reposts(t, did, self.reposts).await?; 286 286 db::copy::copy_blocks(t, did, self.blocks).await?; 287 287 db::copy::copy_follows(t, did, self.follows).await?; 288 288 db::copy::copy_list_items(t, self.list_items).await?; 289 289 db::copy::copy_verification(t, did, self.verifications).await?; 290 + db::copy::copy_posts(t, did, self.posts).await?; 291 + for (at_uri, cid, record) in self.threadgates { 292 + db::threadgate_enforce_backfill(t, did, &record).await?; 293 + db::threadgate_upsert(t, &at_uri, cid, record).await?; 294 + } 290 295 db::copy::copy_records(t, did, self.records).await?; 291 296 292 297 Ok(())
+11 -1
consumer/src/backfill/repo.rs
··· 4 4 }; 5 5 use crate::indexer::records; 6 6 use crate::indexer::types::{AggregateDeltaStore, RecordTypes}; 7 + use crate::utils::at_uri_is_by; 7 8 use crate::{db, indexer}; 8 9 use deadpool_postgres::Transaction; 9 10 use ipld_core::cid::Cid; ··· 144 145 db::maintain_self_labels(t, did, Some(cid), &at_uri, labels).await?; 145 146 } 146 147 if let Some(embed) = rec.embed.clone().and_then(|embed| embed.into_bsky()) { 147 - db::post_embed_insert(t, &at_uri, embed, rec.created_at).await?; 148 + db::post_embed_insert(t, &at_uri, embed, rec.created_at, true).await?; 148 149 } 149 150 150 151 deltas.incr(did, AggregateType::ProfilePost).await; ··· 165 166 copies 166 167 .reposts 167 168 .push((rkey.to_string(), rec.subject, rec.via, rec.created_at)); 169 + } 170 + RecordTypes::AppBskyFeedThreadgate(record) => { 171 + if !at_uri_is_by(&record.post, did) { 172 + tracing::warn!("tried to create a threadgate on a post we don't control!"); 173 + return Ok(()); 174 + } 175 + 176 + copies.push_record(&at_uri, cid); 177 + copies.threadgates.push((at_uri, cid, record)); 168 178 } 169 179 RecordTypes::AppBskyGraphBlock(rec) => { 170 180 copies.push_record(&at_uri, cid);
+38 -3
consumer/src/db/copy.rs
··· 1 1 use super::PgExecResult; 2 2 use crate::indexer::records; 3 - use crate::utils::strongref_to_parts; 3 + use crate::utils::{extract_mentions_and_tags, merge_tags, strongref_to_parts}; 4 4 use chrono::prelude::*; 5 5 use deadpool_postgres::Transaction; 6 6 use futures::pin_mut; ··· 119 119 .await 120 120 } 121 121 122 - const POST_STMT: &str = "COPY posts_tmp (at_uri, cid, did, record, content, facets, languages, tags, parent_uri, parent_cid, root_uri, root_cid, embed, embed_subtype, created_at) FROM STDIN (FORMAT binary)"; 122 + const POST_STMT: &str = "COPY posts_tmp (at_uri, cid, did, record, content, facets, languages, tags, parent_uri, parent_cid, root_uri, root_cid, embed, embed_subtype, mentions, created_at) FROM STDIN (FORMAT binary)"; 123 123 const POST_TYPES: &[Type] = &[ 124 124 Type::TEXT, 125 125 Type::TEXT, ··· 135 135 Type::TEXT, 136 136 Type::TEXT, 137 137 Type::TEXT, 138 + Type::TEXT_ARRAY, 138 139 Type::TIMESTAMP, 139 140 ]; 140 141 pub async fn copy_posts( ··· 159 160 160 161 for (at_uri, cid, post) in data { 161 162 let record = serde_json::to_value(&post).unwrap(); 163 + let (mentions, tags) = post 164 + .facets 165 + .as_ref() 166 + .map(|v| extract_mentions_and_tags(v)) 167 + .unzip(); 162 168 let facets = post.facets.and_then(|v| serde_json::to_value(v).ok()); 163 169 let embed = post.embed.as_ref().map(|v| v.as_str()); 164 170 let embed_subtype = post.embed.as_ref().and_then(|v| v.subtype()); 165 171 let (parent_uri, parent_cid) = strongref_to_parts(post.reply.as_ref().map(|v| &v.parent)); 166 172 let (root_uri, root_cid) = strongref_to_parts(post.reply.as_ref().map(|v| &v.root)); 173 + 174 + let tags = merge_tags(tags, post.tags); 167 175 168 176 let writer = writer.as_mut(); 169 177 writer ··· 175 183 &post.text, 176 184 &facets, 177 185 &post.langs.unwrap_or_default(), 178 - &post.tags.unwrap_or_default(), 186 + &tags, 179 187 &parent_uri, 180 188 &parent_cid, 181 189 &root_uri, 182 190 &root_cid, 183 191 &embed, 184 192 
&embed_subtype, 193 + &mentions, 185 194 &post.created_at.naive_utc(), 186 195 ]) 187 196 .await?; 188 197 } 189 198 190 199 writer.finish().await?; 200 + 201 + let threadgated: Vec<(String, String, DateTime<Utc>)> = conn 202 + .query( 203 + "SELECT root_uri, p.at_uri, p.created_at FROM posts_tmp p INNER JOIN threadgates t ON root_uri = post_uri WHERE t.allow IS NOT NULL", 204 + &[], 205 + ) 206 + .await? 207 + .into_iter() 208 + .map(|v| (v.get(0), v.get(1), v.get(2))).collect(); 209 + 210 + for (root, post, created_at) in threadgated { 211 + match super::post_enforce_threadgate(conn, &root, did, created_at, true).await { 212 + Ok(true) => { 213 + conn.execute( 214 + "UPDATE posts_tmp SET violates_threadgate=TRUE WHERE at_uri=$1", 215 + &[&post], 216 + ) 217 + .await?; 218 + } 219 + Ok(false) => continue, 220 + Err(e) => { 221 + tracing::error!("failed to check threadgate enforcement: {e}"); 222 + continue; 223 + } 224 + } 225 + } 191 226 192 227 conn.execute("INSERT INTO posts (SELECT * FROM posts_tmp)", &[]) 193 228 .await
+208
consumer/src/db/gates.rs
··· 1 + use super::{PgExecResult, PgResult}; 2 + use crate::indexer::records::{ 3 + AppBskyFeedThreadgate, ThreadgateRule, THREADGATE_RULE_FOLLOWER, THREADGATE_RULE_FOLLOWING, 4 + THREADGATE_RULE_LIST, THREADGATE_RULE_MENTION, 5 + }; 6 + use chrono::prelude::*; 7 + use chrono::{DateTime, Utc}; 8 + use deadpool_postgres::GenericClient; 9 + use std::collections::HashSet; 10 + 11 + pub async fn post_enforce_threadgate<C: GenericClient>( 12 + conn: &mut C, 13 + root: &str, 14 + post_author: &str, 15 + post_created_at: DateTime<Utc>, 16 + is_backfill: bool, 17 + ) -> PgResult<bool> { 18 + // check if the root and the current post are the same author 19 + // strip "at://" then break into parts by '/' 20 + let parts = root[5..].split('/').collect::<Vec<_>>(); 21 + let root_author = parts[0]; 22 + if root_author == post_author { 23 + return Ok(false); 24 + } 25 + 26 + let tg_data = super::threadgate_get(conn, root).await?; 27 + 28 + let Some((created_at, allow, allow_lists)) = tg_data else { 29 + return Ok(false); 30 + }; 31 + 32 + // when backfilling, there's no point continuing if the record is dated before the threadgate 33 + if is_backfill && post_created_at < created_at { 34 + return Ok(false); 35 + } 36 + 37 + if allow.is_empty() { 38 + return Ok(true); 39 + } 40 + 41 + let allow: HashSet<String> = HashSet::from_iter(allow); 42 + 43 + if allow.contains(THREADGATE_RULE_FOLLOWER) || allow.contains(THREADGATE_RULE_FOLLOWING) { 44 + let profile_state: Option<(bool, bool)> = conn 45 + .query_opt( 46 + "SELECT following IS NOT NULL, followed IS NOT NULL FROM profile_states WHERE did=$1 AND subject=$2", 47 + &[&root_author, &post_author], 48 + ) 49 + .await? 
50 + .map(|v| (v.get(0), v.get(1))); 51 + 52 + if let Some((following, followed)) = profile_state { 53 + if allow.contains(THREADGATE_RULE_FOLLOWER) && followed { 54 + return Ok(false); 55 + } 56 + 57 + if allow.contains(THREADGATE_RULE_FOLLOWING) && following { 58 + return Ok(false); 59 + } 60 + } 61 + } 62 + 63 + // check mentions 64 + if allow.contains(THREADGATE_RULE_MENTION) { 65 + let mentions: Vec<String> = conn 66 + .query_opt("SELECT mentions FROM posts WHERE at_uri=$1", &[&root]) 67 + .await? 68 + .map(|r| r.get(0)) 69 + .unwrap_or_default(); 70 + 71 + if mentions.contains(&post_author.to_owned()) { 72 + return Ok(false); 73 + } 74 + } 75 + 76 + if allow.contains(THREADGATE_RULE_LIST) { 77 + if allow_lists.is_empty() { 78 + return Ok(true); 79 + } 80 + 81 + let count: i64 = conn 82 + .query_one( 83 + "SELECT count(*) FROM list_items WHERE list_uri=ANY($1) AND subject=$2", 84 + &[&allow_lists, &post_author], 85 + ) 86 + .await? 87 + .get(0); 88 + if count != 0 { 89 + return Ok(false); 90 + } 91 + } 92 + 93 + Ok(true) 94 + } 95 + 96 + pub async fn postgate_maintain_detaches<C: GenericClient>( 97 + conn: &mut C, 98 + post: &str, 99 + detached: &[String], 100 + disable_effective: Option<NaiveDateTime>, 101 + ) -> PgExecResult { 102 + conn.execute( 103 + "SELECT maintain_postgates($1, $2, $3)", 104 + &[&post, &detached, &disable_effective], 105 + ) 106 + .await 107 + } 108 + 109 + // variant of post_enforce_threadgate that runs when backfilling to clean up any posts already in DB 110 + pub async fn threadgate_enforce_backfill<C: GenericClient>( 111 + conn: &mut C, 112 + root_author: &str, 113 + threadgate: &AppBskyFeedThreadgate, 114 + ) -> PgExecResult { 115 + // pull out allow - if it's None we can skip this gate. 
116 + let Some(allow) = threadgate.allow.as_ref() else { 117 + return Ok(0); 118 + }; 119 + 120 + let root = &threadgate.post; 121 + 122 + if allow.is_empty() { 123 + // blind update everything 124 + return conn.execute( 125 + "UPDATE posts SET violates_threadgate=TRUE WHERE root_uri=$1 AND did != $2 AND created_at >= $3", 126 + &[&root, &root_author, &threadgate.created_at], 127 + ).await; 128 + } 129 + 130 + // pull authors with our root_uri where the author is not the root author and are dated after created_at 131 + // this is mutable because we'll remove ALLOWED dids 132 + let mut dids: HashSet<String> = conn 133 + .query( 134 + "SELECT DISTINCT did FROM posts WHERE root_uri=$1 AND did != $2 AND created_at >= $3", 135 + &[&root, &root_author, &threadgate.created_at], 136 + ) 137 + .await? 138 + .into_iter() 139 + .map(|row| row.get(0)) 140 + .collect(); 141 + 142 + // this will be empty if there are no replies. 143 + if dids.is_empty() { 144 + return Ok(0); 145 + } 146 + 147 + let allowed_lists = allow 148 + .iter() 149 + .filter_map(|rule| match rule { 150 + ThreadgateRule::List { list } => Some(list), 151 + _ => None, 152 + }) 153 + .collect::<Vec<_>>(); 154 + 155 + let allow: HashSet<_> = HashSet::from_iter(allow.into_iter().map(|v| v.as_str())); 156 + 157 + if allow.contains(THREADGATE_RULE_FOLLOWER) && !dids.is_empty() { 158 + let current_dids: Vec<_> = dids.iter().collect(); 159 + 160 + let res = conn.query( 161 + "SELECT subject FROM profile_states WHERE did=$1 AND subject=ANY($2) AND followed IS NOT NULL", 162 + &[&root_author, &current_dids] 163 + ).await?; 164 + 165 + dids = &dids - &HashSet::from_iter(res.into_iter().map(|r| r.get(0))); 166 + } 167 + 168 + if allow.contains(THREADGATE_RULE_FOLLOWING) && !dids.is_empty() { 169 + let current_dids: Vec<_> = dids.iter().collect(); 170 + 171 + let res = conn.query( 172 + "SELECT subject FROM profile_states WHERE did=$1 AND subject=ANY($2) AND following IS NOT NULL", 173 + &[&root_author, &current_dids] 
174 + ).await?; 175 + 176 + dids = &dids - &HashSet::from_iter(res.into_iter().map(|r| r.get(0))); 177 + } 178 + 179 + if allow.contains(THREADGATE_RULE_MENTION) && !dids.is_empty() { 180 + let mentions: Vec<String> = conn 181 + .query_opt("SELECT mentions FROM posts WHERE at_uri=$1", &[&root]) 182 + .await? 183 + .map(|r| r.get(0)) 184 + .unwrap_or_default(); 185 + 186 + dids = &dids - &HashSet::from_iter(mentions); 187 + } 188 + 189 + if allow.contains(THREADGATE_RULE_LIST) && !dids.is_empty() { 190 + let current_dids: Vec<_> = dids.iter().collect(); 191 + 192 + let res = conn 193 + .query( 194 + "SELECT subject FROM list_items WHERE list_uri = ANY($1) AND subject = ANY($2)", 195 + &[&allowed_lists, &current_dids], 196 + ) 197 + .await?; 198 + 199 + dids = &dids - &HashSet::from_iter(res.into_iter().map(|r| r.get(0))); 200 + } 201 + 202 + let dids = dids.into_iter().collect::<Vec<_>>(); 203 + 204 + conn.execute( 205 + "UPDATE posts SET violates_threadgate=TRUE WHERE root_uri = $1 AND did = ANY($2) AND created_at >= $3", 206 + &[&threadgate.post, &dids, &threadgate.created_at] 207 + ).await 208 + }
+2
consumer/src/db/mod.rs
··· 7 7 mod actor; 8 8 mod backfill; 9 9 pub mod copy; 10 + mod gates; 10 11 mod labels; 11 12 mod record; 12 13 13 14 pub use actor::*; 14 15 pub use backfill::*; 16 + pub use gates::*; 15 17 pub use labels::*; 16 18 pub use record::*;
+70 -28
consumer/src/db/record.rs
··· 1 1 use super::{PgExecResult, PgOptResult, PgResult}; 2 2 use crate::indexer::records::*; 3 - use crate::utils::{blob_ref, strongref_to_parts}; 3 + use crate::utils::{blob_ref, extract_mentions_and_tags, merge_tags, strongref_to_parts}; 4 4 use chrono::prelude::*; 5 5 use deadpool_postgres::GenericClient; 6 6 use ipld_core::cid::Cid; 7 7 use lexica::community_lexicon::bookmarks::Bookmark; 8 + use std::collections::HashSet; 8 9 9 10 pub async fn record_upsert<C: GenericClient>( 10 11 conn: &mut C, ··· 317 318 repo: &str, 318 319 cid: Cid, 319 320 rec: AppBskyFeedPost, 321 + is_backfill: bool, 320 322 ) -> PgExecResult { 321 323 let cid = cid.to_string(); 322 324 let record = serde_json::to_value(&rec).unwrap(); 325 + let (mentions, tags) = rec 326 + .facets 327 + .as_ref() 328 + .map(|v| extract_mentions_and_tags(v)) 329 + .unzip(); 323 330 let facets = rec.facets.and_then(|v| serde_json::to_value(v).ok()); 324 331 let (parent_uri, parent_cid) = strongref_to_parts(rec.reply.as_ref().map(|v| &v.parent)); 325 332 let (root_uri, root_cid) = strongref_to_parts(rec.reply.as_ref().map(|v| &v.root)); 326 333 let embed = rec.embed.as_ref().map(|v| v.as_str()); 327 334 let embed_subtype = rec.embed.as_ref().and_then(|v| v.subtype()); 328 335 336 + // if there is a root, we need to check for the presence of a threadgate. 337 + let violates_threadgate = match &root_uri { 338 + Some(root) => { 339 + super::post_enforce_threadgate(conn, root, repo, rec.created_at, is_backfill).await? 
340 + } 341 + None => false, 342 + }; 343 + 344 + let tags = merge_tags(tags, rec.tags); 345 + 329 346 let count = conn 330 347 .execute( 331 348 include_str!("sql/post_insert.sql"), ··· 337 354 &rec.text, 338 355 &facets, 339 356 &rec.langs.unwrap_or_default(), 340 - &rec.tags.unwrap_or_default(), 357 + &tags, 341 358 &parent_uri, 342 359 &parent_cid, 343 360 &root_uri, 344 361 &root_cid, 345 362 &embed, 346 363 &embed_subtype, 364 + &mentions, 365 + &violates_threadgate, 347 366 &rec.created_at, 348 367 ], 349 368 ) 350 369 .await?; 351 370 352 371 if let Some(embed) = rec.embed.and_then(|embed| embed.into_bsky()) { 353 - post_embed_insert(conn, at_uri, embed, rec.created_at).await?; 372 + post_embed_insert(conn, at_uri, embed, rec.created_at, is_backfill).await?; 354 373 } 355 374 356 375 Ok(count) ··· 380 399 post: &str, 381 400 embed: AppBskyEmbed, 382 401 created_at: DateTime<Utc>, 402 + is_backfill: bool, 383 403 ) -> PgExecResult { 384 404 match embed { 385 405 AppBskyEmbed::Images(embed) => post_embed_image_insert(conn, post, embed).await, 386 406 AppBskyEmbed::Video(embed) => post_embed_video_insert(conn, post, embed).await, 387 407 AppBskyEmbed::External(embed) => post_embed_external_insert(conn, post, embed).await, 388 408 AppBskyEmbed::Record(embed) => { 389 - post_embed_record_insert(conn, post, embed, created_at).await 409 + post_embed_record_insert(conn, post, embed, created_at, is_backfill).await 390 410 } 391 411 AppBskyEmbed::RecordWithMedia(embed) => { 392 - post_embed_record_insert(conn, post, embed.record, created_at).await?; 412 + post_embed_record_insert(conn, post, embed.record, created_at, is_backfill).await?; 393 413 match *embed.media { 394 414 AppBskyEmbed::Images(embed) => post_embed_image_insert(conn, post, embed).await, 395 415 AppBskyEmbed::Video(embed) => post_embed_video_insert(conn, post, embed).await, ··· 476 496 ).await 477 497 } 478 498 499 + const PG_DISABLE_RULE: &str = "app.bsky.feed.postgate#disableRule"; 479 500 async fn 
post_embed_record_insert<C: GenericClient>( 480 501 conn: &mut C, 481 502 post: &str, 482 503 embed: AppBskyEmbedRecord, 483 504 post_created_at: DateTime<Utc>, 505 + is_backfill: bool, 484 506 ) -> PgExecResult { 485 507 // strip "at://" then break into parts by '/' 486 508 let parts = embed.record.uri[5..].split('/').collect::<Vec<_>>(); 487 509 488 510 let detached = if parts[1] == "app.bsky.feed.post" { 489 - let postgate_effective: Option<DateTime<Utc>> = conn 490 - .query_opt( 491 - "SELECT created_at FROM postgates WHERE post_uri=$1", 492 - &[&post], 493 - ) 494 - .await? 495 - .map(|v| v.get(0)); 511 + let pg_data = postgate_get(conn, post).await?; 496 512 497 - postgate_effective 498 - .map(|v| Utc::now().min(post_created_at) > v) 499 - .unwrap_or_default() 513 + if let Some((effective, detached, rules)) = pg_data { 514 + let detached: HashSet<String> = HashSet::from_iter(detached); 515 + let rules: HashSet<String> = HashSet::from_iter(rules); 516 + let compare_date = match is_backfill { 517 + true => post_created_at, 518 + false => Utc::now(), 519 + }; 520 + 521 + detached.contains(post) || (rules.contains(PG_DISABLE_RULE) && compare_date > effective) 522 + } else { 523 + false 524 + } 500 525 } else { 501 526 false 502 527 }; ··· 505 530 "INSERT INTO post_embed_record (post_uri, record_type, uri, cid, detached) VALUES ($1, $2, $3, $4, $5)", 506 531 &[&post, &parts[1], &embed.record.uri, &embed.record.cid.to_string(), &detached], 507 532 ).await 533 + } 534 + 535 + async fn postgate_get<C: GenericClient>( 536 + conn: &mut C, 537 + post: &str, 538 + ) -> PgOptResult<(DateTime<Utc>, Vec<String>, Vec<String>)> { 539 + let res = conn 540 + .query_opt( 541 + "SELECT created_at, detached, rules FROM postgates WHERE post_uri=$1", 542 + &[&post], 543 + ) 544 + .await? 
545 + .map(|v| (v.get(0), v.get(1), v.get(2))); 546 + 547 + Ok(res) 508 548 } 509 549 510 550 pub async fn postgate_upsert<C: GenericClient>( ··· 536 576 pub async fn postgate_delete<C: GenericClient>(conn: &mut C, at_uri: &str) -> PgExecResult { 537 577 conn.execute("DELETE FROM postgates WHERE at_uri=$1", &[&at_uri]) 538 578 .await 539 - } 540 - 541 - pub async fn postgate_maintain_detaches<C: GenericClient>( 542 - conn: &mut C, 543 - post: &str, 544 - detached: &[String], 545 - disable_effective: Option<NaiveDateTime>, 546 - ) -> PgExecResult { 547 - conn.execute( 548 - "SELECT maintain_postgates($1, $2, $3)", 549 - &[&post, &detached, &disable_effective], 550 - ) 551 - .await 552 579 } 553 580 554 581 pub async fn profile_upsert<C: GenericClient>( ··· 698 725 pub async fn status_delete<C: GenericClient>(conn: &mut C, did: &str) -> PgExecResult { 699 726 conn.execute("DELETE FROM statuses WHERE did=$1", &[&did]) 700 727 .await 728 + } 729 + 730 + pub async fn threadgate_get<C: GenericClient>( 731 + conn: &mut C, 732 + post: &str, 733 + ) -> PgOptResult<(DateTime<Utc>, Vec<String>, Vec<String>)> { 734 + let res = conn 735 + .query_opt( 736 + "SELECT created_at, allow, allowed_lists FROM threadgates WHERE post_uri=$1 AND allow IS NOT NULL", 737 + &[&post], 738 + ) 739 + .await? 740 + .map(|v| (v.get(0), v.get(1), v.get(2))); 741 + 742 + Ok(res) 701 743 } 702 744 703 745 pub async fn threadgate_upsert<C: GenericClient>(
+2 -2
consumer/src/db/sql/post_insert.sql
··· 1 1 INSERT INTO posts (at_uri, did, cid, record, content, facets, languages, tags, parent_uri, parent_cid, root_uri, 2 - root_cid, embed, embed_subtype, created_at) 3 - VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15) 2 + root_cid, embed, embed_subtype, mentions, violates_threadgate, created_at) 3 + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17) 4 4 ON CONFLICT DO NOTHING
+1 -1
consumer/src/indexer/mod.rs
··· 625 625 }); 626 626 627 627 let labels = record.labels.clone(); 628 - db::post_insert(conn, at_uri, repo, cid, record).await?; 628 + db::post_insert(conn, at_uri, repo, cid, record, false).await?; 629 629 if let Some(labels) = labels { 630 630 db::maintain_self_labels(conn, repo, Some(cid), at_uri, labels).await?; 631 631 }
+9 -4
consumer/src/indexer/records.rs
··· 272 272 pub hidden_replies: Vec<String>, 273 273 } 274 274 275 + pub const THREADGATE_RULE_MENTION: &str = "app.bsky.feed.threadgate#mentionRule"; 276 + pub const THREADGATE_RULE_FOLLOWER: &str = "app.bsky.feed.threadgate#followerRule"; 277 + pub const THREADGATE_RULE_FOLLOWING: &str = "app.bsky.feed.threadgate#followingRule"; 278 + pub const THREADGATE_RULE_LIST: &str = "app.bsky.feed.threadgate#listRule"; 279 + 275 280 #[derive(Debug, Deserialize, Serialize)] 276 281 #[serde(tag = "$type")] 277 282 pub enum ThreadgateRule { ··· 288 293 impl ThreadgateRule { 289 294 pub fn as_str(&self) -> &'static str { 290 295 match self { 291 - ThreadgateRule::Mention => "app.bsky.feed.threadgate#mentionRule", 292 - ThreadgateRule::Follower => "app.bsky.feed.threadgate#followerRule", 293 - ThreadgateRule::Following => "app.bsky.feed.threadgate#followingRule", 294 - ThreadgateRule::List { .. } => "app.bsky.feed.threadgate#listRule", 296 + ThreadgateRule::Mention => THREADGATE_RULE_MENTION, 297 + ThreadgateRule::Follower => THREADGATE_RULE_FOLLOWER, 298 + ThreadgateRule::Following => THREADGATE_RULE_FOLLOWING, 299 + ThreadgateRule::List { .. } => THREADGATE_RULE_LIST, 295 300 } 296 301 } 297 302 }
+31
consumer/src/utils.rs
··· 1 + use lexica::app_bsky::richtext::{Facet, FacetMain, FacetOuter}; 1 2 use lexica::{Blob, StrongRef}; 2 3 use serde::{Deserialize, Deserializer}; 3 4 ··· 39 40 40 41 did == split_aturi[2] 41 42 } 43 + 44 + pub fn extract_mentions_and_tags(from: &[FacetMain]) -> (Vec<String>, Vec<String>) { 45 + let (mentions, tags) = from 46 + .iter() 47 + .flat_map(|v| { 48 + v.features.iter().map(|facet| match facet { 49 + FacetOuter::Bsky(Facet::Mention { did }) => (Some(did), None), 50 + FacetOuter::Bsky(Facet::Tag { tag }) => (None, Some(tag)), 51 + _ => (None, None), 52 + }) 53 + }) 54 + .unzip::<_, _, Vec<_>, Vec<_>>(); 55 + 56 + let mentions = mentions.into_iter().flatten().cloned().collect(); 57 + let tags = tags.into_iter().flatten().cloned().collect(); 58 + 59 + (mentions, tags) 60 + } 61 + 62 + pub fn merge_tags<T>(t1: Option<Vec<T>>, t2: Option<Vec<T>>) -> Vec<T> { 63 + match (t1, t2) { 64 + (Some(t1), None) => t1, 65 + (None, Some(t2)) => t2, 66 + (Some(mut t1), Some(t2)) => { 67 + t1.extend(t2); 68 + t1 69 + } 70 + _ => Vec::default(), 71 + } 72 + }
+15
migrations/2025-09-27-171241_post-tweaks/down.sql
-- Reverts the post-tweaks migration.

-- remove the columns added to posts
alter table posts
    drop column mentions,
    drop column violates_threadgate;

-- drop the author_feeds triggers before the functions they execute
drop trigger t_author_feed_ins_post on posts;
drop trigger t_author_feed_del_post on posts;
drop trigger t_author_feed_ins_repost on reposts;
drop trigger t_author_feed_del_repost on reposts;

drop function f_author_feed_ins_post;
drop function f_author_feed_del_post;
drop function f_author_feed_ins_repost;
drop function f_author_feed_del_repost;

-- finally remove the table the triggers maintained
drop table author_feeds;
+79
migrations/2025-09-27-171241_post-tweaks/up.sql
-- Adds mention / threadgate bookkeeping to posts and introduces the author_feeds table,
-- which is kept in sync with posts and reposts through triggers.

alter table posts
    add column mentions text[],
    add column violates_threadgate bool not null default false;

create table author_feeds
(
    uri     text primary key,
    cid     text        not null,
    post    text        not null,
    did     text        not null,
    typ     text        not null,
    sort_at timestamptz not null
);

-- author_feeds post triggers
create function f_author_feed_ins_post() returns trigger
    language plpgsql as
$$
begin
    insert into author_feeds (uri, cid, post, did, typ, sort_at)
    VALUES (NEW.at_uri, NEW.cid, NEW.at_uri, NEW.did, 'post', NEW.created_at)
    on conflict do nothing;
    return NEW;
end;
$$;

create trigger t_author_feed_ins_post
    before insert
    on posts
    for each row
execute procedure f_author_feed_ins_post();

create function f_author_feed_del_post() returns trigger
    language plpgsql as
$$
begin
    -- author_feeds has no "item" column; the referenced post lives in "post"
    delete from author_feeds where did = OLD.did and post = OLD.at_uri and typ = 'post';
    return OLD;
end;
$$;

create trigger t_author_feed_del_post
    before delete
    on posts
    for each row
execute procedure f_author_feed_del_post();

-- author_feeds repost triggers
create function f_author_feed_ins_repost() returns trigger
    language plpgsql as
$$
begin
    -- build a well-formed AT-URI: at://<did>/<collection>/<rkey>
    insert into author_feeds (uri, cid, post, did, typ, sort_at)
    VALUES ('at://' || NEW.did || '/app.bsky.feed.repost/' || NEW.rkey, NEW.post_cid, NEW.post, NEW.did, 'repost', NEW.created_at)
    on conflict do nothing;
    return NEW;
end;
$$;

create trigger t_author_feed_ins_repost
    before insert
    on reposts
    for each row
execute procedure f_author_feed_ins_repost();

create function f_author_feed_del_repost() returns trigger
    language plpgsql as
$$
begin
    -- author_feeds has no "item" column; the reposted post lives in "post"
    delete from author_feeds where did = OLD.did and post = OLD.post and typ = 'repost';
    return OLD;
end;
$$;

create trigger t_author_feed_del_repost
    before delete
    on reposts
    for each row
execute procedure f_author_feed_del_repost();
+16
parakeet-db/src/models.rs
··· 148 148 pub embed: Option<String>, 149 149 pub embed_subtype: Option<String>, 150 150 151 + pub mentions: Option<Vec<Option<String>>>, 152 + pub violates_threadgate: bool, 153 + 151 154 pub created_at: DateTime<Utc>, 152 155 pub indexed_at: NaiveDateTime, 153 156 } ··· 414 417 pub subject_type: &'a str, 415 418 pub tags: Vec<String>, 416 419 } 420 + 421 + #[derive(Debug, Queryable, Selectable, Identifiable)] 422 + #[diesel(table_name = crate::schema::author_feeds)] 423 + #[diesel(primary_key(uri))] 424 + #[diesel(check_for_backend(diesel::pg::Pg))] 425 + pub struct AuthorFeedItem { 426 + pub uri: String, 427 + pub cid: String, 428 + pub post: String, 429 + pub did: String, 430 + pub typ: String, 431 + pub sort_at: DateTime<Utc>, 432 + }
+14
parakeet-db/src/schema.rs
··· 13 13 } 14 14 15 15 diesel::table! { 16 + author_feeds (uri) { 17 + uri -> Text, 18 + cid -> Text, 19 + post -> Text, 20 + did -> Text, 21 + typ -> Text, 22 + sort_at -> Timestamptz, 23 + } 24 + } 25 + 26 + diesel::table! { 16 27 backfill (repo, repo_ver) { 17 28 repo -> Text, 18 29 repo_ver -> Text, ··· 284 295 embed_subtype -> Nullable<Text>, 285 296 created_at -> Timestamptz, 286 297 indexed_at -> Timestamp, 298 + mentions -> Nullable<Array<Nullable<Text>>>, 299 + violates_threadgate -> Bool, 287 300 } 288 301 } 289 302 ··· 429 442 430 443 diesel::allow_tables_to_appear_in_same_query!( 431 444 actors, 445 + author_feeds, 432 446 backfill, 433 447 backfill_jobs, 434 448 blocks,
+159 -80
parakeet/src/hydration/posts.rs
··· 3 3 use lexica::app_bsky::actor::ProfileViewBasic; 4 4 use lexica::app_bsky::embed::Embed; 5 5 use lexica::app_bsky::feed::{ 6 - BlockedAuthor, FeedViewPost, PostView, PostViewerState, ReplyRef, ReplyRefPost, ThreadgateView, 6 + BlockedAuthor, FeedReasonRepost, FeedViewPost, FeedViewPostReason, PostView, PostViewerState, 7 + ReplyRef, ReplyRefPost, ThreadgateView, 7 8 }; 8 9 use lexica::app_bsky::graph::ListViewBasic; 9 10 use lexica::app_bsky::RecordStats; ··· 32 33 } 33 34 } 34 35 36 + type HydratePostsRet = ( 37 + models::Post, 38 + ProfileViewBasic, 39 + Vec<models::Label>, 40 + Option<Embed>, 41 + Option<ThreadgateView>, 42 + Option<PostViewerState>, 43 + Option<PostStats>, 44 + ); 45 + 35 46 fn build_postview( 36 - post: models::Post, 37 - author: ProfileViewBasic, 38 - labels: Vec<models::Label>, 39 - embed: Option<Embed>, 40 - threadgate: Option<ThreadgateView>, 41 - viewer: Option<PostViewerState>, 42 - stats: Option<PostStats>, 47 + (post, author, labels, embed, threadgate, viewer, stats): HydratePostsRet, 43 48 ) -> PostView { 44 49 let stats = stats 45 50 .map(|stats| RecordStats { ··· 135 140 let threadgate = self.hydrate_threadgate(threadgate).await; 136 141 let labels = self.get_label(&post.at_uri).await; 137 142 138 - Some(build_postview( 143 + Some(build_postview(( 139 144 post, author, labels, embed, threadgate, viewer, stats, 140 - )) 145 + ))) 141 146 } 142 147 143 - pub async fn hydrate_posts(&self, posts: Vec<String>) -> HashMap<String, PostView> { 148 + async fn hydrate_posts_inner(&self, posts: Vec<String>) -> HashMap<String, HydratePostsRet> { 144 149 let stats = self.loaders.post_stats.load_many(posts.clone()).await; 145 150 let posts = self.loaders.posts.load_many(posts).await; 146 151 ··· 150 155 .unzip::<_, _, Vec<_>, Vec<_>>(); 151 156 let authors = self.hydrate_profiles_basic(authors).await; 152 157 153 - let post_labels = self.get_label_many(&post_uris).await; 154 - let viewer_data = self.get_post_viewer_states(&post_uris).await; 
158 + let mut post_labels = self.get_label_many(&post_uris).await; 159 + let mut viewer_data = self.get_post_viewer_states(&post_uris).await; 155 160 156 161 let threadgates = posts 157 162 .values() ··· 159 164 .collect(); 160 165 let threadgates = self.hydrate_threadgates(threadgates).await; 161 166 162 - let embeds = self.hydrate_embeds(post_uris).await; 167 + let mut embeds = self.hydrate_embeds(post_uris).await; 163 168 164 169 posts 165 170 .into_iter() 166 171 .filter_map(|(uri, (post, threadgate))| { 167 - let author = authors.get(&post.did)?; 168 - let embed = embeds.get(&uri).cloned(); 172 + let author = authors.get(&post.did)?.clone(); 173 + let embed = embeds.remove(&uri); 169 174 let threadgate = threadgate.and_then(|tg| threadgates.get(&tg.at_uri).cloned()); 170 - let labels = post_labels.get(&uri).cloned().unwrap_or_default(); 175 + let labels = post_labels.remove(&uri).unwrap_or_default(); 171 176 let stats = stats.get(&uri).cloned(); 172 - let viewer = viewer_data.get(&uri).cloned(); 177 + let viewer = viewer_data.remove(&uri); 173 178 174 179 Some(( 175 180 uri, 176 - build_postview( 177 - post, 178 - author.to_owned(), 179 - labels, 180 - embed, 181 - threadgate, 182 - viewer, 183 - stats, 184 - ), 181 + (post, author, labels, embed, threadgate, viewer, stats), 185 182 )) 186 183 }) 187 184 .collect() 188 185 } 189 186 190 - pub async fn hydrate_feed_posts(&self, posts: Vec<String>) -> HashMap<String, FeedViewPost> { 191 - let stats = self.loaders.post_stats.load_many(posts.clone()).await; 192 - let posts = self.loaders.posts.load_many(posts).await; 193 - 194 - let (authors, post_uris) = posts 195 - .values() 196 - .map(|(post, _)| (post.did.clone(), post.at_uri.clone())) 197 - .unzip::<_, _, Vec<_>, Vec<_>>(); 198 - let authors = self.hydrate_profiles_basic(authors).await; 187 + pub async fn hydrate_posts(&self, posts: Vec<String>) -> HashMap<String, PostView> { 188 + self.hydrate_posts_inner(posts) 189 + .await 190 + .into_iter() 191 + 
.map(|(uri, data)| (uri, build_postview(data))) 192 + .collect() 193 + } 199 194 200 - let post_labels = self.get_label_many(&post_uris).await; 201 - let viewer_data = self.get_post_viewer_states(&post_uris).await; 202 - let embeds = self.hydrate_embeds(post_uris).await; 195 + pub async fn hydrate_feed_posts( 196 + &self, 197 + posts: Vec<RawFeedItem>, 198 + author_threads_only: bool, 199 + ) -> Vec<FeedViewPost> { 200 + let post_uris = posts 201 + .iter() 202 + .map(|item| item.post_uri().to_string()) 203 + .collect::<Vec<_>>(); 204 + let mut posts_hyd = self.hydrate_posts_inner(post_uris).await; 203 205 204 - let reply_refs = posts 206 + // we shouldn't show the parent when the post violates a threadgate. 207 + let reply_refs = posts_hyd 205 208 .values() 206 - .flat_map(|(post, _)| [post.parent_uri.clone(), post.root_uri.clone()]) 209 + .filter(|(post, ..)| !post.violates_threadgate) 210 + .flat_map(|(post, ..)| [post.parent_uri.clone(), post.root_uri.clone()]) 207 211 .flatten() 208 212 .collect::<Vec<_>>(); 209 - 210 213 let reply_posts = self.hydrate_posts(reply_refs).await; 211 214 215 + let repost_profiles = posts 216 + .iter() 217 + .filter_map(|item| item.repost_by()) 218 + .collect::<Vec<_>>(); 219 + let profiles_hydrated = self.hydrate_profiles_basic(repost_profiles).await; 220 + 212 221 posts 213 222 .into_iter() 214 - .filter_map(|(post_uri, (post, _))| { 215 - let author = authors.get(&post.did)?; 223 + .filter_map(|item| { 224 + let post = posts_hyd.remove(item.post_uri())?; 225 + let context = item.context(); 226 + 227 + let reply = if let RawFeedItem::Post { .. 
} = item { 228 + let root_uri = post.0.root_uri.as_ref(); 229 + let parent_uri = post.0.parent_uri.as_ref(); 216 230 217 - let root = post.root_uri.as_ref().and_then(|uri| reply_posts.get(uri)); 218 - let parent = post 219 - .parent_uri 220 - .as_ref() 221 - .and_then(|uri| reply_posts.get(uri)); 231 + let (root, parent) = if author_threads_only { 232 + if root_uri.is_some() && parent_uri.is_some() { 233 + let root = root_uri.and_then(|uri| posts_hyd.get(uri))?; 234 + let parent = parent_uri.and_then(|uri| posts_hyd.get(uri))?; 235 + 236 + let root = build_postview(root.clone()); 237 + let parent = build_postview(parent.clone()); 238 + 239 + (Some(root), Some(parent)) 240 + } else { 241 + (None, None) 242 + } 243 + } else { 244 + let root = root_uri.and_then(|uri| reply_posts.get(uri)).cloned(); 245 + let parent = parent_uri.and_then(|uri| reply_posts.get(uri)).cloned(); 246 + 247 + (root, parent) 248 + }; 222 249 223 - let reply = if post.parent_uri.is_some() && post.root_uri.is_some() { 224 - Some(ReplyRef { 225 - root: root.cloned().map(postview_to_replyref).unwrap_or( 226 - ReplyRefPost::NotFound { 227 - uri: post.root_uri.as_ref().unwrap().clone(), 228 - not_found: true, 229 - }, 230 - ), 231 - parent: parent.cloned().map(postview_to_replyref).unwrap_or( 232 - ReplyRefPost::NotFound { 233 - uri: post.parent_uri.as_ref().unwrap().clone(), 234 - not_found: true, 235 - }, 236 - ), 237 - grandparent_author: None, 238 - }) 250 + if root_uri.is_some() || parent_uri.is_some() { 251 + Some(ReplyRef { 252 + root: root.map(postview_to_replyref).unwrap_or( 253 + ReplyRefPost::NotFound { 254 + uri: root_uri.unwrap().to_owned(), 255 + not_found: true, 256 + }, 257 + ), 258 + parent: parent.map(postview_to_replyref).unwrap_or( 259 + ReplyRefPost::NotFound { 260 + uri: parent_uri.unwrap().to_owned(), 261 + not_found: true, 262 + }, 263 + ), 264 + grandparent_author: None, 265 + }) 266 + } else { 267 + None 268 + } 239 269 } else { 240 270 None 241 271 }; 242 272 243 - let 
embed = embeds.get(&post_uri).cloned(); 244 - let labels = post_labels.get(&post_uri).cloned().unwrap_or_default(); 245 - let stats = stats.get(&post_uri).cloned(); 246 - let viewer = viewer_data.get(&post_uri).cloned(); 247 - let post = 248 - build_postview(post, author.to_owned(), labels, embed, None, viewer, stats); 273 + let reason = match item { 274 + RawFeedItem::Repost { uri, by, at, .. } => { 275 + Some(FeedViewPostReason::Repost(FeedReasonRepost { 276 + by: profiles_hydrated.get(&by).cloned()?, 277 + uri: Some(uri), 278 + cid: None, 279 + indexed_at: at, 280 + })) 281 + } 282 + RawFeedItem::Pin { .. } => Some(FeedViewPostReason::Pin), 283 + _ => None, 284 + }; 285 + 286 + let post = build_postview(post); 249 287 250 - Some(( 251 - post_uri, 252 - FeedViewPost { 253 - post, 254 - reply, 255 - reason: None, 256 - feed_context: None, 257 - }, 258 - )) 288 + Some(FeedViewPost { 289 + post, 290 + reply, 291 + reason, 292 + feed_context: context, 293 + }) 259 294 }) 260 295 .collect() 261 296 } ··· 299 334 _ => ReplyRefPost::Post(post), 300 335 } 301 336 } 337 + 338 + #[derive(Debug)] 339 + pub enum RawFeedItem { 340 + Pin { 341 + uri: String, 342 + context: Option<String>, 343 + }, 344 + Post { 345 + uri: String, 346 + context: Option<String>, 347 + }, 348 + Repost { 349 + uri: String, 350 + post: String, 351 + by: String, 352 + at: chrono::DateTime<chrono::Utc>, 353 + context: Option<String>, 354 + }, 355 + } 356 + 357 + impl RawFeedItem { 358 + fn post_uri(&self) -> &str { 359 + match self { 360 + RawFeedItem::Pin { uri, .. } => uri, 361 + RawFeedItem::Post { uri, .. } => uri, 362 + RawFeedItem::Repost { post, .. } => post, 363 + } 364 + } 365 + 366 + fn repost_by(&self) -> Option<String> { 367 + match self { 368 + RawFeedItem::Repost { by, .. } => Some(by.clone()), 369 + _ => None, 370 + } 371 + } 372 + 373 + fn context(&self) -> Option<String> { 374 + match self { 375 + RawFeedItem::Pin { context, .. 
} => context.clone(), 376 + RawFeedItem::Post { context, .. } => context.clone(), 377 + RawFeedItem::Repost { context, .. } => context.clone(), 378 + } 379 + } 380 + }
+2 -2
parakeet/src/sql/thread.sql
··· 1 1 with recursive thread as (select at_uri, parent_uri, root_uri, 0 as depth 2 2 from posts 3 - where parent_uri = $1 3 + where parent_uri = $1 and violates_threadgate=FALSE 4 4 union all 5 5 select p.at_uri, p.parent_uri, p.root_uri, thread.depth + 1 6 6 from posts p 7 7 join thread on p.parent_uri = thread.at_uri 8 - where thread.depth <= $2) 8 + where thread.depth <= $2 and p.violates_threadgate=FALSE) 9 9 select * 10 10 from thread 11 11 order by depth desc;
+4 -2
parakeet/src/sql/thread_parent.sql
··· 1 1 with recursive parents as (select at_uri, cid, parent_uri, root_uri, 0 as depth 2 2 from posts 3 - where at_uri = (select parent_uri from posts where at_uri = $1) 3 + where 4 + at_uri = (select parent_uri from posts where at_uri = $1 and violates_threadgate = FALSE) 4 5 union all 5 6 select p.at_uri, p.cid, p.parent_uri, p.root_uri, parents.depth + 1 6 7 from posts p 7 8 join parents on p.at_uri = parents.parent_uri 8 - where parents.depth <= $2) 9 + where parents.depth <= $2 10 + and p.violates_threadgate = FALSE) 9 11 select * 10 12 from parents 11 13 order by depth desc;
+7 -8
parakeet/src/xrpc/app_bsky/feed/likes.rs
··· 1 + use crate::hydration::posts::RawFeedItem; 1 2 use crate::hydration::StatefulHydrator; 2 3 use crate::xrpc::error::{Error, XrpcResult}; 3 4 use crate::xrpc::extract::{AtpAcceptLabelers, AtpAuth}; ··· 57 58 .last() 58 59 .map(|(last, _)| last.timestamp_millis().to_string()); 59 60 60 - let at_uris = results 61 + let raw_feed = results 61 62 .iter() 62 - .map(|(_, uri)| uri.clone()) 63 + .map(|(_, uri)| RawFeedItem::Post { 64 + uri: uri.clone(), 65 + context: None, 66 + }) 63 67 .collect::<Vec<_>>(); 64 68 65 - let mut posts = hyd.hydrate_feed_posts(at_uris).await; 66 - 67 - let feed: Vec<_> = results 68 - .into_iter() 69 - .filter_map(|(_, uri)| posts.remove(&uri)) 70 - .collect(); 69 + let feed = hyd.hydrate_feed_posts(raw_feed, false).await; 71 70 72 71 Ok(Json(FeedRes { cursor, feed })) 73 72 }
+83 -92
parakeet/src/xrpc/app_bsky/feed/posts.rs
··· 1 + use crate::hydration::posts::RawFeedItem; 1 2 use crate::hydration::StatefulHydrator; 2 3 use crate::xrpc::app_bsky::graph::lists::ListWithCursorQuery; 3 4 use crate::xrpc::error::{Error, XrpcResult}; ··· 16 17 use diesel_async::{AsyncPgConnection, RunQueryDsl}; 17 18 use lexica::app_bsky::actor::ProfileView; 18 19 use lexica::app_bsky::feed::{ 19 - BlockedAuthor, FeedReasonRepost, FeedSkeletonResponse, FeedViewPost, FeedViewPostReason, 20 - PostView, SkeletonReason, ThreadViewPost, ThreadViewPostType, ThreadgateView, 20 + BlockedAuthor, FeedSkeletonResponse, FeedViewPost, PostView, SkeletonReason, ThreadViewPost, 21 + ThreadViewPostType, ThreadgateView, 21 22 }; 22 - use parakeet_db::schema; 23 + use parakeet_db::{models, schema}; 23 24 use reqwest::Url; 24 25 use serde::{Deserialize, Serialize}; 25 26 use std::collections::HashMap; ··· 113 114 114 115 let hyd = StatefulHydrator::new(&state.dataloaders, &state.cdn, &labelers, maybe_auth); 115 116 116 - let at_uris = skeleton.feed.iter().map(|v| v.post.clone()).collect(); 117 117 let repost_skeleton = skeleton 118 118 .feed 119 119 .iter() ··· 122 122 _ => None, 123 123 }) 124 124 .collect::<Vec<_>>(); 125 + let mut repost_data = get_skeleton_repost_data(&mut conn, repost_skeleton).await; 125 126 126 - let mut posts = hyd.hydrate_feed_posts(at_uris).await; 127 - let mut repost_data = get_skeleton_repost_data(&mut conn, &hyd, repost_skeleton).await; 128 - 129 - let feed = skeleton 127 + let raw_feed = skeleton 130 128 .feed 131 129 .into_iter() 132 - .filter_map(|item| { 133 - let mut post = posts.remove(&item.post)?; 134 - let reason = match item.reason { 135 - Some(SkeletonReason::Repost { repost }) => { 136 - repost_data.remove(&repost).map(FeedViewPostReason::Repost) 137 - } 138 - Some(SkeletonReason::Pin {}) => Some(FeedViewPostReason::Pin), 139 - _ => None, 140 - }; 141 - 142 - post.reason = reason; 143 - post.feed_context = item.feed_context; 144 - 145 - Some(post) 130 + .filter_map(|v| match v.reason 
{ 131 + Some(SkeletonReason::Repost { repost }) => { 132 + repost_data 133 + .remove_entry(&repost) 134 + .map(|(uri, (by, at))| RawFeedItem::Repost { 135 + uri, 136 + post: v.post, 137 + by, 138 + at: at.and_utc(), 139 + context: v.feed_context, 140 + }) 141 + } 142 + Some(SkeletonReason::Pin {}) => Some(RawFeedItem::Pin { 143 + uri: v.post, 144 + context: v.feed_context, 145 + }), 146 + None => Some(RawFeedItem::Post { 147 + uri: v.post, 148 + context: v.feed_context, 149 + }), 146 150 }) 147 151 .collect(); 148 152 153 + let feed = hyd.hydrate_feed_posts(raw_feed, false).await; 154 + 149 155 Ok(Json(FeedRes { 150 156 cursor: skeleton.cursor, 151 157 feed, 152 158 })) 153 159 } 154 160 155 - #[derive(Debug, Deserialize)] 161 + #[derive(Debug, Default, Eq, PartialEq, Deserialize)] 156 162 #[serde(rename_all = "snake_case")] 157 163 pub enum GetAuthorFeedFilter { 164 + #[default] 158 165 PostsWithReplies, 159 166 PostsNoReplies, 160 167 PostsWithMedia, 161 168 PostsAndAuthorThreads, 162 169 PostsWithVideo, 163 - } 164 - 165 - impl Default for GetAuthorFeedFilter { 166 - fn default() -> Self { 167 - Self::PostsWithReplies 168 - } 169 170 } 170 171 171 172 #[derive(Debug, Deserialize)] ··· 209 210 210 211 let pin = match query.include_pins && query.cursor.is_none() { 211 212 false => None, 212 - true => match crate::db::get_pinned_post_uri(&mut conn, &did).await? 
{ 213 - Some(post) => hyd.hydrate_post(post).await, 214 - None => None, 215 - }, 213 + true => crate::db::get_pinned_post_uri(&mut conn, &did).await?, 216 214 }; 217 215 218 216 let limit = query.limit.unwrap_or(50).clamp(1, 100); 219 217 220 - let mut posts_query = schema::posts::table 221 - .select((schema::posts::created_at, schema::posts::at_uri)) 222 - .filter(schema::posts::did.eq(did)) 218 + let mut posts_query = schema::author_feeds::table 219 + .select(models::AuthorFeedItem::as_select()) 220 + .left_join(schema::posts::table.on(schema::posts::at_uri.eq(schema::author_feeds::post))) 221 + .filter(schema::author_feeds::did.eq(&did)) 223 222 .into_boxed(); 224 223 225 224 if let Some(cursor) = datetime_cursor(query.cursor.as_ref()) { 226 - posts_query = posts_query.filter(schema::posts::created_at.lt(cursor)); 225 + posts_query = posts_query.filter(schema::author_feeds::sort_at.lt(cursor)); 227 226 } 228 227 228 + let author_threads_only = query.filter == GetAuthorFeedFilter::PostsAndAuthorThreads; 229 229 posts_query = match query.filter { 230 - GetAuthorFeedFilter::PostsWithReplies => posts_query, 230 + GetAuthorFeedFilter::PostsWithReplies => { 231 + posts_query.filter(schema::author_feeds::typ.eq("post")) 232 + } 231 233 GetAuthorFeedFilter::PostsNoReplies => { 232 234 posts_query.filter(schema::posts::parent_uri.is_null()) 233 235 } 234 - GetAuthorFeedFilter::PostsWithMedia => posts_query.filter(embed_type_filter(&[ 235 - "app.bsky.embed.video", 236 - "app.bsky.embed.images", 237 - ])), 236 + GetAuthorFeedFilter::PostsWithMedia => posts_query.filter( 237 + embed_type_filter(&["app.bsky.embed.video", "app.bsky.embed.images"]) 238 + .and(schema::author_feeds::typ.eq("post")), 239 + ), 238 240 GetAuthorFeedFilter::PostsAndAuthorThreads => posts_query.filter( 239 241 (schema::posts::parent_uri 240 - .like(format!("at://{}/%", &query.actor)) 242 + .like(format!("at://{did}/%")) 241 243 .or(schema::posts::parent_uri.is_null())) 242 244 .and( 243 245 
schema::posts::root_uri 244 - .like(format!("at://{}/%", &query.actor)) 246 + .like(format!("at://{did}/%")) 245 247 .or(schema::posts::root_uri.is_null()), 246 248 ), 247 249 ), 248 - GetAuthorFeedFilter::PostsWithVideo => { 249 - posts_query.filter(embed_type_filter(&["app.bsky.embed.video"])) 250 - } 250 + GetAuthorFeedFilter::PostsWithVideo => posts_query.filter( 251 + embed_type_filter(&["app.bsky.embed.video"]).and(schema::author_feeds::typ.eq("post")), 252 + ), 251 253 }; 252 254 253 255 let results = posts_query 254 - .order(schema::posts::created_at.desc()) 256 + .order(schema::author_feeds::sort_at.desc()) 255 257 .limit(limit as i64) 256 - .load::<(chrono::DateTime<chrono::Utc>, String)>(&mut conn) 258 + .load(&mut conn) 257 259 .await?; 258 260 259 261 let cursor = results 260 262 .last() 261 - .map(|(last, _)| last.timestamp_millis().to_string()); 263 + .map(|item| item.sort_at.timestamp_millis().to_string()); 262 264 263 - let at_uris = results 264 - .iter() 265 - .map(|(_, uri)| uri.clone()) 266 - .collect::<Vec<_>>(); 267 - 268 - let mut posts = hyd.hydrate_feed_posts(at_uris).await; 269 - 270 - let mut feed: Vec<_> = results 265 + let mut raw_feed = results 271 266 .into_iter() 272 - .filter_map(|(_, uri)| posts.remove(&uri)) 273 - .collect(); 267 + .filter_map(|item| match &*item.typ { 268 + "post" => Some(RawFeedItem::Post { 269 + uri: item.post, 270 + context: None, 271 + }), 272 + "repost" => Some(RawFeedItem::Repost { 273 + uri: item.uri, 274 + post: item.post, 275 + by: item.did, 276 + at: item.sort_at, 277 + context: None, 278 + }), 279 + _ => None, 280 + }) 281 + .collect::<Vec<_>>(); 274 282 275 283 if let Some(post) = pin { 276 - feed.insert( 284 + raw_feed.insert( 277 285 0, 278 - FeedViewPost { 279 - post, 280 - reply: None, 281 - reason: Some(FeedViewPostReason::Pin), 282 - feed_context: None, 286 + RawFeedItem::Pin { 287 + uri: post, 288 + context: None, 283 289 }, 284 290 ); 285 291 } 292 + 293 + let feed = 
hyd.hydrate_feed_posts(raw_feed, author_threads_only).await; 286 294 287 295 Ok(Json(FeedRes { cursor, feed })) 288 296 } ··· 325 333 .last() 326 334 .map(|(last, _)| last.timestamp_millis().to_string()); 327 335 328 - let at_uris = results 336 + let raw_feed = results 329 337 .iter() 330 - .map(|(_, uri)| uri.clone()) 338 + .map(|(_, uri)| RawFeedItem::Post { 339 + uri: uri.clone(), 340 + context: None, 341 + }) 331 342 .collect::<Vec<_>>(); 332 343 333 - let mut posts = hyd.hydrate_feed_posts(at_uris).await; 334 - 335 - let feed = results 336 - .into_iter() 337 - .filter_map(|(_, uri)| posts.remove(&uri)) 338 - .collect(); 344 + let feed = hyd.hydrate_feed_posts(raw_feed, false).await; 339 345 340 346 Ok(Json(FeedRes { cursor, feed })) 341 347 } ··· 669 675 } 670 676 } 671 677 672 - async fn get_skeleton_repost_data<'a>( 678 + async fn get_skeleton_repost_data( 673 679 conn: &mut AsyncPgConnection, 674 - hyd: &StatefulHydrator<'a>, 675 680 reposts: Vec<String>, 676 - ) -> HashMap<String, FeedReasonRepost> { 681 + ) -> HashMap<String, (String, NaiveDateTime)> { 677 682 let Ok(repost_data) = schema::records::table 678 683 .select(( 679 684 schema::records::at_uri, ··· 687 692 return HashMap::new(); 688 693 }; 689 694 690 - let profiles = repost_data.iter().map(|(_, did, _)| did.clone()).collect(); 691 - let profiles = hyd.hydrate_profiles_basic(profiles).await; 692 - 693 695 repost_data 694 696 .into_iter() 695 - .filter_map(|(uri, did, indexed_at)| { 696 - let by = profiles.get(&did).cloned()?; 697 - 698 - let repost = FeedReasonRepost { 699 - by, 700 - uri: Some(uri.clone()), 701 - cid: None, // okay, we do have this, but the app doesn't seem to be bothered about not setting it. 702 - indexed_at: indexed_at.and_utc(), 703 - }; 704 - 705 - Some((uri, repost)) 706 - }) 697 + .map(|(uri, did, at)| (uri, (did, at))) 707 698 .collect() 708 699 } 709 700