feat: add BearBlog RSS feed source support · desertthunder.dev/pai@cde71d0

+19

DEPLOYMENT.md

··· 271 271 - `0 */6 * * *` - Every 6 hours 272 272 - `0 0 * * *` - Daily at midnight 273 273 274 + ### Environment Variables 275 + 276 + Configure sources in `wrangler.toml` under `[vars]`: 277 + 278 + ```toml 279 + [vars] 280 + # Substack RSS feed URL 281 + SUBSTACK_URL = "https://patternmatched.substack.com" 282 + 283 + # Bluesky handle 284 + BLUESKY_HANDLE = "desertthunder.dev" 285 + 286 + # Leaflet publications (comma-separated id:url pairs) 287 + LEAFLET_URLS = "desertthunder:https://desertthunder.leaflet.pub,stormlightlabs:https://stormlightlabs.leaflet.pub" 288 + 289 + # BearBlog publications (comma-separated id:url pairs) 290 + BEARBLOG_URLS = "desertthunder:https://desertthunder.bearblog.dev" 291 + ``` 292 + 274 293 ### API Endpoints 275 294 276 295 The Worker exposes the same API as the self-hosted server:

+56 -1

README.md

··· 2 2 3 3 # Personal Activity Index 4 4 5 - A CLI that ingests content from Substack, Bluesky, and Leaflet into SQLite, with an optional Cloudflare Worker + D1 deployment path. 5 + A CLI that ingests content from Substack, Bluesky, Leaflet, and BearBlog into SQLite, with an optional Cloudflare Worker + D1 deployment path. 6 6 7 7 ## Features 8 8 ··· 10 10 - **Substack** via RSS feeds 11 11 - **Bluesky** via AT Protocol 12 12 - **Leaflet** publications via RSS feeds 13 + - **BearBlog** publications via RSS feeds 13 14 - Local SQLite storage with full-text search 14 15 - Flexible filtering and querying via `pai list` / `pai export` 15 16 - Self-hostable HTTP API (`pai serve` exposes `/api/feed`, `/api/item/{id}`, and `/status`) ··· 222 223 <title>Dev Log: 2025-11-22</title> 223 224 <link>https://desertthunder.leaflet.pub/3m6a7fuk7u22p</link> 224 225 <guid>https://desertthunder.leaflet.pub/3m6a7fuk7u22p</guid> 226 + <pubDate>Fri, 22 Nov 2025 16:22:54 +0000</pubDate> 227 + <description>Post summary or excerpt</description> 228 + </item> 229 + ``` 230 + 231 + ### BearBlog (RSS) 232 + 233 + #### Overview 234 + 235 + BearBlog is a minimalist blogging platform that provides RSS feeds at `{slug}.bearblog.dev/feed/`, making them straightforward to fetch using standard RSS parsing. 236 + 237 + **Implementation:** 238 + 239 + - Fetches RSS feed using `feed-rs` parser 240 + - Maps RSS `<item>` elements to standardized `Item` struct 241 + - Supports multiple blogs via config array 242 + - Uses entry ID from feed, falls back to link if missing 243 + - Normalizes publication dates to ISO 8601 format 244 + 245 + **Key mappings:** 246 + 247 + - `id` = RSS entry ID or link 248 + - `source_kind` = `bearblog` 249 + - `source_id` = Blog ID from config (e.g., `desertthunder`) 250 + - `title` = RSS entry title 251 + - `summary` = RSS entry summary/description 252 + - `url` = RSS entry link 253 + - `content_html` = RSS content body (if available) 254 + - `author` = RSS entry author 255 + - `published_at` = RSS published date or updated date (normalized to ISO 8601) 256 + 257 + **Configuration:** 258 + 259 + BearBlog supports multiple blogs through array configuration: 260 + 261 + ```toml 262 + [[sources.bearblog]] 263 + enabled = true 264 + id = "desertthunder" 265 + base_url = "https://desertthunder.bearblog.dev" 266 + 267 + [[sources.bearblog]] 268 + enabled = true 269 + id = "another-blog" 270 + base_url = "https://another-blog.bearblog.dev" 271 + ``` 272 + 273 + **Example RSS structure:** 274 + 275 + ```xml 276 + <item> 277 + <title>My Blog Post</title> 278 + <link>https://desertthunder.bearblog.dev/my-blog-post</link> 279 + <guid>https://desertthunder.bearblog.dev/my-blog-post</guid> 225 280 <pubDate>Fri, 22 Nov 2025 16:22:54 +0000</pubDate> 226 281 <description>Post summary or excerpt</description> 227 282 </item>

+6

config.example.toml

··· 39 39 enabled = true 40 40 id = "stormlightlabs" 41 41 base_url = "https://stormlightlabs.leaflet.pub" 42 + 43 + # BearBlog publications (can have multiple) 44 + [[sources.bearblog]] 45 + enabled = true 46 + id = "desertthunder" 47 + base_url = "https://desertthunder.bearblog.dev"

+133

core/src/fetchers/bearblog.rs

··· 1 + use crate::{BearBlogConfig, Item, PaiError, Result, SourceFetcher, SourceKind, Storage}; 2 + use chrono::Utc; 3 + use feed_rs::parser; 4 + 5 + /// Fetcher for BearBlog publications via RSS 6 + /// 7 + /// Retrieves posts from BearBlog blogs by parsing their RSS feeds. 8 + /// Each BearBlog provides an RSS feed at {slug}.bearblog.dev/feed/. 9 + pub struct BearBlogFetcher { 10 + config: BearBlogConfig, 11 + client: reqwest::Client, 12 + } 13 + 14 + impl BearBlogFetcher { 15 + /// Creates a new BearBlog fetcher with the given configuration 16 + pub fn new(config: BearBlogConfig) -> Self { 17 + Self { config, client: reqwest::Client::new() } 18 + } 19 + 20 + /// Fetches and parses the RSS feed 21 + async fn fetch_feed(&self) -> Result<feed_rs::model::Feed> { 22 + let feed_url = format!("{}/feed/", self.config.base_url.trim_end_matches('/')); 23 + let response = self 24 + .client 25 + .get(&feed_url) 26 + .send() 27 + .await 28 + .map_err(|e| PaiError::Fetch(format!("Failed to fetch BearBlog RSS feed: {e}")))?; 29 + 30 + let body = response 31 + .text() 32 + .await 33 + .map_err(|e| PaiError::Fetch(format!("Failed to read response body: {e}")))?; 34 + 35 + parser::parse(body.as_bytes()).map_err(|e| PaiError::Parse(format!("Failed to parse RSS feed: {e}"))) 36 + } 37 + } 38 + 39 + impl SourceFetcher for BearBlogFetcher { 40 + fn sync(&self, storage: &dyn Storage) -> Result<()> { 41 + let runtime = 42 + tokio::runtime::Runtime::new().map_err(|e| PaiError::Fetch(format!("Failed to create runtime: {e}")))?; 43 + 44 + runtime.block_on(async { 45 + let feed = self.fetch_feed().await?; 46 + 47 + for entry in feed.entries { 48 + let id = entry.id.clone(); 49 + let url = entry 50 + .links 51 + .first() 52 + .map(|link| link.href.clone()) 53 + .unwrap_or_else(|| id.clone()); 54 + 55 + let title = entry.title.as_ref().map(|t| t.content.clone()); 56 + let summary = entry.summary.as_ref().map(|s| s.content.clone()); 57 + let author = entry.authors.first().map(|a| a.name.clone()); 58 + let content_html = entry.content.and_then(|c| c.body); 59 + 60 + let published_at = entry 61 + .published 62 + .or(entry.updated) 63 + .map(|dt| dt.to_rfc3339()) 64 + .unwrap_or_else(|| Utc::now().to_rfc3339()); 65 + 66 + let item = Item { 67 + id, 68 + source_kind: SourceKind::BearBlog, 69 + source_id: self.config.id.clone(), 70 + author, 71 + title, 72 + summary, 73 + url, 74 + content_html, 75 + published_at, 76 + created_at: Utc::now().to_rfc3339(), 77 + }; 78 + 79 + storage.insert_or_replace_item(&item)?; 80 + } 81 + 82 + Ok(()) 83 + }) 84 + } 85 + } 86 + 87 + #[cfg(test)] 88 + mod tests { 89 + use super::*; 90 + 91 + #[test] 92 + fn parse_valid_rss() { 93 + let rss = r#"<?xml version="1.0" encoding="UTF-8"?> 94 + <rss version="2.0"> 95 + <channel> 96 + <title>Test BearBlog</title> 97 + <link>https://test.bearblog.dev</link> 98 + <description>Test blog</description> 99 + <item> 100 + <title>Test Post</title> 101 + <link>https://test.bearblog.dev/test-post</link> 102 + <guid>test-guid</guid> 103 + <pubDate>Mon, 01 Jan 2024 12:00:00 +0000</pubDate> 104 + <description>Test summary</description> 105 + </item> 106 + </channel> 107 + </rss>"#; 108 + 109 + let feed = parser::parse(rss.as_bytes()).unwrap(); 110 + assert_eq!(feed.entries.len(), 1); 111 + assert_eq!(feed.entries[0].title.as_ref().unwrap().content, "Test Post"); 112 + } 113 + 114 + #[test] 115 + fn parse_invalid_rss() { 116 + let invalid_rss = "this is not valid XML"; 117 + let result = parser::parse(invalid_rss.as_bytes()); 118 + assert!(result.is_err()); 119 + } 120 + 121 + #[test] 122 + fn parse_empty_rss() { 123 + let rss = r#"<?xml version="1.0" encoding="UTF-8"?> 124 + <rss version="2.0"> 125 + <channel> 126 + <title>Empty Feed</title> 127 + </channel> 128 + </rss>"#; 129 + 130 + let feed = parser::parse(rss.as_bytes()).unwrap(); 131 + assert_eq!(feed.entries.len(), 0); 132 + } 133 + }

+2

core/src/fetchers/mod.rs

··· 1 + mod bearblog; 1 2 mod bluesky; 2 3 mod leaflet; 3 4 mod substack; 4 5 6 + pub use bearblog::BearBlogFetcher; 5 7 pub use bluesky::BlueskyFetcher; 6 8 pub use leaflet::LeafletFetcher; 7 9 pub use substack::SubstackFetcher;

+36 -1

core/src/lib.rs

··· 5 5 use std::{fmt, str::FromStr}; 6 6 use thiserror::Error; 7 7 8 - pub use fetchers::{BlueskyFetcher, LeafletFetcher, SubstackFetcher}; 8 + pub use fetchers::{BearBlogFetcher, BlueskyFetcher, LeafletFetcher, SubstackFetcher}; 9 9 10 10 /// Errors that can occur in the Personal Activity Index 11 11 #[derive(Error, Debug)] ··· 41 41 Substack, 42 42 Bluesky, 43 43 Leaflet, 44 + BearBlog, 44 45 } 45 46 46 47 impl fmt::Display for SourceKind { ··· 49 50 SourceKind::Substack => write!(f, "substack"), 50 51 SourceKind::Bluesky => write!(f, "bluesky"), 51 52 SourceKind::Leaflet => write!(f, "leaflet"), 53 + SourceKind::BearBlog => write!(f, "bearblog"), 52 54 } 53 55 } 54 56 } ··· 61 63 "substack" => Ok(SourceKind::Substack), 62 64 "bluesky" => Ok(SourceKind::Bluesky), 63 65 "leaflet" => Ok(SourceKind::Leaflet), 66 + "bearblog" => Ok(SourceKind::BearBlog), 64 67 _ => Err(PaiError::UnknownSourceKind(s.to_string())), 65 68 } 66 69 } ··· 146 149 pub base_url: String, 147 150 } 148 151 152 + /// Configuration for a single BearBlog publication 153 + #[derive(Debug, Clone, Deserialize, Serialize)] 154 + pub struct BearBlogConfig { 155 + #[serde(default)] 156 + pub enabled: bool, 157 + pub id: String, 158 + pub base_url: String, 159 + } 160 + 149 161 /// Database configuration 150 162 #[derive(Debug, Clone, Deserialize, Serialize, Default)] 151 163 pub struct DatabaseConfig { ··· 175 187 pub bluesky: Option<BlueskyConfig>, 176 188 #[serde(default)] 177 189 pub leaflet: Vec<LeafletConfig>, 190 + #[serde(default)] 191 + pub bearblog: Vec<BearBlogConfig>, 178 192 } 179 193 180 194 /// Configuration for all sources ··· 273 287 } 274 288 } 275 289 290 + for bearblog_config in &config.sources.bearblog { 291 + if !bearblog_config.enabled { 292 + continue; 293 + } 294 + 295 + let should_sync = match (kind, source_id) { 296 + (Some(k), _) if k != SourceKind::BearBlog => false, 297 + (_, Some(sid)) => bearblog_config.id == sid, 298 + _ => true, 299 + }; 300 + 301 + if should_sync { 302 + let fetcher = BearBlogFetcher::new(bearblog_config.clone()); 303 + fetcher.sync(storage)?; 304 + synced_count += 1; 305 + } 306 + } 307 + 276 308 Ok(synced_count) 277 309 } 278 310 ··· 285 317 assert_eq!(SourceKind::Substack.to_string(), "substack"); 286 318 assert_eq!(SourceKind::Bluesky.to_string(), "bluesky"); 287 319 assert_eq!(SourceKind::Leaflet.to_string(), "leaflet"); 320 + assert_eq!(SourceKind::BearBlog.to_string(), "bearblog"); 288 321 } 289 322 290 323 #[test] ··· 292 325 assert_eq!("substack".parse::<SourceKind>().unwrap(), SourceKind::Substack); 293 326 assert_eq!("BLUESKY".parse::<SourceKind>().unwrap(), SourceKind::Bluesky); 294 327 assert_eq!("Leaflet".parse::<SourceKind>().unwrap(), SourceKind::Leaflet); 328 + assert_eq!("bearblog".parse::<SourceKind>().unwrap(), SourceKind::BearBlog); 329 + assert_eq!("BEARBLOG".parse::<SourceKind>().unwrap(), SourceKind::BearBlog); 295 330 assert!("invalid".parse::<SourceKind>().is_err()); 296 331 } 297 332

-4

rustfmt.toml

··· 1 1 max_width = 120 2 2 fn_params_layout = "Compressed" 3 - fn_single_line = true 4 - fn_args_layout = "Compressed" 5 - format_strings = true 6 3 single_line_if_else_max_width = 100 7 4 single_line_let_else_max_width = 100 8 - struct_field_align_threshold = 20 9 5 use_field_init_shorthand = true 10 6 struct_lit_width=100

+128 -1

worker/src/lib.rs

··· 8 8 substack: Option<SubstackConfig>, 9 9 bluesky: Option<BlueskyConfig>, 10 10 leaflet: Vec<LeafletConfig>, 11 + bearblog: Vec<BearBlogConfig>, 11 12 } 12 13 13 14 #[derive(Deserialize)] ··· 22 23 23 24 #[derive(Deserialize)] 24 25 struct LeafletConfig { 26 + id: String, 27 + base_url: String, 28 + } 29 + 30 + #[derive(Deserialize)] 31 + struct BearBlogConfig { 25 32 id: String, 26 33 base_url: String, 27 34 } ··· 187 194 } 188 195 } 189 196 197 + for bearblog_config in config.bearblog { 198 + match sync_bearblog(&bearblog_config, &db).await { 199 + Ok(count) => { 200 + console_log!("Synced {} items from BearBlog ({})", count, bearblog_config.id); 201 + synced += count; 202 + } 203 + Err(e) => console_error!("BearBlog sync failed for {}: {}", bearblog_config.id, e), 204 + } 205 + } 206 + 190 207 console_log!("Sync completed: {} total items", synced); 191 208 Ok(()) 192 209 } ··· 218 235 Vec::new() 219 236 }; 220 237 221 - Ok(SyncConfig { substack, bluesky, leaflet }) 238 + let bearblog = if let Ok(urls) = env.var("BEARBLOG_URLS") { 239 + urls.to_string() 240 + .split(',') 241 + .filter_map(|entry| { 242 + let parts: Vec<&str> = entry.trim().splitn(2, ':').collect(); 243 + if parts.len() == 2 { 244 + Some(BearBlogConfig { id: parts[0].to_string(), base_url: parts[1].to_string() }) 245 + } else { 246 + None 247 + } 248 + }) 249 + .collect() 250 + } else { 251 + Vec::new() 252 + }; 253 + 254 + Ok(SyncConfig { substack, bluesky, leaflet, bearblog }) 222 255 } 223 256 224 257 async fn sync_substack(config: &SubstackConfig, db: &D1Database) -> Result<usize> { ··· 414 447 Ok(count) 415 448 } 416 449 450 + async fn sync_bearblog(config: &BearBlogConfig, db: &D1Database) -> Result<usize> { 451 + let feed_url = format!("{}/feed/", config.base_url.trim_end_matches('/')); 452 + 453 + let mut req = Request::new(&feed_url, Method::Get)?; 454 + req.headers_mut()?.set("User-Agent", "pai-worker/0.1.0")?; 455 + 456 + let mut resp = Fetch::Request(req).send().await?; 457 + let body = resp.text().await?; 458 + 459 + let channel = 460 + rss::Channel::read_from(body.as_bytes()).map_err(|e| Error::RustError(format!("Failed to parse RSS: {e}")))?; 461 + 462 + let mut count = 0; 463 + 464 + for item in channel.items() { 465 + let id = item.guid().map(|g| g.value()).unwrap_or(item.link().unwrap_or("")); 466 + let url = item.link().unwrap_or(id); 467 + let title = item.title(); 468 + let summary = item.description(); 469 + let author = item.author(); 470 + let content_html = item.content(); 471 + 472 + let published_at = item 473 + .pub_date() 474 + .and_then(|s| chrono::DateTime::parse_from_rfc2822(s).ok()) 475 + .map(|dt| dt.to_rfc3339()) 476 + .unwrap_or_else(|| chrono::Utc::now().to_rfc3339()); 477 + 478 + let created_at = chrono::Utc::now().to_rfc3339(); 479 + 480 + let stmt = db.prepare( 481 + "INSERT OR REPLACE INTO items (id, source_kind, source_id, author, title, summary, url, content_html, published_at, created_at) 482 + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)" 483 + ); 484 + 485 + stmt.bind(&[ 486 + id.into(), 487 + "bearblog".into(), 488 + config.id.clone().into(), 489 + author.map(|s| s.into()).unwrap_or(JsValue::NULL), 490 + title.map(|s| s.into()).unwrap_or(JsValue::NULL), 491 + summary.map(|s| s.into()).unwrap_or(JsValue::NULL), 492 + url.into(), 493 + content_html.map(|s| s.into()).unwrap_or(JsValue::NULL), 494 + published_at.into(), 495 + created_at.into(), 496 + ])? 497 + .run() 498 + .await?; 499 + 500 + count += 1; 501 + } 502 + 503 + Ok(count) 504 + } 505 + 417 506 fn normalize_source_id(base_url: &str) -> String { 418 507 base_url 419 508 .trim_start_matches("https://") ··· 580 669 api_url, 581 670 "https://public.api.bsky.app/xrpc/com.atproto.repo.listRecords?repo=desertthunder.bsky.social&collection=pub.leaflet.post&limit=50" 582 671 ); 672 + } 673 + 674 + #[test] 675 + fn test_bearblog_feed_url_construction() { 676 + let base_url = "https://desertthunder.bearblog.dev"; 677 + let feed_url = format!("{}/feed/", base_url.trim_end_matches('/')); 678 + assert_eq!(feed_url, "https://desertthunder.bearblog.dev/feed/"); 679 + } 680 + 681 + #[test] 682 + fn test_bearblog_config_parsing() { 683 + let entry = "desertthunder:https://desertthunder.bearblog.dev"; 684 + let parts: Vec<&str> = entry.trim().splitn(2, ':').collect(); 685 + assert_eq!(parts.len(), 2); 686 + assert_eq!(parts[0], "desertthunder"); 687 + assert_eq!(parts[1], "https://desertthunder.bearblog.dev"); 688 + } 689 + 690 + #[test] 691 + fn test_bearblog_config_parsing_multiple() { 692 + let urls = "id1:https://blog1.bearblog.dev,id2:https://blog2.bearblog.dev"; 693 + let configs: Vec<_> = urls 694 + .split(',') 695 + .filter_map(|entry| { 696 + let parts: Vec<&str> = entry.trim().splitn(2, ':').collect(); 697 + if parts.len() == 2 { 698 + Some((parts[0].to_string(), parts[1].to_string())) 699 + } else { 700 + None 701 + } 702 + }) 703 + .collect(); 704 + 705 + assert_eq!(configs.len(), 2); 706 + assert_eq!(configs[0].0, "id1"); 707 + assert_eq!(configs[0].1, "https://blog1.bearblog.dev"); 708 + assert_eq!(configs[1].0, "id2"); 709 + assert_eq!(configs[1].1, "https://blog2.bearblog.dev"); 583 710 } 584 711 }

+4

worker/wrangler.example.toml

··· 32 32 # Format: "id1:https://pub1.leaflet.pub,id2:https://pub2.leaflet.pub" 33 33 LEAFLET_URLS = "desertthunder:https://desertthunder.leaflet.pub,stormlightlabs:https://stormlightlabs.leaflet.pub" 34 34 35 + # BearBlog publications (comma-separated id:url pairs) 36 + # Format: "id1:https://blog1.bearblog.dev,id2:https://blog2.bearblog.dev" 37 + BEARBLOG_URLS = "desertthunder:https://desertthunder.bearblog.dev" 38 + 35 39 # Optional: Logging level 36 40 # LOG_LEVEL = "info"

Configure Feed

Configure Feed