···271271- `0 */6 * * *` - Every 6 hours
272272- `0 0 * * *` - Daily at midnight
273273274274+### Environment Variables
275275+276276+Configure sources in `wrangler.toml` under `[vars]`:
277277+278278+```toml
279279+[vars]
280280+# Substack RSS feed URL
281281+SUBSTACK_URL = "https://patternmatched.substack.com"
282282+283283+# Bluesky handle
284284+BLUESKY_HANDLE = "desertthunder.dev"
285285+286286+# Leaflet publications (comma-separated id:url pairs)
287287+LEAFLET_URLS = "desertthunder:https://desertthunder.leaflet.pub,stormlightlabs:https://stormlightlabs.leaflet.pub"
288288+289289+# BearBlog publications (comma-separated id:url pairs)
290290+BEARBLOG_URLS = "desertthunder:https://desertthunder.bearblog.dev"
291291+```
292292+274293### API Endpoints
275294276295The Worker exposes the same API as the self-hosted server:
+56-1
README.md
···2233# Personal Activity Index
4455-A CLI that ingests content from Substack, Bluesky, and Leaflet into SQLite, with an optional Cloudflare Worker + D1 deployment path.
55+A CLI that ingests content from Substack, Bluesky, Leaflet, and BearBlog into SQLite, with an optional Cloudflare Worker + D1 deployment path.
6677## Features
88···1010 - **Substack** via RSS feeds
1111 - **Bluesky** via AT Protocol
1212 - **Leaflet** publications via RSS feeds
1313+ - **BearBlog** publications via RSS feeds
1314- Local SQLite storage with full-text search
1415- Flexible filtering and querying via `pai list` / `pai export`
1516- Self-hostable HTTP API (`pai serve` exposes `/api/feed`, `/api/item/{id}`, and `/status`)
···222223 <title>Dev Log: 2025-11-22</title>
223224 <link>https://desertthunder.leaflet.pub/3m6a7fuk7u22p</link>
224225 <guid>https://desertthunder.leaflet.pub/3m6a7fuk7u22p</guid>
226226+ <pubDate>Sat, 22 Nov 2025 16:22:54 +0000</pubDate>
227227+ <description>Post summary or excerpt</description>
228228+</item>
229229+```
230230+231231+### BearBlog (RSS)
232232+233233+#### Overview
234234+235235+BearBlog is a minimalist blogging platform that provides RSS feeds at `{slug}.bearblog.dev/feed/`, making them straightforward to fetch using standard RSS parsing.
236236+237237+**Implementation:**
238238+239239+- Fetches RSS feed using `feed-rs` parser
240240+- Maps RSS `<item>` elements to standardized `Item` struct
241241+- Supports multiple blogs via config array
242242+- Uses the entry ID from the feed as the item ID; the item URL falls back to that ID when the entry has no link
243243+- Normalizes publication dates to ISO 8601 format
244244+245245+**Key mappings:**
246246+247247+- `id` = RSS entry ID or link
248248+- `source_kind` = `bearblog`
249249+- `source_id` = Blog ID from config (e.g., `desertthunder`)
250250+- `title` = RSS entry title
251251+- `summary` = RSS entry summary/description
252252+- `url` = RSS entry link
253253+- `content_html` = RSS content body (if available)
254254+- `author` = RSS entry author
255255+- `published_at` = RSS published date, else updated date, else the time of the sync (normalized to ISO 8601)
256256+257257+**Configuration:**
258258+259259+BearBlog supports multiple blogs through array configuration:
260260+261261+```toml
262262+[[sources.bearblog]]
263263+enabled = true
264264+id = "desertthunder"
265265+base_url = "https://desertthunder.bearblog.dev"
266266+267267+[[sources.bearblog]]
268268+enabled = true
269269+id = "another-blog"
270270+base_url = "https://another-blog.bearblog.dev"
271271+```
272272+273273+**Example RSS structure:**
274274+275275+```xml
276276+<item>
277277+ <title>My Blog Post</title>
278278+ <link>https://desertthunder.bearblog.dev/my-blog-post</link>
279279+ <guid>https://desertthunder.bearblog.dev/my-blog-post</guid>
225280 <pubDate>Sat, 22 Nov 2025 16:22:54 +0000</pubDate>
226281 <description>Post summary or excerpt</description>
227282</item>
···11+use crate::{BearBlogConfig, Item, PaiError, Result, SourceFetcher, SourceKind, Storage};
22+use chrono::Utc;
33+use feed_rs::parser;
44+55+/// Fetcher for BearBlog publications via RSS
66+///
77+/// Retrieves posts from BearBlog blogs by parsing their RSS feeds.
88+/// Each BearBlog provides an RSS feed at {slug}.bearblog.dev/feed/.
99+pub struct BearBlogFetcher {
1010+ config: BearBlogConfig,
1111+ client: reqwest::Client,
1212+}
1313+1414+impl BearBlogFetcher {
1515+ /// Creates a new BearBlog fetcher with the given configuration
1616+ pub fn new(config: BearBlogConfig) -> Self {
1717+ Self { config, client: reqwest::Client::new() }
1818+ }
1919+2020+ /// Fetches and parses the RSS feed
2121+ async fn fetch_feed(&self) -> Result<feed_rs::model::Feed> {
2222+ let feed_url = format!("{}/feed/", self.config.base_url.trim_end_matches('/'));
2323+ let response = self
2424+ .client
2525+ .get(&feed_url)
2626+ .send()
2727+ .await
2828+ .map_err(|e| PaiError::Fetch(format!("Failed to fetch BearBlog RSS feed: {e}")))?;
2929+3030+ let body = response
3131+ .text()
3232+ .await
3333+ .map_err(|e| PaiError::Fetch(format!("Failed to read response body: {e}")))?;
3434+3535+ parser::parse(body.as_bytes()).map_err(|e| PaiError::Parse(format!("Failed to parse RSS feed: {e}")))
3636+ }
3737+}
3838+3939+impl SourceFetcher for BearBlogFetcher {
4040+ fn sync(&self, storage: &dyn Storage) -> Result<()> {
4141+ let runtime =
4242+ tokio::runtime::Runtime::new().map_err(|e| PaiError::Fetch(format!("Failed to create runtime: {e}")))?;
4343+4444+ runtime.block_on(async {
4545+ let feed = self.fetch_feed().await?;
4646+4747+ for entry in feed.entries {
4848+ let id = entry.id.clone();
4949+ let url = entry
5050+ .links
5151+ .first()
5252+ .map(|link| link.href.clone())
5353+ .unwrap_or_else(|| id.clone());
5454+5555+ let title = entry.title.as_ref().map(|t| t.content.clone());
5656+ let summary = entry.summary.as_ref().map(|s| s.content.clone());
5757+ let author = entry.authors.first().map(|a| a.name.clone());
5858+ let content_html = entry.content.and_then(|c| c.body);
5959+6060+ let published_at = entry
6161+ .published
6262+ .or(entry.updated)
6363+ .map(|dt| dt.to_rfc3339())
6464+ .unwrap_or_else(|| Utc::now().to_rfc3339());
6565+6666+ let item = Item {
6767+ id,
6868+ source_kind: SourceKind::BearBlog,
6969+ source_id: self.config.id.clone(),
7070+ author,
7171+ title,
7272+ summary,
7373+ url,
7474+ content_html,
7575+ published_at,
7676+ created_at: Utc::now().to_rfc3339(),
7777+ };
7878+7979+ storage.insert_or_replace_item(&item)?;
8080+ }
8181+8282+ Ok(())
8383+ })
8484+ }
8585+}
#[cfg(test)]
mod tests {
    use super::*;

    /// A well-formed single-item RSS document yields exactly one entry with the expected title.
    #[test]
    fn parse_valid_rss() {
        let document = r#"<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
    <title>Test BearBlog</title>
    <link>https://test.bearblog.dev</link>
    <description>Test blog</description>
    <item>
        <title>Test Post</title>
        <link>https://test.bearblog.dev/test-post</link>
        <guid>test-guid</guid>
        <pubDate>Mon, 01 Jan 2024 12:00:00 +0000</pubDate>
        <description>Test summary</description>
    </item>
</channel>
</rss>"#;

        let parsed = parser::parse(document.as_bytes()).unwrap();
        assert_eq!(parsed.entries.len(), 1);

        let first_title = parsed.entries[0].title.as_ref().unwrap();
        assert_eq!(first_title.content, "Test Post");
    }

    /// Input that is not XML at all must be rejected by the parser.
    #[test]
    fn parse_invalid_rss() {
        let garbage = "this is not valid XML";
        assert!(parser::parse(garbage.as_bytes()).is_err());
    }

    /// A channel without any items parses successfully and produces no entries.
    #[test]
    fn parse_empty_rss() {
        let document = r#"<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
    <title>Empty Feed</title>
</channel>
</rss>"#;

        let parsed = parser::parse(document.as_bytes()).unwrap();
        assert_eq!(parsed.entries.len(), 0);
    }
}
+2
core/src/fetchers/mod.rs
···11+mod bearblog;
12mod bluesky;
23mod leaflet;
34mod substack;
4566+pub use bearblog::BearBlogFetcher;
57pub use bluesky::BlueskyFetcher;
68pub use leaflet::LeafletFetcher;
79pub use substack::SubstackFetcher;