personal activity index (bluesky, leaflet, substack)
pai.desertthunder.dev
rss
bluesky
1mod fetchers;
2
3use serde::{Deserialize, Serialize};
4use std::path::Path;
5use std::{fmt, str::FromStr};
6use thiserror::Error;
7
8pub use fetchers::{BlueskyFetcher, LeafletFetcher, SubstackFetcher};
9
10/// Errors that can occur in the Personal Activity Index
11#[derive(Error, Debug)]
12pub enum PaiError {
13 #[error("Unknown source kind: {0}")]
14 UnknownSourceKind(String),
15
16 #[error("Invalid argument: {0}")]
17 InvalidArgument(String),
18
19 #[error("Storage error: {0}")]
20 Storage(String),
21
22 #[error("Fetch error: {0}")]
23 Fetch(String),
24
25 #[error("Parse error: {0}")]
26 Parse(String),
27
28 #[error("Configuration error: {0}")]
29 Config(String),
30
31 #[error("IO error: {0}")]
32 Io(#[from] std::io::Error),
33}
34
35pub type Result<T> = std::result::Result<T, PaiError>;
36
37/// Represents the different source types supported by the indexer
38#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
39#[serde(rename_all = "lowercase")]
40pub enum SourceKind {
41 Substack,
42 Bluesky,
43 Leaflet,
44}
45
46impl fmt::Display for SourceKind {
47 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
48 match self {
49 SourceKind::Substack => write!(f, "substack"),
50 SourceKind::Bluesky => write!(f, "bluesky"),
51 SourceKind::Leaflet => write!(f, "leaflet"),
52 }
53 }
54}
55
56impl std::str::FromStr for SourceKind {
57 type Err = PaiError;
58
59 fn from_str(s: &str) -> Result<Self> {
60 match s.to_lowercase().as_str() {
61 "substack" => Ok(SourceKind::Substack),
62 "bluesky" => Ok(SourceKind::Bluesky),
63 "leaflet" => Ok(SourceKind::Leaflet),
64 _ => Err(PaiError::UnknownSourceKind(s.to_string())),
65 }
66 }
67}
68
69/// Represents a single content item from any source
70#[derive(Debug, Clone, Serialize, Deserialize)]
71pub struct Item {
72 /// Unique identifier for the item
73 pub id: String,
74 /// The source type this item came from
75 pub source_kind: SourceKind,
76 /// The specific source instance identifier (e.g., domain or handle)
77 pub source_id: String,
78 /// Author of the content
79 pub author: Option<String>,
80 /// Title of the content
81 pub title: Option<String>,
82 /// Summary or excerpt of the content
83 pub summary: Option<String>,
84 /// Canonical URL for the content
85 pub url: String,
86 /// Full HTML content
87 pub content_html: Option<String>,
88 /// When the content was published (ISO 8601)
89 pub published_at: String,
90 /// When this item was created in our database (ISO 8601)
91 pub created_at: String,
92}
93
94/// Filter criteria for listing items
95#[derive(Debug, Default, Clone)]
96pub struct ListFilter {
97 /// Filter by source kind
98 pub source_kind: Option<SourceKind>,
99 /// Filter by specific source ID
100 pub source_id: Option<String>,
101 /// Maximum number of items to return
102 pub limit: Option<usize>,
103 /// Only items published at or after this time (ISO 8601)
104 pub since: Option<String>,
105 /// Substring search on title/summary
106 pub query: Option<String>,
107}
108
109/// Storage trait for persisting and retrieving items
110pub trait Storage {
111 /// Insert or replace an item in storage
112 fn insert_or_replace_item(&self, item: &Item) -> Result<()>;
113
114 /// List items matching the given filter
115 fn list_items(&self, filter: &ListFilter) -> Result<Vec<Item>>;
116}
117
118/// Trait for fetching content from a specific source
119pub trait SourceFetcher {
120 /// Synchronize content from this source into storage
121 fn sync(&self, storage: &dyn Storage) -> Result<()>;
122}
123
124/// Configuration for Substack source
125#[derive(Debug, Clone, Deserialize, Serialize)]
126pub struct SubstackConfig {
127 #[serde(default)]
128 pub enabled: bool,
129 pub base_url: String,
130}
131
132/// Configuration for Bluesky source
133#[derive(Debug, Clone, Deserialize, Serialize)]
134pub struct BlueskyConfig {
135 #[serde(default)]
136 pub enabled: bool,
137 pub handle: String,
138}
139
140/// Configuration for a single Leaflet publication
141#[derive(Debug, Clone, Deserialize, Serialize)]
142pub struct LeafletConfig {
143 #[serde(default)]
144 pub enabled: bool,
145 pub id: String,
146 pub base_url: String,
147}
148
149/// Database configuration
150#[derive(Debug, Clone, Deserialize, Serialize, Default)]
151pub struct DatabaseConfig {
152 pub path: Option<String>,
153}
154
155/// Deployment mode configuration
156#[derive(Debug, Clone, Deserialize, Serialize, Default)]
157pub struct DeploymentConfig {
158 #[serde(default)]
159 pub mode: String,
160 pub cloudflare: Option<CloudflareConfig>,
161}
162
163/// Cloudflare deployment configuration
164#[derive(Debug, Clone, Deserialize, Serialize)]
165pub struct CloudflareConfig {
166 pub worker_name: String,
167 pub d1_binding: String,
168 pub database_name: String,
169}
170
171/// Sources configuration section
172#[derive(Debug, Clone, Deserialize, Serialize, Default)]
173pub struct SourcesConfig {
174 pub substack: Option<SubstackConfig>,
175 pub bluesky: Option<BlueskyConfig>,
176 #[serde(default)]
177 pub leaflet: Vec<LeafletConfig>,
178}
179
180/// Configuration for all sources
181#[derive(Debug, Clone, Deserialize, Serialize, Default)]
182pub struct Config {
183 #[serde(default)]
184 pub database: DatabaseConfig,
185 #[serde(default)]
186 pub deployment: DeploymentConfig,
187 #[serde(default)]
188 pub sources: SourcesConfig,
189}
190
191impl Config {
192 /// Load configuration from a TOML file
193 ///
194 /// Reads and parses the config file, validating the structure and required fields.
195 pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
196 let content =
197 std::fs::read_to_string(path).map_err(|e| PaiError::Config(format!("Failed to read config file: {e}")))?;
198 Self::from_str(&content)
199 }
200}
201
202impl FromStr for Config {
203 type Err = PaiError;
204
205 fn from_str(s: &str) -> Result<Self> {
206 toml::from_str(s).map_err(|e| PaiError::Config(format!("Failed to parse config: {e}")))
207 }
208}
209
210/// Synchronize all enabled sources
211///
212/// Calls each configured source fetcher to retrieve and store content.
213/// Returns the number of sources successfully synced.
214///
215/// Filters sources based on optional kind and source_id parameters.
216pub fn sync_all_sources(
217 config: &Config, storage: &dyn Storage, kind: Option<SourceKind>, source_id: Option<&str>,
218) -> Result<usize> {
219 let mut synced_count = 0;
220
221 if let Some(ref substack_config) = config.sources.substack {
222 let should_sync = substack_config.enabled
223 && match (kind, source_id) {
224 (Some(k), _) if k != SourceKind::Substack => false,
225 (_, Some(sid)) => {
226 let substack_id = substack_config
227 .base_url
228 .trim_start_matches("https://")
229 .trim_start_matches("http://")
230 .trim_end_matches('/');
231 substack_id == sid
232 }
233 _ => true,
234 };
235
236 if should_sync {
237 let fetcher = SubstackFetcher::new(substack_config.clone());
238 fetcher.sync(storage)?;
239 synced_count += 1;
240 }
241 }
242
243 if let Some(ref bluesky_config) = config.sources.bluesky {
244 let should_sync = bluesky_config.enabled
245 && match (kind, source_id) {
246 (Some(k), _) if k != SourceKind::Bluesky => false,
247 (_, Some(sid)) => bluesky_config.handle == sid,
248 _ => true,
249 };
250
251 if should_sync {
252 let fetcher = BlueskyFetcher::new(bluesky_config.clone());
253 fetcher.sync(storage)?;
254 synced_count += 1;
255 }
256 }
257
258 for leaflet_config in &config.sources.leaflet {
259 if !leaflet_config.enabled {
260 continue;
261 }
262
263 let should_sync = match (kind, source_id) {
264 (Some(k), _) if k != SourceKind::Leaflet => false,
265 (_, Some(sid)) => leaflet_config.id == sid,
266 _ => true,
267 };
268
269 if should_sync {
270 let fetcher = LeafletFetcher::new(leaflet_config.clone());
271 fetcher.sync(storage)?;
272 synced_count += 1;
273 }
274 }
275
276 Ok(synced_count)
277}
278
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses a TOML snippet that the calling test expects to be a valid configuration.
    fn parse_config(toml: &str) -> Config {
        toml.parse::<Config>().unwrap()
    }

    /// `Display` yields the lowercase variant name.
    #[test]
    fn source_kind_display() {
        assert_eq!(SourceKind::Substack.to_string(), "substack");
        assert_eq!(SourceKind::Bluesky.to_string(), "bluesky");
        assert_eq!(SourceKind::Leaflet.to_string(), "leaflet");
    }

    /// Parsing ignores letter case and rejects unknown names.
    #[test]
    fn source_kind_parse() {
        assert_eq!(SourceKind::from_str("substack").unwrap(), SourceKind::Substack);
        assert_eq!(SourceKind::from_str("BLUESKY").unwrap(), SourceKind::Bluesky);
        assert_eq!(SourceKind::from_str("Leaflet").unwrap(), SourceKind::Leaflet);
        assert!(SourceKind::from_str("invalid").is_err());
    }

    /// An unknown name yields `UnknownSourceKind` with the expected message.
    #[test]
    fn error_unknown_source_kind() {
        let err = SourceKind::from_str("unknown").unwrap_err();
        assert!(matches!(err, PaiError::UnknownSourceKind(_)));
        assert_eq!(err.to_string(), "Unknown source kind: unknown");
    }

    /// A default filter has no criteria set.
    #[test]
    fn list_filter_default() {
        let ListFilter { source_kind, source_id, limit, since, query } = ListFilter::default();
        assert!(source_kind.is_none());
        assert!(source_id.is_none());
        assert!(limit.is_none());
        assert!(since.is_none());
        assert!(query.is_none());
    }

    /// An empty document parses to a configuration without any sources.
    #[test]
    fn config_parse_empty() {
        let sources = parse_config("").sources;
        assert!(sources.substack.is_none());
        assert!(sources.bluesky.is_none());
        assert!(sources.leaflet.is_empty());
    }

    #[test]
    fn config_parse_substack() {
        let config = parse_config(
            r#"
[sources.substack]
enabled = true
base_url = "https://patternmatched.substack.com"
"#,
        );
        let substack = config.sources.substack.as_ref().unwrap();
        assert!(substack.enabled);
        assert_eq!(substack.base_url, "https://patternmatched.substack.com");
    }

    #[test]
    fn config_parse_bluesky() {
        let config = parse_config(
            r#"
[sources.bluesky]
enabled = true
handle = "desertthunder.dev"
"#,
        );
        let bluesky = config.sources.bluesky.as_ref().unwrap();
        assert!(bluesky.enabled);
        assert_eq!(bluesky.handle, "desertthunder.dev");
    }

    /// Repeated `[[sources.leaflet]]` tables are collected in document order.
    #[test]
    fn config_parse_leaflet_multiple() {
        let config = parse_config(
            r#"
[[sources.leaflet]]
enabled = true
id = "desertthunder"
base_url = "https://desertthunder.leaflet.pub"

[[sources.leaflet]]
enabled = true
id = "stormlightlabs"
base_url = "https://stormlightlabs.leaflet.pub"
"#,
        );
        let leaflets = &config.sources.leaflet;
        assert_eq!(leaflets.len(), 2);
        assert_eq!(leaflets[0].id, "desertthunder");
        assert_eq!(leaflets[1].id, "stormlightlabs");
    }

    #[test]
    fn config_parse_all_sources() {
        let config = parse_config(
            r#"
[database]
path = "/tmp/test.db"

[deployment]
mode = "sqlite"

[sources.substack]
enabled = true
base_url = "https://test.substack.com"

[sources.bluesky]
enabled = false
handle = "test.bsky.social"

[[sources.leaflet]]
enabled = true
id = "test"
base_url = "https://test.leaflet.pub"
"#,
        );
        assert_eq!(config.database.path, Some("/tmp/test.db".to_string()));
        assert_eq!(config.deployment.mode, "sqlite");
        assert!(config.sources.substack.is_some());
        assert!(config.sources.bluesky.is_some());
        assert_eq!(config.sources.leaflet.len(), 1);
    }

    #[test]
    fn config_parse_invalid_toml() {
        let outcome = "this is not valid toml {{{".parse::<Config>();
        assert!(outcome.is_err());
    }

    /// A Substack section without `base_url` is rejected.
    #[test]
    fn config_parse_missing_required_field() {
        let outcome = r#"
[sources.substack]
enabled = true
"#
        .parse::<Config>();
        assert!(outcome.is_err());
    }

    /// `enabled` falls back to `false` when left out.
    #[test]
    fn config_default_enabled_false() {
        let config = parse_config(
            r#"
[sources.substack]
base_url = "https://test.substack.com"
"#,
        );
        let substack = config.sources.substack.as_ref().unwrap();
        assert!(!substack.enabled);
    }
}