personal activity index (bluesky, leaflet, substack) pai.desertthunder.dev
rss bluesky
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

at a55bc23c35eeb8ac8cf652f1ebac1eb258132bd6 424 lines 12 kB view raw
1mod fetchers; 2 3use serde::{Deserialize, Serialize}; 4use std::path::Path; 5use std::{fmt, str::FromStr}; 6use thiserror::Error; 7 8pub use fetchers::{BlueskyFetcher, LeafletFetcher, SubstackFetcher}; 9 10/// Errors that can occur in the Personal Activity Index 11#[derive(Error, Debug)] 12pub enum PaiError { 13 #[error("Unknown source kind: {0}")] 14 UnknownSourceKind(String), 15 16 #[error("Invalid argument: {0}")] 17 InvalidArgument(String), 18 19 #[error("Storage error: {0}")] 20 Storage(String), 21 22 #[error("Fetch error: {0}")] 23 Fetch(String), 24 25 #[error("Parse error: {0}")] 26 Parse(String), 27 28 #[error("Configuration error: {0}")] 29 Config(String), 30 31 #[error("IO error: {0}")] 32 Io(#[from] std::io::Error), 33} 34 35pub type Result<T> = std::result::Result<T, PaiError>; 36 37/// Represents the different source types supported by the indexer 38#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] 39#[serde(rename_all = "lowercase")] 40pub enum SourceKind { 41 Substack, 42 Bluesky, 43 Leaflet, 44} 45 46impl fmt::Display for SourceKind { 47 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 48 match self { 49 SourceKind::Substack => write!(f, "substack"), 50 SourceKind::Bluesky => write!(f, "bluesky"), 51 SourceKind::Leaflet => write!(f, "leaflet"), 52 } 53 } 54} 55 56impl std::str::FromStr for SourceKind { 57 type Err = PaiError; 58 59 fn from_str(s: &str) -> Result<Self> { 60 match s.to_lowercase().as_str() { 61 "substack" => Ok(SourceKind::Substack), 62 "bluesky" => Ok(SourceKind::Bluesky), 63 "leaflet" => Ok(SourceKind::Leaflet), 64 _ => Err(PaiError::UnknownSourceKind(s.to_string())), 65 } 66 } 67} 68 69/// Represents a single content item from any source 70#[derive(Debug, Clone, Serialize, Deserialize)] 71pub struct Item { 72 /// Unique identifier for the item 73 pub id: String, 74 /// The source type this item came from 75 pub source_kind: SourceKind, 76 /// The specific source instance identifier (e.g., domain or handle) 77 pub source_id: String, 78 /// Author of the content 79 pub author: Option<String>, 80 /// Title of the content 81 pub title: Option<String>, 82 /// Summary or excerpt of the content 83 pub summary: Option<String>, 84 /// Canonical URL for the content 85 pub url: String, 86 /// Full HTML content 87 pub content_html: Option<String>, 88 /// When the content was published (ISO 8601) 89 pub published_at: String, 90 /// When this item was created in our database (ISO 8601) 91 pub created_at: String, 92} 93 94/// Filter criteria for listing items 95#[derive(Debug, Default, Clone)] 96pub struct ListFilter { 97 /// Filter by source kind 98 pub source_kind: Option<SourceKind>, 99 /// Filter by specific source ID 100 pub source_id: Option<String>, 101 /// Maximum number of items to return 102 pub limit: Option<usize>, 103 /// Only items published at or after this time (ISO 8601) 104 pub since: Option<String>, 105 /// Substring search on title/summary 106 pub query: Option<String>, 107} 108 109/// Storage trait for persisting and retrieving items 110pub trait Storage { 111 /// Insert or replace an item in storage 112 fn insert_or_replace_item(&self, item: &Item) -> Result<()>; 113 114 /// List items matching the given filter 115 fn list_items(&self, filter: &ListFilter) -> Result<Vec<Item>>; 116} 117 118/// Trait for fetching content from a specific source 119pub trait SourceFetcher { 120 /// Synchronize content from this source into storage 121 fn sync(&self, storage: &dyn Storage) -> Result<()>; 122} 123 124/// Configuration for Substack source 125#[derive(Debug, Clone, Deserialize, Serialize)] 126pub struct SubstackConfig { 127 #[serde(default)] 128 pub enabled: bool, 129 pub base_url: String, 130} 131 132/// Configuration for Bluesky source 133#[derive(Debug, Clone, Deserialize, Serialize)] 134pub struct BlueskyConfig { 135 #[serde(default)] 136 pub enabled: bool, 137 pub handle: String, 138} 139 140/// Configuration for a single Leaflet publication 141#[derive(Debug, Clone, Deserialize, Serialize)] 142pub struct LeafletConfig { 143 #[serde(default)] 144 pub enabled: bool, 145 pub id: String, 146 pub base_url: String, 147} 148 149/// Database configuration 150#[derive(Debug, Clone, Deserialize, Serialize, Default)] 151pub struct DatabaseConfig { 152 pub path: Option<String>, 153} 154 155/// Deployment mode configuration 156#[derive(Debug, Clone, Deserialize, Serialize, Default)] 157pub struct DeploymentConfig { 158 #[serde(default)] 159 pub mode: String, 160 pub cloudflare: Option<CloudflareConfig>, 161} 162 163/// Cloudflare deployment configuration 164#[derive(Debug, Clone, Deserialize, Serialize)] 165pub struct CloudflareConfig { 166 pub worker_name: String, 167 pub d1_binding: String, 168 pub database_name: String, 169} 170 171/// Sources configuration section 172#[derive(Debug, Clone, Deserialize, Serialize, Default)] 173pub struct SourcesConfig { 174 pub substack: Option<SubstackConfig>, 175 pub bluesky: Option<BlueskyConfig>, 176 #[serde(default)] 177 pub leaflet: Vec<LeafletConfig>, 178} 179 180/// Configuration for all sources 181#[derive(Debug, Clone, Deserialize, Serialize, Default)] 182pub struct Config { 183 #[serde(default)] 184 pub database: DatabaseConfig, 185 #[serde(default)] 186 pub deployment: DeploymentConfig, 187 #[serde(default)] 188 pub sources: SourcesConfig, 189} 190 191impl Config { 192 /// Load configuration from a TOML file 193 /// 194 /// Reads and parses the config file, validating the structure and required fields. 195 pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> { 196 let content = 197 std::fs::read_to_string(path).map_err(|e| PaiError::Config(format!("Failed to read config file: {e}")))?; 198 Self::from_str(&content) 199 } 200} 201 202impl FromStr for Config { 203 type Err = PaiError; 204 205 fn from_str(s: &str) -> Result<Self> { 206 toml::from_str(s).map_err(|e| PaiError::Config(format!("Failed to parse config: {e}"))) 207 } 208} 209 210/// Synchronize all enabled sources 211/// 212/// Calls each configured source fetcher to retrieve and store content. 213/// Returns the number of sources successfully synced. 214/// 215/// Filters sources based on optional kind and source_id parameters. 216pub fn sync_all_sources( 217 config: &Config, storage: &dyn Storage, kind: Option<SourceKind>, source_id: Option<&str>, 218) -> Result<usize> { 219 let mut synced_count = 0; 220 221 if let Some(ref substack_config) = config.sources.substack { 222 let should_sync = substack_config.enabled 223 && match (kind, source_id) { 224 (Some(k), _) if k != SourceKind::Substack => false, 225 (_, Some(sid)) => { 226 let substack_id = substack_config 227 .base_url 228 .trim_start_matches("https://") 229 .trim_start_matches("http://") 230 .trim_end_matches('/'); 231 substack_id == sid 232 } 233 _ => true, 234 }; 235 236 if should_sync { 237 let fetcher = SubstackFetcher::new(substack_config.clone()); 238 fetcher.sync(storage)?; 239 synced_count += 1; 240 } 241 } 242 243 if let Some(ref bluesky_config) = config.sources.bluesky { 244 let should_sync = bluesky_config.enabled 245 && match (kind, source_id) { 246 (Some(k), _) if k != SourceKind::Bluesky => false, 247 (_, Some(sid)) => bluesky_config.handle == sid, 248 _ => true, 249 }; 250 251 if should_sync { 252 let fetcher = BlueskyFetcher::new(bluesky_config.clone()); 253 fetcher.sync(storage)?; 254 synced_count += 1; 255 } 256 } 257 258 for leaflet_config in &config.sources.leaflet { 259 if !leaflet_config.enabled { 260 continue; 261 } 262 263 let should_sync = match (kind, source_id) { 264 (Some(k), _) if k != SourceKind::Leaflet => false, 265 (_, Some(sid)) => leaflet_config.id == sid, 266 _ => true, 267 }; 268 269 if should_sync { 270 let fetcher = LeafletFetcher::new(leaflet_config.clone()); 271 fetcher.sync(storage)?; 272 synced_count += 1; 273 } 274 } 275 276 Ok(synced_count) 277} 278 279#[cfg(test)] 280mod tests { 281 use super::*; 282 283 #[test] 284 fn source_kind_display() { 285 assert_eq!(SourceKind::Substack.to_string(), "substack"); 286 assert_eq!(SourceKind::Bluesky.to_string(), "bluesky"); 287 assert_eq!(SourceKind::Leaflet.to_string(), "leaflet"); 288 } 289 290 #[test] 291 fn source_kind_parse() { 292 assert_eq!("substack".parse::<SourceKind>().unwrap(), SourceKind::Substack); 293 assert_eq!("BLUESKY".parse::<SourceKind>().unwrap(), SourceKind::Bluesky); 294 assert_eq!("Leaflet".parse::<SourceKind>().unwrap(), SourceKind::Leaflet); 295 assert!("invalid".parse::<SourceKind>().is_err()); 296 } 297 298 #[test] 299 fn error_unknown_source_kind() { 300 let err = "unknown".parse::<SourceKind>().unwrap_err(); 301 assert!(matches!(err, PaiError::UnknownSourceKind(_))); 302 assert_eq!(err.to_string(), "Unknown source kind: unknown"); 303 } 304 305 #[test] 306 fn list_filter_default() { 307 let filter = ListFilter::default(); 308 assert!(filter.source_kind.is_none()); 309 assert!(filter.source_id.is_none()); 310 assert!(filter.limit.is_none()); 311 assert!(filter.since.is_none()); 312 assert!(filter.query.is_none()); 313 } 314 315 #[test] 316 fn config_parse_empty() { 317 let config = Config::from_str("").unwrap(); 318 assert!(config.sources.substack.is_none()); 319 assert!(config.sources.bluesky.is_none()); 320 assert!(config.sources.leaflet.is_empty()); 321 } 322 323 #[test] 324 fn config_parse_substack() { 325 let toml = r#" 326[sources.substack] 327enabled = true 328base_url = "https://patternmatched.substack.com" 329"#; 330 let config = Config::from_str(toml).unwrap(); 331 let substack = config.sources.substack.as_ref().unwrap(); 332 assert!(substack.enabled); 333 assert_eq!(substack.base_url, "https://patternmatched.substack.com"); 334 } 335 336 #[test] 337 fn config_parse_bluesky() { 338 let toml = r#" 339[sources.bluesky] 340enabled = true 341handle = "desertthunder.dev" 342"#; 343 let config = Config::from_str(toml).unwrap(); 344 let bluesky = config.sources.bluesky.as_ref().unwrap(); 345 assert!(bluesky.enabled); 346 assert_eq!(bluesky.handle, "desertthunder.dev"); 347 } 348 349 #[test] 350 fn config_parse_leaflet_multiple() { 351 let toml = r#" 352[[sources.leaflet]] 353enabled = true 354id = "desertthunder" 355base_url = "https://desertthunder.leaflet.pub" 356 357[[sources.leaflet]] 358enabled = true 359id = "stormlightlabs" 360base_url = "https://stormlightlabs.leaflet.pub" 361"#; 362 let config = Config::from_str(toml).unwrap(); 363 assert_eq!(config.sources.leaflet.len(), 2); 364 assert_eq!(config.sources.leaflet[0].id, "desertthunder"); 365 assert_eq!(config.sources.leaflet[1].id, "stormlightlabs"); 366 } 367 368 #[test] 369 fn config_parse_all_sources() { 370 let toml = r#" 371[database] 372path = "/tmp/test.db" 373 374[deployment] 375mode = "sqlite" 376 377[sources.substack] 378enabled = true 379base_url = "https://test.substack.com" 380 381[sources.bluesky] 382enabled = false 383handle = "test.bsky.social" 384 385[[sources.leaflet]] 386enabled = true 387id = "test" 388base_url = "https://test.leaflet.pub" 389"#; 390 let config = Config::from_str(toml).unwrap(); 391 assert_eq!(config.database.path, Some("/tmp/test.db".to_string())); 392 assert_eq!(config.deployment.mode, "sqlite"); 393 assert!(config.sources.substack.is_some()); 394 assert!(config.sources.bluesky.is_some()); 395 assert_eq!(config.sources.leaflet.len(), 1); 396 } 397 398 #[test] 399 fn config_parse_invalid_toml() { 400 let toml = "this is not valid toml {{{"; 401 assert!(Config::from_str(toml).is_err()); 402 } 403 404 #[test] 405 fn config_parse_missing_required_field() { 406 let toml = r#" 407[sources.substack] 408enabled = true 409"#; 410 let result = Config::from_str(toml); 411 assert!(result.is_err()); 412 } 413 414 #[test] 415 fn config_default_enabled_false() { 416 let toml = r#" 417[sources.substack] 418base_url = "https://test.substack.com" 419"#; 420 let config = Config::from_str(toml).unwrap(); 421 let substack = config.sources.substack.as_ref().unwrap(); 422 assert!(!substack.enabled); 423 } 424}