personal activity index (bluesky, leaflet, substack) pai.desertthunder.dev
rss bluesky
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat: add manpage generation and installation commands

* introduced `Man` cmd for generating and installing the pai(1) manpage.

* implemented `CfInit` cmd for initializing Cloudflare Worker deployment

* updated configuration for nginx and Caddy, including examples

+2025 -28
+142
Cargo.lock
··· 424 424 checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" 425 425 426 426 [[package]] 427 + name = "futures-io" 428 + version = "0.3.31" 429 + source = "registry+https://github.com/rust-lang/crates.io-index" 430 + checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" 431 + 432 + [[package]] 433 + name = "futures-macro" 434 + version = "0.3.31" 435 + source = "registry+https://github.com/rust-lang/crates.io-index" 436 + checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" 437 + dependencies = [ 438 + "proc-macro2", 439 + "quote", 440 + "syn", 441 + ] 442 + 443 + [[package]] 427 444 name = "futures-sink" 428 445 version = "0.3.31" 429 446 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 442 459 checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" 443 460 dependencies = [ 444 461 "futures-core", 462 + "futures-io", 463 + "futures-macro", 464 + "futures-sink", 445 465 "futures-task", 466 + "memchr", 446 467 "pin-project-lite", 447 468 "pin-utils", 469 + "slab", 448 470 ] 449 471 450 472 [[package]] ··· 1032 1054 [[package]] 1033 1055 name = "pai-worker" 1034 1056 version = "0.1.0" 1057 + dependencies = [ 1058 + "chrono", 1059 + "pai-core", 1060 + "rss", 1061 + "serde", 1062 + "serde_json", 1063 + "serde_urlencoded", 1064 + "worker", 1065 + ] 1035 1066 1036 1067 [[package]] 1037 1068 name = "percent-encoding" ··· 1040 1071 checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" 1041 1072 1042 1073 [[package]] 1074 + name = "pin-project" 1075 + version = "1.1.10" 1076 + source = "registry+https://github.com/rust-lang/crates.io-index" 1077 + checksum = "677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a" 1078 + dependencies = [ 1079 + "pin-project-internal", 1080 + ] 1081 + 1082 + [[package]] 1083 + name = "pin-project-internal" 1084 + version = "1.1.10" 1085 + source = 
"registry+https://github.com/rust-lang/crates.io-index" 1086 + checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" 1087 + dependencies = [ 1088 + "proc-macro2", 1089 + "quote", 1090 + "syn", 1091 + ] 1092 + 1093 + [[package]] 1043 1094 name = "pin-project-lite" 1044 1095 version = "0.2.16" 1045 1096 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1201 1252 checksum = "88f8660c1ff60292143c98d08fc6e2f654d722db50410e3f3797d40baaf9d8f3" 1202 1253 1203 1254 [[package]] 1255 + name = "rss" 1256 + version = "2.0.12" 1257 + source = "registry+https://github.com/rust-lang/crates.io-index" 1258 + checksum = "b2107738f003660f0a91f56fd3e3bd3ab5d918b2ddaf1e1ec2136fb1c46f71bf" 1259 + dependencies = [ 1260 + "quick-xml", 1261 + ] 1262 + 1263 + [[package]] 1204 1264 name = "rusqlite" 1205 1265 version = "0.37.0" 1206 1266 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1312 1372 dependencies = [ 1313 1373 "serde_core", 1314 1374 "serde_derive", 1375 + ] 1376 + 1377 + [[package]] 1378 + name = "serde-wasm-bindgen" 1379 + version = "0.6.5" 1380 + source = "registry+https://github.com/rust-lang/crates.io-index" 1381 + checksum = "8302e169f0eddcc139c70f139d19d6467353af16f9fce27e8c30158036a1e16b" 1382 + dependencies = [ 1383 + "js-sys", 1384 + "serde", 1385 + "wasm-bindgen", 1315 1386 ] 1316 1387 1317 1388 [[package]] ··· 1842 1913 ] 1843 1914 1844 1915 [[package]] 1916 + name = "wasm-streams" 1917 + version = "0.4.2" 1918 + source = "registry+https://github.com/rust-lang/crates.io-index" 1919 + checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" 1920 + dependencies = [ 1921 + "futures-util", 1922 + "js-sys", 1923 + "wasm-bindgen", 1924 + "wasm-bindgen-futures", 1925 + "web-sys", 1926 + ] 1927 + 1928 + [[package]] 1845 1929 name = "web-sys" 1846 1930 version = "0.3.82" 1847 1931 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 2088 2172 version = "0.46.0" 2089 2173 source = 
"registry+https://github.com/rust-lang/crates.io-index" 2090 2174 checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" 2175 + 2176 + [[package]] 2177 + name = "worker" 2178 + version = "0.6.7" 2179 + source = "registry+https://github.com/rust-lang/crates.io-index" 2180 + checksum = "d9320293035d2074f1fb84baf7d79d7932c183dd04a7e0c143dc75db0d0037ac" 2181 + dependencies = [ 2182 + "async-trait", 2183 + "bytes", 2184 + "chrono", 2185 + "futures-channel", 2186 + "futures-util", 2187 + "http", 2188 + "http-body", 2189 + "js-sys", 2190 + "matchit", 2191 + "pin-project", 2192 + "serde", 2193 + "serde-wasm-bindgen", 2194 + "serde_json", 2195 + "serde_urlencoded", 2196 + "tokio", 2197 + "url", 2198 + "wasm-bindgen", 2199 + "wasm-bindgen-futures", 2200 + "wasm-streams", 2201 + "web-sys", 2202 + "worker-macros", 2203 + "worker-sys", 2204 + ] 2205 + 2206 + [[package]] 2207 + name = "worker-macros" 2208 + version = "0.6.7" 2209 + source = "registry+https://github.com/rust-lang/crates.io-index" 2210 + checksum = "eb37d4f9d99921836a1e4dc21e6041df9b0c2c5fe3c230edddd172a8ef9e251e" 2211 + dependencies = [ 2212 + "async-trait", 2213 + "proc-macro2", 2214 + "quote", 2215 + "syn", 2216 + "wasm-bindgen", 2217 + "wasm-bindgen-futures", 2218 + "wasm-bindgen-macro-support", 2219 + "worker-sys", 2220 + ] 2221 + 2222 + [[package]] 2223 + name = "worker-sys" 2224 + version = "0.6.7" 2225 + source = "registry+https://github.com/rust-lang/crates.io-index" 2226 + checksum = "07b4e2ca5d405247a986d533bba78c396c941835747977631168b8b05304f1b6" 2227 + dependencies = [ 2228 + "cfg-if", 2229 + "js-sys", 2230 + "wasm-bindgen", 2231 + "web-sys", 2232 + ] 2091 2233 2092 2234 [[package]] 2093 2235 name = "writeable"
+136 -8
DEPLOYMENT.md
··· 1 1 # Personal Activity Index – Deployment Guide 2 2 3 - This guide walks through two common reverse proxy setups for `pai serve`: **nginx** and **Caddy**. Both sections include native (host binary) instructions and optional Docker paths if you prefer containerized deployments. 3 + This guide walks through two common reverse proxy setups for `pai serve`: **nginx** and **Caddy**. 4 + Both sections include native (host binary) instructions and optional Docker paths if you prefer containerized deployments. 5 + 6 + ## Table of Contents 7 + 8 + - [Prerequisites](#prerequisites) 9 + - [nginx Deployment](#nginx-deployment) 10 + - [Host Setup](#host-setup) 11 + - [nginx Config](#nginx-config) 12 + - [Optional: nginx via Docker](#optional-nginx-via-docker) 13 + - [Caddy Deployment](#caddy-deployment) 14 + - [Host Setup](#host-setup-1) 15 + - [Caddyfile Example](#caddyfile-example) 16 + - [Optional: Caddy + Docker Compose](#optional-caddy--docker-compose) 17 + - [Health Checks & Monitoring](#health-checks--monitoring) 18 + - [Cloudflare Worker Deployment](#cloudflare-worker-deployment) 19 + - [Prerequisites](#prerequisites-1) 20 + - [Quick Start](#quick-start) 21 + - [Cron Triggers](#cron-triggers) 22 + - [API Endpoints](#api-endpoints) 23 + - [Local Development](#local-development) 24 + - [Monitoring](#monitoring) 4 25 5 26 ## Prerequisites 6 27 7 - 1. Build the CLI binary: 28 + 1. Build binary: 8 29 9 30 ```sh 10 31 cargo build --release -p pai ··· 13 34 The binary will live at `target/release/pai`. 14 35 15 36 2. Prepare a configuration + database location. The default locations follow the XDG spec, but you can override them with `-C` (config dir) and `-d` (database path). 16 - 17 37 3. Run a sync at least once so the database has data: 18 38 19 39 ```sh ··· 160 180 161 181 ## Health Checks & Monitoring 162 182 163 - - `GET /status` – lightweight JSON (`status`, total items, counts per `source_kind`). Ideal for load balancer health probes. 
183 + - `GET /status` – lightweight JSON (`status`, version, uptime, total items, counts per `source_kind`). Ideal for load balancer health probes. 164 184 - `GET /api/feed?limit=1` ensures the server can read from SQLite and return real data. 165 185 - `GET /api/item/{id}` is handy for debugging a specific record. 166 186 - Consider wiring `/status` into nginx/Caddy health checks (`/healthz`) or your platform’s monitoring agents. 167 187 168 - ## Security Tips 188 + ## Cloudflare Worker Deployment 189 + 190 + The Personal Activity Index can also be deployed as a Cloudflare Worker with D1 database, providing a serverless alternative to self-hosting. 191 + 192 + ### Prerequisites 193 + 194 + 1. Cloudflare account with Workers enabled 195 + 2. [Wrangler CLI](https://developers.cloudflare.com/workers/wrangler/install-and-update/) installed 196 + 3. Rust toolchain with `wasm32-unknown-unknown` target 197 + 198 + ### Quick Start 199 + 200 + #### 1. Generate Scaffolding 201 + 202 + Use the `pai cf-init` command to generate Cloudflare Worker configuration: 169 203 170 - - Bind the `pai serve` process to `127.0.0.1` and let the proxy handle TLS. 171 - - Run the binary as an unprivileged user with read/write access only to the DB path. 172 - - Regularly rotate TLS certificates (Caddy does this automatically; for nginx use certbot or similar). 204 + ```sh 205 + # Dry run to preview files 206 + pai cf-init --dry-run -o cloudflare-deployment 207 + 208 + # Create scaffolding 209 + pai cf-init -o cloudflare-deployment 210 + cd cloudflare-deployment 211 + ``` 212 + 213 + This creates: 214 + 215 + - `wrangler.example.toml` - Worker configuration template 216 + - `schema.sql` - D1 database schema 217 + - `README.md` - Deployment instructions 218 + 219 + #### 2. 
Create D1 Database 220 + 221 + ```sh 222 + wrangler d1 create personal-activity-db 223 + ``` 224 + 225 + Copy the database ID from the output and update `wrangler.example.toml`: 226 + 227 + ```toml 228 + [[d1_databases]] 229 + binding = "DB" 230 + database_name = "personal-activity-db" 231 + database_id = "your-database-id-here" # Replace with actual ID 232 + ``` 233 + 234 + Then copy to the active config: 235 + 236 + ```sh 237 + cp wrangler.example.toml wrangler.toml 238 + ``` 239 + 240 + #### 3. Initialize Database Schema 241 + 242 + ```sh 243 + wrangler d1 execute personal-activity-db --file=schema.sql 244 + ``` 245 + 246 + #### 4. Build and Deploy 247 + 248 + ```sh 249 + # Build the worker 250 + cd .. 251 + cargo install worker-build 252 + worker-build --release -p pai-worker 253 + 254 + # Deploy 255 + cd cloudflare-deployment 256 + wrangler deploy 257 + ``` 258 + 259 + ### Cron Triggers 260 + 261 + The worker includes a scheduled event handler for automatic syncing. Configure the schedule in `wrangler.toml`: 262 + 263 + ```toml 264 + [triggers] 265 + crons = ["0 * * * *"] # Every hour at minute 0 266 + ``` 267 + 268 + Common schedules: 269 + 270 + - `*/30 * * * *` - Every 30 minutes 271 + - `0 */6 * * *` - Every 6 hours 272 + - `0 0 * * *` - Daily at midnight 273 + 274 + ### API Endpoints 275 + 276 + The Worker exposes the same API as the self-hosted server: 277 + 278 + - `GET /api/feed?source_kind=bluesky&limit=20` - List items 279 + - `GET /api/item/{id}` - Get single item 280 + - `GET /status` - Health check 281 + 282 + ### Local Development 283 + 284 + Test the worker locally before deploying: 285 + 286 + ```sh 287 + wrangler dev 288 + ``` 289 + 290 + This starts a local server at `http://localhost:8787` with live reload. 291 + 292 + ### Monitoring 293 + 294 + View logs in real-time: 295 + 296 + ```sh 297 + wrangler tail 298 + ``` 299 + 300 + Or check logs in the [Cloudflare Dashboard](https://dash.cloudflare.com) under Workers & Pages.
+7
README.md
··· 35 35 36 36 # Check database 37 37 pai db-check 38 + 39 + # Install the manpage so `man pai` works 40 + pai man --install 41 + 42 + # Generate manpage to a file 43 + pai man -o pai.1 38 44 ``` 39 45 40 46 <details> ··· 65 71 ## Documentation 66 72 67 73 - CLI synopsis: `pai -h`, `pai <command> -h`, or `pai man` for the generated `pai(1)` page. 74 + - `pai man --install [--install-dir DIR]` copies `pai.1` into a MANPATH directory (defaults to `~/.local/share/man/man1`) so `man pai` works like any other UNIX tool. 68 75 - Database schema and config reference: [config.example.toml](./config.example.toml). 69 76 - Deployment topologies: [DEPLOYMENT.md](./DEPLOYMENT.md). 70 77
+466
TODO.md
··· 1 + 2 + # Personal Activity Index CLI – Roadmap & Tasks 3 + 4 + Objective: 5 + Build a POSIX-style Rust CLI that ingests content from Substack, Bluesky, and Leaflet into SQLite, with an optional Cloudflare Worker + D1 deployment path. 6 + 7 + Targets: 8 + 9 + - Self-host: single binary + SQLite. 10 + - Cloudflare: Rust Worker + D1 + Cron triggers. 11 + 12 + ## Workspace & Architecture 13 + 14 + **Goal:** Shared core library, CLI frontend, and Worker frontend, with clear separation of concerns. 15 + 16 + - [x] Create Cargo workspace layout: 17 + - [x] `core/` – shared types, fetchers, and storage traits. 18 + - [x] `cli/` – POSIX-style binary (`pai`). 19 + - [x] `worker/` – Cloudflare Worker using `workers-rs`. 20 + - [x] In `core/`: 21 + - [x] Define `SourceKind` enum: `substack`, `bluesky`, `leaflet`. 22 + - [x] Define `Item` struct with fields: 23 + - [x] `id`, `source_kind`, `source_id`, `author`, `title`, `summary`, 24 + `url`, `content_html`, `published_at`, `created_at`. 25 + - [x] Define `Storage` trait with at minimum: 26 + - [x] `insert_or_replace_item(&self, item: &Item) -> Result<()>` 27 + - [x] `list_items(&self, filter: &ListFilter) -> Result<Vec<Item>>` 28 + - [x] Define `SourceFetcher` trait: 29 + - [x] `fn sync(&self, storage: &dyn Storage) -> Result<()>` 30 + - [x] In `cli/`: 31 + - [x] Add argument parsing that follows POSIX conventions: 32 + - Options of the form `-h`, `-V`, `-C dir`, `-d path`, etc. 33 + - Options come before operands/subcommands where possible. 34 + - [x] Define subcommands (as operands) with their own POSIX-style options: 35 + - [x] `sync` 36 + - [x] `list` 37 + - [x] `export` 38 + - [x] `serve` 39 + - [x] In `core/`: 40 + - [x] Implement `sync_all_sources(config, storage)` that calls each fetcher. 41 + 42 + ## Milestone 1 – Local SQLite Storage (Self-host Base) 43 + 44 + **Goal:** `pai` can sync data into a local SQLite file. 45 + 46 + - [x] Choose SQLite crate (native mode): 47 + - [x] e.g. 
`rusqlite` 48 + - [x] Define SQL schema and migrations: 49 + - [x] `items` table: 50 + 51 + ```sql 52 + CREATE TABLE IF NOT EXISTS items ( 53 + id TEXT PRIMARY KEY, 54 + source_kind TEXT NOT NULL, 55 + source_id TEXT NOT NULL, 56 + author TEXT, 57 + title TEXT, 58 + summary TEXT, 59 + url TEXT NOT NULL, 60 + content_html TEXT, 61 + published_at TEXT NOT NULL, 62 + created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP 63 + ); 64 + 65 + CREATE INDEX IF NOT EXISTS idx_items_source_date ON items (source_kind, source_id, published_at DESC); 66 + ``` 67 + 68 + - [x] Embed migrations or provide `schema.sql` + `pai db-migrate` command. 69 + - [x] Implement `SqliteStorage` in `cli/`: 70 + - [x] Opens/creates DB at `-d path` or `$XDG_DATA_HOME/pai/pai.db` fallback. 71 + - [x] Implements `Storage` trait. 72 + - [x] Implement `pai sync` path: 73 + - [x] `pai sync` → load config → open SQLite → call `sync_all_sources`. 74 + - [x] Exit codes: 75 + - [x] `0` on success, non-zero on failure. 76 + - [x] Add `pai db-check`: 77 + - [x] Verifies schema and prints basic stats (item count per source). 78 + 79 + ## Milestone 2 – Source Integrations ✅ 80 + 81 + **Goal:** All three sources can be ingested via the CLI. 82 + 83 + **Status:** COMPLETE - All three source integrations (Substack RSS, Bluesky AT Protocol, Leaflet RSS) are implemented and tested with real data. 84 + 85 + ### 2.1 Substack (Pattern Matched) 86 + 87 + - [x] Add config support: 88 + 89 + ```toml 90 + [sources.substack] 91 + enabled = true 92 + base_url = "https://patternmatched.substack.com" 93 + ``` 94 + 95 + - [x] Implement `SubstackFetcher` in `core/`: 96 + 97 + - [x] Fetch `{base_url}/feed`. 98 + - [x] Parse RSS using `feed-rs`. 99 + - [x] Map `<item>`: 100 + 101 + - [x] `id` = GUID if present, otherwise `link`. 102 + - [x] `source_kind = "substack"`. 103 + - [x] `source_id = "patternmatched.substack.com"`. 104 + - [x] `title`, `summary` from RSS `title`/`description`. 105 + - [x] `url` from `link`. 
106 + - [x] `published_at` from `pubDate` (normalized to ISO 8601). 107 + - [x] Wire into `sync_all_sources` when enabled. 108 + 109 + ### 2.2 Bluesky (desertthunder.dev) 110 + 111 + - [x] Add config support: 112 + 113 + ```toml 114 + [sources.bluesky] 115 + enabled = true 116 + handle = "desertthunder.dev" 117 + ``` 118 + 119 + - [x] Implement `BlueskyFetcher` in `core/`: 120 + 121 + - [x] Fetch: 122 + 123 + - [x] `https://public.api.bsky.app/xrpc/app.bsky.feed.getAuthorFeed?actor=desertthunder.dev&limit=N` 124 + - [x] Filter out reposts/quotes (only original posts). 125 + - [x] Map `post` record: 126 + 127 + - [x] `id` = `uri` (AT URI). 128 + - [x] `source_kind = "bluesky"`. 129 + - [x] `source_id = "desertthunder.dev"`. 130 + - [x] `title` = truncated text up to N chars. 131 + - [x] `summary` = full text (or truncated). 132 + - [x] `url` = canonical `https://bsky.app/profile/…/post/…` derived from URI. 133 + - [x] `published_at` = `record.createdAt` (ISO 8601 already). 134 + - [ ] Optional: 135 + 136 + - [ ] Support pagination via `cursor` until a configured max number of posts. 137 + 138 + ### 2.3 Leaflet (desertthunder / stormlightlabs) 139 + 140 + - [x] Add config support: 141 + 142 + ```toml 143 + [[sources.leaflet]] 144 + enabled = true 145 + id = "desertthunder" 146 + base_url = "https://desertthunder.leaflet.pub" 147 + 148 + [[sources.leaflet]] 149 + enabled = true 150 + id = "stormlightlabs" 151 + base_url = "https://stormlightlabs.leaflet.pub" 152 + ``` 153 + 154 + - [x] Use AT Protocol instead of HTML parsing: 155 + 156 + - [x] Use `com.atproto.repo.listRecords` with collection `pub.leaflet.post`. 157 + 158 + - [x] Implement `LeafletFetcher` in `core/`: 159 + 160 + - [x] For each configured pub: 161 + 162 + - [x] Fetch records using AT Protocol. 163 + - [x] Parse `pub.leaflet.post` records. 164 + - [x] For each post: 165 + 166 + - [x] Extract `title` from record. 167 + - [x] Extract `publishedAt` or `createdAt`. 
168 + - [x] Derive summary from `summary` or `content` field. 169 + - [x] Generate URL using `slug` or record ID. 170 + - [x] Normalize date to ISO 8601 for `published_at`. 171 + - [x] Insert or replace items in storage. 172 + 173 + - [x] Wire into `sync_all_sources`. 174 + 175 + ## Milestone 3 – Query, Filter, and Export (CLI Only) 176 + 177 + **Goal:** Make local data usable even without HTTP. 178 + 179 + - [x] Implement `pai list`: 180 + - [x] Syntax: `pai list [options]` (options before operands). 181 + - [x] Options: 182 + - [x] `-k kind` filter by `source_kind` (`substack`, `bluesky`, `leaflet`). 183 + - [x] `-S id` filter by `source_id` (host/handle). 184 + - [x] `-n N` limit number of results (default 20). 185 + - [x] `-s time` “since time” (e.g. ISO 8601, or “7d” shorthand if desired). 186 + - [x] `-q pattern` simple substring filter on title/summary. 187 + - [x] Render as ASCII table or simple text. 188 + - [x] Implement `pai export`: 189 + - [x] Syntax: `pai export -f format [-o file]`. 190 + - [x] Supported formats: 191 + - [x] `json` (default). 192 + - [x] `ndjson` (optional). 193 + - [x] `rss` (optional aggregate). 194 + - [x] Options: 195 + - [x] `-f format` (`json`, `rss`, …). 196 + - [x] `-o path` output file (default stdout). 197 + - [x] Implement exit statuses for typical cases: 198 + - [x] `0` on success. 199 + - [x] `>0` on error (bad args, DB error, network failure, etc.). 200 + 201 + ## Milestone 4 – Self-hosted HTTP Server Mode 202 + 203 + **Goal:** Provide a small HTTP API backed by SQLite for self-hosted deployments. 204 + 205 + - [x] Add `serve` subcommand in `cli/`: 206 + - [x] Syntax: `pai serve [options]`. 207 + - [x] Options: 208 + - [x] `-d path` database path. 209 + - [x] `-a addr` listen address (default `127.0.0.1:8080`). 210 + - [x] Follows POSIX conventions: all options before operands. 211 + - [x] Implement HTTP server (`axum`): 212 + - [x] `GET /api/feed` – list all items, newest first. 
213 + - [x] Query params: 214 + - [x] `source_kind`, `source_id`, `limit`, `since`, `q`. 215 + - [x] Optional: 216 + - [x] `GET /api/item/{id}` for a single item. 217 + - [x] Ensure graceful shutdown and clean error handling. 218 + - [x] Document reverse-proxy examples (Caddy, nginx). 219 + 220 + ## Milestone 5 – Cloudflare Worker + D1 Frontend 221 + 222 + **Goal:** Provide an alternative deployment path using Cloudflare Workers with D1 and Cron triggers. 223 + 224 + - [ ] In `worker/`: 225 + - [ ] Depend on `worker` crate with `d1` feature enabled. 226 + - [ ] Reuse `core::Item` and parsing code (ensure crates are WASM-friendly). 227 + - [ ] Configure D1: 228 + - [ ] Provide `schema.sql` compatible with D1 (same `items` table). 229 + - [ ] Example `wrangler.toml` with `[[d1_databases]]` binding. 230 + - [ ] Implement Worker routes: 231 + - [ ] `GET /api/feed` with similar semantics as CLI server. 232 + - [ ] Implement `scheduled` handler for Cron: 233 + - [ ] On each scheduled run, call per-source syncers writing to D1. 234 + - [ ] Document cron configuration in `wrangler.toml`. 235 + - [ ] Add `pai cf-init` in `cli/`: 236 + - [ ] Generates a starter `wrangler.toml`. 237 + - [ ] Prints instructions to create D1 DB and bind it. 238 + 239 + ## Milestone 6 – POSIX Polish, Packaging, and Docs 240 + 241 + **Goal:** Make the CLI feel like a “real UNIX utility” and easy to adopt. 242 + 243 + - [ ] Verify POSIX-style argument handling: 244 + - [ ] Short options only in usage syntax; long options are optional extensions. 245 + - [ ] Options before operands/subcommands in docs and examples. 246 + - [ ] Support grouped short options where meaningful (e.g. `-hv`). 247 + - [ ] Implement: 248 + - [ ] `-h` – usage synopsis and options (per POSIX convention). 249 + - [ ] `-V` – version info. 
250 + - [ ] Add manpage-style documentation using clap_mangen (<https://crates.io/crates/clap_mangen>) in build.rs: 251 + - [ ] `man/pai.1` with SYNOPSIS, DESCRIPTION, OPTIONS, OPERANDS, EXIT STATUS, ENVIRONMENT, FILES, EXAMPLES. 252 + - [ ] Publish `pai` crate to crates.io. 253 + - [ ] Write README with: 254 + - [ ] Self-hosted quickstart. 255 + - [ ] Cloudflare Worker quickstart. 256 + - [ ] Config reference (`config.toml`). 257 + 258 + ## 2. CLI & Config Spec (POSIX-style) 259 + 260 + ### 2.1 POSIX argument conventions you’re aligning with 261 + 262 + Key constraints you want to follow: 263 + 264 + - Options are introduced by a single `-` followed by a single letter (`-h`, `-V`, `-d path`). :contentReference[oaicite:0]{index=0} 265 + - Options that require arguments use a separate token: `-d path` rather than `-dpath`. :contentReference[oaicite:1]{index=1} 266 + - Options appear before operands (here, subcommands and file paths) in the recommended syntax: 267 + `utility_name [-a] [-b arg] operand1 operand2 …`. :contentReference[oaicite:2]{index=2} 268 + - `-h` for help, `-V` for version are widely conventional. :contentReference[oaicite:3]{index=3} 269 + 270 + You *can* still offer `--long-option` aliases as a GNU-style extension; just document the POSIX short forms as canonical. :contentReference[oaicite:4]{index=4} 271 + 272 + ### 2.2 CLI synopsis 273 + 274 + **Utility name:** `pai` (single binary). 275 + 276 + #### Global synopsis 277 + 278 + ```text 279 + pai [-hV] [-C config_dir] [-d db_path] command [command-options] [command-operands] 280 + ``` 281 + 282 + - `-h` 283 + Print usage and exit. 284 + 285 + - `-V` 286 + Print version and exit. 287 + 288 + - `-C config_dir` 289 + Set configuration directory. Default: `$XDG_CONFIG_HOME/pai` or `$HOME/.config/pai`. 290 + 291 + - `-d db_path` 292 + Path to SQLite database file. Default: `$XDG_DATA_HOME/pai/pai.db` or `$HOME/.local/share/pai/pai.db`. 
293 + 294 + Subcommands are treated as **operands** in POSIX terms; each subcommand then has its own POSIX-style options. 295 + 296 + ### 2.3 Subcommands and their options 297 + 298 + #### 1. `sync` – fetch and store content 299 + 300 + ```text 301 + pai [-C config_dir] [-d db_path] sync [-a] [-k kind] [-S source_id] 302 + ``` 303 + 304 + Options: 305 + 306 + - `-a` 307 + Sync all configured sources (default if `-k` not specified). 308 + 309 + - `-k kind` 310 + Sync only a particular source kind: 311 + 312 + - `substack` 313 + - `bluesky` 314 + - `leaflet` 315 + 316 + - `-S source_id` 317 + Sync only a specific source instance (e.g. `patternmatched.substack.com`, `desertthunder.dev`, `desertthunder.leaflet.pub`, `stormlightlabs.leaflet.pub`). 318 + 319 + Examples: 320 + 321 + ```sh 322 + pai sync -a 323 + pai sync -k substack 324 + pai sync -k leaflet -S desertthunder.leaflet.pub 325 + ``` 326 + 327 + #### 2. `list` – inspect stored items 328 + 329 + ```text 330 + pai [-C config_dir] [-d db_path] list [-k kind] [-S source_id] [-n number] [-s since] [-q pattern] 331 + ``` 332 + 333 + Options: 334 + 335 + - `-k kind` 336 + Filter by source kind (`substack`, `bluesky`, `leaflet`). 337 + 338 + - `-S source_id` 339 + Filter by specific source id (host or handle). 340 + 341 + - `-n number` 342 + Maximum number of items to display (default 20). 343 + 344 + - `-s since` 345 + Only show items published at or after this time. The CLI can accept ISO 8601 (`2025-11-23T00:00:00Z`) and, as a convenience, relative strings like `7d`, `24h` if you want. 346 + 347 + - `-q pattern` 348 + Filter items whose title/summary contains the given substring. 349 + 350 + #### 3. 
`export` – produce feeds/files 351 + 352 + ```text 353 + pai [-C config_dir] [-d db_path] export [-k kind] [-S source_id] [-n number] [-s since] [-q pattern] [-f format] [-o file] 354 + ``` 355 + 356 + Options (in addition to `list` filters): 357 + 358 + - `-f format` 359 + Output format: 360 + 361 + - `json` (default) 362 + - `ndjson` 363 + - `rss` (optional) 364 + 365 + - `-o file` 366 + Output file. Default is standard output. 367 + 368 + Examples: 369 + 370 + ```sh 371 + pai export -f json -o activity.json 372 + pai export -k bluesky -n 50 -f ndjson 373 + ``` 374 + 375 + #### 4. `serve` – self-host HTTP API 376 + 377 + ```text 378 + pai [-C config_dir] [-d db_path] serve [-a address] 379 + ``` 380 + 381 + Options: 382 + 383 + - `-a address` 384 + Address to bind HTTP server to. Default: `127.0.0.1:8080`. 385 + 386 + The HTTP API mirrors the query semantics of `list` and `export`: 387 + 388 + - `GET /api/feed?source_kind=bluesky&limit=50&since=...&q=...` 389 + 390 + #### 5. `cf-init` – scaffold Cloudflare deployment 391 + 392 + ```text 393 + pai cf-init [-o dir] 394 + ``` 395 + 396 + Options: 397 + 398 + - `-o dir` 399 + Directory into which to write `wrangler.toml`, `schema.sql`, and a sample `worker` entry point. Default: current directory. 400 + 401 + This command doesn’t need DB access; it just writes templates and prints next steps (create D1 DB, bind it, set up Cron). 402 + 403 + ### 2.4 `config.toml` spec 404 + 405 + **Default location:** 406 + 407 + - `$XDG_CONFIG_HOME/pai/config.toml` or 408 + - `$HOME/.config/pai/config.toml` if `XDG_CONFIG_HOME` is unset. 409 + 410 + **Top-level layout:** 411 + 412 + ```toml 413 + [database] 414 + # Path to SQLite database for self-host mode. 415 + # Ignored by the Worker; used only by `pai` binary. 416 + path = "/home/owais/.local/share/pai/pai.db" 417 + 418 + [deployment] 419 + # Which deploy targets are configured. 420 + # "sqlite" is always available; "cloudflare" is optional. 
421 + mode = "sqlite" # or "cloudflare" 422 + 423 + [deployment.cloudflare] 424 + # Optional metadata for generating wrangler.toml, etc. 425 + worker_name = "personal-activity-index" 426 + d1_binding = "DB" 427 + database_name = "personal_activity_db" 428 + 429 + [sources.substack] 430 + enabled = true 431 + base_url = "https://patternmatched.substack.com" 432 + 433 + [sources.bluesky] 434 + enabled = true 435 + handle = "desertthunder.dev" 436 + 437 + [[sources.leaflet]] 438 + enabled = true 439 + id = "desertthunder" 440 + base_url = "https://desertthunder.leaflet.pub" 441 + 442 + [[sources.leaflet]] 443 + enabled = true 444 + id = "stormlightlabs" 445 + base_url = "https://stormlightlabs.leaflet.pub" 446 + ``` 447 + 448 + **Notes:** 449 + 450 + - The CLI should **not** require the Cloudflare section unless a user explicitly wants to generate Worker scaffolding. 451 + - The Worker itself will get its D1 binding and Cron schedule from `wrangler.toml` and the Cloudflare dashboard, not from this config file; you just reuse the same schema and `Item` type. 452 + 453 + ### 2.5 POSIX compliance checklist 454 + 455 + When you implement the CLI parsing, you can sanity-check against POSIX & GNU guidance: 456 + 457 + - Short options are single letters with a single leading `-`. ([The Open Group][1]) 458 + - Options precede non-option arguments (your commands and operands) in the usage examples. ([The Open Group][1]) 459 + - Options that take arguments are formatted as `-x arg` rather than `-xarg` in documentation. ([gnu.org][2]) 460 + - You provide `-h` / `-V` and consistent help text. ([Baeldung on Kotlin][3]) 461 + - Long options (`--help`, `--version`, `--config-dir`, etc.) can be supported as extensions but are not required for conformance. ([Software Engineering Stack Exchange][4]) 462 + 463 + [1]: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap12.html "12. 
Utility Conventions" 464 + [2]: https://www.gnu.org/s/libc/manual/html_node/Argument-Syntax.html "Argument Syntax (The GNU C Library)" 465 + [3]: https://www.baeldung.com/linux/posix "A Guide to POSIX | Baeldung on Linux" 466 + [4]: https://softwareengineering.stackexchange.com/questions/70357/command-line-options-style-posix-or-what "Command line options style - POSIX or what?"
+27
cli/src/app.rs
··· 109 109 #[arg(short = 'f')] 110 110 force: bool, 111 111 }, 112 + 113 + /// Generate or install the pai(1) manpage 114 + Man { 115 + /// Output file (default: stdout) 116 + #[arg(short = 'o', value_name = "FILE")] 117 + output: Option<PathBuf>, 118 + 119 + /// Install into a manpath directory (defaults to ~/.local/share/man if unset) 120 + #[arg(long)] 121 + install: bool, 122 + 123 + /// Custom directory for --install (e.g., /usr/local/share/man) 124 + #[arg(long, value_name = "DIR")] 125 + install_dir: Option<PathBuf>, 126 + }, 127 + 128 + /// Initialize Cloudflare Worker deployment scaffolding 129 + #[command(name = "cf-init")] 130 + CfInit { 131 + /// Output directory for scaffolding (default: current directory) 132 + #[arg(short = 'o', value_name = "DIR")] 133 + output_dir: Option<PathBuf>, 134 + 135 + /// Dry run - show what would be created without writing files 136 + #[arg(long)] 137 + dry_run: bool, 138 + }, 112 139 }
+215 -9
cli/src/main.rs
··· 8 8 use clap::Parser; 9 9 use owo_colors::OwoColorize; 10 10 use pai_core::{Config, Item, ListFilter, PaiError, SourceKind}; 11 - use std::fs::File; 11 + use std::fs; 12 12 use std::io::{self, Write}; 13 - use std::path::PathBuf; 13 + use std::path::{Path, PathBuf}; 14 14 use std::str::FromStr; 15 15 use storage::SqliteStorage; 16 16 ··· 18 18 const KIND_WIDTH: usize = 9; 19 19 const SOURCE_WIDTH: usize = 24; 20 20 const TITLE_WIDTH: usize = 60; 21 + const MAN_PAGE: &str = include_str!(env!("PAI_MAN_PAGE")); 21 22 22 23 fn main() { 23 24 let cli = Cli::parse(); ··· 31 32 Commands::Serve { address } => handle_serve(cli.db_path, address), 32 33 Commands::DbCheck => handle_db_check(cli.db_path), 33 34 Commands::Init { force } => handle_init(cli.config_dir, force), 35 + Commands::Man { output, install, install_dir } => handle_man(output, install, install_dir), 36 + Commands::CfInit { output_dir, dry_run } => handle_cf_init(output_dir, dry_run), 34 37 }; 35 38 36 39 if let Err(e) = result { ··· 170 173 return Err(PaiError::Config("Config file already exists".to_string())); 171 174 } 172 175 173 - std::fs::create_dir_all(&config_dir) 174 - .map_err(|e| PaiError::Config(format!("Failed to create config directory: {e}")))?; 176 + fs::create_dir_all(&config_dir).map_err(|e| PaiError::Config(format!("Failed to create config directory: {e}")))?; 175 177 176 178 let default_config = include_str!("../../config.example.toml"); 177 - std::fs::write(&config_path, default_config) 179 + fs::write(&config_path, default_config) 178 180 .map_err(|e| PaiError::Config(format!("Failed to write config file: {e}")))?; 179 181 180 182 println!("{} Created configuration file", "Success:".green().bold()); ··· 195 197 Ok(()) 196 198 } 197 199 200 + fn handle_man(output: Option<PathBuf>, install: bool, install_dir: Option<PathBuf>) -> Result<(), PaiError> { 201 + if install && output.is_some() { 202 + return Err(PaiError::InvalidArgument( 203 + "Use either --install or -o/--output when 
generating manpages".to_string(), 204 + )); 205 + } 206 + 207 + let target = if install { Some(resolve_man_install_path(install_dir)?) } else { output }; 208 + 209 + let mut writer = create_output_writer(target.as_ref())?; 210 + writer.write_all(MAN_PAGE.as_bytes()).map_err(PaiError::Io)?; 211 + writer.flush().map_err(PaiError::Io)?; 212 + 213 + if let Some(path) = target { 214 + if install { 215 + println!("{} Installed manpage to {}", "Success:".green(), path.display()); 216 + if let Some(root) = man_root_for(&path) { 217 + println!( 218 + "{} Ensure {} is on your MANPATH, then run {}", 219 + "Hint:".yellow(), 220 + root.display(), 221 + "man pai".bright_black() 222 + ); 223 + } else { 224 + println!( 225 + "{} Run man pai after adding the install dir to MANPATH.", 226 + "Hint:".yellow() 227 + ); 228 + } 229 + } else { 230 + println!("{} Wrote manpage to {}", "Success:".green(), path.display()); 231 + } 232 + } 233 + 234 + Ok(()) 235 + } 236 + 237 + fn resolve_man_install_path(custom_dir: Option<PathBuf>) -> Result<PathBuf, PaiError> { 238 + let base = if let Some(dir) = custom_dir { dir } else { find_writable_man_dir()? 
}; 239 + 240 + let install_dir = if base.file_name().map(|os| os == "man1").unwrap_or(false) { base } else { base.join("man1") }; 241 + 242 + fs::create_dir_all(&install_dir).map_err(|e| { 243 + PaiError::Io(io::Error::new( 244 + e.kind(), 245 + format!("Failed to create man directory {}: {}", install_dir.display(), e), 246 + )) 247 + })?; 248 + 249 + Ok(install_dir.join("pai.1")) 250 + } 251 + 252 + fn find_writable_man_dir() -> Result<PathBuf, PaiError> { 253 + let candidates = [ 254 + dirs::data_local_dir().map(|d| d.join("man")), 255 + dirs::home_dir().map(|d| d.join(".local/share/man")), 256 + Some(PathBuf::from("/usr/local/share/man")), 257 + Some(PathBuf::from("/opt/homebrew/share/man")), 258 + Some(PathBuf::from("/usr/local/Homebrew/share/man")), 259 + ]; 260 + 261 + for candidate in candidates.iter().flatten() { 262 + if candidate.exists() { 263 + let test_file = candidate.join(".pai-write-test"); 264 + if fs::write(&test_file, b"test").is_ok() { 265 + let _ = fs::remove_file(&test_file); 266 + return Ok(candidate.clone()); 267 + } 268 + } else if let Some(parent) = candidate.parent() { 269 + if parent.exists() { 270 + let test_dir = candidate.join("man1"); 271 + if fs::create_dir_all(&test_dir).is_ok() { 272 + let _ = fs::remove_dir_all(&test_dir); 273 + return Ok(candidate.clone()); 274 + } 275 + } 276 + } 277 + } 278 + 279 + if let Some(data_dir) = dirs::data_local_dir() { 280 + return Ok(data_dir.join("man")); 281 + } 282 + 283 + Err(PaiError::Config( 284 + "Unable to find a writable man page directory. 
Use --install-dir to specify one.".to_string(), 285 + )) 286 + } 287 + 288 + fn man_root_for(path: &Path) -> Option<&Path> { 289 + path.parent()?.parent() 290 + } 291 + 292 + fn handle_cf_init(output_dir: Option<PathBuf>, dry_run: bool) -> Result<(), PaiError> { 293 + let target_dir = output_dir.unwrap_or_else(|| PathBuf::from(".")); 294 + 295 + let wrangler_template = include_str!("../../worker/wrangler.example.toml"); 296 + let schema_sql = include_str!("../../worker/schema.sql"); 297 + 298 + let readme_content = r#"# Cloudflare Worker Deployment 299 + 300 + ## Quick Start 301 + 302 + 1. **Create D1 Database:** 303 + ```sh 304 + wrangler d1 create personal-activity-db 305 + ``` 306 + 307 + 2. **Copy the configuration:** 308 + ```sh 309 + cp wrangler.example.toml wrangler.toml 310 + ``` 311 + 312 + 3. **Update `wrangler.toml`:** 313 + - Replace `{DATABASE_ID}` with the ID from step 1 314 + - Adjust `name` and `database_name` if desired 315 + 316 + 4. **Initialize the database schema:** 317 + ```sh 318 + wrangler d1 execute personal-activity-db --file=schema.sql 319 + ``` 320 + 321 + 5. **Build the worker:** 322 + ```sh 323 + cd .. 324 + cargo install worker-build 325 + worker-build --release -p pai-worker 326 + ``` 327 + 328 + 6. **Deploy:** 329 + ```sh 330 + cd worker 331 + wrangler deploy 332 + ``` 333 + 334 + ## Testing Locally 335 + 336 + Run the worker locally with: 337 + ```sh 338 + wrangler dev 339 + ``` 340 + 341 + ## Scheduled Syncs 342 + 343 + The worker is configured with a cron trigger (see `wrangler.toml`). The default schedule runs every hour. 344 + To modify the schedule, edit the `crons` array in `wrangler.toml`. 
345 + 346 + ## API Endpoints 347 + 348 + - `GET /api/feed` - List items with optional filters 349 + - `GET /api/item/:id` - Get a single item by ID 350 + - `GET /status` - Health check 351 + 352 + ## Environment Variables 353 + 354 + Configure in `wrangler.toml` under `[vars]`: 355 + - `LOG_LEVEL` - Set logging verbosity (optional) 356 + "#; 357 + 358 + let files = vec![ 359 + ("wrangler.example.toml", wrangler_template), 360 + ("schema.sql", schema_sql), 361 + ("README.md", readme_content), 362 + ]; 363 + 364 + if dry_run { 365 + println!("{} Dry run - showing files that would be created:\n", "Info:".cyan()); 366 + for (filename, content) in &files { 367 + let path = target_dir.join(filename); 368 + println!(" {} {}", "Would create:".bright_black(), path.display()); 369 + println!(" {} bytes", content.len()); 370 + } 371 + println!("\n{} Run without --dry-run to create these files", "Hint:".yellow()); 372 + return Ok(()); 373 + } 374 + 375 + fs::create_dir_all(&target_dir)?; 376 + 377 + for (filename, content) in &files { 378 + let path = target_dir.join(filename); 379 + if path.exists() { 380 + println!("{} {} already exists, skipping", "Warning:".yellow(), filename); 381 + continue; 382 + } 383 + fs::write(&path, content)?; 384 + println!("{} Created {}", "Success:".green(), path.display()); 385 + } 386 + 387 + println!("\n{} Cloudflare Worker scaffolding created!", "Success:".green().bold()); 388 + println!("\n{} Next steps:", "Info:".cyan()); 389 + println!(" 1. cd {}", target_dir.display()); 390 + println!(" 2. Read README.md for deployment instructions"); 391 + println!(" 3. wrangler d1 create personal-activity-db"); 392 + println!(" 4. 
Update wrangler.example.toml with your database ID"); 393 + 394 + Ok(()) 395 + } 396 + 198 397 fn normalize_since_input(since: Option<String>) -> Result<Option<String>, PaiError> { 199 398 normalize_since_with_now(since, Utc::now()) 200 399 } ··· 224 423 return Ok(Some(dt.with_timezone(&Utc).to_rfc3339())); 225 424 } 226 425 227 - Err(PaiError::InvalidArgument(format!( 426 + let msg = format!( 228 427 "Invalid since value '{value}'. Use ISO 8601 (e.g. 2024-01-01T00:00:00Z) or relative forms like 7d/24h/60m." 229 - ))) 428 + ); 429 + Err(PaiError::InvalidArgument(msg)) 230 430 } 231 431 232 432 fn parse_relative_duration(input: &str) -> Option<Duration> { ··· 296 496 if let Some(path) = path { 297 497 if let Some(parent) = path.parent() { 298 498 if !parent.as_os_str().is_empty() { 299 - std::fs::create_dir_all(parent)?; 499 + fs::create_dir_all(parent)?; 300 500 } 301 501 } 302 - let file = File::create(path)?; 502 + let file = fs::File::create(path)?; 303 503 Ok(Box::new(file)) 304 504 } else { 305 505 Ok(Box::new(io::stdout())) ··· 547 747 fn truncate_column_adds_ellipsis() { 548 748 let truncated = truncate_for_column("abcdefghijklmnopqrstuvwxyz", 8); 549 749 assert_eq!(truncated, "abcde..."); 750 + } 751 + 752 + #[test] 753 + fn manpage_contains_name_section() { 754 + assert!(MAN_PAGE.contains("NAME")); 755 + assert!(MAN_PAGE.contains("pai")); 550 756 } 551 757 }
+17 -5
cli/src/server.rs
··· 10 10 use owo_colors::OwoColorize; 11 11 use pai_core::{Item, ListFilter, PaiError, SourceKind}; 12 12 use serde::{Deserialize, Serialize}; 13 - use std::io; 14 - use std::{net::SocketAddr, path::PathBuf, sync::Arc}; 13 + use std::{io, net::SocketAddr, path::PathBuf, sync::Arc, time::Instant}; 15 14 use tokio::net::TcpListener; 16 15 17 16 const DEFAULT_LIMIT: usize = 20; 17 + const VERSION: &str = env!("CARGO_PKG_VERSION"); 18 18 19 19 /// Launches the HTTP server using the provided SQLite database path and address. 20 20 pub(crate) fn serve(db_path: PathBuf, address: String) -> Result<(), PaiError> { ··· 35 35 storage.verify_schema()?; 36 36 drop(storage); 37 37 38 - let state = AppState { db_path: Arc::new(db_path) }; 38 + let state = AppState { db_path: Arc::new(db_path), start_time: Instant::now() }; 39 39 40 40 let app = Router::new() 41 41 .route("/api/feed", get(feed_handler)) ··· 56 56 #[derive(Clone)] 57 57 struct AppState { 58 58 db_path: Arc<PathBuf>, 59 + start_time: Instant, 59 60 } 60 61 61 62 impl AppState { ··· 72 73 .map(|(kind, count)| SourceStat { kind, count }) 73 74 .collect(); 74 75 75 - Ok(StatusResponse { status: "ok", database_path: self.db_path.display().to_string(), total_items, sources }) 76 + Ok(StatusResponse { 77 + status: "ok", 78 + version: VERSION, 79 + uptime_seconds: self.start_time.elapsed().as_secs(), 80 + database_path: self.db_path.display().to_string(), 81 + total_items, 82 + sources, 83 + }) 76 84 } 77 85 } 78 86 ··· 111 119 #[derive(Serialize)] 112 120 struct StatusResponse { 113 121 status: &'static str, 122 + version: &'static str, 123 + uptime_seconds: u64, 114 124 database_path: String, 115 125 total_items: usize, 116 126 sources: Vec<SourceStat>, ··· 240 250 fn status_snapshot_reports_counts() { 241 251 let dir = tempdir().unwrap(); 242 252 let db_path = dir.path().join("status.db"); 243 - let state = AppState { db_path: Arc::new(db_path.clone()) }; 253 + let state = AppState { db_path: Arc::new(db_path.clone()), 
start_time: Instant::now() }; 244 254 245 255 let storage = state.open_storage().unwrap(); 246 256 let now = Utc::now().to_rfc3339(); ··· 260 270 261 271 let snapshot = state.status_snapshot().unwrap(); 262 272 assert_eq!(snapshot.status, "ok"); 273 + assert_eq!(snapshot.version, VERSION); 274 + assert!(snapshot.uptime_seconds < 5); 263 275 assert_eq!(snapshot.total_items, 1); 264 276 assert_eq!(snapshot.sources.len(), 1); 265 277 assert_eq!(snapshot.sources[0].kind, "substack");
+4
conf/.gitignore
··· 1 + # Ignore actual config files (keep only examples in git) 2 + /nginx.local.conf 3 + /Caddyfile.local 4 + *.local.*
+31
conf/Caddyfile
··· 1 + # Caddyfile for Personal Activity Index 2 + # Caddy automatically handles HTTPS with Let's Encrypt 3 + 4 + # Basic configuration for localhost (note: Caddy serves localhost over HTTPS with a locally-trusted certificate by default; use "http://localhost" as the site address to force plain HTTP) 5 + localhost { 6 + reverse_proxy 127.0.0.1:8080 7 + encode gzip zstd 8 + } 9 + 10 + # Configuration with custom domain 11 + # Uncomment and replace example.com with your domain: 12 + # 13 + # pai.example.com { 14 + # reverse_proxy 127.0.0.1:8080 15 + # encode gzip zstd 16 + # 17 + # header { 18 + # Referrer-Policy "no-referrer-when-downgrade" 19 + # X-Content-Type-Options "nosniff" 20 + # X-Frame-Options "SAMEORIGIN" 21 + # } 22 + # 23 + # # Optional: Rate limiting (requires the third-party rate_limit Caddy plugin) 24 + # # rate_limit { 25 + # # zone static { 26 + # # key {remote_host} 27 + # # events 100 28 + # # window 1m 29 + # # } 30 + # # } 31 + # }
+307
conf/README.md
··· 1 + # Personal Activity Index - Reverse Proxy Configurations 2 + 3 + This directory contains example reverse proxy configurations for deploying the Personal Activity Index HTTP server behind nginx or Caddy. 4 + 5 + ## Quick Start 6 + 7 + ### Option 1: nginx 8 + 9 + #### macOS 10 + 11 + 1. Install nginx: 12 + 13 + ```sh 14 + brew install nginx 15 + ``` 16 + 17 + 2. Copy the configuration: 18 + 19 + ```sh 20 + # For localhost testing 21 + cp nginx.conf /opt/homebrew/etc/nginx/servers/pai.conf 22 + 23 + # Or symlink to keep it in sync 24 + ln -s $(pwd)/nginx.conf /opt/homebrew/etc/nginx/servers/pai.conf 25 + ``` 26 + 27 + 3. Start the pai server: 28 + 29 + ```sh 30 + pai serve -a 127.0.0.1:8080 31 + ``` 32 + 33 + 4. Start nginx: 34 + 35 + ```sh 36 + brew services start nginx 37 + ``` 38 + 39 + 5. Access at <http://localhost> 40 + 41 + #### Linux 42 + 43 + 1. Install nginx: 44 + 45 + ```sh 46 + # Debian/Ubuntu 47 + sudo apt install nginx 48 + 49 + # RHEL/Fedora 50 + sudo dnf install nginx 51 + ``` 52 + 53 + 2. Copy the configuration: 54 + 55 + ```sh 56 + sudo cp nginx.conf /etc/nginx/sites-available/pai 57 + sudo ln -s /etc/nginx/sites-available/pai /etc/nginx/sites-enabled/ 58 + ``` 59 + 60 + 3. Start the pai server: 61 + 62 + ```sh 63 + pai serve -a 127.0.0.1:8080 64 + ``` 65 + 66 + 4. Test and reload nginx: 67 + 68 + ```sh 69 + sudo nginx -t 70 + sudo systemctl reload nginx 71 + ``` 72 + 73 + 5. Access at <http://localhost> 74 + 75 + ### Option 2: Caddy 76 + 77 + #### macOS 78 + 79 + 1. Install Caddy: 80 + 81 + ```sh 82 + brew install caddy 83 + ``` 84 + 85 + 2. Copy the Caddyfile: 86 + 87 + ```sh 88 + cp Caddyfile /opt/homebrew/etc/Caddyfile 89 + ``` 90 + 91 + 3. Start the pai server: 92 + 93 + ```sh 94 + pai serve -a 127.0.0.1:8080 95 + ``` 96 + 97 + 4. Start Caddy: 98 + 99 + ```sh 100 + brew services start caddy 101 + ``` 102 + 103 + 5. Access at <http://localhost> 104 + 105 + #### Linux 106 + 107 + 1. 
Install Caddy: 108 + 109 + ```sh 110 + # See https://caddyserver.com/docs/install 111 + 112 + # Debian/Ubuntu 113 + sudo apt install -y debian-keyring debian-archive-keyring apt-transport-https 114 + curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/gpg.key' | sudo gpg --dearmor -o /usr/share/keyrings/caddy-stable-archive-keyring.gpg 115 + curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/debian.deb.txt' | sudo tee /etc/apt/sources.list.d/caddy-stable.list 116 + sudo apt update 117 + sudo apt install caddy 118 + ``` 119 + 120 + 2. Copy the Caddyfile: 121 + 122 + ```sh 123 + sudo cp Caddyfile /etc/caddy/Caddyfile 124 + ``` 125 + 126 + 3. Start the pai server: 127 + 128 + ```sh 129 + pai serve -a 127.0.0.1:8080 130 + ``` 131 + 132 + 4. Reload Caddy: 133 + 134 + ```sh 135 + sudo systemctl reload caddy 136 + ``` 137 + 138 + 5. Access at <http://localhost> 139 + 140 + ## Production Deployment with Custom Domain 141 + 142 + ### nginx with SSL 143 + 144 + 1. Edit `nginx.conf` and replace `localhost` with your domain (e.g., `pai.example.com`) 145 + 146 + 2. Obtain SSL certificates using certbot: 147 + 148 + ```sh 149 + # macOS 150 + brew install certbot 151 + 152 + # Linux 153 + sudo apt install certbot python3-certbot-nginx # Debian/Ubuntu 154 + sudo dnf install certbot python3-certbot-nginx # RHEL/Fedora 155 + ``` 156 + 157 + 3. Get certificates: 158 + 159 + ```sh 160 + sudo certbot --nginx -d pai.example.com 161 + ``` 162 + 163 + 4. Certbot will automatically update your nginx configuration with SSL settings 164 + 165 + 5. Set up auto-renewal: 166 + 167 + ```sh 168 + # Test renewal 169 + sudo certbot renew --dry-run 170 + 171 + # On Linux, certbot sets up a systemd timer automatically 172 + # On macOS, add to crontab: 173 + sudo crontab -e 174 + # Add: 0 0 * * * certbot renew --quiet 175 + ``` 176 + 177 + ### Caddy with Custom Domain 178 + 179 + 1. Edit `Caddyfile` and uncomment the production section 180 + 181 + 2. 
Replace `pai.example.com` with your actual domain 182 + 183 + 3. Ensure DNS A/AAAA records point to your server 184 + 185 + 4. Reload Caddy: 186 + 187 + ```sh 188 + sudo systemctl reload caddy # Linux 189 + brew services restart caddy # macOS 190 + ``` 191 + 192 + Caddy automatically obtains and renews SSL certificates from Let's Encrypt - no additional configuration needed! 193 + 194 + ## Running pai as a System Service 195 + 196 + ### macOS (launchd) 197 + 198 + Create `/Library/LaunchDaemons/com.pai.server.plist`: 199 + 200 + ```xml 201 + <?xml version="1.0" encoding="UTF-8"?> 202 + <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> 203 + <plist version="1.0"> 204 + <dict> 205 + <key>Label</key> 206 + <string>com.pai.server</string> 207 + <key>ProgramArguments</key> 208 + <array> 209 + <string>/usr/local/bin/pai</string> 210 + <string>serve</string> 211 + <string>-a</string> 212 + <string>127.0.0.1:8080</string> 213 + <string>-d</string> 214 + <string>/var/lib/pai/pai.db</string> 215 + </array> 216 + <key>RunAtLoad</key> 217 + <true/> 218 + <key>KeepAlive</key> 219 + <true/> 220 + <key>StandardOutPath</key> 221 + <string>/var/log/pai/stdout.log</string> 222 + <key>StandardErrorPath</key> 223 + <string>/var/log/pai/stderr.log</string> 224 + <key>WorkingDirectory</key> 225 + <string>/var/lib/pai</string> 226 + </dict> 227 + </plist> 228 + ``` 229 + 230 + Load the service: 231 + 232 + ```sh 233 + sudo launchctl load /Library/LaunchDaemons/com.pai.server.plist 234 + ``` 235 + 236 + ### Linux (systemd) 237 + 238 + Create `/etc/systemd/system/pai.service`: 239 + 240 + ```ini 241 + [Unit] 242 + Description=Personal Activity Index 243 + After=network.target 244 + 245 + [Service] 246 + Type=simple 247 + ExecStart=/usr/local/bin/pai serve -a 127.0.0.1:8080 -d /var/lib/pai/pai.db 248 + Restart=on-failure 249 + RestartSec=5 250 + User=pai 251 + Group=pai 252 + WorkingDirectory=/var/lib/pai 253 + 254 + [Install] 255 + 
WantedBy=multi-user.target 256 + ``` 257 + 258 + Create the pai user and directories: 259 + 260 + ```sh 261 + sudo useradd -r -s /bin/false pai 262 + sudo mkdir -p /var/lib/pai 263 + sudo chown pai:pai /var/lib/pai 264 + ``` 265 + 266 + Enable and start the service: 267 + 268 + ```sh 269 + sudo systemctl daemon-reload 270 + sudo systemctl enable pai 271 + sudo systemctl start pai 272 + ``` 273 + 274 + Check status: 275 + 276 + ```sh 277 + sudo systemctl status pai 278 + ``` 279 + 280 + View logs: 281 + 282 + ```sh 283 + sudo journalctl -u pai -f 284 + ``` 285 + 286 + ## Testing 287 + 288 + Verify the proxy is working: 289 + 290 + ```sh 291 + # Health check 292 + curl http://localhost/status 293 + 294 + # API endpoint 295 + curl http://localhost/api/feed?limit=5 296 + 297 + # Specific item 298 + curl http://localhost/api/item/some-item-id 299 + ``` 300 + 301 + ## Additional Resources 302 + 303 + - [nginx documentation](https://nginx.org/en/docs/) 304 + - [Caddy documentation](https://caddyserver.com/docs/) 305 + - [Let's Encrypt](https://letsencrypt.org/) 306 + - [Personal Activity Index main documentation](../README.md) 307 + - [Deployment guide](../DEPLOYMENT.md)
+30
conf/nginx.conf
··· 1 + # nginx configuration for Personal Activity Index 2 + # This file provides a basic reverse proxy setup for pai serve 3 + 4 + server { 5 + listen 80; 6 + server_name localhost; 7 + 8 + # For a custom domain, replace localhost with your domain and add SSL configuration: 9 + # listen 443 ssl http2; 10 + # ssl_certificate /path/to/cert.pem; 11 + # ssl_certificate_key /path/to/key.pem; 12 + 13 + location / { 14 + proxy_pass http://127.0.0.1:8080; 15 + proxy_set_header Host $host; 16 + proxy_set_header X-Real-IP $remote_addr; 17 + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 18 + proxy_set_header X-Forwarded-Proto $scheme; 19 + 20 + # Optional: Add response headers 21 + add_header X-Frame-Options "SAMEORIGIN" always; 22 + add_header X-Content-Type-Options "nosniff" always; 23 + } 24 + 25 + # Optional: Health check endpoint for load balancers 26 + location /healthz { 27 + proxy_pass http://127.0.0.1:8080/status; 28 + access_log off; 29 + } 30 + }
+14 -1
worker/Cargo.toml
··· 1 1 [package] 2 2 name = "pai-worker" 3 3 version = "0.1.0" 4 - edition = "2024" 4 + edition = "2021" 5 + 6 + [lib] 7 + crate-type = ["cdylib"] 5 8 6 9 [dependencies] 10 + pai-core = { path = "../core" } 11 + worker = { version = "0.6", features = ["d1"] } 12 + serde = { version = "1.0", features = ["derive"] } 13 + serde_json = "1.0" 14 + serde_urlencoded = "0.7" 15 + chrono = { version = "0.4", default-features = false, features = [ 16 + "serde", 17 + "wasmbind", 18 + ] } 19 + rss = { version = "2.0", default-features = false }
+18
worker/schema.sql
··· 1 + -- Personal Activity Index D1 Schema 2 + -- This schema is compatible with both SQLite (CLI) and D1 (Worker) 3 + 4 + CREATE TABLE IF NOT EXISTS items ( 5 + id TEXT PRIMARY KEY, 6 + source_kind TEXT NOT NULL, 7 + source_id TEXT NOT NULL, 8 + author TEXT, 9 + title TEXT, 10 + summary TEXT, 11 + url TEXT NOT NULL, 12 + content_html TEXT, 13 + published_at TEXT NOT NULL, 14 + created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP 15 + ); 16 + 17 + CREATE INDEX IF NOT EXISTS idx_items_source_date ON items (source_kind, source_id, published_at DESC); 18 + CREATE INDEX IF NOT EXISTS idx_items_published ON items (published_at DESC);
+575 -5
worker/src/lib.rs
··· 1 - pub fn add(left: u64, right: u64) -> u64 { 2 - left + right 1 + use pai_core::{Item, ListFilter, SourceKind}; 2 + use serde::{Deserialize, Serialize}; 3 + use wasm_bindgen::JsValue; 4 + use worker::*; 5 + 6 + #[derive(Deserialize)] 7 + struct SyncConfig { 8 + substack: Option<SubstackConfig>, 9 + bluesky: Option<BlueskyConfig>, 10 + leaflet: Vec<LeafletConfig>, 11 + } 12 + 13 + #[derive(Deserialize)] 14 + struct SubstackConfig { 15 + base_url: String, 16 + } 17 + 18 + #[derive(Deserialize)] 19 + struct BlueskyConfig { 20 + handle: String, 21 + } 22 + 23 + #[derive(Deserialize)] 24 + struct LeafletConfig { 25 + id: String, 26 + base_url: String, 27 + } 28 + 29 + #[derive(Deserialize)] 30 + struct FeedParams { 31 + source_kind: Option<SourceKind>, 32 + source_id: Option<String>, 33 + limit: Option<usize>, 34 + since: Option<String>, 35 + q: Option<String>, 36 + } 37 + 38 + #[derive(Serialize)] 39 + struct FeedResponse { 40 + items: Vec<Item>, 41 + } 42 + 43 + #[derive(Serialize)] 44 + struct StatusResponse { 45 + status: &'static str, 46 + version: &'static str, 47 + } 48 + 49 + #[event(fetch)] 50 + async fn fetch(req: Request, env: Env, _ctx: Context) -> Result<Response> { 51 + let router = Router::new(); 52 + router 53 + .get_async("/api/feed", |req, ctx| async move { handle_feed(req, ctx).await }) 54 + .get_async("/api/item/:id", |_req, ctx| async move { 55 + let id = ctx 56 + .param("id") 57 + .ok_or_else(|| Error::RustError("Missing id parameter".into()))?; 58 + handle_item(id, &ctx).await 59 + }) 60 + .get("/status", |_req, _ctx| { 61 + let version = env!("CARGO_PKG_VERSION"); 62 + let status = StatusResponse { status: "ok", version }; 63 + Response::from_json(&status) 64 + }) 65 + .run(req, env) 66 + .await 67 + } 68 + 69 + #[event(scheduled)] 70 + async fn scheduled(_event: ScheduledEvent, env: Env, _ctx: ScheduleContext) { 71 + if let Err(e) = run_sync(&env).await { 72 + console_error!("Scheduled sync failed: {}", e); 73 + } 74 + } 75 + 76 + async fn 
handle_feed(req: Request, ctx: RouteContext<()>) -> Result<Response> { 77 + let url = req.url()?; 78 + let params: FeedParams = serde_urlencoded::from_str(url.query().unwrap_or("")) 79 + .map_err(|e| Error::RustError(format!("Invalid query parameters: {e}")))?; 80 + 81 + let filter = ListFilter { 82 + source_kind: params.source_kind, 83 + source_id: params.source_id, 84 + limit: Some(params.limit.unwrap_or(20)), 85 + since: params.since, 86 + query: params.q, 87 + }; 88 + 89 + let db = ctx.env.d1("DB")?; 90 + let items = query_items(&db, &filter).await?; 91 + 92 + let response = FeedResponse { items }; 93 + Response::from_json(&response) 94 + } 95 + 96 + async fn handle_item(id: &str, ctx: &RouteContext<()>) -> Result<Response> { 97 + let db = ctx.env.d1("DB")?; 98 + let stmt = db.prepare("SELECT * FROM items WHERE id = ?1").bind(&[id.into()])?; 99 + 100 + let result = stmt.first::<Item>(None).await?; 101 + 102 + match result { 103 + Some(item) => Response::from_json(&item), 104 + None => Response::error("Item not found", 404), 105 + } 106 + } 107 + 108 + async fn query_items(db: &D1Database, filter: &ListFilter) -> Result<Vec<Item>> { 109 + let mut query = String::from( 110 + "SELECT id, source_kind, source_id, author, title, summary, url, content_html, published_at, created_at FROM items WHERE 1=1" 111 + ); 112 + let mut bindings = vec![]; 113 + 114 + if let Some(kind) = filter.source_kind { 115 + query.push_str(" AND source_kind = ?"); 116 + bindings.push(kind.to_string().into()); 117 + } 118 + 119 + if let Some(ref source_id) = filter.source_id { 120 + query.push_str(" AND source_id = ?"); 121 + bindings.push(source_id.clone().into()); 122 + } 123 + 124 + if let Some(ref since) = filter.since { 125 + query.push_str(" AND published_at >= ?"); 126 + bindings.push(since.clone().into()); 127 + } 128 + 129 + if let Some(ref q) = filter.query { 130 + query.push_str(" AND (title LIKE ? 
OR summary LIKE ?)"); 131 + let pattern = format!("%{q}%"); 132 + bindings.push(pattern.clone().into()); 133 + bindings.push(pattern.into()); 134 + } 135 + 136 + query.push_str(" ORDER BY published_at DESC"); 137 + 138 + if let Some(limit) = filter.limit { 139 + query.push_str(" LIMIT ?"); 140 + bindings.push((limit as i64).into()); 141 + } 142 + 143 + let mut stmt = db.prepare(&query); 144 + for binding in bindings { 145 + stmt = stmt.bind(&[binding])?; 146 + } 147 + 148 + let results = stmt.all().await?; 149 + let items: Vec<Item> = results.results()?; 150 + 151 + Ok(items) 152 + } 153 + 154 + async fn run_sync(env: &Env) -> Result<()> { 155 + let config = load_sync_config(env)?; 156 + 157 + let db = env.d1("DB")?; 158 + let mut synced = 0; 159 + 160 + if let Some(substack_config) = config.substack { 161 + match sync_substack(&substack_config, &db).await { 162 + Ok(count) => { 163 + console_log!("Synced {} items from Substack", count); 164 + synced += count; 165 + } 166 + Err(e) => console_error!("Substack sync failed: {}", e), 167 + } 168 + } 169 + 170 + if let Some(bluesky_config) = config.bluesky { 171 + match sync_bluesky(&bluesky_config, &db).await { 172 + Ok(count) => { 173 + console_log!("Synced {} items from Bluesky", count); 174 + synced += count; 175 + } 176 + Err(e) => console_error!("Bluesky sync failed: {}", e), 177 + } 178 + } 179 + 180 + for leaflet_config in config.leaflet { 181 + match sync_leaflet(&leaflet_config, &db).await { 182 + Ok(count) => { 183 + console_log!("Synced {} items from Leaflet ({})", count, leaflet_config.id); 184 + synced += count; 185 + } 186 + Err(e) => console_error!("Leaflet sync failed for {}: {}", leaflet_config.id, e), 187 + } 188 + } 189 + 190 + console_log!("Sync completed: {} total items", synced); 191 + Ok(()) 192 + } 193 + 194 + fn load_sync_config(env: &Env) -> Result<SyncConfig> { 195 + let substack = env 196 + .var("SUBSTACK_URL") 197 + .ok() 198 + .map(|url| SubstackConfig { base_url: url.to_string() }); 199 + 
200 + let bluesky = env 201 + .var("BLUESKY_HANDLE") 202 + .ok() 203 + .map(|handle| BlueskyConfig { handle: handle.to_string() }); 204 + 205 + let leaflet = if let Ok(urls) = env.var("LEAFLET_URLS") { 206 + urls.to_string() 207 + .split(',') 208 + .filter_map(|entry| { 209 + let parts: Vec<&str> = entry.trim().splitn(2, ':').collect(); 210 + if parts.len() == 2 { 211 + Some(LeafletConfig { id: parts[0].to_string(), base_url: parts[1].to_string() }) 212 + } else { 213 + None 214 + } 215 + }) 216 + .collect() 217 + } else { 218 + Vec::new() 219 + }; 220 + 221 + Ok(SyncConfig { substack, bluesky, leaflet }) 222 + } 223 + 224 + async fn sync_substack(config: &SubstackConfig, db: &D1Database) -> Result<usize> { 225 + let feed_url = format!("{}/feed", config.base_url); 226 + 227 + let mut req = Request::new(&feed_url, Method::Get)?; 228 + req.headers_mut()?.set("User-Agent", "pai-worker/0.1.0")?; 229 + 230 + let mut resp = Fetch::Request(req).send().await?; 231 + let body = resp.text().await?; 232 + 233 + let channel = 234 + rss::Channel::read_from(body.as_bytes()).map_err(|e| Error::RustError(format!("Failed to parse RSS: {e}")))?; 235 + 236 + let source_id = normalize_source_id(&config.base_url); 237 + let mut count = 0; 238 + 239 + for item in channel.items() { 240 + let id = item.guid().map(|g| g.value()).unwrap_or(item.link().unwrap_or("")); 241 + let url = item.link().unwrap_or(id); 242 + let title = item.title(); 243 + let summary = item.description(); 244 + let author = item.author(); 245 + let content_html = item.content(); 246 + 247 + let published_at = item 248 + .pub_date() 249 + .and_then(|s| chrono::DateTime::parse_from_rfc2822(s).ok()) 250 + .map(|dt| dt.to_rfc3339()) 251 + .unwrap_or_else(|| chrono::Utc::now().to_rfc3339()); 252 + 253 + let created_at = chrono::Utc::now().to_rfc3339(); 254 + 255 + let stmt = db.prepare( 256 + "INSERT OR REPLACE INTO items (id, source_kind, source_id, author, title, summary, url, content_html, published_at, created_at) 
257 + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)" 258 + ); 259 + 260 + stmt.bind(&[ 261 + id.into(), 262 + "substack".into(), 263 + source_id.clone().into(), 264 + author.map(|s| s.into()).unwrap_or(JsValue::NULL), 265 + title.map(|s| s.into()).unwrap_or(JsValue::NULL), 266 + summary.map(|s| s.into()).unwrap_or(JsValue::NULL), 267 + url.into(), 268 + content_html.map(|s| s.into()).unwrap_or(JsValue::NULL), 269 + published_at.into(), 270 + created_at.into(), 271 + ])? 272 + .run() 273 + .await?; 274 + 275 + count += 1; 276 + } 277 + 278 + Ok(count) 279 + } 280 + 281 + async fn sync_bluesky(config: &BlueskyConfig, db: &D1Database) -> Result<usize> { 282 + let api_url = format!( 283 + "https://public.api.bsky.app/xrpc/app.bsky.feed.getAuthorFeed?actor={}&limit=50", 284 + config.handle 285 + ); 286 + 287 + let mut req = Request::new(&api_url, Method::Get)?; 288 + req.headers_mut()?.set("User-Agent", "pai-worker/0.1.0")?; 289 + 290 + let mut resp = Fetch::Request(req).send().await?; 291 + let json: serde_json::Value = resp.json().await?; 292 + 293 + let feed = json["feed"] 294 + .as_array() 295 + .ok_or_else(|| Error::RustError("Invalid Bluesky response".into()))?; 296 + 297 + let mut count = 0; 298 + 299 + for item in feed { 300 + let post = &item["post"]; 301 + 302 + if item.get("reason").is_some() { 303 + continue; 304 + } 305 + 306 + let uri = post["uri"] 307 + .as_str() 308 + .ok_or_else(|| Error::RustError("Missing URI".into()))?; 309 + let record = &post["record"]; 310 + let text = record["text"].as_str().unwrap_or(""); 311 + 312 + let post_id = uri.split('/').next_back().unwrap_or(""); 313 + let url = format!("https://bsky.app/profile/{}/post/{}", config.handle, post_id); 314 + 315 + let title = if text.len() > 100 { format!("{}...", &text[..97]) } else { text.to_string() }; 316 + 317 + let published_at = record["createdAt"].as_str().unwrap_or("").to_string(); 318 + let created_at = chrono::Utc::now().to_rfc3339(); 319 + 320 + let stmt = db.prepare( 321 + 
"INSERT OR REPLACE INTO items (id, source_kind, source_id, author, title, summary, url, content_html, published_at, created_at)
             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)"
        );

        // Upsert: author = the Bluesky handle, content_html unused (NULL).
        stmt.bind(&[
            uri.into(),
            "bluesky".into(),
            config.handle.clone().into(),
            config.handle.clone().into(),
            title.into(),
            text.into(),
            url.into(),
            JsValue::NULL,
            published_at.into(),
            created_at.into(),
        ])?
        .run()
        .await?;

        count += 1;
    }

    Ok(count)
}

/// Syncs recent posts from a Leaflet publication into the `items` table.
///
/// Fetches up to 50 `pub.leaflet.post` records for the publication's repo via
/// the public Bluesky AppView and upserts each one (keyed by AT-URI).
///
/// Returns the number of records written, or an error if the HTTP request,
/// JSON decoding, or any D1 statement fails. A single malformed record
/// (missing `uri`) aborts the whole sync via `?`.
async fn sync_leaflet(config: &LeafletConfig, db: &D1Database) -> Result<usize> {
    let host = normalize_source_id(&config.base_url);
    let subdomain = host.split('.').next().unwrap_or(&host);
    // NOTE(review): assumes the publication's repo handle is
    // `<subdomain>.bsky.social` — TODO confirm; a custom handle or
    // did:plc identifier would break this lookup.
    let did = format!("{subdomain}.bsky.social");

    let api_url = format!(
        "https://public.api.bsky.app/xrpc/com.atproto.repo.listRecords?repo={did}&collection=pub.leaflet.post&limit=50"
    );

    let mut req = Request::new(&api_url, Method::Get)?;
    req.headers_mut()?.set("User-Agent", "pai-worker/0.1.0")?;

    let mut resp = Fetch::Request(req).send().await?;
    let json: serde_json::Value = resp.json().await?;

    let records = json["records"]
        .as_array()
        .ok_or_else(|| Error::RustError("Invalid Leaflet response".into()))?;

    let mut count = 0;

    for record in records {
        let uri = record["uri"]
            .as_str()
            .ok_or_else(|| Error::RustError("Missing URI".into()))?;
        let value = &record["value"];

        let title = value["title"].as_str().unwrap_or("Untitled");
        let summary = value["summary"].as_str().or(value["content"].as_str()).unwrap_or("");
        let slug = value["slug"].as_str().unwrap_or("");

        // Trim any trailing '/' so a configured base_url like
        // "https://pub.leaflet.pub/" does not produce "…pub//slug".
        // (normalize_source_id already anticipates such inputs.)
        let base = config.base_url.trim_end_matches('/');
        let url = if !slug.is_empty() {
            format!("{base}/{slug}")
        } else {
            // Fall back to the record key (last AT-URI segment) when no slug.
            format!("{base}/post/{}", uri.split('/').next_back().unwrap_or(""))
        };

        let published_at = value["publishedAt"]
            .as_str()
            .or(value["createdAt"].as_str())
            .unwrap_or("")
            .to_string();

        let created_at = chrono::Utc::now().to_rfc3339();

        let stmt = db.prepare(
            "INSERT OR REPLACE INTO items (id, source_kind, source_id, author, title, summary, url, content_html, published_at, created_at)
             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)"
        );

        // Upsert: Leaflet records carry no per-item author; both author and
        // content_html are stored as NULL.
        stmt.bind(&[
            uri.into(),
            "leaflet".into(),
            config.id.clone().into(),
            JsValue::NULL,
            title.into(),
            summary.into(),
            url.into(),
            JsValue::NULL,
            published_at.into(),
            created_at.into(),
        ])?
        .run()
        .await?;

        count += 1;
    }

    Ok(count)
}

/// Strips the URL scheme and any trailing slash from `base_url`, leaving the
/// bare host (plus any path), used as a stable source identifier.
fn normalize_source_id(base_url: &str) -> String {
    base_url
        .trim_start_matches("https://")
        .trim_start_matches("http://")
        .trim_end_matches('/')
        .to_string()
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_normalize_source_id_https() {
        assert_eq!(
            normalize_source_id("https://patternmatched.substack.com"),
            "patternmatched.substack.com"
        );
    }

    #[test]
    fn test_normalize_source_id_http() {
        assert_eq!(normalize_source_id("http://example.com/"), "example.com");
    }

    #[test]
    fn test_normalize_source_id_trailing_slash() {
        assert_eq!(normalize_source_id("https://test.leaflet.pub/"), "test.leaflet.pub");
    }

    #[test]
    fn test_normalize_source_id_no_protocol() {
        assert_eq!(normalize_source_id("example.com"), "example.com");
    }

    // NOTE(review): these truncation tests mirror the production expression.
    // `&text[..97]` slices by BYTE index and will panic if byte 97 is not a
    // UTF-8 char boundary; the ASCII fixtures here never trigger that —
    // consider `char_indices` in the production path for non-ASCII posts.
    #[test]
    fn test_bluesky_title_truncation_short() {
        let text = "Short post";
        let title = if text.len() > 100 { format!("{}...", &text[..97]) } else { text.to_string() };
        assert_eq!(title, "Short post");
    }

    #[test]
    fn test_bluesky_title_truncation_long() {
        let text = "a".repeat(150);
        let title = if text.len() > 100 { format!("{}...", &text[..97]) } else { text.to_string() };
        assert_eq!(title.len(), 100);
        assert!(title.ends_with("..."));
    }

    #[test]
    fn test_bluesky_title_truncation_boundary() {
        let text = "a".repeat(100);
        let title = if text.len() > 100 { format!("{}...", &text[..97]) } else { text.to_string() };
        assert_eq!(title, text);
    }

    #[test]
    fn test_leaflet_url_with_slug() {
        let base_url = "https://test.leaflet.pub";
        let slug = "my-post";
        let url = if !slug.is_empty() {
            format!("{base_url}/{slug}")
        } else {
            format!("{}/post/{}", base_url, "fallback")
        };
        assert_eq!(url, "https://test.leaflet.pub/my-post");
    }

    #[test]
    fn test_leaflet_url_without_slug() {
        let base_url = "https://test.leaflet.pub";
        let slug = "";
        let uri = "at://did:plc:abc123/pub.leaflet.post/xyz789";
        let post_id = uri.split('/').next_back().unwrap_or("");
        let url = if !slug.is_empty() { format!("{base_url}/{slug}") } else { format!("{base_url}/post/{post_id}") };
        assert_eq!(url, "https://test.leaflet.pub/post/xyz789");
    }

    #[test]
    fn test_bluesky_post_id_extraction() {
        let uri = "at://did:plc:abc123/app.bsky.feed.post/3ld7xyqnvqk2a";
        let post_id = uri.split('/').next_back().unwrap_or("");
        assert_eq!(post_id, "3ld7xyqnvqk2a");
    }

    #[test]
    fn test_bluesky_url_construction() {
        let handle = "desertthunder.dev";
        let post_id = "3ld7xyqnvqk2a";
        let url = format!("https://bsky.app/profile/{handle}/post/{post_id}");
        assert_eq!(url, "https://bsky.app/profile/desertthunder.dev/post/3ld7xyqnvqk2a");
    }

    #[test]
    fn test_leaflet_config_parsing() {
        let entry = "desertthunder:https://desertthunder.leaflet.pub";
        // splitn(2, ':') splits only at the FIRST colon, so the "https://"
        // in the URL half survives intact.
        let parts: Vec<&str> = entry.trim().splitn(2, ':').collect();
        assert_eq!(parts.len(), 2);
        assert_eq!(parts[0], "desertthunder");
        assert_eq!(parts[1], "https://desertthunder.leaflet.pub");
    }

    #[test]
    fn test_leaflet_config_parsing_invalid() {
        let entry = "invalid-entry-no-colon";
        let parts: Vec<&str> = entry.trim().splitn(2, ':').collect();
        assert_ne!(parts.len(), 2);
    }

    #[test]
    fn test_leaflet_config_parsing_multiple() {
        let urls = "id1:https://pub1.leaflet.pub,id2:https://pub2.leaflet.pub";
        let configs: Vec<_> = urls
            .split(',')
            .filter_map(|entry| {
                let parts: Vec<&str> = entry.trim().splitn(2, ':').collect();
                if parts.len() == 2 {
                    Some((parts[0].to_string(), parts[1].to_string()))
                } else {
                    None
                }
            })
            .collect();

        assert_eq!(configs.len(), 2);
        assert_eq!(configs[0].0, "id1");
        assert_eq!(configs[0].1, "https://pub1.leaflet.pub");
        assert_eq!(configs[1].0, "id2");
        assert_eq!(configs[1].1, "https://pub2.leaflet.pub");
    }

    #[test]
    fn test_substack_feed_url_construction() {
        let base_url = "https://patternmatched.substack.com";
        let feed_url = format!("{base_url}/feed");
        assert_eq!(feed_url, "https://patternmatched.substack.com/feed");
    }

    #[test]
    fn test_bluesky_api_url_construction() {
        let handle = "desertthunder.dev";
        let api_url = format!("https://public.api.bsky.app/xrpc/app.bsky.feed.getAuthorFeed?actor={handle}&limit=50");
        assert_eq!(
            api_url,
            "https://public.api.bsky.app/xrpc/app.bsky.feed.getAuthorFeed?actor=desertthunder.dev&limit=50"
        );
    }

    #[test]
    fn test_leaflet_did_construction() {
        let subdomain = "desertthunder";
        let did = format!("{subdomain}.bsky.social");
        assert_eq!(did, "desertthunder.bsky.social");
    }

    #[test]
    fn test_leaflet_api_url_construction() {
        let did = "desertthunder.bsky.social";
        let api_url = format!(
            "https://public.api.bsky.app/xrpc/com.atproto.repo.listRecords?repo={did}&collection=pub.leaflet.post&limit=50"
        );
        assert_eq!(
            api_url,
            "https://public.api.bsky.app/xrpc/com.atproto.repo.listRecords?repo=desertthunder.bsky.social&collection=pub.leaflet.post&limit=50"
        );
    }
}
+36
worker/wrangler.example.toml
··· 1 + # Cloudflare Workers configuration for Personal Activity Index 2 + # Copy this file to wrangler.toml and update with your values 3 + 4 + name = "personal-activity-index" 5 + main = "build/worker/index.js" 6 + compatibility_date = "2025-01-15" 7 + 8 + # D1 Database Binding 9 + # Create your D1 database with: 10 + # wrangler d1 create personal-activity-db 11 + # Then replace {DATABASE_ID} below with the ID from the output 12 + [[d1_databases]] 13 + binding = "DB" 14 + database_name = "personal-activity-db" 15 + database_id = "{DATABASE_ID}" 16 + 17 + # Cron Triggers for scheduled syncs 18 + # Runs every hour at minute 0 19 + [triggers] 20 + crons = ["0 * * * *"] 21 + 22 + # Environment variables for source configuration 23 + # Configure the sources you want to sync 24 + [vars] 25 + # Substack RSS feed URL 26 + SUBSTACK_URL = "https://patternmatched.substack.com" 27 + 28 + # Bluesky handle 29 + BLUESKY_HANDLE = "desertthunder.dev" 30 + 31 + # Leaflet publications (comma-separated id:url pairs) 32 + # Format: "id1:https://pub1.leaflet.pub,id2:https://pub2.leaflet.pub" 33 + LEAFLET_URLS = "desertthunder:https://desertthunder.leaflet.pub,stormlightlabs:https://stormlightlabs.leaflet.pub" 34 + 35 + # Optional: Logging level 36 + # LOG_LEVEL = "info"