BlueSky & more on desktop lazurite.stormlightlabs.org/
tauri rust typescript bluesky appview atproto solid
2
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat: search commands (fts)

+860 -11
+17 -9
docs/tasks/06-search.md
··· 8 8 9 9 #### Network Search 10 10 11 - - [ ] Create 11 + - [x] Create 12 12 - `src-tauri/src/search.rs` for business logic 13 13 - `src-tauri/src/commands/search.rs` 14 - - [ ] Implement network search commands (not indexed - direct API calls): 14 + - [x] Implement network search commands (not indexed - direct API calls): 15 15 - `search_posts_network(query, sort?, limit?, cursor?)` → `app.bsky.feed.searchPosts` 16 16 - `search_actors(query, limit?, cursor?)` → `app.bsky.actor.searchActors` 17 17 - `search_starter_packs(query, limit?, cursor?)` → `app.bsky.graph.searchStarterPacks` ··· 20 20 21 21 #### Local Data Pipeline (Base) 22 22 23 - - [ ] Add `sync_state` table to migrations (stores cursor per `(did, source)`) 24 - - [ ] Implement `sync_posts(did: String, source: "like"|"bookmark")`: 23 + - [x] Add `sync_state` table to migrations (stores cursor per `(did, source)`) 24 + - [x] Implement `sync_posts(did: String, source: "like"|"bookmark")`: 25 25 - Resume from stored cursor in `sync_state` (never re-fetch full history) 26 26 - Paginate `app.bsky.feed.getActorLikes` (or bookmarks) for the **authenticated user's own** likes/saves 27 27 - Upsert into `posts` table ··· 30 30 31 31 #### Embeddings 32 32 33 - - [ ] Implement `embed_pending_posts()` *(opt-out - skip when embeddings disabled)*: 33 + - [x] Implement `embed_pending_posts()` 34 34 - Query posts without embeddings 35 35 - Batch through `fastembed` TextEmbedding model (`nomic-embed-text-v1.5`) 36 36 - Insert into `posts_vec` via `zerocopy::AsBytes` 37 - - [ ] Implement `reindex_embeddings()`: 37 + - [x] Implement `reindex_embeddings()`: 38 38 - Clear all rows from `posts_vec` 39 39 - Re-embed every post in `posts` table 40 40 - Triggered manually by user (reindex button in UI) 41 - - [ ] Implement `set_embeddings_enabled(enabled: bool)`: 41 + - [x] Implement `set_embeddings_enabled(enabled: bool)`: 42 42 - Persist preference; when disabled, skip model download + embedding on sync 43 43 - Keyword search remains fully functional regardless 44 44 ··· 54 54 55 55 ### Frontend 56 56 57 + #### Search UI 58 + 57 59 - [ ] search bar (`/` or `CTRL/CMD + F` to focus) with mode selector (network / keyword / semantic / hybrid), `Motion` sliding indicator underline 58 60 - [ ] search results with staggered `Motion` fade-in, highlighted keyword matches 59 - - [ ] sync status indicator with animated progress bar, `Presence` fade-out on complete 60 - - [ ] reindex button: triggers `reindex_embeddings()`, shown in search settings or sync status area 61 + 62 + #### Embeddings 63 + 61 64 - [ ] embeddings opt-out toggle in settings (disables semantic search, skips model download) 62 65 - [ ] model download progress bar (percentage + ETA) on first launch 63 66 - Enabled by default (opt-out) 64 67 - Splash/Preflight route should explain what semantic search provides 68 + 69 + #### Sync Indexing 70 + 71 + - [ ] sync status indicator with animated progress bar, `Presence` fade-out on complete 72 + - [ ] reindex button: triggers `reindex_embeddings()`, shown in search settings or sync status area 65 73 - [ ] empty state illustration when no posts synced yet 66 74 - [ ] `Tab` cycles search mode (network → keyword → semantic → hybrid), `Escape` clears
+1
src-tauri/src/commands/mod.rs
··· 1 1 #![allow(clippy::needless_pass_by_value)] 2 2 3 3 pub mod explorer; 4 + pub mod search; 4 5 5 6 use super::auth::{self, LoginSuggestion}; 6 7 use super::error::AppError;
+53
src-tauri/src/commands/search.rs
··· 1 + #![allow(clippy::needless_pass_by_value)] 2 + 3 + use super::super::error::AppError; 4 + use super::super::search::{self, SyncStatus}; 5 + use super::super::state::AppState; 6 + use serde_json::Value; 7 + use tauri::{AppHandle, State}; 8 + 9 + #[tauri::command] 10 + pub async fn search_posts_network( 11 + query: String, sort: Option<String>, limit: Option<u32>, cursor: Option<String>, state: State<'_, AppState>, 12 + ) -> Result<Value, AppError> { 13 + search::search_posts_network(query, sort, limit, cursor, &state).await 14 + } 15 + 16 + #[tauri::command] 17 + pub async fn search_actors( 18 + query: String, limit: Option<u32>, cursor: Option<String>, state: State<'_, AppState>, 19 + ) -> Result<Value, AppError> { 20 + search::search_actors(query, limit, cursor, &state).await 21 + } 22 + 23 + #[tauri::command] 24 + pub async fn search_starter_packs( 25 + query: String, limit: Option<u32>, cursor: Option<String>, state: State<'_, AppState>, 26 + ) -> Result<Value, AppError> { 27 + search::search_starter_packs(query, limit, cursor, &state).await 28 + } 29 + 30 + #[tauri::command] 31 + pub async fn sync_posts(did: String, source: String, state: State<'_, AppState>) -> Result<SyncStatus, AppError> { 32 + search::sync_posts(did, source, &state).await 33 + } 34 + 35 + #[tauri::command] 36 + pub fn get_sync_status(did: String, state: State<'_, AppState>) -> Result<Vec<SyncStatus>, AppError> { 37 + search::get_sync_status(&did, &state) 38 + } 39 + 40 + #[tauri::command] 41 + pub fn embed_pending_posts(app: AppHandle, state: State<'_, AppState>) -> Result<usize, AppError> { 42 + search::embed_pending_posts(&app, &state) 43 + } 44 + 45 + #[tauri::command] 46 + pub fn reindex_embeddings(app: AppHandle, state: State<'_, AppState>) -> Result<usize, AppError> { 47 + search::reindex_embeddings(&app, &state) 48 + } 49 + 50 + #[tauri::command] 51 + pub fn set_embeddings_enabled(enabled: bool, state: State<'_, AppState>) -> Result<(), AppError> { 52 + search::set_embeddings_enabled(enabled, &state) 53 + }
+1
src-tauri/src/db.rs
··· 41 41 ), 42 42 Migration::new(4, "account_avatars", include_str!("migrations/004_account_avatars.sql")), 43 43 Migration::new(5, "sync_state", include_str!("migrations/005_sync_state.sql")), 44 + Migration::new(6, "app_settings", include_str!("migrations/006_app_settings.sql")), 44 45 ]; 45 46 46 47 pub fn initialize_database(app: &AppHandle) -> Result<DbPool, AppError> {
+10 -1
src-tauri/src/lib.rs
··· 5 5 mod explorer; 6 6 mod feed; 7 7 mod notifications; 8 + mod search; 8 9 mod state; 9 10 mod tray; 10 11 ··· 97 98 cmd::explorer::list_records, 98 99 cmd::explorer::get_record, 99 100 cmd::explorer::export_repo_car, 100 - cmd::explorer::query_labels 101 + cmd::explorer::query_labels, 102 + cmd::search::search_posts_network, 103 + cmd::search::search_actors, 104 + cmd::search::search_starter_packs, 105 + cmd::search::sync_posts, 106 + cmd::search::get_sync_status, 107 + cmd::search::embed_pending_posts, 108 + cmd::search::reindex_embeddings, 109 + cmd::search::set_embeddings_enabled 101 110 ]) 102 111 .run(tauri::generate_context!()) 103 112 .expect("error while running tauri application");
+1 -1
src-tauri/src/main.rs
··· 1 - // Prevents additional console window on Windows in release, DO NOT REMOVE!! 1 + //! Prevents additional console window on Windows in release, DO NOT REMOVE!! 2 2 #![cfg_attr(not(debug_assertions), windows_subsystem = "windows")] 3 3 4 4 fn main() {
+7
src-tauri/src/migrations/006_app_settings.sql
··· 1 + CREATE TABLE IF NOT EXISTS app_settings ( 2 + key TEXT PRIMARY KEY, 3 + value TEXT NOT NULL 4 + ); 5 + 6 + -- Embeddings are enabled by default (opt-out). 7 + INSERT OR IGNORE INTO app_settings(key, value) VALUES ('embeddings_enabled', '1');
+770
src-tauri/src/search.rs
··· 1 + use super::auth::LazuriteOAuthSession; 2 + use super::error::{AppError, Result}; 3 + use super::state::AppState; 4 + use fastembed::{EmbeddingModel, TextEmbedding, TextInitOptions}; 5 + use jacquard::api::app_bsky::actor::search_actors::SearchActors; 6 + use jacquard::api::app_bsky::feed::get_actor_likes::GetActorLikes; 7 + use jacquard::api::app_bsky::feed::search_posts::SearchPosts; 8 + use jacquard::api::app_bsky::graph::search_starter_packs::SearchStarterPacks; 9 + use jacquard::types::did::Did; 10 + use jacquard::types::ident::AtIdentifier; 11 + use jacquard::xrpc::XrpcClient; 12 + use rusqlite::{params, Connection, OptionalExtension}; 13 + use serde::Serialize; 14 + use std::path::PathBuf; 15 + use std::sync::Arc; 16 + use tauri::{AppHandle, Manager}; 17 + use tauri_plugin_log::log; 18 + 19 + #[derive(Debug, Serialize)] 20 + #[serde(rename_all = "camelCase")] 21 + pub struct SyncStatus { 22 + pub source: String, 23 + pub post_count: i64, 24 + pub cursor: Option<String>, 25 + pub last_synced_at: Option<String>, 26 + } 27 + 28 + fn validate_query(query: &str) -> Result<()> { 29 + if query.trim().is_empty() { 30 + return Err(AppError::validation("search query must not be empty")); 31 + } 32 + Ok(()) 33 + } 34 + 35 + fn validate_source(source: &str) -> Result<()> { 36 + match source { 37 + "like" | "bookmark" => Ok(()), 38 + _ => Err(AppError::validation("source must be 'like' or 'bookmark'")), 39 + } 40 + } 41 + 42 + async fn get_session(state: &AppState) -> Result<Arc<LazuriteOAuthSession>> { 43 + let did = state 44 + .active_session 45 + .read() 46 + .map_err(|error| { 47 + log::error!("active_session poisoned: {error}"); 48 + AppError::StatePoisoned("active_session") 49 + })? 50 + .as_ref() 51 + .ok_or_else(|| { 52 + log::error!("no active account"); 53 + AppError::Validation("no active account".into()) 54 + })? 55 + .did 56 + .clone(); 57 + 58 + state 59 + .sessions 60 + .read() 61 + .map_err(|error| { 62 + log::error!("sessions poisoned: {error}"); 63 + AppError::StatePoisoned("sessions") 64 + })? 65 + .get(&did) 66 + .cloned() 67 + .ok_or_else(|| { 68 + log::error!("session not found for active account"); 69 + AppError::Validation("session not found for active account".into()) 70 + }) 71 + } 72 + 73 + fn db_load_sync_cursor(conn: &Connection, did: &str, source: &str) -> Result<Option<String>> { 74 + conn.query_row( 75 + "SELECT cursor FROM sync_state WHERE did = ?1 AND source = ?2", 76 + params![did, source], 77 + |row| row.get::<_, Option<String>>(0), 78 + ) 79 + .optional() 80 + .map(|opt| opt.flatten()) 81 + .map_err(AppError::from) 82 + } 83 + 84 + fn db_save_sync_state(conn: &Connection, did: &str, source: &str, cursor: Option<&str>) -> Result<()> { 85 + conn.execute( 86 + "INSERT INTO sync_state(did, source, cursor, last_synced_at) 87 + VALUES(?1, ?2, ?3, CURRENT_TIMESTAMP) 88 + ON CONFLICT(did, source) DO UPDATE SET 89 + cursor = excluded.cursor, 90 + last_synced_at = excluded.last_synced_at", 91 + params![did, source, cursor], 92 + )?; 93 + Ok(()) 94 + } 95 + 96 + /// Upsert a single `FeedViewPost` JSON item into the `posts` table. 97 + /// On conflict (same uri) updates mutable fields but keeps indexed_at. 98 + fn db_upsert_post(conn: &Connection, feed_item: &serde_json::Value, source: &str) -> Result<()> { 99 + let post = feed_item.get("post").unwrap_or(feed_item); 100 + 101 + let uri = post 102 + .get("uri") 103 + .and_then(|v| v.as_str()) 104 + .ok_or_else(|| AppError::validation("feed item missing post.uri"))?; 105 + let cid = post 106 + .get("cid") 107 + .and_then(|v| v.as_str()) 108 + .ok_or_else(|| AppError::validation("feed item missing post.cid"))?; 109 + let author = post 110 + .get("author") 111 + .ok_or_else(|| AppError::validation("feed item missing post.author"))?; 112 + let author_did = author 113 + .get("did") 114 + .and_then(|v| v.as_str()) 115 + .ok_or_else(|| AppError::validation("feed item missing post.author.did"))?; 116 + let author_handle = author.get("handle").and_then(|v| v.as_str()); 117 + 118 + let record = post.get("record"); 119 + let text = record.and_then(|r| r.get("text")).and_then(|v| v.as_str()); 120 + let created_at = record.and_then(|r| r.get("createdAt")).and_then(|v| v.as_str()); 121 + let json_record = record.map(|r| r.to_string()); 122 + 123 + conn.execute( 124 + "INSERT INTO posts(uri, cid, author_did, author_handle, text, created_at, json_record, source) 125 + VALUES(?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8) 126 + ON CONFLICT(uri) DO UPDATE SET 127 + cid = excluded.cid, 128 + author_handle = excluded.author_handle, 129 + text = excluded.text, 130 + json_record = excluded.json_record", 131 + params![ 132 + uri, 133 + cid, 134 + author_did, 135 + author_handle, 136 + text, 137 + created_at, 138 + json_record, 139 + source 140 + ], 141 + )?; 142 + Ok(()) 143 + } 144 + 145 + fn db_post_count(conn: &Connection, source: &str) -> Result<i64> { 146 + conn.query_row("SELECT COUNT(*) FROM posts WHERE source = ?1", params![source], |row| { 147 + row.get(0) 148 + }) 149 + .map_err(AppError::from) 150 + } 151 + 152 + fn db_sync_status(conn: &Connection, source: &str) -> Result<SyncStatus> { 153 + let post_count = db_post_count(conn, source)?; 154 + let (cursor, last_synced_at) = conn 155 + .query_row( 156 + "SELECT cursor, last_synced_at FROM sync_state WHERE source = ?1", 157 + params![source], 158 + |row| Ok((row.get::<_, Option<String>>(0)?, row.get::<_, Option<String>>(1)?)), 159 + ) 160 + .optional()? 161 + .unwrap_or((None, None)); 162 + 163 + Ok(SyncStatus { source: source.to_owned(), post_count, cursor, last_synced_at }) 164 + } 165 + 166 + pub async fn search_posts_network( 167 + query: String, sort: Option<String>, limit: Option<u32>, cursor: Option<String>, state: &AppState, 168 + ) -> Result<serde_json::Value> { 169 + validate_query(&query)?; 170 + let session = get_session(state).await?; 171 + 172 + let output = session 173 + .send( 174 + SearchPosts::new() 175 + .sort(sort.as_deref().map(|s| s.into())) 176 + .limit(limit.map(|l| l as i64)) 177 + .cursor(cursor.as_deref().map(|c| c.into())) 178 + .q(query.as_str()) 179 + .build(), 180 + ) 181 + .await 182 + .map_err(|error| { 183 + log::error!("searchPosts error: {error}"); 184 + AppError::validation("searchPosts error") 185 + })? 186 + .into_output() 187 + .map_err(|error| { 188 + log::error!("searchPosts output error: {error}"); 189 + AppError::validation("searchPosts output error") 190 + })?; 191 + 192 + serde_json::to_value(&output).map_err(AppError::from) 193 + } 194 + 195 + pub async fn search_actors( 196 + query: String, limit: Option<u32>, cursor: Option<String>, state: &AppState, 197 + ) -> Result<serde_json::Value> { 198 + validate_query(&query)?; 199 + let session = get_session(state).await?; 200 + 201 + let output = session 202 + .send( 203 + SearchActors::new() 204 + .q(Some(query.as_str().into())) 205 + .limit(limit.map(|l| l as i64)) 206 + .cursor(cursor.as_deref().map(|c| c.into())) 207 + .build(), 208 + ) 209 + .await 210 + .map_err(|error| { 211 + log::error!("searchActors error: {error}"); 212 + AppError::validation("searchActors error") 213 + })? 214 + .into_output() 215 + .map_err(|error| { 216 + log::error!("searchActors output error: {error}"); 217 + AppError::validation("searchActors output error") 218 + })?; 219 + 220 + serde_json::to_value(&output).map_err(AppError::from) 221 + } 222 + 223 + pub async fn search_starter_packs( 224 + query: String, limit: Option<u32>, cursor: Option<String>, state: &AppState, 225 + ) -> Result<serde_json::Value> { 226 + validate_query(&query)?; 227 + let session = get_session(state).await?; 228 + 229 + let output = session 230 + .send( 231 + SearchStarterPacks::new() 232 + .limit(limit.map(|l| l as i64)) 233 + .cursor(cursor.as_deref().map(|c| c.into())) 234 + .q(query.as_str()) 235 + .build(), 236 + ) 237 + .await 238 + .map_err(|error| { 239 + log::error!("searchStarterPacks error: {error}"); 240 + AppError::validation("searchStarterPacks error") 241 + })? 242 + .into_output() 243 + .map_err(|error| { 244 + log::error!("searchStarterPacks output error: {error}"); 245 + AppError::validation("searchStarterPacks output error") 246 + })?; 247 + 248 + serde_json::to_value(&output).map_err(AppError::from) 249 + } 250 + 251 + /// Sync the authenticated user's likes (or bookmarks) into the local DB. 252 + /// 253 + /// Resumes from the last stored cursor so interrupted syncs never re-fetch the full history. 254 + /// On completion the cursor is cleared, allowing subsequent calls to pick up new items from the top of the feed. 255 + pub async fn sync_posts(did: String, source: String, state: &AppState) -> Result<SyncStatus> { 256 + validate_source(&source)?; 257 + 258 + if source == "bookmark" { 259 + return Err(AppError::validation("bookmark sync is not yet supported")); 260 + } 261 + 262 + let session = get_session(state).await?; 263 + 264 + let mut cursor: Option<String> = { 265 + let conn = state.auth_store.lock_connection()?; 266 + db_load_sync_cursor(&conn, &did, &source)? 267 + }; 268 + 269 + log::info!("starting {source} sync for {did}, resume cursor: {cursor:?}"); 270 + 271 + loop { 272 + let output = session 273 + .send( 274 + GetActorLikes::new() 275 + .limit(Some(100i64)) 276 + .cursor(cursor.as_deref().map(|c| c.into())) 277 + .actor(AtIdentifier::Did(Did::new(&did)?)) 278 + .build(), 279 + ) 280 + .await 281 + .map_err(|error| { 282 + log::error!("getActorLikes error: {error}"); 283 + AppError::validation("getActorLikes error") 284 + })? 285 + .into_output() 286 + .map_err(|error| { 287 + log::error!("getActorLikes output error: {error}"); 288 + AppError::validation("getActorLikes output error") 289 + })?; 290 + 291 + let output_json = serde_json::to_value(&output)?; 292 + 293 + let feed = output_json 294 + .get("feed") 295 + .and_then(|v| v.as_array()) 296 + .cloned() 297 + .unwrap_or_default(); 298 + 299 + if feed.is_empty() { 300 + log::info!("{source} sync for {did}: empty page, stopping"); 301 + break; 302 + } 303 + 304 + let next_cursor = output_json.get("cursor").and_then(|v| v.as_str()).map(str::to_owned); 305 + 306 + { 307 + let conn = state.auth_store.lock_connection()?; 308 + for item in &feed { 309 + db_upsert_post(&conn, item, &source)?; 310 + } 311 + db_save_sync_state(&conn, &did, &source, next_cursor.as_deref())?; 312 + } 313 + 314 + log::debug!( 315 + "{source} sync for {did}: upserted {} posts, next cursor: {next_cursor:?}", 316 + feed.len() 317 + ); 318 + 319 + match next_cursor { 320 + None => { 321 + log::info!("{source} sync for {did}: reached end of feed"); 322 + break; 323 + } 324 + Some(c) => cursor = Some(c), 325 + } 326 + } 327 + 328 + let conn = state.auth_store.lock_connection()?; 329 + db_sync_status(&conn, &source) 330 + } 331 + 332 + /// Returns sync status for all sources for the given DID. 333 + pub fn get_sync_status(did: &str, state: &AppState) -> Result<Vec<SyncStatus>> { 334 + let conn = state.auth_store.lock_connection()?; 335 + let mut stmt = conn.prepare( 336 + "SELECT ss.source, 337 + COUNT(p.uri) AS post_count, 338 + ss.cursor, 339 + ss.last_synced_at 340 + FROM sync_state ss 341 + LEFT JOIN posts p ON p.source = ss.source 342 + WHERE ss.did = ?1 343 + GROUP BY ss.source", 344 + )?; 345 + 346 + let rows = stmt.query_map(params![did], |row| { 347 + Ok(SyncStatus { 348 + source: row.get(0)?, 349 + post_count: row.get(1)?, 350 + cursor: row.get(2)?, 351 + last_synced_at: row.get(3)?, 352 + }) 353 + })?; 354 + 355 + rows.collect::<rusqlite::Result<Vec<_>>>().map_err(AppError::from) 356 + } 357 + 358 + const EMBED_BATCH_SIZE: usize = 32; 359 + 360 + fn resolve_models_dir(app: &AppHandle) -> Result<PathBuf> { 361 + let mut dir = app 362 + .path() 363 + .app_data_dir() 364 + .map_err(|error| AppError::PathResolve(error.to_string()))?; 365 + dir.push("models"); 366 + std::fs::create_dir_all(&dir)?; 367 + Ok(dir) 368 + } 369 + 370 + fn db_get_embeddings_enabled(conn: &Connection) -> Result<bool> { 371 + let val: Option<String> = conn 372 + .query_row( 373 + "SELECT value FROM app_settings WHERE key = 'embeddings_enabled'", 374 + [], 375 + |row| row.get(0), 376 + ) 377 + .optional()?; 378 + Ok(val.map(|v| v != "0").unwrap_or(true)) 379 + } 380 + 381 + fn db_set_embeddings_enabled(conn: &Connection, enabled: bool) -> Result<()> { 382 + conn.execute( 383 + "INSERT INTO app_settings(key, value) VALUES('embeddings_enabled', ?1) 384 + ON CONFLICT(key) DO UPDATE SET value = excluded.value", 385 + params![if enabled { "1" } else { "0" }], 386 + )?; 387 + Ok(()) 388 + } 389 + 390 + /// Returns (uri, text) for posts that have no embedding yet. 391 + fn db_posts_without_embeddings(conn: &Connection) -> Result<Vec<(String, String)>> { 392 + let mut stmt = conn.prepare( 393 + "SELECT p.uri, p.text 394 + FROM posts p 395 + WHERE p.text IS NOT NULL 396 + AND p.text != '' 397 + AND p.uri NOT IN (SELECT uri FROM posts_vec)", 398 + )?; 399 + 400 + let rows = stmt.query_map([], |row| Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)))?; 401 + rows.collect::<rusqlite::Result<Vec<_>>>().map_err(AppError::from) 402 + } 403 + 404 + /// Returns (uri, text) for ALL posts that have non-empty text. 405 + fn db_all_posts_with_text(conn: &Connection) -> Result<Vec<(String, String)>> { 406 + let mut stmt = conn.prepare("SELECT uri, text FROM posts WHERE text IS NOT NULL AND text != ''")?; 407 + 408 + let rows = stmt.query_map([], |row| Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)))?; 409 + rows.collect::<rusqlite::Result<Vec<_>>>().map_err(AppError::from) 410 + } 411 + 412 + fn db_upsert_embedding(conn: &Connection, uri: &str, embedding: &[f32]) -> Result<()> { 413 + let bytes: Vec<u8> = embedding.iter().flat_map(|f| f.to_le_bytes()).collect(); 414 + conn.execute( 415 + "INSERT OR REPLACE INTO posts_vec(uri, embedding) VALUES(?1, ?2)", 416 + params![uri, bytes.as_slice()], 417 + )?; 418 + Ok(()) 419 + } 420 + 421 + fn embed_posts(posts: &[(String, String)], models_dir: PathBuf, state: &AppState) -> Result<usize> { 422 + if posts.is_empty() { 423 + return Ok(0); 424 + } 425 + 426 + let mut model = TextEmbedding::try_new( 427 + TextInitOptions::new(EmbeddingModel::NomicEmbedTextV15) 428 + .with_cache_dir(models_dir) 429 + .with_show_download_progress(false), 430 + ) 431 + .map_err(|error| AppError::validation(format!("failed to init embedding model: {error}")))?; 432 + 433 + let mut total = 0usize; 434 + 435 + for chunk in posts.chunks(EMBED_BATCH_SIZE) { 436 + let texts: Vec<String> = chunk.iter().map(|(_, text)| text.clone()).collect(); 437 + let embeddings = model 438 + .embed(texts, Some(EMBED_BATCH_SIZE)) 439 + .map_err(|error| AppError::validation(format!("embedding error: {error}")))?; 440 + 441 + let conn = state.auth_store.lock_connection()?; 442 + for ((uri, _), embedding) in chunk.iter().zip(embeddings.iter()) { 443 + db_upsert_embedding(&conn, uri, embedding)?; 444 + } 445 + total += chunk.len(); 446 + } 447 + 448 + Ok(total) 449 + } 450 + 451 + /// Embed all posts that do not yet have an embedding. Skipped when embeddings are disabled. 452 + pub fn embed_pending_posts(app: &AppHandle, state: &AppState) -> Result<usize> { 453 + let enabled = { 454 + let conn = state.auth_store.lock_connection()?; 455 + db_get_embeddings_enabled(&conn)? 456 + }; 457 + if !enabled { 458 + log::info!("embeddings disabled, skipping embed_pending_posts"); 459 + return Ok(0); 460 + } 461 + 462 + let posts = { 463 + let conn = state.auth_store.lock_connection()?; 464 + db_posts_without_embeddings(&conn)? 465 + }; 466 + 467 + log::info!("embedding {} pending posts", posts.len()); 468 + let models_dir = resolve_models_dir(app)?; 469 + embed_posts(&posts, models_dir, state) 470 + } 471 + 472 + /// Clear all embeddings from `posts_vec` then re-embed every post. 473 + pub fn reindex_embeddings(app: &AppHandle, state: &AppState) -> Result<usize> { 474 + { 475 + let conn = state.auth_store.lock_connection()?; 476 + conn.execute("DELETE FROM posts_vec", [])?; 477 + } 478 + log::info!("cleared posts_vec for reindex"); 479 + 480 + let posts = { 481 + let conn = state.auth_store.lock_connection()?; 482 + db_all_posts_with_text(&conn)? 483 + }; 484 + 485 + log::info!("reindexing {} posts", posts.len()); 486 + let models_dir = resolve_models_dir(app)?; 487 + embed_posts(&posts, models_dir, state) 488 + } 489 + 490 + /// Persist the embeddings-enabled preference. 491 + pub fn set_embeddings_enabled(enabled: bool, state: &AppState) -> Result<()> { 492 + let conn = state.auth_store.lock_connection()?; 493 + db_set_embeddings_enabled(&conn, enabled) 494 + } 495 + 496 + #[cfg(test)] 497 + mod tests { 498 + use super::{ 499 + db_get_embeddings_enabled, db_load_sync_cursor, db_post_count, db_save_sync_state, db_set_embeddings_enabled, 500 + db_upsert_post, validate_query, validate_source, 501 + }; 502 + use rusqlite::Connection; 503 + 504 + /// Minimal schema for unit tests w/o FTS/vec tables. 505 + fn test_db() -> Connection { 506 + let conn = Connection::open_in_memory().expect("in-memory db should open"); 507 + conn.execute_batch( 508 + "CREATE TABLE posts ( 509 + uri TEXT PRIMARY KEY, 510 + cid TEXT NOT NULL, 511 + author_did TEXT NOT NULL, 512 + author_handle TEXT, 513 + text TEXT, 514 + created_at TEXT, 515 + indexed_at TEXT DEFAULT CURRENT_TIMESTAMP, 516 + json_record TEXT, 517 + source TEXT NOT NULL 518 + ); 519 + CREATE TABLE sync_state ( 520 + did TEXT NOT NULL, 521 + source TEXT NOT NULL, 522 + cursor TEXT, 523 + last_synced_at TEXT, 524 + PRIMARY KEY (did, source) 525 + ); 526 + CREATE TABLE app_settings ( 527 + key TEXT PRIMARY KEY, 528 + value TEXT NOT NULL 529 + );", 530 + ) 531 + .expect("test schema should apply"); 532 + conn 533 + } 534 + 535 + fn feed_item(uri: &str, cid: &str, did: &str, handle: &str, text: &str) -> serde_json::Value { 536 + serde_json::json!({ 537 + "post": { 538 + "uri": uri, 539 + "cid": cid, 540 + "author": { "did": did, "handle": handle }, 541 + "record": { "$type": "app.bsky.feed.post", "text": text, "createdAt": "2024-01-01T00:00:00Z" } 542 + } 543 + }) 544 + } 545 + 546 + #[test] 547 + fn empty_query_is_rejected() { 548 + assert!(validate_query("").is_err()); 549 + } 550 + 551 + #[test] 552 + fn whitespace_only_query_is_rejected() { 553 + assert!(validate_query(" ").is_err()); 554 + } 555 + 556 + #[test] 557 + fn valid_query_is_accepted() { 558 + assert!(validate_query("rust programming").is_ok()); 559 + } 560 + 561 + #[test] 562 + fn single_char_query_is_accepted() { 563 + assert!(validate_query("a").is_ok()); 564 + } 565 + 566 + #[test] 567 + fn from_handle_syntax_is_accepted() { 568 + assert!(validate_query("from:alice.bsky.social hello").is_ok()); 569 + } 570 + 571 + #[test] 572 + fn valid_sources_are_accepted() { 573 + assert!(validate_source("like").is_ok()); 574 + assert!(validate_source("bookmark").is_ok()); 575 + } 576 + 577 + #[test] 578 + fn unknown_source_is_rejected() { 579 + assert!(validate_source("repost").is_err()); 580 + assert!(validate_source("").is_err()); 581 + } 582 + 583 + #[test] 584 + fn cursor_is_none_when_no_sync_state_row_exists() { 585 + let conn = test_db(); 586 + let cursor = db_load_sync_cursor(&conn, "did:plc:alice", "like").unwrap(); 587 + assert!(cursor.is_none()); 588 + } 589 + 590 + #[test] 591 + fn save_and_load_cursor_roundtrips() { 592 + let conn = test_db(); 593 + db_save_sync_state(&conn, "did:plc:alice", "like", Some("cursor-abc")).unwrap(); 594 + let loaded = db_load_sync_cursor(&conn, "did:plc:alice", "like").unwrap(); 595 + assert_eq!(loaded.as_deref(), Some("cursor-abc")); 596 + } 597 + 598 + #[test] 599 + fn saving_none_cursor_clears_stored_cursor() { 600 + let conn = test_db(); 601 + db_save_sync_state(&conn, "did:plc:alice", "like", Some("cursor-abc")).unwrap(); 602 + db_save_sync_state(&conn, "did:plc:alice", "like", None).unwrap(); 603 + let loaded = db_load_sync_cursor(&conn, "did:plc:alice", "like").unwrap(); 604 + assert!(loaded.is_none()); 605 + } 606 + 607 + #[test] 608 + fn cursor_is_per_did_and_source() { 609 + let conn = test_db(); 610 + db_save_sync_state(&conn, "did:plc:alice", "like", Some("cursor-alice-like")).unwrap(); 611 + db_save_sync_state(&conn, "did:plc:alice", "bookmark", Some("cursor-alice-bm")).unwrap(); 612 + db_save_sync_state(&conn, "did:plc:bob", "like", Some("cursor-bob-like")).unwrap(); 613 + 614 + assert_eq!( 615 + db_load_sync_cursor(&conn, "did:plc:alice", "like").unwrap().as_deref(), 616 + Some("cursor-alice-like") 617 + ); 618 + assert_eq!( 619 + db_load_sync_cursor(&conn, "did:plc:alice", "bookmark") 620 + .unwrap() 621 + .as_deref(), 622 + Some("cursor-alice-bm") 623 + ); 624 + assert_eq!( 625 + db_load_sync_cursor(&conn, "did:plc:bob", "like").unwrap().as_deref(), 626 + Some("cursor-bob-like") 627 + ); 628 + } 629 + 630 + #[test] 631 + fn upsert_inserts_new_post() { 632 + let conn = test_db(); 633 + let item = feed_item( 634 + "at://did:plc:a/app.bsky.feed.post/1", 635 + "cid1", 636 + "did:plc:a", 637 + "alice", 638 + "hello", 639 + ); 640 + db_upsert_post(&conn, &item, "like").unwrap(); 641 + assert_eq!(db_post_count(&conn, "like").unwrap(), 1); 642 + } 643 + 644 + #[test] 645 + fn upsert_is_idempotent_for_same_uri() { 646 + let conn = test_db(); 647 + let item = feed_item( 648 + "at://did:plc:a/app.bsky.feed.post/1", 649 + "cid1", 650 + "did:plc:a", 651 + "alice", 652 + "hello", 653 + ); 654 + db_upsert_post(&conn, &item, "like").unwrap(); 655 + db_upsert_post(&conn, &item, "like").unwrap(); 656 + assert_eq!(db_post_count(&conn, "like").unwrap(), 1); 657 + } 658 + 659 + #[test] 660 + fn upsert_updates_text_on_conflict() { 661 + let conn = test_db(); 662 + let original = feed_item( 663 + "at://did:plc:a/app.bsky.feed.post/1", 664 + "cid1", 665 + "did:plc:a", 666 + "alice", 667 + "original", 668 + ); 669 + db_upsert_post(&conn, &original, "like").unwrap(); 670 + 671 + let updated = feed_item( 672 + "at://did:plc:a/app.bsky.feed.post/1", 673 + "cid2", 674 + "did:plc:a", 675 + "alice", 676 + "updated", 677 + ); 678 + db_upsert_post(&conn, &updated, "like").unwrap(); 679 + 680 + let text: String = conn 681 + .query_row( 682 + "SELECT text FROM posts WHERE uri = ?1", 683 + ["at://did:plc:a/app.bsky.feed.post/1"], 684 + |r| r.get(0), 685 + ) 686 + .unwrap(); 687 + assert_eq!(text, "updated"); 688 + } 689 + 690 + #[test] 691 + fn upsert_stores_source() { 692 + let conn = test_db(); 693 + let item = feed_item( 694 + "at://did:plc:a/app.bsky.feed.post/1", 695 + "cid1", 696 + "did:plc:a", 697 + "alice", 698 + "hi", 699 + ); 700 + db_upsert_post(&conn, &item, "bookmark").unwrap(); 701 + let source: String = conn 702 + .query_row( 703 + "SELECT source FROM posts WHERE uri = ?1", 704 + ["at://did:plc:a/app.bsky.feed.post/1"], 705 + |r| r.get(0), 706 + ) 707 + .unwrap(); 708 + assert_eq!(source, "bookmark"); 709 + } 710 + 711 + #[test] 712 + fn upsert_rejects_item_missing_uri() { 713 + let conn = test_db(); 714 + let bad = serde_json::json!({ "post": { "cid": "cid1", "author": { "did": "x" } } }); 715 + assert!(db_upsert_post(&conn, &bad, "like").is_err()); 716 + } 717 + 718 + #[test] 719 + fn post_count_is_per_source() { 720 + let conn = test_db(); 721 + db_upsert_post( 722 + &conn, 723 + &feed_item("at://a/app.bsky.feed.post/1", "c1", "did:plc:a", "a", "t"), 724 + "like", 725 + ) 726 + .unwrap(); 727 + db_upsert_post( 728 + &conn, 729 + &feed_item("at://a/app.bsky.feed.post/2", "c2", "did:plc:a", "a", "t"), 730 + "bookmark", 731 + ) 732 + .unwrap(); 733 + assert_eq!(db_post_count(&conn, "like").unwrap(), 1); 734 + assert_eq!(db_post_count(&conn, "bookmark").unwrap(), 1); 735 + } 736 + 737 + #[test] 738 + fn embeddings_enabled_defaults_to_true_when_row_absent() { 739 + let conn = test_db(); 740 + assert!(db_get_embeddings_enabled(&conn).unwrap()); 741 + } 742 + 743 + #[test] 744 + fn set_embeddings_enabled_false_persists() { 745 + let conn = test_db(); 746 + db_set_embeddings_enabled(&conn, false).unwrap(); 747 + assert!(!db_get_embeddings_enabled(&conn).unwrap()); 748 + } 749 + 750 + #[test] 751 + fn set_embeddings_enabled_true_persists() { 752 + let conn = test_db(); 753 + db_set_embeddings_enabled(&conn, false).unwrap(); 754 + db_set_embeddings_enabled(&conn, true).unwrap(); 755 + assert!(db_get_embeddings_enabled(&conn).unwrap()); 756 + } 757 + 758 + #[test] 759 + fn embeddings_enabled_toggle_is_idempotent() { 760 + let conn = test_db(); 761 + conn.execute( 762 + "INSERT INTO app_settings(key, value) VALUES('embeddings_enabled', '1')", 763 + [], 764 + ) 765 + .unwrap(); 766 + db_set_embeddings_enabled(&conn, false).unwrap(); 767 + db_set_embeddings_enabled(&conn, false).unwrap(); 768 + assert!(!db_get_embeddings_enabled(&conn).unwrap()); 769 + } 770 + }