cxs is a local-first CLI for searching Codex session logs. It is designed for progressive retrieval: find the right session first, then read
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

refactor(structure): 拆分查询和数据库模块

catoncat e167ce90 dd86f1a3

+2102 -2033
+2 -2
AGENTS.md
··· 36 36 - [cli.ts](/Users/envvar/work/repos/cxs/src/cli.ts): CLI 命令面 37 37 - [indexer.ts](/Users/envvar/work/repos/cxs/src/indexer.ts): sync 与索引更新 38 38 - [parser.ts](/Users/envvar/work/repos/cxs/src/parser.ts): Codex JSONL 解析与 `summary_text` 生成 39 - - [db.ts](/Users/envvar/work/repos/cxs/src/db.ts): SQLite schema、会话/消息存取 40 - - [query.ts](/Users/envvar/work/repos/cxs/src/query.ts): find / list / read-range / read-page 查询编排 39 + - [db.ts](/Users/envvar/work/repos/cxs/src/db.ts): SQLite facade;具体 schema / store / coverage 模块在 `src/db/` 40 + - [query.ts](/Users/envvar/work/repos/cxs/src/query.ts): 查询 facade;find / read / list / stats / search / snippet 模块在 `src/query/` 41 41 - [status.ts](/Users/envvar/work/repos/cxs/src/status.ts): status 输出编排 42 42 - [selector.ts](/Users/envvar/work/repos/cxs/src/selector.ts): selector 解析与覆盖蕴含规则 43 43 - [source-inventory.ts](/Users/envvar/work/repos/cxs/src/source-inventory.ts): raw sessions metadata inventory
+2 -2
docs/ARCHITECTURE.md
··· 39 39 40 40 ### 2. 持久化 41 41 42 - [db.ts](/Users/envvar/work/repos/cxs/src/db.ts) 维护两层主数据: 42 + [db.ts](/Users/envvar/work/repos/cxs/src/db.ts) 是 SQLite 访问 facade;`src/db/` 下的 schema / store / coverage 模块维护两层主数据: 43 43 44 44 - `sessions` 45 45 - `messages` ··· 64 64 65 65 ### 3. 查询 66 66 67 - [query.ts](/Users/envvar/work/repos/cxs/src/query.ts) 提供三类读取: 67 + [query.ts](/Users/envvar/work/repos/cxs/src/query.ts) 是查询 facade;`src/query/` 下的 find / read / list / stats / search 模块提供三类读取: 68 68 69 69 - `findSessions()` 70 70 - `getMessageRange()`
+3 -3
docs/CODE_QUALITY_REVIEW_2026-04-27.md
··· 52 52 - `src/cli.ts`:CLI 命令面 53 53 - `src/indexer.ts`:sync 与索引更新 54 54 - `src/parser.ts`:Codex JSONL 解析与 session summary 生成 55 - - `src/db.ts`:SQLite schema、session/message 存取、FTS 表维护 56 - - `src/query.ts`:find/list/read-range/read-page/current 查询编排 55 + - `src/db.ts` + `src/db/`:SQLite facade、schema、session/message/coverage/store 模块 56 + - `src/query.ts` + `src/query/`:查询 facade、find/list/read-range/read-page/stats/search/snippet 模块 57 57 - `src/ranking.ts`:session 级 heuristic rerank 58 58 - `eval/`:manual eval 与 batch compare 59 59 60 - 这对一个本地 CLI 来说是健康结构。 60 + 这对一个本地 CLI 来说是健康结构;后续已进一步把 db/query 大文件拆成 facade + 子模块。 61 61 62 62 ### 3. 当前改动方向合理 63 63
+3 -3
docs/RANKING_WEIGHTS.md
··· 1 1 # cxs ranking 权重说明 2 2 3 - 本文是 [ranking.ts](../src/ranking.ts) 与 [query.ts](../src/query.ts) 中所有 magic constant 的“为什么是这个值”说明,受众是未来要调权重的维护者(人或 agent)。 3 + 本文是 [ranking.ts](../src/ranking.ts) 与 [query/search.ts](../src/query/search.ts) 中所有 magic constant 的“为什么是这个值”说明,受众是未来要调权重的维护者(人或 agent)。 4 4 5 5 每个权重都需要在三个层次的相对量级里活下去: 6 6 7 - 1. **bm25 row-level 分数**(从 `src/query.ts:289` 或 `messages_fts` 的 `bm25(...)` 来)。被 `-row.score` 翻成正向后,单行通常落在 `2 ~ 15`。 7 + 1. **bm25 row-level 分数**(从 `src/query/search.ts:120` 或 `messages_fts` 的 `bm25(...)` 来)。被 `-row.score` 翻成正向后,单行通常落在 `2 ~ 15`。 8 8 2. **row-level signal bonus** (`scoreRow`)。叠加在 bm25 之上,常见区间 `0 ~ 16`。 9 9 3. **session-level metadata bonus** (`scoreSession`)。是一个 session 维度的“补强”加层,常见区间 `0 ~ 80`。 10 10 ··· 14 14 15 15 ## SQL 列权重: `bm25(sessions_fts, 8.0, 3.0, 4.0, 1.2)` 16 16 17 - 位置: [query.ts:289](../src/query.ts)。 17 + 位置: [query/search.ts:120](../src/query/search.ts)。 18 18 19 19 `sessions_fts` 的索引列顺序固定为 `(title, summary_text, compact_text, reasoning_summary_text, session_uuid)`,`session_uuid` 是 UNINDEXED,所以 bm25 的四个权重对应前四列。SQLite FTS5 的 bm25 输出是 *负数*,**值越小越好**;权重越大表示该列匹配应该被放大。 20 20
+2 -1
skill-packages/cxs/references/advanced-queries.md
··· 81 81 82 82 ## 来源 83 83 84 - - 仓库内 `src/query.ts` 84 + - 仓库内 `src/query/search.ts` 85 + - 仓库内 `src/query/snippet.ts` 85 86 - 仓库内 `src/tokenize.ts` 86 87 - 仓库内 `src/ranking.ts`
+1
skill-packages/cxs/references/progressive-workflow.md
··· 78 78 79 79 - 仓库内 `README.md` 80 80 - 仓库内 `src/query.ts` 81 + - 仓库内 `src/query/read.ts` 81 82 - 仓库内 `src/types.ts`
+2 -1
skill-packages/cxsd/references/advanced-queries.md
··· 81 81 82 82 ## 来源 83 83 84 - - 仓库内 `src/query.ts` 84 + - 仓库内 `src/query/search.ts` 85 + - 仓库内 `src/query/snippet.ts` 85 86 - 仓库内 `src/tokenize.ts` 86 87 - 仓库内 `src/ranking.ts`
+1
skill-packages/cxsd/references/progressive-workflow.md
··· 78 78 79 79 - 仓库内 `README.md` 80 80 - 仓库内 `src/query.ts` 81 + - 仓库内 `src/query/read.ts` 81 82 - 仓库内 `src/types.ts`
+15 -703
src/db.ts
··· 1 - import { existsSync } from "node:fs"; 2 - import Database from "better-sqlite3"; 3 - import { tokenizedText } from "./tokenize"; 4 - import { INDEX_VERSION } from "./env"; 5 - import type { 6 - CoverageRecord, 7 - CwdCount, 8 - MessageRecord, 9 - ParsedSession, 10 - Selector, 11 - SessionListEntry, 12 - SessionListQuery, 13 - SessionRecord, 14 - } from "./types"; 15 - import { selectorImplies, selectorStorageKey } from "./selector"; 16 - 17 - type Db = Database.Database; 18 - type SqlParams = unknown[]; 19 - 20 - const BUSY_TIMEOUT_MS = 5000; 21 - 22 - export class IndexUnavailableError extends Error { 23 - constructor(public readonly dbPath: string) { 24 - super(`index not found: ${dbPath}`); 25 - this.name = "IndexUnavailableError"; 26 - } 27 - } 28 - 29 - export function openReadDb(dbPath: string): Db { 30 - if (!existsSync(dbPath)) { 31 - throw new IndexUnavailableError(dbPath); 32 - } 33 - 34 - const db = new Database(dbPath, { readonly: true }); 35 - db.pragma(`busy_timeout = ${BUSY_TIMEOUT_MS}`); 36 - db.pragma("query_only = ON"); 37 - db.pragma("temp_store = MEMORY"); 38 - return db; 39 - } 40 - 41 - // Why: callers used to do `const db = openReadDb(...); ... db.close();` which 42 - // leaks the connection if work in between throws. Wrapping in try/finally at 43 - // every callsite is noise — fold it once. 
44 - export function withReadDb<T>(dbPath: string, fn: (db: Db) => T): T { 45 - const db = openReadDb(dbPath); 46 - try { 47 - return fn(db); 48 - } finally { 49 - db.close(); 50 - } 51 - } 52 - 53 - export function openWriteDb(dbPath: string): Db { 54 - const db = new Database(dbPath); 55 - db.pragma(`busy_timeout = ${BUSY_TIMEOUT_MS}`); 56 - db.pragma("journal_mode = WAL"); 57 - db.pragma("synchronous = NORMAL"); 58 - db.pragma("temp_store = MEMORY"); 59 - db.pragma("foreign_keys = ON"); 60 - ensureSchema(db); 61 - return db; 62 - } 63 - 64 - function ensureSchema(db: Db): void { 65 - db.exec(` 66 - CREATE TABLE IF NOT EXISTS sessions ( 67 - id INTEGER PRIMARY KEY AUTOINCREMENT, 68 - session_uuid TEXT NOT NULL UNIQUE, 69 - file_path TEXT NOT NULL UNIQUE, 70 - source_root TEXT NOT NULL DEFAULT '', 71 - title TEXT NOT NULL DEFAULT '', 72 - summary_text TEXT NOT NULL DEFAULT '', 73 - compact_text TEXT NOT NULL DEFAULT '', 74 - reasoning_summary_text TEXT NOT NULL DEFAULT '', 75 - cwd TEXT NOT NULL DEFAULT '', 76 - model TEXT NOT NULL DEFAULT '', 77 - started_at TEXT NOT NULL, 78 - ended_at TEXT NOT NULL, 79 - path_date TEXT NOT NULL DEFAULT '', 80 - message_count INTEGER NOT NULL DEFAULT 0, 81 - raw_file_mtime INTEGER NOT NULL DEFAULT 0, 82 - raw_file_size INTEGER NOT NULL DEFAULT 0, 83 - index_version TEXT NOT NULL DEFAULT '', 84 - updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP 85 - ) 86 - `); 87 - 88 - ensureTextColumn(db, "sessions", "summary_text"); 89 - ensureTextColumn(db, "sessions", "compact_text"); 90 - ensureTextColumn(db, "sessions", "reasoning_summary_text"); 91 - ensureTextColumn(db, "sessions", "path_date"); 92 - ensureTextColumn(db, "sessions", "source_root"); 93 - 94 - db.exec(` 95 - CREATE TABLE IF NOT EXISTS messages ( 96 - id INTEGER PRIMARY KEY AUTOINCREMENT, 97 - session_id INTEGER NOT NULL REFERENCES sessions(id) ON DELETE CASCADE, 98 - session_uuid TEXT NOT NULL, 99 - seq INTEGER NOT NULL, 100 - role TEXT NOT NULL, 101 - content_text TEXT 
NOT NULL, 102 - timestamp TEXT NOT NULL, 103 - source_kind TEXT NOT NULL, 104 - UNIQUE(session_uuid, seq) 105 - ) 106 - `); 107 - 108 - db.exec("CREATE INDEX IF NOT EXISTS idx_messages_session_seq ON messages(session_uuid, seq)"); 109 - db.exec("CREATE INDEX IF NOT EXISTS idx_sessions_started_at ON sessions(started_at DESC)"); 110 - 111 - db.exec(` 112 - CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts USING fts5( 113 - content_text, 114 - session_uuid UNINDEXED, 115 - seq UNINDEXED, 116 - role UNINDEXED, 117 - timestamp UNINDEXED, 118 - tokenize='unicode61 remove_diacritics 1' 119 - ) 120 - `); 121 - 122 - ensureSessionsFtsTable(db); 123 - ensureCoverageTable(db); 124 - 125 - dropLegacyTrigramTable(db); 126 - } 127 - 128 - function ensureCoverageTable(db: Db): void { 129 - db.exec(` 130 - CREATE TABLE IF NOT EXISTS coverage ( 131 - id INTEGER PRIMARY KEY AUTOINCREMENT, 132 - selector_key TEXT NOT NULL UNIQUE, 133 - selector_json TEXT NOT NULL, 134 - selector_kind TEXT NOT NULL, 135 - root TEXT NOT NULL, 136 - cwd TEXT, 137 - from_date TEXT, 138 - to_date TEXT, 139 - source_fingerprint TEXT NOT NULL, 140 - source_file_count INTEGER NOT NULL, 141 - indexed_session_count INTEGER NOT NULL, 142 - completed_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, 143 - index_version TEXT NOT NULL 144 - ) 145 - `); 146 - 147 - db.exec("CREATE INDEX IF NOT EXISTS idx_coverage_root ON coverage(root)"); 148 - } 149 - 150 - function dropLegacyTrigramTable(db: Db): void { 151 - // cxs <= v2 shipped a second FTS5 virtual table for CJK trigram search. 152 - // The hybrid bigram+Segmenter tokenizer in tokenize.ts replaces it, so 153 - // drop the old table and its shadow rows if they still exist. 
154 - db.exec("DROP TABLE IF EXISTS messages_fts_trigram"); 155 - } 156 - 157 - function ensureSessionsFtsTable(db: Db): void { 158 - const existing = db 159 - .prepare("SELECT 1 FROM sqlite_master WHERE name = 'sessions_fts' LIMIT 1") 160 - .get(); 161 - 162 - if (existing) { 163 - const columns = db 164 - .prepare("PRAGMA table_info(sessions_fts)") 165 - .all() as Array<{ name: string }>; 166 - const names = new Set(columns.map((column) => column.name)); 167 - if (!names.has("compact_text") || !names.has("reasoning_summary_text")) { 168 - db.exec("DROP TABLE sessions_fts"); 169 - } 170 - } 171 - 172 - db.exec(` 173 - CREATE VIRTUAL TABLE IF NOT EXISTS sessions_fts USING fts5( 174 - title, 175 - summary_text, 176 - compact_text, 177 - reasoning_summary_text, 178 - session_uuid UNINDEXED, 179 - tokenize='unicode61 remove_diacritics 1' 180 - ) 181 - `); 182 - } 183 - 184 - export function getIndexedSessionMeta( 185 - db: Db, 186 - filePath: string, 187 - ): { rawFileMtime: number; rawFileSize: number; indexVersion: string } | null { 188 - const row = db 189 - .prepare<[string], { rawFileMtime: number; rawFileSize: number; indexVersion: string }>(` 190 - SELECT raw_file_mtime AS rawFileMtime, raw_file_size AS rawFileSize, index_version AS indexVersion 191 - FROM sessions 192 - WHERE file_path = ? 193 - LIMIT 1 194 - `) 195 - .get(filePath) as 196 - | { rawFileMtime: number; rawFileSize: number; indexVersion: string } 197 - | undefined; 198 - 199 - return row ?? null; 200 - } 201 - 202 - export function deleteSessionByFilePath(db: Db, filePath: string): void { 203 - const row = db 204 - .prepare<[string], { sessionUuid: string }>("SELECT session_uuid AS sessionUuid FROM sessions WHERE file_path = ? 
LIMIT 1") 205 - .get(filePath) as { sessionUuid: string } | undefined; 206 - 207 - if (!row) return; 208 - deleteSessionByUuid(db, row.sessionUuid); 209 - } 210 - 211 - function deleteSessionByUuid(db: Db, sessionUuid: string): void { 212 - db.prepare("DELETE FROM sessions_fts WHERE session_uuid = ?").run(sessionUuid); 213 - db.prepare("DELETE FROM messages_fts WHERE session_uuid = ?").run(sessionUuid); 214 - db.prepare("DELETE FROM messages WHERE session_uuid = ?").run(sessionUuid); 215 - db.prepare("DELETE FROM sessions WHERE session_uuid = ?").run(sessionUuid); 216 - } 217 - 218 - export function replaceSession( 219 - db: Db, 220 - session: ParsedSession, 221 - rawFileMtime: number, 222 - rawFileSize: number, 223 - indexVersion: string, 224 - pathDate: string, 225 - sourceRoot = sessionRootFromFile(session.filePath), 226 - ): void { 227 - const tx = db.transaction(() => { 228 - const existing = db 229 - .prepare<[string, string], { id: number }>("SELECT id FROM sessions WHERE session_uuid = ? OR file_path = ? LIMIT 1") 230 - .get(session.sessionUuid, session.filePath) as { id: number } | undefined; 231 - 232 - if (existing) { 233 - db.prepare( 234 - ` 235 - UPDATE sessions 236 - SET session_uuid = ?, file_path = ?, source_root = ?, title = ?, summary_text = ?, compact_text = ?, reasoning_summary_text = ?, 237 - cwd = ?, model = ?, started_at = ?, ended_at = ?, path_date = ?, 238 - message_count = ?, raw_file_mtime = ?, raw_file_size = ?, index_version = ?, updated_at = CURRENT_TIMESTAMP 239 - WHERE id = ? 240 - `, 241 - ).run( 242 - session.sessionUuid, 243 - session.filePath, 244 - sourceRoot, 245 - session.title, 246 - session.summaryText, 247 - session.compactText ?? "", 248 - session.reasoningSummaryText ?? 
"", 249 - session.cwd, 250 - session.model, 251 - session.startedAt, 252 - session.endedAt, 253 - pathDate, 254 - session.messages.length, 255 - rawFileMtime, 256 - rawFileSize, 257 - indexVersion, 258 - existing.id, 259 - ); 260 - } else { 261 - db.prepare( 262 - ` 263 - INSERT INTO sessions ( 264 - session_uuid, file_path, source_root, title, summary_text, compact_text, reasoning_summary_text, 265 - cwd, model, started_at, ended_at, path_date, 266 - message_count, raw_file_mtime, raw_file_size, index_version 267 - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 268 - `, 269 - ).run( 270 - session.sessionUuid, 271 - session.filePath, 272 - sourceRoot, 273 - session.title, 274 - session.summaryText, 275 - session.compactText ?? "", 276 - session.reasoningSummaryText ?? "", 277 - session.cwd, 278 - session.model, 279 - session.startedAt, 280 - session.endedAt, 281 - pathDate, 282 - session.messages.length, 283 - rawFileMtime, 284 - rawFileSize, 285 - indexVersion, 286 - ); 287 - } 288 - 289 - const sessionRow = db 290 - .prepare<[string], { id: number }>("SELECT id FROM sessions WHERE session_uuid = ? LIMIT 1") 291 - .get(session.sessionUuid) as { id: number }; 292 - 293 - db.prepare("DELETE FROM messages_fts WHERE session_uuid = ?").run(session.sessionUuid); 294 - db.prepare("DELETE FROM messages WHERE session_uuid = ?").run(session.sessionUuid); 295 - db.prepare("DELETE FROM sessions_fts WHERE rowid = ? OR session_uuid = ?").run(sessionRow.id, session.sessionUuid); 296 - 297 - db.prepare( 298 - ` 299 - INSERT INTO sessions_fts(rowid, title, summary_text, compact_text, reasoning_summary_text, session_uuid) 300 - VALUES (?, ?, ?, ?, ?, ?) 301 - `, 302 - ).run( 303 - sessionRow.id, 304 - tokenizedText(session.title), 305 - tokenizedText(session.summaryText), 306 - tokenizedText(session.compactText ?? ""), 307 - tokenizedText(session.reasoningSummaryText ?? 
""), 308 - session.sessionUuid, 309 - ); 310 - 311 - const messageStmt = db.prepare<[number, string, number, string, string, string, string]>(` 312 - INSERT INTO messages (session_id, session_uuid, seq, role, content_text, timestamp, source_kind) 313 - VALUES (?, ?, ?, ?, ?, ?, ?) 314 - `); 315 - const ftsStmt = db.prepare<[number, string, string, number, string, string]>(` 316 - INSERT INTO messages_fts(rowid, content_text, session_uuid, seq, role, timestamp) 317 - VALUES (?, ?, ?, ?, ?, ?) 318 - `); 319 - 320 - for (const message of session.messages) { 321 - const result = messageStmt.run( 322 - sessionRow.id, 323 - session.sessionUuid, 324 - message.seq, 325 - message.role, 326 - message.contentText, 327 - message.timestamp, 328 - message.sourceKind, 329 - ); 330 - const messageId = Number(result.lastInsertRowid); 331 - // Feed the FTS index with tokenized text so that CJK runs are split 332 - // into bigrams by tokenize(). Stored content in messages.content_text 333 - // stays raw for display. 334 - ftsStmt.run( 335 - messageId, 336 - tokenizedText(message.contentText), 337 - session.sessionUuid, 338 - message.seq, 339 - message.role, 340 - message.timestamp, 341 - ); 342 - } 343 - }); 344 - 345 - tx(); 346 - } 347 - 348 - export function getSessionRecord(db: Db, sessionUuid: string): SessionRecord | null { 349 - const row = db 350 - .prepare<[string], SessionRecord & { filePath: string }>(` 351 - SELECT 352 - session_uuid AS sessionUuid, 353 - file_path AS filePath, 354 - source_root AS sourceRoot, 355 - title, 356 - summary_text AS summaryText, 357 - cwd, 358 - model, 359 - started_at AS startedAt, 360 - ended_at AS endedAt, 361 - path_date AS pathDate, 362 - message_count AS messageCount 363 - FROM sessions 364 - WHERE session_uuid = ? 
365 - LIMIT 1 366 - `) 367 - .get(sessionUuid) as (SessionRecord & { filePath: string }) | undefined; 368 - 369 - if (!row) return null; 370 - return row; 371 - } 372 - 373 - function ensureTextColumn(db: Db, tableName: string, columnName: string): void { 374 - const columns = db 375 - .prepare(`PRAGMA table_info(${tableName})`) 376 - .all() as Array<{ name?: string }>; 377 - 378 - if (columns.some((column) => column.name === columnName)) return; 379 - db.exec(`ALTER TABLE ${tableName} ADD COLUMN ${columnName} TEXT NOT NULL DEFAULT ''`); 380 - } 381 - 382 - export function getMessagesForRange( 383 - db: Db, 384 - sessionUuid: string, 385 - startSeq: number, 386 - endSeq: number, 387 - ): MessageRecord[] { 388 - return db 389 - .prepare<[string, number, number], MessageRecord>(` 390 - SELECT 391 - session_uuid AS sessionUuid, 392 - seq, 393 - role, 394 - content_text AS contentText, 395 - timestamp, 396 - source_kind AS sourceKind 397 - FROM messages 398 - WHERE session_uuid = ? AND seq BETWEEN ? AND ? 399 - ORDER BY seq 400 - `) 401 - .all(sessionUuid, startSeq, endSeq) as MessageRecord[]; 402 - } 403 - 404 - export function getMessagesForPage( 405 - db: Db, 406 - sessionUuid: string, 407 - offset: number, 408 - limit: number, 409 - ): MessageRecord[] { 410 - return db 411 - .prepare<[string, number, number], MessageRecord>(` 412 - SELECT 413 - session_uuid AS sessionUuid, 414 - seq, 415 - role, 416 - content_text AS contentText, 417 - timestamp, 418 - source_kind AS sourceKind 419 - FROM messages 420 - WHERE session_uuid = ? 421 - ORDER BY seq 422 - LIMIT ? OFFSET ? 
423 - `) 424 - .all(sessionUuid, limit, offset) as MessageRecord[]; 425 - } 426 - 427 - export function listSessions(db: Db, query: SessionListQuery): SessionListEntry[] { 428 - const conditions: string[] = []; 429 - const params: SqlParams = []; 430 - if (query.selector) { 431 - const selectorWhere = selectorWhereSql(query.selector, "sessions"); 432 - conditions.push(...selectorWhere.conditions); 433 - params.push(...selectorWhere.params); 434 - } 435 - if (query.cwd) { 436 - // Substring match rather than prefix/equality: agent callers often pass 437 - // the trailing segment of a project path, not the full canonical path. 438 - conditions.push("lower(cwd) LIKE ? ESCAPE '\\'"); 439 - params.push(`%${escapeLike(query.cwd.toLowerCase())}%`); 440 - } 441 - if (query.since) { 442 - conditions.push("ended_at >= ?"); 443 - params.push(query.since); 444 - } 445 - const where = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : ""; 446 - 447 - const orderColumn = query.sort === "started" 448 - ? "started_at" 449 - : query.sort === "messages" 450 - ? "message_count" 451 - : "ended_at"; 452 - 453 - params.push(query.limit); 454 - 455 - return db 456 - .prepare<typeof params, SessionListEntry>(` 457 - SELECT 458 - session_uuid AS sessionUuid, 459 - title, 460 - summary_text AS summaryText, 461 - cwd, 462 - started_at AS startedAt, 463 - ended_at AS endedAt, 464 - path_date AS pathDate, 465 - message_count AS messageCount 466 - FROM sessions 467 - ${where} 468 - ORDER BY ${orderColumn} DESC 469 - LIMIT ? 
470 - `) 471 - .all(...params) as SessionListEntry[]; 472 - } 473 - 474 - export function getStatsCounts(db: Db): { 475 - sessionCount: number; 476 - messageCount: number; 477 - earliestStartedAt: string | null; 478 - latestEndedAt: string | null; 479 - lastSyncAt: string | null; 480 - } { 481 - const row = db 482 - .prepare(` 483 - SELECT 484 - COUNT(*) AS sessionCount, 485 - COALESCE(SUM(message_count), 0) AS messageCount, 486 - MIN(started_at) AS earliestStartedAt, 487 - MAX(ended_at) AS latestEndedAt, 488 - MAX(updated_at) AS lastSyncAt 489 - FROM sessions 490 - `) 491 - .get() as { 492 - sessionCount: number; 493 - messageCount: number; 494 - earliestStartedAt: string | null; 495 - latestEndedAt: string | null; 496 - lastSyncAt: string | null; 497 - }; 498 - return row; 499 - } 500 - 501 - export function getTopCwds(db: Db, limit: number): CwdCount[] { 502 - return db 503 - .prepare<[number], CwdCount>(` 504 - SELECT cwd, COUNT(*) AS count 505 - FROM sessions 506 - WHERE cwd != '' 507 - GROUP BY cwd 508 - ORDER BY count DESC, cwd ASC 509 - LIMIT ? 510 - `) 511 - .all(limit) as CwdCount[]; 512 - } 513 - 514 - export function replaceCoverage( 515 - db: Db, 516 - selector: Selector, 517 - sourceFingerprint: string, 518 - sourceFileCount: number, 519 - indexedSessionCount: number, 520 - indexVersion: string, 521 - ): CoverageRecord { 522 - const key = selectorStorageKey(selector); 523 - const stmt = db.prepare(` 524 - INSERT INTO coverage ( 525 - selector_key, selector_json, selector_kind, root, cwd, from_date, to_date, 526 - source_fingerprint, source_file_count, indexed_session_count, index_version 527 - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
528 - ON CONFLICT(selector_key) DO UPDATE SET 529 - selector_json = excluded.selector_json, 530 - selector_kind = excluded.selector_kind, 531 - root = excluded.root, 532 - cwd = excluded.cwd, 533 - from_date = excluded.from_date, 534 - to_date = excluded.to_date, 535 - source_fingerprint = excluded.source_fingerprint, 536 - source_file_count = excluded.source_file_count, 537 - indexed_session_count = excluded.indexed_session_count, 538 - completed_at = CURRENT_TIMESTAMP, 539 - index_version = excluded.index_version 540 - `); 541 - stmt.run( 542 - key, 543 - JSON.stringify(selector), 544 - selector.kind, 545 - selector.root, 546 - "cwd" in selector ? selector.cwd : null, 547 - "fromDate" in selector ? selector.fromDate : null, 548 - "toDate" in selector ? selector.toDate : null, 549 - sourceFingerprint, 550 - sourceFileCount, 551 - indexedSessionCount, 552 - indexVersion, 553 - ); 554 - return getCoverageRecordByKey(db, key)!; 555 - } 556 - 557 - export function listCoverageRecords(db: Db): CoverageRecord[] { 558 - if (!tableExists(db, "coverage")) return []; 559 - const rows = db.prepare("SELECT * FROM coverage ORDER BY completed_at DESC, id DESC").all() as CoverageRow[]; 560 - return rows.map(rowToCoverageRecord); 561 - } 562 - 563 - export function coverageStatusForSelector(db: Db, requested: Selector | null): { 564 - complete: boolean; 565 - coveringSelectors: CoverageRecord[]; 566 - } { 567 - if (!requested) return { complete: false, coveringSelectors: [] }; 568 - const entries = listCoverageRecords(db).filter((entry) => 569 - entry.indexVersion === requestedIndexVersion(db) && selectorImplies(entry.selector, requested) 570 - ); 571 - return { 572 - complete: entries.length > 0, 573 - coveringSelectors: entries, 574 - }; 575 - } 576 - 577 - export function countSessionsForSelector(db: Db, selector: Selector): number { 578 - const where = selectorWhereSql(selector, "sessions"); 579 - const row = db 580 - .prepare<typeof where.params, { count: number }>(` 581 - 
SELECT COUNT(*) AS count 582 - FROM sessions 583 - WHERE ${where.conditions.join(" AND ")} 584 - `) 585 - .get(...where.params) as { count: number }; 586 - return row.count; 587 - } 588 - 589 - export function deleteSessionsForSelectorExceptFilePaths( 590 - db: Db, 591 - selector: Selector, 592 - retainedFilePaths: Set<string>, 593 - ): number { 594 - const where = selectorWhereSql(selector, "sessions"); 595 - const params = [...where.params]; 596 - const retained = [...retainedFilePaths]; 597 - const retainedClause = retained.length > 0 598 - ? ` AND sessions.file_path NOT IN (${retained.map(() => "?").join(", ")})` 599 - : ""; 600 - params.push(...retained); 601 - const rows = db 602 - .prepare(` 603 - SELECT session_uuid AS sessionUuid 604 - FROM sessions 605 - WHERE ${where.conditions.join(" AND ")}${retainedClause} 606 - `) 607 - .all(...params) as Array<{ sessionUuid: string }>; 608 - 609 - for (const row of rows) { 610 - deleteSessionByUuid(db, row.sessionUuid); 611 - } 612 - return rows.length; 613 - } 614 - 615 - export function selectorWhereSql(selector: Selector, alias: string): { conditions: string[]; params: SqlParams } { 616 - const conditions = [`(${alias}.file_path = ? OR ${alias}.file_path LIKE ? 
ESCAPE '\\')`]; 617 - const params: SqlParams = [selector.root, `${escapeLike(selector.root)}/%`]; 618 - if (selector.kind === "cwd" || selector.kind === "cwd_date_range") { 619 - conditions.push(`${alias}.cwd = ?`); 620 - params.push(selector.cwd); 621 - } 622 - if (selector.kind === "date_range" || selector.kind === "cwd_date_range") { 623 - conditions.push(`${alias}.path_date >= ?`); 624 - conditions.push(`${alias}.path_date <= ?`); 625 - params.push(selector.fromDate, selector.toDate); 626 - } 627 - return { conditions, params }; 628 - } 629 - 630 - export function coverageEntriesForSession(db: Db, session: SessionRecord): CoverageRecord[] { 631 - const root = session.sourceRoot || sessionRootFromFile(session.filePath); 632 - const sessionSelectors: Selector[] = [ 633 - { kind: "all", root }, 634 - { kind: "cwd", root, cwd: session.cwd }, 635 - ]; 636 - if (session.pathDate) { 637 - sessionSelectors.push({ 638 - kind: "date_range", 639 - root, 640 - fromDate: session.pathDate, 641 - toDate: session.pathDate, 642 - }); 643 - sessionSelectors.push({ 644 - kind: "cwd_date_range", 645 - root, 646 - cwd: session.cwd, 647 - fromDate: session.pathDate, 648 - toDate: session.pathDate, 649 - }); 650 - } 651 - return listCoverageRecords(db).filter((entry) => 652 - sessionSelectors.some((selector) => selectorImplies(entry.selector, selector)) 653 - ); 654 - } 655 - 656 - type CoverageRow = { 657 - id: number; 658 - selector_json: string; 659 - source_fingerprint: string; 660 - source_file_count: number; 661 - indexed_session_count: number; 662 - completed_at: string; 663 - index_version: string; 664 - }; 665 - 666 - function getCoverageRecordByKey(db: Db, key: string): CoverageRecord | null { 667 - const row = db.prepare<[string], CoverageRow>("SELECT * FROM coverage WHERE selector_key = ? LIMIT 1").get(key); 668 - return row ? 
rowToCoverageRecord(row) : null; 669 - } 670 - 671 - function rowToCoverageRecord(row: CoverageRow): CoverageRecord { 672 - return { 673 - id: row.id, 674 - selector: JSON.parse(row.selector_json) as Selector, 675 - sourceFingerprint: row.source_fingerprint, 676 - sourceFileCount: row.source_file_count, 677 - indexedSessionCount: row.indexed_session_count, 678 - completedAt: row.completed_at, 679 - indexVersion: row.index_version, 680 - }; 681 - } 682 - 683 - function tableExists(db: Db, tableName: string): boolean { 684 - const row = db.prepare<[string], unknown>("SELECT 1 FROM sqlite_master WHERE name = ? LIMIT 1").get(tableName); 685 - return Boolean(row); 686 - } 687 - 688 - function requestedIndexVersion(_db: Db): string { 689 - // Kept as a function so coverage matching has one place for future index 690 - // compatibility policy; current policy is exact index version equality. 691 - return INDEX_VERSION; 692 - } 693 - 694 - function sessionRootFromFile(filePath: string): string { 695 - const marker = "/sessions/"; 696 - const index = filePath.indexOf(marker); 697 - if (index >= 0) return filePath.slice(0, index + marker.length - 1); 698 - return filePath.slice(0, Math.max(0, filePath.lastIndexOf("/"))); 699 - } 700 - 701 - function escapeLike(value: string): string { 702 - return value.replaceAll("\\", "\\\\").replaceAll("%", "\\%").replaceAll("_", "\\_"); 703 - } 1 + export type { Db, SqlParams } from "./db/shared"; 2 + export { IndexUnavailableError, openReadDb, openWriteDb, withReadDb } from "./db/connection"; 3 + export { getIndexedSessionMeta, deleteSessionByFilePath, replaceSession, getSessionRecord } from "./db/session-store"; 4 + export { getMessagesForPage, getMessagesForRange } from "./db/message-store"; 5 + export { listSessions } from "./db/list-store"; 6 + export { getStatsCounts, getTopCwds } from "./db/stats-store"; 7 + export { 8 + coverageEntriesForSession, 9 + coverageStatusForSelector, 10 + countSessionsForSelector, 11 + 
deleteSessionsForSelectorExceptFilePaths, 12 + listCoverageRecords, 13 + replaceCoverage, 14 + } from "./db/coverage-store"; 15 + export { selectorWhereSql } from "./db/sql";
+46
src/db/connection.ts
··· 1 + import { existsSync } from "node:fs"; 2 + import Database from "better-sqlite3"; 3 + import { ensureSchema } from "./schema"; 4 + import { BUSY_TIMEOUT_MS, type Db } from "./shared"; 5 + 6 + export class IndexUnavailableError extends Error { 7 + constructor(public readonly dbPath: string) { 8 + super(`index not found: ${dbPath}`); 9 + this.name = "IndexUnavailableError"; 10 + } 11 + } 12 + 13 + export function openReadDb(dbPath: string): Db { 14 + if (!existsSync(dbPath)) { 15 + throw new IndexUnavailableError(dbPath); 16 + } 17 + 18 + const db = new Database(dbPath, { readonly: true }); 19 + db.pragma(`busy_timeout = ${BUSY_TIMEOUT_MS}`); 20 + db.pragma("query_only = ON"); 21 + db.pragma("temp_store = MEMORY"); 22 + return db; 23 + } 24 + 25 + // Why: callers used to do `const db = openReadDb(...); ... db.close();` which 26 + // leaks the connection if work in between throws. Wrapping in try/finally at 27 + // every callsite is noise — fold it once. 28 + export function withReadDb<T>(dbPath: string, fn: (db: Db) => T): T { 29 + const db = openReadDb(dbPath); 30 + try { 31 + return fn(db); 32 + } finally { 33 + db.close(); 34 + } 35 + } 36 + 37 + export function openWriteDb(dbPath: string): Db { 38 + const db = new Database(dbPath); 39 + db.pragma(`busy_timeout = ${BUSY_TIMEOUT_MS}`); 40 + db.pragma("journal_mode = WAL"); 41 + db.pragma("synchronous = NORMAL"); 42 + db.pragma("temp_store = MEMORY"); 43 + db.pragma("foreign_keys = ON"); 44 + ensureSchema(db); 45 + return db; 46 + }
+166
src/db/coverage-store.ts
import { INDEX_VERSION } from "../env";
import { selectorImplies, selectorStorageKey } from "../selector";
import type { CoverageRecord, Selector, SessionRecord } from "../types";
import { deleteSessionByUuid } from "./session-store";
import type { Db } from "./shared";
import { selectorWhereSql, sessionRootFromFile, tableExists } from "./sql";

/**
 * Inserts or refreshes the coverage row for `selector` (keyed by
 * `selectorStorageKey(selector)`) and returns the stored record.
 *
 * On conflict every column is overwritten and `completed_at` is reset to the
 * current timestamp.
 *
 * @throws Error if the row cannot be read back right after the upsert.
 */
export function replaceCoverage(
  db: Db,
  selector: Selector,
  sourceFingerprint: string,
  sourceFileCount: number,
  indexedSessionCount: number,
  indexVersion: string,
): CoverageRecord {
  const key = selectorStorageKey(selector);
  const stmt = db.prepare(`
    INSERT INTO coverage (
      selector_key, selector_json, selector_kind, root, cwd, from_date, to_date,
      source_fingerprint, source_file_count, indexed_session_count, index_version
    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    ON CONFLICT(selector_key) DO UPDATE SET
      selector_json = excluded.selector_json,
      selector_kind = excluded.selector_kind,
      root = excluded.root,
      cwd = excluded.cwd,
      from_date = excluded.from_date,
      to_date = excluded.to_date,
      source_fingerprint = excluded.source_fingerprint,
      source_file_count = excluded.source_file_count,
      indexed_session_count = excluded.indexed_session_count,
      completed_at = CURRENT_TIMESTAMP,
      index_version = excluded.index_version
  `);
  stmt.run(
    key,
    JSON.stringify(selector),
    selector.kind,
    selector.root,
    "cwd" in selector ? selector.cwd : null,
    "fromDate" in selector ? selector.fromDate : null,
    "toDate" in selector ? selector.toDate : null,
    sourceFingerprint,
    sourceFileCount,
    indexedSessionCount,
    indexVersion,
  );

  // The upsert above guarantees the row exists; fail loudly instead of
  // handing callers a null typed as CoverageRecord.
  const stored = getCoverageRecordByKey(db, key);
  if (!stored) {
    throw new Error(`coverage row missing after upsert: ${key}`);
  }
  return stored;
}

/**
 * Returns every coverage record, newest `completed_at` first (ties broken by
 * descending id). Returns an empty list when the `coverage` table does not
 * exist.
 */
export function listCoverageRecords(db: Db): CoverageRecord[] {
  if (!tableExists(db, "coverage")) return [];
  const rows = db.prepare("SELECT * FROM coverage ORDER BY completed_at DESC, id DESC").all() as CoverageRow[];
  return rows.map(rowToCoverageRecord);
}

/**
 * Reports which stored coverage records imply the `requested` selector.
 *
 * Only records whose index version equals the current one are considered.
 * A null `requested` is never complete.
 */
export function coverageStatusForSelector(db: Db, requested: Selector | null): {
  complete: boolean;
  coveringSelectors: CoverageRecord[];
} {
  if (!requested) return { complete: false, coveringSelectors: [] };
  const indexVersion = requestedIndexVersion(db);
  const entries = listCoverageRecords(db).filter((entry) =>
    entry.indexVersion === indexVersion && selectorImplies(entry.selector, requested)
  );
  return {
    complete: entries.length > 0,
    coveringSelectors: entries,
  };
}

/** Counts the rows in `sessions` that match `selector`. */
export function countSessionsForSelector(db: Db, selector: Selector): number {
  const where = selectorWhereSql(selector, "sessions");
  const row = db
    .prepare<typeof where.params, { count: number }>(`
      SELECT COUNT(*) AS count
      FROM sessions
      WHERE ${where.conditions.join(" AND ")}
    `)
    .get(...where.params) as { count: number };
  return row.count;
}

/**
 * Deletes every session matching `selector` whose `file_path` is not in
 * `retainedFilePaths`.
 *
 * Why the retained set is checked in JS rather than with `NOT IN (?, ?, ...)`:
 * binding one parameter per retained path makes the statement fail once the
 * set grows past SQLite's bound-parameter limit. `file_path` is NOT NULL, so
 * exact-string Set membership selects the same rows.
 *
 * @returns the number of sessions deleted.
 */
export function deleteSessionsForSelectorExceptFilePaths(
  db: Db,
  selector: Selector,
  retainedFilePaths: Set<string>,
): number {
  const where = selectorWhereSql(selector, "sessions");
  const candidates = db
    .prepare(`
      SELECT session_uuid AS sessionUuid, file_path AS filePath
      FROM sessions
      WHERE ${where.conditions.join(" AND ")}
    `)
    .all(...where.params) as Array<{ sessionUuid: string; filePath: string }>;

  const stale = candidates.filter((row) => !retainedFilePaths.has(row.filePath));
  for (const row of stale) {
    deleteSessionByUuid(db, row.sessionUuid);
  }
  return stale.length;
}

/**
 * Returns the coverage records that imply at least one selector describing
 * `session`: its root, its root + cwd, and (when `pathDate` is set) the
 * single-day date-range variants of both.
 */
export function coverageEntriesForSession(db: Db, session: SessionRecord): CoverageRecord[] {
  // Fall back to deriving the root from the file path when none was stored.
  const root = session.sourceRoot || sessionRootFromFile(session.filePath);
  const sessionSelectors: Selector[] = [
    { kind: "all", root },
    { kind: "cwd", root, cwd: session.cwd },
  ];
  if (session.pathDate) {
    sessionSelectors.push({
      kind: "date_range",
      root,
      fromDate: session.pathDate,
      toDate: session.pathDate,
    });
    sessionSelectors.push({
      kind: "cwd_date_range",
      root,
      cwd: session.cwd,
      fromDate: session.pathDate,
      toDate: session.pathDate,
    });
  }
  return listCoverageRecords(db).filter((entry) =>
    sessionSelectors.some((selector) => selectorImplies(entry.selector, selector))
  );
}

/** Columns of a `coverage` row that are mapped into a CoverageRecord. */
type CoverageRow = {
  id: number;
  selector_json: string;
  source_fingerprint: string;
  source_file_count: number;
  indexed_session_count: number;
  completed_at: string;
  index_version: string;
};

/** Looks up a single coverage record by its storage key; null when absent. */
function getCoverageRecordByKey(db: Db, key: string): CoverageRecord | null {
  const row = db.prepare<[string], CoverageRow>("SELECT * FROM coverage WHERE selector_key = ? LIMIT 1").get(key);
  return row ? rowToCoverageRecord(row) : null;
}

/** Converts a snake_case table row into the camelCase CoverageRecord shape. */
function rowToCoverageRecord(row: CoverageRow): CoverageRecord {
  return {
    id: row.id,
    // NOTE(review): selector_json is trusted as written by replaceCoverage and
    // is not validated on the way back out.
    selector: JSON.parse(row.selector_json) as Selector,
    sourceFingerprint: row.source_fingerprint,
    sourceFileCount: row.source_file_count,
    indexedSessionCount: row.indexed_session_count,
    completedAt: row.completed_at,
    indexVersion: row.index_version,
  };
}

function requestedIndexVersion(_db: Db): string {
  // Kept as a function so coverage matching has one place for future index
  // compatibility policy; current policy is exact index version equality.
  return INDEX_VERSION;
}
+50
src/db/list-store.ts
import type { SessionListEntry, SessionListQuery } from "../types";
import type { Db, SqlParams } from "./shared";
import { escapeLike, selectorWhereSql } from "./sql";

/** Maps the requested sort key to its column; anything else sorts by `ended_at`. */
function orderColumnFor(sort: SessionListQuery["sort"]): string {
  switch (sort) {
    case "started":
      return "started_at";
    case "messages":
      return "message_count";
    default:
      return "ended_at";
  }
}

/**
 * Lists sessions, optionally narrowed by selector, cwd substring and a lower
 * bound on `ended_at`, ordered descending by the requested sort column and
 * capped at `query.limit` rows.
 */
export function listSessions(db: Db, query: SessionListQuery): SessionListEntry[] {
  const filters: string[] = [];
  const params: SqlParams = [];

  if (query.selector) {
    const scoped = selectorWhereSql(query.selector, "sessions");
    filters.push(...scoped.conditions);
    params.push(...scoped.params);
  }

  if (query.cwd) {
    // Substring match rather than prefix/equality: agent callers often pass
    // the trailing segment of a project path, not the full canonical path.
    const needle = escapeLike(query.cwd.toLowerCase());
    filters.push("lower(cwd) LIKE ? ESCAPE '\\'");
    params.push(`%${needle}%`);
  }

  if (query.since) {
    filters.push("ended_at >= ?");
    params.push(query.since);
  }

  let where = "";
  if (filters.length > 0) {
    where = `WHERE ${filters.join(" AND ")}`;
  }
  const orderColumn = orderColumnFor(query.sort);

  // The LIMIT placeholder is the last one in the statement, so bind it last.
  params.push(query.limit);

  const statement = db.prepare<typeof params, SessionListEntry>(`
    SELECT
      session_uuid AS sessionUuid,
      title,
      summary_text AS summaryText,
      cwd,
      started_at AS startedAt,
      ended_at AS endedAt,
      path_date AS pathDate,
      message_count AS messageCount
    FROM sessions
    ${where}
    ORDER BY ${orderColumn} DESC
    LIMIT ?
  `);
  return statement.all(...params) as SessionListEntry[];
}
+47
src/db/message-store.ts
import type { MessageRecord } from "../types";
import type { Db } from "./shared";

// Projection shared by both readers: snake_case columns aliased to the
// camelCase MessageRecord fields.
const MESSAGE_COLUMNS = `
      session_uuid AS sessionUuid,
      seq,
      role,
      content_text AS contentText,
      timestamp,
      source_kind AS sourceKind`;

/**
 * Returns the messages of `sessionUuid` whose `seq` lies in the inclusive
 * range `[startSeq, endSeq]`, ordered by `seq`.
 */
export function getMessagesForRange(
  db: Db,
  sessionUuid: string,
  startSeq: number,
  endSeq: number,
): MessageRecord[] {
  const statement = db.prepare<[string, number, number], MessageRecord>(`
    SELECT${MESSAGE_COLUMNS}
    FROM messages
    WHERE session_uuid = ? AND seq BETWEEN ? AND ?
    ORDER BY seq
  `);
  return statement.all(sessionUuid, startSeq, endSeq) as MessageRecord[];
}

/**
 * Returns up to `limit` messages of `sessionUuid`, ordered by `seq`, after
 * skipping the first `offset` rows.
 */
export function getMessagesForPage(
  db: Db,
  sessionUuid: string,
  offset: number,
  limit: number,
): MessageRecord[] {
  const statement = db.prepare<[string, number, number], MessageRecord>(`
    SELECT${MESSAGE_COLUMNS}
    FROM messages
    WHERE session_uuid = ?
    ORDER BY seq
    LIMIT ? OFFSET ?
  `);
  // Bind order follows the SQL text: LIMIT first, then OFFSET.
  return statement.all(sessionUuid, limit, offset) as MessageRecord[];
}
+130
src/db/schema.ts
import type { Db } from "./shared";

/**
 * Creates any missing tables, indexes and FTS virtual tables, back-fills text
 * columns that older databases lack, and drops the legacy trigram table.
 * Safe to call on every open: each step is guarded by IF NOT EXISTS / IF
 * EXISTS or an explicit presence check.
 */
export function ensureSchema(db: Db): void {
  db.exec(`
    CREATE TABLE IF NOT EXISTS sessions (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      session_uuid TEXT NOT NULL UNIQUE,
      file_path TEXT NOT NULL UNIQUE,
      source_root TEXT NOT NULL DEFAULT '',
      title TEXT NOT NULL DEFAULT '',
      summary_text TEXT NOT NULL DEFAULT '',
      compact_text TEXT NOT NULL DEFAULT '',
      reasoning_summary_text TEXT NOT NULL DEFAULT '',
      cwd TEXT NOT NULL DEFAULT '',
      model TEXT NOT NULL DEFAULT '',
      started_at TEXT NOT NULL,
      ended_at TEXT NOT NULL,
      path_date TEXT NOT NULL DEFAULT '',
      message_count INTEGER NOT NULL DEFAULT 0,
      raw_file_mtime INTEGER NOT NULL DEFAULT 0,
      raw_file_size INTEGER NOT NULL DEFAULT 0,
      index_version TEXT NOT NULL DEFAULT '',
      updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
    )
  `);

  // A pre-existing `sessions` table is left as-is by CREATE TABLE IF NOT
  // EXISTS, so add any of these text columns it does not have yet.
  const backfilledColumns = [
    "summary_text",
    "compact_text",
    "reasoning_summary_text",
    "path_date",
    "source_root",
  ];
  for (const columnName of backfilledColumns) {
    ensureTextColumn(db, "sessions", columnName);
  }

  db.exec(`
    CREATE TABLE IF NOT EXISTS messages (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      session_id INTEGER NOT NULL REFERENCES sessions(id) ON DELETE CASCADE,
      session_uuid TEXT NOT NULL,
      seq INTEGER NOT NULL,
      role TEXT NOT NULL,
      content_text TEXT NOT NULL,
      timestamp TEXT NOT NULL,
      source_kind TEXT NOT NULL,
      UNIQUE(session_uuid, seq)
    )
  `);

  db.exec("CREATE INDEX IF NOT EXISTS idx_messages_session_seq ON messages(session_uuid, seq)");
  db.exec("CREATE INDEX IF NOT EXISTS idx_sessions_started_at ON sessions(started_at DESC)");

  db.exec(`
    CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts USING fts5(
      content_text,
      session_uuid UNINDEXED,
      seq UNINDEXED,
      role UNINDEXED,
      timestamp UNINDEXED,
      tokenize='unicode61 remove_diacritics 1'
    )
  `);

  ensureSessionsFtsTable(db);
  ensureCoverageTable(db);

  dropLegacyTrigramTable(db);
}

/** Creates the `coverage` table and its root index when they are missing. */
function ensureCoverageTable(db: Db): void {
  db.exec(`
    CREATE TABLE IF NOT EXISTS coverage (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      selector_key TEXT NOT NULL UNIQUE,
      selector_json TEXT NOT NULL,
      selector_kind TEXT NOT NULL,
      root TEXT NOT NULL,
      cwd TEXT,
      from_date TEXT,
      to_date TEXT,
      source_fingerprint TEXT NOT NULL,
      source_file_count INTEGER NOT NULL,
      indexed_session_count INTEGER NOT NULL,
      completed_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
      index_version TEXT NOT NULL
    )
  `);

  db.exec("CREATE INDEX IF NOT EXISTS idx_coverage_root ON coverage(root)");
}

function dropLegacyTrigramTable(db: Db): void {
  // cxs <= v2 shipped a second FTS5 virtual table for CJK trigram search.
  // The hybrid bigram+Segmenter tokenizer in tokenize.ts replaces it, so
  // drop the old table and its shadow rows if they still exist.
  db.exec("DROP TABLE IF EXISTS messages_fts_trigram");
}

/**
 * Makes sure `sessions_fts` exists with the current column set. A table that
 * lacks `compact_text` or `reasoning_summary_text` is dropped and recreated
 * empty.
 */
function ensureSessionsFtsTable(db: Db): void {
  const alreadyPresent = db
    .prepare("SELECT 1 FROM sqlite_master WHERE name = 'sessions_fts' LIMIT 1")
    .get();

  if (alreadyPresent) {
    const tableInfo = db
      .prepare("PRAGMA table_info(sessions_fts)")
      .all() as Array<{ name: string }>;
    const present = new Set(tableInfo.map((info) => info.name));
    const required = ["compact_text", "reasoning_summary_text"];
    const upToDate = required.every((columnName) => present.has(columnName));
    if (!upToDate) {
      db.exec("DROP TABLE sessions_fts");
    }
  }

  db.exec(`
    CREATE VIRTUAL TABLE IF NOT EXISTS sessions_fts USING fts5(
      title,
      summary_text,
      compact_text,
      reasoning_summary_text,
      session_uuid UNINDEXED,
      tokenize='unicode61 remove_diacritics 1'
    )
  `);
}

/**
 * Adds `columnName` to `tableName` as `TEXT NOT NULL DEFAULT ''` unless the
 * table already has a column with that name. Both names are interpolated
 * into SQL and must come from code, never from user input.
 */
function ensureTextColumn(db: Db, tableName: string, columnName: string): void {
  const tableInfo = db
    .prepare(`PRAGMA table_info(${tableName})`)
    .all() as Array<{ name?: string }>;

  const exists = tableInfo.some((info) => info.name === columnName);
  if (!exists) {
    db.exec(`ALTER TABLE ${tableName} ADD COLUMN ${columnName} TEXT NOT NULL DEFAULT ''`);
  }
}
+193
src/db/session-store.ts
import { tokenizedText } from "../tokenize";
import type { ParsedSession, SessionRecord } from "../types";
import type { Db } from "./shared";
import { sessionRootFromFile } from "./sql";

/** Identity columns of a `sessions` row, used when reconciling a re-index. */
type SessionIdentity = { id: number; sessionUuid: string };

/**
 * Returns the stored file mtime, file size and index version for the session
 * indexed from `filePath`, or null when that file has not been indexed.
 */
export function getIndexedSessionMeta(
  db: Db,
  filePath: string,
): { rawFileMtime: number; rawFileSize: number; indexVersion: string } | null {
  const row = db
    .prepare<[string], { rawFileMtime: number; rawFileSize: number; indexVersion: string }>(`
      SELECT raw_file_mtime AS rawFileMtime, raw_file_size AS rawFileSize, index_version AS indexVersion
      FROM sessions
      WHERE file_path = ?
      LIMIT 1
    `)
    .get(filePath) as
    | { rawFileMtime: number; rawFileSize: number; indexVersion: string }
    | undefined;

  return row ?? null;
}

/** Deletes the session indexed from `filePath`, if there is one. */
export function deleteSessionByFilePath(db: Db, filePath: string): void {
  const row = db
    .prepare<[string], { sessionUuid: string }>("SELECT session_uuid AS sessionUuid FROM sessions WHERE file_path = ? LIMIT 1")
    .get(filePath) as { sessionUuid: string } | undefined;

  if (!row) return;
  deleteSessionByUuid(db, row.sessionUuid);
}

/**
 * Removes a session together with its messages and both FTS entries.
 *
 * The four deletes run inside one transaction so an interruption cannot leave
 * a session row without its search rows (or the reverse). When the caller is
 * already inside a transaction, better-sqlite3 turns this into a savepoint.
 */
export function deleteSessionByUuid(db: Db, sessionUuid: string): void {
  const purge = db.transaction(() => {
    db.prepare("DELETE FROM sessions_fts WHERE session_uuid = ?").run(sessionUuid);
    db.prepare("DELETE FROM messages_fts WHERE session_uuid = ?").run(sessionUuid);
    db.prepare("DELETE FROM messages WHERE session_uuid = ?").run(sessionUuid);
    db.prepare("DELETE FROM sessions WHERE session_uuid = ?").run(sessionUuid);
  });
  purge();
}

/**
 * Inserts or replaces one parsed session, its messages and the matching FTS
 * rows in a single transaction.
 *
 * An existing row is matched by session uuid OR file path, so a moved file
 * (same uuid) and a rewritten file (same path, new uuid) both update in place.
 *
 * @param rawFileMtime mtime of the source file, stored for change detection.
 * @param rawFileSize size of the source file, stored for change detection.
 * @param indexVersion index version the row was written with.
 * @param pathDate value stored in `path_date`.
 * @param sourceRoot defaults to the root derived from `session.filePath`.
 */
export function replaceSession(
  db: Db,
  session: ParsedSession,
  rawFileMtime: number,
  rawFileSize: number,
  indexVersion: string,
  pathDate: string,
  sourceRoot = sessionRootFromFile(session.filePath),
): void {
  // Same column order for the UPDATE and the INSERT below.
  const columnValues: unknown[] = [
    session.sessionUuid,
    session.filePath,
    sourceRoot,
    session.title,
    session.summaryText,
    session.compactText ?? "",
    session.reasoningSummaryText ?? "",
    session.cwd,
    session.model,
    session.startedAt,
    session.endedAt,
    pathDate,
    session.messages.length,
    rawFileMtime,
    rawFileSize,
    indexVersion,
  ];

  const tx = db.transaction(() => {
    // Both columns are UNIQUE, so at most two rows can match: one by uuid and
    // one by file path.
    const matches = db
      .prepare<[string, string], SessionIdentity>(
        "SELECT id, session_uuid AS sessionUuid FROM sessions WHERE session_uuid = ? OR file_path = ? ORDER BY id",
      )
      .all(session.sessionUuid, session.filePath) as SessionIdentity[];
    const [existing, ...conflicting] = matches;

    // A second matching row would make the UPDATE below violate a UNIQUE
    // constraint; it describes a session this one supersedes, so drop it.
    for (const row of conflicting) {
      deleteSessionByUuid(db, row.sessionUuid);
    }

    let sessionId: number;
    if (existing) {
      db.prepare(
        `
          UPDATE sessions
          SET session_uuid = ?, file_path = ?, source_root = ?, title = ?, summary_text = ?, compact_text = ?, reasoning_summary_text = ?,
              cwd = ?, model = ?, started_at = ?, ended_at = ?, path_date = ?,
              message_count = ?, raw_file_mtime = ?, raw_file_size = ?, index_version = ?, updated_at = CURRENT_TIMESTAMP
          WHERE id = ?
        `,
      ).run(...columnValues, existing.id);
      sessionId = existing.id;
    } else {
      const inserted = db.prepare(
        `
          INSERT INTO sessions (
            session_uuid, file_path, source_root, title, summary_text, compact_text, reasoning_summary_text,
            cwd, model, started_at, ended_at, path_date,
            message_count, raw_file_mtime, raw_file_size, index_version
          ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        `,
      ).run(...columnValues);
      // `id` is the INTEGER PRIMARY KEY, i.e. the rowid of the new row.
      sessionId = Number(inserted.lastInsertRowid);
    }

    // Message and FTS rows are keyed by session uuid. When the matched row was
    // found by file path its previous uuid may differ from the new one, so
    // purge under both — otherwise the old messages stay searchable under a
    // uuid that no longer has a session row.
    const uuidsToPurge = new Set<string>([session.sessionUuid]);
    if (existing) uuidsToPurge.add(existing.sessionUuid);

    const dropMessageFts = db.prepare("DELETE FROM messages_fts WHERE session_uuid = ?");
    const dropMessages = db.prepare("DELETE FROM messages WHERE session_uuid = ?");
    const dropSessionFts = db.prepare("DELETE FROM sessions_fts WHERE rowid = ? OR session_uuid = ?");
    for (const uuid of uuidsToPurge) {
      dropMessageFts.run(uuid);
      dropMessages.run(uuid);
      dropSessionFts.run(sessionId, uuid);
    }

    db.prepare(
      `
        INSERT INTO sessions_fts(rowid, title, summary_text, compact_text, reasoning_summary_text, session_uuid)
        VALUES (?, ?, ?, ?, ?, ?)
      `,
    ).run(
      sessionId,
      tokenizedText(session.title),
      tokenizedText(session.summaryText),
      tokenizedText(session.compactText ?? ""),
      tokenizedText(session.reasoningSummaryText ?? ""),
      session.sessionUuid,
    );

    const messageStmt = db.prepare<[number, string, number, string, string, string, string]>(`
      INSERT INTO messages (session_id, session_uuid, seq, role, content_text, timestamp, source_kind)
      VALUES (?, ?, ?, ?, ?, ?, ?)
    `);
    const ftsStmt = db.prepare<[number, string, string, number, string, string]>(`
      INSERT INTO messages_fts(rowid, content_text, session_uuid, seq, role, timestamp)
      VALUES (?, ?, ?, ?, ?, ?)
    `);

    for (const message of session.messages) {
      const result = messageStmt.run(
        sessionId,
        session.sessionUuid,
        message.seq,
        message.role,
        message.contentText,
        message.timestamp,
        message.sourceKind,
      );
      const messageId = Number(result.lastInsertRowid);
      // Feed the FTS index with tokenized text so that CJK runs are split
      // into bigrams by tokenize(). Stored content in messages.content_text
      // stays raw for display.
      ftsStmt.run(
        messageId,
        tokenizedText(message.contentText),
        session.sessionUuid,
        message.seq,
        message.role,
        message.timestamp,
      );
    }
  });

  tx();
}

/** Returns the stored session row for `sessionUuid`, or null when absent. */
export function getSessionRecord(db: Db, sessionUuid: string): SessionRecord | null {
  const row = db
    .prepare<[string], SessionRecord & { filePath: string }>(`
      SELECT
        session_uuid AS sessionUuid,
        file_path AS filePath,
        source_root AS sourceRoot,
        title,
        summary_text AS summaryText,
        cwd,
        model,
        started_at AS startedAt,
        ended_at AS endedAt,
        path_date AS pathDate,
        message_count AS messageCount
      FROM sessions
      WHERE session_uuid = ?
      LIMIT 1
    `)
    .get(sessionUuid) as (SessionRecord & { filePath: string }) | undefined;

  return row ?? null;
}
+6
src/db/shared.ts
import Database from "better-sqlite3";

/** Connection handle type shared by every module under `src/db/`. */
export type Db = Database.Database;

/** Positional bind parameters collected while a statement is being built. */
export type SqlParams = unknown[];

/** Value used for the `busy_timeout` pragma on every connection. */
export const BUSY_TIMEOUT_MS = 5_000;
+33
src/db/sql.ts
import type { Selector } from "../types";
import type { Db, SqlParams } from "./shared";

/**
 * Builds the WHERE fragments (and their bind parameters, in matching order)
 * that restrict the table referenced as `alias` to rows covered by `selector`.
 *
 * The root filter is always present: `file_path` equals the root or starts
 * with `root + "/"`. The cwd and path-date filters are appended for the
 * selector kinds that carry them.
 */
export function selectorWhereSql(selector: Selector, alias: string): { conditions: string[]; params: SqlParams } {
  const conditions: string[] = [];
  const params: SqlParams = [];

  conditions.push(`(${alias}.file_path = ? OR ${alias}.file_path LIKE ? ESCAPE '\\')`);
  params.push(selector.root, `${escapeLike(selector.root)}/%`);

  const restrictCwd = (cwd: string): void => {
    conditions.push(`${alias}.cwd = ?`);
    params.push(cwd);
  };
  const restrictDates = (fromDate: string, toDate: string): void => {
    conditions.push(`${alias}.path_date >= ?`, `${alias}.path_date <= ?`);
    params.push(fromDate, toDate);
  };

  switch (selector.kind) {
    case "cwd":
      restrictCwd(selector.cwd);
      break;
    case "date_range":
      restrictDates(selector.fromDate, selector.toDate);
      break;
    case "cwd_date_range":
      // cwd first, then dates, so placeholders and params stay aligned.
      restrictCwd(selector.cwd);
      restrictDates(selector.fromDate, selector.toDate);
      break;
    default:
      break;
  }

  return { conditions, params };
}

/** True when `sqlite_master` has an entry named `tableName`. */
export function tableExists(db: Db, tableName: string): boolean {
  const hit = db.prepare<[string], unknown>("SELECT 1 FROM sqlite_master WHERE name = ? LIMIT 1").get(tableName);
  return Boolean(hit);
}

/**
 * Derives a source root from a session file path: everything up to and
 * including the first `/sessions` segment, or the parent directory when the
 * path contains no `/sessions/` segment.
 */
export function sessionRootFromFile(filePath: string): string {
  const marker = "/sessions/";
  const markerAt = filePath.indexOf(marker);
  if (markerAt < 0) {
    const lastSlash = filePath.lastIndexOf("/");
    return filePath.slice(0, Math.max(0, lastSlash));
  }
  // Keep "/sessions" but not the slash that follows it.
  return filePath.slice(0, markerAt + marker.length - 1);
}

/**
 * Backslash-escapes the characters that are special in a LIKE pattern using
 * `ESCAPE '\'`: the backslash itself, `%` and `_`.
 */
export function escapeLike(value: string): string {
  return value.replace(/[\\%_]/g, (special) => `\\${special}`);
}
+42
src/db/stats-store.ts
import type { CwdCount } from "../types";
import type { Db } from "./shared";

/**
 * Aggregates the whole `sessions` table into one summary row: number of
 * sessions, sum of their `message_count`, earliest `started_at`, latest
 * `ended_at` and latest `updated_at` (reported as `lastSyncAt`).
 * The three timestamps are null when the table is empty.
 */
export function getStatsCounts(db: Db): {
  sessionCount: number;
  messageCount: number;
  earliestStartedAt: string | null;
  latestEndedAt: string | null;
  lastSyncAt: string | null;
} {
  const statement = db.prepare(`
    SELECT
      COUNT(*) AS sessionCount,
      COALESCE(SUM(message_count), 0) AS messageCount,
      MIN(started_at) AS earliestStartedAt,
      MAX(ended_at) AS latestEndedAt,
      MAX(updated_at) AS lastSyncAt
    FROM sessions
  `);
  // An aggregate without GROUP BY always yields exactly one row.
  return statement.get() as ReturnType<typeof getStatsCounts>;
}

/**
 * Returns the `limit` most frequent non-empty cwd values with their session
 * counts, highest count first and cwd ascending on ties.
 */
export function getTopCwds(db: Db, limit: number): CwdCount[] {
  const statement = db.prepare<[number], CwdCount>(`
    SELECT cwd, COUNT(*) AS count
    FROM sessions
    WHERE cwd != ''
    GROUP BY cwd
    ORDER BY count DESC, cwd ASC
    LIMIT ?
  `);
  return statement.all(limit) as CwdCount[];
}
+44
src/query-concurrency.test.ts
import { describe, expect, test } from "vitest";
import { mkdtempSync, mkdirSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { pathToFileURL } from "node:url";
import { syncSessions } from "./indexer";
import { holdExclusiveLock, line, runReadChild, tempDirs } from "./query-test-helpers";

describe("cxs query concurrency", () => {
  test("parallel read commands wait through transient locks without surfacing SQLITE_BUSY", async () => {
    const base = mkdtempSync(join(tmpdir(), "cxs-parallel-"));
    tempDirs.push(base);
    const sessionsRoot = join(base, "sessions", "2026", "04", "22");
    mkdirSync(sessionsRoot, { recursive: true });

    // One small fixture session is enough: the test is about lock handling,
    // not about what the readers find.
    const sessionId = "56565656-5656-4565-8565-565656565656";
    const rolloutLines = [
      line("session_meta", { id: sessionId, cwd: "/tmp/parallel" }),
      line("turn_context", { model: "gpt-5.4" }),
      line("event_msg", { type: "user_message", message: "reverse-i-search 历史怎么找" }),
      line("event_msg", { type: "agent_message", message: "先用 cxs find reverse-i-search" }),
      line("event_msg", { type: "user_message", message: "顺便查 ffmpeg 的那次会话" }),
      line("event_msg", { type: "agent_message", message: "可以并行 find ffmpeg 再看 stats" }),
    ];
    writeFileSync(
      join(sessionsRoot, `rollout-2026-04-22T10-00-00-${sessionId}.jsonl`),
      rolloutLines.join("\n"),
    );

    const dbPath = join(base, "index.sqlite");
    const summary = await syncSessions({ dbPath, rootDir: join(base, "sessions") });
    expect(summary.added).toBe(1);

    const queryModuleUrl = pathToFileURL(join(import.meta.dirname, "query.ts")).href;

    // Take the exclusive lock first, then start the readers while it is held.
    const blocker = await holdExclusiveLock(dbPath, 400);
    const readersPerCommand = 6;
    const findReaders = Array.from({ length: readersPerCommand }, () =>
      runReadChild(queryModuleUrl, dbPath, "find", "reverse-i-search"),
    );
    const statsReaders = Array.from({ length: readersPerCommand }, () =>
      runReadChild(queryModuleUrl, dbPath, "stats"),
    );

    const results = await Promise.all([...findReaders, ...statsReaders]);
    await blocker.done;

    const failures = results.filter((result) => result.code !== 0);
    expect(failures).toEqual([]);
  });
});
+188
src/query-display-row.test.ts
import { describe, expect, test } from "vitest";
import { mkdtempSync, mkdirSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { openWriteDb, replaceSession } from "./db";
import { INDEX_VERSION } from "./env";
import { syncSessions } from "./indexer";
import { findSessions } from "./query";
import { line, tempDirs } from "./query-test-helpers";

type FixtureSession = Parameters<typeof replaceSession>[1];
type FixtureMessage = FixtureSession["messages"][number];

// Every fixture session starts and ends at the same instant; the optional
// second message is stamped 30 seconds later.
const FIRST_MESSAGE_AT = "2026-04-24T01:00:00.000Z";
const SECOND_MESSAGE_AT = "2026-04-24T01:00:30.000Z";

/** Creates a registered temp directory and returns it with its index path. */
function makeIndexDir(prefix: string): { base: string; dbPath: string } {
  const base = mkdtempSync(join(tmpdir(), prefix));
  tempDirs.push(base);
  return { base, dbPath: join(base, "index.sqlite") };
}

/** Builds one `event_msg` fixture message. */
function fixtureMessage(
  seq: number,
  role: FixtureMessage["role"],
  contentText: string,
  timestamp: string,
): FixtureMessage {
  return { role, contentText, timestamp, seq, sourceKind: "event_msg" };
}

/** Indexes a fixture session whose session-level text fields are all empty except the title. */
function seedSession(
  db: ReturnType<typeof openWriteDb>,
  base: string,
  spec: { sessionUuid: string; fileName: string; title: string; cwd: string; messages: FixtureMessage[] },
): void {
  const session: FixtureSession = {
    sessionUuid: spec.sessionUuid,
    filePath: join(base, spec.fileName),
    title: spec.title,
    summaryText: "",
    compactText: "",
    reasoningSummaryText: "",
    cwd: spec.cwd,
    model: "gpt-5.4",
    startedAt: FIRST_MESSAGE_AT,
    endedAt: FIRST_MESSAGE_AT,
    messages: spec.messages,
  };
  replaceSession(db, session, 1, 1, INDEX_VERSION, "");
}

describe("cxs display row selection", () => {
  test("mixed session/message hit prefers message displayRow but keeps session ranking signal", () => {
    const { base, dbPath } = makeIndexDir("cxs-mixed-match-");
    const db = openWriteDb(dbPath);
    const mixedUuid = "60606060-6060-4606-8606-606060606060";
    const messageOnlyUuid = "70707070-7070-4707-8707-707070707070";

    // Mixed session: title carries the query (strong session hit) AND one
    // message body also carries it (weaker message hit). The display row
    // must come from the message hit so read-range can re-anchor on a real
    // seq, but the session-level signal still has to outrank a peer that
    // only has the message hit.
    seedSession(db, base, {
      sessionUuid: mixedUuid,
      fileName: "mixed.jsonl",
      title: "payloadbeacon retry handoff",
      cwd: "/tmp/mixed",
      messages: [
        fixtureMessage(0, "user", "noticed payloadbeacon stalled in production", FIRST_MESSAGE_AT),
        fixtureMessage(1, "assistant", "checking retry queue depth and surface", SECOND_MESSAGE_AT),
      ],
    });

    // Message-only control: query appears only in a message body, neither
    // title nor any session-level field carries it.
    seedSession(db, base, {
      sessionUuid: messageOnlyUuid,
      fileName: "message-only.jsonl",
      title: "neutral retry surface review",
      cwd: "/tmp/message-only",
      messages: [
        fixtureMessage(0, "user", "saw payloadbeacon mentioned once in passing", FIRST_MESSAGE_AT),
      ],
    });

    db.close();

    const found = findSessions(dbPath, "payloadbeacon", 5);
    const mixed = found.results.find((result) => result.sessionUuid === mixedUuid);
    const messageOnly = found.results.find((result) => result.sessionUuid === messageOnlyUuid);

    expect(mixed).toBeDefined();
    expect(messageOnly).toBeDefined();

    // Display row must come from the message hit so read-range can anchor
    // on a real seq.
    expect(mixed?.matchSource).toBe("message");
    expect(typeof mixed?.matchSeq).toBe("number");

    // Session-level signal still wins overall ranking and score.
    expect(found.results[0]?.sessionUuid).toBe(mixedUuid);
    expect(mixed!.score).toBeGreaterThan(messageOnly!.score);
  });

  test("session-only hit reports matchSource session and null matchSeq", () => {
    const { base, dbPath } = makeIndexDir("cxs-session-only-match-");
    const db = openWriteDb(dbPath);
    const sessionUuid = "80808080-8080-4808-8808-808080808080";

    // Only the title carries the query; neither message body does.
    seedSession(db, base, {
      sessionUuid,
      fileName: "session-only.jsonl",
      title: "payloadbeacon postmortem outline",
      cwd: "/tmp/session-only",
      messages: [
        fixtureMessage(0, "user", "everything looked fine on the surface", FIRST_MESSAGE_AT),
        fixtureMessage(1, "assistant", "agreed, no anomalies in the queue depth", SECOND_MESSAGE_AT),
      ],
    });

    db.close();

    const found = findSessions(dbPath, "payloadbeacon", 5);
    const top = found.results[0];

    expect(found.results).toHaveLength(1);
    expect(top?.sessionUuid).toBe(sessionUuid);
    expect(top?.matchSource).toBe("session");
    expect(top?.matchSeq).toBeNull();
  });

  test("message-only hit reports matchSource message with a numeric matchSeq", () => {
    const { base, dbPath } = makeIndexDir("cxs-message-only-match-");
    const db = openWriteDb(dbPath);

    // The query appears only in the second message (seq 1).
    seedSession(db, base, {
      sessionUuid: "90909090-9090-4909-8909-909090909090",
      fileName: "message-only-baseline.jsonl",
      title: "neutral retry surface review",
      cwd: "/tmp/message-only-baseline",
      messages: [
        fixtureMessage(0, "assistant", "kicked off neutral diagnostics", FIRST_MESSAGE_AT),
        fixtureMessage(1, "user", "found payloadbeacon in the trace", SECOND_MESSAGE_AT),
      ],
    });

    db.close();

    const found = findSessions(dbPath, "payloadbeacon", 5);
    const top = found.results[0];

    expect(found.results).toHaveLength(1);
    expect(top?.matchSource).toBe("message");
    expect(top?.matchSeq).toBe(1);
  });
});
+153
src/query-flow.test.ts
import { describe, expect, test } from "vitest";
import { mkdtempSync, mkdirSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { openReadDb } from "./db";
import { syncSessions } from "./indexer";
import { findSessions, getMessagePage, getMessageRange } from "./query";
import { line, tempDirs } from "./query-test-helpers";

/** Creates a temp directory that is registered in `tempDirs`. */
function makeBase(prefix: string): string {
  const base = mkdtempSync(join(tmpdir(), prefix));
  tempDirs.push(base);
  return base;
}

/** Creates `base/<segments...>` recursively and returns its path. */
function makeDir(base: string, ...segments: string[]): string {
  const dir = join(base, ...segments);
  mkdirSync(dir, { recursive: true });
  return dir;
}

/** Writes one rollout file made of newline-joined JSONL lines. */
function writeRollout(dir: string, fileName: string, lines: string[]): void {
  writeFileSync(join(dir, fileName), lines.join("\n"));
}

describe("cxs retrieval flow", () => {
  test("sync -> find -> read-range -> read-page works on fixture sessions", async () => {
    const base = makeBase("cxs-test-");
    const sessionsRoot = makeDir(base, "sessions", "2026", "04", "21");
    const deploySession = "11111111-1111-4111-8111-111111111111";

    writeRollout(sessionsRoot, `rollout-2026-04-21T10-00-00-${deploySession}.jsonl`, [
      line("session_meta", { id: deploySession, cwd: "/tmp/project-a" }),
      line("turn_context", { model: "gpt-5.4" }),
      line("event_msg", { type: "user_message", message: "排查 fly deploy 失败" }),
      line("event_msg", { type: "agent_message", message: "先看 health check 和 readback" }),
      line("event_msg", { type: "user_message", message: "health check 还是 500" }),
      line("event_msg", { type: "agent_message", message: "继续检查 secrets readback" }),
    ]);

    writeRollout(sessionsRoot, "rollout-2026-04-21T11-00-00-22222222-2222-4222-8222-222222222222.jsonl", [
      line("session_meta", { id: "22222222-2222-4222-8222-222222222222", cwd: "/tmp/project-b" }),
      line("turn_context", { model: "gpt-5.4" }),
      line("event_msg", { type: "user_message", message: "重构 markdown parser" }),
      line("event_msg", { type: "agent_message", message: "先补失败测试" }),
    ]);

    const dbPath = join(base, "index.sqlite");
    const summary = await syncSessions({ dbPath, rootDir: join(base, "sessions") });

    expect(summary.added).toBe(2);

    const found = findSessions(dbPath, "health check", 5);
    expect(found.results).toHaveLength(1);
    expect(found.results[0]?.sessionUuid).toBe(deploySession);
    expect(found.results[0]?.matchSeq).toBe(2);

    const range = getMessageRange(dbPath, deploySession, {
      seq: 2,
      before: 1,
      after: 1,
    });
    expect(range.anchorSeq).toBe(2);
    expect(range.messages.map((message) => message.seq)).toEqual([1, 2, 3]);

    const page = getMessagePage(dbPath, deploySession, 2, 2);
    expect(page.messages.map((message) => message.seq)).toEqual([2, 3]);
  });

  test("read-range can relocate anchor by query within a session", async () => {
    const base = makeBase("cxs-query-");
    const sessionsRoot = makeDir(base, "sessions", "2026", "04", "21");
    const sessionId = "33333333-3333-4333-8333-333333333333";

    writeRollout(sessionsRoot, `rollout-2026-04-21T10-00-00-${sessionId}.jsonl`, [
      line("session_meta", { id: sessionId, cwd: "/tmp/project-c" }),
      line("turn_context", { model: "gpt-5.4" }),
      line("event_msg", { type: "user_message", message: "先做回滚预案" }),
      line("event_msg", { type: "agent_message", message: "health check 先确认 500 触发点" }),
      line("event_msg", { type: "agent_message", message: "然后看 readback" }),
    ]);

    const dbPath = join(base, "index.sqlite");
    const summary = await syncSessions({ dbPath, rootDir: join(base, "sessions") });
    expect(summary.added).toBe(1);

    const range = getMessageRange(dbPath, sessionId, {
      query: "health check",
      before: 0,
      after: 1,
    });

    expect(range.anchorSeq).toBe(1);
    expect(range.rangeStartSeq).toBe(1);
    expect(range.rangeEndSeq).toBe(2);
    expect(range.messages.map((message) => message.seq)).toEqual([1, 2]);
  });

  test("read-page reports coverage for sessions synced from a nonstandard root", async () => {
    const base = makeBase("cxs-nonstandard-root-");
    // The root directory is deliberately not named "sessions".
    const root = join(base, "rawroot");
    const day = makeDir(root, "2026", "04", "22");
    const sessionId = "45454545-4545-4545-8545-454545454545";

    writeRollout(day, `rollout-2026-04-22T10-00-00-${sessionId}.jsonl`, [
      line("session_meta", { id: sessionId, cwd: "/tmp/nonstandard-root" }),
      line("event_msg", { type: "user_message", message: "root attribution needle" }),
    ]);

    const dbPath = join(base, "index.sqlite");
    await syncSessions({ dbPath, selector: { kind: "all", root } });

    const page = getMessagePage(dbPath, sessionId, 0, 10);

    expect(page.coverage.entries).toHaveLength(1);
    expect(page.coverage.entries[0]?.selector).toEqual({ kind: "all", root });
  });

  test("sync stores derived session summary and find returns it", async () => {
    const base = makeBase("cxs-summary-");
    const sessionsRoot = makeDir(base, "sessions", "2026", "04", "21");
    const sessionId = "eeeeeeee-eeee-4eee-8eee-eeeeeeeeeeee";

    writeRollout(sessionsRoot, `rollout-2026-04-21T12-00-00-${sessionId}.jsonl`, [
      line("session_meta", { id: sessionId, cwd: "/tmp/deploy-summary" }),
      line("turn_context", { model: "gpt-5.4" }),
      line("event_msg", { type: "user_message", message: "排查 fly deploy 失败" }),
      line("event_msg", { type: "agent_message", message: "先看 health check 和 readback" }),
      line("event_msg", { type: "user_message", message: "health check 还是 500" }),
      line("event_msg", { type: "agent_message", message: "继续核对 secrets readback" }),
    ]);

    const dbPath = join(base, "index.sqlite");
    const summary = await syncSessions({ dbPath, rootDir: join(base, "sessions") });
    expect(summary.added).toBe(1);

    // Close the handle even when the query throws, so a failure here does not
    // leave the index file open. better-sqlite3 returns undefined (not null)
    // when no row matches.
    const db = openReadDb(dbPath);
    let row: { summaryText: string } | undefined;
    try {
      row = db
        .prepare<[string], { summaryText: string }>("SELECT summary_text AS summaryText FROM sessions WHERE session_uuid = ? LIMIT 1")
        .get(sessionId) as { summaryText: string } | undefined;
    } finally {
      db.close();
    }

    expect(row?.summaryText).toContain("排查 fly deploy 失败");
    expect(row?.summaryText).toContain("先看 health check 和 readback");
    expect(row?.summaryText).toContain("health check 还是 500");

    const found = findSessions(dbPath, "deploy", 5);
    expect(found.results[0]?.summaryText).toContain("排查 fly deploy 失败");
  });
});
+10
src/query-profile.test.ts
import { describe, expect, test } from "vitest";
import { classifyQueryProfile } from "./query";

describe("query profile", () => {
  test("classifies broad concept query separately from exact troubleshooting query", () => {
    // Each pair is [query text, expected profile kind], checked in this order.
    const expectations = [
      ["deploy", "broad"],
      ["health check 500", "exact"],
      ["src/background.ts remoteHosts", "exact"],
    ] as const;

    for (const [query, expectedKind] of expectations) {
      expect(classifyQueryProfile(query).kind).toBe(expectedKind);
    }
  });
});
+198
src/query-session-fields.test.ts
import { describe, expect, test } from "vitest";
import { mkdtempSync, mkdirSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { openWriteDb, replaceSession } from "./db";
import { INDEX_VERSION } from "./env";
import { syncSessions } from "./indexer";
import { findSessions } from "./query";
import { line, tempDirs } from "./query-test-helpers";

type WriteDb = Parameters<typeof replaceSession>[0];
type SessionInput = Parameters<typeof replaceSession>[1];

/** Creates a throwaway base directory and registers it in `tempDirs` for cleanup. */
function makeBase(prefix: string): string {
  const base = mkdtempSync(join(tmpdir(), prefix));
  tempDirs.push(base);
  return base;
}

/** Stores one session with the fixed trailing arguments every test in this file uses. */
function storeSession(db: WriteDb, session: SessionInput): void {
  replaceSession(db, session, 1, 1, INDEX_VERSION, "");
}

describe("cxs session-level fields", () => {
  test("find can recall session title even when no message contains the query", () => {
    const base = makeBase("cxs-session-title-");
    const dbPath = join(base, "index.sqlite");
    const sessionId = "abababab-abab-4aba-8aba-abababababab";

    const db = openWriteDb(dbPath);
    storeSession(db, {
      sessionUuid: sessionId,
      filePath: join(base, "rollout.jsonl"),
      title: "设置 ChatGPT 订阅取消提醒",
      summaryText: "user: billing reminder | assistant: schedule a local notification",
      compactText: "",
      reasoningSummaryText: "",
      cwd: "/tmp/title-only",
      model: "gpt-5.4",
      startedAt: "2026-04-24T01:00:00.000Z",
      endedAt: "2026-04-24T01:01:00.000Z",
      messages: [
        {
          role: "user",
          contentText: "billing reminder",
          timestamp: "2026-04-24T01:00:00.000Z",
          seq: 0,
          sourceKind: "event_msg",
        },
        {
          role: "assistant",
          contentText: "schedule a local notification",
          timestamp: "2026-04-24T01:01:00.000Z",
          seq: 1,
          sourceKind: "event_msg",
        },
      ],
    });
    db.close();

    // The query text appears only in the title, never in a message body.
    const { results } = findSessions(dbPath, "订阅取消提醒", 5);
    const top = results[0];

    expect(results).toHaveLength(1);
    expect(top?.sessionUuid).toBe(sessionId);
    expect(top?.matchSource).toBe("session");
    expect(top?.matchSeq).toBeNull();
    expect(top?.snippet).toContain("订阅取消提醒");
  });

  test("session-level fields have explicit ranking weights", () => {
    const base = makeBase("cxs-session-field-weights-");
    const dbPath = join(base, "index.sqlite");
    const db = openWriteDb(dbPath);

    // Neutral baseline; each stored session overrides exactly one text field.
    const common = {
      filePath: join(base, "rollout.jsonl"),
      title: "neutral session",
      summaryText: "",
      compactText: "",
      reasoningSummaryText: "",
      cwd: "/tmp/field-weights",
      model: "gpt-5.4",
      startedAt: "2026-04-24T01:00:00.000Z",
      endedAt: "2026-04-24T01:00:00.000Z",
      messages: [
        {
          role: "user" as const,
          contentText: "ordinary visible message",
          timestamp: "2026-04-24T01:00:00.000Z",
          seq: 0,
          sourceKind: "event_msg" as const,
        },
      ],
    };

    storeSession(db, {
      ...common,
      sessionUuid: "10101010-1010-4010-8010-101010101010",
      filePath: join(base, "title.jsonl"),
      title: "handoffneedle title",
    });
    storeSession(db, {
      ...common,
      sessionUuid: "20202020-2020-4020-8020-202020202020",
      filePath: join(base, "compact.jsonl"),
      compactText: "handoffneedle compact handoff",
    });
    storeSession(db, {
      ...common,
      sessionUuid: "30303030-3030-4030-8030-303030303030",
      filePath: join(base, "summary.jsonl"),
      summaryText: "handoffneedle derived summary",
    });
    storeSession(db, {
      ...common,
      sessionUuid: "40404040-4040-4040-8040-404040404040",
      filePath: join(base, "reasoning.jsonl"),
      reasoningSummaryText: "handoffneedle reasoning summary",
    });
    db.close();

    const { results } = findSessions(dbPath, "handoffneedle", 10);
    const rankedIds = results.map((result) => result.sessionUuid);

    // Expected order: title, compact text, summary, reasoning summary.
    expect(rankedIds).toEqual([
      "10101010-1010-4010-8010-101010101010",
      "20202020-2020-4020-8020-202020202020",
      "30303030-3030-4030-8030-303030303030",
      "40404040-4040-4040-8040-404040404040",
    ]);
  });

  test("sync indexes compacted handoff text for session-level recall", async () => {
    const base = makeBase("cxs-compact-recall-");
    const sessionsRoot = join(base, "sessions", "2026", "04", "24");
    mkdirSync(sessionsRoot, { recursive: true });
    const sessionId = "90909090-9090-4090-8090-909090909090";

    const records = [
      line("session_meta", { id: sessionId, cwd: "/tmp/compact-recall" }),
      line("turn_context", { model: "gpt-5.4" }),
      line("event_msg", { type: "user_message", message: "继续前一个任务" }),
      line("compacted", { message: "handoff says durable output queue needs final verification" }),
      line("event_msg", { type: "context_compacted" }),
      line("event_msg", { type: "agent_message", message: "先读取测试文件" }),
    ];
    writeFileSync(
      join(sessionsRoot, "rollout-2026-04-24T09-00-00-90909090-9090-4090-8090-909090909090.jsonl"),
      records.join("\n"),
    );

    const dbPath = join(base, "index.sqlite");
    const summary = await syncSessions({ dbPath, rootDir: join(base, "sessions") });
    expect(summary.added).toBe(1);

    // The query text appears only in the `compacted` record.
    const { results } = findSessions(dbPath, "durable output queue", 5);
    const top = results[0];

    expect(results).toHaveLength(1);
    expect(top?.sessionUuid).toBe(sessionId);
    expect(top?.matchSource).toBe("session");
    expect(top?.snippet).toContain("durable output queue");
  });

  test("session-level snippet prefers the window with denser query term coverage", () => {
    const base = makeBase("cxs-session-snippet-");
    const dbPath = join(base, "index.sqlite");
    const db = openWriteDb(dbPath);

    // One query term appears early, the other two appear together later.
    const compactText = [
      "部署 happened early in the handoff.",
      "Later the important evidence says the health check failed after rollout.",
    ].join(" ");

    storeSession(db, {
      sessionUuid: "50505050-5050-4050-8050-505050505050",
      filePath: join(base, "snippet.jsonl"),
      title: "neutral deploy title",
      summaryText: "",
      compactText,
      reasoningSummaryText: "",
      cwd: "/tmp/snippet",
      model: "gpt-5.4",
      startedAt: "2026-04-24T01:00:00.000Z",
      endedAt: "2026-04-24T01:00:00.000Z",
      messages: [
        {
          role: "user",
          contentText: "ordinary visible message",
          timestamp: "2026-04-24T01:00:00.000Z",
          seq: 0,
          sourceKind: "event_msg",
        },
      ],
    });
    db.close();

    const { results } = findSessions(dbPath, "部署 health check", 5);
    const snippet = results[0]?.snippet;

    expect(snippet).toContain("health");
    expect(snippet).toContain("check");
  });
});
+121
src/query-session-ranking.test.ts
import { describe, expect, test } from "vitest";
import { mkdtempSync, mkdirSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { syncSessions } from "./indexer";
import { findSessions } from "./query";
import { line, tempDirs } from "./query-test-helpers";

// Every test in this file builds its index through `syncSessions`, so the
// direct-write helpers (`openWriteDb`, `replaceSession`, `INDEX_VERSION`) that
// were carried over from the old combined test file are not imported here.

describe("cxs session ranking", () => {
  test("session title hit outranks broad incidental mentions", async () => {
    const base = mkdtempSync(join(tmpdir(), "cxs-rank-"));
    tempDirs.push(base);
    const sessionsRoot = join(base, "sessions", "2026", "04", "21");
    mkdirSync(sessionsRoot, { recursive: true });

    // Session A: the term appears only in later assistant messages.
    writeFileSync(
      join(sessionsRoot, "rollout-2026-04-21T09-00-00-aaaaaaaa-aaaa-4aaa-8aaa-aaaaaaaaaaaa.jsonl"),
      [
        line("session_meta", { id: "aaaaaaaa-aaaa-4aaa-8aaa-aaaaaaaaaaaa", cwd: "/tmp/mac-setup" }),
        line("turn_context", { model: "gpt-5.4" }),
        line("event_msg", { type: "user_message", message: "同步新 Mac 配置" }),
        line("event_msg", { type: "agent_message", message: "先确认 Hammerspoon 进程在不在" }),
        line("event_msg", { type: "agent_message", message: "Hammerspoon 路径已经对了" }),
        line("event_msg", { type: "agent_message", message: "如果 Hammerspoon console 没报错就继续" }),
      ].join("\n"),
    );

    // Session B: the term appears in the cwd and in the opening user message.
    writeFileSync(
      join(sessionsRoot, "rollout-2026-04-21T10-00-00-bbbbbbbb-bbbb-4bbb-8bbb-bbbbbbbbbbbb.jsonl"),
      [
        line("session_meta", { id: "bbbbbbbb-bbbb-4bbb-8bbb-bbbbbbbbbbbb", cwd: "/Users/envvar/.hammerspoon" }),
        line("turn_context", { model: "gpt-5.4" }),
        line("event_msg", { type: "user_message", message: "hammerspoon clipboard 搜索坏了" }),
        line("event_msg", { type: "agent_message", message: "先检查 clipboard history" }),
      ].join("\n"),
    );

    const dbPath = join(base, "index.sqlite");
    const summary = await syncSessions({ dbPath, rootDir: join(base, "sessions") });
    expect(summary.added).toBe(2);

    const found = findSessions(dbPath, "hammerspoon", 5);
    expect(found.results[0]?.sessionUuid).toBe("bbbbbbbb-bbbb-4bbb-8bbb-bbbbbbbbbbbb");
  });

  test("broad query prefers sustained session evidence over title-only incidental hit", async () => {
    const base = mkdtempSync(join(tmpdir(), "cxs-broad-"));
    tempDirs.push(base);
    const sessionsRoot = join(base, "sessions", "2026", "04", "21");
    mkdirSync(sessionsRoot, { recursive: true });

    // Session C: "deploy" appears once, in the first user message only.
    writeFileSync(
      join(sessionsRoot, "rollout-2026-04-21T09-00-00-cccccccc-cccc-4ccc-8ccc-cccccccccccc.jsonl"),
      [
        line("session_meta", { id: "cccccccc-cccc-4ccc-8ccc-cccccccccccc", cwd: "/tmp/deploy-title" }),
        line("turn_context", { model: "gpt-5.4" }),
        line("event_msg", { type: "user_message", message: "deploy checklist 先记一下" }),
        line("event_msg", { type: "agent_message", message: "今天主要在调 hammerspoon 输入法切换" }),
        line("event_msg", { type: "agent_message", message: "先确认 WeChat 输入法默认值" }),
      ].join("\n"),
    );

    // Session D: "deploy" appears in every message.
    writeFileSync(
      join(sessionsRoot, "rollout-2026-04-21T10-00-00-dddddddd-dddd-4ddd-8ddd-dddddddddddd.jsonl"),
      [
        line("session_meta", { id: "dddddddd-dddd-4ddd-8ddd-dddddddddddd", cwd: "/tmp/deploy-incident" }),
        line("turn_context", { model: "gpt-5.4" }),
        line("event_msg", { type: "user_message", message: "fly deploy 之后 health check 还是 500" }),
        line("event_msg", { type: "agent_message", message: "先确认 deploy 之后的 readback 和 health check" }),
        line("event_msg", { type: "user_message", message: "这个 deploy 回滚后恢复了" }),
      ].join("\n"),
    );

    const dbPath = join(base, "index.sqlite");
    const summary = await syncSessions({ dbPath, rootDir: join(base, "sessions") });
    expect(summary.added).toBe(2);

    const found = findSessions(dbPath, "deploy", 5);
    expect(found.results[0]?.sessionUuid).toBe("dddddddd-dddd-4ddd-8ddd-dddddddddddd");
  });

  test("find keeps distinct sessions even when titles collapse to the same normalized key", async () => {
    const base = mkdtempSync(join(tmpdir(), "cxs-dedup-"));
    tempDirs.push(base);
    const sessionsRoot = join(base, "sessions", "2026", "04", "22");
    mkdirSync(sessionsRoot, { recursive: true });

    // Both sessions open with the identical user message; only the reply differs.
    writeFileSync(
      join(sessionsRoot, "rollout-2026-04-22T08-00-00-12121212-1212-4212-8212-121212121212.jsonl"),
      [
        line("session_meta", { id: "12121212-1212-4212-8212-121212121212", cwd: "/tmp/alpha" }),
        line("turn_context", { model: "gpt-5.4" }),
        line("event_msg", { type: "user_message", message: "排查 deploy 500" }),
        line("event_msg", { type: "agent_message", message: "alpha 先看 first deploy rollback" }),
      ].join("\n"),
    );

    writeFileSync(
      join(sessionsRoot, "rollout-2026-04-22T09-00-00-34343434-3434-4343-8343-343434343434.jsonl"),
      [
        line("session_meta", { id: "34343434-3434-4343-8343-343434343434", cwd: "/tmp/beta" }),
        line("turn_context", { model: "gpt-5.4" }),
        line("event_msg", { type: "user_message", message: "排查 deploy 500" }),
        line("event_msg", { type: "agent_message", message: "beta 再看 second deploy readback" }),
      ].join("\n"),
    );

    const dbPath = join(base, "index.sqlite");
    const summary = await syncSessions({ dbPath, rootDir: join(base, "sessions") });
    expect(summary.added).toBe(2);

    const found = findSessions(dbPath, "deploy 500", 5);
    expect(found.results).toHaveLength(2);
    // Sorted before comparing: this test asserts presence, not ranking order.
    expect(found.results.map((result) => result.sessionUuid).sort()).toEqual([
      "12121212-1212-4212-8212-121212121212",
      "34343434-3434-4343-8343-343434343434",
    ]);
  });
});
+110
src/query-test-helpers.ts
import { afterEach } from "vitest";
import { spawn } from "node:child_process";
import { rmSync } from "node:fs";

/** Temp directories created by tests; every entry is removed after each test. */
export const tempDirs: string[] = [];

afterEach(() => {
  // `splice(0)` empties the shared array while handing back what it held.
  for (const dir of tempDirs.splice(0)) {
    rmSync(dir, { recursive: true, force: true });
  }
});

/**
 * Serializes one rollout JSONL record.
 *
 * @param type - Value of the record's `type` field.
 * @param payload - Value of the record's `payload` field.
 * @returns The JSON text of `{ timestamp, type, payload }` with a fixed timestamp.
 */
export function line(type: string, payload: Record<string, unknown>): string {
  return JSON.stringify({
    timestamp: new Date("2026-04-21T00:00:00.000Z").toISOString(),
    type,
    payload,
  });
}

/**
 * Runs a read command (`collectStats` or `findSessions`) from the query module
 * in a separate Node process.
 *
 * @param queryModuleUrl - URL the child passes to dynamic `import()`.
 * @param dbPath - Database path handed to the query function.
 * @param command - `"stats"` calls `collectStats`; `"find"` calls `findSessions` with limit 5.
 * @param query - Search text for `"find"`; defaults to an empty string.
 * @returns Resolves with the child's exit code and collected stderr once it closes;
 *   rejects only if the child emits `error`.
 */
export function runReadChild(
  queryModuleUrl: string,
  dbPath: string,
  command: "find" | "stats",
  query?: string,
): Promise<{ code: number | null; stderr: string }> {
  return new Promise((resolve, reject) => {
    const script = `
      const [moduleUrl, dbPath, command, query = ""] = process.argv.slice(1);
      const queryModule = await import(moduleUrl);
      if (command === "stats") {
        queryModule.collectStats(dbPath);
      } else {
        queryModule.findSessions(dbPath, query, 5);
      }
    `;
    const child = spawn(
      process.execPath,
      ["--import", "tsx", "--eval", script, queryModuleUrl, dbPath, command, query ?? ""],
      { cwd: import.meta.dirname, stdio: ["ignore", "ignore", "pipe"] },
    );

    let stderr = "";
    child.stderr.setEncoding("utf8");
    child.stderr.on("data", (chunk) => {
      stderr += chunk;
    });
    child.on("error", reject);
    child.on("close", (code) => {
      resolve({ code, stderr });
    });
  });
}

/**
 * Spawns a child process that opens `dbPath`, begins an exclusive transaction,
 * prints `locked`, and commits and closes after `holdMs` milliseconds.
 *
 * @param dbPath - Database file the child locks.
 * @param holdMs - How long the child keeps the transaction open.
 * @returns Resolves once the child has printed `locked`. The resolved `done`
 *   promise settles when the child closes: it resolves with exit code 0 and
 *   rejects on any other outcome.
 * @throws Rejects if the child fails to spawn or exits before reporting the
 *   lock, including a clean exit that never printed `locked`.
 */
export function holdExclusiveLock(
  dbPath: string,
  holdMs: number,
): Promise<{ done: Promise<number | null> }> {
  return new Promise((resolve, reject) => {
    const script = `
      import Database from "better-sqlite3";
      const [dbPath, holdMs] = process.argv.slice(1);
      const db = new Database(dbPath);
      db.pragma("busy_timeout = 5000");
      db.pragma("locking_mode = EXCLUSIVE");
      db.exec("BEGIN EXCLUSIVE");
      console.log("locked");
      setTimeout(() => {
        db.exec("COMMIT");
        db.close();
      }, Number(holdMs));
    `;
    const child = spawn(
      process.execPath,
      ["--eval", script, dbPath, String(holdMs)],
      { cwd: import.meta.dirname, stdio: ["ignore", "pipe", "pipe"] },
    );

    let settled = false;
    let stdout = "";
    let stderr = "";
    child.stdout.setEncoding("utf8");
    child.stderr.setEncoding("utf8");
    child.stderr.on("data", (chunk) => {
      stderr += chunk;
    });
    child.on("error", reject);
    child.on("close", (code) => {
      if (settled) return;
      // The child ended before reporting the lock. Reject on a clean exit too,
      // otherwise the caller would wait on this promise forever.
      settled = true;
      const fallback =
        code === 0
          ? "lock holder exited before reporting the lock"
          : `lock holder exited with code ${code}`;
      reject(new Error(stderr || fallback));
    });
    child.stdout.on("data", (chunk) => {
      if (settled) return;
      // Accumulate output so the marker is found even if it arrives split
      // across several chunks.
      stdout += chunk;
      if (!stdout.includes("locked")) return;
      settled = true;
      resolve({
        done: new Promise((doneResolve, doneReject) => {
          child.on("error", doneReject);
          child.on("close", (code) => {
            if (code === 0) {
              doneResolve(code);
              return;
            }
            doneReject(new Error(stderr || `lock holder exited with code ${code}`));
          });
        }),
      });
    });
  });
}
-784
src/query.test.ts
··· 1 - import { afterEach, describe, expect, test } from "vitest"; 2 - import { spawn } from "node:child_process"; 3 - import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from "node:fs"; 4 - import { tmpdir } from "node:os"; 5 - import { join } from "node:path"; 6 - import { pathToFileURL } from "node:url"; 7 - import { openReadDb, openWriteDb, replaceSession } from "./db"; 8 - import { INDEX_VERSION } from "./env"; 9 - import { syncSessions } from "./indexer"; 10 - import { 11 - classifyQueryProfile, 12 - findSessions, 13 - getMessagePage, 14 - getMessageRange, 15 - } from "./query"; 16 - 17 - const tempDirs: string[] = []; 18 - 19 - afterEach(() => { 20 - for (const dir of tempDirs.splice(0)) { 21 - rmSync(dir, { recursive: true, force: true }); 22 - } 23 - }); 24 - 25 - describe("cxs retrieval flow", () => { 26 - test("sync -> find -> read-range -> read-page works on fixture sessions", async () => { 27 - const base = mkdtempSync(join(tmpdir(), "cxs-test-")); 28 - tempDirs.push(base); 29 - const sessionsRoot = join(base, "sessions", "2026", "04", "21"); 30 - mkdirSync(sessionsRoot, { recursive: true }); 31 - 32 - writeFileSync( 33 - join(sessionsRoot, "rollout-2026-04-21T10-00-00-11111111-1111-4111-8111-111111111111.jsonl"), 34 - [ 35 - line("session_meta", { id: "11111111-1111-4111-8111-111111111111", cwd: "/tmp/project-a" }), 36 - line("turn_context", { model: "gpt-5.4" }), 37 - line("event_msg", { type: "user_message", message: "排查 fly deploy 失败" }), 38 - line("event_msg", { type: "agent_message", message: "先看 health check 和 readback" }), 39 - line("event_msg", { type: "user_message", message: "health check 还是 500" }), 40 - line("event_msg", { type: "agent_message", message: "继续检查 secrets readback" }), 41 - ].join("\n"), 42 - ); 43 - 44 - writeFileSync( 45 - join(sessionsRoot, "rollout-2026-04-21T11-00-00-22222222-2222-4222-8222-222222222222.jsonl"), 46 - [ 47 - line("session_meta", { id: "22222222-2222-4222-8222-222222222222", cwd: "/tmp/project-b" }), 
48 - line("turn_context", { model: "gpt-5.4" }), 49 - line("event_msg", { type: "user_message", message: "重构 markdown parser" }), 50 - line("event_msg", { type: "agent_message", message: "先补失败测试" }), 51 - ].join("\n"), 52 - ); 53 - 54 - const dbPath = join(base, "index.sqlite"); 55 - const summary = await syncSessions({ dbPath, rootDir: join(base, "sessions") }); 56 - 57 - expect(summary.added).toBe(2); 58 - 59 - const found = findSessions(dbPath, "health check", 5); 60 - expect(found.results).toHaveLength(1); 61 - expect(found.results[0]?.sessionUuid).toBe("11111111-1111-4111-8111-111111111111"); 62 - expect(found.results[0]?.matchSeq).toBe(2); 63 - 64 - const range = getMessageRange(dbPath, "11111111-1111-4111-8111-111111111111", { 65 - seq: 2, 66 - before: 1, 67 - after: 1, 68 - }); 69 - expect(range.anchorSeq).toBe(2); 70 - expect(range.messages.map((message) => message.seq)).toEqual([1, 2, 3]); 71 - 72 - const page = getMessagePage(dbPath, "11111111-1111-4111-8111-111111111111", 2, 2); 73 - expect(page.messages.map((message) => message.seq)).toEqual([2, 3]); 74 - }); 75 - 76 - test("read-range can relocate anchor by query within a session", async () => { 77 - const base = mkdtempSync(join(tmpdir(), "cxs-query-")); 78 - tempDirs.push(base); 79 - const sessionsRoot = join(base, "sessions", "2026", "04", "21"); 80 - mkdirSync(sessionsRoot, { recursive: true }); 81 - 82 - writeFileSync( 83 - join(sessionsRoot, "rollout-2026-04-21T10-00-00-33333333-3333-4333-8333-333333333333.jsonl"), 84 - [ 85 - line("session_meta", { id: "33333333-3333-4333-8333-333333333333", cwd: "/tmp/project-c" }), 86 - line("turn_context", { model: "gpt-5.4" }), 87 - line("event_msg", { type: "user_message", message: "先做回滚预案" }), 88 - line("event_msg", { type: "agent_message", message: "health check 先确认 500 触发点" }), 89 - line("event_msg", { type: "agent_message", message: "然后看 readback" }), 90 - ].join("\n"), 91 - ); 92 - 93 - const dbPath = join(base, "index.sqlite"); 94 - const summary = 
await syncSessions({ dbPath, rootDir: join(base, "sessions") }); 95 - expect(summary.added).toBe(1); 96 - 97 - const range = getMessageRange(dbPath, "33333333-3333-4333-8333-333333333333", { 98 - query: "health check", 99 - before: 0, 100 - after: 1, 101 - }); 102 - 103 - expect(range.anchorSeq).toBe(1); 104 - expect(range.rangeStartSeq).toBe(1); 105 - expect(range.rangeEndSeq).toBe(2); 106 - expect(range.messages.map((message) => message.seq)).toEqual([1, 2]); 107 - }); 108 - 109 - test("read-page reports coverage for sessions synced from a nonstandard root", async () => { 110 - const base = mkdtempSync(join(tmpdir(), "cxs-nonstandard-root-")); 111 - tempDirs.push(base); 112 - const root = join(base, "rawroot"); 113 - const day = join(root, "2026", "04", "22"); 114 - mkdirSync(day, { recursive: true }); 115 - 116 - writeFileSync( 117 - join(day, "rollout-2026-04-22T10-00-00-45454545-4545-4545-8545-454545454545.jsonl"), 118 - [ 119 - line("session_meta", { id: "45454545-4545-4545-8545-454545454545", cwd: "/tmp/nonstandard-root" }), 120 - line("event_msg", { type: "user_message", message: "root attribution needle" }), 121 - ].join("\n"), 122 - ); 123 - 124 - const dbPath = join(base, "index.sqlite"); 125 - await syncSessions({ dbPath, selector: { kind: "all", root } }); 126 - 127 - const page = getMessagePage(dbPath, "45454545-4545-4545-8545-454545454545", 0, 10); 128 - 129 - expect(page.coverage.entries).toHaveLength(1); 130 - expect(page.coverage.entries[0]?.selector).toEqual({ kind: "all", root }); 131 - }); 132 - 133 - test("session title hit outranks broad incidental mentions", async () => { 134 - const base = mkdtempSync(join(tmpdir(), "cxs-rank-")); 135 - tempDirs.push(base); 136 - const sessionsRoot = join(base, "sessions", "2026", "04", "21"); 137 - mkdirSync(sessionsRoot, { recursive: true }); 138 - 139 - writeFileSync( 140 - join(sessionsRoot, "rollout-2026-04-21T09-00-00-aaaaaaaa-aaaa-4aaa-8aaa-aaaaaaaaaaaa.jsonl"), 141 - [ 142 - line("session_meta", { 
id: "aaaaaaaa-aaaa-4aaa-8aaa-aaaaaaaaaaaa", cwd: "/tmp/mac-setup" }), 143 - line("turn_context", { model: "gpt-5.4" }), 144 - line("event_msg", { type: "user_message", message: "同步新 Mac 配置" }), 145 - line("event_msg", { type: "agent_message", message: "先确认 Hammerspoon 进程在不在" }), 146 - line("event_msg", { type: "agent_message", message: "Hammerspoon 路径已经对了" }), 147 - line("event_msg", { type: "agent_message", message: "如果 Hammerspoon console 没报错就继续" }), 148 - ].join("\n"), 149 - ); 150 - 151 - writeFileSync( 152 - join(sessionsRoot, "rollout-2026-04-21T10-00-00-bbbbbbbb-bbbb-4bbb-8bbb-bbbbbbbbbbbb.jsonl"), 153 - [ 154 - line("session_meta", { id: "bbbbbbbb-bbbb-4bbb-8bbb-bbbbbbbbbbbb", cwd: "/Users/envvar/.hammerspoon" }), 155 - line("turn_context", { model: "gpt-5.4" }), 156 - line("event_msg", { type: "user_message", message: "hammerspoon clipboard 搜索坏了" }), 157 - line("event_msg", { type: "agent_message", message: "先检查 clipboard history" }), 158 - ].join("\n"), 159 - ); 160 - 161 - const dbPath = join(base, "index.sqlite"); 162 - const summary = await syncSessions({ dbPath, rootDir: join(base, "sessions") }); 163 - expect(summary.added).toBe(2); 164 - 165 - const found = findSessions(dbPath, "hammerspoon", 5); 166 - expect(found.results[0]?.sessionUuid).toBe("bbbbbbbb-bbbb-4bbb-8bbb-bbbbbbbbbbbb"); 167 - }); 168 - 169 - test("broad query prefers sustained session evidence over title-only incidental hit", async () => { 170 - const base = mkdtempSync(join(tmpdir(), "cxs-broad-")); 171 - tempDirs.push(base); 172 - const sessionsRoot = join(base, "sessions", "2026", "04", "21"); 173 - mkdirSync(sessionsRoot, { recursive: true }); 174 - 175 - writeFileSync( 176 - join(sessionsRoot, "rollout-2026-04-21T09-00-00-cccccccc-cccc-4ccc-8ccc-cccccccccccc.jsonl"), 177 - [ 178 - line("session_meta", { id: "cccccccc-cccc-4ccc-8ccc-cccccccccccc", cwd: "/tmp/deploy-title" }), 179 - line("turn_context", { model: "gpt-5.4" }), 180 - line("event_msg", { type: "user_message", message: 
"deploy checklist 先记一下" }), 181 - line("event_msg", { type: "agent_message", message: "今天主要在调 hammerspoon 输入法切换" }), 182 - line("event_msg", { type: "agent_message", message: "先确认 WeChat 输入法默认值" }), 183 - ].join("\n"), 184 - ); 185 - 186 - writeFileSync( 187 - join(sessionsRoot, "rollout-2026-04-21T10-00-00-dddddddd-dddd-4ddd-8ddd-dddddddddddd.jsonl"), 188 - [ 189 - line("session_meta", { id: "dddddddd-dddd-4ddd-8ddd-dddddddddddd", cwd: "/tmp/deploy-incident" }), 190 - line("turn_context", { model: "gpt-5.4" }), 191 - line("event_msg", { type: "user_message", message: "fly deploy 之后 health check 还是 500" }), 192 - line("event_msg", { type: "agent_message", message: "先确认 deploy 之后的 readback 和 health check" }), 193 - line("event_msg", { type: "user_message", message: "这个 deploy 回滚后恢复了" }), 194 - ].join("\n"), 195 - ); 196 - 197 - const dbPath = join(base, "index.sqlite"); 198 - const summary = await syncSessions({ dbPath, rootDir: join(base, "sessions") }); 199 - expect(summary.added).toBe(2); 200 - 201 - const found = findSessions(dbPath, "deploy", 5); 202 - expect(found.results[0]?.sessionUuid).toBe("dddddddd-dddd-4ddd-8ddd-dddddddddddd"); 203 - }); 204 - 205 - test("sync stores derived session summary and find returns it", async () => { 206 - const base = mkdtempSync(join(tmpdir(), "cxs-summary-")); 207 - tempDirs.push(base); 208 - const sessionsRoot = join(base, "sessions", "2026", "04", "21"); 209 - mkdirSync(sessionsRoot, { recursive: true }); 210 - 211 - writeFileSync( 212 - join(sessionsRoot, "rollout-2026-04-21T12-00-00-eeeeeeee-eeee-4eee-8eee-eeeeeeeeeeee.jsonl"), 213 - [ 214 - line("session_meta", { id: "eeeeeeee-eeee-4eee-8eee-eeeeeeeeeeee", cwd: "/tmp/deploy-summary" }), 215 - line("turn_context", { model: "gpt-5.4" }), 216 - line("event_msg", { type: "user_message", message: "排查 fly deploy 失败" }), 217 - line("event_msg", { type: "agent_message", message: "先看 health check 和 readback" }), 218 - line("event_msg", { type: "user_message", message: "health 
check 还是 500" }), 219 - line("event_msg", { type: "agent_message", message: "继续核对 secrets readback" }), 220 - ].join("\n"), 221 - ); 222 - 223 - const dbPath = join(base, "index.sqlite"); 224 - const summary = await syncSessions({ dbPath, rootDir: join(base, "sessions") }); 225 - expect(summary.added).toBe(1); 226 - 227 - const db = openReadDb(dbPath); 228 - const row = db 229 - .prepare<[string], { summaryText: string }>("SELECT summary_text AS summaryText FROM sessions WHERE session_uuid = ? LIMIT 1") 230 - .get("eeeeeeee-eeee-4eee-8eee-eeeeeeeeeeee") as { summaryText: string } | null; 231 - db.close(); 232 - 233 - expect(row?.summaryText).toContain("排查 fly deploy 失败"); 234 - expect(row?.summaryText).toContain("先看 health check 和 readback"); 235 - expect(row?.summaryText).toContain("health check 还是 500"); 236 - 237 - const found = findSessions(dbPath, "deploy", 5); 238 - expect(found.results[0]?.summaryText).toContain("排查 fly deploy 失败"); 239 - }); 240 - 241 - test("find can recall session title even when no message contains the query", () => { 242 - const base = mkdtempSync(join(tmpdir(), "cxs-session-title-")); 243 - tempDirs.push(base); 244 - const dbPath = join(base, "index.sqlite"); 245 - const db = openWriteDb(dbPath); 246 - replaceSession( 247 - db, 248 - { 249 - sessionUuid: "abababab-abab-4aba-8aba-abababababab", 250 - filePath: join(base, "rollout.jsonl"), 251 - title: "设置 ChatGPT 订阅取消提醒", 252 - summaryText: "user: billing reminder | assistant: schedule a local notification", 253 - compactText: "", 254 - reasoningSummaryText: "", 255 - cwd: "/tmp/title-only", 256 - model: "gpt-5.4", 257 - startedAt: "2026-04-24T01:00:00.000Z", 258 - endedAt: "2026-04-24T01:01:00.000Z", 259 - messages: [ 260 - { 261 - role: "user", 262 - contentText: "billing reminder", 263 - timestamp: "2026-04-24T01:00:00.000Z", 264 - seq: 0, 265 - sourceKind: "event_msg", 266 - }, 267 - { 268 - role: "assistant", 269 - contentText: "schedule a local notification", 270 - timestamp: 
"2026-04-24T01:01:00.000Z", 271 - seq: 1, 272 - sourceKind: "event_msg", 273 - }, 274 - ], 275 - }, 276 - 1, 277 - 1, 278 - INDEX_VERSION, 279 - "", 280 - ); 281 - db.close(); 282 - 283 - const found = findSessions(dbPath, "订阅取消提醒", 5); 284 - 285 - expect(found.results).toHaveLength(1); 286 - expect(found.results[0]?.sessionUuid).toBe("abababab-abab-4aba-8aba-abababababab"); 287 - expect(found.results[0]?.matchSource).toBe("session"); 288 - expect(found.results[0]?.matchSeq).toBeNull(); 289 - expect(found.results[0]?.snippet).toContain("订阅取消提醒"); 290 - }); 291 - 292 - test("session-level fields have explicit ranking weights", () => { 293 - const base = mkdtempSync(join(tmpdir(), "cxs-session-field-weights-")); 294 - tempDirs.push(base); 295 - const dbPath = join(base, "index.sqlite"); 296 - const db = openWriteDb(dbPath); 297 - const common = { 298 - filePath: join(base, "rollout.jsonl"), 299 - title: "neutral session", 300 - summaryText: "", 301 - compactText: "", 302 - reasoningSummaryText: "", 303 - cwd: "/tmp/field-weights", 304 - model: "gpt-5.4", 305 - startedAt: "2026-04-24T01:00:00.000Z", 306 - endedAt: "2026-04-24T01:00:00.000Z", 307 - messages: [ 308 - { 309 - role: "user" as const, 310 - contentText: "ordinary visible message", 311 - timestamp: "2026-04-24T01:00:00.000Z", 312 - seq: 0, 313 - sourceKind: "event_msg" as const, 314 - }, 315 - ], 316 - }; 317 - 318 - replaceSession(db, { 319 - ...common, 320 - sessionUuid: "10101010-1010-4010-8010-101010101010", 321 - filePath: join(base, "title.jsonl"), 322 - title: "handoffneedle title", 323 - }, 1, 1, INDEX_VERSION, ""); 324 - replaceSession(db, { 325 - ...common, 326 - sessionUuid: "20202020-2020-4020-8020-202020202020", 327 - filePath: join(base, "compact.jsonl"), 328 - compactText: "handoffneedle compact handoff", 329 - }, 1, 1, INDEX_VERSION, ""); 330 - replaceSession(db, { 331 - ...common, 332 - sessionUuid: "30303030-3030-4030-8030-303030303030", 333 - filePath: join(base, "summary.jsonl"), 334 - 
summaryText: "handoffneedle derived summary", 335 - }, 1, 1, INDEX_VERSION, ""); 336 - replaceSession(db, { 337 - ...common, 338 - sessionUuid: "40404040-4040-4040-8040-404040404040", 339 - filePath: join(base, "reasoning.jsonl"), 340 - reasoningSummaryText: "handoffneedle reasoning summary", 341 - }, 1, 1, INDEX_VERSION, ""); 342 - db.close(); 343 - 344 - const found = findSessions(dbPath, "handoffneedle", 10); 345 - 346 - expect(found.results.map((result) => result.sessionUuid)).toEqual([ 347 - "10101010-1010-4010-8010-101010101010", 348 - "20202020-2020-4020-8020-202020202020", 349 - "30303030-3030-4030-8030-303030303030", 350 - "40404040-4040-4040-8040-404040404040", 351 - ]); 352 - }); 353 - 354 - test("sync indexes compacted handoff text for session-level recall", async () => { 355 - const base = mkdtempSync(join(tmpdir(), "cxs-compact-recall-")); 356 - tempDirs.push(base); 357 - const sessionsRoot = join(base, "sessions", "2026", "04", "24"); 358 - mkdirSync(sessionsRoot, { recursive: true }); 359 - 360 - writeFileSync( 361 - join(sessionsRoot, "rollout-2026-04-24T09-00-00-90909090-9090-4090-8090-909090909090.jsonl"), 362 - [ 363 - line("session_meta", { id: "90909090-9090-4090-8090-909090909090", cwd: "/tmp/compact-recall" }), 364 - line("turn_context", { model: "gpt-5.4" }), 365 - line("event_msg", { type: "user_message", message: "继续前一个任务" }), 366 - line("compacted", { message: "handoff says durable output queue needs final verification" }), 367 - line("event_msg", { type: "context_compacted" }), 368 - line("event_msg", { type: "agent_message", message: "先读取测试文件" }), 369 - ].join("\n"), 370 - ); 371 - 372 - const dbPath = join(base, "index.sqlite"); 373 - const summary = await syncSessions({ dbPath, rootDir: join(base, "sessions") }); 374 - expect(summary.added).toBe(1); 375 - 376 - const found = findSessions(dbPath, "durable output queue", 5); 377 - 378 - expect(found.results).toHaveLength(1); 379 - 
expect(found.results[0]?.sessionUuid).toBe("90909090-9090-4090-8090-909090909090"); 380 - expect(found.results[0]?.matchSource).toBe("session"); 381 - expect(found.results[0]?.snippet).toContain("durable output queue"); 382 - }); 383 - 384 - test("session-level snippet prefers the window with denser query term coverage", () => { 385 - const base = mkdtempSync(join(tmpdir(), "cxs-session-snippet-")); 386 - tempDirs.push(base); 387 - const dbPath = join(base, "index.sqlite"); 388 - const db = openWriteDb(dbPath); 389 - replaceSession( 390 - db, 391 - { 392 - sessionUuid: "50505050-5050-4050-8050-505050505050", 393 - filePath: join(base, "snippet.jsonl"), 394 - title: "neutral deploy title", 395 - summaryText: "", 396 - compactText: [ 397 - "部署 happened early in the handoff.", 398 - "Later the important evidence says the health check failed after rollout.", 399 - ].join(" "), 400 - reasoningSummaryText: "", 401 - cwd: "/tmp/snippet", 402 - model: "gpt-5.4", 403 - startedAt: "2026-04-24T01:00:00.000Z", 404 - endedAt: "2026-04-24T01:00:00.000Z", 405 - messages: [ 406 - { 407 - role: "user", 408 - contentText: "ordinary visible message", 409 - timestamp: "2026-04-24T01:00:00.000Z", 410 - seq: 0, 411 - sourceKind: "event_msg", 412 - }, 413 - ], 414 - }, 415 - 1, 416 - 1, 417 - INDEX_VERSION, 418 - "", 419 - ); 420 - db.close(); 421 - 422 - const found = findSessions(dbPath, "部署 health check", 5); 423 - 424 - expect(found.results[0]?.snippet).toContain("health"); 425 - expect(found.results[0]?.snippet).toContain("check"); 426 - }); 427 - 428 - test("mixed session/message hit prefers message displayRow but keeps session ranking signal", () => { 429 - const base = mkdtempSync(join(tmpdir(), "cxs-mixed-match-")); 430 - tempDirs.push(base); 431 - const dbPath = join(base, "index.sqlite"); 432 - const db = openWriteDb(dbPath); 433 - 434 - // Mixed session: title carries the query (strong session hit) AND one 435 - // message body also carries it (weaker message hit). 
The display row 436 - // must come from the message hit so read-range can re-anchor on a real 437 - // seq, but the session-level signal still has to outrank a peer that 438 - // only has the message hit. 439 - replaceSession(db, { 440 - sessionUuid: "60606060-6060-4606-8606-606060606060", 441 - filePath: join(base, "mixed.jsonl"), 442 - title: "payloadbeacon retry handoff", 443 - summaryText: "", 444 - compactText: "", 445 - reasoningSummaryText: "", 446 - cwd: "/tmp/mixed", 447 - model: "gpt-5.4", 448 - startedAt: "2026-04-24T01:00:00.000Z", 449 - endedAt: "2026-04-24T01:00:00.000Z", 450 - messages: [ 451 - { 452 - role: "user", 453 - contentText: "noticed payloadbeacon stalled in production", 454 - timestamp: "2026-04-24T01:00:00.000Z", 455 - seq: 0, 456 - sourceKind: "event_msg", 457 - }, 458 - { 459 - role: "assistant", 460 - contentText: "checking retry queue depth and surface", 461 - timestamp: "2026-04-24T01:00:30.000Z", 462 - seq: 1, 463 - sourceKind: "event_msg", 464 - }, 465 - ], 466 - }, 1, 1, INDEX_VERSION, ""); 467 - 468 - // Message-only control: query appears only in a message body, neither 469 - // title nor any session-level field carries it. 
470 - replaceSession(db, { 471 - sessionUuid: "70707070-7070-4707-8707-707070707070", 472 - filePath: join(base, "message-only.jsonl"), 473 - title: "neutral retry surface review", 474 - summaryText: "", 475 - compactText: "", 476 - reasoningSummaryText: "", 477 - cwd: "/tmp/message-only", 478 - model: "gpt-5.4", 479 - startedAt: "2026-04-24T01:00:00.000Z", 480 - endedAt: "2026-04-24T01:00:00.000Z", 481 - messages: [ 482 - { 483 - role: "user", 484 - contentText: "saw payloadbeacon mentioned once in passing", 485 - timestamp: "2026-04-24T01:00:00.000Z", 486 - seq: 0, 487 - sourceKind: "event_msg", 488 - }, 489 - ], 490 - }, 1, 1, INDEX_VERSION, ""); 491 - 492 - db.close(); 493 - 494 - const found = findSessions(dbPath, "payloadbeacon", 5); 495 - 496 - const mixed = found.results.find( 497 - (result) => result.sessionUuid === "60606060-6060-4606-8606-606060606060", 498 - ); 499 - const messageOnly = found.results.find( 500 - (result) => result.sessionUuid === "70707070-7070-4707-8707-707070707070", 501 - ); 502 - 503 - expect(mixed).toBeDefined(); 504 - expect(messageOnly).toBeDefined(); 505 - 506 - // Display row must come from the message hit so read-range can anchor 507 - // on a real seq. 508 - expect(mixed?.matchSource).toBe("message"); 509 - expect(typeof mixed?.matchSeq).toBe("number"); 510 - 511 - // Session-level signal still wins overall ranking and score. 
512 - expect(found.results[0]?.sessionUuid).toBe("60606060-6060-4606-8606-606060606060"); 513 - expect(mixed!.score).toBeGreaterThan(messageOnly!.score); 514 - }); 515 - 516 - test("session-only hit reports matchSource session and null matchSeq", () => { 517 - const base = mkdtempSync(join(tmpdir(), "cxs-session-only-match-")); 518 - tempDirs.push(base); 519 - const dbPath = join(base, "index.sqlite"); 520 - const db = openWriteDb(dbPath); 521 - 522 - replaceSession(db, { 523 - sessionUuid: "80808080-8080-4808-8808-808080808080", 524 - filePath: join(base, "session-only.jsonl"), 525 - title: "payloadbeacon postmortem outline", 526 - summaryText: "", 527 - compactText: "", 528 - reasoningSummaryText: "", 529 - cwd: "/tmp/session-only", 530 - model: "gpt-5.4", 531 - startedAt: "2026-04-24T01:00:00.000Z", 532 - endedAt: "2026-04-24T01:00:00.000Z", 533 - messages: [ 534 - { 535 - role: "user", 536 - contentText: "everything looked fine on the surface", 537 - timestamp: "2026-04-24T01:00:00.000Z", 538 - seq: 0, 539 - sourceKind: "event_msg", 540 - }, 541 - { 542 - role: "assistant", 543 - contentText: "agreed, no anomalies in the queue depth", 544 - timestamp: "2026-04-24T01:00:30.000Z", 545 - seq: 1, 546 - sourceKind: "event_msg", 547 - }, 548 - ], 549 - }, 1, 1, INDEX_VERSION, ""); 550 - 551 - db.close(); 552 - 553 - const found = findSessions(dbPath, "payloadbeacon", 5); 554 - 555 - expect(found.results).toHaveLength(1); 556 - expect(found.results[0]?.sessionUuid).toBe("80808080-8080-4808-8808-808080808080"); 557 - expect(found.results[0]?.matchSource).toBe("session"); 558 - expect(found.results[0]?.matchSeq).toBeNull(); 559 - }); 560 - 561 - test("message-only hit reports matchSource message with a numeric matchSeq", () => { 562 - const base = mkdtempSync(join(tmpdir(), "cxs-message-only-match-")); 563 - tempDirs.push(base); 564 - const dbPath = join(base, "index.sqlite"); 565 - const db = openWriteDb(dbPath); 566 - 567 - replaceSession(db, { 568 - sessionUuid: 
"90909090-9090-4909-8909-909090909090", 569 - filePath: join(base, "message-only-baseline.jsonl"), 570 - title: "neutral retry surface review", 571 - summaryText: "", 572 - compactText: "", 573 - reasoningSummaryText: "", 574 - cwd: "/tmp/message-only-baseline", 575 - model: "gpt-5.4", 576 - startedAt: "2026-04-24T01:00:00.000Z", 577 - endedAt: "2026-04-24T01:00:00.000Z", 578 - messages: [ 579 - { 580 - role: "assistant", 581 - contentText: "kicked off neutral diagnostics", 582 - timestamp: "2026-04-24T01:00:00.000Z", 583 - seq: 0, 584 - sourceKind: "event_msg", 585 - }, 586 - { 587 - role: "user", 588 - contentText: "found payloadbeacon in the trace", 589 - timestamp: "2026-04-24T01:00:30.000Z", 590 - seq: 1, 591 - sourceKind: "event_msg", 592 - }, 593 - ], 594 - }, 1, 1, INDEX_VERSION, ""); 595 - 596 - db.close(); 597 - 598 - const found = findSessions(dbPath, "payloadbeacon", 5); 599 - 600 - expect(found.results).toHaveLength(1); 601 - expect(found.results[0]?.matchSource).toBe("message"); 602 - expect(found.results[0]?.matchSeq).toBe(1); 603 - }); 604 - 605 - test("find keeps distinct sessions even when titles collapse to the same normalized key", async () => { 606 - const base = mkdtempSync(join(tmpdir(), "cxs-dedup-")); 607 - tempDirs.push(base); 608 - const sessionsRoot = join(base, "sessions", "2026", "04", "22"); 609 - mkdirSync(sessionsRoot, { recursive: true }); 610 - 611 - writeFileSync( 612 - join(sessionsRoot, "rollout-2026-04-22T08-00-00-12121212-1212-4212-8212-121212121212.jsonl"), 613 - [ 614 - line("session_meta", { id: "12121212-1212-4212-8212-121212121212", cwd: "/tmp/alpha" }), 615 - line("turn_context", { model: "gpt-5.4" }), 616 - line("event_msg", { type: "user_message", message: "排查 deploy 500" }), 617 - line("event_msg", { type: "agent_message", message: "alpha 先看 first deploy rollback" }), 618 - ].join("\n"), 619 - ); 620 - 621 - writeFileSync( 622 - join(sessionsRoot, 
"rollout-2026-04-22T09-00-00-34343434-3434-4343-8343-343434343434.jsonl"), 623 - [ 624 - line("session_meta", { id: "34343434-3434-4343-8343-343434343434", cwd: "/tmp/beta" }), 625 - line("turn_context", { model: "gpt-5.4" }), 626 - line("event_msg", { type: "user_message", message: "排查 deploy 500" }), 627 - line("event_msg", { type: "agent_message", message: "beta 再看 second deploy readback" }), 628 - ].join("\n"), 629 - ); 630 - 631 - const dbPath = join(base, "index.sqlite"); 632 - const summary = await syncSessions({ dbPath, rootDir: join(base, "sessions") }); 633 - expect(summary.added).toBe(2); 634 - 635 - const found = findSessions(dbPath, "deploy 500", 5); 636 - expect(found.results).toHaveLength(2); 637 - expect(found.results.map((result) => result.sessionUuid).sort()).toEqual([ 638 - "12121212-1212-4212-8212-121212121212", 639 - "34343434-3434-4343-8343-343434343434", 640 - ]); 641 - }); 642 - 643 - test("parallel read commands wait through transient locks without surfacing SQLITE_BUSY", async () => { 644 - const base = mkdtempSync(join(tmpdir(), "cxs-parallel-")); 645 - tempDirs.push(base); 646 - const sessionsRoot = join(base, "sessions", "2026", "04", "22"); 647 - mkdirSync(sessionsRoot, { recursive: true }); 648 - 649 - writeFileSync( 650 - join(sessionsRoot, "rollout-2026-04-22T10-00-00-56565656-5656-4565-8565-565656565656.jsonl"), 651 - [ 652 - line("session_meta", { id: "56565656-5656-4565-8565-565656565656", cwd: "/tmp/parallel" }), 653 - line("turn_context", { model: "gpt-5.4" }), 654 - line("event_msg", { type: "user_message", message: "reverse-i-search 历史怎么找" }), 655 - line("event_msg", { type: "agent_message", message: "先用 cxs find reverse-i-search" }), 656 - line("event_msg", { type: "user_message", message: "顺便查 ffmpeg 的那次会话" }), 657 - line("event_msg", { type: "agent_message", message: "可以并行 find ffmpeg 再看 stats" }), 658 - ].join("\n"), 659 - ); 660 - 661 - const dbPath = join(base, "index.sqlite"); 662 - const summary = await syncSessions({ 
dbPath, rootDir: join(base, "sessions") }); 663 - expect(summary.added).toBe(1); 664 - 665 - const queryModuleUrl = pathToFileURL(join(import.meta.dirname, "query.ts")).href; 666 - const blocker = await holdExclusiveLock(dbPath, 400); 667 - const tasks = [ 668 - ...Array.from({ length: 6 }, () => runReadChild(queryModuleUrl, dbPath, "find", "reverse-i-search")), 669 - ...Array.from({ length: 6 }, () => runReadChild(queryModuleUrl, dbPath, "stats")), 670 - ]; 671 - const results = await Promise.all(tasks); 672 - await blocker.done; 673 - const failures = results.filter((result) => result.code !== 0); 674 - 675 - expect(failures).toEqual([]); 676 - }); 677 - }); 678 - 679 - describe("query profile", () => { 680 - test("classifies broad concept query separately from exact troubleshooting query", () => { 681 - expect(classifyQueryProfile("deploy").kind).toBe("broad"); 682 - expect(classifyQueryProfile("health check 500").kind).toBe("exact"); 683 - expect(classifyQueryProfile("src/background.ts remoteHosts").kind).toBe("exact"); 684 - }); 685 - }); 686 - 687 - function line(type: string, payload: Record<string, unknown>): string { 688 - return JSON.stringify({ 689 - timestamp: new Date("2026-04-21T00:00:00.000Z").toISOString(), 690 - type, 691 - payload, 692 - }); 693 - } 694 - 695 - function runReadChild( 696 - queryModuleUrl: string, 697 - dbPath: string, 698 - command: "find" | "stats", 699 - query?: string, 700 - ): Promise<{ code: number | null; stderr: string }> { 701 - return new Promise((resolve, reject) => { 702 - const script = ` 703 - const [moduleUrl, dbPath, command, query = ""] = process.argv.slice(1); 704 - const queryModule = await import(moduleUrl); 705 - if (command === "stats") { 706 - queryModule.collectStats(dbPath); 707 - } else { 708 - queryModule.findSessions(dbPath, query, 5); 709 - } 710 - `; 711 - const child = spawn( 712 - process.execPath, 713 - ["--import", "tsx", "--eval", script, queryModuleUrl, dbPath, command, query ?? 
""], 714 - { cwd: import.meta.dirname, stdio: ["ignore", "ignore", "pipe"] }, 715 - ); 716 - 717 - let stderr = ""; 718 - child.stderr.setEncoding("utf8"); 719 - child.stderr.on("data", (chunk) => { 720 - stderr += chunk; 721 - }); 722 - child.on("error", reject); 723 - child.on("close", (code) => { 724 - resolve({ code, stderr }); 725 - }); 726 - }); 727 - } 728 - 729 - function holdExclusiveLock( 730 - dbPath: string, 731 - holdMs: number, 732 - ): Promise<{ done: Promise<number | null> }> { 733 - return new Promise((resolve, reject) => { 734 - const script = ` 735 - import Database from "better-sqlite3"; 736 - const [dbPath, holdMs] = process.argv.slice(1); 737 - const db = new Database(dbPath); 738 - db.pragma("busy_timeout = 5000"); 739 - db.pragma("locking_mode = EXCLUSIVE"); 740 - db.exec("BEGIN EXCLUSIVE"); 741 - console.log("locked"); 742 - setTimeout(() => { 743 - db.exec("COMMIT"); 744 - db.close(); 745 - }, Number(holdMs)); 746 - `; 747 - const child = spawn( 748 - process.execPath, 749 - ["--eval", script, dbPath, String(holdMs)], 750 - { cwd: import.meta.dirname, stdio: ["ignore", "pipe", "pipe"] }, 751 - ); 752 - 753 - let settled = false; 754 - let stderr = ""; 755 - child.stdout.setEncoding("utf8"); 756 - child.stderr.setEncoding("utf8"); 757 - child.stderr.on("data", (chunk) => { 758 - stderr += chunk; 759 - }); 760 - child.on("error", reject); 761 - child.on("close", (code) => { 762 - if (!settled && code !== 0) { 763 - settled = true; 764 - reject(new Error(stderr || `lock holder exited with code ${code}`)); 765 - } 766 - }); 767 - child.stdout.on("data", (chunk) => { 768 - if (settled || !chunk.includes("locked")) return; 769 - settled = true; 770 - resolve({ 771 - done: new Promise((doneResolve, doneReject) => { 772 - child.on("error", doneReject); 773 - child.on("close", (code) => { 774 - if (code === 0) { 775 - doneResolve(code); 776 - return; 777 - } 778 - doneReject(new Error(stderr || `lock holder exited with code ${code}`)); 779 - }); 
780 - }), 781 - }); 782 - }); 783 - }); 784 - }
+5 -534
src/query.ts
/**
 * Query orchestration for the session index: find / read-range / read-page /
 * list / stats, plus the FTS and LIKE search paths and snippet rendering that
 * back them.
 *
 * Note: the diff this listing was taken from relocates these definitions to
 * `./query/find`, `./query/read`, `./query/list`, `./query/stats` and
 * `./query/cjk`, leaving `query.ts` as a re-export facade with the same
 * exported names.
 */
import Database from "better-sqlite3";
import { statSync } from "node:fs";
import {
  coverageEntriesForSession,
  coverageStatusForSelector,
  getMessagesForPage,
  getMessagesForRange,
  getSessionRecord,
  getStatsCounts,
  getTopCwds,
  listCoverageRecords,
  listSessions,
  selectorWhereSql,
  withReadDb,
} from "./db";
import { INDEX_VERSION } from "./env";
import { classifyQueryProfile, rerankHits } from "./ranking";
import type { RawHitRow } from "./ranking";
import { hasCjk, isCjkToken, queryTerms } from "./tokenize";
import type {
  CoverageStatus,
  FindResult,
  Selector,
  SessionListEntry,
  SessionListQuery,
  SessionRecord,
  StatsSummary,
} from "./types";

export { classifyQueryProfile } from "./ranking";

type Db = Database.Database;
type SqlParams = unknown[];
type TermHit = { index: number; length: number };

/** Characters of context kept before the first highlighted hit. */
const SNIPPET_LEAD_CHARS = 40;
/** Characters of context kept after the first highlighted hit. */
const SNIPPET_TAIL_CHARS = 80;
/** Length of the plain prefix returned when nothing can be highlighted. */
const SNIPPET_FALLBACK_CHARS = 160;

/**
 * Search messages and session-level fields for `query` and return the top
 * `limit` reranked results together with coverage info for `selector`.
 *
 * Recall is deliberately wider than `limit` (at least 50 rows per source) so
 * the reranker has candidates to choose from.
 */
export function findSessions(
  dbPath: string,
  query: string,
  limit: number,
  selector: Selector | null = null,
): { query: string; results: FindResult[]; coverage: CoverageStatus } {
  return withReadDb(dbPath, (db) => {
    const recallLimit = Math.max(limit * 12, 50);
    const rawRows = [
      ...searchMessageHits(db, query, recallLimit, undefined, selector),
      ...searchSessionHits(db, query, recallLimit, selector),
    ];
    const results = rerankHits(rawRows, query, limit);
    return { query, results, coverage: buildCoverageStatus(db, selector) };
  });
}

/**
 * Read the messages around an anchor inside one session.
 *
 * The anchor is `options.seq` when given, otherwise the best message hit for
 * `options.query` inside the session. The window spans
 * `[anchor - before, anchor + after]`, clamped at seq 0.
 *
 * @throws Error `session not found: <uuid>` when the session is not indexed.
 * @throws Error when neither a usable `seq` nor a matching `query` is given.
 */
export function getMessageRange(
  dbPath: string,
  sessionUuid: string,
  options: { seq?: number; query?: string; before: number; after: number },
): {
  session: SessionRecord;
  anchorSeq: number;
  rangeStartSeq: number;
  rangeEndSeq: number;
  messages: ReturnType<typeof getMessagesForRange>;
  coverage: { entries: ReturnType<typeof coverageEntriesForSession> };
} {
  return withReadDb(dbPath, (db) => {
    // Check the session first: searching an unknown session by query finds
    // nothing, which would otherwise surface as a misleading "requires --seq
    // or --query" error instead of "session not found".
    const session = getSessionRecord(db, sessionUuid);
    if (!session) throw new Error(`session not found: ${sessionUuid}`);

    const anchorSeq = resolveAnchorSeq(db, sessionUuid, options.seq, options.query);
    const rangeStartSeq = Math.max(0, anchorSeq - options.before);
    const rangeEndSeq = anchorSeq + options.after;
    const messages = getMessagesForRange(db, sessionUuid, rangeStartSeq, rangeEndSeq);
    return {
      session,
      anchorSeq,
      rangeStartSeq,
      rangeEndSeq,
      messages,
      coverage: { entries: coverageEntriesForSession(db, session) },
    };
  });
}

/**
 * Read one page of a session's messages.
 *
 * `totalCount` comes from the session record's `messageCount`; `hasMore` is
 * true while `offset` plus the number of returned messages is below it.
 *
 * @throws Error `session not found: <uuid>` when the session is not indexed.
 */
export function getMessagePage(
  dbPath: string,
  sessionUuid: string,
  offset: number,
  limit: number,
): {
  session: SessionRecord;
  offset: number;
  limit: number;
  totalCount: number;
  hasMore: boolean;
  messages: ReturnType<typeof getMessagesForPage>;
  coverage: { entries: ReturnType<typeof coverageEntriesForSession> };
} {
  return withReadDb(dbPath, (db) => {
    const session = getSessionRecord(db, sessionUuid);
    if (!session) throw new Error(`session not found: ${sessionUuid}`);
    const messages = getMessagesForPage(db, sessionUuid, offset, limit);
    const totalCount = session.messageCount;
    const hasMore = offset + messages.length < totalCount;
    return {
      session,
      offset,
      limit,
      totalCount,
      hasMore,
      messages,
      coverage: { entries: coverageEntriesForSession(db, session) },
    };
  });
}

/** List session summaries for `query`, with coverage info for its selector. */
export function listSessionSummaries(
  dbPath: string,
  query: SessionListQuery,
): { query: SessionListQuery; results: SessionListEntry[]; coverage: CoverageStatus } {
  return withReadDb(dbPath, (db) => {
    const results = listSessions(db, query);
    return { query, results, coverage: buildCoverageStatus(db, query.selector ?? null) };
  });
}

/**
 * Collect index-wide statistics: counts, top working directories, coverage
 * records and the on-disk size of the database file.
 */
export function collectStats(dbPath: string): StatsSummary {
  const { counts, topCwds, coverage } = withReadDb(dbPath, (db) => ({
    counts: getStatsCounts(db),
    topCwds: getTopCwds(db, 10),
    coverage: listCoverageRecords(db),
  }));

  // Best effort: the size is informational, so an unreadable file reports 0
  // instead of failing the whole stats call.
  let dbSizeBytes = 0;
  try {
    dbSizeBytes = statSync(dbPath).size;
  } catch {
    dbSizeBytes = 0;
  }

  return {
    sessionCount: counts.sessionCount,
    messageCount: counts.messageCount,
    earliestStartedAt: counts.earliestStartedAt,
    latestEndedAt: counts.latestEndedAt,
    topCwds,
    indexVersion: INDEX_VERSION,
    dbPath,
    dbSizeBytes,
    lastSyncAt: counts.lastSyncAt,
    coverage,
  };
}

/**
 * Pick the anchor seq for read-range: an explicit `seq` wins, otherwise the
 * seq of the best message hit for `query` inside the session.
 */
function resolveAnchorSeq(
  db: Db,
  sessionUuid: string,
  seq?: number,
  query?: string,
): number {
  if (typeof seq === "number") {
    return seq;
  }

  if (query) {
    const best = searchTopHitInSession(db, sessionUuid, query);
    if (best && typeof best.matchSeq === "number") return best.matchSeq;
  }

  throw new Error("read-range requires explicit session_uuid plus either --seq or --query");
}

/** Best reranked message hit for `query` inside one session, or null. */
function searchTopHitInSession(db: Db, sessionUuid: string, query: string): FindResult | null {
  const rows = searchMessageHits(db, query, 20, sessionUuid);
  const result = rerankHits(rows, query, 1)[0];
  return result ?? null;
}

/** Message-level recall: FTS when the query tokenizes, bounded LIKE for CJK otherwise. */
function searchMessageHits(
  db: Db,
  query: string,
  limit: number,
  sessionUuid?: string,
  selector: Selector | null = null,
): RawHitRow[] {
  const normalized = query.trim();
  if (!normalized) return [];

  const terms = queryTerms(normalized);
  // Queries that degenerate to zero tokens (e.g. a single kanji dropped as
  // stop-word-like noise, or whitespace only) cannot hit the FTS index. Fall
  // back to a bounded LIKE scan so single-character CJK probes still work
  // even though they are discouraged.
  if (terms.length === 0) {
    if (hasCjk(normalized)) return searchByLike(db, normalized, limit, sessionUuid, selector);
    return [];
  }

  return searchByFts(db, terms, limit, sessionUuid, selector);
}

/** Session-level recall over `sessions_fts`; empty when that table is absent. */
function searchSessionHits(db: Db, query: string, limit: number, selector: Selector | null): RawHitRow[] {
  const normalized = query.trim();
  if (!normalized || !tableExists(db, "sessions_fts")) return [];

  const terms = queryTerms(normalized);
  if (terms.length === 0) return [];

  return searchSessionsByFts(db, normalized, terms, limit, selector);
}

/** Run the message FTS query, optionally restricted by selector and session. */
function searchByFts(
  db: Db,
  terms: string[],
  limit: number,
  sessionUuid?: string,
  selector: Selector | null = null,
): RawHitRow[] {
  const matchExpr = buildFtsMatch(terms);
  const conditions = [`messages_fts MATCH ?`];
  const params: SqlParams = [matchExpr];

  if (selector) {
    const selectorWhere = selectorWhereSql(selector, "s");
    conditions.push(...selectorWhere.conditions);
    params.push(...selectorWhere.params);
  }
  if (sessionUuid) {
    conditions.push("m.session_uuid = ?");
    params.push(sessionUuid);
  }
  // LIMIT is the last placeholder in the statement, so it is bound last.
  params.push(limit);

  return db
    .prepare<typeof params, RawHitRow>(`
      SELECT
        s.session_uuid AS sessionUuid,
        s.title AS title,
        s.summary_text AS summaryText,
        s.cwd AS cwd,
        s.started_at AS startedAt,
        s.ended_at AS endedAt,
        'message' AS matchSource,
        m.seq AS matchSeq,
        m.role AS matchRole,
        m.timestamp AS matchTimestamp,
        m.content_text AS contentText,
        snippet(messages_fts, 0, '<mark>', '</mark>', '…', 16) AS snippet,
        bm25(messages_fts) AS score
      FROM messages_fts
      JOIN messages m ON m.id = messages_fts.rowid
      JOIN sessions s ON s.id = m.session_id
      WHERE ${conditions.join(" AND ")}
      ORDER BY score
      LIMIT ?
    `)
    .all(...params) as RawHitRow[];
}

/**
 * Run the session-level FTS query. The SQL returns an empty snippet; the
 * snippet is built here from the concatenated session fields.
 */
function searchSessionsByFts(
  db: Db,
  query: string,
  terms: string[],
  limit: number,
  selector: Selector | null,
): RawHitRow[] {
  const matchExpr = buildFtsMatch(terms);
  const conditions = ["sessions_fts MATCH ?"];
  const params: SqlParams = [matchExpr];
  if (selector) {
    const selectorWhere = selectorWhereSql(selector, "s");
    conditions.push(...selectorWhere.conditions);
    params.push(...selectorWhere.params);
  }
  params.push(limit);
  const rows = db
    .prepare<typeof params, RawHitRow>(`
      SELECT
        s.session_uuid AS sessionUuid,
        s.title AS title,
        s.summary_text AS summaryText,
        s.cwd AS cwd,
        s.started_at AS startedAt,
        s.ended_at AS endedAt,
        'session' AS matchSource,
        NULL AS matchSeq,
        'session' AS matchRole,
        NULL AS matchTimestamp,
        s.title || char(10) || s.summary_text || char(10) || s.compact_text || char(10) || s.reasoning_summary_text AS contentText,
        '' AS snippet,
        bm25(sessions_fts, 8.0, 3.0, 4.0, 1.2) AS score
      FROM sessions_fts
      JOIN sessions s ON s.id = sessions_fts.rowid
      WHERE ${conditions.join(" AND ")}
      ORDER BY score
      LIMIT ?
    `)
    .all(...params) as RawHitRow[];

  return rows.map((row) => ({
    ...row,
    snippet: makeRawSnippet(row.contentText, query, terms),
  }));
}

/** Substring fallback over message bodies for queries that yield no FTS tokens. */
function searchByLike(
  db: Db,
  query: string,
  limit: number,
  sessionUuid?: string,
  selector: Selector | null = null,
): RawHitRow[] {
  const conditions = ["lower(m.content_text) LIKE ? ESCAPE '\\'"];
  const params: SqlParams = [`%${escapeLike(query.toLowerCase())}%`];
  if (selector) {
    const selectorWhere = selectorWhereSql(selector, "s");
    conditions.push(...selectorWhere.conditions);
    params.push(...selectorWhere.params);
  }
  if (sessionUuid) {
    conditions.push("m.session_uuid = ?");
    params.push(sessionUuid);
  }
  params.push(limit);

  const rows = db
    .prepare<typeof params, RawHitRow & { contentText: string }>(`
      SELECT
        s.session_uuid AS sessionUuid,
        s.title AS title,
        s.summary_text AS summaryText,
        s.cwd AS cwd,
        s.started_at AS startedAt,
        s.ended_at AS endedAt,
        'message' AS matchSource,
        m.seq AS matchSeq,
        m.role AS matchRole,
        m.timestamp AS matchTimestamp,
        m.content_text AS contentText
      FROM messages m
      JOIN sessions s ON s.id = m.session_id
      WHERE ${conditions.join(" AND ")}
      ORDER BY s.started_at DESC, m.seq ASC
      LIMIT ?
    `)
    .all(...params) as Array<RawHitRow & { contentText: string }>;

  return rows.map((row, index) => ({
    ...row,
    snippet: makeLikeSnippet(row.contentText, query),
    // Negate the ordinal so LIKE rows share the "lower is better" polarity
    // with bm25() scores; downstream rerank sorts on row metrics, but any
    // code that touches this raw score won't see a sign mismatch.
    score: -(index + 1),
  }));
}

/**
 * True when a table (including a virtual FTS table) named `tableName` exists.
 * The `type` filter keeps an index, trigger or view that happens to share the
 * name from being mistaken for the table.
 */
function tableExists(db: Db, tableName: string): boolean {
  const row = db
    .prepare<[string], unknown>("SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = ? LIMIT 1")
    .get(tableName);
  return Boolean(row);
}

/**
 * Build an FTS5 MATCH expression from already-tokenized terms. Each term is
 * quoted and ANDed, giving us intersection semantics across CJK bigrams and
 * non-CJK words alike.
 */
function buildFtsMatch(terms: string[]): string {
  return terms.map(quoteFtsTerm).join(" AND ");
}

function quoteFtsTerm(term: string): string {
  // FTS5 treats unquoted * / ^ / NEAR / NOT / AND / OR as operators. Wrapping
  // each term in double quotes neutralizes all of them (including *), and we
  // escape internal quotes by doubling them. Bigrams stay bigrams.
  const escaped = term.replaceAll('"', '""');
  return `"${escaped}"`;
}

// LIKE-path escape stays unchanged: only CJK single-character probes and
// empty-token queries fall through to this branch now.
function escapeLike(value: string): string {
  return value.replaceAll("\\", "\\\\").replaceAll("%", "\\%").replaceAll("_", "\\_");
}

/** Wrap the db-level coverage answer for `selector`; freshness is not checked here. */
function buildCoverageStatus(db: Db, selector: Selector | null): CoverageStatus {
  const status = coverageStatusForSelector(db, selector);
  return {
    requested: selector,
    complete: status.complete,
    freshness: "not_checked",
    coveringSelectors: status.coveringSelectors,
  };
}

/**
 * Lower-case `text` while keeping every character at its original offset.
 *
 * Plain `toLowerCase()` can change the string length (for example U+0130
 * lower-cases to two code units), which would shift every later match offset
 * relative to the original text. When that happens, characters whose
 * lower-case form has a different length are left untouched instead.
 */
function foldCase(text: string): string {
  const lowered = text.toLowerCase();
  if (lowered.length === text.length) return lowered;

  let folded = "";
  for (const ch of text) {
    const low = ch.toLowerCase();
    folded += low.length === ch.length ? low : ch;
  }
  return folded;
}

/**
 * Snippet for the LIKE path: a window around the first case-insensitive
 * occurrence of `query`, with every occurrence inside the window highlighted
 * so the result agrees with FTS5's snippet(), which highlights all matches.
 */
function makeLikeSnippet(content: string, query: string): string {
  const target = foldCase(query);
  const index = foldCase(content).indexOf(target);
  if (index < 0) return content.slice(0, SNIPPET_FALLBACK_CHARS);
  return snippetAround(content, index, target.length, [target]);
}

/**
 * Snippet for session-level hits. Prefers the whole query as a phrase;
 * otherwise picks the window around a term hit that covers the most distinct
 * terms (earliest hit wins ties).
 */
function makeRawSnippet(content: string, query: string, terms: string[]): string {
  const normalizedQuery = foldCase(query);
  const lower = foldCase(content);
  const phraseIndex = normalizedQuery ? lower.indexOf(normalizedQuery) : -1;
  if (phraseIndex >= 0) {
    return snippetAround(content, phraseIndex, normalizedQuery.length, [normalizedQuery]);
  }

  const termLowers = uniqueNonEmpty(terms.map(foldCase));
  const termHits = termLowers.flatMap((term) => collectTermHits(lower, term));

  let best: { start: number; end: number; anchor: number; score: number } | null = null;
  for (const hit of termHits) {
    const start = Math.max(0, hit.index - SNIPPET_LEAD_CHARS);
    const end = Math.min(content.length, hit.index + hit.length + SNIPPET_TAIL_CHARS);
    const score = scoreSnippetWindow(lower.slice(start, end), termLowers);
    // Strict comparisons keep the first candidate on full ties, matching a
    // stable sort by (score desc, anchor asc).
    if (best === null || score > best.score || (score === best.score && hit.index < best.anchor)) {
      best = { start, end, anchor: hit.index, score };
    }
  }
  if (best === null) return content.slice(0, SNIPPET_FALLBACK_CHARS);

  return snippetWindow(content, best.start, best.end, termLowers);
}

/** Highlighted window around a hit at `index` of the given `length`. */
function snippetAround(content: string, index: number, length: number, needleLowers: string[]): string {
  const start = Math.max(0, index - SNIPPET_LEAD_CHARS);
  const end = Math.min(content.length, index + length + SNIPPET_TAIL_CHARS);
  return snippetWindow(content, start, end, needleLowers);
}

/** Slice `[start, end)`, highlight the needles, and mark truncated edges with "…". */
function snippetWindow(content: string, start: number, end: number, needleLowers: string[]): string {
  const prefix = start > 0 ? "…" : "";
  const suffix = end < content.length ? "…" : "";
  const snippet = content.slice(start, end);
  return `${prefix}${wrapAnyOccurrences(snippet, needleLowers)}${suffix}`;
}

/**
 * All non-overlapping occurrences of `termLower` in `lower`, left to right.
 * An empty needle yields no hits; without that guard the cursor would never
 * advance and the scan would not terminate.
 */
function collectTermHits(lower: string, termLower: string): TermHit[] {
  const hits: TermHit[] = [];
  if (!termLower) return hits;

  let cursor = 0;
  while (cursor < lower.length) {
    const index = lower.indexOf(termLower, cursor);
    if (index < 0) break;
    hits.push({ index, length: termLower.length });
    cursor = index + termLower.length;
  }
  return hits;
}

/** Rank a window: distinct terms dominate, then matched characters, then hit count. */
function scoreSnippetWindow(lowerSnippet: string, termLowers: string[]): number {
  let distinctTerms = 0;
  let totalHits = 0;
  let matchedChars = 0;

  for (const term of termLowers) {
    const hits = collectTermHits(lowerSnippet, term).length;
    if (hits > 0) distinctTerms += 1;
    totalHits += hits;
    matchedChars += hits * term.length;
  }

  return distinctTerms * 1_000 + matchedChars * 10 + totalHits;
}

function uniqueNonEmpty(values: string[]): string[] {
  return [...new Set(values.filter(Boolean))];
}

/**
 * Wrap every occurrence of any needle in `<mark>` tags. Needles must already
 * be case-folded. When matches overlap, the earliest wins and, at the same
 * position, the longest wins.
 */
function wrapAnyOccurrences(haystack: string, needleLowers: string[]): string {
  const needles = uniqueNonEmpty(needleLowers).sort((left, right) => right.length - left.length);
  if (needles.length === 0) return haystack;

  const lower = foldCase(haystack);
  const matches = needles
    .flatMap((needle) => collectTermHits(lower, needle))
    .sort((left, right) => {
      if (left.index !== right.index) return left.index - right.index;
      return right.length - left.length;
    });

  const out: string[] = [];
  let cursor = 0;
  for (const match of matches) {
    // Skip matches that start inside a span that is already highlighted.
    if (match.index < cursor) continue;
    out.push(haystack.slice(cursor, match.index));
    out.push("<mark>");
    out.push(haystack.slice(match.index, match.index + match.length));
    out.push("</mark>");
    cursor = match.index + match.length;
  }
  out.push(haystack.slice(cursor));
  return out.join("");
}

// Re-export for callers that still rely on the old helper name.
export function isCjkTerm(token: string): boolean {
  return isCjkToken(token);
}
+6
src/query/cjk.ts
import { isCjkToken } from "../tokenize";

/**
 * Compatibility alias for `isCjkToken`.
 *
 * Kept so callers that still import the old helper name keep working. The
 * token is forwarded unchanged and the result of `isCjkToken` is returned
 * as-is.
 *
 * @param token - Token to classify.
 * @returns Whatever `isCjkToken(token)` returns.
 */
export function isCjkTerm(token: string): boolean {
  const verdict = isCjkToken(token);
  return verdict;
}
+13
src/query/coverage.ts
import { coverageStatusForSelector } from "../db";
import type { CoverageStatus, Selector } from "../types";
import type { Db } from "../db";

/**
 * Build the coverage block attached to query responses.
 *
 * Looks up the coverage status recorded for `selector` and repackages it
 * together with the selector that was asked for.
 *
 * @param db - Open database handle to read coverage from.
 * @param selector - Selector the caller asked about, or `null` for none.
 * @returns The requested selector, the `complete` flag and covering
 *   selectors reported by `coverageStatusForSelector`, and a fixed
 *   `"not_checked"` freshness marker.
 */
export function buildCoverageStatus(db: Db, selector: Selector | null): CoverageStatus {
  const { complete, coveringSelectors } = coverageStatusForSelector(db, selector);
  return {
    requested: selector,
    complete,
    // Freshness is not evaluated on this path; the value is always this marker.
    freshness: "not_checked",
    coveringSelectors,
  };
}
+22
src/query/find.ts
import { withReadDb } from "../db";
import { rerankHits } from "../ranking";
import type { FindResult, Selector } from "../types";
import { buildCoverageStatus } from "./coverage";
import { searchMessageHits, searchSessionHits } from "./search";

/**
 * Search the index for sessions matching `query`.
 *
 * Message-level and session-level hits are gathered with an enlarged recall
 * limit, concatenated (message hits first), and handed to `rerankHits`, which
 * trims the list to `limit`.
 *
 * @param dbPath - Path of the SQLite index to open for reading.
 * @param query - Raw user query.
 * @param limit - Maximum number of results to return after reranking.
 * @param selector - Optional selector restricting the search; `null` for none.
 * @returns The echoed query, the reranked results, and the coverage status
 *   for `selector`.
 */
export function findSessions(
  dbPath: string,
  query: string,
  limit: number,
  selector: Selector | null = null,
): { query: string; results: FindResult[]; coverage: ReturnType<typeof buildCoverageStatus> } {
  return withReadDb(dbPath, (db) => {
    // Over-fetch candidates: 12x the requested limit, and never fewer than 50.
    const recallLimit = Math.max(limit * 12, 50);
    const messageHits = searchMessageHits(db, query, recallLimit, undefined, selector);
    const sessionHits = searchSessionHits(db, query, recallLimit, selector);
    const results = rerankHits(messageHits.concat(sessionHits), query, limit);
    const coverage = buildCoverageStatus(db, selector);
    return { query, results, coverage };
  });
}
+13
src/query/list.ts
import { listSessions, withReadDb } from "../db";
import type { SessionListEntry, SessionListQuery } from "../types";
import { buildCoverageStatus } from "./coverage";

/**
 * List session summaries for a list query.
 *
 * @param dbPath - Path of the SQLite index to open for reading.
 * @param query - List query forwarded unchanged to `listSessions`.
 * @returns The echoed query, the rows returned by `listSessions`, and the
 *   coverage status for `query.selector` (treated as `null` when absent).
 */
export function listSessionSummaries(
  dbPath: string,
  query: SessionListQuery,
): { query: SessionListQuery; results: SessionListEntry[]; coverage: ReturnType<typeof buildCoverageStatus> } {
  return withReadDb(dbPath, (db) => {
    const results = listSessions(db, query);
    const selector = query.selector ?? null;
    const coverage = buildCoverageStatus(db, selector);
    return { query, results, coverage };
  });
}
+92
src/query/read.ts
import { coverageEntriesForSession, getMessagesForPage, getMessagesForRange, getSessionRecord, withReadDb } from "../db";
import { rerankHits } from "../ranking";
import type { FindResult, SessionRecord } from "../types";
import type { Db } from "../db";
import { searchMessageHits } from "./search";

const MISSING_ANCHOR_MESSAGE = "read-range requires explicit session_uuid plus either --seq or --query";

/**
 * Read a window of messages around an anchor message in one session.
 *
 * The anchor is `options.seq` when given; otherwise it is the best message
 * hit for `options.query` inside the session.
 *
 * Checks run in this order so each failure reports its real cause:
 * 1. neither `seq` nor `query` supplied -> usage error;
 * 2. unknown session -> "session not found";
 * 3. `query` supplied but nothing matched -> "no message ... matches query".
 *
 * @param dbPath - Path of the SQLite index to open for reading.
 * @param sessionUuid - Session to read from.
 * @param options - Anchor (`seq` or `query`) plus how many messages to
 *   include `before` and `after` it.
 * @returns The session record, the resolved anchor, the inclusive seq range
 *   requested from the store (start clamped at 0), the messages, and the
 *   coverage entries for the session.
 * @throws Error when no anchor was supplied, the session does not exist, or
 *   the query matched no message in the session.
 */
export function getMessageRange(
  dbPath: string,
  sessionUuid: string,
  options: { seq?: number; query?: string; before: number; after: number },
): {
  session: SessionRecord;
  anchorSeq: number;
  rangeStartSeq: number;
  rangeEndSeq: number;
  messages: ReturnType<typeof getMessagesForRange>;
  coverage: { entries: ReturnType<typeof coverageEntriesForSession> };
} {
  return withReadDb(dbPath, (db) => {
    // Argument validation first: it needs no database lookup.
    if (typeof options.seq !== "number" && !options.query) {
      throw new Error(MISSING_ANCHOR_MESSAGE);
    }

    // Check the session before searching it, so an unknown session is not
    // misreported as a missing --seq/--query.
    const session = getSessionRecord(db, sessionUuid);
    if (!session) throw new Error(`session not found: ${sessionUuid}`);

    const anchorSeq = resolveAnchorSeq(db, sessionUuid, options.seq, options.query);
    const rangeStartSeq = Math.max(0, anchorSeq - options.before);
    const rangeEndSeq = anchorSeq + options.after;
    const messages = getMessagesForRange(db, sessionUuid, rangeStartSeq, rangeEndSeq);
    return {
      session,
      anchorSeq,
      rangeStartSeq,
      rangeEndSeq,
      messages,
      coverage: { entries: coverageEntriesForSession(db, session) },
    };
  });
}

/**
 * Read one page of messages from a session.
 *
 * @param dbPath - Path of the SQLite index to open for reading.
 * @param sessionUuid - Session to read from.
 * @param offset - Offset forwarded to `getMessagesForPage`.
 * @param limit - Page size forwarded to `getMessagesForPage`.
 * @returns The session record, the echoed paging arguments, the session's
 *   `messageCount` as `totalCount`, whether more messages follow this page,
 *   the page itself, and the coverage entries for the session.
 * @throws Error when the session does not exist.
 */
export function getMessagePage(
  dbPath: string,
  sessionUuid: string,
  offset: number,
  limit: number,
): {
  session: SessionRecord;
  offset: number;
  limit: number;
  totalCount: number;
  hasMore: boolean;
  messages: ReturnType<typeof getMessagesForPage>;
  coverage: { entries: ReturnType<typeof coverageEntriesForSession> };
} {
  return withReadDb(dbPath, (db) => {
    const session = getSessionRecord(db, sessionUuid);
    if (!session) throw new Error(`session not found: ${sessionUuid}`);

    const messages = getMessagesForPage(db, sessionUuid, offset, limit);
    const totalCount = session.messageCount;
    // More pages exist while the end of this page is short of the total.
    const hasMore = offset + messages.length < totalCount;
    return {
      session,
      offset,
      limit,
      totalCount,
      hasMore,
      messages,
      coverage: { entries: coverageEntriesForSession(db, session) },
    };
  });
}

/**
 * Resolve the anchor seq: an explicit `seq` wins, otherwise the best hit for
 * `query` within the session.
 *
 * @throws Error when `query` matched nothing, or when neither input was given.
 */
function resolveAnchorSeq(
  db: Db,
  sessionUuid: string,
  seq?: number,
  query?: string,
): number {
  if (typeof seq === "number") {
    return seq;
  }

  if (query) {
    const best = searchTopHitInSession(db, sessionUuid, query);
    if (best && typeof best.matchSeq === "number") return best.matchSeq;
    // A query was supplied but found nothing: say so, not "missing --query".
    throw new Error(`no message in session ${sessionUuid} matches query: ${query}`);
  }

  throw new Error(MISSING_ANCHOR_MESSAGE);
}

/** Return the top reranked message hit for `query` inside one session, or `null`. */
function searchTopHitInSession(db: Db, sessionUuid: string, query: string): FindResult | null {
  // 20 candidates are fetched from the session; rerankHits keeps the best one.
  const rows = searchMessageHits(db, query, 20, sessionUuid);
  const [top] = rerankHits(rows, query, 1);
  return top ?? null;
}
+215
src/query/search.ts
import { selectorWhereSql } from "../db";
import type { RawHitRow } from "../ranking";
import { hasCjk, queryTerms } from "../tokenize";
import type { Selector } from "../types";
import type { Db, SqlParams } from "../db";
import { makeLikeSnippet, makeRawSnippet } from "./snippet";

/**
 * Search message bodies for `query`.
 *
 * @param db - Open database handle.
 * @param query - Raw user query; surrounding whitespace is ignored.
 * @param limit - Maximum number of rows to return.
 * @param sessionUuid - When given, restrict hits to this session.
 * @param selector - Optional selector restricting which sessions are searched.
 * @returns Raw hit rows with `matchSource` set to `'message'`; empty when the
 *   query is blank, or when it yields no terms and contains no CJK text.
 */
export function searchMessageHits(
  db: Db,
  query: string,
  limit: number,
  sessionUuid?: string,
  selector: Selector | null = null,
): RawHitRow[] {
  const normalized = query.trim();
  if (!normalized) return [];

  const terms = queryTerms(normalized);
  // A non-blank query can still tokenize to zero terms (e.g. a single CJK
  // character), and such a query cannot hit the FTS index. Fall back to a
  // bounded LIKE scan so single-character CJK probes still work even though
  // they are discouraged.
  if (terms.length === 0) {
    if (hasCjk(normalized)) return searchByLike(db, normalized, limit, sessionUuid, selector);
    return [];
  }

  return searchByFts(db, terms, limit, sessionUuid, selector);
}

/**
 * Search session-level text (title and summaries) for `query`.
 *
 * @returns Raw hit rows with `matchSource` set to `'session'`; empty when the
 *   query is blank, yields no terms, or the `sessions_fts` table is absent.
 */
export function searchSessionHits(db: Db, query: string, limit: number, selector: Selector | null): RawHitRow[] {
  const normalized = query.trim();
  if (!normalized || !tableExists(db, "sessions_fts")) return [];

  const terms = queryTerms(normalized);
  if (terms.length === 0) return [];

  return searchSessionsByFts(db, normalized, terms, limit, selector);
}

/** Run the FTS5 query over `messages_fts`, ordered by bm25 score ascending. */
function searchByFts(
  db: Db,
  terms: string[],
  limit: number,
  sessionUuid?: string,
  selector: Selector | null = null,
): RawHitRow[] {
  const matchExpr = buildFtsMatch(terms);
  const conditions = ["messages_fts MATCH ?"];
  const params: SqlParams = [matchExpr];

  if (selector) {
    const selectorWhere = selectorWhereSql(selector, "s");
    conditions.push(...selectorWhere.conditions);
    params.push(...selectorWhere.params);
  }
  if (sessionUuid) {
    conditions.push("m.session_uuid = ?");
    params.push(sessionUuid);
  }
  // LIMIT is the last placeholder in the statement, so it is bound last.
  params.push(limit);

  return db
    .prepare<typeof params, RawHitRow>(`
      SELECT
        s.session_uuid AS sessionUuid,
        s.title AS title,
        s.summary_text AS summaryText,
        s.cwd AS cwd,
        s.started_at AS startedAt,
        s.ended_at AS endedAt,
        'message' AS matchSource,
        m.seq AS matchSeq,
        m.role AS matchRole,
        m.timestamp AS matchTimestamp,
        m.content_text AS contentText,
        snippet(messages_fts, 0, '<mark>', '</mark>', '…', 16) AS snippet,
        bm25(messages_fts) AS score
      FROM messages_fts
      JOIN messages m ON m.id = messages_fts.rowid
      JOIN sessions s ON s.id = m.session_id
      WHERE ${conditions.join(" AND ")}
      ORDER BY score
      LIMIT ?
    `)
    .all(...params) as RawHitRow[];
}

/**
 * Run the FTS5 query over `sessions_fts` and build each snippet in
 * application code from the concatenated session text.
 */
function searchSessionsByFts(
  db: Db,
  query: string,
  terms: string[],
  limit: number,
  selector: Selector | null,
): RawHitRow[] {
  const matchExpr = buildFtsMatch(terms);
  const conditions = ["sessions_fts MATCH ?"];
  const params: SqlParams = [matchExpr];
  if (selector) {
    const selectorWhere = selectorWhereSql(selector, "s");
    conditions.push(...selectorWhere.conditions);
    params.push(...selectorWhere.params);
  }
  params.push(limit);

  // In SQLite, `a || b` is NULL when either operand is NULL. COALESCE keeps
  // contentText a string even if one of the text columns is NULL, so the
  // snippet builder below never receives null. It is a no-op for non-NULL
  // columns.
  const rows = db
    .prepare<typeof params, RawHitRow>(`
      SELECT
        s.session_uuid AS sessionUuid,
        s.title AS title,
        s.summary_text AS summaryText,
        s.cwd AS cwd,
        s.started_at AS startedAt,
        s.ended_at AS endedAt,
        'session' AS matchSource,
        NULL AS matchSeq,
        'session' AS matchRole,
        NULL AS matchTimestamp,
        COALESCE(s.title, '') || char(10) || COALESCE(s.summary_text, '') || char(10) || COALESCE(s.compact_text, '') || char(10) || COALESCE(s.reasoning_summary_text, '') AS contentText,
        '' AS snippet,
        bm25(sessions_fts, 8.0, 3.0, 4.0, 1.2) AS score
      FROM sessions_fts
      JOIN sessions s ON s.id = sessions_fts.rowid
      WHERE ${conditions.join(" AND ")}
      ORDER BY score
      LIMIT ?
    `)
    .all(...params) as RawHitRow[];

  return rows.map((row) => ({
    ...row,
    snippet: makeRawSnippet(row.contentText, query, terms),
  }));
}

/**
 * Substring fallback for queries that produce no FTS terms. Matches
 * `lower(content_text)` against the lowercased query, newest sessions first.
 */
function searchByLike(
  db: Db,
  query: string,
  limit: number,
  sessionUuid?: string,
  selector: Selector | null = null,
): RawHitRow[] {
  const conditions = ["lower(m.content_text) LIKE ? ESCAPE '\\'"];
  const params: SqlParams = [`%${escapeLike(query.toLowerCase())}%`];
  if (selector) {
    const selectorWhere = selectorWhereSql(selector, "s");
    conditions.push(...selectorWhere.conditions);
    params.push(...selectorWhere.params);
  }
  if (sessionUuid) {
    conditions.push("m.session_uuid = ?");
    params.push(sessionUuid);
  }
  params.push(limit);

  const rows = db
    .prepare<typeof params, RawHitRow & { contentText: string }>(`
      SELECT
        s.session_uuid AS sessionUuid,
        s.title AS title,
        s.summary_text AS summaryText,
        s.cwd AS cwd,
        s.started_at AS startedAt,
        s.ended_at AS endedAt,
        'message' AS matchSource,
        m.seq AS matchSeq,
        m.role AS matchRole,
        m.timestamp AS matchTimestamp,
        m.content_text AS contentText
      FROM messages m
      JOIN sessions s ON s.id = m.session_id
      WHERE ${conditions.join(" AND ")}
      ORDER BY s.started_at DESC, m.seq ASC
      LIMIT ?
    `)
    .all(...params) as Array<RawHitRow & { contentText: string }>;

  return rows.map((row, index) => ({
    ...row,
    snippet: makeLikeSnippet(row.contentText, query),
    // Negate the ordinal so LIKE rows share the "lower is better" polarity
    // with bm25() scores; downstream rerank sorts on row metrics, but any
    // code that touches this raw score won't see a sign mismatch.
    score: -(index + 1),
  }));
}

/** Report whether `sqlite_master` has any entry named `tableName`. */
function tableExists(db: Db, tableName: string): boolean {
  const row = db
    .prepare<[string], unknown>("SELECT 1 FROM sqlite_master WHERE name = ? LIMIT 1")
    .get(tableName);
  return Boolean(row);
}

/**
 * Build an FTS5 MATCH expression from already-tokenized terms. Each term is
 * quoted and ANDed, giving us intersection semantics across CJK bigrams and
 * non-CJK words alike.
 */
function buildFtsMatch(terms: string[]): string {
  return terms.map(quoteFtsTerm).join(" AND ");
}

/** Quote one term as an FTS5 string literal. */
function quoteFtsTerm(term: string): string {
  // FTS5 treats unquoted * / ^ / NEAR / NOT / AND / OR as operators. Wrapping
  // each term in double quotes neutralizes all of them (including *), and we
  // escape internal quotes by doubling them. Bigrams stay bigrams.
  const escaped = term.replaceAll('"', '""');
  return `"${escaped}"`;
}

// LIKE-path escape: the backslash is escaped first so the escapes added for
// % and _ are not themselves doubled. Only queries that tokenize to zero
// terms and contain CJK text reach this branch.
function escapeLike(value: string): string {
  return value.replaceAll("\\", "\\\\").replaceAll("%", "\\%").replaceAll("_", "\\_");
}
+136
src/query/snippet.ts
/** Characters of context kept before a match. */
const CONTEXT_BEFORE = 40;
/** Characters of context kept after a match. */
const CONTEXT_AFTER = 80;
/** Length of the plain excerpt returned when nothing matches. */
const FALLBACK_LENGTH = 160;

/**
 * Build a highlighted snippet around the first case-insensitive occurrence
 * of `query` in `content`.
 *
 * Every occurrence of the query inside the returned window is wrapped in
 * `<mark>…</mark>`, so the output agrees with FTS5's snippet(), which
 * highlights all matches. An ellipsis marks each side that was truncated.
 *
 * NOTE(review): match offsets are found on `content.toLowerCase()` and then
 * applied to `content`; the rare characters whose lowercase form has a
 * different length can shift the highlight.
 *
 * @param content - Full text to excerpt.
 * @param query - Substring to locate; compared case-insensitively.
 * @returns The highlighted window, or the leading excerpt of `content` when
 *   the query does not occur.
 */
export function makeLikeSnippet(content: string, query: string): string {
  const target = query.toLowerCase();
  const index = content.toLowerCase().indexOf(target);
  if (index < 0) return leadingExcerpt(content);
  const { start, end } = contextWindow(content, index, target.length);
  return snippetWindow(content, start, end, [target]);
}

/**
 * Build a highlighted snippet for text that was matched by tokenized terms.
 *
 * The whole query is preferred when it occurs verbatim (case-insensitively).
 * Otherwise a window is built around every term hit and the best-scoring
 * one is kept, with ties going to the earliest hit.
 *
 * @param content - Full text to excerpt.
 * @param query - Original query string.
 * @param terms - Tokenized query terms used for the fallback highlight.
 * @returns The highlighted window, or the leading excerpt of `content` when
 *   neither the query nor any term occurs.
 */
export function makeRawSnippet(content: string, query: string, terms: string[]): string {
  const normalizedQuery = query.toLowerCase();
  const lower = content.toLowerCase();
  const phraseIndex = normalizedQuery ? lower.indexOf(normalizedQuery) : -1;
  if (phraseIndex >= 0) {
    return snippetAround(content, phraseIndex, query.length, [normalizedQuery]);
  }

  const termLowers = uniqueNonEmpty(terms.map((term) => term.toLowerCase()));
  const termHits = termLowers.flatMap((term) => collectTermHits(lower, term));

  let best: { start: number; end: number; anchor: number; score: number } | undefined;
  for (const hit of termHits) {
    const { start, end } = contextWindow(content, hit.index, hit.length);
    const score = scoreSnippetWindow(lower.slice(start, end), termLowers);
    // Higher score wins; on equal scores the earlier anchor wins; on a full
    // tie the first candidate seen is kept.
    const improves =
      best === undefined || score > best.score || (score === best.score && hit.index < best.anchor);
    if (improves) best = { start, end, anchor: hit.index, score };
  }
  if (best === undefined) return leadingExcerpt(content);

  return snippetWindow(content, best.start, best.end, termLowers);
}

/** Report whether cutting `text` at `index` would separate a surrogate pair. */
function splitsSurrogatePair(text: string, index: number): boolean {
  if (index <= 0 || index >= text.length) return false;
  const before = text.charCodeAt(index - 1);
  const after = text.charCodeAt(index);
  return before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
}

/**
 * Compute the excerpt window around a match. Each edge is widened by one
 * code unit when it would otherwise cut a surrogate pair in half.
 */
function contextWindow(content: string, index: number, length: number): { start: number; end: number } {
  let start = Math.max(0, index - CONTEXT_BEFORE);
  let end = Math.min(content.length, index + length + CONTEXT_AFTER);
  if (splitsSurrogatePair(content, start)) start -= 1;
  if (splitsSurrogatePair(content, end)) end += 1;
  return { start, end };
}

/**
 * Leading excerpt used when nothing matched. It is shortened by one code
 * unit rather than ending on half of a surrogate pair.
 */
function leadingExcerpt(content: string): string {
  const end = splitsSurrogatePair(content, FALLBACK_LENGTH) ? FALLBACK_LENGTH - 1 : FALLBACK_LENGTH;
  return content.slice(0, end);
}

/** Highlighted window around a match at `index` spanning `length` characters. */
function snippetAround(content: string, index: number, length: number, needleLowers: string[]): string {
  const { start, end } = contextWindow(content, index, length);
  return snippetWindow(content, start, end, needleLowers);
}

/** Slice `[start, end)` out of `content`, highlight needles, add ellipses for truncated sides. */
function snippetWindow(content: string, start: number, end: number, needleLowers: string[]): string {
  const prefix = start > 0 ? "…" : "";
  const suffix = end < content.length ? "…" : "";
  const snippet = content.slice(start, end);
  return `${prefix}${wrapAnyOccurrences(snippet, needleLowers)}${suffix}`;
}

/** Collect non-overlapping occurrences of `termLower` in `lower`, left to right. */
function collectTermHits(lower: string, termLower: string): Array<{ index: number; length: number }> {
  const hits: Array<{ index: number; length: number }> = [];
  // An empty needle matches at every cursor without advancing it, which
  // would loop forever; treat it as "no hits".
  if (!termLower) return hits;
  let cursor = 0;
  while (cursor < lower.length) {
    const index = lower.indexOf(termLower, cursor);
    if (index < 0) break;
    hits.push({ index, length: termLower.length });
    cursor = index + termLower.length;
  }
  return hits;
}

/**
 * Score a candidate window: distinct terms present dominate, then total
 * matched characters, then raw hit count.
 */
function scoreSnippetWindow(lowerSnippet: string, termLowers: string[]): number {
  let distinctTerms = 0;
  let totalHits = 0;
  let matchedChars = 0;

  for (const term of termLowers) {
    const hits = collectTermHits(lowerSnippet, term).length;
    if (hits > 0) distinctTerms += 1;
    totalHits += hits;
    matchedChars += hits * term.length;
  }

  return distinctTerms * 1_000 + matchedChars * 10 + totalHits;
}

/** Drop empty strings and duplicates, keeping first-seen order. */
function uniqueNonEmpty(values: string[]): string[] {
  return [...new Set(values.filter(Boolean))];
}

/**
 * Wrap every occurrence of any needle in `<mark>…</mark>`. When matches
 * overlap, the leftmost wins, and the longest wins among those starting at
 * the same position.
 */
function wrapAnyOccurrences(haystack: string, needleLowers: string[]): string {
  const needles = uniqueNonEmpty(needleLowers).sort((left, right) => right.length - left.length);
  if (needles.length === 0) return haystack;

  const lower = haystack.toLowerCase();
  const matches = needles
    .flatMap((needle) => collectTermHits(lower, needle))
    .sort((left, right) => {
      if (left.index !== right.index) return left.index - right.index;
      return right.length - left.length;
    });

  const out: string[] = [];
  let cursor = 0;
  for (const match of matches) {
    // Skip matches that start inside text already emitted.
    if (match.index < cursor) continue;
    out.push(haystack.slice(cursor, match.index));
    out.push("<mark>");
    out.push(haystack.slice(match.index, match.index + match.length));
    out.push("</mark>");
    cursor = match.index + match.length;
  }
  out.push(haystack.slice(cursor));
  return out.join("");
}
+32
src/query/stats.ts
import { statSync } from "node:fs";
import { getStatsCounts, getTopCwds, listCoverageRecords, withReadDb } from "../db";
import { INDEX_VERSION } from "../env";
import type { StatsSummary } from "../types";

/**
 * Gather index-wide statistics for the stats command.
 *
 * Counts, the ten most frequent working directories, and the coverage
 * records are read in a single read-only database session. The database
 * file size is then taken from the filesystem.
 *
 * @param dbPath - Path of the SQLite index.
 * @returns The assembled summary. `dbSizeBytes` is 0 when the file cannot
 *   be stat'ed.
 */
export function collectStats(dbPath: string): StatsSummary {
  const snapshot = withReadDb(dbPath, (db) => {
    const counts = getStatsCounts(db);
    const topCwds = getTopCwds(db, 10);
    const coverage = listCoverageRecords(db);
    return { counts, topCwds, coverage };
  });

  const dbSizeBytes = fileSizeOrZero(dbPath);
  const { sessionCount, messageCount, earliestStartedAt, latestEndedAt, lastSyncAt } = snapshot.counts;

  return {
    sessionCount,
    messageCount,
    earliestStartedAt,
    latestEndedAt,
    topCwds: snapshot.topCwds,
    indexVersion: INDEX_VERSION,
    dbPath,
    dbSizeBytes,
    lastSyncAt,
    coverage: snapshot.coverage,
  };
}

/** Size of the file at `path` in bytes; best effort, 0 when `statSync` throws. */
function fileSizeOrZero(path: string): number {
  try {
    return statSync(path).size;
  } catch {
    return 0;
  }
}