// my harness for niri
// NOTE(review): scraped GitHub page chrome (fork button, feed-configuration text,
// "view raw" banner) removed; the file content below is the memory subsystem module.
1import fs from "fs/promises" 2import path from "path" 3import { createHash } from "crypto" 4import { fileURLToPath } from "url" 5import type { Message } from "./types.js" 6import { getDb, isVecAvailable, MEMORY_EMBEDDING_DIMENSIONS } from "./db.js" 7import { EMBEDDING_DIMENSIONS, EMBEDDING_MODEL, embeddingsConfigured, embedTexts } from "./embeddings.js" 8import { recordMetric } from "./metrics.js" 9 10const HOME_DIR = path.resolve(fileURLToPath(import.meta.url), "../../home") 11const MEMORIES_DIR = path.join(HOME_DIR, "memories") 12const JOURNAL_DIR = path.join(MEMORIES_DIR, "journal") 13const PEOPLE_DIR = path.join(MEMORIES_DIR, "people") 14const CORE_FILE = path.join(MEMORIES_DIR, "core.md") 15const ALIASES_FILE = path.join(MEMORIES_DIR, "aliases.json") 16 17const MEMORY_RECALL_HEADER = "[memory recall v1]" 18const MEMORY_RECALL_NOTE = 19 "Potentially relevant long-term notes. Use only if helpful; trust newer conversation details if anything conflicts." 20const MEMORY_RECALL_MAX_CHUNKS = 4 21const MEMORY_RECALL_MAX_CHUNKS_HARD_CAP = 8 22const MEMORY_RECALL_MAX_CHARS = 1_500 23const MEMORY_RECALL_PER_EXTRA_PERSON_CHARS = 400 24const MEMORY_QUERY_TOKEN_LIMIT = 12 25const MEMORY_RECALL_COOLDOWN_TURNS = 7 26const MEMORY_EMBEDDING_BATCH_SIZE = 24 27const MEMORY_SEMANTIC_MIN_SIMILARITY = 0.18 28const MEMORY_SEMANTIC_STRONG_SIMILARITY = 0.32 29const MEMORY_CHATTER_SIMILARITY_THRESHOLD = 0.74 30const MEMORY_RECALL_INTENT_SIMILARITY_THRESHOLD = 0.55 31const SCHEDULED_HEARTBEAT_CONTENT = "Scheduled heartbeat." 
32const MEMORY_EMBEDDING_PROTOTYPES = [ 33 { id: 1, name: "affection-love", category: "chatter", text: "i love you so much sweetie <33" }, 34 { id: 2, name: "cat-greeting", category: "chatter", text: "boop mraow meow hi sweetie" }, 35 { id: 3, name: "celebration", category: "chatter", text: "yay yayy lets gooooo <33" }, 36 { id: 4, name: "goodnight", category: "chatter", text: "goodnight sweet dreams rest well" }, 37 { id: 101, name: "who-person", category: "recall_intent", text: "who is this person what do i know about them" }, 38 { id: 102, name: "past-event", category: "recall_intent", text: "what happened before remember when that event happened" }, 39 { id: 103, name: "task-context", category: "recall_intent", text: "what context do i need for this task or project" }, 40 { id: 104, name: "system-lesson", category: "recall_intent", text: "what lesson or instruction should i remember here" }, 41] as const 42const MEMORY_STOP_WORDS = new Set([ 43 "a", 44 "an", 45 "and", 46 "are", 47 "as", 48 "at", 49 "be", 50 "been", 51 "but", 52 "by", 53 "for", 54 "from", 55 "had", 56 "has", 57 "have", 58 "he", 59 "her", 60 "hers", 61 "him", 62 "his", 63 "i", 64 "if", 65 "in", 66 "into", 67 "is", 68 "it", 69 "its", 70 "me", 71 "my", 72 "of", 73 "on", 74 "or", 75 "our", 76 "she", 77 "that", 78 "the", 79 "their", 80 "them", 81 "there", 82 "they", 83 "this", 84 "to", 85 "up", 86 "us", 87 "was", 88 "we", 89 "were", 90 "with", 91 "you", 92 "your", 93]) 94 95type MemoryKind = "core" | "journal" | "people" 96 97type MemoryDocumentRow = { 98 id: number 99 path: string 100 content_hash: string 101 mtime_ms: number 102 kind?: string 103 title?: string 104} 105 106type MemoryChunkInput = { 107 title: string 108 headingPath: string | null 109 text: string 110 tags: string 111} 112 113type MemoryHit = { 114 chunkId: number 115 path: string 116 kind: MemoryKind 117 documentTitle: string 118 title: string 119 headingPath: string | null 120 text: string 121 rank: number 122 semanticDistance?: 
number 123 semanticSimilarity?: number 124} 125 126export type MemorySearchResult = { 127 chunkId: number 128 kind: MemoryKind 129 path: string 130 source: string 131 title: string 132 headingPath: string | null 133 preview: string 134} 135 136type MemoryQueryParts = { 137 sender: string | null 138 source: string | null 139 body: string 140} 141 142type MemorySearchProfile = { 143 normalized: string 144 sender: string | null 145 senderAliases: string[] 146 bodyTokens: string[] 147 bodyPeople: string[] 148 tokens: string[] 149 personQuery: boolean 150 eventQuery: boolean 151 bodyInformative: boolean 152} 153 154type MemoryHitSignal = { 155 overlap: number 156 strongOverlap: boolean 157 bodyOverlap: number 158 senderMatch: boolean 159} 160 161type MemoryEmbeddingRow = { 162 chunkId: number 163 path: string 164 kind: MemoryKind 165 documentTitle: string 166 title: string 167 headingPath: string | null 168 text: string 169 tags: string | null 170 model: string | null 171 dimensions: number | null 172 contentHash: string | null 173} 174 175type SemanticQuerySignal = { 176 vector: number[] 177 chatterSimilarity: number | null 178 recallIntentSimilarity: number | null 179} 180 181export type AliasMap = Record<string, string[]> 182 183function normalizeText(value: string): string { 184 return value.replace(/\r\n/g, "\n").replace(/\s+/g, " ").trim() 185} 186 187function trimForPrompt(value: string, maxChars: number): string { 188 if (value.length <= maxChars) return value 189 if (maxChars <= 3) return ".".repeat(maxChars) 190 return `${value.slice(0, maxChars - 3).trimEnd()}...` 191} 192 193function basenameWithoutExt(filePath: string): string { 194 return path.basename(filePath, path.extname(filePath)) 195} 196 197function titleFromPath(filePath: string, fallback: string): string { 198 const base = basenameWithoutExt(filePath).replace(/[-_]+/g, " ").trim() 199 return base ? 
base : fallback 200} 201 202function detectMemoryKind(filePath: string): MemoryKind | null { 203 if (filePath === CORE_FILE) return "core" 204 if (filePath.startsWith(`${JOURNAL_DIR}${path.sep}`)) return "journal" 205 if (filePath.startsWith(`${PEOPLE_DIR}${path.sep}`)) return "people" 206 return null 207} 208 209async function pathExists(target: string): Promise<boolean> { 210 try { 211 await fs.access(target) 212 return true 213 } catch { 214 return false 215 } 216} 217 218function normalizeHandle(handle: string): string { 219 return handle.trim().replace(/^@+/, "").toLowerCase() 220} 221 222let aliasCache: { mtimeMs: number; map: AliasMap } | null = null 223 224async function loadAliasMap(): Promise<AliasMap> { 225 try { 226 const stat = await fs.stat(ALIASES_FILE) 227 if (aliasCache && aliasCache.mtimeMs === stat.mtimeMs) return aliasCache.map 228 const raw = await fs.readFile(ALIASES_FILE, "utf-8") 229 const parsed = JSON.parse(raw) as unknown 230 const map: AliasMap = {} 231 if (parsed && typeof parsed === "object") { 232 for (const [key, value] of Object.entries(parsed as Record<string, unknown>)) { 233 const handle = normalizeHandle(key) 234 if (!handle) continue 235 const list = Array.isArray(value) ? value : [value] 236 const aliases = list 237 .map((v) => (typeof v === "string" ? 
normalizeHandle(v) : "")) 238 .filter((v) => v && v !== handle) 239 if (aliases.length > 0) map[handle] = Array.from(new Set(aliases)) 240 } 241 } 242 aliasCache = { mtimeMs: stat.mtimeMs, map } 243 return map 244 } catch { 245 aliasCache = { mtimeMs: 0, map: {} } 246 return {} 247 } 248} 249 250async function writeAliasMap(map: AliasMap): Promise<void> { 251 await fs.mkdir(MEMORIES_DIR, { recursive: true }) 252 const sorted: AliasMap = {} 253 for (const key of Object.keys(map).sort()) sorted[key] = [...map[key]!].sort() 254 await fs.writeFile(ALIASES_FILE, `${JSON.stringify(sorted, null, 2)}\n`, "utf-8") 255 aliasCache = null 256} 257 258function resolveAliases(handle: string | null, map: AliasMap): string[] { 259 if (!handle) return [] 260 const seen = new Set<string>([handle]) 261 const out: string[] = [] 262 const queue = [handle] 263 while (queue.length > 0) { 264 const current = queue.shift()! 265 const next = map[current] ?? [] 266 for (const alias of next) { 267 if (seen.has(alias)) continue 268 seen.add(alias) 269 out.push(alias) 270 queue.push(alias) 271 } 272 } 273 return out 274} 275 276export async function listAliases(): Promise<AliasMap> { 277 return loadAliasMap() 278} 279 280export async function setAlias(handle: string, canonical: string): Promise<AliasMap> { 281 const h = normalizeHandle(handle) 282 const c = normalizeHandle(canonical) 283 if (!h || !c) throw new Error("alias handle and canonical must be non-empty") 284 const map = await loadAliasMap() 285 if (h === c) return map 286 const existing = new Set(map[h] ?? 
[]) 287 existing.add(c) 288 map[h] = Array.from(existing) 289 await writeAliasMap(map) 290 return map 291} 292 293export async function removeAlias(handle: string, canonical?: string): Promise<AliasMap> { 294 const h = normalizeHandle(handle) 295 if (!h) throw new Error("alias handle must be non-empty") 296 const map = await loadAliasMap() 297 if (!map[h]) return map 298 if (canonical) { 299 const c = normalizeHandle(canonical) 300 map[h] = map[h]!.filter((entry) => entry !== c) 301 if (map[h]!.length === 0) delete map[h] 302 } else { 303 delete map[h] 304 } 305 await writeAliasMap(map) 306 return map 307} 308 309async function walkMarkdownFiles(root: string): Promise<string[]> { 310 if (!(await pathExists(root))) return [] 311 312 const found: string[] = [] 313 const entries = await fs.readdir(root, { withFileTypes: true }) 314 for (const entry of entries) { 315 const fullPath = path.join(root, entry.name) 316 if (entry.isDirectory()) { 317 found.push(...(await walkMarkdownFiles(fullPath))) 318 continue 319 } 320 if (entry.isFile() && entry.name.endsWith(".md")) found.push(fullPath) 321 } 322 323 return found.sort() 324} 325 326async function listMemoryFiles(): Promise<string[]> { 327 const files: string[] = [] 328 if (await pathExists(CORE_FILE)) files.push(CORE_FILE) 329 files.push(...(await walkMarkdownFiles(JOURNAL_DIR))) 330 files.push(...(await walkMarkdownFiles(PEOPLE_DIR))) 331 return files 332} 333 334function contentHash(content: string): string { 335 return createHash("sha1").update(content).digest("hex") 336} 337 338function embeddingInputHash(content: string): string { 339 return createHash("sha256").update(content).digest("hex") 340} 341 342function vectorParam(vector: number[]): Float32Array { 343 return new Float32Array(vector) 344} 345 346function embeddingTextForChunk(row: { 347 path: string 348 kind: MemoryKind 349 documentTitle: string 350 title: string 351 headingPath: string | null 352 text: string 353 tags?: string | null 354}): string { 355 
const relativePath = path.relative(HOME_DIR, row.path) 356 return [ 357 `kind: ${row.kind}`, 358 `file: ${relativePath}`, 359 `document: ${row.documentTitle}`, 360 `title: ${row.title}`, 361 row.headingPath ? `section: ${row.headingPath}` : null, 362 row.tags ? `tags: ${row.tags}` : null, 363 "", 364 row.text, 365 ] 366 .filter((part): part is string => part !== null) 367 .join("\n") 368} 369 370function chunkLargeSection(text: string, maxChars = 900): string[] { 371 const paragraphs = text 372 .split(/\n\s*\n/g) 373 .map((part) => part.trim()) 374 .filter(Boolean) 375 376 if (paragraphs.length === 0) return [] 377 378 const chunks: string[] = [] 379 let current = "" 380 381 for (const paragraph of paragraphs) { 382 const next = current ? `${current}\n\n${paragraph}` : paragraph 383 if (next.length <= maxChars || current.length === 0) { 384 current = next 385 continue 386 } 387 chunks.push(current) 388 current = paragraph 389 } 390 391 if (current) chunks.push(current) 392 return chunks 393} 394 395function parseMarkdownDocument(filePath: string, content: string): { title: string; chunks: MemoryChunkInput[] } { 396 const lines = content.replace(/\r\n/g, "\n").split("\n") 397 const h1 = lines.find((line) => /^#\s+/.test(line)) 398 const title = h1 ? h1.replace(/^#\s+/, "").trim() : titleFromPath(filePath, "Memory") 399 const headingStack: string[] = [] 400 let sectionLines: string[] = [] 401 let sectionTitle = title 402 const chunks: MemoryChunkInput[] = [] 403 404 const flushSection = () => { 405 const body = sectionLines.join("\n").trim() 406 if (!body) { 407 sectionLines = [] 408 return 409 } 410 411 const headingPath = headingStack.length > 0 ? 
headingStack.join(" > ") : null 412 const tags = [basenameWithoutExt(filePath), ...headingStack].join(" ").trim() 413 for (const part of chunkLargeSection(body)) { 414 chunks.push({ 415 title: sectionTitle || title, 416 headingPath, 417 text: part, 418 tags, 419 }) 420 } 421 sectionLines = [] 422 } 423 424 for (const line of lines) { 425 const headingMatch = line.match(/^(#{1,6})\s+(.*)$/) 426 if (!headingMatch) { 427 sectionLines.push(line) 428 continue 429 } 430 431 flushSection() 432 433 const level = headingMatch[1]!.length 434 const heading = headingMatch[2]!.trim() 435 if (level === 1) { 436 sectionTitle = heading || title 437 headingStack.length = 0 438 continue 439 } 440 441 while (headingStack.length >= level - 1) headingStack.pop() 442 headingStack.push(heading) 443 sectionTitle = heading || title 444 } 445 446 flushSection() 447 448 if (chunks.length === 0) { 449 const body = content.trim() 450 if (body) { 451 chunks.push({ 452 title, 453 headingPath: null, 454 text: body, 455 tags: basenameWithoutExt(filePath), 456 }) 457 } 458 } 459 460 return { title, chunks } 461} 462 463function memoryRecallCooldownTurns(kind: MemoryKind): number { 464 if (kind === "people" || kind === "journal") return MEMORY_RECALL_COOLDOWN_TURNS 465 return 0 466} 467 468function isCoolingDown(hit: MemoryHit, cooldowns: Record<number, number>, currentTurn: number): boolean { 469 const lastTurn = cooldowns[hit.chunkId] 470 if (typeof lastTurn !== "number") return false 471 return currentTurn - lastTurn < memoryRecallCooldownTurns(hit.kind) 472} 473 474function isMemoryRecallSkippedMessage(content: string): boolean { 475 if (content.startsWith(MEMORY_RECALL_HEADER)) return true 476 if (content.startsWith("[system]")) return true 477 if (content.includes("scan snapshot:") && !content.includes("[discord batch]")) return true 478 if (/\[discord batch\]/i.test(content) && conciseDiscordBatchMemoryQuery(content) === null) return true 479 return false 480} 481 482function 
latestMemoryRecallQuery(conversation: Message[]): string | null { 483 for (let i = conversation.length - 1; i >= 0; i -= 1) { 484 const message = conversation[i] 485 if (!message || message.role !== "user") continue 486 if (typeof message.content !== "string") continue 487 488 const content = message.content.trim() 489 if (!content || isMemoryRecallSkippedMessage(content)) continue 490 if (content === SCHEDULED_HEARTBEAT_CONTENT) continue 491 return content 492 } 493 return null 494} 495 496function discordChannelLabel(channelId: string | null, fallbackContext: string | null, isDm: boolean): string { 497 if (isDm) return "DM" 498 499 const fallback = fallbackContext 500 ?.replace(/^context:\s*/i, "") 501 .replace(/\s*\(\d+\)\s*$/, "") 502 .trim() 503 504 if (channelId) { 505 try { 506 const row = getDb() 507 .prepare("select guild_id, guild_name, channel_name, is_dm from discord_channels where channel_id = ?") 508 .get(channelId) as 509 | { 510 guild_id: string | null 511 guild_name: string | null 512 channel_name: string | null 513 is_dm: number 514 } 515 | undefined 516 517 if (row?.is_dm) return "DM" 518 if (row) { 519 const guild = row.guild_name ?? row.guild_id 520 const channel = row.channel_name ?? channelId 521 if (guild && channel) return `${guild}/#${channel}` 522 if (channel) return `#${channel}` 523 } 524 } catch { 525 // If the main db is unavailable in tests or scripts, keep the parsed context. 526 } 527 } 528 529 if (fallback) return fallback 530 return channelId ? `#${channelId}` : "channel" 531} 532 533function conciseDiscordMemoryQuery(raw: string): MemoryQueryParts | null { 534 const withoutWakeEnvelope = stripWakeEnvelope(raw) 535 if (!/\[discord\/(?:dm|channel)\]/i.test(withoutWakeEnvelope)) return null 536 537 const blocks = withoutWakeEnvelope 538 .split(/\n\s*\n/g) 539 .map((block) => block.trim()) 540 .filter(Boolean) 541 const headerBlock = blocks[0] ?? withoutWakeEnvelope 542 const message = blocks.length > 1 ? 
blocks.slice(1).join("\n\n").trim() : "" 543 544 const lines = headerBlock.split("\n").map((line) => line.trim()).filter(Boolean) 545 const discordLine = lines.find((line) => /^\[discord\/(?:dm|channel)\]/i.test(line)) ?? "" 546 const contextLine = lines.find((line) => /^context:\s*/i.test(line)) ?? null 547 const isDm = /\[discord\/dm\]/i.test(discordLine) 548 const author = discordLine.match(/@(\S+)/)?.[1] ?? null 549 const context = contextLine?.replace(/^context:\s*/i, "").trim() ?? "" 550 const dmChannelId = context.match(/^DM\s+(\d+)/i)?.[1] ?? null 551 const namedChannelId = context.match(/\((\d+)\)\s*$/)?.[1] ?? null 552 const channelId = dmChannelId ?? namedChannelId 553 const location = discordChannelLabel(channelId, contextLine, isDm) 554 555 if (!author && !location && !message) return null 556 return { 557 sender: author ? normalizeHandle(author) : null, 558 source: location || null, 559 body: message, 560 } 561} 562 563function extractBulletSection(raw: string, label: string): string[] { 564 const escapedLabel = label.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") 565 const match = raw.match(new RegExp(`(?:^|\\n)${escapedLabel}:\\n([\\s\\S]*?)(?:\\n\\n[^\\n:]+:|$)`, "i")) 566 if (!match) return [] 567 568 return match[1] 569 .split("\n") 570 .map((line) => line.trim()) 571 .filter((line) => line.startsWith("- ")) 572 .map((line) => line.slice(2).trim()) 573 .filter((line) => line && line !== "(none)") 574} 575 576function conciseDiscordBatchMemoryQuery(raw: string): MemoryQueryParts | null { 577 const withoutWakeEnvelope = stripWakeEnvelope(raw) 578 if (!/\[discord batch\]/i.test(withoutWakeEnvelope)) return null 579 580 const pending = extractBulletSection(withoutWakeEnvelope, "pending preview") 581 const selected = pending.filter((entry) => !/^\(none\)$/i.test(entry)).slice(-3) 582 if (selected.length === 0) return null 583 584 const senders: string[] = [] 585 const sources: string[] = [] 586 const bodies: string[] = [] 587 const entryPattern = 
/(?:^|\s)\[([^\]]+)\]\s+\[[^\]]+\]\s+@([^:]+):\s*(.*)$/i 588 for (const entry of selected) { 589 const match = entry.match(entryPattern) 590 if (!match) { 591 bodies.push(entry) 592 continue 593 } 594 const [, location, author, body] = match 595 if (author) senders.push(normalizeHandle(author)) 596 if (location) sources.push(location.trim()) 597 if (body) bodies.push(body.trim()) 598 } 599 600 const lastSender = senders.length > 0 ? senders[senders.length - 1]! : null 601 const lastSource = sources.length > 0 ? sources[sources.length - 1]! : null 602 const body = bodies.filter(Boolean).join("\n").trim() 603 604 if (!lastSender && !lastSource && !body) return null 605 return { sender: lastSender, source: lastSource, body } 606} 607 608const WAKE_ENVELOPE_PATTERN = /^\[(wake|incoming|harness restarted)[^\n]*\]\s*/gi 609 610function stripWakeEnvelope(raw: string): string { 611 return raw.replace(WAKE_ENVELOPE_PATTERN, "").trim() 612} 613 614function memoryQueryForUserMessage(raw: string): MemoryQueryParts { 615 return ( 616 conciseDiscordMemoryQuery(raw) ?? 617 conciseDiscordBatchMemoryQuery(raw) ?? 618 { sender: null, source: null, body: stripWakeEnvelope(raw) } 619 ) 620} 621 622function memoryQueryToString(parts: MemoryQueryParts): string { 623 const pieces = [ 624 parts.sender ? 
`@${parts.sender}` : null, 625 parts.body || null, 626 ].filter((value): value is string => Boolean(value && value.trim())) 627 return pieces.join("\n") 628} 629 630function normalizeBodyText(raw: string): string { 631 return raw 632 .replace(/@([a-z0-9_.-]+)/gi, " $1 ") 633 .replace(/\b\d{6,}\b/g, " ") 634 .replace(/[^\p{L}\p{N}\s'-]+/gu, " ") 635 .toLowerCase() 636 .trim() 637} 638 639function tokensFromText(raw: string): string[] { 640 const clean = normalizeBodyText(raw) 641 const tokens = clean 642 .split(/\s+/) 643 .map((token) => token.replace(/^['-]+|['-]+$/g, "")) 644 .filter((token) => token.length >= 2 || /\d{2,}/.test(token)) 645 .filter((token) => !MEMORY_STOP_WORDS.has(token)) 646 647 const unique: string[] = [] 648 const seen = new Set<string>() 649 for (const token of tokens) { 650 if (seen.has(token)) continue 651 seen.add(token) 652 unique.push(token) 653 if (unique.length >= MEMORY_QUERY_TOKEN_LIMIT) break 654 } 655 return unique 656} 657 658function searchTokens(raw: string): string[] { 659 return tokensFromText(raw) 660} 661 662const BODY_INFORMATIVE_BM25_THRESHOLD = -5 663 664function bestBodyBm25(bodyTokens: string[]): number | null { 665 if (bodyTokens.length === 0) return null 666 const query = bodyTokens.map((token) => `"${token.replace(/"/g, '""')}"*`).join(" OR ") 667 try { 668 const row = getDb() 669 .prepare( 670 "select bm25(memory_chunks_fts, 5.0, 2.0, 1.0, 0.5) as r from memory_chunks_fts where memory_chunks_fts match ? order by r limit 1", 671 ) 672 .get(query) as { r: number } | undefined 673 return row?.r ?? 
null 674 } catch { 675 return null 676 } 677} 678 679function computeBodyInformativeness(bodyTokens: string[], bodyPeople: string[]): boolean { 680 if (bodyPeople.length > 0) return true 681 if (bodyTokens.length === 0) return false 682 const top = bestBodyBm25(bodyTokens) 683 if (top === null) return false 684 return top <= BODY_INFORMATIVE_BM25_THRESHOLD 685} 686 687async function knownPeopleHandles(aliasMap: AliasMap): Promise<Set<string>> { 688 const handles = new Set<string>() 689 if (await pathExists(PEOPLE_DIR)) { 690 const entries = await fs.readdir(PEOPLE_DIR, { withFileTypes: true }) 691 for (const entry of entries) { 692 if (!entry.isFile() || !entry.name.endsWith(".md")) continue 693 const base = basenameWithoutExt(entry.name).toLowerCase() 694 if (base) handles.add(base) 695 } 696 } 697 for (const [key, values] of Object.entries(aliasMap)) { 698 handles.add(key) 699 for (const value of values) handles.add(value) 700 } 701 return handles 702} 703 704async function buildSearchProfile(parts: MemoryQueryParts): Promise<MemorySearchProfile> { 705 const aliasMap = await loadAliasMap() 706 const sender = parts.sender ? normalizeHandle(parts.sender) : null 707 const senderAliases = resolveAliases(sender, aliasMap) 708 const bodyTokens = parts.body ? tokensFromText(parts.body) : [] 709 710 const known = await knownPeopleHandles(aliasMap) 711 const inlineMentions = parts.body 712 ? Array.from(parts.body.matchAll(/@([a-z0-9_.-]+)/gi)).map((match) => normalizeHandle(match[1]!)) 713 : [] 714 const bodyPeopleSet = new Set<string>() 715 const senderSet = new Set<string>([sender ?? 
"", ...senderAliases].filter(Boolean)) 716 for (const token of [...bodyTokens, ...inlineMentions]) { 717 if (!token || senderSet.has(token)) continue 718 if (known.has(token)) bodyPeopleSet.add(token) 719 } 720 const bodyPeople = Array.from(bodyPeopleSet) 721 for (const person of [...bodyPeople]) { 722 for (const alias of resolveAliases(person, aliasMap)) { 723 if (!senderSet.has(alias)) bodyPeopleSet.add(alias) 724 } 725 } 726 const bodyPeopleResolved = Array.from(bodyPeopleSet) 727 728 const combined: string[] = [] 729 const seen = new Set<string>() 730 const push = (value: string | null | undefined) => { 731 if (!value) return 732 const lower = value.toLowerCase() 733 if (seen.has(lower)) return 734 seen.add(lower) 735 combined.push(lower) 736 } 737 push(sender) 738 for (const alias of senderAliases) push(alias) 739 for (const person of bodyPeopleResolved) push(person) 740 for (const token of bodyTokens) push(token) 741 742 const normalized = [sender ? `@${sender}` : "", parts.source ?? "", parts.body ?? 
""] 743 .filter(Boolean) 744 .join(" ") 745 .toLowerCase() 746 747 const bodyInformative = computeBodyInformativeness(bodyTokens, bodyPeopleResolved) 748 749 return { 750 normalized, 751 sender, 752 senderAliases, 753 bodyTokens, 754 bodyPeople: bodyPeopleResolved, 755 tokens: combined.slice(0, MEMORY_QUERY_TOKEN_LIMIT), 756 bodyInformative, 757 personQuery: 758 Boolean(sender) || 759 bodyPeopleResolved.length > 0 || 760 /\b(who is|who's|tell me about|about)\b/.test(normalized) || 761 /\bname\b/.test(normalized) || 762 /\bfriend\b/.test(normalized), 763 eventQuery: 764 /\b(what happened|when|yesterday|today|tonight|earlier|before|after|session|wake)\b/.test( 765 normalized, 766 ) || 767 /\b\d{4}-\d{2}-\d{2}\b/.test(normalized) || 768 /\b\d{1,2}\/\d{1,2}(?:\/\d{2,4})?\b/.test(normalized) || 769 /\b(january|february|march|april|may|june|july|august|september|october|november|december)\b/.test( 770 normalized, 771 ), 772 } 773} 774 775function buildSearchQuery(profile: MemorySearchProfile): string | null { 776 if (profile.tokens.length === 0) return null 777 return profile.tokens.map((token) => `"${token.replace(/"/g, '""')}"*`).join(" OR ") 778} 779 780async function readMemoryDocumentRows(): Promise<Map<string, MemoryDocumentRow>> { 781 const rows = getDb() 782 .prepare("select id, path, kind, title, content_hash, mtime_ms from memory_documents") 783 .all() as MemoryDocumentRow[] 784 785 return new Map(rows.map((row) => [row.path, row])) 786} 787 788export async function syncMemoryIndex(): Promise<void> { 789 const db = getDb() 790 const files = await listMemoryFiles() 791 const known = await readMemoryDocumentRows() 792 const present = new Set(files) 793 794 const deleteChunksByDocument = db.prepare("delete from memory_chunks where document_id = ?") 795 const insertDocument = db.prepare(` 796 insert into memory_documents (path, kind, title, mtime_ms, content_hash, updated_at) 797 values (@path, @kind, @title, @mtime_ms, @content_hash, datetime('now')) 798 on 
conflict(path) do update set 799 kind = excluded.kind, 800 title = excluded.title, 801 mtime_ms = excluded.mtime_ms, 802 content_hash = excluded.content_hash, 803 updated_at = datetime('now') 804 `) 805 const selectDocumentId = db.prepare("select id from memory_documents where path = ?") 806 const insertChunk = db.prepare(` 807 insert into memory_chunks (document_id, chunk_index, title, heading_path, chunk_text, tags) 808 values (?, ?, ?, ?, ?, ?) 809 `) 810 const deleteDocumentByPath = db.prepare("delete from memory_documents where path = ?") 811 812 const updates: Array<{ 813 path: string 814 kind: MemoryKind 815 title: string 816 mtimeMs: number 817 hash: string 818 chunks: MemoryChunkInput[] 819 action: "inserted" | "updated" 820 }> = [] 821 822 for (const filePath of files) { 823 const kind = detectMemoryKind(filePath) 824 if (!kind) continue 825 826 const [content, stat] = await Promise.all([fs.readFile(filePath, "utf-8"), fs.stat(filePath)]) 827 const hash = contentHash(content) 828 const previous = known.get(filePath) 829 if (previous && previous.content_hash === hash && previous.mtime_ms === Math.floor(stat.mtimeMs)) continue 830 831 const parsed = parseMarkdownDocument(filePath, content) 832 updates.push({ 833 path: filePath, 834 kind, 835 title: parsed.title, 836 mtimeMs: Math.floor(stat.mtimeMs), 837 hash, 838 chunks: parsed.chunks, 839 action: previous ? 
"updated" : "inserted", 840 }) 841 } 842 843 const removedPaths: string[] = [] 844 845 db.transaction(() => { 846 for (const item of updates) { 847 insertDocument.run({ 848 path: item.path, 849 kind: item.kind, 850 title: item.title, 851 mtime_ms: item.mtimeMs, 852 content_hash: item.hash, 853 }) 854 const row = selectDocumentId.get(item.path) as { id: number } | undefined 855 if (!row) continue 856 857 deleteChunksByDocument.run(row.id) 858 item.chunks.forEach((chunk, index) => { 859 insertChunk.run(row.id, index, chunk.title, chunk.headingPath, chunk.text, chunk.tags) 860 }) 861 862 console.log( 863 `[memory] ${item.action} kind=${item.kind} chunks=${item.chunks.length} path=${path.relative(HOME_DIR, item.path)}`, 864 ) 865 } 866 867 for (const filePath of known.keys()) { 868 if (present.has(filePath)) continue 869 deleteDocumentByPath.run(filePath) 870 removedPaths.push(filePath) 871 console.log(`[memory] removed path=${path.relative(HOME_DIR, filePath)}`) 872 } 873 })() 874 875 if (updates.length === 0 && removedPaths.length === 0) { 876 await syncMemoryEmbeddings() 877 return 878 } 879 880 await syncMemoryEmbeddings() 881} 882 883let embeddingSkipWarned = false 884 885async function syncMemoryEmbeddings(): Promise<void> { 886 if (!isVecAvailable()) return 887 if (!embeddingsConfigured()) { 888 if (!embeddingSkipWarned) { 889 console.warn("[memory] embeddings disabled: set EMBEDDING_API_KEY") 890 embeddingSkipWarned = true 891 } 892 return 893 } 894 if (EMBEDDING_DIMENSIONS !== MEMORY_EMBEDDING_DIMENSIONS) { 895 if (!embeddingSkipWarned) { 896 console.warn( 897 `[memory] embeddings disabled: EMBEDDING_DIMENSIONS=${EMBEDDING_DIMENSIONS} but sqlite-vec table is ${MEMORY_EMBEDDING_DIMENSIONS}`, 898 ) 899 embeddingSkipWarned = true 900 } 901 return 902 } 903 904 const db = getDb() 905 try { 906 await syncMemoryEmbeddingPrototypes() 907 } catch (err: any) { 908 console.warn(`[memory] prototype embedding sync failed: ${err?.message ?? 
String(err)}`)
    return
  }
  // Prune stale rows: embedding metadata / vectors whose chunk no longer exists.
  db.prepare("delete from memory_embedding_meta where chunk_id not in (select id from memory_chunks)").run()
  db.prepare("delete from memory_chunk_vec where rowid not in (select id from memory_chunks)").run()

  // Load every chunk with its current embedding metadata. The left join means
  // chunks that were never embedded come back with null model/dimensions/hash.
  const rows = db
    .prepare(`
      select
        c.id as chunkId,
        d.path as path,
        d.kind as kind,
        d.title as documentTitle,
        c.title as title,
        c.heading_path as headingPath,
        c.chunk_text as text,
        c.tags as tags,
        m.model as model,
        m.dimensions as dimensions,
        m.content_hash as contentHash
      from memory_chunks c
      join memory_documents d on d.id = c.document_id
      left join memory_embedding_meta m on m.chunk_id = c.id
      order by d.kind, d.path, c.chunk_index
    `)
    .all() as MemoryEmbeddingRow[]

  // A chunk needs (re-)embedding when the model, dimensions, or the hash of its
  // embedding input text differ from what was recorded at last embed time.
  const pending = rows
    .map((row) => {
      const text = embeddingTextForChunk(row)
      return { ...row, embeddingText: text, embeddingHash: embeddingInputHash(text) }
    })
    .filter(
      (row) =>
        row.model !== EMBEDDING_MODEL ||
        row.dimensions !== MEMORY_EMBEDDING_DIMENSIONS ||
        row.contentHash !== row.embeddingHash,
    )

  if (pending.length === 0) return

  const upsertMeta = db.prepare(`
    insert into memory_embedding_meta (chunk_id, model, dimensions, content_hash, updated_at)
    values (?, ?, ?, ?, datetime('now'))
    on conflict(chunk_id) do update set
      model = excluded.model,
      dimensions = excluded.dimensions,
      content_hash = excluded.content_hash,
      updated_at = datetime('now')
  `)
  // Vector table rowid mirrors the chunk id so joins stay trivial.
  const upsertVector = db.prepare("insert or replace into memory_chunk_vec(rowid, embedding) values (?, ?)")

  let embedded = 0
  for (let i = 0; i < pending.length; i += MEMORY_EMBEDDING_BATCH_SIZE) {
    const batch = pending.slice(i, i + MEMORY_EMBEDDING_BATCH_SIZE)
    let vectors: number[][]
    try {
      vectors = await embedTexts(batch.map((row) => row.embeddingText))
    } catch (err: any) {
      // Abort the whole pass on a failed batch; meta rows stay stale, so the
      // remaining chunks are retried on the next sync.
      console.warn(`[memory] embedding batch failed: ${err?.message ?? String(err)}`)
      return
    }

    // Vector + metadata are written atomically per batch.
    db.transaction(() => {
      batch.forEach((row, index) => {
        const vector = vectors[index]
        if (!vector) return
        if (vector.length !== MEMORY_EMBEDDING_DIMENSIONS) {
          throw new Error(`embedding dimension mismatch: got ${vector.length}, expected ${MEMORY_EMBEDDING_DIMENSIONS}`)
        }
        upsertVector.run(BigInt(row.chunkId), vectorParam(vector))
        upsertMeta.run(row.chunkId, EMBEDDING_MODEL, MEMORY_EMBEDDING_DIMENSIONS, row.embeddingHash)
        embedded += 1
      })
    })()
  }

  console.log(`[memory] embedded chunks=${embedded} model=${EMBEDDING_MODEL} dimensions=${MEMORY_EMBEDDING_DIMENSIONS}`)
}

/**
 * Keeps the prototype-phrase embeddings (the "chatter" / "recall_intent"
 * classifier anchors from MEMORY_EMBEDDING_PROTOTYPES) in sync with the DB.
 * A prototype is re-embedded when it is new or when its name, category,
 * model, dimensions, or content hash changed.
 */
async function syncMemoryEmbeddingPrototypes(): Promise<void> {
  const db = getDb()
  const rows = db
    .prepare("select id, name, category, model, dimensions, content_hash as contentHash from memory_embedding_prototypes")
    .all() as Array<{
    id: number
    name: string
    category: string
    model: string
    dimensions: number
    contentHash: string
  }>
  const known = new Map(rows.map((row) => [row.id, row]))
  const pending = MEMORY_EMBEDDING_PROTOTYPES.map((prototype) => ({
    ...prototype,
    hash: embeddingInputHash(`${prototype.category}\n${prototype.name}\n${prototype.text}`),
  })).filter((prototype) => {
    const row = known.get(prototype.id)
    return (
      !row ||
      row.name !== prototype.name ||
      row.category !== prototype.category ||
      row.model !== EMBEDDING_MODEL ||
      row.dimensions !== MEMORY_EMBEDDING_DIMENSIONS ||
      row.contentHash !== prototype.hash
    )
  })

  if (pending.length === 0) return

  const vectors = await embedTexts(pending.map((prototype) => prototype.text))
  const upsertPrototype = db.prepare(`
    insert into memory_embedding_prototypes (id, name, category, model, dimensions, content_hash, updated_at)
    values (?, ?, ?, ?, ?, ?, datetime('now'))
    on conflict(id) do update set
      name = excluded.name,
      category = excluded.category,
      model = excluded.model,
      dimensions = excluded.dimensions,
      content_hash = excluded.content_hash,
      updated_at = datetime('now')
  `)
  // Prototype vector rowid mirrors the prototype id.
  const upsertVector = db.prepare("insert or replace into memory_prototype_vec(rowid, embedding) values (?, ?)")

  db.transaction(() => {
    pending.forEach((prototype, index) => {
      const vector = vectors[index]
      if (!vector) return
      if (vector.length !== MEMORY_EMBEDDING_DIMENSIONS) {
        throw new Error(`prototype embedding dimension mismatch: got ${vector.length}, expected ${MEMORY_EMBEDDING_DIMENSIONS}`)
      }
      upsertVector.run(BigInt(prototype.id), vectorParam(vector))
      upsertPrototype.run(
        prototype.id,
        prototype.name,
        prototype.category,
        EMBEDDING_MODEL,
        MEMORY_EMBEDDING_DIMENSIONS,
        prototype.hash,
      )
    })
  })()
}

// The sender's handle plus any configured aliases for it.
function senderHandles(profile: MemorySearchProfile): string[] {
  const out: string[] = []
  if (profile.sender) out.push(profile.sender)
  for (const alias of profile.senderAliases) out.push(alias)
  return out
}

// Order-preserving, deduped union of sender handles and people mentioned in
// the message body.
function allPersonHandles(profile: MemorySearchProfile): string[] {
  const seen = new Set<string>()
  const out: string[] = []
  for (const handle of [...senderHandles(profile), ...profile.bodyPeople]) {
    if (!handle || seen.has(handle)) continue
    seen.add(handle)
    out.push(handle)
  }
  return out
}

// People explicitly named in the body win; otherwise fall back to everyone we
// know about (sender + aliases + body mentions).
function targetPersonHandles(profile: MemorySearchProfile): string[] {
  return profile.bodyPeople.length > 0 ? profile.bodyPeople : allPersonHandles(profile)
}

// True when the handle appears in the hit's titles/heading/basename, or the
// hit is that person's own people-file (path ends in /<handle>.md).
function hitMatchesHandle(hit: MemoryHit, handle: string): boolean {
  const titleHaystack = `${hit.documentTitle} ${hit.title} ${hit.headingPath ??
""} ${basenameWithoutExt(hit.path)}`.toLowerCase()
  const pathHaystack = hit.path.toLowerCase()
  return titleHaystack.includes(handle) || pathHaystack.includes(`/${handle}.md`)
}

// Looser variant: title/path match OR the handle appears anywhere in the
// chunk's searchable text (which includes the chunk body).
function hitMentionsHandle(hit: MemoryHit, handle: string): boolean {
  return hitMatchesHandle(hit, handle) || hitSearchHaystack(hit).includes(handle)
}

/**
 * Heuristic re-ranking on top of the FTS bm25 rank. Lower score sorts first,
 * so subtractions are boosts and additions are penalties. Weights are
 * hand-tuned; adjust with care.
 */
function scoreMemoryHit(hit: MemoryHit, profile: MemorySearchProfile): number {
  let score = hit.rank

  // Bias document kinds toward the query's intent (person vs. event).
  if (profile.personQuery && !profile.eventQuery) {
    if (hit.kind === "people") score -= 2
    if (hit.kind === "core") score -= 0.5
    if (hit.kind === "journal") score += 1.5
  } else if (profile.eventQuery && !profile.personQuery) {
    if (hit.kind === "journal") score -= 1.5
    if (hit.kind === "people") score += 0.75
    if (hit.kind === "core") score += 0.25
  } else {
    if (hit.kind === "people") score -= 0.5
    if (hit.kind === "core") score -= 0.25
  }

  // Small boost when a body token shows up in the title/heading/basename.
  const titleHaystack = `${hit.documentTitle} ${hit.title} ${hit.headingPath ?? ""} ${basenameWithoutExt(hit.path)}`.toLowerCase()
  if (profile.bodyTokens.some((token) => titleHaystack.includes(token))) score -= 0.35

  // Strong boost for a title/people-file match on any known handle; weaker
  // boost when the handle only appears in the chunk body.
  const handles = allPersonHandles(profile)
  if (handles.length > 0) {
    const fullHaystack = `${titleHaystack} ${hit.text.toLowerCase()}`
    const pathHaystack = hit.path.toLowerCase()
    if (handles.some((h) => titleHaystack.includes(h) || pathHaystack.includes(`/${h}.md`))) {
      score -= 3
    } else if (handles.some((h) => fullHaystack.includes(h))) {
      score -= 1
    }
  }

  // When specific people are named in the body, strongly prefer their own
  // notes and penalize unrelated people/core chunks.
  if (profile.bodyPeople.length > 0) {
    const targetHandles = targetPersonHandles(profile)
    const matchesTarget = targetHandles.some((handle) => hitMatchesHandle(hit, handle))
    if (matchesTarget) {
      score -= 4
    } else if (hit.kind === "people") {
      score += 3
    } else if (hit.kind === "core" && !profileHasCoreIntent(profile)) {
      score += 3.5
    }
  }

  return score
}

// Lowercased titles + heading path + file basename + chunk text, used for
// substring matching against query tokens and handles.
function hitSearchHaystack(hit: MemoryHit): string {
  return `${hit.documentTitle} ${hit.title} ${hit.headingPath ?? ""} ${basenameWithoutExt(hit.path)} ${hit.text}`.toLowerCase()
}

/**
 * Relevance signal for a hit: how many query tokens it contains (`overlap`),
 * whether any matching token is "strong" (length >= 5 or numeric), how many
 * body-only tokens match (`bodyOverlap`), and whether any known person handle
 * matches (`senderMatch`).
 */
function memoryHitSignal(hit: MemoryHit, profile: MemorySearchProfile): MemoryHitSignal {
  const haystack = hitSearchHaystack(hit)
  let overlap = 0
  let strongOverlap = false
  let bodyOverlap = 0

  for (const token of profile.tokens) {
    if (!haystack.includes(token)) continue
    overlap += 1
    if (token.length >= 5 || /[0-9]/.test(token)) strongOverlap = true
  }
  for (const token of profile.bodyTokens) {
    if (haystack.includes(token)) bodyOverlap += 1
  }

  const handles = allPersonHandles(profile)
  const titleHaystack = `${hit.documentTitle} ${hit.title} ${hit.headingPath ??
""} ${basenameWithoutExt(hit.path)}`.toLowerCase()
  const pathHaystack = hit.path.toLowerCase()
  const senderMatch =
    handles.length > 0 &&
    handles.some((h) => titleHaystack.includes(h) || pathHaystack.includes(`/${h}.md`) || haystack.includes(h))

  return { overlap, strongOverlap, bodyOverlap, senderMatch }
}

/**
 * Final gate before injecting recalled chunks into the conversation: the
 * top-ranked hit must show enough overlap with the query (thresholds vary by
 * query shape) or match a known person handle.
 */
function shouldInjectHits(hits: MemoryHit[], profile: MemorySearchProfile): boolean {
  if (hits.length === 0) return false

  const topSignal = memoryHitSignal(hits[0]!, profile)

  // Person-anchored queries: require an informative body, then accept on a
  // handle match or sufficient body-token overlap.
  if (profile.sender || profile.bodyPeople.length > 0) {
    if (!profile.bodyInformative) return false
    if (topSignal.senderMatch) return true
    if (topSignal.bodyOverlap >= 2) return true
    if (topSignal.bodyOverlap >= 1 && topSignal.strongOverlap) return true
    return false
  }

  // Event queries need at least one journal hit; person queries need a
  // people/core hit.
  if (profile.eventQuery && !profile.personQuery) {
    return topSignal.overlap >= 1 && hits.some((hit) => hit.kind === "journal")
  }

  if (profile.personQuery && !profile.eventQuery) {
    return topSignal.overlap >= 1 && hits.some((hit) => hit.kind === "people" || hit.kind === "core")
  }

  // Generic queries: demand stronger token overlap.
  if (topSignal.bodyOverlap >= 2) return true
  if (topSignal.bodyOverlap >= 1 && topSignal.strongOverlap) return true
  if (topSignal.overlap >= 2 && topSignal.strongOverlap) return true
  return false
}

// True when the message explicitly asks to recall something: names people,
// references an event, or uses recall phrasing ("who is", "remember", ...).
function profileHasExplicitRecallIntent(profile: MemorySearchProfile): boolean {
  if (profile.bodyPeople.length > 0) return true
  if (profile.eventQuery) return true
  if (/\b(who is|who's|tell me about|remember|recall|what happened|what do i know|context)\b/.test(profile.normalized)) {
    return true
  }
  return false
}

// True when the message touches identity/system/workflow topics that live in
// core.md; used to decide whether core chunks should be surfaced.
function profileHasCoreIntent(profile: MemorySearchProfile): boolean {
  return /\b(core|identity|system|environment|lesson|rule|instruction|workflow|tool|config|memory)\b/.test(profile.normalized)
}

/**
 * Embeds the query and classifies it against the stored prototype vectors
 * (KNN, k=8). Returns the query vector plus the best "chatter" and
 * "recall_intent" similarities, or null when embeddings/vec are unavailable
 * or dimensions do not line up.
 * Similarity is computed as 1 - distance — assumes a cosine-style distance
 * from sqlite-vec; TODO(review) confirm the configured metric.
 */
async function semanticQuerySignal(memoryQuery: string): Promise<SemanticQuerySignal | null> {
  if (!isVecAvailable() || !embeddingsConfigured() || EMBEDDING_DIMENSIONS !== MEMORY_EMBEDDING_DIMENSIONS) return null
  const [vector] = await embedTexts([memoryQuery])
  if (!vector || vector.length !== MEMORY_EMBEDDING_DIMENSIONS) return null

  const rows = getDb()
    .prepare(`
      select p.category as category, v.distance as distance
      from memory_prototype_vec v
      join memory_embedding_prototypes p on p.id = v.rowid
      where v.embedding match ?
        and k = ?
      order by v.distance
    `)
    .all(vectorParam(vector), 8) as Array<{ category: string; distance: number }>

  // Keep the maximum similarity seen per category; null means no prototype of
  // that category came back in the top k.
  let chatterSimilarity: number | null = null
  let recallIntentSimilarity: number | null = null
  for (const row of rows) {
    const similarity = 1 - row.distance
    if (row.category === "chatter") chatterSimilarity = Math.max(chatterSimilarity ?? -Infinity, similarity)
    if (row.category === "recall_intent") {
      recallIntentSimilarity = Math.max(recallIntentSimilarity ?? -Infinity, similarity)
    }
  }

  return {
    vector,
    chatterSimilarity,
    recallIntentSimilarity,
  }
}

/**
 * Skip recall entirely for small-talk: the query looks like chatter, shows no
 * explicit recall intent, and is short (<= 5 body tokens).
 */
function shouldSkipForSemanticChatter(profile: MemorySearchProfile, signal: SemanticQuerySignal | null): boolean {
  if (!signal) return false
  if (profileHasExplicitRecallIntent(profile)) return false
  if (profile.bodyTokens.length > 5) return false
  const chatter = signal.chatterSimilarity ?? 0
  const recallIntent = signal.recallIntentSimilarity ??
0 1237 return chatter >= MEMORY_CHATTER_SIMILARITY_THRESHOLD && recallIntent < MEMORY_RECALL_INTENT_SIMILARITY_THRESHOLD 1238} 1239 1240function semanticDistancesForQuery(vector: number[], limit: number): Map<number, number> { 1241 if (!isVecAvailable()) return new Map() 1242 const rows = getDb() 1243 .prepare(` 1244 select rowid as chunkId, distance 1245 from memory_chunk_vec 1246 where embedding match ? 1247 and k = ? 1248 order by distance 1249 `) 1250 .all(vectorParam(vector), limit) as Array<{ chunkId: number; distance: number }> 1251 return new Map(rows.map((row) => [row.chunkId, row.distance])) 1252} 1253 1254function exactPersonHits(profile: MemorySearchProfile): MemoryHit[] { 1255 if (profile.bodyPeople.length === 0) return [] 1256 const db = getDb() 1257 const hits: MemoryHit[] = [] 1258 const seen = new Set<number>() 1259 const stmt = db.prepare(` 1260 select 1261 c.id as chunkId, 1262 d.path as path, 1263 d.kind as kind, 1264 d.title as documentTitle, 1265 c.title as title, 1266 c.heading_path as headingPath, 1267 c.chunk_text as text, 1268 -10.0 as rank 1269 from memory_chunks c 1270 join memory_documents d on d.id = c.document_id 1271 where d.kind = 'people' 1272 and lower(d.path) like ? 
1273 order by c.chunk_index 1274 `) 1275 1276 for (const handle of profile.bodyPeople) { 1277 const rows = stmt.all(`%/people/${handle.toLowerCase()}.md`) as MemoryHit[] 1278 for (const row of rows) { 1279 if (seen.has(row.chunkId)) continue 1280 seen.add(row.chunkId) 1281 hits.push(row) 1282 } 1283 } 1284 return hits 1285} 1286 1287function scoreMemoryHitWithSemantic(hit: MemoryHit, profile: MemorySearchProfile): number { 1288 let score = scoreMemoryHit(hit, profile) 1289 const coreIntent = profileHasCoreIntent(profile) 1290 1291 if (hit.semanticSimilarity !== undefined) { 1292 score -= hit.semanticSimilarity * 2.5 1293 if (hit.kind === "core" && !coreIntent && hit.semanticSimilarity < MEMORY_SEMANTIC_STRONG_SIMILARITY) { 1294 score += 1.25 1295 } 1296 } else { 1297 score += 0.75 1298 if (hit.kind === "core" && !coreIntent) score += 1.25 1299 } 1300 1301 if (hit.kind === "core" && !coreIntent) score += 0.75 1302 return score 1303} 1304 1305async function searchMemory( 1306 profile: MemorySearchProfile, 1307 cooldowns: Record<number, number>, 1308 currentTurn: number, 1309 limit: number, 1310 semanticSignal: SemanticQuerySignal | null = null, 1311): Promise<MemoryHit[]> { 1312 const db = getDb() 1313 const query = buildSearchQuery(profile) 1314 if (!query) return [] 1315 const rows = db 1316 .prepare(` 1317 select 1318 c.id as chunkId, 1319 d.path as path, 1320 d.kind as kind, 1321 d.title as documentTitle, 1322 c.title as title, 1323 c.heading_path as headingPath, 1324 c.chunk_text as text, 1325 bm25(memory_chunks_fts, 5.0, 2.0, 1.0, 0.5) as rank 1326 from memory_chunks_fts 1327 join memory_chunks c on c.id = memory_chunks_fts.rowid 1328 join memory_documents d on d.id = c.document_id 1329 where memory_chunks_fts match ? 1330 order by rank 1331 limit ? 
1332 `) 1333 .all(query, Math.max(limit * 8, limit)) as MemoryHit[] 1334 1335 for (const hit of exactPersonHits(profile)) { 1336 if (rows.some((row) => row.chunkId === hit.chunkId)) continue 1337 rows.push(hit) 1338 } 1339 1340 if (semanticSignal) { 1341 const distances = semanticDistancesForQuery(semanticSignal.vector, Math.max(limit * 16, 64)) 1342 for (const row of rows) { 1343 const distance = distances.get(row.chunkId) 1344 if (distance === undefined) continue 1345 row.semanticDistance = distance 1346 row.semanticSimilarity = 1 - distance 1347 } 1348 } 1349 1350 const ordered = rows.sort((a, b) => scoreMemoryHitWithSemantic(a, profile) - scoreMemoryHitWithSemantic(b, profile)) 1351 const pool = 1352 profile.personQuery && !profile.eventQuery 1353 ? (() => { 1354 const structured = ordered.filter((row) => row.kind === "people" || row.kind === "core") 1355 return structured.length > 0 ? structured : ordered 1356 })() 1357 : profile.eventQuery && !profile.personQuery 1358 ? (() => { 1359 const journal = ordered.filter((row) => row.kind === "journal") 1360 return journal.length > 0 ? 
journal : ordered 1361 })() 1362 : ordered 1363 1364 const deduped: MemoryHit[] = [] 1365 const seenChunkIds = new Set<number>() 1366 const seenPaths = new Set<string>() 1367 1368 const handles = targetPersonHandles(profile) 1369 const exactTargetAvailable = 1370 profile.bodyPeople.length > 0 && pool.some((row) => handles.some((handle) => hitMatchesHandle(row, handle))) 1371 if (handles.length >= 2) { 1372 for (const handle of handles) { 1373 if (deduped.length >= limit) break 1374 const candidate = pool.find( 1375 (row) => 1376 !seenChunkIds.has(row.chunkId) && 1377 !seenPaths.has(row.path) && 1378 !isCoolingDown(row, cooldowns, currentTurn) && 1379 hitMatchesHandle(row, handle), 1380 ) 1381 if (!candidate) continue 1382 seenChunkIds.add(candidate.chunkId) 1383 seenPaths.add(candidate.path) 1384 deduped.push(candidate) 1385 } 1386 } 1387 1388 for (const row of pool) { 1389 if (deduped.length >= limit) break 1390 if (seenChunkIds.has(row.chunkId)) continue 1391 if (isCoolingDown(row, cooldowns, currentTurn)) continue 1392 const exactTargetMatch = profile.bodyPeople.length > 0 && handles.some((handle) => hitMatchesHandle(row, handle)) 1393 if (exactTargetAvailable && !exactTargetMatch && row.kind !== "journal") { 1394 continue 1395 } 1396 if (profile.bodyPeople.length > 0 && !profile.bodyPeople.some((handle) => hitMentionsHandle(row, handle))) { 1397 continue 1398 } 1399 if ( 1400 semanticSignal && 1401 row.kind === "core" && 1402 !profileHasCoreIntent(profile) && 1403 (row.semanticSimilarity ?? 
0) < MEMORY_SEMANTIC_MIN_SIMILARITY
    ) {
      continue
    }
    seenChunkIds.add(row.chunkId)
    deduped.push(row)
  }

  return deduped
}

// Human-readable provenance line: "[kind] / doc title / heading / relative path".
function formatMemorySource(hit: MemoryHit): string {
  const relativePath = path.relative(HOME_DIR, hit.path)
  const pieces = [`[${hit.kind}]`, hit.documentTitle]
  if (hit.headingPath && hit.headingPath !== hit.documentTitle) pieces.push(hit.headingPath)
  pieces.push(relativePath)
  return pieces.join(" / ")
}

// Shape a hit for metrics/consumers, with the body trimmed to a short preview.
function toMemorySearchResult(hit: MemoryHit): MemorySearchResult {
  return {
    chunkId: hit.chunkId,
    kind: hit.kind,
    path: hit.path,
    source: formatMemorySource(hit),
    title: hit.title,
    headingPath: hit.headingPath,
    preview: trimForPrompt(normalizeText(hit.text), 280),
  }
}

/**
 * Renders the recall block injected into the conversation: header + caveat
 * note, then one bullet per hit, stopping once maxChars would be exceeded.
 * The first hit is always included even if it alone exceeds the budget
 * (the break requires lines.length > 3).
 */
function buildMemoryRecallMessage(hits: MemoryHit[], maxChars: number): string {
  const lines = [MEMORY_RECALL_HEADER, MEMORY_RECALL_NOTE, ""]
  let usedChars = lines.join("\n").length

  for (const hit of hits) {
    const source = formatMemorySource(hit)
    const remaining = Math.max(120, maxChars - usedChars - source.length - 10)
    const body = trimForPrompt(normalizeText(hit.text), Math.min(280, remaining))
    const block = `- ${source}\n ${body}`

    if (usedChars + block.length > maxChars && lines.length > 3) break
    lines.push(block)
    usedChars += block.length + 1
  }

  return lines.join("\n").trim()
}

/**
 * Main entry point for recall: derives a memory query from the latest user
 * message, runs several skip gates (trivial body, semantic chatter), searches
 * memory, and — when the hits pass shouldInjectHits — appends a recall
 * message to the conversation. Returns the (possibly extended) message list
 * plus the injected chunk ids for cooldown bookkeeping.
 */
export async function buildCompletionMessages(
  conversation: Message[],
  cooldowns: Record<number, number>,
  currentTurn: number,
): Promise<{ messages: Message[]; recalledChunkIds: number[] }> {
  const memoryQuerySource = latestMemoryRecallQuery(conversation)
  if (!memoryQuerySource) return { messages: conversation, recalledChunkIds: [] }
  const queryParts = memoryQueryForUserMessage(memoryQuerySource)
  const memoryQuery = memoryQueryToString(queryParts)

  await syncMemoryIndex()

  const profile = await buildSearchProfile(queryParts)
  if (profile.tokens.length === 0) return { messages: conversation, recalledChunkIds: [] }

  // Person-anchored message with nothing substantive in the body: skip.
  if ((profile.sender || profile.bodyPeople.length > 0) && !profile.bodyInformative) {
    console.log(
      `[memory] skipped query=${JSON.stringify(trimForPrompt(normalizeText(memoryQuery), 120))} sender=${profile.sender ?? "-"} reason=trivial-body`,
    )
    return { messages: conversation, recalledChunkIds: [] }
  }

  // Semantic signal is best-effort; recall falls back to lexical-only.
  let semanticSignal: SemanticQuerySignal | null = null
  try {
    semanticSignal = await semanticQuerySignal(memoryQuery)
  } catch (err: any) {
    console.warn(`[memory] semantic query failed: ${err?.message ?? String(err)}`)
  }

  if (shouldSkipForSemanticChatter(profile, semanticSignal)) {
    console.log(
      `[memory] skipped query=${JSON.stringify(trimForPrompt(normalizeText(memoryQuery), 120))} sender=${profile.sender ?? "-"} reason=semantic-chatter chatter=${semanticSignal?.chatterSimilarity?.toFixed(3) ?? "-"} recallIntent=${semanticSignal?.recallIntentSimilarity?.toFixed(3) ?? "-"}`,
    )
    return { messages: conversation, recalledChunkIds: [] }
  }

  // Allow one extra chunk per additional person, capped at the hard limit.
  const personCount = allPersonHandles(profile).length
  const recallLimit = Math.min(
    MEMORY_RECALL_MAX_CHUNKS_HARD_CAP,
    personCount >= 2 ? MEMORY_RECALL_MAX_CHUNKS + (personCount - 1) : MEMORY_RECALL_MAX_CHUNKS,
  )
  const hits = await searchMemory(profile, cooldowns, currentTurn, recallLimit, semanticSignal)
  const aliasInfo = profile.senderAliases.length > 0 ? ` aliases=${profile.senderAliases.join(",")}` : ""
  const peopleInfo = profile.bodyPeople.length > 0 ? ` people=${profile.bodyPeople.join(",")}` : ""
  const debugTag = `sender=${profile.sender ??
"-"}${aliasInfo}${peopleInfo} personQuery=${profile.personQuery} eventQuery=${profile.eventQuery}`
  if (hits.length === 0) {
    console.log(
      `[memory] no-hits query=${JSON.stringify(trimForPrompt(normalizeText(memoryQuery), 120))} ${debugTag}`,
    )
    return { messages: conversation, recalledChunkIds: [] }
  }
  if (!shouldInjectHits(hits, profile)) {
    console.log(
      `[memory] skipped query=${JSON.stringify(trimForPrompt(normalizeText(memoryQuery), 120))} ${debugTag} reason=weak-match`,
    )
    return { messages: conversation, recalledChunkIds: [] }
  }

  const extraPersons = Math.max(0, personCount - 1)
  const recallChars = MEMORY_RECALL_MAX_CHARS + extraPersons * MEMORY_RECALL_PER_EXTRA_PERSON_CHARS
  const recallContent = buildMemoryRecallMessage(hits, recallChars)
  console.log(
    `[memory] recalled query=${JSON.stringify(trimForPrompt(normalizeText(memoryQuery), 120))} ${debugTag}\n${recallContent}`,
  )

  recordMetric({
    type: "memory",
    query: memoryQuery,
    results: hits.map(toMemorySearchResult),
  })

  const recallMessage: Message = {
    role: "user",
    content: recallContent,
  }

  return {
    messages: [...conversation, recallMessage],
    recalledChunkIds: hits.map((hit) => hit.chunkId),
  }
}

/**
 * Records that the given chunks were just injected and prunes entries whose
 * last injection is older than twice the cooldown window. Returns a new map;
 * the input is not mutated.
 */
export function rememberRecalledMemoryChunks(
  cooldowns: Record<number, number>,
  injectedChunkIds: number[],
  currentTurn: number,
): Record<number, number> {
  const updated: Record<number, number> = { ...cooldowns }
  injectedChunkIds.forEach((id) => {
    updated[id] = currentTurn
  })

  // Expire entries that have aged out of the cooldown bookkeeping window.
  const expiryWindow = MEMORY_RECALL_COOLDOWN_TURNS * 2
  for (const key of Object.keys(updated)) {
    const chunkId = Number(key)
    const lastSeen = updated[chunkId] ?? currentTurn
    if (currentTurn - lastSeen >= expiryWindow) delete updated[chunkId]
  }

  return updated
}

export const __memoryTest = {
  latestMemoryRecallQuery,
  memoryQueryForUserMessage,
  memoryQueryToString,
  normalizeBodyText,
  searchTokens,
  buildSearchProfile,
  resolveAliases,
  normalizeHandle,
}

/**
 * Standalone memory search (no conversation context): syncs the index, builds
 * a profile from the raw query, adds a best-effort semantic signal, and
 * returns up to `limit` (clamped to 1..10) formatted results. Cooldowns are
 * disabled by passing an empty map with an infinite current turn.
 */
export async function searchMemories(rawQuery: string, limit = 5): Promise<MemorySearchResult[]> {
  await syncMemoryIndex()

  const profile = await buildSearchProfile({ sender: null, source: null, body: rawQuery })
  if (profile.tokens.length === 0) return []

  let signal: SemanticQuerySignal | null
  try {
    signal = await semanticQuerySignal(rawQuery)
  } catch {
    signal = null
  }

  const cappedLimit = Math.max(1, Math.min(limit, 10))
  const hits = await searchMemory(profile, {}, Number.POSITIVE_INFINITY, cappedLimit, signal)
  const results = hits.map(toMemorySearchResult)

  recordMetric({
    type: "memory",
    query: rawQuery,
    results,
  })
  return results
}