my harness for niri
import fs from "fs/promises"
import path from "path"
import { createHash } from "crypto"
import { fileURLToPath } from "url"
import type { Message } from "./types.js"
import { getDb, isVecAvailable, MEMORY_EMBEDDING_DIMENSIONS } from "./db.js"
import { EMBEDDING_DIMENSIONS, EMBEDDING_MODEL, embeddingsConfigured, embedTexts } from "./embeddings.js"
import { recordMetric } from "./metrics.js"

const HOME_DIR = path.resolve(fileURLToPath(import.meta.url), "../../home")
const MEMORIES_DIR = path.join(HOME_DIR, "memories")
const JOURNAL_DIR = path.join(MEMORIES_DIR, "journal")
const PEOPLE_DIR = path.join(MEMORIES_DIR, "people")
const CORE_FILE = path.join(MEMORIES_DIR, "core.md")
const ALIASES_FILE = path.join(MEMORIES_DIR, "aliases.json")

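// Tuning knobs for memory recall: how many chunks get injected, character budgets,
// per-chunk cooldowns, and the similarity thresholds used by the semantic gates below.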
const MEMORY_RECALL_HEADER = "[memory recall v1]"
const MEMORY_RECALL_NOTE =
  "Potentially relevant long-term notes. Use only if helpful; trust newer conversation details if anything conflicts."
const MEMORY_RECALL_MAX_CHUNKS = 4
const MEMORY_RECALL_MAX_CHUNKS_HARD_CAP = 8
const MEMORY_RECALL_MAX_CHARS = 1_500
const MEMORY_RECALL_PER_EXTRA_PERSON_CHARS = 400
const MEMORY_QUERY_TOKEN_LIMIT = 12
const MEMORY_RECALL_COOLDOWN_TURNS = 7
const MEMORY_EMBEDDING_BATCH_SIZE = 24
const MEMORY_SEMANTIC_MIN_SIMILARITY = 0.18
const MEMORY_SEMANTIC_STRONG_SIMILARITY = 0.32
const MEMORY_CHATTER_SIMILARITY_THRESHOLD = 0.74
const MEMORY_RECALL_INTENT_SIMILARITY_THRESHOLD = 0.55
const SCHEDULED_HEARTBEAT_CONTENT = "Scheduled heartbeat."
const MEMORY_EMBEDDING_PROTOTYPES = [
  { id: 1, name: "affection-love", category: "chatter", text: "i love you so much sweetie <33" },
  { id: 2, name: "cat-greeting", category: "chatter", text: "boop mraow meow hi sweetie" },
  { id: 3, name: "celebration", category: "chatter", text: "yay yayy lets gooooo <33" },
  { id: 4, name: "goodnight", category: "chatter", text: "goodnight sweet dreams rest well" },
  { id: 101, name: "who-person", category: "recall_intent", text: "who is this person what do i know about them" },
  { id: 102, name: "past-event", category: "recall_intent", text: "what happened before remember when that event happened" },
  { id: 103, name: "task-context", category: "recall_intent", text: "what context do i need for this task or project" },
  { id: 104, name: "system-lesson", category: "recall_intent", text: "what lesson or instruction should i remember here" },
] as const
const MEMORY_STOP_WORDS = new Set([
  "a",
  "an",
  "and",
  "are",
  "as",
  "at",
  "be",
  "been",
  "but",
  "by",
  "for",
  "from",
  "had",
  "has",
  "have",
  "he",
  "her",
  "hers",
  "him",
  "his",
  "i",
  "if",
  "in",
  "into",
  "is",
  "it",
  "its",
  "me",
  "my",
  "of",
  "on",
  "or",
  "our",
  "she",
  "that",
  "the",
  "their",
  "them",
  "there",
  "they",
  "this",
  "to",
  "up",
  "us",
  "was",
  "we",
  "were",
  "with",
  "you",
  "your",
])

type MemoryKind = "core" | "journal" | "people"

type MemoryDocumentRow = {
  id: number
  path: string
  content_hash: string
  mtime_ms: number
  kind?: string
  title?: string
}

type MemoryChunkInput = {
  title: string
  headingPath: string | null
  text: string
  tags: string
}

type MemoryHit = {
  chunkId: number
  path: string
  kind: MemoryKind
  documentTitle: string
  title: string
  headingPath: string | null
  text: string
  rank: number
  semanticDistance?: number
  semanticSimilarity?: number
}

export type MemorySearchResult = {
  chunkId: number
  kind: MemoryKind
  path: string
  source: string
  title: string
  headingPath: string | null
  preview: string
}

type MemoryQueryParts = {
  sender: string | null
  source: string | null
  body: string
}

type MemorySearchProfile = {
  normalized: string
  sender: string | null
  senderAliases: string[]
  bodyTokens: string[]
  bodyPeople: string[]
  tokens: string[]
  personQuery: boolean
  eventQuery: boolean
  bodyInformative: boolean
}

type MemoryHitSignal = {
  overlap: number
  strongOverlap: boolean
  bodyOverlap: number
  senderMatch: boolean
}

type MemoryEmbeddingRow = {
  chunkId: number
  path: string
  kind: MemoryKind
  documentTitle: string
  title: string
  headingPath: string | null
  text: string
  tags: string | null
  model: string | null
  dimensions: number | null
  contentHash: string | null
}

type SemanticQuerySignal = {
  vector: number[]
  chatterSimilarity: number | null
  recallIntentSimilarity: number | null
}

export type AliasMap = Record<string, string[]>

function normalizeText(value: string): string {
  return value.replace(/\r\n/g, "\n").replace(/\s+/g, " ").trim()
}

function trimForPrompt(value: string, maxChars: number): string {
  if (value.length <= maxChars) return value
  if (maxChars <= 3) return ".".repeat(maxChars)
  return `${value.slice(0, maxChars - 3).trimEnd()}...`
}

function basenameWithoutExt(filePath: string): string {
  return path.basename(filePath, path.extname(filePath))
}

function titleFromPath(filePath: string, fallback: string): string {
  const base = basenameWithoutExt(filePath).replace(/[-_]+/g, " ").trim()
  return base ? base : fallback
}

function detectMemoryKind(filePath: string): MemoryKind | null {
  if (filePath === CORE_FILE) return "core"
  if (filePath.startsWith(`${JOURNAL_DIR}${path.sep}`)) return "journal"
  if (filePath.startsWith(`${PEOPLE_DIR}${path.sep}`)) return "people"
  return null
}

async function pathExists(target: string): Promise<boolean> {
  try {
    await fs.access(target)
    return true
  } catch {
    return false
  }
}

function normalizeHandle(handle: string): string {
  return handle.trim().replace(/^@+/, "").toLowerCase()
}

let aliasCache: { mtimeMs: number; map: AliasMap } | null = null

async function loadAliasMap(): Promise<AliasMap> {
  try {
    const stat = await fs.stat(ALIASES_FILE)
    if (aliasCache && aliasCache.mtimeMs === stat.mtimeMs) return aliasCache.map
    const raw = await fs.readFile(ALIASES_FILE, "utf-8")
    const parsed = JSON.parse(raw) as unknown
    const map: AliasMap = {}
    if (parsed && typeof parsed === "object") {
      for (const [key, value] of Object.entries(parsed as Record<string, unknown>)) {
        const handle = normalizeHandle(key)
        if (!handle) continue
        const list = Array.isArray(value) ? value : [value]
        const aliases = list
          .map((v) => (typeof v === "string" ? normalizeHandle(v) : ""))
          .filter((v) => v && v !== handle)
        if (aliases.length > 0) map[handle] = Array.from(new Set(aliases))
      }
    }
    aliasCache = { mtimeMs: stat.mtimeMs, map }
    return map
  } catch {
    aliasCache = { mtimeMs: 0, map: {} }
    return {}
  }
}

async function writeAliasMap(map: AliasMap): Promise<void> {
  await fs.mkdir(MEMORIES_DIR, { recursive: true })
  const sorted: AliasMap = {}
  for (const key of Object.keys(map).sort()) sorted[key] = [...map[key]!].sort()
  await fs.writeFile(ALIASES_FILE, `${JSON.stringify(sorted, null, 2)}\n`, "utf-8")
  aliasCache = null
}

function resolveAliases(handle: string | null, map: AliasMap): string[] {
  if (!handle) return []
  const seen = new Set<string>([handle])
  const out: string[] = []
  const queue = [handle]
  while (queue.length > 0) {
    const current = queue.shift()!
    const next = map[current] ?? []
    for (const alias of next) {
      if (seen.has(alias)) continue
      seen.add(alias)
      out.push(alias)
      queue.push(alias)
    }
  }
  return out
}

export async function listAliases(): Promise<AliasMap> {
  return loadAliasMap()
}

export async function setAlias(handle: string, canonical: string): Promise<AliasMap> {
  const h = normalizeHandle(handle)
  const c = normalizeHandle(canonical)
  if (!h || !c) throw new Error("alias handle and canonical must be non-empty")
  const map = await loadAliasMap()
  if (h === c) return map
  const existing = new Set(map[h] ?? [])
  existing.add(c)
  map[h] = Array.from(existing)
  await writeAliasMap(map)
  return map
}

export async function removeAlias(handle: string, canonical?: string): Promise<AliasMap> {
  const h = normalizeHandle(handle)
  if (!h) throw new Error("alias handle must be non-empty")
  const map = await loadAliasMap()
  if (!map[h]) return map
  if (canonical) {
    const c = normalizeHandle(canonical)
    map[h] = map[h]!.filter((entry) => entry !== c)
    if (map[h]!.length === 0) delete map[h]
  } else {
    delete map[h]
  }
  await writeAliasMap(map)
  return map
}

async function walkMarkdownFiles(root: string): Promise<string[]> {
  if (!(await pathExists(root))) return []

  const found: string[] = []
  const entries = await fs.readdir(root, { withFileTypes: true })
  for (const entry of entries) {
    const fullPath = path.join(root, entry.name)
    if (entry.isDirectory()) {
      found.push(...(await walkMarkdownFiles(fullPath)))
      continue
    }
    if (entry.isFile() && entry.name.endsWith(".md")) found.push(fullPath)
  }

  return found.sort()
}

async function listMemoryFiles(): Promise<string[]> {
  const files: string[] = []
  if (await pathExists(CORE_FILE)) files.push(CORE_FILE)
  files.push(...(await walkMarkdownFiles(JOURNAL_DIR)))
  files.push(...(await walkMarkdownFiles(PEOPLE_DIR)))
  return files
}

function contentHash(content: string): string {
  return createHash("sha1").update(content).digest("hex")
}

function embeddingInputHash(content: string): string {
  return createHash("sha256").update(content).digest("hex")
}

function vectorParam(vector: number[]): Float32Array {
  return new Float32Array(vector)
}

function embeddingTextForChunk(row: {
  path: string
  kind: MemoryKind
  documentTitle: string
  title: string
  headingPath: string | null
  text: string
  tags?: string | null
}): string {
  const relativePath = path.relative(HOME_DIR, row.path)
  return [
    `kind: ${row.kind}`,
    `file: ${relativePath}`,
    `document: ${row.documentTitle}`,
    `title: ${row.title}`,
    row.headingPath ? `section: ${row.headingPath}` : null,
    row.tags ? `tags: ${row.tags}` : null,
    "",
    row.text,
  ]
    .filter((part): part is string => part !== null)
    .join("\n")
}

function chunkLargeSection(text: string, maxChars = 900): string[] {
  const paragraphs = text
    .split(/\n\s*\n/g)
    .map((part) => part.trim())
    .filter(Boolean)

  if (paragraphs.length === 0) return []

  const chunks: string[] = []
  let current = ""

  for (const paragraph of paragraphs) {
    const next = current ? `${current}\n\n${paragraph}` : paragraph
    if (next.length <= maxChars || current.length === 0) {
      current = next
      continue
    }
    chunks.push(current)
    current = paragraph
  }

  if (current) chunks.push(current)
  return chunks
}

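// Splits a markdown memory file into indexable chunks: one chunk per heading section,
// with oversized sections broken up further by chunkLargeSection above.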
function parseMarkdownDocument(filePath: string, content: string): { title: string; chunks: MemoryChunkInput[] } {
  const lines = content.replace(/\r\n/g, "\n").split("\n")
  const h1 = lines.find((line) => /^#\s+/.test(line))
  const title = h1 ? h1.replace(/^#\s+/, "").trim() : titleFromPath(filePath, "Memory")
  const headingStack: string[] = []
  let sectionLines: string[] = []
  let sectionTitle = title
  const chunks: MemoryChunkInput[] = []

  const flushSection = () => {
    const body = sectionLines.join("\n").trim()
    if (!body) {
      sectionLines = []
      return
    }

    const headingPath = headingStack.length > 0 ? headingStack.join(" > ") : null
    const tags = [basenameWithoutExt(filePath), ...headingStack].join(" ").trim()
    for (const part of chunkLargeSection(body)) {
      chunks.push({
        title: sectionTitle || title,
        headingPath,
        text: part,
        tags,
      })
    }
    sectionLines = []
  }

  for (const line of lines) {
    const headingMatch = line.match(/^(#{1,6})\s+(.*)$/)
    if (!headingMatch) {
      sectionLines.push(line)
      continue
    }

    flushSection()

    const level = headingMatch[1]!.length
    const heading = headingMatch[2]!.trim()
    if (level === 1) {
      sectionTitle = heading || title
      headingStack.length = 0
      continue
    }

    while (headingStack.length >= level - 1) headingStack.pop()
    headingStack.push(heading)
    sectionTitle = heading || title
  }

  flushSection()

  if (chunks.length === 0) {
    const body = content.trim()
    if (body) {
      chunks.push({
        title,
        headingPath: null,
        text: body,
        tags: basenameWithoutExt(filePath),
      })
    }
  }

  return { title, chunks }
}

function memoryRecallCooldownTurns(kind: MemoryKind): number {
  if (kind === "people" || kind === "journal") return MEMORY_RECALL_COOLDOWN_TURNS
  return 0
}

function isCoolingDown(hit: MemoryHit, cooldowns: Record<number, number>, currentTurn: number): boolean {
  const lastTurn = cooldowns[hit.chunkId]
  if (typeof lastTurn !== "number") return false
  return currentTurn - lastTurn < memoryRecallCooldownTurns(hit.kind)
}

function isMemoryRecallSkippedMessage(content: string): boolean {
  if (content.startsWith(MEMORY_RECALL_HEADER)) return true
  if (content.startsWith("[system]")) return true
  if (content.includes("scan snapshot:") && !content.includes("[discord batch]")) return true
  if (/\[discord batch\]/i.test(content) && conciseDiscordBatchMemoryQuery(content) === null) return true
  return false
}

function latestMemoryRecallQuery(conversation: Message[]): string | null {
  for (let i = conversation.length - 1; i >= 0; i -= 1) {
    const message = conversation[i]
    if (!message || message.role !== "user") continue
    if (typeof message.content !== "string") continue

    const content = message.content.trim()
    if (!content || isMemoryRecallSkippedMessage(content)) continue
    if (content === SCHEDULED_HEARTBEAT_CONTENT) continue
    return content
  }
  return null
}

function discordChannelLabel(channelId: string | null, fallbackContext: string | null, isDm: boolean): string {
  if (isDm) return "DM"

  const fallback = fallbackContext
    ?.replace(/^context:\s*/i, "")
    .replace(/\s*\(\d+\)\s*$/, "")
    .trim()

  if (channelId) {
    try {
      const row = getDb()
        .prepare("select guild_id, guild_name, channel_name, is_dm from discord_channels where channel_id = ?")
        .get(channelId) as
        | {
            guild_id: string | null
            guild_name: string | null
            channel_name: string | null
            is_dm: number
          }
        | undefined

      if (row?.is_dm) return "DM"
      if (row) {
        const guild = row.guild_name ?? row.guild_id
        const channel = row.channel_name ?? channelId
        if (guild && channel) return `${guild}/#${channel}`
        if (channel) return `#${channel}`
      }
    } catch {
      // If the main db is unavailable in tests or scripts, keep the parsed context.
    }
  }

  if (fallback) return fallback
  return channelId ? `#${channelId}` : "channel"
}

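// Turns a formatted Discord message envelope into a compact query (sender, location, body)
// so recall searches the actual message content rather than the wrapper text.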
function conciseDiscordMemoryQuery(raw: string): MemoryQueryParts | null {
  const withoutWakeEnvelope = stripWakeEnvelope(raw)
  if (!/\[discord\/(?:dm|channel)\]/i.test(withoutWakeEnvelope)) return null

  const blocks = withoutWakeEnvelope
    .split(/\n\s*\n/g)
    .map((block) => block.trim())
    .filter(Boolean)
  const headerBlock = blocks[0] ?? withoutWakeEnvelope
  const message = blocks.length > 1 ? blocks.slice(1).join("\n\n").trim() : ""

  const lines = headerBlock.split("\n").map((line) => line.trim()).filter(Boolean)
  const discordLine = lines.find((line) => /^\[discord\/(?:dm|channel)\]/i.test(line)) ?? ""
  const contextLine = lines.find((line) => /^context:\s*/i.test(line)) ?? null
  const isDm = /\[discord\/dm\]/i.test(discordLine)
  const author = discordLine.match(/@(\S+)/)?.[1] ?? null
  const context = contextLine?.replace(/^context:\s*/i, "").trim() ?? ""
  const dmChannelId = context.match(/^DM\s+(\d+)/i)?.[1] ?? null
  const namedChannelId = context.match(/\((\d+)\)\s*$/)?.[1] ?? null
  const channelId = dmChannelId ?? namedChannelId
  const location = discordChannelLabel(channelId, contextLine, isDm)

  if (!author && !location && !message) return null
  return {
    sender: author ? normalizeHandle(author) : null,
    source: location || null,
    body: message,
  }
}

function extractBulletSection(raw: string, label: string): string[] {
  const escapedLabel = label.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
  const match = raw.match(new RegExp(`(?:^|\\n)${escapedLabel}:\\n([\\s\\S]*?)(?:\\n\\n[^\\n:]+:|$)`, "i"))
  if (!match) return []

  return match[1]
    .split("\n")
    .map((line) => line.trim())
    .filter((line) => line.startsWith("- "))
    .map((line) => line.slice(2).trim())
    .filter((line) => line && line !== "(none)")
}

function conciseDiscordBatchMemoryQuery(raw: string): MemoryQueryParts | null {
  const withoutWakeEnvelope = stripWakeEnvelope(raw)
  if (!/\[discord batch\]/i.test(withoutWakeEnvelope)) return null

  const pending = extractBulletSection(withoutWakeEnvelope, "pending preview")
  const selected = pending.filter((entry) => !/^\(none\)$/i.test(entry)).slice(-3)
  if (selected.length === 0) return null

  const senders: string[] = []
  const sources: string[] = []
  const bodies: string[] = []
  const entryPattern = /(?:^|\s)\[([^\]]+)\]\s+\[[^\]]+\]\s+@([^:]+):\s*(.*)$/i
  for (const entry of selected) {
    const match = entry.match(entryPattern)
    if (!match) {
      bodies.push(entry)
      continue
    }
    const [, location, author, body] = match
    if (author) senders.push(normalizeHandle(author))
    if (location) sources.push(location.trim())
    if (body) bodies.push(body.trim())
  }

  const lastSender = senders.length > 0 ? senders[senders.length - 1]! : null
  const lastSource = sources.length > 0 ? sources[sources.length - 1]! : null
  const body = bodies.filter(Boolean).join("\n").trim()

  if (!lastSender && !lastSource && !body) return null
  return { sender: lastSender, source: lastSource, body }
}

const WAKE_ENVELOPE_PATTERN = /^\[(wake|incoming|harness restarted)[^\n]*\]\s*/gi

function stripWakeEnvelope(raw: string): string {
  return raw.replace(WAKE_ENVELOPE_PATTERN, "").trim()
}

function memoryQueryForUserMessage(raw: string): MemoryQueryParts {
  return (
    conciseDiscordMemoryQuery(raw) ??
    conciseDiscordBatchMemoryQuery(raw) ??
    { sender: null, source: null, body: stripWakeEnvelope(raw) }
  )
}

function memoryQueryToString(parts: MemoryQueryParts): string {
  const pieces = [
    parts.sender ? `@${parts.sender}` : null,
    parts.body || null,
  ].filter((value): value is string => Boolean(value && value.trim()))
  return pieces.join("\n")
}

function normalizeBodyText(raw: string): string {
  return raw
    .replace(/@([a-z0-9_.-]+)/gi, " $1 ")
    .replace(/\b\d{6,}\b/g, " ")
    .replace(/[^\p{L}\p{N}\s'-]+/gu, " ")
    .toLowerCase()
    .trim()
}

function tokensFromText(raw: string): string[] {
  const clean = normalizeBodyText(raw)
  const tokens = clean
    .split(/\s+/)
    .map((token) => token.replace(/^['-]+|['-]+$/g, ""))
    .filter((token) => token.length >= 2 || /\d{2,}/.test(token))
    .filter((token) => !MEMORY_STOP_WORDS.has(token))

  const unique: string[] = []
  const seen = new Set<string>()
  for (const token of tokens) {
    if (seen.has(token)) continue
    seen.add(token)
    unique.push(token)
    if (unique.length >= MEMORY_QUERY_TOKEN_LIMIT) break
  }
  return unique
}

function searchTokens(raw: string): string[] {
  return tokensFromText(raw)
}

const BODY_INFORMATIVE_BM25_THRESHOLD = -5

function bestBodyBm25(bodyTokens: string[]): number | null {
  if (bodyTokens.length === 0) return null
  const query = bodyTokens.map((token) => `"${token.replace(/"/g, '""')}"*`).join(" OR ")
  try {
    const row = getDb()
      .prepare(
        "select bm25(memory_chunks_fts, 5.0, 2.0, 1.0, 0.5) as r from memory_chunks_fts where memory_chunks_fts match ? order by r limit 1",
      )
      .get(query) as { r: number } | undefined
    return row?.r ?? null
  } catch {
    return null
  }
}

function computeBodyInformativeness(bodyTokens: string[], bodyPeople: string[]): boolean {
  if (bodyPeople.length > 0) return true
  if (bodyTokens.length === 0) return false
  const top = bestBodyBm25(bodyTokens)
  if (top === null) return false
  return top <= BODY_INFORMATIVE_BM25_THRESHOLD
}

async function knownPeopleHandles(aliasMap: AliasMap): Promise<Set<string>> {
  const handles = new Set<string>()
  if (await pathExists(PEOPLE_DIR)) {
    const entries = await fs.readdir(PEOPLE_DIR, { withFileTypes: true })
    for (const entry of entries) {
      if (!entry.isFile() || !entry.name.endsWith(".md")) continue
      const base = basenameWithoutExt(entry.name).toLowerCase()
      if (base) handles.add(base)
    }
  }
  for (const [key, values] of Object.entries(aliasMap)) {
    handles.add(key)
    for (const value of values) handles.add(value)
  }
  return handles
}

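// Builds the search profile for a query: resolves sender aliases, detects known people
// mentioned in the body, and classifies the query as person- and/or event-oriented.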
async function buildSearchProfile(parts: MemoryQueryParts): Promise<MemorySearchProfile> {
  const aliasMap = await loadAliasMap()
  const sender = parts.sender ? normalizeHandle(parts.sender) : null
  const senderAliases = resolveAliases(sender, aliasMap)
  const bodyTokens = parts.body ? tokensFromText(parts.body) : []

  const known = await knownPeopleHandles(aliasMap)
  const inlineMentions = parts.body
    ? Array.from(parts.body.matchAll(/@([a-z0-9_.-]+)/gi)).map((match) => normalizeHandle(match[1]!))
    : []
  const bodyPeopleSet = new Set<string>()
  const senderSet = new Set<string>([sender ?? "", ...senderAliases].filter(Boolean))
  for (const token of [...bodyTokens, ...inlineMentions]) {
    if (!token || senderSet.has(token)) continue
    if (known.has(token)) bodyPeopleSet.add(token)
  }
  const bodyPeople = Array.from(bodyPeopleSet)
  for (const person of [...bodyPeople]) {
    for (const alias of resolveAliases(person, aliasMap)) {
      if (!senderSet.has(alias)) bodyPeopleSet.add(alias)
    }
  }
  const bodyPeopleResolved = Array.from(bodyPeopleSet)

  const combined: string[] = []
  const seen = new Set<string>()
  const push = (value: string | null | undefined) => {
    if (!value) return
    const lower = value.toLowerCase()
    if (seen.has(lower)) return
    seen.add(lower)
    combined.push(lower)
  }
  push(sender)
  for (const alias of senderAliases) push(alias)
  for (const person of bodyPeopleResolved) push(person)
  for (const token of bodyTokens) push(token)

  const normalized = [sender ? `@${sender}` : "", parts.source ?? "", parts.body ?? ""]
    .filter(Boolean)
    .join(" ")
    .toLowerCase()

  const bodyInformative = computeBodyInformativeness(bodyTokens, bodyPeopleResolved)

  return {
    normalized,
    sender,
    senderAliases,
    bodyTokens,
    bodyPeople: bodyPeopleResolved,
    tokens: combined.slice(0, MEMORY_QUERY_TOKEN_LIMIT),
    bodyInformative,
    personQuery:
      Boolean(sender) ||
      bodyPeopleResolved.length > 0 ||
      /\b(who is|who's|tell me about|about)\b/.test(normalized) ||
      /\bname\b/.test(normalized) ||
      /\bfriend\b/.test(normalized),
    eventQuery:
      /\b(what happened|when|yesterday|today|tonight|earlier|before|after|session|wake)\b/.test(
        normalized,
      ) ||
      /\b\d{4}-\d{2}-\d{2}\b/.test(normalized) ||
      /\b\d{1,2}\/\d{1,2}(?:\/\d{2,4})?\b/.test(normalized) ||
      /\b(january|february|march|april|may|june|july|august|september|october|november|december)\b/.test(
        normalized,
      ),
  }
}

function buildSearchQuery(profile: MemorySearchProfile): string | null {
  if (profile.tokens.length === 0) return null
  return profile.tokens.map((token) => `"${token.replace(/"/g, '""')}"*`).join(" OR ")
}

async function readMemoryDocumentRows(): Promise<Map<string, MemoryDocumentRow>> {
  const rows = getDb()
    .prepare("select id, path, kind, title, content_hash, mtime_ms from memory_documents")
    .all() as MemoryDocumentRow[]

  return new Map(rows.map((row) => [row.path, row]))
}

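// Re-indexes the markdown memory files into SQLite. Files whose content hash and mtime are
// unchanged are skipped, deleted files are removed, and embeddings are refreshed afterwards.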
export async function syncMemoryIndex(): Promise<void> {
  const db = getDb()
  const files = await listMemoryFiles()
  const known = await readMemoryDocumentRows()
  const present = new Set(files)

  const deleteChunksByDocument = db.prepare("delete from memory_chunks where document_id = ?")
  const insertDocument = db.prepare(`
    insert into memory_documents (path, kind, title, mtime_ms, content_hash, updated_at)
    values (@path, @kind, @title, @mtime_ms, @content_hash, datetime('now'))
    on conflict(path) do update set
      kind = excluded.kind,
      title = excluded.title,
      mtime_ms = excluded.mtime_ms,
      content_hash = excluded.content_hash,
      updated_at = datetime('now')
  `)
  const selectDocumentId = db.prepare("select id from memory_documents where path = ?")
  const insertChunk = db.prepare(`
    insert into memory_chunks (document_id, chunk_index, title, heading_path, chunk_text, tags)
    values (?, ?, ?, ?, ?, ?)
  `)
  const deleteDocumentByPath = db.prepare("delete from memory_documents where path = ?")

  const updates: Array<{
    path: string
    kind: MemoryKind
    title: string
    mtimeMs: number
    hash: string
    chunks: MemoryChunkInput[]
    action: "inserted" | "updated"
  }> = []

  for (const filePath of files) {
    const kind = detectMemoryKind(filePath)
    if (!kind) continue

    const [content, stat] = await Promise.all([fs.readFile(filePath, "utf-8"), fs.stat(filePath)])
    const hash = contentHash(content)
    const previous = known.get(filePath)
    if (previous && previous.content_hash === hash && previous.mtime_ms === Math.floor(stat.mtimeMs)) continue

    const parsed = parseMarkdownDocument(filePath, content)
    updates.push({
      path: filePath,
      kind,
      title: parsed.title,
      mtimeMs: Math.floor(stat.mtimeMs),
      hash,
      chunks: parsed.chunks,
      action: previous ? "updated" : "inserted",
    })
  }

  const removedPaths: string[] = []

  db.transaction(() => {
    for (const item of updates) {
      insertDocument.run({
        path: item.path,
        kind: item.kind,
        title: item.title,
        mtime_ms: item.mtimeMs,
        content_hash: item.hash,
      })
      const row = selectDocumentId.get(item.path) as { id: number } | undefined
      if (!row) continue

      deleteChunksByDocument.run(row.id)
      item.chunks.forEach((chunk, index) => {
        insertChunk.run(row.id, index, chunk.title, chunk.headingPath, chunk.text, chunk.tags)
      })

      console.log(
        `[memory] ${item.action} kind=${item.kind} chunks=${item.chunks.length} path=${path.relative(HOME_DIR, item.path)}`,
      )
    }

    for (const filePath of known.keys()) {
      if (present.has(filePath)) continue
      deleteDocumentByPath.run(filePath)
      removedPaths.push(filePath)
      console.log(`[memory] removed path=${path.relative(HOME_DIR, filePath)}`)
    }
  })()

  if (updates.length === 0 && removedPaths.length === 0) {
    await syncMemoryEmbeddings()
    return
  }

  await syncMemoryEmbeddings()
}

let embeddingSkipWarned = false

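// Embeds any chunks whose model, dimensions, or content hash no longer match the stored
// metadata, writing vectors to the sqlite-vec table in batches.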
async function syncMemoryEmbeddings(): Promise<void> {
  if (!isVecAvailable()) return
  if (!embeddingsConfigured()) {
    if (!embeddingSkipWarned) {
      console.warn("[memory] embeddings disabled: set EMBEDDING_API_KEY")
      embeddingSkipWarned = true
    }
    return
  }
  if (EMBEDDING_DIMENSIONS !== MEMORY_EMBEDDING_DIMENSIONS) {
    if (!embeddingSkipWarned) {
      console.warn(
        `[memory] embeddings disabled: EMBEDDING_DIMENSIONS=${EMBEDDING_DIMENSIONS} but sqlite-vec table is ${MEMORY_EMBEDDING_DIMENSIONS}`,
      )
      embeddingSkipWarned = true
    }
    return
  }

  const db = getDb()
  try {
    await syncMemoryEmbeddingPrototypes()
  } catch (err: any) {
    console.warn(`[memory] prototype embedding sync failed: ${err?.message ?? String(err)}`)
    return
  }
  db.prepare("delete from memory_embedding_meta where chunk_id not in (select id from memory_chunks)").run()
  db.prepare("delete from memory_chunk_vec where rowid not in (select id from memory_chunks)").run()

  const rows = db
    .prepare(`
      select
        c.id as chunkId,
        d.path as path,
        d.kind as kind,
        d.title as documentTitle,
        c.title as title,
        c.heading_path as headingPath,
        c.chunk_text as text,
        c.tags as tags,
        m.model as model,
        m.dimensions as dimensions,
        m.content_hash as contentHash
      from memory_chunks c
      join memory_documents d on d.id = c.document_id
      left join memory_embedding_meta m on m.chunk_id = c.id
      order by d.kind, d.path, c.chunk_index
    `)
    .all() as MemoryEmbeddingRow[]

  const pending = rows
    .map((row) => {
      const text = embeddingTextForChunk(row)
      return { ...row, embeddingText: text, embeddingHash: embeddingInputHash(text) }
    })
    .filter(
      (row) =>
        row.model !== EMBEDDING_MODEL ||
        row.dimensions !== MEMORY_EMBEDDING_DIMENSIONS ||
        row.contentHash !== row.embeddingHash,
    )

  if (pending.length === 0) return

  const upsertMeta = db.prepare(`
    insert into memory_embedding_meta (chunk_id, model, dimensions, content_hash, updated_at)
    values (?, ?, ?, ?, datetime('now'))
    on conflict(chunk_id) do update set
      model = excluded.model,
      dimensions = excluded.dimensions,
      content_hash = excluded.content_hash,
      updated_at = datetime('now')
  `)
  const upsertVector = db.prepare("insert or replace into memory_chunk_vec(rowid, embedding) values (?, ?)")

  let embedded = 0
  for (let i = 0; i < pending.length; i += MEMORY_EMBEDDING_BATCH_SIZE) {
    const batch = pending.slice(i, i + MEMORY_EMBEDDING_BATCH_SIZE)
    let vectors: number[][]
    try {
      vectors = await embedTexts(batch.map((row) => row.embeddingText))
    } catch (err: any) {
      console.warn(`[memory] embedding batch failed: ${err?.message ?? String(err)}`)
      return
    }

    db.transaction(() => {
      batch.forEach((row, index) => {
        const vector = vectors[index]
        if (!vector) return
        if (vector.length !== MEMORY_EMBEDDING_DIMENSIONS) {
          throw new Error(`embedding dimension mismatch: got ${vector.length}, expected ${MEMORY_EMBEDDING_DIMENSIONS}`)
        }
        upsertVector.run(BigInt(row.chunkId), vectorParam(vector))
        upsertMeta.run(row.chunkId, EMBEDDING_MODEL, MEMORY_EMBEDDING_DIMENSIONS, row.embeddingHash)
        embedded += 1
      })
    })()
  }

  console.log(`[memory] embedded chunks=${embedded} model=${EMBEDDING_MODEL} dimensions=${MEMORY_EMBEDDING_DIMENSIONS}`)
}

async function syncMemoryEmbeddingPrototypes(): Promise<void> {
  const db = getDb()
  const rows = db
    .prepare("select id, name, category, model, dimensions, content_hash as contentHash from memory_embedding_prototypes")
    .all() as Array<{
    id: number
    name: string
    category: string
    model: string
    dimensions: number
    contentHash: string
  }>
  const known = new Map(rows.map((row) => [row.id, row]))
  const pending = MEMORY_EMBEDDING_PROTOTYPES.map((prototype) => ({
    ...prototype,
    hash: embeddingInputHash(`${prototype.category}\n${prototype.name}\n${prototype.text}`),
  })).filter((prototype) => {
    const row = known.get(prototype.id)
    return (
      !row ||
      row.name !== prototype.name ||
      row.category !== prototype.category ||
      row.model !== EMBEDDING_MODEL ||
      row.dimensions !== MEMORY_EMBEDDING_DIMENSIONS ||
      row.contentHash !== prototype.hash
    )
  })

  if (pending.length === 0) return

  const vectors = await embedTexts(pending.map((prototype) => prototype.text))
  const upsertPrototype = db.prepare(`
    insert into memory_embedding_prototypes (id, name, category, model, dimensions, content_hash, updated_at)
    values (?, ?, ?, ?, ?, ?, datetime('now'))
    on conflict(id) do update set
      name = excluded.name,
      category = excluded.category,
      model = excluded.model,
      dimensions = excluded.dimensions,
      content_hash = excluded.content_hash,
      updated_at = datetime('now')
  `)
  const upsertVector = db.prepare("insert or replace into memory_prototype_vec(rowid, embedding) values (?, ?)")

  db.transaction(() => {
    pending.forEach((prototype, index) => {
      const vector = vectors[index]
      if (!vector) return
      if (vector.length !== MEMORY_EMBEDDING_DIMENSIONS) {
        throw new Error(`prototype embedding dimension mismatch: got ${vector.length}, expected ${MEMORY_EMBEDDING_DIMENSIONS}`)
      }
      upsertVector.run(BigInt(prototype.id), vectorParam(vector))
      upsertPrototype.run(
        prototype.id,
        prototype.name,
        prototype.category,
        EMBEDDING_MODEL,
        MEMORY_EMBEDDING_DIMENSIONS,
        prototype.hash,
      )
    })
  })()
}

function senderHandles(profile: MemorySearchProfile): string[] {
  const out: string[] = []
  if (profile.sender) out.push(profile.sender)
  for (const alias of profile.senderAliases) out.push(alias)
  return out
}

function allPersonHandles(profile: MemorySearchProfile): string[] {
  const seen = new Set<string>()
  const out: string[] = []
  for (const handle of [...senderHandles(profile), ...profile.bodyPeople]) {
    if (!handle || seen.has(handle)) continue
    seen.add(handle)
    out.push(handle)
  }
  return out
}

function targetPersonHandles(profile: MemorySearchProfile): string[] {
  return profile.bodyPeople.length > 0 ? profile.bodyPeople : allPersonHandles(profile)
}

function hitMatchesHandle(hit: MemoryHit, handle: string): boolean {
  const titleHaystack = `${hit.documentTitle} ${hit.title} ${hit.headingPath ?? ""} ${basenameWithoutExt(hit.path)}`.toLowerCase()
  const pathHaystack = hit.path.toLowerCase()
  return titleHaystack.includes(handle) || pathHaystack.includes(`/${handle}.md`)
}

function hitMentionsHandle(hit: MemoryHit, handle: string): boolean {
  return hitMatchesHandle(hit, handle) || hitSearchHaystack(hit).includes(handle)
}

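// Lexical scoring: starts from the FTS bm25 rank (lower is better) and applies kind- and
// person-based adjustments depending on the query profile.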
function scoreMemoryHit(hit: MemoryHit, profile: MemorySearchProfile): number {
  let score = hit.rank

  if (profile.personQuery && !profile.eventQuery) {
    if (hit.kind === "people") score -= 2
    if (hit.kind === "core") score -= 0.5
    if (hit.kind === "journal") score += 1.5
  } else if (profile.eventQuery && !profile.personQuery) {
    if (hit.kind === "journal") score -= 1.5
    if (hit.kind === "people") score += 0.75
    if (hit.kind === "core") score += 0.25
  } else {
    if (hit.kind === "people") score -= 0.5
    if (hit.kind === "core") score -= 0.25
  }

  const titleHaystack = `${hit.documentTitle} ${hit.title} ${hit.headingPath ?? ""} ${basenameWithoutExt(hit.path)}`.toLowerCase()
  if (profile.bodyTokens.some((token) => titleHaystack.includes(token))) score -= 0.35

  const handles = allPersonHandles(profile)
  if (handles.length > 0) {
    const fullHaystack = `${titleHaystack} ${hit.text.toLowerCase()}`
    const pathHaystack = hit.path.toLowerCase()
    if (handles.some((h) => titleHaystack.includes(h) || pathHaystack.includes(`/${h}.md`))) {
      score -= 3
    } else if (handles.some((h) => fullHaystack.includes(h))) {
      score -= 1
    }
  }

  if (profile.bodyPeople.length > 0) {
    const targetHandles = targetPersonHandles(profile)
    const matchesTarget = targetHandles.some((handle) => hitMatchesHandle(hit, handle))
    if (matchesTarget) {
      score -= 4
    } else if (hit.kind === "people") {
      score += 3
    } else if (hit.kind === "core" && !profileHasCoreIntent(profile)) {
      score += 3.5
    }
  }

  return score
}

function hitSearchHaystack(hit: MemoryHit): string {
  return `${hit.documentTitle} ${hit.title} ${hit.headingPath ?? ""} ${basenameWithoutExt(hit.path)} ${hit.text}`.toLowerCase()
}

function memoryHitSignal(hit: MemoryHit, profile: MemorySearchProfile): MemoryHitSignal {
  const haystack = hitSearchHaystack(hit)
  let overlap = 0
  let strongOverlap = false
  let bodyOverlap = 0

  for (const token of profile.tokens) {
    if (!haystack.includes(token)) continue
    overlap += 1
    if (token.length >= 5 || /[0-9]/.test(token)) strongOverlap = true
  }
  for (const token of profile.bodyTokens) {
    if (haystack.includes(token)) bodyOverlap += 1
  }

  const handles = allPersonHandles(profile)
  const titleHaystack = `${hit.documentTitle} ${hit.title} ${hit.headingPath ?? ""} ${basenameWithoutExt(hit.path)}`.toLowerCase()
  const pathHaystack = hit.path.toLowerCase()
  const senderMatch =
    handles.length > 0 &&
    handles.some((h) => titleHaystack.includes(h) || pathHaystack.includes(`/${h}.md`) || haystack.includes(h))

  return { overlap, strongOverlap, bodyOverlap, senderMatch }
}

function shouldInjectHits(hits: MemoryHit[], profile: MemorySearchProfile): boolean {
  if (hits.length === 0) return false

  const topSignal = memoryHitSignal(hits[0]!, profile)

  if (profile.sender || profile.bodyPeople.length > 0) {
    if (!profile.bodyInformative) return false
    if (topSignal.senderMatch) return true
    if (topSignal.bodyOverlap >= 2) return true
    if (topSignal.bodyOverlap >= 1 && topSignal.strongOverlap) return true
    return false
  }

  if (profile.eventQuery && !profile.personQuery) {
    return topSignal.overlap >= 1 && hits.some((hit) => hit.kind === "journal")
  }

  if (profile.personQuery && !profile.eventQuery) {
    return topSignal.overlap >= 1 && hits.some((hit) => hit.kind === "people" || hit.kind === "core")
  }

  if (topSignal.bodyOverlap >= 2) return true
  if (topSignal.bodyOverlap >= 1 && topSignal.strongOverlap) return true
  if (topSignal.overlap >= 2 && topSignal.strongOverlap) return true
  return false
}

function profileHasExplicitRecallIntent(profile: MemorySearchProfile): boolean {
  if (profile.bodyPeople.length > 0) return true
  if (profile.eventQuery) return true
  if (/\b(who is|who's|tell me about|remember|recall|what happened|what do i know|context)\b/.test(profile.normalized)) {
    return true
  }
  return false
}

function profileHasCoreIntent(profile: MemorySearchProfile): boolean {
  return /\b(core|identity|system|environment|lesson|rule|instruction|workflow|tool|config|memory)\b/.test(profile.normalized)
}

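// Embeds the query once and compares it against the stored prototype vectors to estimate
// how much it looks like idle chatter versus an explicit recall request.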
async function semanticQuerySignal(memoryQuery: string): Promise<SemanticQuerySignal | null> {
  if (!isVecAvailable() || !embeddingsConfigured() || EMBEDDING_DIMENSIONS !== MEMORY_EMBEDDING_DIMENSIONS) return null
  const [vector] = await embedTexts([memoryQuery])
  if (!vector || vector.length !== MEMORY_EMBEDDING_DIMENSIONS) return null

  const rows = getDb()
    .prepare(`
      select p.category as category, v.distance as distance
      from memory_prototype_vec v
      join memory_embedding_prototypes p on p.id = v.rowid
      where v.embedding match ?
        and k = ?
      order by v.distance
    `)
    .all(vectorParam(vector), 8) as Array<{ category: string; distance: number }>

  let chatterSimilarity: number | null = null
  let recallIntentSimilarity: number | null = null
  for (const row of rows) {
    const similarity = 1 - row.distance
    if (row.category === "chatter") chatterSimilarity = Math.max(chatterSimilarity ?? -Infinity, similarity)
    if (row.category === "recall_intent") {
      recallIntentSimilarity = Math.max(recallIntentSimilarity ?? -Infinity, similarity)
    }
  }

  return {
    vector,
    chatterSimilarity,
    recallIntentSimilarity,
  }
}

function shouldSkipForSemanticChatter(profile: MemorySearchProfile, signal: SemanticQuerySignal | null): boolean {
  if (!signal) return false
  if (profileHasExplicitRecallIntent(profile)) return false
  if (profile.bodyTokens.length > 5) return false
  const chatter = signal.chatterSimilarity ?? 0
  const recallIntent = signal.recallIntentSimilarity ?? 0
  return chatter >= MEMORY_CHATTER_SIMILARITY_THRESHOLD && recallIntent < MEMORY_RECALL_INTENT_SIMILARITY_THRESHOLD
}

function semanticDistancesForQuery(vector: number[], limit: number): Map<number, number> {
  if (!isVecAvailable()) return new Map()
  const rows = getDb()
    .prepare(`
      select rowid as chunkId, distance
      from memory_chunk_vec
      where embedding match ?
        and k = ?
      order by distance
    `)
    .all(vectorParam(vector), limit) as Array<{ chunkId: number; distance: number }>
  return new Map(rows.map((row) => [row.chunkId, row.distance]))
}

function exactPersonHits(profile: MemorySearchProfile): MemoryHit[] {
  if (profile.bodyPeople.length === 0) return []
  const db = getDb()
  const hits: MemoryHit[] = []
  const seen = new Set<number>()
  const stmt = db.prepare(`
    select
      c.id as chunkId,
      d.path as path,
      d.kind as kind,
      d.title as documentTitle,
      c.title as title,
      c.heading_path as headingPath,
      c.chunk_text as text,
      -10.0 as rank
    from memory_chunks c
    join memory_documents d on d.id = c.document_id
    where d.kind = 'people'
      and lower(d.path) like ?
    order by c.chunk_index
  `)

  for (const handle of profile.bodyPeople) {
    const rows = stmt.all(`%/people/${handle.toLowerCase()}.md`) as MemoryHit[]
    for (const row of rows) {
      if (seen.has(row.chunkId)) continue
      seen.add(row.chunkId)
      hits.push(row)
    }
  }
  return hits
}

function scoreMemoryHitWithSemantic(hit: MemoryHit, profile: MemorySearchProfile): number {
  let score = scoreMemoryHit(hit, profile)
  const coreIntent = profileHasCoreIntent(profile)

  if (hit.semanticSimilarity !== undefined) {
    score -= hit.semanticSimilarity * 2.5
    if (hit.kind === "core" && !coreIntent && hit.semanticSimilarity < MEMORY_SEMANTIC_STRONG_SIMILARITY) {
      score += 1.25
    }
  } else {
    score += 0.75
    if (hit.kind === "core" && !coreIntent) score += 1.25
  }

  if (hit.kind === "core" && !coreIntent) score += 0.75
  return score
}

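// Combines FTS hits, exact people-file hits, and semantic distances, then re-ranks, filters
// by cooldown and person targeting, and returns up to `limit` deduplicated chunks.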
async function searchMemory(
  profile: MemorySearchProfile,
  cooldowns: Record<number, number>,
  currentTurn: number,
  limit: number,
  semanticSignal: SemanticQuerySignal | null = null,
): Promise<MemoryHit[]> {
  const db = getDb()
  const query = buildSearchQuery(profile)
  if (!query) return []
  const rows = db
    .prepare(`
      select
        c.id as chunkId,
        d.path as path,
        d.kind as kind,
        d.title as documentTitle,
        c.title as title,
        c.heading_path as headingPath,
        c.chunk_text as text,
        bm25(memory_chunks_fts, 5.0, 2.0, 1.0, 0.5) as rank
      from memory_chunks_fts
      join memory_chunks c on c.id = memory_chunks_fts.rowid
      join memory_documents d on d.id = c.document_id
      where memory_chunks_fts match ?
      order by rank
      limit ?
    `)
    .all(query, Math.max(limit * 8, limit)) as MemoryHit[]

  for (const hit of exactPersonHits(profile)) {
    if (rows.some((row) => row.chunkId === hit.chunkId)) continue
    rows.push(hit)
  }

  if (semanticSignal) {
    const distances = semanticDistancesForQuery(semanticSignal.vector, Math.max(limit * 16, 64))
    for (const row of rows) {
      const distance = distances.get(row.chunkId)
      if (distance === undefined) continue
      row.semanticDistance = distance
      row.semanticSimilarity = 1 - distance
    }
  }

  const ordered = rows.sort((a, b) => scoreMemoryHitWithSemantic(a, profile) - scoreMemoryHitWithSemantic(b, profile))
  const pool =
    profile.personQuery && !profile.eventQuery
      ? (() => {
          const structured = ordered.filter((row) => row.kind === "people" || row.kind === "core")
          return structured.length > 0 ? structured : ordered
        })()
      : profile.eventQuery && !profile.personQuery
        ? (() => {
            const journal = ordered.filter((row) => row.kind === "journal")
            return journal.length > 0 ? journal : ordered
          })()
        : ordered

  const deduped: MemoryHit[] = []
  const seenChunkIds = new Set<number>()
  const seenPaths = new Set<string>()

  const handles = targetPersonHandles(profile)
  const exactTargetAvailable =
    profile.bodyPeople.length > 0 && pool.some((row) => handles.some((handle) => hitMatchesHandle(row, handle)))
  if (handles.length >= 2) {
    for (const handle of handles) {
      if (deduped.length >= limit) break
      const candidate = pool.find(
        (row) =>
          !seenChunkIds.has(row.chunkId) &&
          !seenPaths.has(row.path) &&
          !isCoolingDown(row, cooldowns, currentTurn) &&
          hitMatchesHandle(row, handle),
      )
      if (!candidate) continue
      seenChunkIds.add(candidate.chunkId)
      seenPaths.add(candidate.path)
      deduped.push(candidate)
    }
  }

  for (const row of pool) {
    if (deduped.length >= limit) break
    if (seenChunkIds.has(row.chunkId)) continue
    if (isCoolingDown(row, cooldowns, currentTurn)) continue
    const exactTargetMatch = profile.bodyPeople.length > 0 && handles.some((handle) => hitMatchesHandle(row, handle))
    if (exactTargetAvailable && !exactTargetMatch && row.kind !== "journal") {
      continue
    }
    if (profile.bodyPeople.length > 0 && !profile.bodyPeople.some((handle) => hitMentionsHandle(row, handle))) {
      continue
    }
    if (
      semanticSignal &&
      row.kind === "core" &&
      !profileHasCoreIntent(profile) &&
      (row.semanticSimilarity ?? 0) < MEMORY_SEMANTIC_MIN_SIMILARITY
    ) {
      continue
    }
    seenChunkIds.add(row.chunkId)
    deduped.push(row)
  }

  return deduped
}

function formatMemorySource(hit: MemoryHit): string {
  const relativePath = path.relative(HOME_DIR, hit.path)
  const pieces = [`[${hit.kind}]`, hit.documentTitle]
  if (hit.headingPath && hit.headingPath !== hit.documentTitle) pieces.push(hit.headingPath)
  pieces.push(relativePath)
  return pieces.join(" / ")
}

function toMemorySearchResult(hit: MemoryHit): MemorySearchResult {
  return {
    chunkId: hit.chunkId,
    kind: hit.kind,
    path: hit.path,
    source: formatMemorySource(hit),
    title: hit.title,
    headingPath: hit.headingPath,
    preview: trimForPrompt(normalizeText(hit.text), 280),
  }
}

function buildMemoryRecallMessage(hits: MemoryHit[], maxChars: number): string {
  const lines = [MEMORY_RECALL_HEADER, MEMORY_RECALL_NOTE, ""]
  let usedChars = lines.join("\n").length

  for (const hit of hits) {
    const source = formatMemorySource(hit)
    const remaining = Math.max(120, maxChars - usedChars - source.length - 10)
    const body = trimForPrompt(normalizeText(hit.text), Math.min(280, remaining))
    const block = `- ${source}\n ${body}`

    if (usedChars + block.length > maxChars && lines.length > 3) break
    lines.push(block)
    usedChars += block.length + 1
  }

  return lines.join("\n").trim()
}

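// Main entry point for a turn: derives a query from the latest user message, decides whether
// recall is warranted, and appends a "[memory recall v1]" message when it is.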
export async function buildCompletionMessages(
  conversation: Message[],
  cooldowns: Record<number, number>,
  currentTurn: number,
): Promise<{ messages: Message[]; recalledChunkIds: number[] }> {
  const memoryQuerySource = latestMemoryRecallQuery(conversation)
  if (!memoryQuerySource) return { messages: conversation, recalledChunkIds: [] }
  const queryParts = memoryQueryForUserMessage(memoryQuerySource)
  const memoryQuery = memoryQueryToString(queryParts)

  await syncMemoryIndex()

  const profile = await buildSearchProfile(queryParts)
  if (profile.tokens.length === 0) return { messages: conversation, recalledChunkIds: [] }

  if ((profile.sender || profile.bodyPeople.length > 0) && !profile.bodyInformative) {
    console.log(
      `[memory] skipped query=${JSON.stringify(trimForPrompt(normalizeText(memoryQuery), 120))} sender=${profile.sender ?? "-"} reason=trivial-body`,
    )
    return { messages: conversation, recalledChunkIds: [] }
  }

  let semanticSignal: SemanticQuerySignal | null = null
  try {
    semanticSignal = await semanticQuerySignal(memoryQuery)
  } catch (err: any) {
    console.warn(`[memory] semantic query failed: ${err?.message ?? String(err)}`)
  }

  if (shouldSkipForSemanticChatter(profile, semanticSignal)) {
    console.log(
      `[memory] skipped query=${JSON.stringify(trimForPrompt(normalizeText(memoryQuery), 120))} sender=${profile.sender ?? "-"} reason=semantic-chatter chatter=${semanticSignal?.chatterSimilarity?.toFixed(3) ?? "-"} recallIntent=${semanticSignal?.recallIntentSimilarity?.toFixed(3) ?? "-"}`,
    )
    return { messages: conversation, recalledChunkIds: [] }
  }

  const personCount = allPersonHandles(profile).length
  const recallLimit = Math.min(
    MEMORY_RECALL_MAX_CHUNKS_HARD_CAP,
    personCount >= 2 ? MEMORY_RECALL_MAX_CHUNKS + (personCount - 1) : MEMORY_RECALL_MAX_CHUNKS,
  )
  const hits = await searchMemory(profile, cooldowns, currentTurn, recallLimit, semanticSignal)
  const aliasInfo = profile.senderAliases.length > 0 ? ` aliases=${profile.senderAliases.join(",")}` : ""
  const peopleInfo = profile.bodyPeople.length > 0 ? ` people=${profile.bodyPeople.join(",")}` : ""
  const debugTag = `sender=${profile.sender ?? "-"}${aliasInfo}${peopleInfo} personQuery=${profile.personQuery} eventQuery=${profile.eventQuery}`
  if (hits.length === 0) {
    console.log(
      `[memory] no-hits query=${JSON.stringify(trimForPrompt(normalizeText(memoryQuery), 120))} ${debugTag}`,
    )
    return { messages: conversation, recalledChunkIds: [] }
  }
  if (!shouldInjectHits(hits, profile)) {
    console.log(
      `[memory] skipped query=${JSON.stringify(trimForPrompt(normalizeText(memoryQuery), 120))} ${debugTag} reason=weak-match`,
    )
    return { messages: conversation, recalledChunkIds: [] }
  }

  const extraPersons = Math.max(0, personCount - 1)
  const recallChars = MEMORY_RECALL_MAX_CHARS + extraPersons * MEMORY_RECALL_PER_EXTRA_PERSON_CHARS
  const recallContent = buildMemoryRecallMessage(hits, recallChars)
  console.log(
    `[memory] recalled query=${JSON.stringify(trimForPrompt(normalizeText(memoryQuery), 120))} ${debugTag}\n${recallContent}`,
  )

  recordMetric({
    type: "memory",
    query: memoryQuery,
    results: hits.map(toMemorySearchResult),
  })

  const recallMessage: Message = {
    role: "user",
    content: recallContent,
  }

  return {
    messages: [...conversation, recallMessage],
    recalledChunkIds: hits.map((hit) => hit.chunkId),
  }
}

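// Records which chunks were just injected so cooldowns can suppress them on the next few
// turns; stale entries are pruned once they are well past the cooldown window.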
export function rememberRecalledMemoryChunks(
  cooldowns: Record<number, number>,
  injectedChunkIds: number[],
  currentTurn: number,
): Record<number, number> {
  const next = { ...cooldowns }
  for (const chunkId of injectedChunkIds) next[chunkId] = currentTurn

  for (const [chunkId, turn] of Object.entries(next)) {
    if (currentTurn - turn >= MEMORY_RECALL_COOLDOWN_TURNS * 2) delete next[Number(chunkId)]
  }

  return next
}

export const __memoryTest = {
  latestMemoryRecallQuery,
  memoryQueryForUserMessage,
  memoryQueryToString,
  normalizeBodyText,
  searchTokens,
  buildSearchProfile,
  resolveAliases,
  normalizeHandle,
}

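// Manual/tool-facing search that bypasses cooldowns and returns preview-sized results.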
export async function searchMemories(rawQuery: string, limit = 5): Promise<MemorySearchResult[]> {
  await syncMemoryIndex()

  const profile = await buildSearchProfile({ sender: null, source: null, body: rawQuery })
  if (profile.tokens.length === 0) return []

  let semanticSignal: SemanticQuerySignal | null = null
  try {
    semanticSignal = await semanticQuerySignal(rawQuery)
  } catch {
    semanticSignal = null
  }

  const results = (await searchMemory(
    profile,
    {},
    Number.POSITIVE_INFINITY,
    Math.max(1, Math.min(limit, 10)),
    semanticSignal,
  )).map(toMemorySearchResult)
  recordMetric({
    type: "memory",
    query: rawQuery,
    results,
  })
  return results
}