fix weave: LLM-first tag consolidation instead of embedding pre-filter

+75 -79

1 changed file

expand all

flows

weave.py

+75 -79

flows/weave.py

··· 185 185 tag_embeddings: dict[str, list[float]], 186 186 api_key: str, 187 187 ) -> list[dict[str, Any]]: 188 - """Cluster tags by embedding similarity, LLM confirms merges.""" 189 - tags = list(tag_embeddings.keys()) 188 + """Give the LLM the full tag inventory and let it propose consolidations.""" 189 + # format every tag with usage context so the LLM can make informed decisions 190 + tag_lines = [] 191 + for tag in sorted(tag_info.keys()): 192 + info = tag_info[tag] 193 + users = info.get("users", []) 194 + count = info.get("count", 0) 195 + episodic = info.get("episodic_count", 0) 196 + sample = (info.get("samples") or [""])[0][:120] 197 + tag_lines.append( 198 + f" {tag} (obs={count}, episodic={episodic}, users={len(users)})" 199 + f"\n sample: {sample}" 200 + ) 190 201 191 - # find high-similarity pairs (>= 0.85) as merge candidates 192 - candidates: list[tuple[str, str, float]] = [] 193 - for i, t1 in enumerate(tags): 194 - for t2 in tags[i + 1 :]: 195 - sim = cosine_similarity(tag_embeddings[t1], tag_embeddings[t2]) 196 - if sim >= 0.85: 197 - candidates.append((t1, t2, sim)) 198 - 199 - if not candidates: 200 - return [] 201 - 202 - candidates.sort(key=lambda x: -x[2]) 203 - candidates_text = "\n".join( 204 - f"- \"{t1}\" <-> \"{t2}\" (similarity: {sim:.3f})\n" 205 - f" {t1} context: {(tag_info.get(t1) or {}).get('samples', [''])[0][:100]}\n" 206 - f" {t2} context: {(tag_info.get(t2) or {}).get('samples', [''])[0][:100]}" 207 - for t1, t2, sim in candidates[:30] # cap at 30 pairs 208 - ) 202 + inventory = "\n".join(tag_lines) 209 203 210 204 model = AnthropicModel( 211 205 "claude-haiku-4-5", provider=AnthropicProvider(api_key=api_key) ··· 213 207 agent = Agent( 214 208 model, 215 209 system_prompt=( 216 - "you review tag merge candidates for a memory graph. for each pair, decide:\n" 217 - "- MERGE: same concept — pick the canonical form\n" 218 - "- RELATE: distinct but related — don't merge\n" 219 - "- SKIP: not meaningfully related despite embedding similarity\n\n" 220 - "prefer lowercase, hyphenated canonical forms (e.g. 'ai-systems' not 'AI_systems').\n" 221 - "group transitive merges (if a merges with b and b merges with c, " 222 - "produce one merge with canonical + all aliases).\n" 223 - "put RELATE pairs in the 'related' field of the merge they're closest to, " 224 - "or omit if they don't belong to any merge group." 210 + "you are consolidating the tag vocabulary for phi's memory graph.\n\n" 211 + "you will receive the full inventory of tags with usage counts and sample " 212 + "observations. your job:\n\n" 213 + "1. MERGE tags that are the same concept with different surface forms.\n" 214 + " examples: 'attestation' / 'self-attestation' → canonical: 'attestation'\n" 215 + " 'ai_systems' / 'bot' / 'system-improvement' → canonical: 'ai-systems'\n\n" 216 + "2. mark tags that are RELATED but distinct — these should link, not merge.\n" 217 + " example: 'epistemology' / 'social-epistemology' → related, not merged\n\n" 218 + "rules:\n" 219 + "- prefer lowercase, hyphenated canonical forms\n" 220 + "- group transitive merges into one entry\n" 221 + "- put related (but not merged) tags in the 'related' field\n" 222 + "- only merge when you're confident they're the same concept\n" 223 + "- it's fine to return zero merges if the tags are already clean\n" 224 + "- look for underscored vs hyphenated variants, singular/plural, " 225 + "abbreviations, and overlapping concepts" 225 226 ), 226 227 output_type=MergeProposal, 227 228 name="tag-merger", 228 229 ) 229 230 230 - result = await agent.run(f"merge candidates:\n{candidates_text}") 231 - # serialize to dicts for prefect result persistence 231 + result = await agent.run(f"full tag inventory ({len(tag_info)} tags):\n{inventory}") 232 232 return [m.model_dump() for m in result.output.merges] 233 233 234 234 ··· 353 353 merged_aliases: set[str], 354 354 api_key: str, 355 355 ) -> list[dict[str, Any]]: 356 - """Score and LLM-confirm relationships between non-merged tags.""" 357 - tags = [t for t in tag_embeddings if t not in merged_aliases] 358 - 359 - # score tag pairs by combined signal 360 - scored: list[tuple[str, str, float, str]] = [] # (a, b, score, reason) 361 - for i, t1 in enumerate(tags): 362 - for t2 in tags[i + 1 :]: 363 - sim = cosine_similarity(tag_embeddings[t1], tag_embeddings[t2]) 364 - if sim < 0.4: 365 - continue 366 - 367 - # co-occurrence score 368 - pair_key = "|".join(sorted([t1, t2])) 369 - cooccur = cooccurrences.get(pair_key, 0) 370 - 371 - # shared users score 372 - shared_users = sum( 373 - 1 374 - for tags_list in user_tag_sets.values() 375 - if t1 in tags_list and t2 in tags_list 376 - ) 377 - 378 - # combine signals 379 - score = sim * 0.5 380 - if cooccur > 0: 381 - score += min(cooccur / 5, 0.3) # cap at 0.3 382 - if shared_users > 0: 383 - score += min(shared_users / 3, 0.2) # cap at 0.2 356 + """Give the LLM the full tag list with co-occurrence context to find relationships.""" 357 + tags = [t for t in sorted(tag_info.keys()) if t not in merged_aliases] 384 358 385 - if score >= 0.5: 386 - reason = f"sim={sim:.2f}, cooccur={cooccur}, shared_users={shared_users}" 387 - scored.append((t1, t2, score, reason)) 359 + # precompute co-occurrence hints to give the LLM as context 360 + cooccur_hints: dict[str, list[str]] = defaultdict(list) 361 + for pair_key, count in cooccurrences.items(): 362 + t1, t2 = pair_key.split("|", 1) 363 + if t1 in merged_aliases or t2 in merged_aliases: 364 + continue 365 + if count >= 2: 366 + cooccur_hints[t1].append(f"{t2} ({count}x)") 367 + cooccur_hints[t2].append(f"{t1} ({count}x)") 388 368 389 - if not scored: 390 - return [] 369 + # format tag inventory with context 370 + tag_lines = [] 371 + for tag in tags: 372 + info = tag_info.get(tag, {}) 373 + count = info.get("count", 0) 374 + episodic = info.get("episodic_count", 0) 375 + n_users = len(info.get("users", [])) 376 + sample = (info.get("samples") or [""])[0][:120] 377 + cooccur_str = "" 378 + if tag in cooccur_hints: 379 + cooccur_str = f"\n co-occurs with: {', '.join(cooccur_hints[tag][:5])}" 380 + tag_lines.append( 381 + f" {tag} (obs={count}, episodic={episodic}, users={n_users})" 382 + f"\n sample: {sample}{cooccur_str}" 383 + ) 391 384 392 - scored.sort(key=lambda x: -x[2]) 393 - candidates_text = "\n".join( 394 - f"- \"{t1}\" <-> \"{t2}\" (score: {score:.2f}, {reason})\n" 395 - f" {t1}: {(tag_info.get(t1) or {}).get('samples', [''])[0][:100]}\n" 396 - f" {t2}: {(tag_info.get(t2) or {}).get('samples', [''])[0][:100]}" 397 - for t1, t2, score, reason in scored[:30] 398 - ) 385 + inventory = "\n".join(tag_lines) 399 386 400 387 model = AnthropicModel( 401 388 "claude-haiku-4-5", provider=AnthropicProvider(api_key=api_key) ··· 403 390 agent = Agent( 404 391 model, 405 392 system_prompt=( 406 - "you review candidate tag relationships for a memory graph belonging to phi, " 407 - "a bluesky bot that remembers conversations.\n\n" 408 - "for each pair, decide if there's a genuine conceptual relationship:\n" 393 + "you are mapping relationships between tags in phi's memory graph.\n" 394 + "phi is a bluesky bot that remembers conversations and builds knowledge.\n\n" 395 + "you will receive the full tag inventory with usage counts, sample observations, " 396 + "and co-occurrence data. your job: identify genuine conceptual relationships.\n\n" 397 + "relationship types:\n" 409 398 "- RELATED: broadly connected concepts\n" 410 399 "- SUBTOPIC: one is a narrower form of the other\n" 411 - "- OVERLAPPING: partially shared meaning\n" 412 - "- SKIP: co-occurrence is coincidental, not conceptual\n\n" 413 - "assign confidence 0.0-1.0. be honest — surface co-occurrence ≠ real relationship.\n" 414 - "provide brief evidence for each accepted relationship." 400 + "- OVERLAPPING: partially shared meaning but distinct\n\n" 401 + "rules:\n" 402 + "- assign confidence 0.0-1.0 based on how strong the connection is\n" 403 + "- co-occurrence is a signal but not proof — two tags appearing together " 404 + "might be coincidental\n" 405 + "- look for thematic clusters, not just pairs\n" 406 + "- provide brief evidence for each relationship\n" 407 + "- be selective — only include relationships you're genuinely confident about\n" 408 + "- skip trivially obvious connections (like a tag co-occurring with itself)" 415 409 ), 416 410 output_type=RelationshipProposal, 417 411 name="tag-relator", 418 412 ) 419 413 420 - result = await agent.run(f"relationship candidates:\n{candidates_text}") 414 + result = await agent.run( 415 + f"full tag inventory ({len(tags)} tags after merges):\n{inventory}" 416 + ) 421 417 return [r.model_dump() for r in result.output.relationships] 422 418 423 419

Configure Feed

Configure Feed