my prefect server setup prefect-metrics.waow.tech
python orchestration
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

weave: use observation-level tag mapping instead of embedding matcher

the embedding-based _match_cards_to_tags returned 0 matches because short
URL strings don't embed close to tag names. now create_cluster_cards
returns both cards and a tag->card mapping built from the observations
that contained those URLs — direct association, no similarity threshold.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

+42 -19
+42 -19
flows/weave.py
··· 640 640 tpuf_key: str, 641 641 tag_info: dict[str, dict], 642 642 existing_cards: list[dict[str, Any]], 643 - ) -> list[dict[str, Any]]: 643 + ) -> dict[str, Any]: 644 644 """Create cosmik URL cards for URLs found in phi's observations. 645 645 646 646 Scans all observations and episodic memories for URLs, creates cards for 647 - those that don't exist yet. Returns the full updated card list. 647 + those that don't exist yet. Returns {cards: [...], tag_cards: {tag: [card]}}. 648 + The tag_cards mapping is built from observation-level tag associations, 649 + which is far more reliable than embedding-based matching. 648 650 """ 649 651 logger = get_run_logger() 650 652 ··· 693 695 url_evidence[url]["tags"].update(row_tags) 694 696 url_evidence[url]["count"] += 1 695 697 698 + # build tag->card mapping for ALL cards (existing + new) 699 + # for existing cards, match by URL presence in observations 700 + tag_cards: dict[str, list[dict[str, Any]]] = defaultdict(list) 701 + 702 + # also index existing cards by their URL for tag mapping 703 + existing_by_url: dict[str, dict[str, Any]] = {} 704 + for card in existing_cards: 705 + val = card.get("value", {}) 706 + if val.get("type") == "URL": 707 + card_url = val.get("content", {}).get("url", "") 708 + existing_by_url[card_url] = card 709 + # check if this URL appears in any observation evidence 710 + for url, evidence in url_evidence.items(): 711 + if url == card_url: 712 + for tag in evidence["tags"]: 713 + tag_cards[tag].append(card) 714 + # also check url_evidence for the already-existing cards 715 + for url in existing_urls: 716 + if url in url_evidence and url in existing_by_url: 717 + for tag in url_evidence[url]["tags"]: 718 + if existing_by_url[url] not in tag_cards.get(tag, []): 719 + tag_cards[tag].append(existing_by_url[url]) 720 + 696 721 if not url_evidence: 697 722 logger.info("no new URLs found in observations") 698 - return existing_cards 723 + return {"cards": existing_cards, "tag_cards": dict(tag_cards)} 699 724 700 - # sort by evidence strength (count * tag breadth) 725 + # sort by evidence strength 701 726 ranked = sorted( 702 727 url_evidence.items(), 703 728 key=lambda x: x[1]["count"] * len(x[1]["tags"]), ··· 706 731 707 732 new_cards = list(existing_cards) 708 733 created = 0 709 - for url, evidence in ranked[:20]: # cap at 20 new cards per run 734 + for url, evidence in ranked[:20]: 710 735 tags_str = ", ".join(sorted(evidence["tags"])[:5]) 711 736 record = { 712 737 "type": "URL", ··· 722 747 } 723 748 try: 724 749 result = _create_pds_record(session, "network.cosmik.card", record) 725 - new_cards.append({ 750 + card_entry = { 726 751 "uri": result["uri"], 727 752 "cid": result["cid"], 728 753 "value": record, 729 - }) 754 + } 755 + new_cards.append(card_entry) 756 + # map this card to its observation tags 757 + for tag in evidence["tags"]: 758 + tag_cards[tag].append(card_entry) 730 759 existing_urls.add(url) 731 760 created += 1 732 761 logger.info(f"created URL card: {url}") ··· 734 763 logger.warning(f"failed to create card for {url}: {e}") 735 764 736 765 logger.info(f"created {created} new URL cards from observations") 737 - return new_cards 766 + return {"cards": new_cards, "tag_cards": dict(tag_cards)} 738 767 739 768 740 769 @task ··· 977 1006 existing_conns = _list_cosmik_connections(PHI_DID) 978 1007 print(f"phase 4: found {len(existing_cards)} existing cards, {len(existing_conns)} connections") 979 1008 980 - # create cards from URLs in observations before trying to link 981 - cards = create_cluster_cards(session, tpuf_key, tag_info, existing_cards) 982 - print(f"phase 4: {len(cards)} total cards after creation") 983 - 984 - if not cards: 985 - print("phase 4: no cosmik cards — skipping promotion") 986 - return 987 - 988 - tag_cards = _match_cards_to_tags(cards, tag_info, openai_key) 989 - tags_with_cards = [t for t in tag_cards if tag_cards[t]] 990 - print(f"phase 4: matched cards to {len(tags_with_cards)} tags") 1009 + # create cards from URLs in observations + build tag->card mapping 1010 + card_result = create_cluster_cards(session, tpuf_key, tag_info, existing_cards) 1011 + cards = card_result["cards"] 1012 + tag_cards = card_result["tag_cards"] 1013 + print(f"phase 4: {len(cards)} total cards, {len(tag_cards)} tags with cards") 991 1014 992 1015 conn_count = promote_connections( 993 1016 session, rel_dicts, tag_cards, tag_info, existing_conns