personal memory agent
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Add action log formatter for journal indexing

Implement format_logs() to convert facet action log JSONL files into
indexed markdown chunks. This makes user actions (todo changes, entity
updates, etc.) searchable through the journal index.

- Add format_logs() formatter in think/facets.py
- Register facets/*/logs/*.jsonl pattern in FORMATTERS registry
- Update find_formattable_files() to discover log files
- Topic set to "action" for indexer metadata

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

+354 -1
+220
tests/test_formatters.py
class TestFormatLogs:
    """Tests for the action logs formatter (``think.facets.format_logs``)."""

    def test_get_formatter_logs(self):
        """The facets/*/logs/*.jsonl pattern resolves to format_logs."""
        from think.formatters import get_formatter

        formatter = get_formatter("facets/work/logs/20240101.jsonl")
        assert formatter is not None
        assert formatter.__name__ == "format_logs"

    def test_format_logs_basic(self):
        """A single entry renders heading, metadata line and parameters."""
        from think.facets import format_logs

        entries = [
            {
                "timestamp": "2025-12-16T07:33:05.135587+00:00",
                "source": "tool",
                "actor": "todos:todo",
                "action": "todo_add",
                "params": {"line_number": 1, "text": "Test task"},
            }
        ]

        chunks, _ = format_logs(entries)

        assert len(chunks) == 1
        markdown = chunks[0]["markdown"]
        assert "Todo Add by todos:todo" in markdown
        assert "**Source:** tool" in markdown
        assert "**Time:** 07:33:05" in markdown
        assert "**Parameters:**" in markdown
        assert "- line_number: 1" in markdown
        assert "- text: Test task" in markdown

    def test_format_logs_with_agent_id(self):
        """An agent_id renders as a markdown link to the agent page."""
        from think.facets import format_logs

        entries = [
            {
                "timestamp": "2025-12-16T07:33:05.135587+00:00",
                "source": "tool",
                "actor": "mcp",
                "action": "entity_attach",
                "params": {"type": "Person", "name": "Alice"},
                "agent_id": "1765870373972",
            }
        ]

        chunks, _ = format_logs(entries)

        assert len(chunks) == 1
        assert (
            "**Agent:** [1765870373972](/app/agents/1765870373972)"
            in chunks[0]["markdown"]
        )

    def test_format_logs_missing_action(self):
        """Entries without an action are skipped and reported in meta."""
        from think.facets import format_logs

        entries = [
            {
                "timestamp": "2025-12-16T07:33:05.135587+00:00",
                "source": "tool",
                "actor": "mcp",
                "action": "todo_add",
                "params": {},
            },
            {
                "timestamp": "2025-12-16T07:34:00.000000+00:00",
                "source": "tool",
                "actor": "mcp",
                # Missing action
                "params": {},
            },
        ]

        chunks, meta = format_logs(entries)

        assert len(chunks) == 1
        assert "error" in meta
        assert "Skipped 1 entries" in meta["error"]
        assert "action" in meta["error"]

    def test_format_logs_returns_indexer(self):
        """meta carries indexer metadata with topic 'action'."""
        from think.facets import format_logs

        entries = [
            {
                "timestamp": "2025-12-16T07:33:05.135587+00:00",
                "source": "tool",
                "actor": "mcp",
                "action": "todo_add",
                "params": {},
            }
        ]

        _, meta = format_logs(entries)

        assert "indexer" in meta
        assert meta["indexer"]["topic"] == "action"

    def test_format_logs_header_with_path(self):
        """The header includes the facet name and day taken from the path."""
        from think.facets import format_logs

        entries = [
            {
                "timestamp": "2025-12-16T07:33:05.135587+00:00",
                "source": "app",
                "actor": "todos",
                "action": "todo_complete",
                "params": {},
            }
        ]
        context = {"file_path": "/journal/facets/work/logs/20251216.jsonl"}

        _, meta = format_logs(entries, context)

        assert "header" in meta
        assert "Action Log: work" in meta["header"]
        assert "2025-12-16" in meta["header"]

    def test_format_logs_returns_source(self):
        """Each chunk's source is the original entry object, untouched."""
        from think.facets import format_logs

        entry = {
            "timestamp": "2025-12-16T07:33:05.135587+00:00",
            "source": "tool",
            "actor": "mcp",
            "action": "todo_add",
            "params": {"text": "Test"},
            "extra_field": "custom_value",
        }
        entries = [entry]

        chunks, _ = format_logs(entries)

        assert len(chunks) == 1
        assert "source" in chunks[0]
        assert chunks[0]["source"] is entry
        assert chunks[0]["source"]["extra_field"] == "custom_value"

    def test_format_logs_timestamp_parsing(self):
        """ISO timestamps are converted to unix milliseconds."""
        from think.facets import format_logs

        entries = [
            {
                "timestamp": "2025-12-16T07:33:05.135587+00:00",
                "source": "tool",
                "actor": "mcp",
                "action": "todo_add",
                "params": {},
            },
            {
                "timestamp": "2025-12-16T07:34:00.000000+00:00",
                "source": "tool",
                "actor": "mcp",
                "action": "todo_done",
                "params": {},
            },
        ]

        chunks, _ = format_logs(entries)

        assert len(chunks) == 2
        # Pin exact values: a loose lower bound would not catch a result
        # that is off by a day or a year.
        assert [chunk["timestamp"] for chunk in chunks] == [
            1765870385135,  # 2025-12-16T07:33:05.135587Z, truncated to ms
            1765870440000,  # 2025-12-16T07:34:00Z
        ]

    def test_format_logs_truncates_long_params(self):
        """String param values longer than 100 chars are truncated."""
        from think.facets import format_logs

        long_text = "x" * 200

        entries = [
            {
                "timestamp": "2025-12-16T07:33:05.135587+00:00",
                "source": "tool",
                "actor": "mcp",
                "action": "todo_add",
                "params": {"text": long_text},
            }
        ]

        chunks, _ = format_logs(entries)

        assert len(chunks) == 1
        # Should truncate to 100 chars + "..."
        assert ("x" * 100 + "...") in chunks[0]["markdown"]
        assert ("x" * 150) not in chunks[0]["markdown"]

    def test_format_logs_action_display_formatting(self):
        """Snake-case action names are rendered in title case."""
        from think.facets import format_logs

        entries = [
            {
                "timestamp": "2025-12-16T07:33:05.135587+00:00",
                "source": "tool",
                "actor": "mcp",
                "action": "entity_update_description",
                "params": {},
            }
        ]

        chunks, _ = format_logs(entries)

        assert len(chunks) == 1
        # "entity_update_description" should become "Entity Update Description"
        assert "Entity Update Description by mcp" in chunks[0]["markdown"]
+125
think/facets.py
··· 591 591 lines.append("") # Empty line between facets 592 592 593 593 return "\n".join(lines).strip() 594 + 595 + 596 + def format_logs( 597 + entries: list[dict], 598 + context: dict | None = None, 599 + ) -> tuple[list[dict], dict]: 600 + """Format action log JSONL entries to markdown chunks. 601 + 602 + This is the formatter function used by the formatters registry. 603 + 604 + Args: 605 + entries: Raw JSONL entries (one action log per line) 606 + context: Optional context with: 607 + - file_path: Path to JSONL file (for extracting facet name and day) 608 + 609 + Returns: 610 + Tuple of (chunks, meta) where: 611 + - chunks: List of dicts with keys: 612 + - timestamp: int (unix ms) 613 + - markdown: str 614 + - source: dict (original log entry) 615 + - meta: Dict with optional "header" and "error" keys 616 + """ 617 + ctx = context or {} 618 + file_path = ctx.get("file_path") 619 + meta: dict[str, Any] = {} 620 + chunks: list[dict[str, Any]] = [] 621 + skipped_count = 0 622 + 623 + # Extract facet name and day from path 624 + facet_name = "unknown" 625 + day_str: str | None = None 626 + 627 + if file_path: 628 + file_path = Path(file_path) 629 + 630 + # Extract facet name from path: facets/{facet}/logs/YYYYMMDD.jsonl 631 + path_str = str(file_path) 632 + facet_match = re.search(r"facets/([^/]+)/logs", path_str) 633 + if facet_match: 634 + facet_name = facet_match.group(1) 635 + 636 + # Extract day from filename 637 + if file_path.stem.isdigit() and len(file_path.stem) == 8: 638 + day_str = file_path.stem 639 + 640 + # Build header 641 + if day_str: 642 + formatted_day = f"{day_str[:4]}-{day_str[4:6]}-{day_str[6:8]}" 643 + meta["header"] = f"# Action Log: {facet_name} ({formatted_day})" 644 + else: 645 + meta["header"] = f"# Action Log: {facet_name}" 646 + 647 + # Format each log entry as a chunk 648 + for entry in entries: 649 + # Skip entries without action field 650 + action = entry.get("action") 651 + if not action: 652 + skipped_count += 1 653 + continue 
654 + 655 + # Parse timestamp 656 + ts = 0 657 + timestamp_str = entry.get("timestamp", "") 658 + time_display = "" 659 + if timestamp_str: 660 + try: 661 + dt = datetime.fromisoformat(timestamp_str) 662 + ts = int(dt.timestamp() * 1000) 663 + time_display = dt.strftime("%H:%M:%S") 664 + except (ValueError, TypeError): 665 + pass 666 + 667 + # Extract fields 668 + source = entry.get("source", "unknown") 669 + actor = entry.get("actor", "unknown") 670 + params = entry.get("params", {}) 671 + agent_id = entry.get("agent_id") 672 + 673 + # Format action name for display (e.g., "todo_add" -> "Todo Add") 674 + action_display = action.replace("_", " ").title() 675 + 676 + # Build markdown 677 + lines = [f"### {action_display} by {actor}", ""] 678 + 679 + # Metadata line 680 + meta_parts = [f"**Source:** {source}"] 681 + if time_display: 682 + meta_parts.append(f"**Time:** {time_display}") 683 + lines.append(" | ".join(meta_parts)) 684 + 685 + # Agent link if present 686 + if agent_id: 687 + lines.append(f"**Agent:** [{agent_id}](/app/agents/{agent_id})") 688 + 689 + lines.append("") 690 + 691 + # Parameters 692 + if params and isinstance(params, dict): 693 + lines.append("**Parameters:**") 694 + for key, value in params.items(): 695 + # Format value - truncate long strings 696 + if isinstance(value, str) and len(value) > 100: 697 + value = value[:100] + "..." 698 + lines.append(f"- {key}: {value}") 699 + lines.append("") 700 + 701 + chunks.append( 702 + { 703 + "timestamp": ts, 704 + "markdown": "\n".join(lines), 705 + "source": entry, 706 + } 707 + ) 708 + 709 + # Report skipped entries 710 + if skipped_count > 0: 711 + error_msg = f"Skipped {skipped_count} entries missing 'action' field" 712 + meta["error"] = error_msg 713 + logging.info(error_msg) 714 + 715 + # Indexer metadata - topic is "action" for action logs 716 + meta["indexer"] = {"topic": "action"} 717 + 718 + return chunks, meta
+9 -1
think/formatters.py
··· 115 115 "facets/*/entities.jsonl": ("think.entities", "format_entities"), 116 116 "facets/*/events/*.jsonl": ("think.events", "format_events"), 117 117 "facets/*/todos/*.jsonl": ("apps.todos.todo", "format_todos"), 118 + "facets/*/logs/*.jsonl": ("think.facets", "format_logs"), 118 119 "*/*_screen.jsonl": ("observe.screen", "format_screen"), 119 120 "*/screen.jsonl": ("observe.screen", "format_screen"), 120 121 "*/*_audio.jsonl": ("observe.hear", "format_audio"), ··· 200 201 Locations scanned: 201 202 - Daily insights: YYYYMMDD/insights/*.md 202 203 - Segment content: YYYYMMDD/HHMMSS*/*.md, *.jsonl 203 - - Facet content: facets/*/events/*.jsonl, entities/, todos/, news/ 204 + - Facet content: facets/*/events/*.jsonl, entities/, todos/, news/, logs/ 204 205 - Import summaries: imports/*/summary.md 205 206 - App insights: apps/*/insights/*.md 206 207 ··· 290 291 for md_file in news_dir.glob("*.md"): 291 292 rel = f"facets/{facet_name}/news/{md_file.name}" 292 293 files[rel] = str(md_file) 294 + 295 + # Action logs: facets/*/logs/*.jsonl 296 + logs_dir = facet_dir / "logs" 297 + if logs_dir.is_dir(): 298 + for jsonl_file in logs_dir.glob("*.jsonl"): 299 + rel = f"facets/{facet_name}/logs/{jsonl_file.name}" 300 + files[rel] = str(jsonl_file) 293 301 294 302 # Import summaries: imports/*/summary.md 295 303 imports_dir = journal_path / "imports"