personal memory agent
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Add --rescan-file option to indexer for single-file indexing

Adds ability to index a specific file without scanning the entire journal.
Useful for immediate updates after file creation/modification or integration
with file watchers.

- Add index_file() public function with path validation
- Add --rescan-file CLI option (mutually exclusive with --rescan/--rescan-full)
- Validate file exists, is under journal, and has a registered formatter
- Add comprehensive tests for valid/invalid paths and error cases

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

+162 -1
+79
tests/test_journal_index.py
def test_index_file_valid(journal_fixture):
    """Index one file by journal-relative path and confirm it is searchable."""
    from think.indexer.journal import index_file, search_journal

    result = index_file(
        str(journal_fixture), "20240101/agents/flow.md", verbose=True
    )
    assert result is True

    # The indexed content should now be searchable.
    total, _ = search_journal("project alpha")
    assert total >= 1


def test_index_file_absolute_path(journal_fixture):
    """Index one file by absolute path and confirm it is searchable."""
    from think.indexer.journal import index_file, search_journal

    abs_path = str(journal_fixture / "20240101" / "agents" / "flow.md")
    result = index_file(str(journal_fixture), abs_path, verbose=True)
    assert result is True

    # The indexed content should now be searchable.
    total, _ = search_journal("project alpha")
    assert total >= 1


def test_index_file_updates_existing(journal_fixture):
    """Re-indexing a file replaces its chunks instead of duplicating them."""
    from think.indexer.journal import index_file, search_journal

    rel_path = "20240101/agents/flow.md"

    index_file(str(journal_fixture), rel_path)
    total1, _ = search_journal("project alpha")
    # Guard against a vacuous pass: 0 == 0 would also satisfy the equality
    # check below even if nothing had been indexed at all.
    assert total1 >= 1

    # Index the very same file a second time.
    index_file(str(journal_fixture), rel_path)
    total2, _ = search_journal("project alpha")

    # Count should be the same (not doubled).
    assert total2 == total1


def test_index_file_not_found(journal_fixture):
    """A non-existent path raises FileNotFoundError."""
    from think.indexer.journal import index_file

    with pytest.raises(FileNotFoundError, match="File not found"):
        index_file(str(journal_fixture), "nonexistent/file.md")


def test_index_file_outside_journal(journal_fixture, tmp_path_factory):
    """A file that exists but lives outside the journal raises ValueError."""
    from think.indexer.journal import index_file

    # Create a file in a separate temp directory (outside the journal).
    outside_dir = tmp_path_factory.mktemp("outside")
    outside_file = outside_dir / "outside.md"
    outside_file.write_text("# Outside\n\nThis is outside the journal.\n")

    with pytest.raises(ValueError, match="outside journal directory"):
        index_file(str(journal_fixture), str(outside_file))


def test_index_file_no_formatter(journal_fixture):
    """A file with no registered formatter raises ValueError."""
    from think.indexer.journal import index_file

    # Create a file type the test expects to have no formatter (.txt).
    txt_file = journal_fixture / "20240101" / "notes.txt"
    txt_file.write_text("Just some text notes.\n")

    with pytest.raises(ValueError, match="No formatter found"):
        index_file(str(journal_fixture), str(txt_file))
+2
think/indexer/__init__.py
··· 13 13 from .journal import ( 14 14 get_events, 15 15 get_journal_index, 16 + index_file, 16 17 reset_journal_index, 17 18 sanitize_fts_query, 18 19 scan_journal, ··· 25 26 # Journal (unified index) 26 27 "get_events", 27 28 "get_journal_index", 29 + "index_file", 28 30 "reset_journal_index", 29 31 "sanitize_fts_query", 30 32 "scan_journal",
+17 -1
think/indexer/cli.py
··· 9 9 from think.utils import get_journal, journal_log, setup_cli 10 10 11 11 from .journal import ( 12 + index_file, 12 13 reset_journal_index, 13 14 scan_journal, 14 15 search_counts, ··· 93 94 help="Full rescan including all historical day directories", 94 95 ) 95 96 parser.add_argument( 97 + "--rescan-file", 98 + metavar="PATH", 99 + help="Index a specific file (absolute or journal-relative path)", 100 + ) 101 + parser.add_argument( 96 102 "--reset", 97 103 action="store_true", 98 104 help="Remove the index before rescan", ··· 149 155 if ( 150 156 not args.rescan 151 157 and not args.rescan_full 158 + and not args.rescan_file 152 159 and not args.reset 153 160 and args.query is None 154 161 ): ··· 158 165 if args.reset: 159 166 reset_journal_index(journal) 160 167 161 - if args.rescan or args.rescan_full: 168 + if args.rescan_file: 169 + # Single file indexing (incompatible with --rescan/--rescan-full) 170 + if args.rescan or args.rescan_full: 171 + parser.error("--rescan-file cannot be used with --rescan or --rescan-full") 172 + try: 173 + index_file(journal, args.rescan_file, verbose=args.verbose) 174 + journal_log(f"indexer file indexed: {args.rescan_file}") 175 + except (ValueError, FileNotFoundError) as e: 176 + parser.error(str(e)) 177 + elif args.rescan or args.rescan_full: 162 178 changed = scan_journal(journal, verbose=args.verbose, full=args.rescan_full) 163 179 if changed: 164 180 journal_log("indexer journal rescan ok")
+64
think/indexer/journal.py
def index_file(journal: str, file_path: str, verbose: bool = False) -> bool:
    """Index a single file into the journal index.

    Validates that the file exists, is under the journal directory, and has
    a registered formatter. Then indexes it, replacing any chunks already
    stored for the same journal-relative path.

    Args:
        journal: Path to journal root directory
        file_path: Absolute or journal-relative path to file
        verbose: If True, log detailed progress

    Returns:
        True if file was indexed successfully

    Raises:
        ValueError: If file is outside journal or has no formatter
        FileNotFoundError: If file doesn't exist
    """
    journal_path = Path(journal).resolve()

    # Resolve file path (handle both absolute and relative)
    if os.path.isabs(file_path):
        abs_path = Path(file_path).resolve()
    else:
        abs_path = (journal_path / file_path).resolve()

    # Validate file exists
    if not abs_path.is_file():
        raise FileNotFoundError(f"File not found: {abs_path}")

    # Validate file is under journal. Both paths are resolved first, so
    # ".." segments cannot be used to step outside the journal root.
    try:
        rel_path = str(abs_path.relative_to(journal_path))
    except ValueError:
        raise ValueError(f"File is outside journal directory: {abs_path}") from None

    # Validate formatter exists
    if get_formatter(rel_path) is None:
        raise ValueError(f"No formatter found for: {rel_path}")

    # Capture mtime before reading the file for indexing.
    mtime = int(os.path.getmtime(abs_path))

    conn, _ = get_journal_index(journal)
    try:
        # Delete existing chunks for this file so re-indexing replaces
        # rather than duplicates them.
        conn.execute("DELETE FROM chunks WHERE path=?", (rel_path,))

        if verbose:
            logger.info("Indexing %s", rel_path)

        _index_file(conn, rel_path, str(abs_path), verbose)

        # Update file mtime
        conn.execute(
            "REPLACE INTO files(path, mtime) VALUES (?, ?)", (rel_path, mtime)
        )

        # Commit only once every statement above has succeeded.
        conn.commit()
    finally:
        # Always release the connection, so a failure in _index_file or in
        # either execute() call does not leak it.
        conn.close()

    return True