personal memory agent

journal: export, merge --json, progress hook, archive validator (L1 plumbing)

This PR is L1 (the plumbing layer) of the convey journal-merge feature:

- sol call journal export — portable ZIP of the active journal with a manifest
- sol call journal merge --json — single-line JSON envelopes, suppresses logging
- merge_journals(progress=...) — optional callback per phase/item
- think/importers/journal_archive.py — read-only validate_journal_archive(zip_path)
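
For example, an automation script could drive the two commands roughly as below. This is a sketch patterned on the round-trip integration test in this PR; the archive and unpack paths are placeholders, and `sol` is assumed to be on PATH.

```python
# Sketch only: paths are placeholders; SOLSTONE_JOURNAL selects the active journal.
import json
import subprocess

# Export prints the resolved archive path on stdout (suppressed by --quiet).
export = subprocess.run(
    ["sol", "call", "journal", "export", "--out", "/tmp/journal-export.zip"],
    capture_output=True, text=True, check=True,
)
archive_path = export.stdout.strip()

# Merge emits exactly one JSON line: a success envelope on stdout, or an
# error envelope on stderr when it exits non-zero.
merge = subprocess.run(
    ["sol", "call", "journal", "merge", "/tmp/unpacked", "--json"],
    capture_output=True, text=True,
)
envelope = json.loads(merge.stdout or merge.stderr)
if envelope["ok"]:
    print(envelope["summary"]["segments_copied"], envelope["indexer_returncode"])
else:
    print(envelope["code"], envelope["message"])
```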

The background importer (L2) and the convey UI (L3) compose on top of these.
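
A rough sketch of how an L2 caller might compose on the Python surface (journal and archive paths below are placeholders; the validator performs no writes):

```python
# Hedged sketch of composing on validate_journal_archive and merge_journals.
from pathlib import Path

from think.importers.journal_archive import validate_journal_archive
from think.merge import merge_journals

# Read-only validation of an exported archive before touching anything.
report = validate_journal_archive(Path("/tmp/journal-export.zip"))
if not report.ok:
    raise SystemExit(report.warnings[-1].code)  # e.g. "archive-invalid-zip"
for warning in report.warnings:                 # non-fatal, e.g. "manifest-missing"
    print(f"{warning.code}: {warning.message}")

def progress(phase, completed, total, item_name):
    # phase is one of "segments", "entities", "facets", "imports"
    print(f"{phase}: {completed}/{total} {item_name or ''}")

summary = merge_journals(
    Path("/tmp/unpacked"),            # unpacked export root (chronicle/, entities/, ...)
    Path("/home/me/journal"),         # target journal root
    dry_run=True,
    log_path=Path("/tmp/merge/decisions.jsonl"),
    staging_path=Path("/tmp/merge/staging"),
    progress=progress,
)
print(summary.segments_copied, summary.errors)
```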

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

+1448 -114
+1
AGENTS.md
··· 74 74 **Adding a top-level command:** add an entry to `COMMANDS` in `think/sol_cli.py`; ensure the module has a `main()` function. 75 75 76 76 **Adding a `sol call` sub-verb:** add it to the app's `apps/<app>/call.py` Typer sub-app. No central registration needed — `think/call.py` discovers apps automatically. 77 + `sol call journal export` is the CLI entry for portable journal ZIPs; read-only archive validation lives in `think/importers/journal_archive.py`. 77 78 78 79 Run `sol` (no args) for live status plus the full grouped command list. 79 80
+72
tests/integration/test_journal_merge_roundtrip.py
··· 1 + # SPDX-License-Identifier: AGPL-3.0-only 2 + # Copyright (c) 2026 sol pbc 3 + 4 + """Integration coverage for export -> validate -> unpack -> merge.""" 5 + 6 + from __future__ import annotations 7 + 8 + import json 9 + import os 10 + import shutil 11 + import subprocess 12 + import zipfile 13 + from pathlib import Path 14 + 15 + import pytest 16 + 17 + from tests._baseline_harness import copytree_tracked 18 + from think.importers.journal_archive import validate_journal_archive 19 + 20 + 21 + @pytest.mark.integration 22 + def test_export_validate_merge_rescan_round_trip(tmp_path, integration_journal_path): 23 + sol_binary = shutil.which("sol") 24 + if sol_binary is None: 25 + pytest.skip("sol binary is not on PATH for integration round-trip test") 26 + 27 + source_journal = tmp_path / "source-journal" 28 + copytree_tracked(Path("tests/fixtures/journal").resolve(), source_journal) 29 + archive_path = tmp_path / "journal-export.zip" 30 + 31 + export_env = os.environ.copy() 32 + export_env["SOLSTONE_JOURNAL"] = str(source_journal.resolve()) 33 + 34 + export_result = subprocess.run( 35 + [sol_binary, "call", "journal", "export", "--out", str(archive_path)], 36 + capture_output=True, 37 + text=True, 38 + check=False, 39 + env=export_env, 40 + ) 41 + assert export_result.returncode == 0, export_result.stderr 42 + assert archive_path.exists() 43 + 44 + validation = validate_journal_archive(archive_path) 45 + assert validation.ok is True 46 + 47 + unpack_dir = tmp_path / "unpacked" 48 + with zipfile.ZipFile(archive_path, "r") as archive: 49 + archive.extractall(unpack_dir) 50 + 51 + merge_env = os.environ.copy() 52 + merge_env["SOLSTONE_JOURNAL"] = str(integration_journal_path.resolve()) 53 + merge_env["SOL_SKIP_SUPERVISOR_CHECK"] = "1" 54 + 55 + merge_result = subprocess.run( 56 + [sol_binary, "call", "journal", "merge", str(unpack_dir), "--json"], 57 + capture_output=True, 58 + text=True, 59 + check=False, 60 + env=merge_env, 61 + ) 62 + assert merge_result.returncode == 0, merge_result.stderr 63 + payload = json.loads(merge_result.stdout) 64 + assert payload["ok"] is True 65 + assert payload["indexer_returncode"] == 0 66 + 67 + assert ( 68 + integration_journal_path / "chronicle" / "20260306" / "default" / "143000_300" 69 + ).exists() 70 + assert ( 71 + integration_journal_path / "entities" / "romeo_montague" / "entity.json" 72 + ).exists()
+202
tests/test_journal_archive.py
··· 1 + # SPDX-License-Identifier: AGPL-3.0-only 2 + # Copyright (c) 2026 sol pbc 3 + 4 + """Tests for journal archive validation.""" 5 + 6 + from __future__ import annotations 7 + 8 + import json 9 + import zipfile 10 + from pathlib import Path 11 + 12 + import think.importers.journal_archive as journal_archive 13 + 14 + 15 + def _write_zip(path: Path, members: dict[str, str]) -> None: 16 + with zipfile.ZipFile(path, "w") as archive: 17 + for name, payload in members.items(): 18 + archive.writestr(name, payload) 19 + 20 + 21 + def test_validate_journal_archive_rejects_missing_file(tmp_path): 22 + archive_path = tmp_path / "missing.zip" 23 + 24 + result = journal_archive.validate_journal_archive(archive_path) 25 + 26 + assert result.ok is False 27 + assert result.warnings[-1].code == "archive-not-found" 28 + 29 + 30 + def test_validate_journal_archive_accepts_flat_layout(tmp_path): 31 + archive_path = tmp_path / "flat.zip" 32 + _write_zip( 33 + archive_path, 34 + { 35 + "chronicle/": "", 36 + "chronicle/20260101/default/090000_300/audio.jsonl": "{}\n", 37 + "entities/alice/entity.json": "{}\n", 38 + "facets/work/facet.json": "{}\n", 39 + "imports/20260101_090000/manifest.json": "{}\n", 40 + "_export.json": json.dumps( 41 + { 42 + "solstone_version": "0.1.0", 43 + "exported_at": "2026-04-26T20:00:00Z", 44 + "source_journal": "/tmp/source", 45 + "day_count": 1, 46 + "entity_count": 1, 47 + "facet_count": 1, 48 + } 49 + ), 50 + }, 51 + ) 52 + 53 + result = journal_archive.validate_journal_archive(archive_path) 54 + 55 + assert result.ok is True 56 + assert result.root_prefix == "" 57 + assert result.day_count == 1 58 + assert result.entity_count == 1 59 + assert result.facet_count == 1 60 + assert result.warnings == [] 61 + 62 + 63 + def test_validate_journal_archive_accepts_wrapper_layout(tmp_path): 64 + archive_path = tmp_path / "wrapped.zip" 65 + _write_zip( 66 + archive_path, 67 + { 68 + "snapshot/chronicle/20260101/default/090000_300/audio.jsonl": "{}\n", 69 + "snapshot/entities/alice/entity.json": "{}\n", 70 + "snapshot/facets/work/facet.json": "{}\n", 71 + "snapshot/imports/20260101_090000/manifest.json": "{}\n", 72 + "snapshot/_export.json": json.dumps( 73 + { 74 + "solstone_version": "0.1.0", 75 + "exported_at": "2026-04-26T20:00:00Z", 76 + "source_journal": "/tmp/source", 77 + "day_count": 1, 78 + "entity_count": 1, 79 + "facet_count": 1, 80 + } 81 + ), 82 + }, 83 + ) 84 + 85 + result = journal_archive.validate_journal_archive(archive_path) 86 + 87 + assert result.ok is True 88 + assert result.root_prefix == "snapshot/" 89 + 90 + 91 + def test_validate_journal_archive_rejects_invalid_structure(tmp_path): 92 + archive_path = tmp_path / "structure.zip" 93 + _write_zip(archive_path, {"notes/readme.txt": "hello\n"}) 94 + 95 + result = journal_archive.validate_journal_archive(archive_path) 96 + 97 + assert result.ok is False 98 + assert result.warnings[-1].code == "archive-structure-invalid" 99 + 100 + 101 + def test_validate_journal_archive_rejects_invalid_and_encrypted_zip( 102 + tmp_path, monkeypatch 103 + ): 104 + invalid_path = tmp_path / "invalid.zip" 105 + invalid_path.write_text("not a zip", encoding="utf-8") 106 + invalid_result = journal_archive.validate_journal_archive(invalid_path) 107 + assert invalid_result.ok is False 108 + assert invalid_result.warnings[-1].code == "archive-invalid-zip" 109 + 110 + archive_path = tmp_path / "encrypted.zip" 111 + _write_zip( 112 + archive_path, {"chronicle/20260101/default/090000_300/audio.jsonl": "{}\n"} 113 + ) 114 + 115 + class 
EncryptedInfo: 116 + filename = "chronicle/20260101/default/090000_300/audio.jsonl" 117 + flag_bits = 0x1 118 + 119 + monkeypatch.setattr( 120 + journal_archive.zipfile.ZipFile, 121 + "infolist", 122 + lambda self: [EncryptedInfo()], 123 + ) 124 + 125 + encrypted_result = journal_archive.validate_journal_archive(archive_path) 126 + assert encrypted_result.ok is False 127 + assert encrypted_result.warnings[-1].code == "archive-encrypted" 128 + 129 + 130 + def test_validate_journal_archive_rejects_too_large_archive(tmp_path): 131 + archive_path = tmp_path / "large.zip" 132 + _write_zip( 133 + archive_path, {"chronicle/20260101/default/090000_300/audio.jsonl": "{}"} 134 + ) 135 + 136 + result = journal_archive.validate_journal_archive(archive_path, max_size_bytes=1) 137 + 138 + assert result.ok is False 139 + assert result.warnings[-1].code == "archive-too-large" 140 + 141 + 142 + def test_validate_journal_archive_manifest_warnings(tmp_path): 143 + archive_path = tmp_path / "warnings.zip" 144 + _write_zip( 145 + archive_path, 146 + { 147 + "__MACOSX/ignored.txt": "ignored\n", 148 + "snapshot/entities/alice/entity.json": "{}\n", 149 + "snapshot/facets/work/facet.json": "{}\n", 150 + "snapshot/_export.json": json.dumps( 151 + { 152 + "solstone_version": "0.1.0", 153 + "day_count": 99, 154 + "entity_count": 3, 155 + } 156 + ), 157 + "snapshot/.DS_Store": "ignored\n", 158 + }, 159 + ) 160 + 161 + result = journal_archive.validate_journal_archive(archive_path) 162 + 163 + assert result.ok is True 164 + codes = [warning.code for warning in result.warnings] 165 + assert "manifest-fields-missing" in codes 166 + assert "manifest-count-mismatch" in codes 167 + assert "chronicle-missing" in codes 168 + 169 + 170 + def test_validate_journal_archive_warns_for_missing_and_unparseable_manifest(tmp_path): 171 + missing_manifest_path = tmp_path / "missing-manifest.zip" 172 + _write_zip( 173 + missing_manifest_path, 174 + { 175 + "__MACOSX/ignored.txt": "ignored\n", 176 + "chronicle/20260101/default/090000_300/audio.jsonl": "{}\n", 177 + ".DS_Store": "ignored\n", 178 + }, 179 + ) 180 + 181 + missing_result = journal_archive.validate_journal_archive(missing_manifest_path) 182 + 183 + assert missing_result.ok is True 184 + assert [warning.code for warning in missing_result.warnings] == ["manifest-missing"] 185 + 186 + unparseable_manifest_path = tmp_path / "unparseable-manifest.zip" 187 + _write_zip( 188 + unparseable_manifest_path, 189 + { 190 + "chronicle/20260101/default/090000_300/audio.jsonl": "{}\n", 191 + "_export.json": "{not-json", 192 + }, 193 + ) 194 + 195 + unparseable_result = journal_archive.validate_journal_archive( 196 + unparseable_manifest_path 197 + ) 198 + 199 + assert unparseable_result.ok is True 200 + assert [warning.code for warning in unparseable_result.warnings] == [ 201 + "manifest-unparseable" 202 + ]
+138
tests/test_journal_export.py
··· 1 + # SPDX-License-Identifier: AGPL-3.0-only 2 + # Copyright (c) 2026 sol pbc 3 + 4 + """Tests for journal export CLI and helper.""" 5 + 6 + from __future__ import annotations 7 + 8 + import json 9 + import zipfile 10 + from pathlib import Path 11 + 12 + from typer.testing import CliRunner 13 + 14 + from think.call import call_app 15 + 16 + runner = CliRunner() 17 + 18 + 19 + def _read_manifest(archive_path: Path) -> dict: 20 + with zipfile.ZipFile(archive_path, "r") as archive: 21 + return json.loads(archive.read("_export.json")) 22 + 23 + 24 + def test_journal_export_writes_zip_and_manifest(journal_copy): 25 + archive_path = journal_copy.parent / "journal-export.zip" 26 + 27 + result = runner.invoke(call_app, ["journal", "export", "--out", str(archive_path)]) 28 + 29 + assert result.exit_code == 0 30 + assert result.stdout.strip() == str(archive_path.resolve()) 31 + assert archive_path.exists() 32 + manifest = _read_manifest(archive_path) 33 + assert manifest["solstone_version"] == "0.1.0" 34 + assert manifest["source_journal"] == str(journal_copy.resolve()) 35 + assert manifest["day_count"] > 0 36 + assert manifest["entity_count"] > 0 37 + assert manifest["facet_count"] > 0 38 + with zipfile.ZipFile(archive_path, "r") as archive: 39 + names = set(archive.namelist()) 40 + assert "chronicle/" in names 41 + assert "entities/" in names 42 + assert "facets/" in names 43 + assert "imports/" in names 44 + 45 + 46 + def test_journal_export_default_path_and_quiet(tmp_path, monkeypatch): 47 + journal_root = tmp_path / "journal" 48 + journal_root.mkdir() 49 + monkeypatch.setenv("SOLSTONE_JOURNAL", str(journal_root)) 50 + 51 + result = runner.invoke(call_app, ["journal", "export", "--quiet"]) 52 + 53 + assert result.exit_code == 0 54 + assert result.stdout == "" 55 + export_dir = journal_root.parent / f"{journal_root.name}.exports" 56 + archives = sorted(export_dir.glob("*.zip")) 57 + assert len(archives) == 1 58 + assert archives[0].exists() 59 + 60 + 61 + def test_journal_export_service_down_bypasses_require_up(journal_copy, monkeypatch): 62 + import think.tools.call as call_module 63 + 64 + archive_path = journal_copy.parent / "journal-export.zip" 65 + 66 + def should_not_run(): 67 + raise AssertionError("require_solstone should not be called for export") 68 + 69 + monkeypatch.setattr(call_module, "require_solstone", should_not_run) 70 + 71 + result = runner.invoke(call_app, ["journal", "export", "--out", str(archive_path)]) 72 + 73 + assert result.exit_code == 0 74 + assert archive_path.exists() 75 + 76 + 77 + def test_journal_export_atomic_write_failure(journal_copy, monkeypatch): 78 + import think.journal_export as export_module 79 + 80 + archive_path = journal_copy.parent / "journal-export.zip" 81 + 82 + def fail_replace(src, dst): 83 + raise OSError("disk full") 84 + 85 + monkeypatch.setattr(export_module.os, "replace", fail_replace) 86 + 87 + result = runner.invoke(call_app, ["journal", "export", "--out", str(archive_path)]) 88 + 89 + assert result.exit_code == 1 90 + assert "error: failed to write archive; try:" in result.stderr 91 + assert not archive_path.exists() 92 + 93 + 94 + def test_journal_export_service_up_advisory(journal_copy, monkeypatch): 95 + import think.tools.call as call_module 96 + 97 + archive_path = journal_copy.parent / "journal-export.zip" 98 + monkeypatch.setattr(call_module, "is_solstone_up", lambda: True) 99 + 100 + result = runner.invoke(call_app, ["journal", "export", "--out", str(archive_path)]) 101 + 102 + assert result.exit_code == 0 103 + assert ( 104 + 
"warning: solstone supervisor is running; export reflects a live snapshot" 105 + in result.stderr 106 + ) 107 + 108 + 109 + def test_journal_export_top_level_skipped_advisory(tmp_path, monkeypatch): 110 + journal_root = tmp_path / "journal" 111 + journal_root.mkdir() 112 + (journal_root / "misc").mkdir() 113 + monkeypatch.setenv("SOLSTONE_JOURNAL", str(journal_root)) 114 + archive_path = tmp_path / "journal-export.zip" 115 + 116 + result = runner.invoke(call_app, ["journal", "export", "--out", str(archive_path)]) 117 + 118 + assert result.exit_code == 0 119 + assert "advisory: skipped non-export entries: misc" in result.stderr 120 + 121 + 122 + def test_journal_export_empty_journal(tmp_path, monkeypatch): 123 + journal_root = tmp_path / "journal" 124 + journal_root.mkdir() 125 + monkeypatch.setenv("SOLSTONE_JOURNAL", str(journal_root)) 126 + archive_path = tmp_path / "journal-export.zip" 127 + 128 + result = runner.invoke(call_app, ["journal", "export", "--out", str(archive_path)]) 129 + 130 + assert result.exit_code == 0 131 + manifest = _read_manifest(archive_path) 132 + assert manifest["day_count"] == 0 133 + assert manifest["entity_count"] == 0 134 + assert manifest["facet_count"] == 0 135 + with zipfile.ZipFile(archive_path, "r") as archive: 136 + assert {"chronicle/", "entities/", "facets/", "imports/"} <= set( 137 + archive.namelist() 138 + )
+199 -1
tests/test_journal_merge.py
··· 6 6 import json 7 7 import shutil 8 8 from pathlib import Path 9 + from types import SimpleNamespace 9 10 10 11 import pytest 11 12 from typer.testing import CliRunner 12 13 13 14 from think.call import call_app 15 + from think.merge import DecisionLogWriteError, merge_journals 14 16 15 17 runner = CliRunner() 16 18 ··· 40 42 ] 41 43 42 44 45 + def _parse_cli_json(output: str) -> dict: 46 + return json.loads(output.strip()) 47 + 48 + 43 49 def _find_merge_artifact_root(target: Path) -> Path: 44 50 merge_dir = target.parent / f"{target.name}.merge" 45 51 runs = sorted(path for path in merge_dir.iterdir() if path.is_dir()) ··· 54 60 55 61 def _run(*args, **kwargs): 56 62 calls.append((args, kwargs)) 57 - return None 63 + return SimpleNamespace(returncode=0) 58 64 59 65 monkeypatch.setattr(call_module.subprocess, "run", _run) 60 66 return calls ··· 578 584 assert "does not appear to be a journal" in result.output 579 585 580 586 587 + def test_accepts_unpacked_export_root(merge_journals_fixture, monkeypatch): 588 + paths = merge_journals_fixture 589 + _mock_indexer(monkeypatch) 590 + 591 + chronicle_dir = paths["source"] / "chronicle" 592 + chronicle_dir.mkdir() 593 + shutil.move( 594 + str(paths["source"] / "20260101"), 595 + str(chronicle_dir / "20260101"), 596 + ) 597 + 598 + result = runner.invoke(call_app, ["journal", "merge", str(paths["source"])]) 599 + 600 + assert result.exit_code == 0 601 + assert ( 602 + paths["target"] / "chronicle" / "20260101" / "143022_300" / "audio.jsonl" 603 + ).exists() 604 + 605 + 581 606 def test_error_resilience(merge_journals_fixture, monkeypatch): 582 607 paths = merge_journals_fixture 583 608 _mock_indexer(monkeypatch) ··· 668 693 669 694 assert result.exit_code == 0 670 695 assert "staged" in result.output 696 + 697 + 698 + def test_merge_json_success_envelope(merge_journals_fixture, monkeypatch): 699 + paths = merge_journals_fixture 700 + _mock_indexer(monkeypatch) 701 + 702 + result = runner.invoke( 703 + call_app, ["journal", "merge", str(paths["source"]), "--json"] 704 + ) 705 + 706 + assert result.exit_code == 0 707 + payload = _parse_cli_json(result.output) 708 + assert payload["ok"] is True 709 + assert payload["code"] == "ok" 710 + assert payload["source"] == str(paths["source"].resolve()) 711 + assert payload["dry_run"] is False 712 + assert payload["summary"]["segments_copied"] >= 1 713 + assert payload["decision_log"].endswith("decisions.jsonl") 714 + assert payload["staging_dir"] is None 715 + assert payload["indexer_returncode"] == 0 716 + assert result.stderr == "" 717 + assert len(result.output.strip().splitlines()) == 1 718 + 719 + 720 + def test_merge_json_validation_error_envelope(): 721 + result = runner.invoke( 722 + call_app, ["journal", "merge", "/tmp/missing-source", "--json"] 723 + ) 724 + 725 + assert result.exit_code == 1 726 + payload = _parse_cli_json(result.stderr) 727 + assert payload == { 728 + "ok": False, 729 + "code": "source-not-a-directory", 730 + "message": "Source path is not a directory.", 731 + "source": str(Path("/tmp/missing-source").resolve()), 732 + "dry_run": False, 733 + "details": {}, 734 + } 735 + assert result.stdout == "" 736 + assert len(result.stderr.strip().splitlines()) == 1 737 + 738 + 739 + def test_merge_json_partial_errors_still_succeed(merge_journals_fixture, monkeypatch): 740 + paths = merge_journals_fixture 741 + _mock_indexer(monkeypatch) 742 + (paths["source"] / "20260101" / "150000_60").mkdir(parents=True) 743 + (paths["source"] / "20260101" / "150000_60" / "audio.jsonl").write_text( 
744 + '{"audio": "bad"}\n', 745 + encoding="utf-8", 746 + ) 747 + 748 + import think.merge as journal_merge_module 749 + 750 + real_copytree = shutil.copytree 751 + bad_segment = paths["source"] / "20260101" / "150000_60" 752 + 753 + def failing_copytree(src, dst, *args, **kwargs): 754 + if Path(src) == bad_segment: 755 + raise OSError("boom") 756 + return real_copytree(src, dst, *args, **kwargs) 757 + 758 + monkeypatch.setattr(journal_merge_module.shutil, "copytree", failing_copytree) 759 + 760 + result = runner.invoke( 761 + call_app, ["journal", "merge", str(paths["source"]), "--json"] 762 + ) 763 + 764 + assert result.exit_code == 0 765 + payload = _parse_cli_json(result.output) 766 + assert payload["ok"] is True 767 + assert payload["summary"]["errors"] == ["segment 20260101/_default/150000_60: boom"] 768 + 769 + 770 + def test_merge_json_decision_log_failure(merge_journals_fixture, monkeypatch): 771 + paths = merge_journals_fixture 772 + _mock_indexer(monkeypatch) 773 + 774 + import think.merge as journal_merge_module 775 + 776 + def failing_log(*args, **kwargs): 777 + raise DecisionLogWriteError("boom") 778 + 779 + monkeypatch.setattr(journal_merge_module, "_log_decision", failing_log) 780 + 781 + result = runner.invoke( 782 + call_app, ["journal", "merge", str(paths["source"]), "--json"] 783 + ) 784 + 785 + assert result.exit_code == 1 786 + payload = _parse_cli_json(result.stderr) 787 + assert payload["code"] == "decision-log-write-failed" 788 + assert payload["details"]["exception_type"] == "DecisionLogWriteError" 789 + 790 + 791 + def test_merge_json_merge_engine_error(merge_journals_fixture, monkeypatch): 792 + paths = merge_journals_fixture 793 + _mock_indexer(monkeypatch) 794 + 795 + import think.merge as journal_merge_module 796 + 797 + def boom(*args, **kwargs): 798 + raise RuntimeError("merge boom") 799 + 800 + monkeypatch.setattr(journal_merge_module, "merge_journals", boom) 801 + 802 + result = runner.invoke( 803 + call_app, ["journal", "merge", str(paths["source"]), "--json"] 804 + ) 805 + 806 + assert result.exit_code == 1 807 + payload = _parse_cli_json(result.stderr) 808 + assert payload["code"] == "merge-engine-error" 809 + assert payload["details"]["exception_type"] == "RuntimeError" 810 + 811 + 812 + def test_merge_progress_callback_reports_all_phases(merge_journals_fixture): 813 + paths = merge_journals_fixture 814 + events = [] 815 + 816 + def progress(phase, completed, total, item_name): 817 + events.append((phase, completed, total, item_name)) 818 + 819 + summary = merge_journals( 820 + paths["source"], 821 + paths["target"], 822 + dry_run=True, 823 + log_path=paths["target"].parent / "dry-run.log", 824 + staging_path=paths["target"].parent / "staging", 825 + progress=progress, 826 + ) 827 + 828 + assert summary.segments_copied >= 1 829 + assert ("segments", 0, 2, None) in events 830 + assert ("segments", 2, 2, None) in events 831 + assert ("entities", 0, 1, None) in events 832 + assert ("entities", 1, 1, "alice_johnson") in events 833 + assert ("facets", 1, 1, "work") in events 834 + assert ("imports", 1, 1, "20260101_120000") in events 835 + 836 + 837 + def test_merge_progress_callback_exception_propagates(merge_journals_fixture): 838 + paths = merge_journals_fixture 839 + 840 + def progress(phase, completed, total, item_name): 841 + if phase == "segments" and completed == 1: 842 + raise RuntimeError("stop here") 843 + 844 + with pytest.raises(RuntimeError, match="stop here"): 845 + merge_journals( 846 + paths["source"], 847 + paths["target"], 848 + 
dry_run=True, 849 + log_path=paths["target"].parent / "dry-run.log", 850 + staging_path=paths["target"].parent / "staging", 851 + progress=progress, 852 + ) 853 + 854 + 855 + def test_merge_no_flag_snapshot_unchanged(merge_journals_fixture, monkeypatch): 856 + paths = merge_journals_fixture 857 + _mock_indexer(monkeypatch) 858 + 859 + result = runner.invoke(call_app, ["journal", "merge", str(paths["source"])]) 860 + 861 + assert result.exit_code == 0 862 + assert "\nMerged:\n" in result.output 863 + assert "Segments: 1 copied, 1 skipped, 0 errored" in result.output 864 + assert "Entities: 0 created, 1 merged, 0 staged, 0 skipped" in result.output 865 + assert "Facets: 1 created, 0 merged" in result.output 866 + assert "Imports: 1 copied, 0 skipped" in result.output 867 + assert "Decision log:" in result.output 868 + assert "Index rebuild started." in result.output
+267
think/importers/journal_archive.py
··· 1 + # SPDX-License-Identifier: AGPL-3.0-only 2 + # Copyright (c) 2026 sol pbc 3 + 4 + """Read-only validation for exported journal archives.""" 5 + 6 + # L1 read-only validator. L2 will add a JournalArchiveImporter class to this 7 + # module; the validator must remain free of writes (scope §4 / AGENTS.md §7 L7). 8 + 9 + from __future__ import annotations 10 + 11 + import json 12 + import re 13 + import zipfile 14 + from dataclasses import dataclass, field 15 + from pathlib import Path 16 + from typing import Any 17 + 18 + DATE_RE = re.compile(r"^\d{8}$") 19 + JOURNAL_ROOT_ENTRIES = {"chronicle", "entities", "facets", "imports", "_export.json"} 20 + MANIFEST_FIELDS = ( 21 + "solstone_version", 22 + "exported_at", 23 + "source_journal", 24 + "day_count", 25 + "entity_count", 26 + "facet_count", 27 + ) 28 + 29 + 30 + @dataclass 31 + class ArchiveWarning: 32 + code: str 33 + message: str 34 + 35 + 36 + @dataclass 37 + class ArchiveValidation: 38 + ok: bool 39 + archive_path: Path 40 + root_prefix: str 41 + manifest: dict[str, Any] | None 42 + warnings: list[ArchiveWarning] = field(default_factory=list) 43 + day_count: int = 0 44 + entity_count: int = 0 45 + facet_count: int = 0 46 + 47 + 48 + def _visible_name(name: str) -> str | None: 49 + parts = [part for part in name.split("/") if part] 50 + if not parts: 51 + return None 52 + if parts[0] == "__MACOSX": 53 + return None 54 + if parts[-1] == ".DS_Store": 55 + return None 56 + return "/".join(parts) 57 + 58 + 59 + def _top_level_names(names: list[str]) -> set[str]: 60 + return {name.split("/", 1)[0] for name in names if name} 61 + 62 + 63 + def _resolve_root_prefix(names: list[str]) -> str | None: 64 + top_level = _top_level_names(names) 65 + if top_level & JOURNAL_ROOT_ENTRIES: 66 + return "" 67 + 68 + top_level_dirs = { 69 + name for name in top_level if any(item.startswith(f"{name}/") for item in names) 70 + } 71 + if len(top_level_dirs) != 1: 72 + return None 73 + 74 + wrapper = next(iter(top_level_dirs)) 75 + nested = [] 76 + prefix = f"{wrapper}/" 77 + for name in names: 78 + if name.startswith(prefix): 79 + stripped = name[len(prefix) :] 80 + if stripped: 81 + nested.append(stripped) 82 + if _top_level_names(nested) & JOURNAL_ROOT_ENTRIES: 83 + return prefix 84 + return None 85 + 86 + 87 + def _scan_counts(names: list[str], root_prefix: str) -> tuple[int, int, int]: 88 + day_dirs: set[str] = set() 89 + entity_slugs: set[str] = set() 90 + facet_slugs: set[str] = set() 91 + 92 + for name in names: 93 + if root_prefix and not name.startswith(root_prefix): 94 + continue 95 + relative_name = name[len(root_prefix) :] if root_prefix else name 96 + parts = relative_name.split("/") 97 + if len(parts) >= 2 and parts[0] == "chronicle" and DATE_RE.match(parts[1]): 98 + day_dirs.add(parts[1]) 99 + if len(parts) == 3 and parts[0] == "entities" and parts[2] == "entity.json": 100 + entity_slugs.add(parts[1]) 101 + if len(parts) == 3 and parts[0] == "facets" and parts[2] == "facet.json": 102 + facet_slugs.add(parts[1]) 103 + 104 + return len(day_dirs), len(entity_slugs), len(facet_slugs) 105 + 106 + 107 + def _build_fatal( 108 + archive_path: Path, 109 + code: str, 110 + message: str, 111 + *, 112 + warnings: list[ArchiveWarning] | None = None, 113 + ) -> ArchiveValidation: 114 + all_warnings = list(warnings or []) 115 + all_warnings.append(ArchiveWarning(code=code, message=message)) 116 + return ArchiveValidation( 117 + ok=False, 118 + archive_path=archive_path, 119 + root_prefix="", 120 + manifest=None, 121 + warnings=all_warnings, 122 + ) 123 + 
124 + 125 + def validate_journal_archive( 126 + path: Path, 127 + *, 128 + max_size_bytes: int = 50 * 1024**3, 129 + ) -> ArchiveValidation: 130 + archive_path = path.expanduser().resolve() 131 + warnings: list[ArchiveWarning] = [] 132 + 133 + if not archive_path.exists(): 134 + return _build_fatal( 135 + archive_path, 136 + "archive-not-found", 137 + "Archive file does not exist.", 138 + ) 139 + 140 + if archive_path.stat().st_size > max_size_bytes: 141 + return _build_fatal( 142 + archive_path, 143 + "archive-too-large", 144 + "Archive exceeds 50 GiB safety limit.", 145 + ) 146 + 147 + if not zipfile.is_zipfile(archive_path): 148 + return _build_fatal( 149 + archive_path, 150 + "archive-invalid-zip", 151 + "Archive is not a readable ZIP file.", 152 + ) 153 + 154 + try: 155 + with zipfile.ZipFile(archive_path, "r") as archive: 156 + infos = archive.infolist() 157 + if any(info.flag_bits & 0x1 for info in infos): 158 + return _build_fatal( 159 + archive_path, 160 + "archive-encrypted", 161 + "Encrypted ZIP entries are not supported.", 162 + ) 163 + 164 + visible_names = [ 165 + name 166 + for info in infos 167 + if (name := _visible_name(info.filename)) is not None 168 + ] 169 + root_prefix = _resolve_root_prefix(visible_names) 170 + if root_prefix is None: 171 + return _build_fatal( 172 + archive_path, 173 + "archive-structure-invalid", 174 + "Archive does not contain a recognizable journal root.", 175 + warnings=warnings, 176 + ) 177 + 178 + day_count, entity_count, facet_count = _scan_counts( 179 + visible_names, root_prefix 180 + ) 181 + manifest: dict[str, Any] | None = None 182 + manifest_name = f"{root_prefix}_export.json" 183 + try: 184 + manifest_bytes = archive.read(manifest_name) 185 + except KeyError: 186 + warnings.append( 187 + ArchiveWarning( 188 + code="manifest-missing", 189 + message="Manifest is missing optional export metadata.", 190 + ) 191 + ) 192 + else: 193 + try: 194 + manifest = json.loads(manifest_bytes.decode("utf-8")) 195 + except (UnicodeDecodeError, json.JSONDecodeError): 196 + warnings.append( 197 + ArchiveWarning( 198 + code="manifest-unparseable", 199 + message="Manifest could not be parsed as JSON.", 200 + ) 201 + ) 202 + manifest = None 203 + 204 + if manifest is not None: 205 + missing_fields = [ 206 + field for field in MANIFEST_FIELDS if field not in manifest 207 + ] 208 + if missing_fields: 209 + warnings.append( 210 + ArchiveWarning( 211 + code="manifest-fields-missing", 212 + message=( 213 + "Manifest is missing required export metadata fields: " 214 + + ", ".join(missing_fields) 215 + ), 216 + ) 217 + ) 218 + for field_name, actual_value in ( 219 + ("day_count", day_count), 220 + ("entity_count", entity_count), 221 + ("facet_count", facet_count), 222 + ): 223 + manifest_value = manifest.get(field_name) 224 + if ( 225 + isinstance(manifest_value, int) 226 + and manifest_value != actual_value 227 + ): 228 + warnings.append( 229 + ArchiveWarning( 230 + code="manifest-count-mismatch", 231 + message=( 232 + f"Manifest {field_name}={manifest_value} does not match " 233 + f"archive contents ({actual_value})." 
234 + ), 235 + ) 236 + ) 237 + 238 + has_chronicle = any( 239 + name == f"{root_prefix}chronicle" 240 + or name.startswith(f"{root_prefix}chronicle/") 241 + for name in visible_names 242 + ) 243 + if not has_chronicle: 244 + warnings.append( 245 + ArchiveWarning( 246 + code="chronicle-missing", 247 + message="Archive has no chronicle/ directory; treating as partial journal.", 248 + ) 249 + ) 250 + 251 + return ArchiveValidation( 252 + ok=True, 253 + archive_path=archive_path, 254 + root_prefix=root_prefix, 255 + manifest=manifest, 256 + warnings=warnings, 257 + day_count=day_count, 258 + entity_count=entity_count, 259 + facet_count=facet_count, 260 + ) 261 + except (OSError, RuntimeError, zipfile.BadZipFile, zipfile.LargeZipFile): 262 + return _build_fatal( 263 + archive_path, 264 + "archive-invalid-zip", 265 + "Archive is not a readable ZIP file.", 266 + warnings=warnings, 267 + )
+129
think/journal_export.py
··· 1 + # SPDX-License-Identifier: AGPL-3.0-only 2 + # Copyright (c) 2026 sol pbc 3 + 4 + """Helpers for exporting a journal as a portable ZIP archive.""" 5 + 6 + from __future__ import annotations 7 + 8 + import json 9 + import os 10 + import re 11 + import zipfile 12 + from datetime import datetime, timezone 13 + from importlib.metadata import version 14 + from pathlib import Path 15 + from typing import Iterator 16 + 17 + EXPORT_TOP_LEVEL_DIRS = ("chronicle", "entities", "facets", "imports") 18 + DATE_RE = re.compile(r"^\d{8}$") 19 + 20 + 21 + def _default_export_path(journal_root: Path) -> Path: 22 + timestamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ") 23 + export_dir = journal_root.parent / f"{journal_root.name}.exports" 24 + export_dir.mkdir(parents=True, exist_ok=True) 25 + return export_dir / f"{timestamp}.zip" 26 + 27 + 28 + def _count_journal_export_stats(journal_root: Path) -> tuple[int, int, int]: 29 + chronicle_dir = journal_root / "chronicle" 30 + if chronicle_dir.is_dir(): 31 + day_count = sum( 32 + 1 33 + for entry in chronicle_dir.iterdir() 34 + if entry.is_dir() and DATE_RE.match(entry.name) 35 + ) 36 + else: 37 + day_count = 0 38 + 39 + entities_dir = journal_root / "entities" 40 + if entities_dir.is_dir(): 41 + entity_count = sum( 42 + 1 43 + for entry in entities_dir.iterdir() 44 + if entry.is_dir() and (entry / "entity.json").is_file() 45 + ) 46 + else: 47 + entity_count = 0 48 + 49 + facets_dir = journal_root / "facets" 50 + if facets_dir.is_dir(): 51 + facet_count = sum( 52 + 1 53 + for entry in facets_dir.iterdir() 54 + if entry.is_dir() and (entry / "facet.json").is_file() 55 + ) 56 + else: 57 + facet_count = 0 58 + 59 + return day_count, entity_count, facet_count 60 + 61 + 62 + def get_skipped_export_entries(journal_root: Path) -> list[str]: 63 + if not journal_root.is_dir(): 64 + return [] 65 + return sorted( 66 + entry.name 67 + for entry in journal_root.iterdir() 68 + if entry.name not in EXPORT_TOP_LEVEL_DIRS 69 + ) 70 + 71 + 72 + def _iter_export_members(journal_root: Path) -> Iterator[tuple[Path, str]]: 73 + for root_name in EXPORT_TOP_LEVEL_DIRS: 74 + root_path = journal_root / root_name 75 + if not root_path.exists(): 76 + continue 77 + if not root_path.is_dir(): 78 + raise NotADirectoryError(f"expected directory at {root_path}") 79 + for path in sorted(root_path.rglob("*")): 80 + if path.is_file(): 81 + yield path, path.relative_to(journal_root).as_posix() 82 + 83 + 84 + def _build_export_manifest(journal_root: Path) -> dict[str, str | int]: 85 + day_count, entity_count, facet_count = _count_journal_export_stats(journal_root) 86 + exported_at = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z") 87 + return { 88 + "solstone_version": version("solstone"), 89 + "exported_at": exported_at, 90 + "source_journal": str(journal_root.resolve()), 91 + "day_count": day_count, 92 + "entity_count": entity_count, 93 + "facet_count": facet_count, 94 + } 95 + 96 + 97 + def export_journal_archive(journal_root: Path, out_path: Path | None = None) -> Path: 98 + if not journal_root.is_dir(): 99 + raise FileNotFoundError(f"journal root is not a directory: {journal_root}") 100 + 101 + final_path = (out_path or _default_export_path(journal_root)).expanduser().resolve() 102 + final_path.parent.mkdir(parents=True, exist_ok=True) 103 + partial_path = final_path.parent / f"{final_path.name}.partial" 104 + manifest = _build_export_manifest(journal_root) 105 + 106 + fd = os.open(partial_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600) 107 + try: 108 + with 
os.fdopen(fd, "wb") as raw_handle: 109 + with zipfile.ZipFile( 110 + raw_handle, 111 + mode="w", 112 + compression=zipfile.ZIP_DEFLATED, 113 + ) as archive: 114 + for root_name in EXPORT_TOP_LEVEL_DIRS: 115 + archive.writestr(f"{root_name}/", b"") 116 + for source_path, arcname in _iter_export_members(journal_root): 117 + archive.write(source_path, arcname=arcname) 118 + archive.writestr("_export.json", json.dumps(manifest, indent=2)) 119 + raw_handle.flush() 120 + os.fsync(raw_handle.fileno()) 121 + os.replace(partial_path, final_path) 122 + except Exception: 123 + try: 124 + partial_path.unlink(missing_ok=True) 125 + except OSError: 126 + pass 127 + raise 128 + 129 + return final_path
+187 -68
think/merge.py
··· 7 7 import logging 8 8 import re 9 9 import shutil 10 + from collections.abc import Callable 10 11 from dataclasses import dataclass, field 11 12 from datetime import datetime, timezone 12 13 from pathlib import Path ··· 26 27 27 28 DATE_RE = re.compile(r"^\d{8}$") 28 29 logger = logging.getLogger(__name__) 30 + ProgressCallback = Callable[[str, int, int | None, str | None], None] 29 31 30 32 31 33 @dataclass ··· 44 46 errors: list[str] = field(default_factory=list) 45 47 46 48 49 + class DecisionLogWriteError(OSError): 50 + """Raised when the merge decision log cannot be written.""" 51 + 52 + 47 53 def merge_journals( 48 54 source: Path, 49 55 target: Path, 50 56 dry_run: bool = False, 51 57 log_path: Path | None = None, 52 58 staging_path: Path | None = None, 59 + *, 60 + progress: ProgressCallback | None = None, 53 61 ) -> MergeSummary: 54 62 summary = MergeSummary() 55 63 target_entities = load_all_journal_entities() 56 64 57 - _merge_segments(source, target, summary, dry_run, log_path=log_path) 65 + _merge_segments( 66 + source, target, summary, dry_run, log_path=log_path, progress=progress 67 + ) 58 68 _merge_entities( 59 69 source, 60 70 summary, ··· 62 72 target_entities, 63 73 log_path=log_path, 64 74 staging_path=staging_path, 75 + progress=progress, 65 76 ) 66 - _merge_facets(source, target, summary, dry_run, log_path=log_path) 67 - _merge_imports(source, target, summary, dry_run, log_path=log_path) 77 + _merge_facets( 78 + source, target, summary, dry_run, log_path=log_path, progress=progress 79 + ) 80 + _merge_imports( 81 + source, target, summary, dry_run, log_path=log_path, progress=progress 82 + ) 68 83 69 84 return summary 70 85 ··· 74 89 return 75 90 76 91 payload = {"ts": datetime.now(timezone.utc).isoformat(), **entry} 77 - log_path.parent.mkdir(parents=True, exist_ok=True) 78 - with open(log_path, "a", encoding="utf-8") as handle: 79 - handle.write(json.dumps(payload, ensure_ascii=False) + "\n") 92 + try: 93 + log_path.parent.mkdir(parents=True, exist_ok=True) 94 + with open(log_path, "a", encoding="utf-8") as handle: 95 + handle.write(json.dumps(payload, ensure_ascii=False) + "\n") 96 + except OSError as exc: 97 + raise DecisionLogWriteError(str(exc)) from exc 80 98 81 99 82 100 def _source_day_dirs(source: Path) -> dict[str, Path]: ··· 103 121 return chronicle_days or flat_days 104 122 105 123 124 + def _report_progress( 125 + progress: ProgressCallback | None, 126 + phase: str, 127 + completed: int, 128 + total: int | None, 129 + item_name: str | None, 130 + ) -> None: 131 + if progress is not None: 132 + progress(phase, completed, total, item_name) 133 + 134 + 135 + def _collect_segment_items(source: Path) -> list[tuple[str, str, str, Path]]: 136 + items: list[tuple[str, str, str, Path]] = [] 137 + for day_name, source_day in sorted(_source_day_dirs(source).items()): 138 + for stream, seg_key, seg_path in iter_segments(source_day): 139 + items.append((day_name, stream, seg_key, seg_path)) 140 + return items 141 + 142 + 143 + def _collect_entity_items(source: Path) -> list[Path]: 144 + source_entities_dir = source / "entities" 145 + if not source_entities_dir.is_dir(): 146 + return [] 147 + return [ 148 + entity_dir 149 + for entity_dir in sorted(source_entities_dir.iterdir()) 150 + if entity_dir.is_dir() and (entity_dir / "entity.json").is_file() 151 + ] 152 + 153 + 154 + def _collect_facet_items(source: Path) -> list[Path]: 155 + source_facets_dir = source / "facets" 156 + if not source_facets_dir.is_dir(): 157 + return [] 158 + return [ 159 + source_facet_dir 
160 + for source_facet_dir in sorted(source_facets_dir.iterdir()) 161 + if source_facet_dir.is_dir() and (source_facet_dir / "facet.json").is_file() 162 + ] 163 + 164 + 165 + def _collect_import_items(source: Path) -> list[Path]: 166 + source_imports_dir = source / "imports" 167 + if not source_imports_dir.is_dir(): 168 + return [] 169 + return [ 170 + source_import_dir 171 + for source_import_dir in sorted(source_imports_dir.iterdir()) 172 + if source_import_dir.is_dir() 173 + ] 174 + 175 + 106 176 def _merge_segments( 107 177 source: Path, 108 178 target: Path, 109 179 summary: MergeSummary, 110 180 dry_run: bool, 111 181 log_path: Path | None = None, 182 + progress: ProgressCallback | None = None, 112 183 ) -> None: 113 184 target_chronicle = target / CHRONICLE_DIR 114 185 if not dry_run: 115 186 target_chronicle.mkdir(parents=True, exist_ok=True) 116 187 117 - for day_name, source_day in sorted(_source_day_dirs(source).items()): 188 + segment_items = _collect_segment_items(source) 189 + total = len(segment_items) 190 + _report_progress(progress, "segments", 0, total, None) 191 + 192 + for index, (day_name, stream, seg_key, seg_path) in enumerate( 193 + segment_items, start=1 194 + ): 118 195 target_day = target_chronicle / day_name 119 - for stream, seg_key, seg_path in iter_segments(source_day): 120 - if stream == "_default": 121 - target_path = target_day / seg_key 122 - else: 123 - target_path = target_day / stream / seg_key 196 + if stream == "_default": 197 + target_path = target_day / seg_key 198 + else: 199 + target_path = target_day / stream / seg_key 124 200 125 - item_id = f"{day_name}/{stream}/{seg_key}" 126 - try: 127 - if target_path.exists(): 128 - summary.segments_skipped += 1 129 - _log_decision( 130 - log_path, 131 - { 132 - "action": "segment_skipped", 133 - "item_type": "segment", 134 - "item_id": item_id, 135 - "reason": "target_exists", 136 - }, 137 - ) 138 - continue 201 + item_id = f"{day_name}/{stream}/{seg_key}" 202 + try: 203 + if target_path.exists(): 204 + summary.segments_skipped += 1 205 + _log_decision( 206 + log_path, 207 + { 208 + "action": "segment_skipped", 209 + "item_type": "segment", 210 + "item_id": item_id, 211 + "reason": "target_exists", 212 + }, 213 + ) 214 + continue 139 215 140 - if dry_run: 141 - summary.segments_copied += 1 142 - _log_decision( 143 - log_path, 144 - { 145 - "action": "segment_copied", 146 - "item_type": "segment", 147 - "item_id": item_id, 148 - "reason": "new", 149 - }, 150 - ) 151 - continue 152 - 153 - shutil.copytree(seg_path, target_path, copy_function=shutil.copy2) 216 + if dry_run: 154 217 summary.segments_copied += 1 155 218 _log_decision( 156 219 log_path, ··· 161 224 "reason": "new", 162 225 }, 163 226 ) 164 - except Exception as exc: 165 - summary.segments_errored += 1 166 - summary.errors.append(f"segment {day_name}/{stream}/{seg_key}: {exc}") 227 + continue 228 + 229 + shutil.copytree(seg_path, target_path, copy_function=shutil.copy2) 230 + summary.segments_copied += 1 231 + _log_decision( 232 + log_path, 233 + { 234 + "action": "segment_copied", 235 + "item_type": "segment", 236 + "item_id": item_id, 237 + "reason": "new", 238 + }, 239 + ) 240 + except DecisionLogWriteError: 241 + raise 242 + except Exception as exc: 243 + summary.segments_errored += 1 244 + summary.errors.append(f"segment {day_name}/{stream}/{seg_key}: {exc}") 245 + finally: 246 + _report_progress(progress, "segments", index, total, item_id) 247 + 248 + _report_progress(progress, "segments", total, total, None) 167 249 168 250 169 251 def 
_merge_entities( ··· 173 255 target_entities: dict[str, dict[str, Any]], 174 256 log_path: Path | None = None, 175 257 staging_path: Path | None = None, 258 + progress: ProgressCallback | None = None, 176 259 ) -> None: 177 - 178 260 target_has_principal = any( 179 261 bool(entity.get("is_principal")) for entity in target_entities.values() 180 262 ) 181 - source_entities_dir = source / "entities" 182 - if not source_entities_dir.is_dir(): 183 - return 263 + entity_items = _collect_entity_items(source) 264 + total = len(entity_items) 265 + _report_progress(progress, "entities", 0, total, None) 184 266 185 - for entity_dir in sorted(source_entities_dir.iterdir()): 267 + for index, entity_dir in enumerate(entity_items, start=1): 186 268 entity_path = entity_dir / "entity.json" 187 - if not entity_dir.is_dir() or not entity_path.is_file(): 188 - continue 269 + item_id = entity_dir.name 189 270 190 271 try: 191 272 source_entity = json.loads(entity_path.read_text(encoding="utf-8")) ··· 305 386 "fields_changed": fields_changed, 306 387 }, 307 388 ) 389 + except DecisionLogWriteError: 390 + raise 308 391 except Exception as exc: 309 392 summary.errors.append(f"entity {entity_dir.name}: {exc}") 393 + finally: 394 + _report_progress(progress, "entities", index, total, item_id) 395 + 396 + _report_progress(progress, "entities", total, total, None) 310 397 311 398 312 399 def _merge_facets( ··· 315 402 summary: MergeSummary, 316 403 dry_run: bool, 317 404 log_path: Path | None = None, 405 + progress: ProgressCallback | None = None, 318 406 ) -> None: 407 + facet_items = _collect_facet_items(source) 408 + total = len(facet_items) 409 + _report_progress(progress, "facets", 0, total, None) 319 410 320 - source_facets_dir = source / "facets" 321 - if not source_facets_dir.is_dir(): 322 - return 323 - 324 - for source_facet_dir in sorted(source_facets_dir.iterdir()): 325 - facet_json = source_facet_dir / "facet.json" 326 - if not source_facet_dir.is_dir() or not facet_json.is_file(): 327 - continue 328 - 411 + for index, source_facet_dir in enumerate(facet_items, start=1): 329 412 facet_name = source_facet_dir.name 330 413 target_facet_dir = target / "facets" / facet_name 331 414 ··· 367 450 "reason": "overlap", 368 451 }, 369 452 ) 453 + except DecisionLogWriteError: 454 + raise 370 455 except Exception as exc: 371 456 summary.errors.append(f"facet {facet_name}: {exc}") 457 + finally: 458 + _report_progress(progress, "facets", index, total, facet_name) 459 + 460 + _report_progress(progress, "facets", total, total, None) 372 461 373 462 374 463 def _merge_overlapping_facet( ··· 443 532 "reason": "new", 444 533 }, 445 534 ) 535 + except DecisionLogWriteError: 536 + raise 446 537 except Exception as exc: 447 538 summary.errors.append(f"facet {facet_name} entity {entity_id}: {exc}") 448 539 ··· 480 571 ) 481 572 if new_items and not dry_run: 482 573 _append_jsonl(target_det_file, new_items) 574 + except DecisionLogWriteError: 575 + raise 483 576 except Exception as exc: 484 577 summary.errors.append( 485 578 f"facet {facet_name} detected entities {source_det_file.name}: {exc}" ··· 518 611 ) 519 612 if new_items and not dry_run: 520 613 _append_jsonl(target_todo_file, new_items) 614 + except DecisionLogWriteError: 615 + raise 521 616 except Exception as exc: 522 617 summary.errors.append( 523 618 f"facet {facet_name} todo {source_todo_file.name}: {exc}" ··· 558 653 ) 559 654 if new_config and not dry_run: 560 655 _append_jsonl_locked(target_config_file, new_config) 656 + except DecisionLogWriteError: 657 + 
raise 561 658 except Exception as exc: 562 659 summary.errors.append(f"facet {facet_name} activities config: {exc}") 563 660 ··· 595 692 ) 596 693 if new_records and not dry_run: 597 694 _append_jsonl_locked(target_day_file, new_records) 695 + except DecisionLogWriteError: 696 + raise 598 697 except Exception as exc: 599 698 summary.errors.append( 600 699 f"facet {facet_name} activities {source_day_file.name}: {exc}" ··· 645 744 "reason": "copied", 646 745 }, 647 746 ) 747 + except DecisionLogWriteError: 748 + raise 648 749 except Exception as exc: 649 750 summary.errors.append( 650 751 "facet " ··· 670 771 "reason": "appended", 671 772 }, 672 773 ) 774 + except DecisionLogWriteError: 775 + raise 673 776 except Exception as exc: 674 777 summary.errors.append( 675 778 f"facet {facet_name} logs {source_log_file.name}: {exc}" ··· 704 807 "reason": "new", 705 808 }, 706 809 ) 810 + except DecisionLogWriteError: 811 + raise 707 812 except Exception as exc: 708 813 summary.errors.append( 709 814 f"facet {facet_name} news {source_news_file.name}: {exc}" ··· 716 821 summary: MergeSummary, 717 822 dry_run: bool, 718 823 log_path: Path | None = None, 824 + progress: ProgressCallback | None = None, 719 825 ) -> None: 720 - 721 - source_imports_dir = source / "imports" 722 - if not source_imports_dir.is_dir(): 723 - return 724 - 725 - for source_import_dir in sorted(source_imports_dir.iterdir()): 726 - if not source_import_dir.is_dir(): 727 - continue 826 + import_items = _collect_import_items(source) 827 + total = len(import_items) 828 + _report_progress(progress, "imports", 0, total, None) 728 829 830 + for index, source_import_dir in enumerate(import_items, start=1): 729 831 target_import_dir = target / "imports" / source_import_dir.name 730 832 try: 731 833 if target_import_dir.exists(): ··· 756 858 "reason": "new", 757 859 }, 758 860 ) 861 + except DecisionLogWriteError: 862 + raise 759 863 except Exception as exc: 760 864 summary.errors.append(f"import {source_import_dir.name}: {exc}") 865 + finally: 866 + _report_progress( 867 + progress, 868 + "imports", 869 + index, 870 + total, 871 + source_import_dir.name, 872 + ) 873 + 874 + _report_progress(progress, "imports", total, total, None) 761 875 762 876 763 877 def _read_jsonl(path: Path) -> list[dict[str, Any]]: ··· 794 908 locked_modify(path, modify_fn, create_if_missing=True) 795 909 796 910 797 - __all__ = ["MergeSummary", "merge_journals"] 911 + __all__ = [ 912 + "DecisionLogWriteError", 913 + "MergeSummary", 914 + "ProgressCallback", 915 + "merge_journals", 916 + ]
+253 -45
think/tools/call.py
··· 11 11 """ 12 12 13 13 import json 14 + import logging 15 + import re 14 16 import shutil 15 17 import subprocess 16 18 import sys 19 + from dataclasses import asdict 17 20 from datetime import datetime, timezone 18 21 from pathlib import Path 19 22 ··· 42 45 from think.utils import ( 43 46 day_path, 44 47 get_journal, 48 + is_solstone_up, 45 49 iter_segments, 46 50 require_solstone, 47 51 resolve_sol_day, ··· 55 59 56 60 57 61 @app.callback() 58 - def _require_up() -> None: 62 + def _require_up(ctx: typer.Context) -> None: 63 + if ( 64 + ctx.invoked_subcommand == "export" 65 + ): # export is read-only and must work when supervisor is down (scope §6) 66 + return 59 67 require_solstone() 60 68 61 69 ··· 1065 1073 ) 1066 1074 1067 1075 1076 + def _looks_like_journal_source(source_path: Path) -> bool: 1077 + try: 1078 + chronicle_dir = source_path / "chronicle" 1079 + if chronicle_dir.is_dir(): 1080 + if any( 1081 + entry.is_dir() and re.match(r"^\d{8}$", entry.name) 1082 + for entry in chronicle_dir.iterdir() 1083 + ): 1084 + return True 1085 + return any( 1086 + entry.is_dir() and re.match(r"^\d{8}$", entry.name) 1087 + for entry in source_path.iterdir() 1088 + ) 1089 + except OSError: 1090 + return False 1091 + 1092 + 1093 + def _merge_error_envelope( 1094 + *, 1095 + code: str, 1096 + message: str, 1097 + source: Path, 1098 + dry_run: bool, 1099 + details: dict[str, object] | None = None, 1100 + ) -> dict[str, object]: 1101 + return { 1102 + "ok": False, 1103 + "code": code, 1104 + "message": message, 1105 + "source": str(source), 1106 + "dry_run": dry_run, 1107 + "details": details or {}, 1108 + } 1109 + 1110 + 1111 + @app.command("export") 1112 + def journal_export( 1113 + out: Path | None = typer.Option( 1114 + None, 1115 + "--out", 1116 + help="Write the journal ZIP to PATH (default: alongside the journal root).", 1117 + ), 1118 + quiet: bool = typer.Option( 1119 + False, 1120 + "--quiet", 1121 + help="Suppress success output; errors still print.", 1122 + ), 1123 + ) -> None: 1124 + """Export the active journal as a portable ZIP archive.""" 1125 + from think.journal_export import export_journal_archive, get_skipped_export_entries 1126 + 1127 + journal_root = Path(get_journal()) 1128 + if is_solstone_up(): 1129 + typer.echo( 1130 + "warning: solstone supervisor is running; export reflects a live snapshot and may include partial writes", 1131 + err=True, 1132 + ) 1133 + 1134 + skipped_entries = get_skipped_export_entries(journal_root) 1135 + if skipped_entries and not quiet: 1136 + typer.echo( 1137 + f"advisory: skipped non-export entries: {', '.join(skipped_entries)}", 1138 + err=True, 1139 + ) 1140 + 1141 + try: 1142 + archive_path = export_journal_archive(journal_root, out) 1143 + except FileNotFoundError: 1144 + typer.echo( 1145 + "error: active journal root is not a directory; try: verify the journal path", 1146 + err=True, 1147 + ) 1148 + raise typer.Exit(1) 1149 + except OSError: 1150 + target_path = ( 1151 + out or journal_root.parent / f"{journal_root.name}.exports" 1152 + ).expanduser() 1153 + advisory_parent = target_path.parent if target_path.suffix else target_path 1154 + typer.echo( 1155 + f"error: failed to write archive; try: check disk space and write permissions on {advisory_parent.resolve()}", 1156 + err=True, 1157 + ) 1158 + raise typer.Exit(1) 1159 + 1160 + if not quiet: 1161 + typer.echo(str(archive_path)) 1162 + 1163 + 1068 1164 @app.command("merge") 1069 1165 def journal_merge( 1070 1166 source: str = typer.Argument( ··· 1075 1171 "--dry-run", 1076 1172 
help="Show what would be merged without making changes.", 1077 1173 ), 1174 + json_output: bool = typer.Option( 1175 + False, 1176 + "--json", 1177 + help="Output result as a single-line JSON object (suppresses normal output).", 1178 + ), 1078 1179 ) -> None: 1079 1180 """Merge segments, entities, facets, and imports from a source journal.""" 1080 - from think.merge import MergeSummary, merge_journals 1081 - from think.utils import get_journal 1181 + from think.merge import DecisionLogWriteError, MergeSummary, merge_journals 1082 1182 1083 1183 source_path = Path(source).resolve() 1184 + target_path = Path(get_journal()) 1084 1185 1085 1186 if not source_path.is_dir(): 1086 - typer.echo(f"Error: '{source}' is not a directory.", err=True) 1187 + if json_output: 1188 + typer.echo( 1189 + json.dumps( 1190 + _merge_error_envelope( 1191 + code="source-not-a-directory", 1192 + message="Source path is not a directory.", 1193 + source=source_path, 1194 + dry_run=dry_run, 1195 + ) 1196 + ), 1197 + err=True, 1198 + ) 1199 + else: 1200 + typer.echo(f"Error: '{source}' is not a directory.", err=True) 1087 1201 raise typer.Exit(1) 1088 1202 1089 - import re 1203 + if target_path.exists() and not target_path.is_dir(): 1204 + if json_output: 1205 + typer.echo( 1206 + json.dumps( 1207 + _merge_error_envelope( 1208 + code="target-not-a-journal", 1209 + message="Target journal path is not a directory.", 1210 + source=source_path, 1211 + dry_run=dry_run, 1212 + details={"target": str(target_path)}, 1213 + ) 1214 + ), 1215 + err=True, 1216 + ) 1217 + else: 1218 + typer.echo( 1219 + f"Error: journal target '{target_path}' is not a directory.", err=True 1220 + ) 1221 + raise typer.Exit(1) 1090 1222 1091 - has_day = any( 1092 - entry.is_dir() and re.match(r"^\d{8}$", entry.name) 1093 - for entry in source_path.iterdir() 1094 - ) 1095 - if not has_day: 1096 - typer.echo( 1097 - f"Error: '{source}' does not appear to be a journal (no YYYYMMDD directories found).", 1098 - err=True, 1099 - ) 1223 + if not _looks_like_journal_source(source_path): 1224 + if json_output: 1225 + typer.echo( 1226 + json.dumps( 1227 + _merge_error_envelope( 1228 + code="source-not-a-journal", 1229 + message="Source path does not look like a journal directory.", 1230 + source=source_path, 1231 + dry_run=dry_run, 1232 + ) 1233 + ), 1234 + err=True, 1235 + ) 1236 + else: 1237 + typer.echo( 1238 + f"Error: '{source}' does not appear to be a journal (no YYYYMMDD directories found).", 1239 + err=True, 1240 + ) 1100 1241 raise typer.Exit(1) 1101 1242 1102 - target_path = Path(get_journal()) 1103 1243 run_id = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ") 1104 1244 artifact_root = target_path.parent / f"{target_path.name}.merge" / run_id 1105 1245 log_path = artifact_root / "decisions.jsonl" 1106 1246 staging_path = artifact_root / "staging" 1107 1247 1108 - summary: MergeSummary = merge_journals( 1109 - source_path, 1110 - target_path, 1111 - dry_run=dry_run, 1112 - log_path=log_path, 1113 - staging_path=staging_path, 1114 - ) 1248 + root_logger = logging.getLogger() 1249 + original_level = root_logger.level 1250 + try: 1251 + if json_output: 1252 + root_logger.setLevel(logging.CRITICAL) 1253 + summary: MergeSummary = merge_journals( 1254 + source_path, 1255 + target_path, 1256 + dry_run=dry_run, 1257 + log_path=log_path, 1258 + staging_path=staging_path, 1259 + ) 1260 + except DecisionLogWriteError as exc: 1261 + if json_output: 1262 + typer.echo( 1263 + json.dumps( 1264 + _merge_error_envelope( 1265 + code="decision-log-write-failed", 
1266 + message="Decision log could not be written.", 1267 + source=source_path, 1268 + dry_run=dry_run, 1269 + details={ 1270 + "exception_type": type(exc).__name__, 1271 + "exception": repr(exc), 1272 + }, 1273 + ) 1274 + ), 1275 + err=True, 1276 + ) 1277 + raise typer.Exit(1) 1278 + raise 1279 + except Exception as exc: 1280 + if json_output: 1281 + typer.echo( 1282 + json.dumps( 1283 + _merge_error_envelope( 1284 + code="merge-engine-error", 1285 + message="Journal merge failed.", 1286 + source=source_path, 1287 + dry_run=dry_run, 1288 + details={ 1289 + "exception_type": type(exc).__name__, 1290 + "exception": repr(exc), 1291 + }, 1292 + ) 1293 + ), 1294 + err=True, 1295 + ) 1296 + raise typer.Exit(1) 1297 + raise 1298 + finally: 1299 + if json_output: 1300 + root_logger.setLevel(original_level) 1115 1301 1116 1302 action = "Would merge" if dry_run else "Merged" 1117 - typer.echo(f"\n{action}:") 1118 - typer.echo( 1119 - f" Segments: {summary.segments_copied} copied, {summary.segments_skipped} skipped, {summary.segments_errored} errored" 1120 - ) 1121 - typer.echo( 1122 - f" Entities: {summary.entities_created} created, {summary.entities_merged} merged, {summary.entities_staged} staged, {summary.entities_skipped} skipped" 1123 - ) 1124 - typer.echo( 1125 - f" Facets: {summary.facets_created} created, {summary.facets_merged} merged" 1126 - ) 1127 - typer.echo( 1128 - f" Imports: {summary.imports_copied} copied, {summary.imports_skipped} skipped" 1129 - ) 1303 + if not json_output: 1304 + typer.echo(f"\n{action}:") 1305 + typer.echo( 1306 + f" Segments: {summary.segments_copied} copied, {summary.segments_skipped} skipped, {summary.segments_errored} errored" 1307 + ) 1308 + typer.echo( 1309 + f" Entities: {summary.entities_created} created, {summary.entities_merged} merged, {summary.entities_staged} staged, {summary.entities_skipped} skipped" 1310 + ) 1311 + typer.echo( 1312 + f" Facets: {summary.facets_created} created, {summary.facets_merged} merged" 1313 + ) 1314 + typer.echo( 1315 + f" Imports: {summary.imports_copied} copied, {summary.imports_skipped} skipped" 1316 + ) 1130 1317 1131 - if summary.errors: 1132 - typer.echo(f"\n{len(summary.errors)} errors:") 1133 - for error in summary.errors: 1134 - typer.echo(f" - {error}") 1318 + if summary.errors: 1319 + typer.echo(f"\n{len(summary.errors)} errors:") 1320 + for error in summary.errors: 1321 + typer.echo(f" - {error}") 1135 1322 1136 - if log_path.exists(): 1137 - typer.echo(f"\nDecision log: {log_path}") 1138 - if summary.entities_staged > 0: 1139 - typer.echo(f"Staged entities: {staging_path}") 1323 + if log_path.exists(): 1324 + typer.echo(f"\nDecision log: {log_path}") 1325 + if summary.entities_staged > 0: 1326 + typer.echo(f"Staged entities: {staging_path}") 1140 1327 1328 + indexer_returncode = 0 1141 1329 if not dry_run: 1142 - subprocess.run( 1330 + indexer_result = subprocess.run( 1143 1331 ["sol", "indexer", "--rescan-full"], 1144 1332 check=False, 1145 1333 capture_output=True, 1146 1334 ) 1335 + indexer_returncode = indexer_result.returncode 1147 1336 1148 1337 log_call_action( 1149 1338 facet=None, ··· 1161 1350 }, 1162 1351 ) 1163 1352 1164 - typer.echo("Index rebuild started.") 1353 + if not json_output: 1354 + typer.echo("Index rebuild started.") 1355 + 1356 + if json_output: 1357 + typer.echo( 1358 + json.dumps( 1359 + { 1360 + "ok": True, 1361 + "code": "ok", 1362 + "source": str(source_path), 1363 + "dry_run": dry_run, 1364 + "summary": asdict(summary), 1365 + "decision_log": str(log_path), 1366 + "staging_dir": 
str(staging_path) 1367 + if summary.entities_staged > 0 1368 + else None, 1369 + "indexer_returncode": indexer_returncode, 1370 + } 1371 + ) 1372 + )