personal memory agent
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'hopper-4hcgwf3c-obsidian-sync-backend'

# Conflicts:
# think/importers/obsidian.py

+312 -35
+277
tests/test_importer_obsidian_sync.py
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright (c) 2026 sol pbc

"""Tests for the Obsidian sync backend (``think.importers.obsidian``).

Each test builds a throwaway vault under pytest's ``tmp_path`` and runs
``ObsidianSyncBackend.sync`` against it, using ``tmp_path`` itself as the
journal root. Project imports are kept inside the test functions, as in the
original module, so a broken importer fails the individual test instead of
breaking collection of the whole file.
"""

import glob
import os
import re
import sys
from pathlib import Path
from textwrap import dedent
from unittest.mock import patch

import pytest


def _write_note(
    vault_dir: Path,
    rel_path: str,
    content: str,
    mtime: float | None = None,
) -> Path:
    """Write a note file to the vault and return its path.

    Parent directories are created as needed. When ``mtime`` is given, both
    the access and modification times of the file are set to it so tests can
    control what the sync backend sees as the note's timestamp.
    """
    path = vault_dir / rel_path
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(content, encoding="utf-8")
    if mtime is not None:
        os.utime(path, (mtime, mtime))
    return path


SAMPLE_NOTE = dedent("""\
    ---
    tags: [project, alpha]
    ---
    # Alpha Project

    This is a note about the [[Alpha Project]].
    See also [[Bob Smith]] and [[Design Doc]].
    """)

SAMPLE_NOTE_2 = dedent("""\
    # Daily Note

    Today I worked on [[Beta Launch]].
    """)

UPDATED_NOTE = dedent("""\
    ---
    tags: [project, alpha]
    ---
    # Alpha Project

    This is an updated note about the [[Alpha Project]].
    See also [[Bob Smith]], [[Design Doc]], and [[Launch Plan]].
    """)


def test_obsidian_sync_protocol_conformance():
    """ObsidianSyncBackend satisfies SyncableBackend protocol."""
    from think.importers.obsidian import ObsidianSyncBackend
    from think.importers.sync import SyncableBackend

    assert isinstance(ObsidianSyncBackend(), SyncableBackend)


def test_obsidian_sync_registry_discovery():
    """Registry discovery includes obsidian."""
    from think.importers.sync import get_syncable_backends

    backends = get_syncable_backends()
    assert "obsidian" in [backend.name for backend in backends]


def test_obsidian_sync_dry_run(tmp_path):
    """Dry-run catalogs notes and saves state."""
    from think.importers.obsidian import ObsidianSyncBackend
    from think.importers.sync import load_sync_state

    vault = tmp_path / "vault"
    _write_note(vault, "Projects/Alpha.md", SAMPLE_NOTE, mtime=1_700_000_000)
    _write_note(vault, "Daily/2026-03-14.md", SAMPLE_NOTE_2, mtime=1_700_000_600)

    result = ObsidianSyncBackend().sync(tmp_path, source_path=vault, dry_run=True)

    assert result["total"] >= 2
    assert result["available"] == 2
    assert result["imported"] == 0
    assert result["downloaded"] == 0

    state = load_sync_state(tmp_path, "obsidian")
    assert state is not None
    assert state["files"]["Projects/Alpha.md"]["status"] == "available"
    assert state["files"]["Daily/2026-03-14.md"]["status"] == "available"


def test_obsidian_sync_import(tmp_path, monkeypatch):
    """Import mode writes note segments and updates state."""
    from think.importers.obsidian import ObsidianSyncBackend
    from think.importers.sync import load_sync_state

    monkeypatch.setenv("JOURNAL_PATH", str(tmp_path))
    vault = tmp_path / "vault"
    _write_note(vault, "Projects/Alpha.md", SAMPLE_NOTE, mtime=1_700_000_000)

    result = ObsidianSyncBackend().sync(tmp_path, source_path=vault, dry_run=False)

    assert result["downloaded"] >= 1
    assert result["imported"] >= 1

    segments = glob.glob(str(tmp_path / "*/import.obsidian/*/note_transcript.md"))
    assert len(segments) >= 1

    state = load_sync_state(tmp_path, "obsidian")
    assert state is not None
    assert state["files"]["Projects/Alpha.md"]["status"] == "imported"


def test_obsidian_sync_edit_creates_new_segments(tmp_path, monkeypatch):
    """Editing a note creates new segments and preserves old ones."""
    from think.importers.obsidian import ObsidianSyncBackend
    from think.importers.sync import load_sync_state

    monkeypatch.setenv("JOURNAL_PATH", str(tmp_path))
    vault = tmp_path / "vault"
    _write_note(vault, "Projects/Alpha.md", SAMPLE_NOTE, mtime=1_700_000_000)

    backend = ObsidianSyncBackend()
    first = backend.sync(tmp_path, source_path=vault, dry_run=False)
    assert first["downloaded"] == 1
    first_segments = sorted(
        glob.glob(str(tmp_path / "*/import.obsidian/*/note_transcript.md"))
    )
    assert len(first_segments) == 1

    # Same relative path, new content and a later mtime: an edit, not a new note.
    _write_note(vault, "Projects/Alpha.md", UPDATED_NOTE, mtime=1_700_000_900)
    second = backend.sync(tmp_path, source_path=vault, dry_run=False)
    assert second["downloaded"] == 1

    all_segments = sorted(
        glob.glob(str(tmp_path / "*/import.obsidian/*/note_transcript.md"))
    )
    assert len(all_segments) == 2
    assert first_segments[0] in all_segments

    state = load_sync_state(tmp_path, "obsidian")
    assert state is not None
    assert state["files"]["Projects/Alpha.md"]["edit_count"] >= 2


def test_obsidian_sync_unchanged_skip(tmp_path, monkeypatch):
    """Mtime-only changes are skipped when content hash matches."""
    from think.importers.obsidian import ObsidianSyncBackend

    monkeypatch.setenv("JOURNAL_PATH", str(tmp_path))
    vault = tmp_path / "vault"
    _write_note(vault, "Projects/Alpha.md", SAMPLE_NOTE, mtime=1_700_000_000)

    backend = ObsidianSyncBackend()
    backend.sync(tmp_path, source_path=vault, dry_run=False)
    # Rewrite identical content with a newer mtime only.
    _write_note(vault, "Projects/Alpha.md", SAMPLE_NOTE, mtime=1_700_000_300)

    result = backend.sync(tmp_path, source_path=vault, dry_run=True)
    assert result["available"] == 0


def test_obsidian_sync_deleted_note(tmp_path, monkeypatch):
    """Deleted notes are marked removed in state."""
    from think.importers.obsidian import ObsidianSyncBackend
    from think.importers.sync import load_sync_state

    monkeypatch.setenv("JOURNAL_PATH", str(tmp_path))
    vault = tmp_path / "vault"
    note = _write_note(vault, "Projects/Alpha.md", SAMPLE_NOTE, mtime=1_700_000_000)

    backend = ObsidianSyncBackend()
    backend.sync(tmp_path, source_path=vault, dry_run=False)
    note.unlink()
    backend.sync(tmp_path, source_path=vault, dry_run=True)

    state = load_sync_state(tmp_path, "obsidian")
    assert state is not None
    assert state["files"]["Projects/Alpha.md"]["status"] == "removed"


def test_obsidian_sync_force(tmp_path, monkeypatch):
    """Force re-detects notes by clearing state."""
    from think.importers.obsidian import ObsidianSyncBackend

    monkeypatch.setenv("JOURNAL_PATH", str(tmp_path))
    vault = tmp_path / "vault"
    _write_note(vault, "Projects/Alpha.md", SAMPLE_NOTE, mtime=1_700_000_000)

    backend = ObsidianSyncBackend()
    backend.sync(tmp_path, source_path=vault, dry_run=False)
    result = backend.sync(tmp_path, source_path=vault, dry_run=True, force=True)

    assert result["available"] >= 1


def test_obsidian_sync_vault_auto_detection(tmp_path, monkeypatch):
    """Raises when no vault can be auto-detected."""
    from think.importers.obsidian import ObsidianSyncBackend

    home = tmp_path / "home"
    home.mkdir()
    monkeypatch.setattr("think.importers.obsidian.Path.home", lambda: home)

    # ``match`` is applied as a regular expression (re.search); escape the
    # literal message so its "." characters only match real periods.
    expected_message = (
        "No Obsidian vault found. Use --path to specify your vault location."
    )
    with pytest.raises(ValueError, match=re.escape(expected_message)):
        ObsidianSyncBackend().sync(tmp_path)


def test_obsidian_sync_entity_seeding(tmp_path, monkeypatch):
    """Wikilinks are converted into Topic entities on import."""
    from think.importers.obsidian import ObsidianSyncBackend

    monkeypatch.setenv("JOURNAL_PATH", str(tmp_path))
    vault = tmp_path / "vault"
    _write_note(vault, "Projects/Alpha.md", SAMPLE_NOTE, mtime=1_700_000_000)

    captured: list[tuple[str, str, list[dict[str, str]]]] = []

    def _fake_seed_entities(
        facet: str,
        day: str,
        entities: list[dict[str, str]],
    ) -> list[dict[str, str]]:
        """Record the call and echo the entities back unchanged."""
        captured.append((facet, day, entities))
        return entities

    with patch(
        "think.importers.obsidian.seed_entities", side_effect=_fake_seed_entities
    ):
        ObsidianSyncBackend().sync(tmp_path, source_path=vault, dry_run=False)

    assert len(captured) == 1
    facet, _day, entities = captured[0]
    assert facet == "import.obsidian"
    assert entities == [
        {"name": "Alpha Project", "type": "Topic"},
        {"name": "Bob Smith", "type": "Topic"},
        {"name": "Design Doc", "type": "Topic"},
    ]


def test_obsidian_sync_incremental(tmp_path, monkeypatch):
    """Incremental sync imports only newly added notes."""
    from think.importers.obsidian import ObsidianSyncBackend

    monkeypatch.setenv("JOURNAL_PATH", str(tmp_path))
    vault = tmp_path / "vault"
    _write_note(vault, "Projects/Alpha.md", SAMPLE_NOTE, mtime=1_700_000_000)

    backend = ObsidianSyncBackend()
    first = backend.sync(tmp_path, source_path=vault, dry_run=False)
    assert first["downloaded"] == 1

    _write_note(vault, "Daily/2026-03-14.md", SAMPLE_NOTE_2, mtime=1_700_000_600)
    second = backend.sync(tmp_path, source_path=vault, dry_run=False)

    assert second["downloaded"] == 1
    assert second["available"] == 0
    assert second["imported"] >= 1


def test_obsidian_backends_cli_flag(capsys, monkeypatch, tmp_path):
    """sol import --backends lists obsidian."""
    from think.importers.cli import main

    monkeypatch.setattr(sys, "argv", ["sol import", "--backends"])
    # Use the per-test temporary directory instead of a fixed shared /tmp
    # path so the test stays isolated and portable across platforms.
    monkeypatch.setenv("JOURNAL_PATH", str(tmp_path))

    main()
    captured = capsys.readouterr()
    assert "obsidian" in captured.out
+35 -35
think/importers/obsidian.py
··· 154 154 return "\n".join(lines) 155 155 156 156 157 + def _walk_md_files(root: Path) -> list[Path]: 158 + """Walk vault directory, collecting markdown files. Skips hidden dirs and logseq recycle.""" 159 + md_files: list[Path] = [] 160 + for dirpath, dirnames, filenames in os.walk(root): 161 + dirnames[:] = [ 162 + d 163 + for d in dirnames 164 + if not _is_hidden(d) 165 + and not (d == ".recycle" and Path(dirpath).name == "logseq") 166 + ] 167 + rel = Path(dirpath).relative_to(root) 168 + if ( 169 + len(rel.parts) >= 2 170 + and rel.parts[0] == "logseq" 171 + and rel.parts[1] == ".recycle" 172 + ): 173 + continue 174 + 175 + for fname in filenames: 176 + if _is_hidden(fname): 177 + continue 178 + fpath = Path(dirpath) / fname 179 + ext = fpath.suffix.lower() 180 + if ext != ".md": 181 + continue 182 + if ext in SKIP_EXTENSIONS: 183 + continue 184 + md_files.append(fpath) 185 + return md_files 186 + 187 + 157 188 class ObsidianImporter: 158 189 name = "obsidian" 159 190 display_name = "Obsidian / Logseq Vault" 160 191 file_patterns = ["*.md"] 161 192 description = "Import notes from an Obsidian or Logseq vault" 193 + 194 + def _walk_md_files(self, root: Path) -> list[Path]: 195 + return _walk_md_files(root) 162 196 163 197 def detect(self, path: Path) -> bool: 164 198 if not path.is_dir(): ··· 389 423 date_range=(earliest, latest), 390 424 ) 391 425 392 - def _walk_md_files(self, root: Path) -> list[Path]: 393 - """Walk vault directory, yielding markdown files. 
Skips hidden dirs and logseq recycle.""" 394 - md_files: list[Path] = [] 395 - for dirpath, dirnames, filenames in os.walk(root): 396 - # Filter out hidden directories and logseq recycle in-place 397 - dirnames[:] = [ 398 - d 399 - for d in dirnames 400 - if not _is_hidden(d) 401 - and not (d == ".recycle" and Path(dirpath).name == "logseq") 402 - ] 403 - # Also skip the logseq/.recycle path explicitly 404 - rel = Path(dirpath).relative_to(root) 405 - if ( 406 - len(rel.parts) >= 2 407 - and rel.parts[0] == "logseq" 408 - and rel.parts[1] == ".recycle" 409 - ): 410 - continue 411 - 412 - for fname in filenames: 413 - if _is_hidden(fname): 414 - continue 415 - fpath = Path(dirpath) / fname 416 - ext = fpath.suffix.lower() 417 - if ext != ".md": 418 - continue 419 - if ext in SKIP_EXTENSIONS: 420 - continue 421 - md_files.append(fpath) 422 - return md_files 423 - 424 426 425 427 # Common Obsidian vault locations for auto-detection 426 428 DEFAULT_VAULT_PATHS = [ ··· 504 506 known_files: dict[str, dict[str, Any]] = state.get("files", {}) 505 507 506 508 # Walk vault using existing importer logic 507 - md_files = importer._walk_md_files(vault_path) 509 + md_files = _walk_md_files(vault_path) 508 510 509 511 current_rel_paths: set[str] = set() 510 512 to_import: list[tuple[Path, str, str]] = [] # (path, rel_path, change_type) ··· 683 685 684 686 685 687 importer = ObsidianImporter() 686 - 687 - # Module-level backend instance for registry discovery 688 688 backend = ObsidianSyncBackend()