personal memory agent
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Rewrite Obsidian importer: creation-moment segments with markdown output

+218 -40
+91
tests/test_importer.py
def test_obsidian_process_segments(tmp_path, monkeypatch):
    """Obsidian importer writes creation-moment segments with markdown output.

    Builds a small vault with three notes, pins each file's mtime, runs
    ``ObsidianImporter.process`` and checks that the notes land in two
    ``<HHMMSS>_300`` segments on the same day, each rendered to
    ``import.obsidian/<segment>/imported.md``.
    """
    # Local import: the test module's import header is not part of this hunk.
    import os

    mod = importlib.import_module("think.importers.obsidian")

    vault = tmp_path / "vault"
    vault.mkdir()
    (vault / ".obsidian").mkdir()

    # Note 1: knowledge note with frontmatter and wikilinks
    note1 = vault / "Project Ideas.md"
    note1.write_text(
        "---\ntags: [work, ideas]\n---\n\n"
        "Some thoughts about [[Alpha]] and [[Beta]].\n",
        encoding="utf-8",
    )

    # Note 2: another knowledge note, same 5-min window
    note2 = vault / "Meeting Notes.md"
    note2.write_text("Notes from meeting with [[Charlie]].\n", encoding="utf-8")

    # Note 3: daily note — still uses mtime for segment placement
    note3 = vault / "2026-03-01.md"
    note3.write_text("Daily log entry.\n", encoding="utf-8")

    # Set mtimes: note1 and note2 within 5 min window, note3 in a different window
    base_ts = dt.datetime(2026, 3, 15, 10, 0, 0).timestamp()
    os.utime(note1, (base_ts, base_ts))
    os.utime(note2, (base_ts + 60, base_ts + 60))  # 1 min later, same window
    os.utime(note3, (base_ts + 600, base_ts + 600))  # 10 min later, different window

    monkeypatch.setenv("JOURNAL_PATH", str(tmp_path))

    result = mod.ObsidianImporter().process(vault, tmp_path)

    assert result.entries_written == 3
    assert result.errors == []
    assert len(result.segments) == 2
    assert len(result.files_created) == 2

    # Both segments on same day (20260315) since all mtimes are on that day
    first_day, first_key = result.segments[0]
    second_day, second_key = result.segments[1]
    assert first_day == "20260315"
    assert second_day == "20260315"
    assert first_key == "100000_300"
    assert second_key == "101000_300"

    first_md = day_path("20260315") / "import.obsidian" / "100000_300" / "imported.md"
    second_md = day_path("20260315") / "import.obsidian" / "101000_300" / "imported.md"
    assert first_md.exists()
    assert second_md.exists()

    # The importer writes imported.md as UTF-8, so read it back the same way.
    first_content = first_md.read_text(encoding="utf-8")
    assert "## Project Ideas" in first_content
    assert "Tags: work, ideas" in first_content
    assert "[[Alpha]]" in first_content
    assert "[[Beta]]" in first_content
    assert "Some thoughts about" in first_content
    # Frontmatter should be stripped from content
    assert "---" not in first_content
    assert "## Meeting Notes" in first_content
    assert "[[Charlie]]" in first_content

    second_content = second_md.read_text(encoding="utf-8")
    assert "## 2026-03-01" in second_content
    assert "Daily log entry." in second_content


def test_obsidian_render_note_markdown():
    """Test markdown rendering for a single note.

    Feeds ``_render_note_markdown`` one note dict and checks the heading,
    the Source/Tags/Links metadata lines, the body text, and that the YAML
    frontmatter delimiter does not appear in the output.
    """
    mod = importlib.import_module("think.importers.obsidian")

    note = {
        "title": "Test Note",
        "source_path": "subfolder/Test Note.md",
        "tags": ["project", "draft"],
        "wikilinks": ["Alice", "Project X"],
        "content": "---\ntags: [project, draft]\n---\n\nMain content here.\n",
    }

    rendered = mod._render_note_markdown(note)

    assert "## Test Note" in rendered
    assert "Source: subfolder/Test Note.md" in rendered
    assert "Tags: project, draft" in rendered
    assert "Links: [[Alice]], [[Project X]]" in rendered
    assert "Main content here." in rendered
    # Frontmatter stripped
    assert "---" not in rendered
+127 -40
think/importers/obsidian.py
··· 11 11 from typing import Any, Callable 12 12 13 13 from think.importers.file_importer import ImportPreview, ImportResult 14 - from think.importers.shared import seed_entities, write_structured_import 14 + from think.importers.shared import seed_entities 15 + from think.utils import day_path 15 16 16 17 logger = logging.getLogger(__name__) 17 18 ··· 109 110 return None 110 111 111 112 113 + def _strip_frontmatter(content: str) -> str: 114 + """Remove YAML frontmatter block from content.""" 115 + return FRONTMATTER_RE.sub("", content) 116 + 117 + 112 118 def _is_hidden(name: str) -> bool: 113 119 """Check if a filename/dirname starts with a dot.""" 114 120 return name.startswith(".") 115 121 116 122 123 + def _window_notes( 124 + notes: list[dict[str, Any]], 125 + window_duration: int = 300, 126 + ) -> list[tuple[str, str, list[dict[str, Any]]]]: 127 + """Group sorted notes into fixed-duration windows by mtime.""" 128 + if not notes: 129 + return [] 130 + 131 + windows: list[tuple[str, str, list[dict[str, Any]]]] = [] 132 + window_start: float | None = None 133 + window_day: str | None = None 134 + window_notes: list[dict[str, Any]] = [] 135 + 136 + for note in notes: 137 + mtime = note["mtime"] 138 + note_dt = dt.datetime.fromtimestamp(mtime) 139 + note_day = note_dt.strftime("%Y%m%d") 140 + 141 + if ( 142 + window_start is None 143 + or note_day != window_day 144 + or mtime - window_start >= window_duration 145 + ): 146 + if window_notes and window_day and window_start is not None: 147 + start_dt = dt.datetime.fromtimestamp(window_start) 148 + seg_key = f"{start_dt.strftime('%H%M%S')}_{window_duration}" 149 + windows.append((window_day, seg_key, window_notes)) 150 + 151 + window_start = mtime 152 + window_day = note_day 153 + window_notes = [] 154 + 155 + window_notes.append(note) 156 + 157 + if window_notes and window_day and window_start is not None: 158 + start_dt = dt.datetime.fromtimestamp(window_start) 159 + seg_key = 
f"{start_dt.strftime('%H%M%S')}_{window_duration}" 160 + windows.append((window_day, seg_key, window_notes)) 161 + 162 + return windows 163 + 164 + 165 + def _render_note_markdown(note: dict[str, Any]) -> str: 166 + """Render a note as markdown for imported.md output.""" 167 + title = note.get("title", "Untitled") 168 + lines = [f"## {title}"] 169 + 170 + source_path = note.get("source_path", "") 171 + if source_path: 172 + lines.append(f"Source: {source_path}") 173 + 174 + tags = note.get("tags", []) 175 + if tags: 176 + lines.append(f"Tags: {', '.join(tags)}") 177 + 178 + wikilinks = note.get("wikilinks", []) 179 + if wikilinks: 180 + lines.append("Links: " + ", ".join(f"[[{link}]]" for link in wikilinks)) 181 + 182 + content = note.get("content", "") 183 + if content: 184 + stripped = _strip_frontmatter(content).strip() 185 + if stripped: 186 + lines.append("") 187 + lines.append(stripped) 188 + 189 + return "\n".join(lines) 190 + 191 + 117 192 class ObsidianImporter: 118 193 name = "obsidian" 119 194 display_name = "Obsidian / Logseq Vault" ··· 149 224 date = _parse_daily_note_date(md_path.name) 150 225 if date: 151 226 daily_count += 1 152 - dates.append(date.strftime("%Y%m%d")) 153 227 else: 154 228 knowledge_count += 1 155 - try: 156 - mtime = dt.datetime.fromtimestamp(md_path.stat().st_mtime) 157 - dates.append(mtime.strftime("%Y%m%d")) 158 - except OSError: 159 - pass 229 + 230 + try: 231 + mtime = dt.datetime.fromtimestamp(md_path.stat().st_mtime) 232 + dates.append(mtime.strftime("%Y%m%d")) 233 + except OSError: 234 + pass 160 235 161 236 content = _read_file_safe(md_path) 162 237 if content: ··· 189 264 facet: str | None = None, 190 265 progress_callback: Callable | None = None, 191 266 ) -> ImportResult: 192 - import_id = dt.datetime.now().strftime("%Y%m%d_%H%M%S") 193 267 md_files = list(self._walk_md_files(path)) 194 268 total = len(md_files) 195 269 196 - entries: list[dict[str, Any]] = [] 270 + notes: list[dict[str, Any]] = [] 197 271 
all_wikilinks: set[str] = set() 198 272 errors: list[str] = [] 199 273 ··· 207 281 208 282 rel_path = str(md_path.relative_to(path)) 209 283 title = md_path.stem 284 + is_daily = _parse_daily_note_date(md_path.name) is not None 210 285 211 - # Classify as daily note or knowledge note 212 - date = _parse_daily_note_date(md_path.name) 213 - is_daily = date is not None 214 - if date: 215 - ts = dt.datetime.combine(date, dt.time()).isoformat() 216 - else: 217 - try: 218 - mtime = md_path.stat().st_mtime 219 - ts = dt.datetime.fromtimestamp(mtime).isoformat() 220 - except OSError: 221 - ts = dt.datetime.now().isoformat() 286 + # All notes use file mtime for segment placement (creation-moment principle) 287 + try: 288 + mtime = md_path.stat().st_mtime 289 + except OSError: 290 + mtime = dt.datetime.now().timestamp() 222 291 223 - # Extract metadata 224 292 tags = _parse_frontmatter_tags(content) 225 293 wikilinks = WIKILINK_RE.findall(content) 226 294 all_wikilinks.update(wikilinks) 227 295 228 - entries.append( 296 + notes.append( 229 297 { 230 - "type": "note", 231 - "ts": ts, 298 + "mtime": mtime, 232 299 "title": title, 233 300 "content": content, 234 301 "source_path": rel_path, ··· 241 308 if progress_callback: 242 309 progress_callback(i + 1, total) 243 310 244 - # Write to journal 245 - created_files = write_structured_import( 246 - "obsidian", 247 - entries, 248 - import_id=import_id, 249 - facet=facet, 250 - ) 311 + if not notes: 312 + return ImportResult( 313 + entries_written=0, 314 + entities_seeded=0, 315 + files_created=[], 316 + errors=errors, 317 + summary="No notes found to import", 318 + ) 319 + 320 + notes.sort(key=lambda n: n["mtime"]) 321 + 322 + windows = _window_notes(notes) 323 + created_files: list[str] = [] 324 + segments: list[tuple[str, str]] = [] 325 + 326 + for day, seg_key, window_notes_list in windows: 327 + segment_dir = day_path(day) / "import.obsidian" / seg_key 328 + segment_dir.mkdir(parents=True, exist_ok=True) 329 + md_path = 
segment_dir / "imported.md" 330 + markdown = "\n\n".join( 331 + _render_note_markdown(note) for note in window_notes_list 332 + ) 333 + md_path.write_text(markdown + "\n", encoding="utf-8") 334 + created_files.append(str(md_path)) 335 + segments.append((day, seg_key)) 251 336 252 337 # Seed entities from wikilinks 253 338 entities_seeded = 0 254 339 if all_wikilinks and facet: 255 - # Use the earliest date for seeding 256 - dates = sorted(e["ts"] for e in entries) 257 - day = ( 258 - dt.datetime.fromisoformat(dates[0]).strftime("%Y%m%d") 259 - if dates 260 - else dt.datetime.now().strftime("%Y%m%d") 261 - ) 340 + day = segments[0][0] if segments else dt.datetime.now().strftime("%Y%m%d") 262 341 entity_dicts = [ 263 342 {"name": link, "type": "Topic"} for link in sorted(all_wikilinks) 264 343 ] 265 344 resolved = seed_entities(facet, day, entity_dicts) 266 345 entities_seeded = len(resolved) 267 346 347 + daily_count = sum(1 for n in notes if n["is_daily"]) 348 + knowledge_count = len(notes) - daily_count 349 + 268 350 return ImportResult( 269 - entries_written=len(entries), 351 + entries_written=len(notes), 270 352 entities_seeded=entities_seeded, 271 353 files_created=created_files, 272 354 errors=errors, 273 - summary=f"Imported {len(entries)} notes ({sum(1 for e in entries if e['is_daily'])} daily, {sum(1 for e in entries if not e['is_daily'])} knowledge) across {len(created_files)} days", 355 + summary=( 356 + f"Imported {len(notes)} notes ({daily_count} daily, " 357 + f"{knowledge_count} knowledge) across " 358 + f"{len({d for d, _ in segments})} days into {len(segments)} segments" 359 + ), 360 + segments=segments, 274 361 ) 275 362 276 363 def _walk_md_files(self, root: Path) -> list[Path]: