personal memory agent
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Add import UX backend: progress stats, source metadata, and enriched events

Enhance the import system backend to support a guided, source-aware
import experience with accumulating item stats in Callosum events,
source metadata and guide endpoints, and enriched imported.json for
both file-importer and generic import paths.

+452 -46
+10
apps/import/guides/chatgpt.md
··· 1 + # Exporting Your ChatGPT History 2 + 3 + 1. Open [ChatGPT](https://chat.openai.com) 4 + 2. Click your profile icon in the bottom-left corner 5 + 3. Select **Settings** 6 + 4. Go to **Data controls** 7 + 5. Click **Export data** 8 + 6. You'll receive an email with a download link — this may take a few minutes 9 + 7. Download the .zip file from the email 10 + 8. Upload the .zip file here
+10
apps/import/guides/claude.md
··· 1 + # Exporting Your Claude History 2 + 3 + 1. Open [Claude](https://claude.ai) 4 + 2. Click your profile icon in the bottom-left corner 5 + 3. Select **Settings** 6 + 4. Scroll to the **Account** section 7 + 5. Click **Export Data** 8 + 6. You'll receive an email with a download link 9 + 7. Download the .zip file from the email 10 + 8. Upload the .zip file here
+11
apps/import/guides/gemini.md
··· 1 + # Exporting Your Gemini History 2 + 3 + 1. Go to [Google Takeout](https://takeout.google.com) 4 + 2. Click **Deselect all** 5 + 3. Scroll down and select only **My Activity** 6 + 4. Click **All activity data included**, then deselect everything except **Gemini Apps** 7 + 5. Click **OK**, then scroll down and click **Next step** 8 + 6. Choose **Export once**, file type **.zip**, and your preferred size 9 + 7. Click **Create export** — this may take minutes to hours depending on your history 10 + 8. Download the .zip file when ready 11 + 9. Upload the .zip file here
+24
apps/import/guides/ics.md
··· 1 + # Exporting Your Calendar 2 + 3 + ## Google Calendar 4 + 5 + 1. Open [Google Calendar Settings](https://calendar.google.com/calendar/r/settings/export) 6 + 2. Click **Export** — this downloads a .zip file containing .ics files for each calendar 7 + 3. Upload the .zip file here 8 + 9 + ## Apple Calendar 10 + 11 + 1. Open the **Calendar** app on your Mac 12 + 2. Select the calendar you want to export in the sidebar 13 + 3. Go to **File → Export → Export…** 14 + 4. Save the .ics file 15 + 5. Upload it here 16 + 17 + ## Outlook 18 + 19 + 1. Open [Outlook Calendar](https://outlook.live.com/calendar) 20 + 2. Go to **Settings → View all Outlook settings** 21 + 3. Select **Calendar → Shared calendars** 22 + 4. Under **Publish a calendar**, select the calendar and choose **Can view all details** 23 + 5. Click the **ICS** link to download 24 + 6. Upload the .ics file here
+10
apps/import/guides/kindle.md
··· 1 + # Exporting Your Kindle Highlights 2 + 3 + 1. Connect your Kindle to your computer via USB 4 + 2. Open the Kindle drive in your file manager 5 + 3. Navigate to the **documents** folder 6 + 4. Find the file called **My Clippings.txt** 7 + 5. Copy it to your computer 8 + 6. Upload the file here 9 + 10 + If you use the Kindle app instead of a device, you can find your highlights at [read.amazon.com/notebook](https://read.amazon.com/notebook).
+21
apps/import/guides/obsidian.md
··· 1 + # Exporting Your Notes 2 + 3 + ## Obsidian 4 + 5 + 1. Open your file manager and navigate to your Obsidian vault folder 6 + 2. Select all files and folders in the vault 7 + 3. Compress them into a .zip file 8 + 4. Upload the .zip file here 9 + 10 + Your vault location is shown in **Settings → About → Vault path**. 11 + 12 + ## Logseq 13 + 14 + 1. Open your file manager and navigate to your Logseq graph folder 15 + 2. Select all files and folders 16 + 3. Compress them into a .zip file 17 + 4. Upload the .zip file here 18 + 19 + ## Other Markdown Notes 20 + 21 + Any folder of .md files can be imported. Zip the folder and upload it here.
+115
apps/import/routes.py
··· 3 3 4 4 from __future__ import annotations 5 5 6 + import re 6 7 import time 7 8 from pathlib import Path 8 9 from typing import Any ··· 29 30 __name__, 30 31 url_prefix="/app/import", 31 32 ) 33 + 34 + SOURCE_METADATA = [ 35 + { 36 + "name": "ics", 37 + "display_name": "Calendar", 38 + "icon": "calendar", 39 + "description": "Import events from Google Calendar, Apple Calendar, or Outlook", 40 + "input_type": "file", 41 + "upload_prompt": "Upload your .ics file or .zip export", 42 + "has_guide": True, 43 + "accept": ".ics,.zip", 44 + }, 45 + { 46 + "name": "chatgpt", 47 + "display_name": "ChatGPT", 48 + "icon": "message-square", 49 + "description": "Import your conversation history from ChatGPT", 50 + "input_type": "file", 51 + "upload_prompt": "Upload your ChatGPT export .zip file", 52 + "has_guide": True, 53 + "accept": ".zip", 54 + }, 55 + { 56 + "name": "claude", 57 + "display_name": "Claude", 58 + "icon": "message-circle", 59 + "description": "Import your conversation history from Claude", 60 + "input_type": "file", 61 + "upload_prompt": "Upload your Claude export .zip file", 62 + "has_guide": True, 63 + "accept": ".zip", 64 + }, 65 + { 66 + "name": "gemini", 67 + "display_name": "Gemini", 68 + "icon": "sparkles", 69 + "description": "Import your activity history from Google Gemini", 70 + "input_type": "file", 71 + "upload_prompt": "Upload your Google Takeout .zip file", 72 + "has_guide": True, 73 + "accept": ".zip,.json", 74 + }, 75 + { 76 + "name": "obsidian", 77 + "display_name": "Notes", 78 + "icon": "file-text", 79 + "description": "Import notes from Obsidian, Logseq, or any markdown vault", 80 + "input_type": "directory", 81 + "upload_prompt": "Upload your vault as a .zip file", 82 + "has_guide": True, 83 + "accept": ".zip", 84 + }, 85 + { 86 + "name": "kindle", 87 + "display_name": "Kindle", 88 + "icon": "book-open", 89 + "description": "Import highlights and clippings from your Kindle", 90 + "input_type": "file", 91 + "upload_prompt": "Upload your 
My Clippings.txt file", 92 + "has_guide": True, 93 + "accept": ".txt", 94 + }, 95 + { 96 + "name": "recording", 97 + "display_name": "Meeting Recording", 98 + "icon": "mic", 99 + "description": "Import audio recordings of meetings or conversations", 100 + "input_type": "file", 101 + "upload_prompt": "Upload an audio file (.m4a, .mp3, .wav)", 102 + "has_guide": False, 103 + "accept": ".m4a,.mp3,.wav,.ogg,.webm", 104 + }, 105 + { 106 + "name": "document", 107 + "display_name": "Document", 108 + "icon": "file", 109 + "description": "Import a document, PDF, or text file", 110 + "input_type": "file", 111 + "upload_prompt": "Upload a document (.pdf, .txt, .md)", 112 + "has_guide": False, 113 + "accept": ".pdf,.txt,.md", 114 + }, 115 + { 116 + "name": "quick", 117 + "display_name": "Quick Import", 118 + "icon": "zap", 119 + "description": "Paste text or drop any file for quick import", 120 + "input_type": "text", 121 + "upload_prompt": "Paste text or drag and drop a file", 122 + "has_guide": False, 123 + "accept": "", 124 + }, 125 + ] 32 126 33 127 34 128 @import_bp.route("/api/save", methods=["POST"]) ··· 240 334 imports.sort(key=lambda x: x.get("imported_at", 0), reverse=True) 241 335 242 336 return jsonify(imports) 337 + 338 + 339 + @import_bp.route("/api/sources") 340 + def import_sources() -> Any: 341 + """Return available import source metadata.""" 342 + return jsonify(SOURCE_METADATA) 343 + 344 + 345 + @import_bp.route("/api/guide/<source>") 346 + def import_guide(source: str) -> Any: 347 + """Return export guide markdown for a given source.""" 348 + if not re.fullmatch(r"[a-z_]+", source): 349 + return jsonify({"error": "Invalid source name"}), 400 350 + guide_path = Path(__file__).parent / "guides" / f"{source}.md" 351 + if not guide_path.is_file(): 352 + return jsonify({"error": f"No guide available for '{source}'"}), 404 353 + return ( 354 + guide_path.read_text(encoding="utf-8"), 355 + 200, 356 + {"Content-Type": "text/markdown; charset=utf-8"}, 357 + ) 243 
358 244 359 245 360 @import_bp.route("/<timestamp>")
+13 -3
tests/test_importer.py
··· 7 7 import subprocess 8 8 import zipfile 9 9 from pathlib import Path 10 - from unittest.mock import MagicMock, patch 10 + from unittest.mock import ANY, MagicMock, patch 11 11 12 12 from think.importers.file_importer import ImportPreview, ImportResult 13 13 from think.utils import day_path ··· 942 942 943 943 mod.main() 944 944 945 - mock_imp.process.assert_called_once_with(Path(ics_file), Path(tmp_path), facet=None) 945 + mock_imp.process.assert_called_once_with( 946 + Path(ics_file), 947 + Path(tmp_path), 948 + facet=None, 949 + progress_callback=ANY, 950 + ) 946 951 mock_call = callosum.emit.call_args_list[0] 947 952 assert mock_call.args[0] == "importer" 948 953 assert mock_call.args[1] == "started" ··· 982 987 983 988 mod.main() 984 989 985 - mock_imp.process.assert_called_once_with(Path(ics_file), Path(tmp_path), facet=None) 990 + mock_imp.process.assert_called_once_with( 991 + Path(ics_file), 992 + Path(tmp_path), 993 + facet=None, 994 + progress_callback=ANY, 995 + ) 986 996 mock_call = callosum.emit.call_args_list[0] 987 997 assert mock_call.args[0] == "importer" 988 998 assert mock_call.args[1] == "started"
+14 -1
think/importers/chatgpt.py
··· 206 206 ) 207 207 208 208 messages.sort(key=lambda m: m["create_time"]) 209 + earliest = dt.datetime.fromtimestamp( 210 + messages[0]["create_time"], tz=dt.timezone.utc 211 + ).strftime("%Y%m%d") 212 + latest = dt.datetime.fromtimestamp( 213 + messages[-1]["create_time"], tz=dt.timezone.utc 214 + ).strftime("%Y%m%d") 209 215 210 216 if progress_callback: 211 - progress_callback(len(conversations), len(conversations)) 217 + progress_callback( 218 + len(conversations), 219 + len(conversations), 220 + earliest_date=earliest, 221 + latest_date=latest, 222 + entities_found=0, 223 + ) 212 224 213 225 windows = _window_messages(messages) 214 226 created_files: list[str] = [] ··· 254 266 f"{len(segments)} segments{model_info}" 255 267 ), 256 268 segments=segments, 269 + date_range=(earliest, latest), 257 270 ) 258 271 259 272
+14 -1
think/importers/claude_chat.py
··· 169 169 ) 170 170 171 171 messages.sort(key=lambda m: m["create_time"]) 172 + earliest = dt.datetime.fromtimestamp( 173 + messages[0]["create_time"], tz=dt.timezone.utc 174 + ).strftime("%Y%m%d") 175 + latest = dt.datetime.fromtimestamp( 176 + messages[-1]["create_time"], tz=dt.timezone.utc 177 + ).strftime("%Y%m%d") 172 178 173 179 if progress_callback: 174 - progress_callback(len(conversations), len(conversations)) 180 + progress_callback( 181 + len(conversations), 182 + len(conversations), 183 + earliest_date=earliest, 184 + latest_date=latest, 185 + entities_found=0, 186 + ) 175 187 176 188 windows = _window_messages(messages) 177 189 created_files: list[str] = [] ··· 213 225 f"{len(segments)} segments" 214 226 ), 215 227 segments=segments, 228 + date_range=(earliest, latest), 216 229 ) 217 230 218 231
+107 -36
think/importers/cli.py
··· 11 11 import threading 12 12 import time 13 13 from pathlib import Path 14 + from typing import Any 14 15 15 16 from think.callosum import CallosumConnection 16 17 from think.detect_created import detect_created ··· 40 41 _stages_run: list[str] = [] 41 42 _status_thread: threading.Thread | None = None 42 43 _status_running: bool = False 44 + _progress_stats: dict[str, Any] = {} 43 45 44 46 45 47 def _set_stage(stage: str) -> None: ··· 52 54 logger.debug(f"Stage changed to: {stage}") 53 55 54 56 57 + def _reset_progress_stats( 58 + source_type: str | None = None, 59 + source_display: str | None = None, 60 + ) -> None: 61 + """Reset progress stats for a new import.""" 62 + global _progress_stats 63 + _progress_stats = { 64 + "items_processed": 0, 65 + "items_total": 0, 66 + "earliest_date": None, 67 + "latest_date": None, 68 + "entities_found": 0, 69 + "source_type": source_type, 70 + "source_display": source_display, 71 + } 72 + 73 + 74 + def _progress_callback(current: int, total: int, **kwargs: Any) -> None: 75 + """Callback for importers to report progress stats.""" 76 + _progress_stats["items_processed"] = current 77 + _progress_stats["items_total"] = total 78 + for key in ("earliest_date", "latest_date", "entities_found"): 79 + if key in kwargs: 80 + _progress_stats[key] = kwargs[key] 81 + 82 + 55 83 def _status_emitter() -> None: 56 84 """Background thread that emits status events every 5 seconds.""" 57 85 while _status_running: ··· 65 93 stage=_current_stage, 66 94 elapsed_ms=elapsed_ms, 67 95 stage_elapsed_ms=stage_elapsed_ms, 96 + **_progress_stats, 68 97 ) 69 98 time.sleep(5) 70 99 ··· 521 550 _stage_start_time = _start_time 522 551 _current_stage = "initialization" 523 552 _stages_run = ["initialization"] 553 + if _file_importer is not None: 554 + _reset_progress_stats( 555 + source_type=_file_importer.name, 556 + source_display=_file_importer.display_name, 557 + ) 558 + else: 559 + _reset_progress_stats( 560 + source_type="generic", 561 + 
source_display=os.path.basename(args.media), 562 + ) 524 563 525 564 # Start Callosum connection with message queue for receiving events 526 565 _message_queue = queue.Queue() ··· 561 600 "facet": args.facet, 562 601 "setting": args.setting, 563 602 "outputs": [], 603 + "source_type": _progress_stats.get("source_type"), 604 + "source_display": _progress_stats.get("source_display"), 564 605 } 565 606 566 607 # Get parent directory for saving metadata ··· 595 636 596 637 _source_hash = hash_source(Path(args.media)) 597 638 598 - _setup_file_import(_import_id) 639 + import_dir = _setup_file_import(_import_id) 599 640 result = _file_importer.process( 600 - Path(args.media), journal_root, facet=args.facet 641 + Path(args.media), 642 + journal_root, 643 + facet=args.facet, 644 + progress_callback=_progress_callback, 601 645 ) 602 646 603 647 all_created_files.extend(result.files_created) ··· 612 656 "count": len(result.files_created), 613 657 } 614 658 ) 659 + processing_results["source_type"] = _file_importer.name 660 + processing_results["source_display"] = _file_importer.display_name 661 + processing_results["date_range"] = ( 662 + list(result.date_range) if result.date_range else None 663 + ) 664 + processing_results["entries_written"] = result.entries_written 665 + processing_results["entities_seeded"] = result.entities_seeded 615 666 616 667 if result.errors: 617 668 logger.warning( ··· 632 683 files_created=len(result.files_created), 633 684 errors=len(result.errors), 634 685 stream=stream, 686 + source_display=_file_importer.display_name, 687 + date_range=list(result.date_range) if result.date_range else None, 635 688 ) 636 689 637 690 if result.segments: ··· 933 986 processing_results["segments"] = created_segments 934 987 if failed_segments: 935 988 processing_results["failed_segments"] = failed_segments 989 + processing_results.setdefault("source_type", "generic") 990 + processing_results.setdefault("source_display", os.path.basename(args.media)) 991 + 
processing_results.setdefault("entries_written", len(all_created_files)) 992 + processing_results.setdefault("entities_seeded", 0) 993 + processing_results.setdefault( 994 + "date_range", 995 + [processing_results["target_day"], processing_results["target_day"]], 996 + ) 936 997 937 998 imported_path = import_dir / "imported.json" 938 - if _file_importer is None: 939 - # Write imported.json with all processing metadata 999 + # Write imported.json with all processing metadata 1000 + try: 1001 + with open(imported_path, "w", encoding="utf-8") as f: 1002 + json.dump(processing_results, f, indent=2) 1003 + logger.info(f"Saved import processing metadata: {imported_path}") 1004 + except Exception as e: 1005 + logger.warning(f"Failed to save imported.json: {e}") 1006 + 1007 + # Update import.json with processing summary if it exists 1008 + import_metadata_path = import_dir / "import.json" 1009 + if import_metadata_path.exists(): 940 1010 try: 941 - with open(imported_path, "w", encoding="utf-8") as f: 942 - json.dump(processing_results, f, indent=2) 943 - logger.info(f"Saved import processing metadata: {imported_path}") 1011 + with open(import_metadata_path, "r", encoding="utf-8") as f: 1012 + import_meta = json.load(f) 1013 + import_meta["processing_completed"] = processing_results[ 1014 + "processing_completed" 1015 + ] 1016 + import_meta["total_files_created"] = processing_results[ 1017 + "total_files_created" 1018 + ] 1019 + import_meta["imported_json_path"] = str(imported_path) 1020 + import_meta["segments"] = created_segments 1021 + with open(import_metadata_path, "w", encoding="utf-8") as f: 1022 + json.dump(import_meta, f, indent=2) 1023 + logger.info(f"Updated import metadata: {import_metadata_path}") 944 1024 except Exception as e: 945 - logger.warning(f"Failed to save imported.json: {e}") 946 - 947 - # Update import.json with processing summary if it exists 948 - import_metadata_path = import_dir / "import.json" 949 - if import_metadata_path.exists(): 950 - 
try: 951 - with open(import_metadata_path, "r", encoding="utf-8") as f: 952 - import_meta = json.load(f) 953 - import_meta["processing_completed"] = processing_results[ 954 - "processing_completed" 955 - ] 956 - import_meta["total_files_created"] = processing_results[ 957 - "total_files_created" 958 - ] 959 - import_meta["imported_json_path"] = str(imported_path) 960 - import_meta["segments"] = created_segments 961 - with open(import_metadata_path, "w", encoding="utf-8") as f: 962 - json.dump(import_meta, f, indent=2) 963 - logger.info(f"Updated import metadata: {import_metadata_path}") 964 - except Exception as e: 965 - logger.warning(f"Failed to update import metadata: {e}") 1025 + logger.warning(f"Failed to update import metadata: {e}") 966 1026 967 1027 # Emit completed event 968 1028 duration_ms = int((time.monotonic() - _start_time) * 1000) ··· 981 1041 stages_run=_stages_run, 982 1042 segments=created_segments, 983 1043 stream=stream, 1044 + source_type=processing_results.get("source_type"), 1045 + source_display=processing_results.get("source_display"), 1046 + entries_written=processing_results.get("entries_written", 0), 1047 + entities_seeded=processing_results.get("entities_seeded", 0), 1048 + date_range=processing_results.get("date_range"), 984 1049 ) 985 1050 986 1051 except Exception as e: ··· 988 1053 partial_outputs = [_get_relative_path(f) for f in all_created_files] 989 1054 imported_path = import_dir / "imported.json" 990 1055 1056 + # Ensure source metadata fields have defaults before error write 1057 + processing_results.setdefault("source_type", "generic") 1058 + processing_results.setdefault("source_display", os.path.basename(args.media)) 1059 + processing_results.setdefault("entries_written", len(all_created_files)) 1060 + processing_results.setdefault("entities_seeded", 0) 1061 + processing_results.setdefault("date_range", None) 1062 + 991 1063 error_results = { 992 1064 **processing_results, # Include all the metadata we have 993 1065 
"processing_failed": dt.datetime.now().isoformat(), ··· 999 1071 "stages_run": _stages_run, 1000 1072 } 1001 1073 1002 - if _file_importer is None: 1003 - # Write error state to imported.json for persistent failure tracking 1004 - try: 1005 - with open(imported_path, "w", encoding="utf-8") as f: 1006 - json.dump(error_results, f, indent=2) 1007 - logger.info(f"Saved error state: {imported_path}") 1008 - except Exception as write_err: 1009 - logger.warning(f"Failed to write error state: {write_err}") 1074 + # Write error state to imported.json for persistent failure tracking 1075 + try: 1076 + with open(imported_path, "w", encoding="utf-8") as f: 1077 + json.dump(error_results, f, indent=2) 1078 + logger.info(f"Saved error state: {imported_path}") 1079 + except Exception as write_err: 1080 + logger.warning(f"Failed to write error state: {write_err}") 1010 1081 1011 1082 # Emit error event 1012 1083 if _callosum:
+1
think/importers/file_importer.py
··· 31 31 errors: list[str] 32 32 summary: str 33 33 segments: list[tuple[str, str]] | None = None 34 + date_range: tuple[str, str] | None = None 34 35 35 36 36 37 @runtime_checkable
+26 -1
think/importers/gemini.py
··· 230 230 skipped = 0 231 231 bard_count = 0 232 232 valid_count = 0 233 + earliest_so_far: str | None = None 234 + latest_so_far: str | None = None 233 235 234 236 for i, act in enumerate(activities): 235 237 activity_messages = _parse_activity(act) ··· 244 246 bard_count += 1 245 247 246 248 messages.extend(activity_messages) 249 + activity_dates = sorted( 250 + dt.datetime.fromtimestamp( 251 + msg["create_time"], tz=dt.timezone.utc 252 + ).strftime("%Y%m%d") 253 + for msg in activity_messages 254 + ) 255 + if earliest_so_far is None or activity_dates[0] < earliest_so_far: 256 + earliest_so_far = activity_dates[0] 257 + if latest_so_far is None or activity_dates[-1] > latest_so_far: 258 + latest_so_far = activity_dates[-1] 247 259 248 260 if progress_callback and (i + 1) % 100 == 0: 249 - progress_callback(i + 1, len(activities)) 261 + progress_callback( 262 + i + 1, 263 + len(activities), 264 + earliest_date=earliest_so_far, 265 + latest_date=latest_so_far, 266 + entities_found=0, 267 + ) 250 268 251 269 if not messages: 252 270 return ImportResult( ··· 258 276 ) 259 277 260 278 messages.sort(key=lambda msg: msg["create_time"]) 279 + earliest = dt.datetime.fromtimestamp( 280 + messages[0]["create_time"], tz=dt.timezone.utc 281 + ).strftime("%Y%m%d") 282 + latest = dt.datetime.fromtimestamp( 283 + messages[-1]["create_time"], tz=dt.timezone.utc 284 + ).strftime("%Y%m%d") 261 285 262 286 windows = _window_messages(messages) 263 287 created_files: list[str] = [] ··· 300 324 f"{len(segment_days)} days into {len(segments)} segments" 301 325 ), 302 326 segments=segments, 327 + date_range=(earliest, latest), 303 328 ) 304 329 305 330
+29 -2
think/importers/ics.py
··· 423 423 424 424 all_entries: list[dict[str, Any]] = [] 425 425 errors: list[str] = [] 426 + earliest_so_far: str | None = None 427 + latest_so_far: str | None = None 426 428 427 429 for i, blob in enumerate(ics_blobs): 428 430 try: 429 - all_entries.extend(_parse_events(blob)) 431 + parsed_entries = _parse_events(blob) 432 + all_entries.extend(parsed_entries) 433 + if parsed_entries: 434 + parsed_dates = sorted( 435 + dt.datetime.fromtimestamp( 436 + entry["create_ts"], tz=dt.timezone.utc 437 + ).strftime("%Y%m%d") 438 + for entry in parsed_entries 439 + ) 440 + if earliest_so_far is None or parsed_dates[0] < earliest_so_far: 441 + earliest_so_far = parsed_dates[0] 442 + if latest_so_far is None or parsed_dates[-1] > latest_so_far: 443 + latest_so_far = parsed_dates[-1] 430 444 except Exception as exc: 431 445 errors.append(f"Failed to parse ICS file {i}: {exc}") 432 446 433 447 if progress_callback: 434 - progress_callback(i + 1, len(ics_blobs)) 448 + progress_callback( 449 + i + 1, 450 + len(ics_blobs), 451 + earliest_date=earliest_so_far, 452 + latest_date=latest_so_far, 453 + entities_found=0, 454 + ) 435 455 436 456 if not all_entries: 437 457 return ImportResult( ··· 443 463 ) 444 464 445 465 all_entries.sort(key=lambda entry: entry["create_ts"]) 466 + earliest = dt.datetime.fromtimestamp( 467 + all_entries[0]["create_ts"], tz=dt.timezone.utc 468 + ).strftime("%Y%m%d") 469 + latest = dt.datetime.fromtimestamp( 470 + all_entries[-1]["create_ts"], tz=dt.timezone.utc 471 + ).strftime("%Y%m%d") 446 472 447 473 windows = window_items(all_entries, "create_ts") 448 474 created_files, segments = write_markdown_segments( ··· 494 520 f"{entities_seeded} entities seeded" 495 521 ), 496 522 segments=segments, 523 + date_range=(earliest, latest), 497 524 ) 498 525 499 526
+20 -1
think/importers/kindle.py
··· 261 261 errors: list[str] = [] 262 262 books: set[str] = set() 263 263 authors: set[str] = set() 264 + earliest_so_far: str | None = None 265 + latest_so_far: str | None = None 264 266 265 267 for i, block in enumerate(blocks): 266 268 if not block.strip(): ··· 274 276 275 277 # Add epoch timestamp for windowing 276 278 entry["create_ts"] = dt.datetime.fromisoformat(entry["ts"]).timestamp() 279 + entry_day = dt.datetime.fromisoformat(entry["ts"]).strftime("%Y%m%d") 280 + if earliest_so_far is None or entry_day < earliest_so_far: 281 + earliest_so_far = entry_day 282 + if latest_so_far is None or entry_day > latest_so_far: 283 + latest_so_far = entry_day 277 284 entries.append(entry) 278 285 books.add(entry["book_title"]) 279 286 if entry["author"]: 280 287 authors.add(entry["author"]) 281 288 282 289 if progress_callback and (i + 1) % 100 == 0: 283 - progress_callback(i + 1, len(blocks)) 290 + progress_callback( 291 + i + 1, 292 + len(blocks), 293 + earliest_date=earliest_so_far, 294 + latest_date=latest_so_far, 295 + entities_found=len(books) + len(authors), 296 + ) 284 297 285 298 if not entries: 286 299 return ImportResult( ··· 292 305 ) 293 306 294 307 entries.sort(key=lambda e: e["create_ts"]) 308 + date_range_val = ( 309 + (earliest_so_far, latest_so_far) 310 + if earliest_so_far and latest_so_far 311 + else None 312 + ) 295 313 296 314 windows = window_items(entries, "create_ts", tz=None) 297 315 created_files, segments = write_markdown_segments( ··· 329 347 f"across {len(segment_days)} days into {len(segments)} segments" 330 348 ), 331 349 segments=segments, 350 + date_range=date_range_val, 332 351 ) 333 352 334 353
+27 -1
think/importers/obsidian.py
··· 227 227 notes: list[dict[str, Any]] = [] 228 228 all_wikilinks: set[str] = set() 229 229 errors: list[str] = [] 230 + earliest_ts: float | None = None 231 + latest_ts: float | None = None 230 232 231 233 for i, md_path in enumerate(md_files): 232 234 content = _read_file_safe(md_path) ··· 245 247 mtime = md_path.stat().st_mtime 246 248 except OSError: 247 249 mtime = dt.datetime.now().timestamp() 250 + if earliest_ts is None or mtime < earliest_ts: 251 + earliest_ts = mtime 252 + if latest_ts is None or mtime > latest_ts: 253 + latest_ts = mtime 248 254 249 255 tags = _parse_frontmatter_tags(content) 250 256 wikilinks = WIKILINK_RE.findall(content) ··· 263 269 ) 264 270 265 271 if progress_callback: 266 - progress_callback(i + 1, total) 272 + progress_callback( 273 + i + 1, 274 + total, 275 + earliest_date=( 276 + dt.datetime.fromtimestamp(earliest_ts).strftime("%Y%m%d") 277 + if earliest_ts 278 + else None 279 + ), 280 + latest_date=( 281 + dt.datetime.fromtimestamp(latest_ts).strftime("%Y%m%d") 282 + if latest_ts 283 + else None 284 + ), 285 + entities_found=len(all_wikilinks), 286 + ) 267 287 268 288 if not notes: 269 289 return ImportResult( ··· 273 293 errors=errors, 274 294 summary="No notes found to import", 275 295 ) 296 + if earliest_ts is not None and latest_ts is not None: 297 + earliest = dt.datetime.fromtimestamp(earliest_ts).strftime("%Y%m%d") 298 + latest = dt.datetime.fromtimestamp(latest_ts).strftime("%Y%m%d") 299 + else: 300 + earliest = latest = dt.datetime.now().strftime("%Y%m%d") 276 301 277 302 notes.sort(key=lambda n: n["mtime"]) 278 303 ··· 308 333 f"{len({d for d, _ in segments})} days into {len(segments)} segments" 309 334 ), 310 335 segments=segments, 336 + date_range=(earliest, latest), 311 337 ) 312 338 313 339 def _walk_md_files(self, root: Path) -> list[Path]: