personal memory agent
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Rewrite ICS importer: creation-moment segments with markdown output

Group calendar events by their LAST-MODIFIED timestamp (falling back to
CREATED, then DTSTART) into 5-minute windowed segments, writing one
imported.md narrative file per segment instead of a flat imported.jsonl.
Remove RRULE expansion — import only base VEVENT definitions.

+435 -64
+25
tests/test_import_formatting.py
··· 222 222 assert formatter is not None 223 223 224 224 225 + def test_formatter_registration_ics_segment_markdown(): 226 + from think.formatters import get_formatter 227 + 228 + formatter = get_formatter("20260115/import.ics/120000_300/imported.md") 229 + assert formatter is not None 230 + assert formatter.__name__ == "format_markdown" 231 + 232 + 225 233 def test_formatter_registration_kindle(): 226 234 from think.formatters import get_formatter 227 235 ··· 240 248 meta = extract_path_metadata("20260301/import.obsidian/imported.jsonl") 241 249 assert meta["day"] == "20260301" 242 250 251 + meta = extract_path_metadata("20260115/import.ics/120000_300/imported.md") 252 + assert meta["day"] == "20260115" 253 + assert meta["agent"] == "imported" 254 + 243 255 244 256 def test_find_formattable_includes_imports(): 245 257 """Verify find_formattable_files picks up import JSONL.""" ··· 257 269 258 270 files = find_formattable_files(tmpdir) 259 271 assert "20260115/import.ics/imported.jsonl" in files 272 + 273 + 274 + def test_find_formattable_includes_segment_markdown(): 275 + from think.formatters import find_formattable_files 276 + 277 + with tempfile.TemporaryDirectory() as tmpdir: 278 + seg_dir = Path(tmpdir) / "20260115" / "import.ics" / "120000_300" 279 + seg_dir.mkdir(parents=True) 280 + md_path = seg_dir / "imported.md" 281 + md_path.write_text("## Test Event\n") 282 + 283 + files = find_formattable_files(tmpdir) 284 + assert "20260115/import.ics/120000_300/imported.md" in files 260 285 261 286 262 287 def test_format_file_integration():
+268
tests/test_importer.py
··· 938 938 assert (tmp_path / "imports" / "20260303_120000" / "manifest.json").exists() 939 939 940 940 941 + def test_ics_creation_timestamp_last_modified(): 942 + mod = importlib.import_module("think.importers.ics") 943 + icalendar = importlib.import_module("icalendar") 944 + 945 + ics_bytes = b"""BEGIN:VCALENDAR 946 + VERSION:2.0 947 + BEGIN:VEVENT 948 + DTSTART:20260315T100000Z 949 + LAST-MODIFIED:20260301T120500Z 950 + END:VEVENT 951 + END:VCALENDAR""" 952 + cal = icalendar.Calendar.from_ical(ics_bytes) 953 + component = list(cal.walk("VEVENT"))[0] 954 + 955 + assert ( 956 + mod._creation_timestamp(component) 957 + == dt.datetime(2026, 3, 1, 12, 5, 0, tzinfo=dt.timezone.utc).timestamp() 958 + ) 959 + 960 + 961 + def test_ics_creation_timestamp_created_only(): 962 + mod = importlib.import_module("think.importers.ics") 963 + icalendar = importlib.import_module("icalendar") 964 + 965 + ics_bytes = b"""BEGIN:VCALENDAR 966 + VERSION:2.0 967 + BEGIN:VEVENT 968 + DTSTART:20260315T100000Z 969 + CREATED:20260301T120000Z 970 + END:VEVENT 971 + END:VCALENDAR""" 972 + cal = icalendar.Calendar.from_ical(ics_bytes) 973 + component = list(cal.walk("VEVENT"))[0] 974 + 975 + assert ( 976 + mod._creation_timestamp(component) 977 + == dt.datetime(2026, 3, 1, 12, 0, 0, tzinfo=dt.timezone.utc).timestamp() 978 + ) 979 + 980 + 981 + def test_ics_creation_timestamp_dtstart_fallback(): 982 + mod = importlib.import_module("think.importers.ics") 983 + icalendar = importlib.import_module("icalendar") 984 + 985 + ics_bytes = b"""BEGIN:VCALENDAR 986 + VERSION:2.0 987 + BEGIN:VEVENT 988 + DTSTART:20260315T100000Z 989 + END:VEVENT 990 + END:VCALENDAR""" 991 + cal = icalendar.Calendar.from_ical(ics_bytes) 992 + component = list(cal.walk("VEVENT"))[0] 993 + 994 + assert ( 995 + mod._creation_timestamp(component) 996 + == dt.datetime(2026, 3, 15, 10, 0, 0, tzinfo=dt.timezone.utc).timestamp() 997 + ) 998 + 999 + 1000 + def test_ics_creation_timestamp_none(): 1001 + mod = 
importlib.import_module("think.importers.ics") 1002 + 1003 + class EmptyComponent: 1004 + def get(self, key, default=None): 1005 + return default 1006 + 1007 + assert mod._creation_timestamp(EmptyComponent()) is None 1008 + 1009 + 1010 + def test_ics_window_events_single_window(): 1011 + mod = importlib.import_module("think.importers.ics") 1012 + 1013 + base = dt.datetime(2026, 3, 1, 12, 0, 0, tzinfo=dt.timezone.utc).timestamp() 1014 + events = [ 1015 + {"title": "A", "create_ts": base}, 1016 + {"title": "B", "create_ts": base + 60}, 1017 + {"title": "C", "create_ts": base + 120}, 1018 + ] 1019 + 1020 + windows = mod._window_events(events) 1021 + 1022 + assert windows == [("20260301", "120000_300", events)] 1023 + 1024 + 1025 + def test_ics_window_events_time_gap_split(): 1026 + mod = importlib.import_module("think.importers.ics") 1027 + 1028 + base = dt.datetime(2026, 3, 1, 12, 0, 0, tzinfo=dt.timezone.utc).timestamp() 1029 + events = [ 1030 + {"title": "A", "create_ts": base}, 1031 + {"title": "B", "create_ts": base + 60}, 1032 + {"title": "C", "create_ts": base + 120}, 1033 + {"title": "D", "create_ts": base + 600}, 1034 + ] 1035 + 1036 + windows = mod._window_events(events) 1037 + 1038 + assert len(windows) == 2 1039 + assert windows[0][0] == "20260301" 1040 + assert windows[0][1] == "120000_300" 1041 + assert windows[0][2] == events[:3] 1042 + assert windows[1][1] == "121000_300" 1043 + assert windows[1][2] == [events[3]] 1044 + 1045 + 1046 + def test_ics_window_events_day_boundary(): 1047 + mod = importlib.import_module("think.importers.ics") 1048 + 1049 + first_day = dt.datetime(2026, 3, 1, 12, 0, 0, tzinfo=dt.timezone.utc).timestamp() 1050 + second_day = dt.datetime(2026, 3, 2, 12, 0, 0, tzinfo=dt.timezone.utc).timestamp() 1051 + events = [ 1052 + {"title": "A", "create_ts": first_day}, 1053 + {"title": "B", "create_ts": second_day}, 1054 + ] 1055 + 1056 + windows = mod._window_events(events) 1057 + 1058 + assert windows == [ 1059 + ("20260301", 
"120000_300", [events[0]]), 1060 + ("20260302", "120000_300", [events[1]]), 1061 + ] 1062 + 1063 + 1064 + def test_ics_render_event_markdown_full(): 1065 + mod = importlib.import_module("think.importers.ics") 1066 + 1067 + event = { 1068 + "title": "Team Sync", 1069 + "ts": "2026-01-15T10:00:00+00:00", 1070 + "end_ts": "2026-01-15T11:00:00+00:00", 1071 + "duration_minutes": 60, 1072 + "location": "Conference Room 3B", 1073 + "attendees": [ 1074 + {"name": "Alice Smith", "email": "alice@example.com"}, 1075 + {"name": "Bob Jones", "email": "bob@example.com"}, 1076 + ], 1077 + "content": "Event description text here.", 1078 + } 1079 + 1080 + rendered = mod._render_event_markdown(event) 1081 + 1082 + assert "## Team Sync" in rendered 1083 + assert "**2026-01-15 10:00 AM – 11:00 AM** (60 min)" in rendered 1084 + assert "📍 Conference Room 3B" in rendered 1085 + assert "👥 Alice Smith, Bob Jones" in rendered 1086 + assert "Event description text here." in rendered 1087 + 1088 + 1089 + def test_ics_render_event_markdown_minimal(): 1090 + mod = importlib.import_module("think.importers.ics") 1091 + 1092 + event = { 1093 + "title": "Minimal Event", 1094 + "ts": "2026-01-15T10:00:00+00:00", 1095 + "content": "", 1096 + "attendees": [], 1097 + } 1098 + 1099 + rendered = mod._render_event_markdown(event) 1100 + 1101 + assert "## Minimal Event" in rendered 1102 + assert "**2026-01-15 10:00 AM**" in rendered 1103 + assert "📍" not in rendered 1104 + assert "👥" not in rendered 1105 + assert "Minimal Event\n\n" not in rendered 1106 + 1107 + 1108 + def test_ics_render_event_markdown_without_scheduled_time(): 1109 + mod = importlib.import_module("think.importers.ics") 1110 + 1111 + event = { 1112 + "title": "Created Only Event", 1113 + "content": "", 1114 + "attendees": [], 1115 + } 1116 + 1117 + rendered = mod._render_event_markdown(event) 1118 + 1119 + assert rendered == "## Created Only Event" 1120 + 1121 + 1122 + def test_ics_process_segments(tmp_path, monkeypatch): 1123 + mod = 
importlib.import_module("think.importers.ics") 1124 + 1125 + ics_path = tmp_path / "calendar.ics" 1126 + ics_path.write_bytes( 1127 + b"""BEGIN:VCALENDAR 1128 + VERSION:2.0 1129 + BEGIN:VEVENT 1130 + DTSTART:20260315T100000Z 1131 + DTEND:20260315T110000Z 1132 + SUMMARY:Event One 1133 + DESCRIPTION:First description 1134 + CREATED:20260301T120000Z 1135 + ATTENDEE;CN=Alice Smith:mailto:alice@example.com 1136 + END:VEVENT 1137 + BEGIN:VEVENT 1138 + DTSTART:20260316T140000Z 1139 + DTEND:20260316T143000Z 1140 + SUMMARY:Event Two 1141 + CREATED:20260301T120200Z 1142 + END:VEVENT 1143 + BEGIN:VEVENT 1144 + DTSTART:20260317T090000Z 1145 + DTEND:20260317T093000Z 1146 + SUMMARY:Event Three 1147 + CREATED:20260302T090000Z 1148 + END:VEVENT 1149 + END:VCALENDAR""" 1150 + ) 1151 + 1152 + monkeypatch.setenv("JOURNAL_PATH", str(tmp_path)) 1153 + 1154 + result = mod.ICSImporter().process(ics_path, tmp_path, facet="work") 1155 + 1156 + first_md = day_path("20260301") / "import.ics" / "120000_300" / "imported.md" 1157 + second_md = day_path("20260302") / "import.ics" / "090000_300" / "imported.md" 1158 + 1159 + assert result.entries_written == 3 1160 + assert result.errors == [] 1161 + assert result.segments == [ 1162 + ("20260301", "120000_300"), 1163 + ("20260302", "090000_300"), 1164 + ] 1165 + assert len(result.files_created) == 2 1166 + assert first_md.exists() 1167 + assert second_md.exists() 1168 + first_content = first_md.read_text() 1169 + second_content = second_md.read_text() 1170 + assert "## Event One" in first_content 1171 + assert "First description" in first_content 1172 + assert "## Event Two" in first_content 1173 + assert "## Event Three" in second_content 1174 + assert "**2026-03-17 09:00 AM – 09:30 AM** (30 min)" in second_content 1175 + 1176 + 1177 + def test_ics_preview_uses_creation_timestamps(tmp_path): 1178 + mod = importlib.import_module("think.importers.ics") 1179 + 1180 + ics_path = tmp_path / "calendar.ics" 1181 + ics_path.write_bytes( 1182 + 
b"""BEGIN:VCALENDAR 1183 + VERSION:2.0 1184 + BEGIN:VEVENT 1185 + DTSTART:20260315T100000Z 1186 + DTEND:20260315T110000Z 1187 + SUMMARY:Event One 1188 + CREATED:20260301T120000Z 1189 + ATTENDEE;CN=Alice Smith:mailto:alice@example.com 1190 + END:VEVENT 1191 + BEGIN:VEVENT 1192 + DTSTART:20260316T100000Z 1193 + DTEND:20260316T110000Z 1194 + SUMMARY:Event Two 1195 + CREATED:20260305T090000Z 1196 + ATTENDEE;CN=Bob Jones:mailto:bob@example.com 1197 + END:VEVENT 1198 + END:VCALENDAR""" 1199 + ) 1200 + 1201 + preview = mod.ICSImporter().preview(ics_path) 1202 + 1203 + assert preview.date_range == ("20260301", "20260305") 1204 + assert preview.item_count == 2 1205 + assert preview.entity_count == 2 1206 + assert preview.summary == "2 events, 2 unique attendees" 1207 + 1208 + 941 1209 def test_list_importers_json(capsys, monkeypatch): 942 1210 """--list-importers --json returns machine-readable output.""" 943 1211 mod = importlib.import_module("think.importers.cli")
+1
think/formatters.py
··· 146 146 "format_imported", 147 147 True, 148 148 ), 149 + "*/import.*/*/imported.md": ("think.markdown", "format_markdown", True), 149 150 # Raw transcripts — formattable but not indexed (agent outputs are more useful) 150 151 # Layout: day/stream/segment/audio.jsonl 151 152 "*/*/*/audio.jsonl": ("observe.hear", "format_audio", False),
+141 -64
think/importers/ics.py
··· 10 10 from typing import Any, Callable 11 11 12 12 from think.importers.file_importer import ImportPreview, ImportResult 13 - from think.importers.shared import seed_entities, write_structured_import 13 + from think.importers.shared import seed_entities 14 + from think.utils import day_path 14 15 15 16 logger = logging.getLogger(__name__) 16 - 17 - # How far back to expand recurring events (from now) 18 - _RECURRENCE_LOOKBACK_YEARS = 2 19 17 20 18 21 19 def _extract_ics_data(path: Path) -> list[bytes]: ··· 101 99 return None 102 100 103 101 104 - def _expand_rrule(component: Any, dtstart_val: Any) -> list[dt.datetime]: 105 - """Expand RRULE into concrete datetimes within the lookback window. 102 + def _creation_timestamp(component: Any) -> float | None: 103 + """Extract a creation timestamp from VEVENT metadata.""" 106 104 107 - Returns list of occurrence datetimes (excluding the original DTSTART). 108 - """ 109 - from dateutil import rrule as du_rrule 105 + for field in ("LAST-MODIFIED", "CREATED", "DTSTART"): 106 + value = component.get(field) 107 + if value is None: 108 + continue 110 109 111 - rrule_prop = component.get("RRULE") 112 - if not rrule_prop: 110 + try: 111 + parsed = value.dt if hasattr(value, "dt") else value 112 + if isinstance(parsed, dt.date) and not isinstance(parsed, dt.datetime): 113 + parsed = dt.datetime(parsed.year, parsed.month, parsed.day) 114 + if not isinstance(parsed, dt.datetime): 115 + continue 116 + if parsed.tzinfo is None: 117 + parsed = parsed.replace(tzinfo=dt.timezone.utc) 118 + if field == "DTSTART": 119 + logger.debug( 120 + "VEVENT missing CREATED/LAST-MODIFIED; falling back to DTSTART" 121 + ) 122 + return parsed.timestamp() 123 + except Exception as exc: 124 + logger.debug("Failed to parse %s timestamp: %s", field, exc) 125 + 126 + return None 127 + 128 + 129 + def _window_events( 130 + events: list[dict[str, Any]], 131 + window_duration: int = 300, 132 + ) -> list[tuple[str, str, list[dict[str, Any]]]]: 133 + 
"""Group sorted events into fixed-duration windows per creation time.""" 134 + if not events: 113 135 return [] 114 136 115 - try: 116 - start = dtstart_val.dt if hasattr(dtstart_val, "dt") else dtstart_val 137 + windows: list[tuple[str, str, list[dict[str, Any]]]] = [] 138 + window_start: float | None = None 139 + window_day: str | None = None 140 + window_events: list[dict[str, Any]] = [] 141 + 142 + for event in events: 143 + create_ts = event["create_ts"] 144 + event_dt = dt.datetime.fromtimestamp(create_ts, tz=dt.timezone.utc) 145 + event_day = event_dt.strftime("%Y%m%d") 146 + 147 + if ( 148 + window_start is None 149 + or event_day != window_day 150 + or create_ts - window_start >= window_duration 151 + ): 152 + if window_events and window_day and window_start is not None: 153 + start_dt = dt.datetime.fromtimestamp(window_start, tz=dt.timezone.utc) 154 + seg_key = f"{start_dt.strftime('%H%M%S')}_{window_duration}" 155 + windows.append((window_day, seg_key, window_events)) 156 + 157 + window_start = create_ts 158 + window_day = event_day 159 + window_events = [] 160 + 161 + window_events.append(event) 162 + 163 + if window_events and window_day and window_start is not None: 164 + start_dt = dt.datetime.fromtimestamp(window_start, tz=dt.timezone.utc) 165 + seg_key = f"{start_dt.strftime('%H%M%S')}_{window_duration}" 166 + windows.append((window_day, seg_key, window_events)) 167 + 168 + return windows 117 169 118 - # Normalize date → datetime 119 - if isinstance(start, dt.date) and not isinstance(start, dt.datetime): 120 - start = dt.datetime(start.year, start.month, start.day) 121 170 122 - # Ensure timezone-aware for bounds comparison 123 - now = dt.datetime.now(dt.timezone.utc) 124 - if start.tzinfo is None: 125 - now = now.replace(tzinfo=None) 171 + def _render_event_markdown(event: dict[str, Any]) -> str: 172 + """Render a calendar event as markdown.""" 173 + title = event.get("title", "Untitled event") 174 + lines = [f"## {title}"] 126 175 127 - 
window_start = now - dt.timedelta(days=_RECURRENCE_LOOKBACK_YEARS * 365) 176 + ts = event.get("ts") 177 + end_ts = event.get("end_ts") 178 + duration = event.get("duration_minutes") 179 + if ts: 180 + try: 181 + start_dt = dt.datetime.fromisoformat(ts) 182 + time_line = start_dt.strftime("%Y-%m-%d %I:%M %p") 183 + if end_ts: 184 + end_dt = dt.datetime.fromisoformat(end_ts) 185 + time_line = f"{time_line} – {end_dt.strftime('%I:%M %p')}" 186 + time_line = f"**{time_line}**" 187 + if duration is not None: 188 + time_line += f" ({duration} min)" 189 + lines.append(time_line) 190 + except ValueError: 191 + pass 128 192 129 - # Convert rrule dict to string for dateutil 130 - rrule_dict = dict(rrule_prop) 131 - rrule_str = ";".join(f"{k}={v}" for k, v in rrule_dict.items()) 193 + location = event.get("location", "") 194 + if location: 195 + lines.append(f"📍 {location}") 132 196 133 - rule = du_rrule.rrulestr( 134 - f"RRULE:{rrule_str}", 135 - dtstart=start, 136 - ignoretz=start.tzinfo is None, 137 - ) 197 + attendees = event.get("attendees", []) 198 + attendee_names = [] 199 + for attendee in attendees: 200 + if not isinstance(attendee, dict): 201 + attendee_names.append(str(attendee)) 202 + continue 203 + attendee_name = attendee.get("name") or attendee.get("email", "") 204 + if attendee_name: 205 + attendee_names.append(attendee_name) 206 + if attendee_names: 207 + lines.append(f"👥 {', '.join(attendee_names)}") 138 208 139 - # Collect occurrences within bounds (cap at 1000 to avoid runaway) 140 - occurrences = [] 141 - for occ in rule: 142 - if occ > now: 143 - break 144 - if occ >= window_start and occ != start: 145 - occurrences.append(occ) 146 - if len(occurrences) >= 1000: 147 - break 209 + description = event.get("content", "") 210 + if description: 211 + lines.append("") 212 + lines.append(description) 148 213 149 - return occurrences 150 - except Exception as exc: 151 - logger.debug("Failed to expand RRULE: %s", exc) 152 - return [] 214 + return "\n".join(lines) 
153 215 154 216 155 217 def _parse_events(ics_bytes: bytes) -> list[dict[str, Any]]: ··· 170 232 171 233 try: 172 234 dtstart = component.get("DTSTART") 235 + dtend = component.get("DTEND") 173 236 ts = _dt_to_iso(dtstart) 174 - if not ts: 237 + end_ts = _dt_to_iso(dtend) if dtend else None 238 + create_ts = _creation_timestamp(component) 239 + if create_ts is None: 175 240 continue 176 241 177 - dtend = component.get("DTEND") 178 242 duration = _duration_minutes(dtstart, dtend) if dtend else None 179 243 180 244 title = str(component.get("SUMMARY", "")) or "Untitled event" ··· 207 271 # Build base entry 208 272 entry: dict[str, Any] = { 209 273 "type": "calendar_event", 210 - "ts": ts, 211 274 "title": title, 212 275 "content": description, 276 + "create_ts": create_ts, 213 277 } 278 + if ts: 279 + entry["ts"] = ts 280 + if end_ts: 281 + entry["end_ts"] = end_ts 214 282 if duration is not None: 215 283 entry["duration_minutes"] = duration 216 284 if location: ··· 219 287 entry["attendees"] = attendees 220 288 221 289 entries.append(entry) 222 - 223 - # Expand recurring events 224 - recurrences = _expand_rrule(component, dtstart) 225 - for occ_dt in recurrences: 226 - occ_ts = occ_dt.isoformat() 227 - occ_entry = {**entry, "ts": occ_ts} 228 - entries.append(occ_entry) 229 290 230 291 except Exception as exc: 231 292 summary = component.get("SUMMARY", "<unknown>") ··· 282 343 283 344 # Date range 284 345 dates = sorted( 285 - dt.datetime.fromisoformat(e["ts"]).strftime("%Y%m%d") 346 + dt.datetime.fromtimestamp(e["create_ts"], tz=dt.timezone.utc).strftime( 347 + "%Y%m%d" 348 + ) 286 349 for e in all_entries 287 - if e.get("ts") 350 + if e.get("create_ts") is not None 288 351 ) 289 352 date_range = (dates[0], dates[-1]) if dates else ("", "") 290 353 ··· 311 374 progress_callback: Callable | None = None, 312 375 ) -> ImportResult: 313 376 ics_blobs = _extract_ics_data(path) 314 - import_id = dt.datetime.now().strftime("%Y%m%d_%H%M%S") 315 377 316 378 all_entries: 
list[dict[str, Any]] = [] 317 379 errors: list[str] = [] ··· 334 396 summary="No events found to import", 335 397 ) 336 398 337 - # Write structured entries 338 - created_files = write_structured_import( 339 - "ics", 340 - all_entries, 341 - import_id=import_id, 342 - facet=facet, 343 - ) 399 + all_entries.sort(key=lambda entry: entry["create_ts"]) 400 + 401 + windows = _window_events(all_entries) 402 + created_files: list[str] = [] 403 + segments: list[tuple[str, str]] = [] 404 + 405 + for day, seg_key, window_events in windows: 406 + segment_dir = day_path(day) / "import.ics" / seg_key 407 + segment_dir.mkdir(parents=True, exist_ok=True) 408 + md_path = segment_dir / "imported.md" 409 + markdown = "\n\n".join( 410 + _render_event_markdown(event) for event in window_events 411 + ) 412 + md_path.write_text(markdown + "\n", encoding="utf-8") 413 + created_files.append(str(md_path)) 414 + segments.append((day, seg_key)) 415 + 416 + segment_days = {day for day, _ in segments} 344 417 345 418 # Seed entities from attendees 346 419 entities_seeded = 0 ··· 350 423 seen_emails: set[str] = set() 351 424 352 425 for entry in all_entries: 353 - day = dt.datetime.fromisoformat(entry["ts"]).strftime("%Y%m%d") 426 + day = dt.datetime.fromtimestamp( 427 + entry["create_ts"], tz=dt.timezone.utc 428 + ).strftime("%Y%m%d") 354 429 for att in entry.get("attendees", []): 355 430 email = att.get("email", "") 356 431 name = att.get("name", "") ··· 376 451 errors=errors, 377 452 summary=( 378 453 f"Imported {len(all_entries)} calendar events across " 379 - f"{len(created_files)} days, {entities_seeded} entities seeded" 454 + f"{len(segment_days)} days into {len(segments)} segments, " 455 + f"{entities_seeded} entities seeded" 380 456 ), 457 + segments=segments, 381 458 ) 382 459 383 460