personal memory agent
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'hopper-unydjif4-kindle-gemini-importers'

+473 -88
+39 -13
tests/test_gemini_importer.py
··· 189 189 os.environ["JOURNAL_PATH"] = journal 190 190 result = importer.process(Path(f.name), Path(journal)) 191 191 assert result.entries_written == 2 192 - assert len(result.files_created) == 1 # same day 193 192 assert result.errors == [] 194 - 195 - # Verify JSONL content 196 - jsonl_path = Path(result.files_created[0]) 197 - assert jsonl_path.exists() 198 - lines = jsonl_path.read_text().strip().split("\n") 199 - header = json.loads(lines[0]) 200 - assert header["import"]["source"] == "gemini" 201 - assert header["entry_count"] == 2 193 + assert result.segments is not None 194 + assert len(result.segments) >= 1 195 + assert any(Path(p).name == "imported.md" for p in result.files_created) 202 196 203 - entry = json.loads(lines[1]) 204 - assert entry["type"] == "ai_chat" 205 - assert entry["source"] == "gemini" 197 + md = "" 198 + for file_path in result.files_created: 199 + md_path = Path(file_path) 200 + assert md_path.exists() 201 + md += md_path.read_text() 202 + assert "Python" in md 203 + assert "sorted" in md 206 204 finally: 207 205 os.unlink(f.name) 208 206 os.environ.pop("JOURNAL_PATH", None) ··· 221 219 os.environ["JOURNAL_PATH"] = journal 222 220 result = importer.process(Path(tmp.name), Path(journal)) 223 221 assert result.entries_written == 1 224 - assert len(result.files_created) == 1 222 + assert result.segments is not None 223 + assert len(result.segments) == 1 224 + assert any(Path(p).suffix == ".md" for p in result.files_created) 225 225 finally: 226 226 os.unlink(tmp.name) 227 + os.environ.pop("JOURNAL_PATH", None) 228 + 229 + 230 + def test_process_multiple_windows(): 231 + """Activities more than 5 minutes apart land in different segments.""" 232 + with tempfile.NamedTemporaryFile(suffix=".json", mode="w", delete=False) as f: 233 + activities = [ 234 + _sample_activity(time="2026-01-15T10:00:00Z"), 235 + _sample_activity( 236 + prompt="Second question", 237 + response="Second answer", 238 + time="2026-01-15T10:10:00Z", 239 + ), 240 + ] 241 + json.dump(activities, f) 242 + f.flush() 243 + try: 244 + with tempfile.TemporaryDirectory() as journal: 245 + os.environ["JOURNAL_PATH"] = journal 246 + result = importer.process(Path(f.name), Path(journal)) 247 + assert result.entries_written == 2 248 + assert result.segments is not None 249 + assert len(result.segments) == 2 250 + assert len(result.files_created) == 2 251 + finally: 252 + os.unlink(f.name) 227 253 os.environ.pop("JOURNAL_PATH", None) 228 254 229 255
+10 -1
tests/test_import_formatting.py
··· 233 233 def test_formatter_registration_kindle(): 234 234 from think.formatters import get_formatter 235 235 236 - formatter = get_formatter("20260115/import.kindle/imported.jsonl") 236 + formatter = get_formatter("20260115/import.kindle/103000_300/imported.md") 237 + assert formatter is not None 238 + assert formatter.__name__ == "format_markdown" 239 + 240 + 241 + def test_formatter_registration_gemini_segment(): 242 + from think.formatters import get_formatter 243 + 244 + formatter = get_formatter("20260115/import.gemini/100000_300/imported.md") 237 245 assert formatter is not None 246 + assert formatter.__name__ == "format_markdown" 238 247 239 248 240 249 def test_path_metadata_extraction():
+9 -9
tests/test_importer.py
··· 1007 1007 assert mod._creation_timestamp(EmptyComponent()) is None 1008 1008 1009 1009 1010 - def test_ics_window_events_single_window(): 1011 - mod = importlib.import_module("think.importers.ics") 1010 + def test_window_items_single_window(): 1011 + mod = importlib.import_module("think.importers.shared") 1012 1012 1013 1013 base = dt.datetime(2026, 3, 1, 12, 0, 0, tzinfo=dt.timezone.utc).timestamp() 1014 1014 events = [ ··· 1017 1017 {"title": "C", "create_ts": base + 120}, 1018 1018 ] 1019 1019 1020 - windows = mod._window_events(events) 1020 + windows = mod.window_items(events, "create_ts") 1021 1021 1022 1022 assert windows == [("20260301", "120000_300", events)] 1023 1023 1024 1024 1025 - def test_ics_window_events_time_gap_split(): 1026 - mod = importlib.import_module("think.importers.ics") 1025 + def test_window_items_time_gap_split(): 1026 + mod = importlib.import_module("think.importers.shared") 1027 1027 1028 1028 base = dt.datetime(2026, 3, 1, 12, 0, 0, tzinfo=dt.timezone.utc).timestamp() 1029 1029 events = [ ··· 1033 1033 {"title": "D", "create_ts": base + 600}, 1034 1034 ] 1035 1035 1036 - windows = mod._window_events(events) 1036 + windows = mod.window_items(events, "create_ts") 1037 1037 1038 1038 assert len(windows) == 2 1039 1039 assert windows[0][0] == "20260301" ··· 1043 1043 assert windows[1][2] == [events[3]] 1044 1044 1045 1045 1046 - def test_ics_window_events_day_boundary(): 1047 - mod = importlib.import_module("think.importers.ics") 1046 + def test_window_items_day_boundary(): 1047 + mod = importlib.import_module("think.importers.shared") 1048 1048 1049 1049 first_day = dt.datetime(2026, 3, 1, 12, 0, 0, tzinfo=dt.timezone.utc).timestamp() 1050 1050 second_day = dt.datetime(2026, 3, 2, 12, 0, 0, tzinfo=dt.timezone.utc).timestamp() ··· 1053 1053 {"title": "B", "create_ts": second_day}, 1054 1054 ] 1055 1055 1056 - windows = mod._window_events(events) 1056 + windows = mod.window_items(events, "create_ts") 1057 1057 1058 1058 assert windows == [ 1059 1059 ("20260301", "120000_300", [events[0]]),
+217
tests/test_kindle_importer.py
··· 1 + # SPDX-License-Identifier: AGPL-3.0-only 2 + # Copyright (c) 2026 sol pbc 3 + 4 + """Tests for think.importers.kindle — Kindle My Clippings.txt importer.""" 5 + 6 + import os 7 + import tempfile 8 + from pathlib import Path 9 + 10 + from think.importers.kindle import KindleImporter, _parse_block, _parse_date 11 + 12 + importer = KindleImporter() 13 + 14 + 15 + def _make_clipping( 16 + title: str = "Test Book (Author Name)", 17 + meta: str = "- Your Highlight on page 42 | location 100-101 | Added on Saturday, March 15, 2025 10:30:00 AM", 18 + content: str = "This is a highlighted passage.", 19 + ) -> str: 20 + return f"{title}\n{meta}\n\n{content}\n" 21 + 22 + 23 + def _make_clippings_file(clippings: list[str]) -> str: 24 + return "==========\n".join(clippings) + "==========\n" 25 + 26 + 27 + # --- Unit tests for helpers --- 28 + 29 + 30 + def test_parse_date(): 31 + result = _parse_date("Saturday, March 15, 2025 10:30:00 AM") 32 + assert result is not None 33 + assert result.month == 3 34 + assert result.day == 15 35 + 36 + 37 + def test_parse_block_basic(): 38 + block = _make_clipping() 39 + entry = _parse_block(block) 40 + assert entry is not None 41 + assert entry["type"] == "highlight" 42 + assert entry["book_title"] == "Test Book" 43 + assert entry["author"] == "Author Name" 44 + assert entry["content"] == "This is a highlighted passage." 45 + assert entry["page"] == 42 46 + assert entry["location"] == "100-101" 47 + 48 + 49 + def test_parse_block_note(): 50 + block = _make_clipping( 51 + meta="- Your Note on page 10 | Added on Saturday, March 15, 2025 10:30:00 AM", 52 + content="My personal note", 53 + ) 54 + entry = _parse_block(block) 55 + assert entry is not None 56 + assert entry["clip_type"] == "note" 57 + 58 + 59 + def test_parse_block_no_author(): 60 + block = _make_clipping(title="Title Without Author") 61 + entry = _parse_block(block) 62 + assert entry is not None 63 + assert entry["book_title"] == "Title Without Author" 64 + assert entry["author"] == "" 65 + 66 + 67 + # --- Detection tests --- 68 + 69 + 70 + def test_detect_valid(): 71 + content = _make_clippings_file([_make_clipping()]) 72 + with tempfile.NamedTemporaryFile(suffix=".txt", mode="w", delete=False) as f: 73 + f.write(content) 74 + f.flush() 75 + try: 76 + assert importer.detect(Path(f.name)) is True 77 + finally: 78 + os.unlink(f.name) 79 + 80 + 81 + def test_detect_wrong_format(): 82 + with tempfile.NamedTemporaryFile(suffix=".txt", mode="w", delete=False) as f: 83 + f.write("Just some random text file.\n") 84 + f.flush() 85 + try: 86 + assert importer.detect(Path(f.name)) is False 87 + finally: 88 + os.unlink(f.name) 89 + 90 + 91 + # --- Preview tests --- 92 + 93 + 94 + def test_preview(): 95 + content = _make_clippings_file( 96 + [ 97 + _make_clipping(), 98 + _make_clipping( 99 + title="Another Book (Jane Doe)", 100 + meta="- Your Note on page 5 | Added on Sunday, March 16, 2025 02:00:00 PM", 101 + content="A note", 102 + ), 103 + ] 104 + ) 105 + with tempfile.NamedTemporaryFile(suffix=".txt", mode="w", delete=False) as f: 106 + f.write(content) 107 + f.flush() 108 + try: 109 + preview = importer.preview(Path(f.name)) 110 + assert preview.item_count == 2 111 + assert preview.entity_count > 0 112 + assert "2 books" in preview.summary 113 + finally: 114 + os.unlink(f.name) 115 + 116 + 117 + # --- Process tests --- 118 + 119 + 120 + def test_process_basic(): 121 + content = _make_clippings_file( 122 + [ 123 + _make_clipping(), 124 + _make_clipping( 125 + meta="- Your Highlight on page 43 | Added on Saturday, March 15, 2025 10:31:00 AM", 126 + content="Another highlight from same session.", 127 + ), 128 + ] 129 + ) 130 + with tempfile.NamedTemporaryFile(suffix=".txt", mode="w", delete=False) as f: 131 + f.write(content) 132 + f.flush() 133 + try: 134 + with tempfile.TemporaryDirectory() as journal: 135 + os.environ["JOURNAL_PATH"] = journal 136 + result = importer.process(Path(f.name), Path(journal)) 137 + assert result.entries_written == 2 138 + assert result.errors == [] 139 + assert result.segments is not None 140 + assert len(result.segments) >= 1 141 + 142 + md_path = Path(result.files_created[0]) 143 + assert md_path.exists() 144 + assert md_path.name == "imported.md" 145 + md = md_path.read_text() 146 + assert "Test Book" in md 147 + assert "> This is a highlighted passage." in md 148 + assert "Page 42" in md 149 + finally: 150 + os.unlink(f.name) 151 + os.environ.pop("JOURNAL_PATH", None) 152 + 153 + 154 + def test_process_multiple_windows(): 155 + """Highlights more than 5 minutes apart land in different segments.""" 156 + content = _make_clippings_file( 157 + [ 158 + _make_clipping( 159 + meta="- Your Highlight on page 1 | Added on Saturday, March 15, 2025 10:00:00 AM", 160 + content="First highlight", 161 + ), 162 + _make_clipping( 163 + meta="- Your Highlight on page 2 | Added on Saturday, March 15, 2025 10:10:00 AM", 164 + content="Second highlight, 10 min later", 165 + ), 166 + ] 167 + ) 168 + with tempfile.NamedTemporaryFile(suffix=".txt", mode="w", delete=False) as f: 169 + f.write(content) 170 + f.flush() 171 + try: 172 + with tempfile.TemporaryDirectory() as journal: 173 + os.environ["JOURNAL_PATH"] = journal 174 + result = importer.process(Path(f.name), Path(journal)) 175 + assert result.entries_written == 2 176 + assert result.segments is not None 177 + assert len(result.segments) == 2 178 + assert len(result.files_created) == 2 179 + finally: 180 + os.unlink(f.name) 181 + os.environ.pop("JOURNAL_PATH", None) 182 + 183 + 184 + def test_process_note_markdown(): 185 + """Notes render with Note: prefix instead of blockquote.""" 186 + content = _make_clippings_file( 187 + [ 188 + _make_clipping( 189 + meta="- Your Note on page 10 | Added on Saturday, March 15, 2025 10:30:00 AM", 190 + content="My personal note", 191 + ), 192 + ] 193 + ) 194 + with tempfile.NamedTemporaryFile(suffix=".txt", mode="w", delete=False) as f: 195 + f.write(content) 196 + f.flush() 197 + try: 198 + with tempfile.TemporaryDirectory() as journal: 199 + os.environ["JOURNAL_PATH"] = journal 200 + result = importer.process(Path(f.name), Path(journal)) 201 + md = Path(result.files_created[0]).read_text() 202 + assert "Note: My personal note" in md 203 + finally: 204 + os.unlink(f.name) 205 + os.environ.pop("JOURNAL_PATH", None) 206 + 207 + 208 + # --- Registry test --- 209 + 210 + 211 + def test_registered_in_registry(): 212 + from think.importers.file_importer import FILE_IMPORTER_REGISTRY, get_file_importer 213 + 214 + assert "kindle" in FILE_IMPORTER_REGISTRY 215 + imp = get_file_importer("kindle") 216 + assert imp is not None 217 + assert imp.name == "kindle"
+58 -10
think/importers/gemini.py
··· 23 23 from typing import Any, Callable 24 24 25 25 from think.importers.file_importer import ImportPreview, ImportResult 26 - from think.importers.shared import write_structured_import 26 + from think.importers.shared import window_items 27 + from think.utils import day_path 27 28 28 29 logger = logging.getLogger(__name__) 29 30 ··· 150 151 return entry 151 152 152 153 154 + def _render_activity_markdown(activity: dict) -> str: 155 + """Render a Gemini activity as markdown.""" 156 + title = activity.get("title", "Gemini activity") 157 + lines = [f"## {title}"] 158 + 159 + content = activity.get("content", "") 160 + if content: 161 + # Content already has "Human: ..." and "Assistant: ..." format 162 + # Convert to bold labels 163 + for part in content.split("\n\n"): 164 + part = part.strip() 165 + if part.startswith("Human: "): 166 + lines.append(f"**Human:** {part[7:]}") 167 + elif part.startswith("Assistant: "): 168 + lines.append(f"**Assistant:** {part[11:]}") 169 + elif part: 170 + lines.append(part) 171 + 172 + return "\n\n".join(lines) 173 + 174 + 153 175 class GeminiImporter: 154 176 name = "gemini" 155 177 display_name = "Gemini Activity History" ··· 233 255 progress_callback: Callable | None = None, 234 256 ) -> ImportResult: 235 257 activities = _load_activities(path) 236 - import_id = dt.datetime.now().strftime("%Y%m%d_%H%M%S") 237 258 238 259 entries: list[dict[str, Any]] = [] 239 260 errors: list[str] = [] ··· 244 265 if entry is None: 245 266 skipped += 1 246 267 continue 268 + 269 + # Add epoch timestamp for windowing 270 + entry["create_ts"] = dt.datetime.fromisoformat(entry["ts"]).timestamp() 247 271 entries.append(entry) 248 272 249 273 if progress_callback and (i + 1) % 100 == 0: 250 274 progress_callback(i + 1, len(activities)) 251 275 252 - # Write to journal 253 - created_files = write_structured_import( 254 - "gemini", 255 - entries, 256 - import_id=import_id, 257 - facet=facet, 258 - ) 276 + if not entries: 277 + return ImportResult( 278 + entries_written=0, 279 + entities_seeded=0, 280 + files_created=[], 281 + errors=errors, 282 + summary="No activities found to import", 283 + ) 284 + 285 + entries.sort(key=lambda e: e["create_ts"]) 286 + 287 + windows = window_items(entries, "create_ts") 288 + created_files: list[str] = [] 289 + segments: list[tuple[str, str]] = [] 290 + 291 + for day, seg_key, window_activities in windows: 292 + segment_dir = day_path(day) / "import.gemini" / seg_key 293 + segment_dir.mkdir(parents=True, exist_ok=True) 294 + md_path = segment_dir / "imported.md" 295 + markdown = "\n\n".join( 296 + _render_activity_markdown(act) for act in window_activities 297 + ) 298 + md_path.write_text(markdown + "\n", encoding="utf-8") 299 + created_files.append(str(md_path)) 300 + segments.append((day, seg_key)) 301 + 302 + segment_days = {day for day, _ in segments} 259 303 260 304 if skipped: 261 305 logger.info("Skipped %d activities with no content", skipped) ··· 268 312 entities_seeded=0, 269 313 files_created=created_files, 270 314 errors=errors, 271 - summary=f"Imported {len(entries)} Gemini activities{bard_info} across {len(created_files)} days", 315 + summary=( 316 + f"Imported {len(entries)} Gemini activities{bard_info} across " 317 + f"{len(segment_days)} days into {len(segments)} segments" 318 + ), 319 + segments=segments, 272 320 ) 273 321 274 322
+2 -44
think/importers/ics.py
··· 10 10 from typing import Any, Callable 11 11 12 12 from think.importers.file_importer import ImportPreview, ImportResult 13 - from think.importers.shared import seed_entities 13 + from think.importers.shared import seed_entities, window_items 14 14 from think.utils import day_path 15 15 16 16 logger = logging.getLogger(__name__) ··· 124 124 logger.debug("Failed to parse %s timestamp: %s", field, exc) 125 125 126 126 return None 127 - 128 - 129 - def _window_events( 130 - events: list[dict[str, Any]], 131 - window_duration: int = 300, 132 - ) -> list[tuple[str, str, list[dict[str, Any]]]]: 133 - """Group sorted events into fixed-duration windows per creation time.""" 134 - if not events: 135 - return [] 136 - 137 - windows: list[tuple[str, str, list[dict[str, Any]]]] = [] 138 - window_start: float | None = None 139 - window_day: str | None = None 140 - window_events: list[dict[str, Any]] = [] 141 - 142 - for event in events: 143 - create_ts = event["create_ts"] 144 - event_dt = dt.datetime.fromtimestamp(create_ts, tz=dt.timezone.utc) 145 - event_day = event_dt.strftime("%Y%m%d") 146 - 147 - if ( 148 - window_start is None 149 - or event_day != window_day 150 - or create_ts - window_start >= window_duration 151 - ): 152 - if window_events and window_day and window_start is not None: 153 - start_dt = dt.datetime.fromtimestamp(window_start, tz=dt.timezone.utc) 154 - seg_key = f"{start_dt.strftime('%H%M%S')}_{window_duration}" 155 - windows.append((window_day, seg_key, window_events)) 156 - 157 - window_start = create_ts 158 - window_day = event_day 159 - window_events = [] 160 - 161 - window_events.append(event) 162 - 163 - if window_events and window_day and window_start is not None: 164 - start_dt = dt.datetime.fromtimestamp(window_start, tz=dt.timezone.utc) 165 - seg_key = f"{start_dt.strftime('%H%M%S')}_{window_duration}" 166 - windows.append((window_day, seg_key, window_events)) 167 - 168 - return windows 169 127 170 128 171 129 def _render_event_markdown(event: dict[str, Any]) -> str: ··· 398 356 399 357 all_entries.sort(key=lambda entry: entry["create_ts"]) 400 358 401 - windows = _window_events(all_entries) 359 + windows = window_items(all_entries, "create_ts") 402 360 created_files: list[str] = [] 403 361 segments: list[tuple[str, str]] = [] 404 362
+75 -11
think/importers/kindle.py
··· 10 10 from typing import Callable 11 11 12 12 from think.importers.file_importer import ImportPreview, ImportResult 13 - from think.importers.shared import seed_entities, write_structured_import 13 + from think.importers.shared import seed_entities, window_items 14 + from think.utils import day_path 14 15 15 16 logger = logging.getLogger(__name__) 16 17 ··· 128 129 return entry 129 130 130 131 132 + def _render_highlight_markdown(highlights: list[dict]) -> str: 133 + """Render highlights grouped by book as markdown.""" 134 + # Group by book 135 + by_book: dict[str, list[dict]] = {} 136 + for h in highlights: 137 + key = h["book_title"] 138 + by_book.setdefault(key, []).append(h) 139 + 140 + sections: list[str] = [] 141 + for book_title, book_highlights in by_book.items(): 142 + # Use first highlight's author (all from same book share author) 143 + author = book_highlights[0].get("author", "") 144 + if author: 145 + heading = f"## {book_title} by {author}" 146 + else: 147 + heading = f"## {book_title}" 148 + 149 + lines = [heading] 150 + for h in book_highlights: 151 + content = h.get("content", "") 152 + clip_type = h.get("clip_type", "highlight") 153 + 154 + if clip_type == "note": 155 + lines.append(f"Note: {content}") 156 + else: 157 + lines.append(f"> {content}") 158 + 159 + # Page / location metadata 160 + meta_parts: list[str] = [] 161 + if h.get("page") is not None: 162 + meta_parts.append(f"Page {h['page']}") 163 + if h.get("location"): 164 + meta_parts.append(f"Location {h['location']}") 165 + if meta_parts: 166 + lines.append(" | ".join(meta_parts)) 167 + 168 + sections.append("\n".join(lines)) 169 + 170 + return "\n\n".join(sections) 171 + 172 + 131 173 class KindleImporter: 132 174 name = "kindle" 133 175 display_name = "Kindle Highlights" ··· 211 253 ) -> ImportResult: 212 254 text = path.read_text(encoding="utf-8-sig") 213 255 blocks = text.split(DELIMITER) 214 - import_id = dt.datetime.now().strftime("%Y%m%d_%H%M%S") 215 256 216 257 entries: list[dict] = [] 217 258 errors: list[str] = [] ··· 223 264 continue 224 265 entry = _parse_block(block) 225 266 if entry is None: 226 - # Only log as error if block had real content (not just whitespace) 227 267 stripped = block.strip() 228 268 if stripped and "\n" in stripped: 229 269 errors.append(f"Failed to parse clipping block {i + 1}") 230 270 continue 231 271 272 + # Add epoch timestamp for windowing 273 + entry["create_ts"] = dt.datetime.fromisoformat(entry["ts"]).timestamp() 232 274 entries.append(entry) 233 275 books.add(entry["book_title"]) 234 276 if entry["author"]: ··· 237 279 if progress_callback and (i + 1) % 100 == 0: 238 280 progress_callback(i + 1, len(blocks)) 239 281 240 - # Write to journal 241 - created_files = write_structured_import( 242 - "kindle", 243 - entries, 244 - import_id=import_id, 245 - facet=facet, 246 - ) 282 + if not entries: 283 + return ImportResult( 284 + entries_written=0, 285 + entities_seeded=0, 286 + files_created=[], 287 + errors=errors, 288 + summary="No clippings found to import", 289 + ) 290 + 291 + entries.sort(key=lambda e: e["create_ts"]) 292 + 293 + windows = window_items(entries, "create_ts", tz=None) 294 + created_files: list[str] = [] 295 + segments: list[tuple[str, str]] = [] 296 + 297 + for day, seg_key, window_highlights in windows: 298 + segment_dir = day_path(day) / "import.kindle" / seg_key 299 + segment_dir.mkdir(parents=True, exist_ok=True) 300 + md_path = segment_dir / "imported.md" 301 + markdown = _render_highlight_markdown(window_highlights) 302 + md_path.write_text(markdown + "\n", encoding="utf-8") 303 + created_files.append(str(md_path)) 304 + segments.append((day, seg_key)) 305 + 306 + segment_days = {day for day, _ in segments} 247 307 248 308 # Seed entities (books and authors) 249 309 entities_seeded = 0 ··· 266 326 entities_seeded=entities_seeded, 267 327 files_created=created_files, 268 328 errors=errors, 269 - summary=f"Imported {len(entries)} Kindle clippings from {len(books)} books across {len(created_files)} days", 329 + summary=( 330 + f"Imported {len(entries)} Kindle clippings from {len(books)} books " 331 + f"across {len(segment_days)} days into {len(segments)} segments" 332 + ), 333 + segments=segments, 270 334 ) 271 335 272 336
+63
think/importers/shared.py
··· 178 178 return windows 179 179 180 180 181 + def window_items( 182 + items: list[dict[str, Any]], 183 + ts_key: str, 184 + *, 185 + window_duration: int = 300, 186 + tz: dt.timezone | None = dt.timezone.utc, 187 + ) -> list[tuple[str, str, list[dict[str, Any]]]]: 188 + """Group sorted items into fixed-duration windows per day. 189 + 190 + Parameters 191 + ---------- 192 + items : list[dict] 193 + Items sorted by ts_key. The ts_key field must be a float epoch. 194 + ts_key : str 195 + Key name for the float epoch timestamp in each item. 196 + window_duration : int 197 + Window size in seconds (default 300 = 5 minutes). 198 + tz : timezone or None 199 + Timezone for day grouping and seg_key formatting. 200 + Use dt.timezone.utc for UTC timestamps, None for local time. 201 + 202 + Returns 203 + ------- 204 + list[tuple[str, str, list[dict]]] 205 + (day_str, seg_key, items) tuples. 206 + """ 207 + if not items: 208 + return [] 209 + 210 + windows: list[tuple[str, str, list[dict[str, Any]]]] = [] 211 + window_start: float | None = None 212 + window_day: str | None = None 213 + window_items_acc: list[dict[str, Any]] = [] 214 + 215 + for item in items: 216 + ts = item[ts_key] 217 + item_dt = dt.datetime.fromtimestamp(ts, tz=tz) 218 + item_day = item_dt.strftime("%Y%m%d") 219 + 220 + if ( 221 + window_start is None 222 + or item_day != window_day 223 + or ts - window_start >= window_duration 224 + ): 225 + if window_items_acc and window_day and window_start is not None: 226 + start_dt = dt.datetime.fromtimestamp(window_start, tz=tz) 227 + seg_key = f"{start_dt.strftime('%H%M%S')}_{window_duration}" 228 + windows.append((window_day, seg_key, window_items_acc)) 229 + 230 + window_start = ts 231 + window_day = item_day 232 + window_items_acc = [] 233 + 234 + window_items_acc.append(item) 235 + 236 + if window_items_acc and window_day and window_start is not None: 237 + start_dt = dt.datetime.fromtimestamp(window_start, tz=tz) 238 + seg_key = f"{start_dt.strftime('%H%M%S')}_{window_duration}" 239 + windows.append((window_day, seg_key, window_items_acc)) 240 + 241 + return windows 242 + 243 + 181 244 # MIME type mapping for import metadata 182 245 _MIME_TYPES = { 183 246 ".m4a": "audio/mp4",