personal memory agent
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Fix _default segment ingest: bypass stream regex for system-defined default stream

The _STREAM_RE validation regex rejected "_default" (leading underscore),
causing 183 metadata segments to be re-sent on every export. Import
DEFAULT_STREAM from think.utils and exempt it from the user-stream regex
so that _default segments are recorded in state.json and skipped on re-export.

+84 -2
+2 -1
apps/import/ingest.py
··· 31 31 save_journal_entity, 32 32 ) 33 33 from think.entities.matching import find_matching_entity 34 + from think.utils import DEFAULT_STREAM 34 35 35 36 from .journal_sources import ( 36 37 get_state_directory, ··· 150 151 151 152 if not _DAY_RE.match(day): 152 153 raise ValueError("Invalid day format") 153 - if not _STREAM_RE.match(stream): 154 + if stream != DEFAULT_STREAM and not _STREAM_RE.match(stream): 154 155 raise ValueError("Invalid stream format") 155 156 if not _SEGMENT_RE.match(segment_key): 156 157 raise ValueError("Invalid segment_key format")
+25 -1
tests/test_export_integration.py
··· 205 205 return json.loads(path.read_text(encoding="utf-8")) 206 206 207 207 208 - def _setup_segments(journal_root: Path, *, day: str = "20260413") -> list[str]: 208 + def _setup_segments( 209 + journal_root: Path, *, day: str = "20260413", include_default: bool = False 210 + ) -> list[str]: 209 211 segment_dir = journal_root / day / "laptop" / "143022_300" 210 212 _write_bytes(segment_dir / "audio.flac", b"audio-data") 211 213 _write_bytes(segment_dir / "transcript.jsonl", b'{"text":"hello"}\n') 214 + if include_default: 215 + default_segment_dir = journal_root / day / "180000_300" 216 + _write_bytes(default_segment_dir / "audio.flac", b"default-audio") 212 217 return [day] 213 218 214 219 ··· 359 364 assert second_entities.sent == 0 and second_entities.skipped == 1 360 365 assert second_facets.sent == 0 and second_facets.skipped == 1 361 366 assert second_config.sent == 0 and second_config.skipped == 1 367 + 368 + 369 + def test_idempotent_reexport_default_stream(export_integration_env): 370 + env = export_integration_env 371 + _setup_segments(env["source"], include_default=True) 372 + 373 + first = export_segments( 374 + env["base_url"], env["key"], ["20260413"], False, session=env["adapter"] 375 + ) 376 + second = export_segments( 377 + env["base_url"], env["key"], ["20260413"], False, session=env["adapter"] 378 + ) 379 + 380 + assert first.sent == 2 381 + assert ( 382 + env["target"] / "20260413" / "_default" / "180000_300" / "audio.flac" 383 + ).exists() 384 + assert second.sent == 0 385 + assert second.skipped == 2 362 386 363 387 364 388 def test_partial_only_segments(export_integration_env):
+57
tests/test_segment_ingest.py
··· 524 524 } 525 525 526 526 527 + def test_ingest_default_stream_segment(ingest_env): 528 + env = ingest_env 529 + segments = [ 530 + { 531 + "day": "20260413", 532 + "stream": "_default", 533 + "segment_key": "143022_300", 534 + "files": [("transcript.jsonl", b'{"text":"default"}\n')], 535 + } 536 + ] 537 + 538 + response = _post_ingest(env["client"], env["key"], env["key_prefix"], segments) 539 + 540 + assert response.status_code == 200 541 + assert response.get_json() == { 542 + "segments_received": 1, 543 + "segments_skipped": 0, 544 + "segments_deconflicted": 0, 545 + "errors": [], 546 + } 547 + 548 + state_data = _read_state(env["key_prefix"]) 549 + assert "_default/143022_300" in state_data["20260413"] 550 + assert ( 551 + env["root"] 552 + / "20260413" 553 + / "_default" 554 + / "143022_300" 555 + / "transcript.jsonl" 556 + ).read_bytes() == b'{"text":"default"}\n' 557 + 558 + 559 + def test_ingest_default_stream_idempotent(ingest_env): 560 + env = ingest_env 561 + segments = [ 562 + { 563 + "day": "20260413", 564 + "stream": "_default", 565 + "segment_key": "143022_300", 566 + "files": [("transcript.jsonl", b'{"text":"default"}\n')], 567 + } 568 + ] 569 + 570 + first = _post_ingest(env["client"], env["key"], env["key_prefix"], segments) 571 + second = _post_ingest(env["client"], env["key"], env["key_prefix"], segments) 572 + 573 + assert first.status_code == 200 574 + assert second.status_code == 200 575 + assert first.get_json()["segments_received"] == 1 576 + assert second.get_json() == { 577 + "segments_received": 0, 578 + "segments_skipped": 1, 579 + "segments_deconflicted": 0, 580 + "errors": [], 581 + } 582 + 583 + 527 584 def test_ingest_idempotent(ingest_env): 528 585 env = ingest_env 529 586 segments = [