personal memory agent
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Add sync history tracking and segments endpoint for remote ingest

Store upload history per-remote in JSONL files with file metadata (SHA256,
inode, size) for verification. New GET /ingest/<key>/segments/<day> endpoint
returns segment inventory with file status (present/relocated/missing).

- Compute SHA256 at ingest time for integrity verification
- Track inode to detect files moved by indexer/importer
- Deduplicate segment files by SHA256 (latest upload wins)
- History stored in apps/remote/remotes/<key>/hist/<day>.jsonl

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

+686 -11
+250 -11
apps/remote/routes.py
# === Sync history helpers ===


def _compute_sha256(path: Path) -> str:
    """Compute SHA256 hash of a file.

    The file is read in 64 KiB chunks so the whole upload is never held
    in memory at once.

    Args:
        path: Path to file

    Returns:
        Hex-encoded SHA256 hash

    Raises:
        OSError: If the file cannot be opened or read
    """
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(65536), b""):
            digest.update(chunk)
    return digest.hexdigest()


def _get_hist_dir(key_prefix: str, ensure_exists: bool = True) -> Path:
    """Get the history directory for a remote.

    Args:
        key_prefix: First 8 chars of remote key
        ensure_exists: Create directory if it doesn't exist (default: True)

    Returns:
        Path to apps/remote/remotes/<key_prefix>/hist/
    """
    return get_app_storage_path(
        "remote", "remotes", key_prefix, "hist", ensure_exists=ensure_exists
    )


def _append_sync_record(key_prefix: str, day: str, record: dict) -> None:
    """Append a sync record to the history file.

    Each record is written as one JSON object on its own line (JSONL) in
    <hist dir>/<day>.jsonl; the file is opened in append mode so earlier
    records are never rewritten.

    Args:
        key_prefix: First 8 chars of remote key
        day: Day string (YYYYMMDD)
        record: Sync record to append (must be JSON-serializable)
    """
    hist_path = _get_hist_dir(key_prefix) / f"{day}.jsonl"
    with open(hist_path, "a", encoding="utf-8") as f:
        f.write(json.dumps(record, ensure_ascii=False) + "\n")


def _parse_sync_line(line: str) -> dict | None:
    """Parse a single line of a sync history JSONL file.

    Args:
        line: Raw line, with or without surrounding whitespace/newline

    Returns:
        The decoded record, or None if the line is blank

    Raises:
        ValueError: If the line is not valid JSON or is not a JSON object
    """
    text = line.strip()
    if not text:
        return None
    # json.JSONDecodeError is a ValueError subclass, so callers only need
    # to handle ValueError.
    record = json.loads(text)
    if not isinstance(record, dict):
        raise ValueError(f"expected JSON object, got {type(record).__name__}")
    return record


def _load_sync_history(key_prefix: str, day: str) -> list[dict]:
    """Load sync history for a remote on a given day.

    Malformed lines (invalid JSON, or JSON that is not an object) are
    skipped individually so that one corrupt line - e.g. a partially
    written append - does not hide the records that follow it.

    Args:
        key_prefix: First 8 chars of remote key
        day: Day string (YYYYMMDD)

    Returns:
        List of sync records in file order; empty if the file doesn't
        exist. If reading fails part-way, the records read so far are
        returned.
    """
    hist_path = _get_hist_dir(key_prefix, ensure_exists=False) / f"{day}.jsonl"

    records: list[dict] = []
    skipped = 0
    try:
        # Open directly instead of checking exists() first: avoids a race
        # with concurrent creation/removal of the history file.
        with open(hist_path, encoding="utf-8") as f:
            for line in f:
                try:
                    record = _parse_sync_line(line)
                except ValueError:
                    skipped += 1
                    continue
                if record is not None:
                    records.append(record)
    except FileNotFoundError:
        return []
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError is raised by the file iterator, not by
        # _parse_sync_line, so it lands here rather than in the inner try.
        logger.warning(f"Failed to load sync history {hist_path}: {e}")

    if skipped:
        logger.warning(
            f"Skipped {skipped} malformed line(s) in sync history {hist_path}"
        )
    return records


def _find_by_inode(day_dir: Path, inode: int) -> Path | None:
    """Find a file by inode in the day directory.

    Searches recursively for a regular file with the given inode and
    returns the first match in traversal order. Entries that cannot be
    stat'ed are skipped; an error while walking the tree ends the search.

    Args:
        day_dir: Path to day directory
        inode: Inode number to search for

    Returns:
        Path to file if found, None otherwise
    """
    try:
        for candidate in day_dir.rglob("*"):
            if not candidate.is_file():
                continue
            try:
                if candidate.stat().st_ino == inode:
                    return candidate
            except OSError:
                # File vanished or became unreadable mid-walk; keep looking.
                continue
    except OSError:
        pass
    return None


# === Segment collision helpers ===

# Maximum attempts to find available segment key
- if original_segment != segment and original_segment in filename: 475 - filename = filename.replace(original_segment, segment, 1) 577 + written_filename = submitted_filename 578 + if original_segment != segment and original_segment in submitted_filename: 579 + written_filename = submitted_filename.replace(original_segment, segment, 1) 476 580 477 - target_path = target_dir / filename 581 + target_path = target_dir / written_filename 478 582 479 583 # Save file 480 584 try: 481 585 upload.save(target_path) 482 - saved_files.append(filename) 483 - total_bytes += target_path.stat().st_size 484 - logger.info(f"Saved {filename} to {target_dir}") 586 + stat = target_path.stat() 587 + file_size = stat.st_size 588 + file_inode = stat.st_ino 589 + 590 + saved_files.append(written_filename) 591 + total_bytes += file_size 592 + 593 + # Compute SHA256 and record file info for sync history 594 + file_sha256 = _compute_sha256(target_path) 595 + file_records.append( 596 + { 597 + "submitted": submitted_filename, 598 + "written": written_filename, 599 + "inode": file_inode, 600 + "size": file_size, 601 + "sha256": file_sha256, 602 + } 603 + ) 604 + 605 + logger.info(f"Saved {written_filename} to {target_dir}") 485 606 except OSError as e: 486 - logger.error(f"Failed to save {filename}: {e}") 487 - return jsonify({"error": f"Failed to save {filename}"}), 500 607 + logger.error(f"Failed to save {written_filename}: {e}") 608 + return jsonify({"error": f"Failed to save {written_filename}"}), 500 488 609 489 610 if not saved_files: 490 611 return jsonify({"error": "No valid files saved"}), 400 612 + 613 + # Write sync history record 614 + key_prefix = key[:8] 615 + sync_record = { 616 + "ts": int(time.time() * 1000), 617 + "segment": segment, 618 + "files": file_records, 619 + } 620 + if segment != original_segment: 621 + sync_record["segment_original"] = original_segment 622 + _append_sync_record(key_prefix, day, sync_record) 491 623 492 624 # Update remote stats 493 625 
def _build_inode_index(day_dir: Path) -> dict[int, Path]:
    """Map inode number to path for every regular file under day_dir.

    The tree is walked once. If several paths share an inode (hard links)
    the first one encountered is kept. Entries that cannot be stat'ed are
    skipped; an error while walking the tree ends the walk and the
    entries collected so far are returned.

    Args:
        day_dir: Path to day directory

    Returns:
        Dict of inode number -> path (empty if nothing could be read)
    """
    index: dict[int, Path] = {}
    try:
        for path in day_dir.rglob("*"):
            try:
                if path.is_file():
                    index.setdefault(path.stat().st_ino, path)
            except OSError:
                continue
    except OSError:
        pass
    return index


@remote_bp.route("/ingest/<key>/segments/<day>")
def ingest_segments(key: str, day: str) -> Any:
    """List uploaded segments for a day with file verification.

    Returns a JSON array of segments sorted by segment key. Each segment
    has "key", optionally "original_key" (when the submitted segment key
    was adjusted), and "files". Each file has "name", "size", "sha256",
    optionally "submitted_name" (when it differs from "name"), and a
    "status":
    - present: File exists at recorded path
    - relocated: File found at different path (by inode); "current_path"
      gives the new location relative to the day directory
    - missing: File not found

    Within a segment, files are deduplicated by sha256; the most recent
    history record wins.

    Args:
        key: Remote authentication key
        day: Day string (YYYYMMDD)

    Returns:
        JSON response; 401 for an unknown key, 403 for a revoked or
        disabled remote, 400 for a malformed day.
    """
    # Validate key
    remote = _load_remote(key)
    if not remote:
        return jsonify({"error": "Invalid key"}), 401

    if remote.get("revoked", False):
        return jsonify({"error": "Remote revoked"}), 403

    if not remote.get("enabled", True):
        return jsonify({"error": "Remote disabled"}), 403

    # Validate day format (YYYYMMDD). fullmatch with an explicit ASCII
    # class: "^\d{8}$" would also accept a trailing newline and non-ASCII
    # digits, and `day` is used to build file names below.
    if not re.fullmatch(r"[0-9]{8}", day):
        return jsonify({"error": "Invalid day format"}), 400

    # Load sync history for this remote/day
    records = _load_sync_history(key[:8], day)
    if not records:
        return jsonify([])

    # Get day directory for file verification
    target_dir = day_path(day)

    # Inode -> path lookup, built at most once per request and only if
    # some file is not at its recorded path (avoids re-walking the whole
    # day tree for every relocated/missing file).
    inode_index: dict[int, Path] | None = None

    # Build response grouped by segment, deduplicating by sha256
    # Later records overwrite earlier ones (most recent upload wins)
    segments: dict[str, dict] = {}

    for record in records:
        if not isinstance(record, dict):
            continue

        segment = record.get("segment", "")
        segment_original = record.get("segment_original")

        if segment not in segments:
            segments[segment] = {
                "key": segment,
                "files_by_sha": {},  # Keyed by sha256 for deduplication
            }
            if segment_original:
                segments[segment]["original_key"] = segment_original
        files_by_sha = segments[segment]["files_by_sha"]

        # Check each file's status
        for file_rec in record.get("files") or []:
            if not isinstance(file_rec, dict):
                continue

            written = file_rec.get("written", "")
            submitted = file_rec.get("submitted", "")
            inode = file_rec.get("inode")
            sha256 = file_rec.get("sha256", "")

            file_info = {
                "name": written,
                "size": file_rec.get("size", 0),
                "sha256": sha256,
            }

            # Include submitted_name only if different
            if submitted != written:
                file_info["submitted_name"] = submitted

            # Check file status. An empty name would resolve to the day
            # directory itself, so require a non-empty name and a regular
            # file before reporting "present".
            if written and (target_dir / written).is_file():
                file_info["status"] = "present"
            else:
                relocated = None
                if isinstance(inode, int) and inode:
                    if inode_index is None:
                        inode_index = (
                            _build_inode_index(target_dir)
                            if target_dir.exists()
                            else {}
                        )
                    relocated = inode_index.get(inode)

                if relocated is not None:
                    file_info["status"] = "relocated"
                    # as_posix() keeps the API output stable across platforms
                    file_info["current_path"] = relocated.relative_to(
                        target_dir
                    ).as_posix()
                else:
                    file_info["status"] = "missing"

            # Deduplicate by sha256 - later uploads overwrite earlier.
            # Records lacking a hash fall back to the file name so they do
            # not all collapse onto the same empty key.
            files_by_sha[sha256 or f"name:{written}"] = file_info

    # Convert files_by_sha dicts to lists and sort by segment key
    result = []
    for segment_data in sorted(segments.values(), key=lambda s: s["key"]):
        item = {"key": segment_data["key"]}
        if "original_key" in segment_data:
            item["original_key"] = segment_data["original_key"]
        item["files"] = list(segment_data["files_by_sha"].values())
        result.append(item)

    return jsonify(result)
+436
apps/remote/tests/test_routes.py
# === Sync history tests ===


def _sync_create_remote(env, name: str) -> tuple[str, str]:
    """Register a remote via the API and return (key, key_prefix)."""
    resp = env.client.post(
        "/app/remote/api/create",
        json={"name": name},
        content_type="application/json",
    )
    data = resp.get_json()
    return data["key"], data["key_prefix"]


def _sync_upload(
    env,
    key: str,
    content: bytes,
    *,
    day: str = "20250103",
    segment: str = "120000_300",
    filename: str = "120000_300_audio.flac",
):
    """POST a single file to the ingest endpoint and return the response."""
    return env.client.post(
        f"/app/remote/ingest/{key}",
        data={
            "day": day,
            "segment": segment,
            "files": (io.BytesIO(content), filename),
        },
    )


def _sync_get_segments(env, key: str, day: str = "20250103"):
    """GET the segments listing for a remote/day and return the response."""
    return env.client.get(f"/app/remote/ingest/{key}/segments/{day}")


def _sync_hist_path(env, key_prefix: str, day: str = "20250103"):
    """Return the path of the sync history file for a remote/day."""
    return (
        env.journal
        / "apps"
        / "remote"
        / "remotes"
        / key_prefix
        / "hist"
        / f"{day}.jsonl"
    )


def _sync_make_conflict(env, day: str = "20250103"):
    """Pre-create a file that collides with the default upload name."""
    day_dir = env.journal / day
    day_dir.mkdir(parents=True)
    (day_dir / "120000_300_audio.flac").write_bytes(b"existing")
    return day_dir


def test_compute_sha256(remote_env):
    """Test SHA256 computation."""
    from apps.remote.routes import _compute_sha256

    env = remote_env()
    test_file = env.journal / "test.txt"
    test_file.write_bytes(b"hello world")

    sha = _compute_sha256(test_file)
    # SHA256 of "hello world"
    assert sha == "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9"


def test_ingest_creates_sync_history(remote_env):
    """Test that ingest creates sync history record."""
    env = remote_env()
    key, key_prefix = _sync_create_remote(env, "history-test")

    # Upload a file
    test_data = b"test audio content for history"
    resp = _sync_upload(env, key, test_data)
    assert resp.status_code == 200

    # Check history file exists
    hist_path = _sync_hist_path(env, key_prefix)
    assert hist_path.exists()

    # Load and verify history
    with open(hist_path, encoding="utf-8") as f:
        record = json.loads(f.readline())

    assert record["segment"] == "120000_300"
    assert "segment_original" not in record  # No collision
    assert len(record["files"]) == 1

    file_rec = record["files"][0]
    assert file_rec["submitted"] == "120000_300_audio.flac"
    assert file_rec["written"] == "120000_300_audio.flac"
    assert file_rec["size"] == len(test_data)
    assert len(file_rec["sha256"]) == 64  # SHA256 hex length
    assert file_rec["inode"] > 0


def test_ingest_history_with_collision(remote_env):
    """Test that sync history records collision adjustment."""
    env = remote_env()
    key, key_prefix = _sync_create_remote(env, "collision-history-test")

    # Create conflicting file
    _sync_make_conflict(env)

    # Upload with same segment key
    resp = _sync_upload(env, key, b"new audio content")
    assert resp.status_code == 200

    # Load history
    with open(_sync_hist_path(env, key_prefix), encoding="utf-8") as f:
        record = json.loads(f.readline())

    # Should record original segment
    assert record["segment_original"] == "120000_300"
    assert record["segment"] != "120000_300"

    # File names should reflect adjustment
    file_rec = record["files"][0]
    assert file_rec["submitted"] == "120000_300_audio.flac"
    assert file_rec["written"] != "120000_300_audio.flac"
    assert record["segment"] in file_rec["written"]


def test_segments_endpoint_empty(remote_env):
    """Test segments endpoint returns empty for no uploads."""
    env = remote_env()
    key, _ = _sync_create_remote(env, "segments-empty-test")

    # Query segments - should be empty
    resp = _sync_get_segments(env, key)
    assert resp.status_code == 200
    assert resp.get_json() == []


def test_segments_endpoint_invalid_key(remote_env):
    """Test segments endpoint rejects invalid key."""
    env = remote_env()

    resp = env.client.get("/app/remote/ingest/invalid-key/segments/20250103")
    assert resp.status_code == 401


def test_segments_endpoint_invalid_day(remote_env):
    """Test segments endpoint validates day format."""
    env = remote_env()
    key, _ = _sync_create_remote(env, "segments-day-test")

    resp = _sync_get_segments(env, key, day="2025-01-03")
    assert resp.status_code == 400
    assert "Invalid day format" in resp.get_json()["error"]


def test_segments_endpoint_lists_uploads(remote_env):
    """Test segments endpoint lists uploaded segments."""
    env = remote_env()
    key, _ = _sync_create_remote(env, "segments-list-test")

    # Upload a file
    test_data = b"test audio content"
    resp = _sync_upload(env, key, test_data)
    assert resp.status_code == 200

    # Query segments
    resp = _sync_get_segments(env, key)
    assert resp.status_code == 200
    data = resp.get_json()

    assert len(data) == 1
    segment = data[0]
    assert segment["key"] == "120000_300"
    assert "original_key" not in segment  # No collision
    assert len(segment["files"]) == 1

    file_info = segment["files"][0]
    assert file_info["name"] == "120000_300_audio.flac"
    assert file_info["size"] == len(test_data)
    assert len(file_info["sha256"]) == 64
    assert file_info["status"] == "present"
    assert "submitted_name" not in file_info  # Same as written


def test_segments_endpoint_shows_collision(remote_env):
    """Test segments endpoint shows collision info."""
    env = remote_env()
    key, _ = _sync_create_remote(env, "segments-collision-test")

    # Create conflicting file
    _sync_make_conflict(env)

    # Upload with collision
    resp = _sync_upload(env, key, b"new audio")
    assert resp.status_code == 200

    # Query segments
    data = _sync_get_segments(env, key).get_json()

    assert len(data) == 1
    segment = data[0]
    assert segment["key"] != "120000_300"
    assert segment["original_key"] == "120000_300"

    file_info = segment["files"][0]
    assert file_info["submitted_name"] == "120000_300_audio.flac"
    assert file_info["name"] != "120000_300_audio.flac"
    assert file_info["status"] == "present"


def test_segments_endpoint_missing_file(remote_env):
    """Test segments endpoint reports missing files."""
    env = remote_env()
    key, _ = _sync_create_remote(env, "segments-missing-test")

    # Upload a file
    resp = _sync_upload(env, key, b"test audio")
    assert resp.status_code == 200

    # Delete the file
    (env.journal / "20250103" / "120000_300_audio.flac").unlink()

    # Query segments
    data = _sync_get_segments(env, key).get_json()

    assert len(data) == 1
    file_info = data[0]["files"][0]
    assert file_info["status"] == "missing"


def test_segments_endpoint_relocated_file(remote_env):
    """Test segments endpoint detects relocated files by inode."""
    env = remote_env()
    key, _ = _sync_create_remote(env, "segments-relocate-test")

    # Upload a file
    resp = _sync_upload(env, key, b"test audio for relocation")
    assert resp.status_code == 200

    # Move the file to a subdirectory (simulating indexer moving it)
    day_dir = env.journal / "20250103"
    segment_dir = day_dir / "120000_300"
    segment_dir.mkdir()
    original_path = day_dir / "120000_300_audio.flac"
    new_path = segment_dir / "audio.flac"
    original_path.rename(new_path)

    # Query segments - should detect relocation by inode
    data = _sync_get_segments(env, key).get_json()

    assert len(data) == 1
    file_info = data[0]["files"][0]
    assert file_info["status"] == "relocated"
    assert file_info["current_path"] == "120000_300/audio.flac"


def test_find_by_inode(remote_env):
    """Test _find_by_inode helper."""
    from apps.remote.routes import _find_by_inode

    env = remote_env()
    day_dir = env.journal / "20250103"
    day_dir.mkdir(parents=True)

    # Create a file and get its inode
    test_file = day_dir / "test.txt"
    test_file.write_bytes(b"hello")
    inode = test_file.stat().st_ino

    # Should find it at original location
    found = _find_by_inode(day_dir, inode)
    assert found == test_file

    # Move to subdirectory
    subdir = day_dir / "subdir"
    subdir.mkdir()
    new_path = subdir / "renamed.txt"
    test_file.rename(new_path)

    # Should still find by inode
    found = _find_by_inode(day_dir, inode)
    assert found == new_path

    # Non-existent inode returns None
    found = _find_by_inode(day_dir, 999999999)
    assert found is None


def test_segments_endpoint_revoked_key(remote_env):
    """Test segments endpoint rejects revoked key."""
    env = remote_env()

    # Create and revoke a remote
    key, key_prefix = _sync_create_remote(env, "segments-revoked-test")
    env.client.delete(f"/app/remote/api/{key_prefix}")

    # Query segments - should be rejected
    resp = _sync_get_segments(env, key)
    assert resp.status_code == 403
    assert "Remote revoked" in resp.get_json()["error"]


def test_segments_endpoint_deduplicates_by_sha256(remote_env):
    """Test that duplicate file uploads are deduplicated by sha256."""
    env = remote_env()
    key, key_prefix = _sync_create_remote(env, "segments-dedup-test")

    # Upload a file
    test_data = b"test audio content"
    resp = _sync_upload(env, key, test_data)
    assert resp.status_code == 200

    # Upload the same file again (same content = same sha256). The name
    # collides with the first upload, so it lands in an adjusted segment.
    resp = _sync_upload(env, key, test_data)
    assert resp.status_code == 200

    data = _sync_get_segments(env, key).get_json()

    # Should have 2 segments (one original, one collision-adjusted)
    assert len(data) == 2
    for segment in data:
        assert len(segment["files"]) == 1
        assert segment["files"][0]["status"] == "present"

    # The two uploads above end up in different segments, so they never
    # exercise the per-segment dedup. Re-append the first history record
    # to create a genuine duplicate (same segment, same sha256).
    hist_path = _sync_hist_path(env, key_prefix)
    with open(hist_path, encoding="utf-8") as f:
        first_line = f.readline()
    with open(hist_path, "a", encoding="utf-8") as f:
        f.write(first_line)

    data = _sync_get_segments(env, key).get_json()

    # Still 2 segments, each with exactly 1 file (deduplicated by sha256)
    assert len(data) == 2
    for segment in data:
        assert len(segment["files"]) == 1
        assert segment["files"][0]["status"] == "present"