personal memory agent
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Auto-skip trashed and short recordings during Plaud sync

Use the is_trash and duration fields from the Plaud API to automatically
mark junk files as skipped instead of available. Trashed recordings
and those under 30 seconds are skipped, each with an auditable
skip_reason ("trashed" or "too_short").

- Store richer metadata (duration, is_trash) in sync state entries
- Show skipped count in CLI summary output
- Update migration script with same filtering logic

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

+81 -3
+59
tests/test_importer_sync.py
··· 238 238 239 239 assert result["total"] == 2 240 240 assert result["available"] == 2 241 + assert result["skipped"] == 0 241 242 assert result["imported"] == 0 242 243 assert result["downloaded"] == 0 243 244 ··· 351 352 state = load_sync_state(tmp_path, "plaud") 352 353 assert state["files"]["file2"]["status"] == "imported" 353 354 assert state["files"]["file2"]["import_timestamp"] == "20260117_134640" 355 + 356 + 357 + def _mock_list_files_with_junk(_session, _token): 358 + """Return a file list including trashed and short recordings.""" 359 + return [ 360 + { 361 + "id": "good1", 362 + "filename": "Team Standup", 363 + "fullname": "aaa.opus", 364 + "filesize": 5000, 365 + "start_time": 1737000000000, 366 + "duration": 300000, 367 + "is_trash": False, 368 + }, 369 + { 370 + "id": "trashed1", 371 + "filename": "Old Recording", 372 + "fullname": "bbb.opus", 373 + "filesize": 2000, 374 + "start_time": 1737100000000, 375 + "duration": 60000, 376 + "is_trash": True, 377 + }, 378 + { 379 + "id": "short1", 380 + "filename": "Accidental Tap", 381 + "fullname": "ccc.opus", 382 + "filesize": 500, 383 + "start_time": 1737200000000, 384 + "duration": 5000, 385 + "is_trash": False, 386 + }, 387 + ] 388 + 389 + 390 + def test_plaud_sync_skips_trashed_and_short(tmp_path, monkeypatch): 391 + """Trashed and short recordings are auto-skipped.""" 392 + from think.importers.plaud import PlaudBackend 393 + from think.importers.sync import load_sync_state 394 + 395 + monkeypatch.setenv("PLAUD_ACCESS_TOKEN", "test-token") 396 + 397 + with patch( 398 + "think.importers.plaud.list_files", side_effect=_mock_list_files_with_junk 399 + ): 400 + result = PlaudBackend().sync(tmp_path, dry_run=True) 401 + 402 + assert result["total"] == 3 403 + assert result["available"] == 1 404 + assert result["skipped"] == 2 405 + assert result["imported"] == 0 406 + 407 + state = load_sync_state(tmp_path, "plaud") 408 + assert state["files"]["good1"]["status"] == "available" 409 + assert 
state["files"]["trashed1"]["status"] == "skipped" 410 + assert state["files"]["trashed1"]["skip_reason"] == "trashed" 411 + assert state["files"]["short1"]["status"] == "skipped" 412 + assert state["files"]["short1"]["skip_reason"] == "too_short" 354 413 355 414 356 415 def test_plaud_sync_cli_flag(capsys, monkeypatch, tmp_path):
+5 -2
think/importers/cli.py
··· 124 124 total = result.get("total", 0) 125 125 imported = result.get("imported", 0) 126 126 available = result.get("available", 0) 127 + skipped = result.get("skipped", 0) 127 128 downloaded = result.get("downloaded", 0) 128 129 errors = result.get("errors", []) 129 130 130 131 # Print summary 131 132 print() 132 - print(f" Total recordings: {total}") 133 - print(f" Already imported: {imported}") 133 + print(f" Total recordings: {total}") 134 + print(f" Already imported: {imported}") 134 135 print(f" Available to import: {available}") 136 + if skipped: 137 + print(f" Skipped: {skipped} (trashed/short)") 135 138 136 139 if downloaded > 0: 137 140 print(f" Downloaded + imported: {downloaded}")
+17 -1
think/importers/plaud.py
··· 23 23 24 24 API_BASE = "https://api.plaud.ai" 25 25 26 + # Skip recordings shorter than this (milliseconds) 27 + MIN_DURATION_MS = 30_000 28 + 26 29 27 30 def make_session() -> requests.Session: 28 31 """Create a requests session with sane retries.""" ··· 316 319 entry["matched_at"] = dt.datetime.now().isoformat() 317 320 continue 318 321 319 - # New file — check if matched to an existing import 322 + # New file — build entry with full metadata 323 + duration = file_info.get("duration", 0) 324 + is_trash = file_info.get("is_trash", False) 325 + 320 326 entry: dict[str, Any] = { 321 327 "filename": file_info.get("filename", "unnamed"), 322 328 "fullname": file_info.get("fullname", ""), 323 329 "filesize": file_info.get("filesize", 0), 324 330 "start_time": file_info.get("start_time", 0), 331 + "duration": duration, 332 + "is_trash": is_trash, 325 333 } 326 334 327 335 if file_id in matches: 328 336 entry["status"] = "imported" 329 337 entry["import_timestamp"] = matches[file_id] 330 338 entry["matched_at"] = dt.datetime.now().isoformat() 339 + elif is_trash: 340 + entry["status"] = "skipped" 341 + entry["skip_reason"] = "trashed" 342 + elif duration and duration < MIN_DURATION_MS: 343 + entry["status"] = "skipped" 344 + entry["skip_reason"] = "too_short" 331 345 else: 332 346 entry["status"] = "available" 333 347 ··· 339 353 available = sum( 340 354 1 for f in known_files.values() if f.get("status") == "available" 341 355 ) 356 + skipped = sum(1 for f in known_files.values() if f.get("status") == "skipped") 342 357 343 358 result: dict[str, Any] = { 344 359 "total": total, 345 360 "imported": imported, 346 361 "available": available, 362 + "skipped": skipped, 347 363 "downloaded": 0, 348 364 "errors": [], 349 365 }