Auto-skip trashed and short recordings during Plaud sync

+59

tests/test_importer_sync.py

··· 238 238 239 239 assert result["total"] == 2 240 240 assert result["available"] == 2 241 + assert result["skipped"] == 0 241 242 assert result["imported"] == 0 242 243 assert result["downloaded"] == 0 243 244 ··· 351 352 state = load_sync_state(tmp_path, "plaud") 352 353 assert state["files"]["file2"]["status"] == "imported" 353 354 assert state["files"]["file2"]["import_timestamp"] == "20260117_134640" 355 + 356 + 357 + def _mock_list_files_with_junk(_session, _token): 358 + """Return a file list including trashed and short recordings.""" 359 + return [ 360 + { 361 + "id": "good1", 362 + "filename": "Team Standup", 363 + "fullname": "aaa.opus", 364 + "filesize": 5000, 365 + "start_time": 1737000000000, 366 + "duration": 300000, 367 + "is_trash": False, 368 + }, 369 + { 370 + "id": "trashed1", 371 + "filename": "Old Recording", 372 + "fullname": "bbb.opus", 373 + "filesize": 2000, 374 + "start_time": 1737100000000, 375 + "duration": 60000, 376 + "is_trash": True, 377 + }, 378 + { 379 + "id": "short1", 380 + "filename": "Accidental Tap", 381 + "fullname": "ccc.opus", 382 + "filesize": 500, 383 + "start_time": 1737200000000, 384 + "duration": 5000, 385 + "is_trash": False, 386 + }, 387 + ] 388 + 389 + 390 + def test_plaud_sync_skips_trashed_and_short(tmp_path, monkeypatch): 391 + """Trashed and short recordings are auto-skipped.""" 392 + from think.importers.plaud import PlaudBackend 393 + from think.importers.sync import load_sync_state 394 + 395 + monkeypatch.setenv("PLAUD_ACCESS_TOKEN", "test-token") 396 + 397 + with patch( 398 + "think.importers.plaud.list_files", side_effect=_mock_list_files_with_junk 399 + ): 400 + result = PlaudBackend().sync(tmp_path, dry_run=True) 401 + 402 + assert result["total"] == 3 403 + assert result["available"] == 1 404 + assert result["skipped"] == 2 405 + assert result["imported"] == 0 406 + 407 + state = load_sync_state(tmp_path, "plaud") 408 + assert state["files"]["good1"]["status"] == "available" 409 + assert state["files"]["trashed1"]["status"] == "skipped" 410 + assert state["files"]["trashed1"]["skip_reason"] == "trashed" 411 + assert state["files"]["short1"]["status"] == "skipped" 412 + assert state["files"]["short1"]["skip_reason"] == "too_short" 354 413 355 414 356 415 def test_plaud_sync_cli_flag(capsys, monkeypatch, tmp_path):

+5 -2

think/importers/cli.py

··· 124 124 total = result.get("total", 0) 125 125 imported = result.get("imported", 0) 126 126 available = result.get("available", 0) 127 + skipped = result.get("skipped", 0) 127 128 downloaded = result.get("downloaded", 0) 128 129 errors = result.get("errors", []) 129 130 130 131 # Print summary 131 132 print() 132 - print(f" Total recordings: {total}") 133 - print(f" Already imported: {imported}") 133 + print(f" Total recordings: {total}") 134 + print(f" Already imported: {imported}") 134 135 print(f" Available to import: {available}") 136 + if skipped: 137 + print(f" Skipped: {skipped} (trashed/short)") 135 138 136 139 if downloaded > 0: 137 140 print(f" Downloaded + imported: {downloaded}")

+17 -1

think/importers/plaud.py

··· 23 23 24 24 API_BASE = "https://api.plaud.ai" 25 25 26 + # Skip recordings shorter than this (milliseconds) 27 + MIN_DURATION_MS = 30_000 28 + 26 29 27 30 def make_session() -> requests.Session: 28 31 """Create a requests session with sane retries.""" ··· 316 319 entry["matched_at"] = dt.datetime.now().isoformat() 317 320 continue 318 321 319 - # New file — check if matched to an existing import 322 + # New file — build entry with full metadata 323 + duration = file_info.get("duration", 0) 324 + is_trash = file_info.get("is_trash", False) 325 + 320 326 entry: dict[str, Any] = { 321 327 "filename": file_info.get("filename", "unnamed"), 322 328 "fullname": file_info.get("fullname", ""), 323 329 "filesize": file_info.get("filesize", 0), 324 330 "start_time": file_info.get("start_time", 0), 331 + "duration": duration, 332 + "is_trash": is_trash, 325 333 } 326 334 327 335 if file_id in matches: 328 336 entry["status"] = "imported" 329 337 entry["import_timestamp"] = matches[file_id] 330 338 entry["matched_at"] = dt.datetime.now().isoformat() 339 + elif is_trash: 340 + entry["status"] = "skipped" 341 + entry["skip_reason"] = "trashed" 342 + elif duration and duration < MIN_DURATION_MS: 343 + entry["status"] = "skipped" 344 + entry["skip_reason"] = "too_short" 331 345 else: 332 346 entry["status"] = "available" 333 347 ··· 339 353 available = sum( 340 354 1 for f in known_files.values() if f.get("status") == "available" 341 355 ) 356 + skipped = sum(1 for f in known_files.values() if f.get("status") == "skipped") 342 357 343 358 result: dict[str, Any] = { 344 359 "total": total, 345 360 "imported": imported, 346 361 "available": available, 362 + "skipped": skipped, 347 363 "downloaded": 0, 348 364 "errors": [], 349 365 }

Configure Feed

Configure Feed