Import your Last.fm and Spotify listening history to the AT Protocol network using the fm.teal.alpha.feed.play lexicon.
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat: add resumable imports, conservative rate limits, and monotonic TID generation

Introduce stateful import resume support with --fresh and auto-resume logic, switch defaults to conservative batch and rate-limit settings to protect PDSs, and add an --aggressive mode for opt-in speed. Replace legacy TID generation with a monotonic clock-backed implementation supporting historical records, dry runs, and validation. Also add Node test scripts and minor type safety fixes.

+1778 -60
+4 -1
package.json
··· 14 14 "dev": "tsc && node dist/index.js", 15 15 "dry-run": "npm run build && node dist/index.js --dry-run", 16 16 "clean": "rm -rf dist", 17 - "type-check": "tsc --noEmit" 17 + "type-check": "tsc --noEmit", 18 + "test": "npm run build && node --test dist/tests/**/*.test.js", 19 + "test:tid": "npm run build && node --test dist/tests/tid.test.js", 20 + "test:watch": "npm run build && node --watch --test dist/tests/**/*.test.js" 18 21 }, 19 22 "keywords": [ 20 23 "lastfm",
+13 -9
src/config.ts
··· 6 6 // - This affects all users on your PDS, not just your account 7 7 // - See: https://docs.bsky.app/blog/rate-limits-pds-v3 8 8 // 9 - // Default limit: Aggressive initial limit that will dynamically adjust 10 - // Start high and back off if we hit rate limits 9 + // Default limit: Very conservative (7,500 records/day) to be safe 11 10 export const RECORDS_PER_DAY_LIMIT = 10000; 12 11 13 - // Safety margin factor - start aggressive, will back off if needed 14 - export const SAFETY_MARGIN = 1.0; 12 + // Safety margin factor - 75% by default for maximum safety 13 + // Use --aggressive flag to set to 85% for faster imports 14 + export const SAFETY_MARGIN = 0.75; 15 + 16 + // Aggressive safety margin (for --aggressive flag) 17 + export const AGGRESSIVE_SAFETY_MARGIN = 0.85; 15 18 16 19 // Record type 17 20 export const RECORD_TYPE = 'fm.teal.alpha.feed.play'; ··· 32 35 33 36 const CLIENT_AGENT = buildClientAgent(); 34 37 35 - // Default batch configuration - aggressive defaults for maximum speed 38 + // Default batch configuration - conservative for PDS safety 36 39 // Will dynamically adjust based on success/failure 37 - export const DEFAULT_BATCH_SIZE = 200; // Max allowed by applyWrites 38 - export const DEFAULT_BATCH_DELAY = 500; // Start with 500ms between batches 40 + export const DEFAULT_BATCH_SIZE = 100; // Conservative default 41 + export const DEFAULT_BATCH_DELAY = 2000; // Start with 2 seconds between batches 39 42 40 - // Minimum safe delay between batches (500ms for adaptive mode) 41 - export const MIN_BATCH_DELAY = 500; 43 + // Minimum safe delay between batches (1 second minimum) 44 + export const MIN_BATCH_DELAY = 1000; 42 45 43 46 // Maximum batch size (PDS limit is 200 operations per call) 44 47 export const MAX_BATCH_SIZE = 200; ··· 59 62 SLINGSHOT_RESOLVER, 60 63 RECORDS_PER_DAY_LIMIT, 61 64 SAFETY_MARGIN, 65 + AGGRESSIVE_SAFETY_MARGIN, 62 66 }; 63 67 64 68 export default config;
+64 -4
src/lib/cli.ts
··· 13 13 import { calculateOptimalBatchSize } from '../utils/helpers.js'; 14 14 import { fetchExistingRecords, filterNewRecords, displaySyncStats, removeDuplicates } from './sync.js'; 15 15 import { Logger, LogLevel, setGlobalLogger, log } from '../utils/logger.js'; 16 + import { 17 + loadImportState, 18 + createImportState, 19 + displayResumeInfo, 20 + clearImportState, 21 + ImportState, 22 + } from '../utils/import-state.js'; 16 23 17 24 /** 18 25 * Show help message ··· 42 49 deduplicate Remove duplicate records 43 50 44 51 ${'\x1b[1m'}BATCH CONFIGURATION:${'\x1b[0m'} 45 - -b, --batch-size <number> Records per batch (default: auto-calculated) 46 - -d, --batch-delay <ms> Delay between batches in ms (default: 500, min: 500) 52 + -b, --batch-size <number> Records per batch (default: 100) 53 + -d, --batch-delay <ms> Delay between batches in ms (default: 2000ms, min: 1000ms) 47 54 48 55 ${'\x1b[1m'}IMPORT OPTIONS:${'\x1b[0m'} 49 56 -r, --reverse Process newest records first (default: oldest first) 50 57 -y, --yes Skip confirmation prompts 51 58 --dry-run Preview without importing 59 + --aggressive Faster imports (8,500/day vs 7,500/day default) 60 + --fresh Start fresh (ignore previous import state) 52 61 53 62 ${'\x1b[1m'}OUTPUT:${'\x1b[0m'} 54 63 -v, --verbose Enable verbose logging (debug level) ··· 114 123 reverse: { type: 'boolean', short: 'r', default: false }, 115 124 yes: { type: 'boolean', short: 'y', default: false }, 116 125 'dry-run': { type: 'boolean', default: false }, 126 + aggressive: { type: 'boolean', default: false }, 127 + fresh: { type: 'boolean', default: false }, 117 128 118 129 // Output 119 130 verbose: { type: 'boolean', short: 'v', default: false }, ··· 145 156 reverse: values.reverse || values['reverse-chronological'], 146 157 yes: values.yes, 147 158 'dry-run': values['dry-run'], 159 + aggressive: values.aggressive, 160 + fresh: values.fresh, 148 161 verbose: values.verbose, 149 162 quiet: values.quiet, 150 163 }; ··· 351 364 352 365 log.info(`Batch delay: ${batchDelay}ms`); 353 366 367 + // Apply aggressive mode if enabled 368 + const safetyMargin = args.aggressive ? cfg.AGGRESSIVE_SAFETY_MARGIN : cfg.SAFETY_MARGIN; 369 + if (args.aggressive) { 370 + log.warn('⚡ Aggressive mode enabled: Using 85% of daily limit (8,500 records/day)'); 371 + } 372 + 354 373 // Show rate limiting information 355 374 log.section('Import Configuration'); 356 375 log.info(`Total records: ${totalRecords.toLocaleString()}`); 357 376 log.info(`Batch size: ${batchSize} records`); 358 377 log.info(`Batch delay: ${batchDelay}ms`); 359 378 360 - const recordsPerDay = cfg.RECORDS_PER_DAY_LIMIT * cfg.SAFETY_MARGIN; 379 + const recordsPerDay = cfg.RECORDS_PER_DAY_LIMIT * safetyMargin; 361 380 const estimatedDays = Math.ceil(totalRecords / recordsPerDay); 362 381 363 382 if (estimatedDays > 1) { ··· 367 386 368 387 log.blank(); 369 388 389 + // Check for existing import state (resume functionality) 390 + let importState: ImportState | null = null; 391 + if (!dryRun && args.input) { 392 + // Clear state if --fresh flag is used 393 + if (args.fresh) { 394 + clearImportState(args.input, mode); 395 + log.info('Starting fresh import (previous state cleared)'); 396 + } else { 397 + // Try to load existing state 398 + importState = loadImportState(args.input, mode); 399 + 400 + if (importState && !importState.completed) { 401 + displayResumeInfo(importState); 402 + 403 + if (!args.yes) { 404 + const answer = await prompt('Resume from previous import? (Y/n) '); 405 + if (answer.toLowerCase() === 'n') { 406 + importState = null; 407 + clearImportState(args.input, mode); 408 + log.info('Starting fresh import'); 409 + log.blank(); 410 + } 411 + } else { 412 + log.info('Auto-resuming previous import (--yes flag)'); 413 + log.blank(); 414 + } 415 + } else if (importState?.completed) { 416 + log.info('Previous import was completed - starting fresh'); 417 + importState = null; 418 + clearImportState(args.input, mode); 419 + } 420 + } 421 + 422 + // Create new state if not resuming 423 + if (!importState) { 424 + importState = createImportState(args.input, mode, totalRecords); 425 + log.debug('Created new import state'); 426 + } 427 + } 428 + 370 429 // Confirmation prompt 371 430 if (!dryRun && !args.yes) { 372 431 const modeLabel = mode === 'combined' ? 'merged' : mode === 'sync' ? 'new' : ''; ··· 389 448 batchDelay, 390 449 cfg, 391 450 dryRun, 392 - mode === 'sync' || mode === 'combined' 451 + mode === 'sync' || mode === 'combined', 452 + importState 393 453 ); 394 454 395 455 // Final output
+45 -12
src/lib/publisher.ts
··· 10 10 import { generateTIDFromISO } from '../utils/tid.js'; 11 11 import type { PlayRecord, Config, PublishResult } from '../types.js'; 12 12 import { log } from '../utils/logger.js'; 13 + import { 14 + ImportState, 15 + updateImportState, 16 + completeImport, 17 + getResumeStartIndex, 18 + } from '../utils/import-state.js'; 13 19 14 20 /** 15 21 * Maximum operations allowed per applyWrites call ··· 21 27 22 28 /** 23 29 * Publish records using com.atproto.repo.applyWrites for efficient batching 24 - * with adaptive rate limiting 30 + * with adaptive rate limiting and stateful resume support 25 31 */ 26 32 export async function publishRecordsWithApplyWrites( 27 33 agent: AtpAgent | null, ··· 30 36 batchDelay: number, 31 37 config: Config, 32 38 dryRun = false, 33 - syncMode = false 39 + syncMode = false, 40 + importState: ImportState | null = null 34 41 ): Promise<PublishResult> { 35 42 const { RECORD_TYPE } = config; 36 43 const totalRecords = records.length; ··· 52 59 let consecutiveFailures = 0; 53 60 const MAX_CONSECUTIVE_FAILURES = 3; 54 61 55 - log.section('Adaptive Import'); 56 - log.info(`Initial batch size: ${currentBatchSize} records`); 57 - log.info(`Initial delay: ${currentBatchDelay}ms`); 62 + log.section('Conservative Adaptive Import'); 63 + log.info(`Initial batch size: ${currentBatchSize} records (conservative)`); 64 + log.info(`Initial delay: ${currentBatchDelay}ms (2 seconds - very safe)`); 58 65 log.info(`Will automatically adjust based on server response`); 66 + log.info(`Using conservative settings to protect your PDS`); 59 67 log.blank(); 60 68 log.info(`Publishing ${totalRecords.toLocaleString()} records using adaptive batching...`); 61 69 log.warn('Press Ctrl+C to stop gracefully after current batch'); ··· 65 73 let errorCount = 0; 66 74 const startTime = Date.now(); 67 75 68 - let i = 0; 76 + // Resume from saved state if available 77 + let startIndex = importState ? getResumeStartIndex(importState) : 0; 78 + if (importState && startIndex > 0) { 79 + log.info(`Resuming from record ${startIndex + 1} (${(startIndex / totalRecords * 100).toFixed(1)}% complete)`); 80 + log.blank(); 81 + } 82 + 83 + let i = startIndex; 69 84 while (i < totalRecords) { 70 85 // Check killswitch before processing batch 71 86 if (isImportCancelled()) { ··· 83 98 const batchStartTime = Date.now(); 84 99 85 100 // Build writes array for applyWrites with TID-based rkeys 86 - const writes = batch.map((record) => ({ 87 - $type: 'com.atproto.repo.applyWrites#create', 88 - collection: RECORD_TYPE, 89 - rkey: generateTIDFromISO(record.playedTime), 90 - value: record, 91 - })); 101 + const writes = await Promise.all( 102 + batch.map(async (record) => ({ 103 + $type: 'com.atproto.repo.applyWrites#create', 104 + collection: RECORD_TYPE, 105 + rkey: await generateTIDFromISO(record.playedTime, 'inject:playlist'), 106 + value: record, 107 + })) 108 + ); 92 109 93 110 try { 94 111 // Call applyWrites with the batch ··· 106 123 const batchDuration = Date.now() - batchStartTime; 107 124 log.debug(`Batch complete in ${batchDuration}ms (${batchSuccessCount} successful)`); 108 125 126 + // Save state after successful batch 127 + if (importState) { 128 + updateImportState(importState, i + batch.length - 1, batchSuccessCount, 0); 129 + } 130 + 109 131 // Speed up if we're doing well (after 5 consecutive successes) 110 132 if (consecutiveSuccesses >= 5 && currentBatchDelay > config.MIN_BATCH_DELAY) { 111 133 const oldDelay = currentBatchDelay; ··· 168 190 log.debug(`... and ${batch.length - 3} more failed`); 169 191 } 170 192 193 + // Save state with errors 194 + if (importState) { 195 + updateImportState(importState, i + batch.length - 1, 0, batch.length); 196 + } 197 + 171 198 // If too many consecutive failures, slow down 172 199 if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) { 173 200 currentBatchDelay = Math.min(currentBatchDelay * 2, 10000); ··· 198 225 if (i < totalRecords) { 199 226 await new Promise((resolve) => setTimeout(resolve, currentBatchDelay)); 200 227 } 228 + } 229 + 230 + // Mark import as complete 231 + if (importState) { 232 + completeImport(importState); 233 + log.debug('Import state saved as completed'); 201 234 } 202 235 203 236 return { successCount, errorCount, cancelled: false };
+2 -2
src/lib/sync.ts
··· 46 46 }); 47 47 48 48 for (const record of response.data.records) { 49 - const playRecord = record.value as PlayRecord; 49 + const playRecord = record.value as unknown as PlayRecord; 50 50 // Create a unique key based on track, artist, and timestamp 51 51 const key = createRecordKey(playRecord); 52 52 // Note: This will overwrite duplicates, but that's OK for sync mode ··· 108 108 }); 109 109 110 110 for (const record of response.data.records) { 111 - const playRecord = record.value as PlayRecord; 111 + const playRecord = record.value as unknown as PlayRecord; 112 112 allRecords.push({ 113 113 uri: record.uri, 114 114 cid: record.cid,
+195
src/tests/tid-integration.test.ts
··· 1 + /** 2 + * Integration test for TID generation in the importer 3 + * 4 + * Tests the full flow with actual CSV data 5 + */ 6 + 7 + import { describe, it } from 'node:test'; 8 + import assert from 'node:assert'; 9 + import { initTidClock, generateTIDFromISO, resetTidClock, getTidClockState } from '../utils/tid.js'; 10 + import { validateTid, areMonotonic } from '../utils/tid-clock.js'; 11 + import { auditTids, formatAuditReport } from '../utils/tid-audit.js'; 12 + 13 + describe('TID Integration - Importer Flow', () => { 14 + it('should initialize TID clock for production', async () => { 15 + resetTidClock(); 16 + initTidClock({ mode: 'production' }); 17 + 18 + const tid = await generateTIDFromISO('2020-01-01T00:00:00Z', 'test'); 19 + assert.strictEqual(validateTid(tid), true); 20 + 21 + const state = getTidClockState(); 22 + assert.strictEqual(state.generatedCount, 1); 23 + }); 24 + 25 + it('should initialize TID clock for dry-run with deterministic output', async () => { 26 + const seed = 1000000000000000; 27 + resetTidClock(); 28 + initTidClock({ mode: 'dry-run', seed }); 29 + 30 + const dates = [ 31 + '2005-01-01T00:00:00Z', 32 + '2010-01-01T00:00:00Z', 33 + '2015-01-01T00:00:00Z', 34 + ]; 35 + 36 + const tids1 = await Promise.all(dates.map(d => generateTIDFromISO(d, 'test'))); 37 + 38 + // Reset and regenerate 39 + resetTidClock(); 40 + initTidClock({ mode: 'dry-run', seed }); 41 + 42 + const tids2 = await Promise.all(dates.map(d => generateTIDFromISO(d, 'test'))); 43 + 44 + // Should be identical 45 + assert.deepStrictEqual(tids1, tids2); 46 + }); 47 + 48 + it('should handle batch TID generation', async () => { 49 + resetTidClock(); 50 + initTidClock({ mode: 'production' }); 51 + 52 + // Simulate batch processing 53 + const batchSize = 200; 54 + const records = Array.from({ length: batchSize }, (_, i) => ({ 55 + playedTime: new Date(2020, 0, 1, 0, 0, i).toISOString(), 56 + })); 57 + 58 + const tids = await Promise.all( 59 + records.map(r => generateTIDFromISO(r.playedTime, 'batch:test')) 60 + ); 61 + 62 + // All should be valid 63 + for (const tid of tids) { 64 + assert.strictEqual(validateTid(tid), true); 65 + } 66 + 67 + // All should be unique 68 + const uniqueTids = new Set(tids); 69 + assert.strictEqual(uniqueTids.size, batchSize); 70 + 71 + // Should be monotonic 72 + assert.strictEqual(areMonotonic(tids), true); 73 + 74 + // State should track count 75 + const state = getTidClockState(); 76 + assert.strictEqual(state.generatedCount, batchSize); 77 + }); 78 + 79 + it('should handle historical dates from Last.fm (2005-present)', async () => { 80 + resetTidClock(); 81 + initTidClock({ mode: 'production' }); 82 + 83 + // Simulate Last.fm scrobbles from different eras 84 + const historicalDates = [ 85 + '2005-03-15T14:30:00Z', // Very old 86 + '2010-06-20T08:15:00Z', 87 + '2015-11-05T19:45:00Z', 88 + '2020-01-01T00:00:00Z', 89 + '2024-12-25T12:00:00Z', // Recent 90 + ]; 91 + 92 + const tids = await Promise.all( 93 + historicalDates.map(d => generateTIDFromISO(d, 'historical')) 94 + ); 95 + 96 + // All valid 97 + for (const tid of tids) { 98 + assert.strictEqual(validateTid(tid), true); 99 + } 100 + 101 + // All unique 102 + const uniqueTids = new Set(tids); 103 + assert.strictEqual(uniqueTids.size, historicalDates.length); 104 + 105 + // Monotonic despite being historical 106 + assert.strictEqual(areMonotonic(tids), true); 107 + }); 108 + 109 + it('should audit a batch of TIDs', async () => { 110 + resetTidClock(); 111 + initTidClock({ mode: 'production' }); 112 + 113 + const tids = await Promise.all( 114 + Array.from({ length: 100 }, (_, i) => 115 + generateTIDFromISO(new Date(2020, 0, 1, 0, 0, i).toISOString(), 'audit-test') 116 + ) 117 + ); 118 + 119 + const report = auditTids(tids); 120 + 121 + assert.strictEqual(report.totalTids, 100); 122 + assert.strictEqual(report.validTids, 100); 123 + assert.strictEqual(report.invalidTids, 0); 124 + assert.strictEqual(report.duplicates, 0); 125 + assert.strictEqual(report.monotonic, true); 126 + 127 + // Generate human-readable report 128 + const textReport = formatAuditReport(report); 129 + assert.ok(textReport.includes('TID AUDIT REPORT')); 130 + assert.ok(textReport.includes('✓ YES')); // Monotonic check 131 + }); 132 + 133 + it('should detect problems in TID audit', async () => { 134 + // Create a bad TID list 135 + const badTids = [ 136 + '3jzfcijpj2z2a', // Valid 137 + '3jzfcijpj2z2b', // Valid 138 + '3jzfcijpj2z2a', // Duplicate! 139 + '0000000000000', // Invalid format 140 + '3jzfcijpj2z2c', // Valid 141 + '3jzfcijpj2z2b', // Out of order 142 + ]; 143 + 144 + const report = auditTids(badTids); 145 + 146 + assert.strictEqual(report.totalTids, 6); 147 + assert.strictEqual(report.duplicates, 2); 148 + assert.strictEqual(report.monotonic, false); 149 + assert.ok(report.invalidTids > 0); 150 + assert.ok(report.errors.length > 0); 151 + 152 + const textReport = formatAuditReport(report); 153 + assert.ok(textReport.includes('✗ NO')); // Monotonic check 154 + assert.ok(textReport.includes('ERRORS')); 155 + }); 156 + }); 157 + 158 + describe('TID Integration - Concurrent Batches', () => { 159 + it('should handle multiple concurrent batches safely', async () => { 160 + resetTidClock(); 161 + initTidClock({ mode: 'production' }); 162 + 163 + // Simulate 5 concurrent batches of 50 records each 164 + const batches = 5; 165 + const batchSize = 50; 166 + 167 + const allTids = await Promise.all( 168 + Array.from({ length: batches }, async (_, batchIdx) => { 169 + return await Promise.all( 170 + Array.from({ length: batchSize }, (_, recordIdx) => { 171 + const date = new Date(2020, 0, 1, batchIdx, recordIdx).toISOString(); 172 + return generateTIDFromISO(date, `batch-${batchIdx}`); 173 + }) 174 + ); 175 + }) 176 + ); 177 + 178 + const flatTids = allTids.flat(); 179 + 180 + // All unique 181 + const uniqueTids = new Set(flatTids); 182 + assert.strictEqual(uniqueTids.size, batches * batchSize); 183 + 184 + // All valid 185 + for (const tid of flatTids) { 186 + assert.strictEqual(validateTid(tid), true); 187 + } 188 + 189 + // Total count correct 190 + const state = getTidClockState(); 191 + assert.strictEqual(state.generatedCount, batches * batchSize); 192 + }); 193 + }); 194 + 195 + console.log('✓ All integration tests completed');
+473
src/tests/tid.test.ts
··· 1 + /** 2 + * Unit tests for TID generation 3 + * 4 + * Tests cover: 5 + * - Format validation 6 + * - Monotonicity (single-threaded and concurrent) 7 + * - Deterministic dry-run mode 8 + * - Clock drift handling 9 + * - State persistence 10 + * - Collision detection 11 + */ 12 + 13 + import { describe, it, before, after } from 'node:test'; 14 + import assert from 'node:assert'; 15 + import fs from 'fs'; 16 + import path from 'path'; 17 + import os from 'os'; 18 + import { 19 + TidClock, 20 + RealClock, 21 + FakeClock, 22 + validateTid, 23 + ensureValidTid, 24 + decodeTidTimestamp, 25 + decodeTidClockId, 26 + compareTids, 27 + areMonotonic, 28 + InvalidTidError, 29 + SilentTidLogger, 30 + } from '../utils/tid-clock.js'; 31 + 32 + const TID_LENGTH = 13; 33 + 34 + describe('TID Format Validation', () => { 35 + it('should validate correct TID format', () => { 36 + const validTids = [ 37 + '3jzfcijpj2z2a', 38 + '7777777777777', 39 + '3zzzzzzzzzzzz', 40 + ]; 41 + 42 + for (const tid of validTids) { 43 + assert.strictEqual(validateTid(tid), true, `Should validate: ${tid}`); 44 + } 45 + }); 46 + 47 + it('should reject invalid TID format', () => { 48 + const invalidTids = [ 49 + '3jzfcijpj2z21', // Invalid character (1) 50 + '0000000000000', // Invalid character (0) 51 + '3jzfcijpj2z2aa', // Too long 52 + '3jzfcijpj2z2', // Too short 53 + '3jzf-cij-pj2z-2a', // Dashes not allowed 54 + 'zzzzzzzzzzzzz', // High bit violation 55 + 'kjzfcijpj2z2a', // High bit violation 56 + ]; 57 + 58 + for (const tid of invalidTids) { 59 + assert.strictEqual(validateTid(tid), false, `Should reject: ${tid}`); 60 + } 61 + }); 62 + 63 + it('should enforce TID length', () => { 64 + assert.strictEqual(validateTid('123'), false); 65 + assert.strictEqual(validateTid('12345678901234567890'), false); 66 + }); 67 + 68 + it('should enforce base32 alphabet', () => { 69 + assert.strictEqual(validateTid('3jzfcijpj2z21'), false); // Has '1' 70 + assert.strictEqual(validateTid('0jzfcijpj2z2a'), false); // Starts with '0' 71 + }); 72 + 73 + it('should throw on ensureValidTid for invalid input', () => { 74 + assert.throws( 75 + () => ensureValidTid('invalid'), 76 + InvalidTidError 77 + ); 78 + }); 79 + }); 80 + 81 + describe('TID Generation - Basic', () => { 82 + it('should generate valid TID format', async () => { 83 + const clock = new TidClock(new RealClock(), new SilentTidLogger()); 84 + const tid = await clock.next(); 85 + 86 + assert.strictEqual(tid.length, TID_LENGTH); 87 + assert.strictEqual(validateTid(tid), true); 88 + }); 89 + 90 + it('should generate TID from Date', async () => { 91 + const clock = new TidClock(new FakeClock(1000000000000000), new SilentTidLogger()); 92 + const date = new Date('2005-01-01T00:00:00Z'); 93 + const tid = await clock.fromDate(date); 94 + 95 + assert.strictEqual(validateTid(tid), true); 96 + assert.strictEqual(tid.length, TID_LENGTH); 97 + }); 98 + 99 + it('should encode timestamp correctly', async () => { 100 + const timestamp = 1000000000000000; // Fixed timestamp 101 + const clock = new TidClock(new FakeClock(timestamp), new SilentTidLogger()); 102 + const tid = await clock.next(); 103 + 104 + const decoded = decodeTidTimestamp(tid); 105 + assert.strictEqual(decoded, timestamp); 106 + }); 107 + 108 + it('should encode clock ID correctly', async () => { 109 + const clock = new TidClock( 110 + new FakeClock(1000000000000000), 111 + new SilentTidLogger(), 112 + { clockId: 15 } 113 + ); 114 + const tid = await clock.next(); 115 + 116 + const clockId = decodeTidClockId(tid); 117 + assert.strictEqual(clockId, 15); 118 + }); 119 + }); 120 + 121 + describe('TID Monotonicity - Single Thread', () => { 122 + it('should generate monotonically increasing TIDs', async () => { 123 + const fakeClock = new FakeClock(1000000000000000); 124 + const clock = new TidClock(fakeClock, new SilentTidLogger()); 125 + 126 + const tids: string[] = []; 127 + for (let i = 0; i < 100; i++) { 128 + fakeClock.advance(1000); // Advance 1ms 129 + tids.push(await clock.next()); 130 + } 131 + 132 + assert.strictEqual(areMonotonic(tids), true); 133 + }); 134 + 135 + it('should handle same timestamp with sequence increment', async () => { 136 + const fakeClock = new FakeClock(1000000000000000); 137 + const clock = new TidClock(fakeClock, new SilentTidLogger()); 138 + 139 + // Generate multiple TIDs at same timestamp 140 + const tid1 = await clock.next(); 141 + const tid2 = await clock.next(); 142 + const tid3 = await clock.next(); 143 + 144 + assert.notStrictEqual(tid1, tid2); 145 + assert.notStrictEqual(tid2, tid3); 146 + assert.strictEqual(compareTids(tid1, tid2), -1); 147 + assert.strictEqual(compareTids(tid2, tid3), -1); 148 + }); 149 + 150 + it('should handle backwards clock drift', async () => { 151 + const fakeClock = new FakeClock(1000000000000000); 152 + const clock = new TidClock(fakeClock, new SilentTidLogger()); 153 + 154 + const tid1 = await clock.next(); 155 + 156 + // Move clock backwards 157 + fakeClock.set(999999000000000); 158 + 159 + const tid2 = await clock.next(); 160 + 161 + // Should still be monotonic 162 + assert.strictEqual(compareTids(tid1, tid2), -1); 163 + }); 164 + 165 + it('should generate unique TIDs even with clock drift', async () => { 166 + const fakeClock = new FakeClock(1000000000000000); 167 + const clock = new TidClock(fakeClock, new SilentTidLogger()); 168 + 169 + const tids: string[] = []; 170 + 171 + // Generate some TIDs 172 + for (let i = 0; i < 5; i++) { 173 + tids.push(await clock.next()); 174 + } 175 + 176 + // Move clock backwards 177 + fakeClock.set(999999000000000); 178 + 179 + // Generate more TIDs 180 + for (let i = 0; i < 5; i++) { 181 + tids.push(await clock.next()); 182 + } 183 + 184 + // All should be unique and monotonic 185 + const uniqueTids = new Set(tids); 186 + assert.strictEqual(uniqueTids.size, tids.length); 187 + assert.strictEqual(areMonotonic(tids), true); 188 + }); 189 + }); 190 + 191 + describe('TID Monotonicity - Concurrent', () => { 192 + it('should handle concurrent generation safely', async () => { 193 + const clock = new TidClock(new RealClock(), new SilentTidLogger()); 194 + 195 + // Generate 100 TIDs concurrently 196 + const promises = Array.from({ length: 100 }, () => clock.next()); 197 + const tids = await Promise.all(promises); 198 + 199 + // All should be unique 200 + const uniqueTids = new Set(tids); 201 + assert.strictEqual(uniqueTids.size, tids.length); 202 + 203 + // All should be valid 204 + for (const tid of tids) { 205 + assert.strictEqual(validateTid(tid), true); 206 + } 207 + 208 + // Should be monotonic when sorted 209 + const sorted = [...tids].sort(compareTids); 210 + assert.strictEqual(areMonotonic(sorted), true); 211 + }); 212 + 213 + it('should handle high-frequency concurrent generation', async () => { 214 + const clock = new TidClock(new RealClock(), new SilentTidLogger()); 215 + 216 + // Generate 1000 TIDs in parallel batches 217 + const batchSize = 50; 218 + const batches = 20; 219 + const allTids: string[] = []; 220 + 221 + for (let b = 0; b < batches; b++) { 222 + const promises = Array.from({ length: batchSize }, () => clock.next()); 223 + const batchTids = await Promise.all(promises); 224 + allTids.push(...batchTids); 225 + } 226 + 227 + // All should be unique 228 + const uniqueTids = new Set(allTids); 229 + assert.strictEqual(uniqueTids.size, allTids.length); 230 + 231 + // All should be valid 232 + for (const tid of allTids) { 233 + assert.strictEqual(validateTid(tid), true); 234 + } 235 + }); 236 + }); 237 + 238 + describe('TID Deterministic Mode', () => { 239 + it('should generate deterministic TIDs with same seed', async () => { 240 + const seed = 1000000000000000; 241 + 242 + const clock1 = new TidClock(new FakeClock(seed), new SilentTidLogger(), { clockId: 10 }); 243 + const clock2 = new TidClock(new FakeClock(seed), new SilentTidLogger(), { clockId: 10 }); 244 + 245 + const tids1: string[] = []; 246 + const tids2: string[] = []; 247 + 248 + for (let i = 0; i < 10; i++) { 249 + tids1.push(await clock1.next()); 250 + tids2.push(await clock2.next()); 251 + } 252 + 253 + // Should generate identical sequences 254 + assert.deepStrictEqual(tids1, tids2); 255 + }); 256 + 257 + it('should be deterministic for historical dates', async () => { 258 + const dates = [ 259 + new Date('2005-01-01T00:00:00Z'), 260 + new Date('2010-06-15T12:30:00Z'), 261 + new Date('2020-12-31T23:59:59Z'), 262 + ]; 263 + 264 + const clock1 = new TidClock(new FakeClock(0), new SilentTidLogger(), { clockId: 5 }); 265 + const clock2 = new TidClock(new FakeClock(0), new SilentTidLogger(), { clockId: 5 }); 266 + 267 + const tids1 = await Promise.all(dates.map(d => clock1.fromDate(d))); 268 + const tids2 = await Promise.all(dates.map(d => clock2.fromDate(d))); 269 + 270 + assert.deepStrictEqual(tids1, tids2); 271 + }); 272 + }); 273 + 274 + describe('TID State Persistence', () => { 275 + const tempDir = path.join(os.tmpdir(), 'tid-test-' + Date.now()); 276 + const statePath = path.join(tempDir, 'tid-state.json'); 277 + 278 + before(() => { 279 + if (!fs.existsSync(tempDir)) { 280 + fs.mkdirSync(tempDir, { recursive: true }); 281 + } 282 + }); 283 + 284 + after(() => { 285 + if (fs.existsSync(tempDir)) { 286 + fs.rmSync(tempDir, { recursive: true, force: true }); 287 + } 288 + }); 289 + 290 + it('should persist state to disk', async () => { 291 + const clock = new TidClock( 292 + new FakeClock(1000000000000000), 293 + new SilentTidLogger(), 294 + { statePath } 295 + ); 296 + 297 + await clock.next(); 298 + await clock.next(); 299 + 300 + // State file should exist 301 + assert.strictEqual(fs.existsSync(statePath), true); 302 + 303 + // Should contain state 304 + const stateData = JSON.parse(fs.readFileSync(statePath, 'utf-8')); 305 + assert.strictEqual(stateData.generatedCount, 2); 306 + }); 307 + 308 + it('should restore state from disk', async () => { 309 + // Use a unique state file path for this test 310 + const restoreStatePath = path.join(tempDir, 'tid-state-restore.json'); 311 + 312 + // Create first clock and generate some TIDs 313 + const clock1 = new TidClock( 314 + new FakeClock(1000000000000000), 315 + new SilentTidLogger(), 316 + { statePath: restoreStatePath, clockId: 10 } 317 + ); 318 + 319 + await clock1.next(); 320 + const tid2 = await clock1.next(); 321 + 322 + // Create new clock with same state file 323 + const clock2 = new TidClock( 324 + new FakeClock(1000000000000000), 325 + new SilentTidLogger(), 326 + { statePath: restoreStatePath } 327 + ); 328 + 329 + const tid3 = await clock2.next(); 330 + 331 + // Should continue from where clock1 left off 332 + assert.strictEqual(compareTids(tid2, tid3), -1); 333 + 334 + const state = clock2.getState(); 335 + assert.strictEqual(state.generatedCount, 3); 336 + }); 337 + }); 338 + 339 + describe('TID Historical Dates', () => { 340 + it('should handle very old dates (2005)', async () => { 341 + const clock = new TidClock(new FakeClock(0), new SilentTidLogger()); 342 + const date = new Date('2005-01-01T00:00:00Z'); 343 + const tid = await clock.fromDate(date); 344 + 345 + assert.strictEqual(validateTid(tid), true); 346 + }); 347 + 348 + it('should maintain monotonicity with out-of-order dates', async () => { 349 + const clock = new TidClock(new FakeClock(0), new SilentTidLogger()); 350 + 351 + const dates = [ 352 + new Date('2020-01-01T00:00:00Z'), 353 + new Date('2015-01-01T00:00:00Z'), // Earlier! 354 + new Date('2010-01-01T00:00:00Z'), // Even earlier! 355 + new Date('2025-01-01T00:00:00Z'), 356 + ]; 357 + 358 + const tids = await Promise.all(dates.map(d => clock.fromDate(d))); 359 + 360 + // Should still be monotonic despite out-of-order input 361 + assert.strictEqual(areMonotonic(tids), true); 362 + }); 363 + 364 + it('should handle duplicate dates', async () => { 365 + const clock = new TidClock(new FakeClock(0), new SilentTidLogger()); 366 + const date = new Date('2020-01-01T00:00:00Z'); 367 + 368 + const tid1 = await clock.fromDate(date); 369 + const tid2 = await clock.fromDate(date); 370 + const tid3 = await clock.fromDate(date); 371 + 372 + // All should be unique 373 + assert.notStrictEqual(tid1, tid2); 374 + assert.notStrictEqual(tid2, tid3); 375 + assert.strictEqual(areMonotonic([tid1, tid2, tid3]), true); 376 + }); 377 + }); 378 + 379 + describe('TID Collision Detection', () => { 380 + it('should detect duplicate TIDs (should never happen)', async () => { 381 + const clock = new TidClock(new FakeClock(1000000000000000), new SilentTidLogger()); 382 + 383 + // Generate a TID 384 + await clock.next(); 385 + 386 + // Try to force a duplicate by manually resetting state (testing collision detection) 387 + // This simulates what would happen if there was a bug in generation 388 + const state = clock.getState(); 389 + 390 + // Create a new clock with the exact same state 391 + const clock2 = new TidClock( 392 + new FakeClock(state.lastTimestampUs), 393 + new SilentTidLogger(), 394 + { 395 + clockId: state.clockId, 396 + initialState: { 397 + lastTimestampUs: state.lastTimestampUs - 1, // Trick it into generating same timestamp 398 + generatedCount: 0, 399 + } 400 + } 401 + ); 402 + 403 + // First TID from clock2 might be a duplicate 404 + // But the clock should handle it via sequence increment 405 + const tid = await clock2.next(); 406 + assert.strictEqual(validateTid(tid), true); 407 + }); 408 + }); 409 + 410 + describe('TID Comparison and Sorting', () => { 411 + it('should compare TIDs correctly', () => { 412 + const tid1 = '3jzfcijpj2z2a'; 413 + const tid2 = '3jzfcijpj2z2b'; 414 + const tid3 = '7777777777777'; 415 + 416 + assert.strictEqual(compareTids(tid1, tid1), 0); 417 + assert.strictEqual(compareTids(tid1, tid2), -1); 418 + assert.strictEqual(compareTids(tid2, tid1), 1); 419 + assert.strictEqual(compareTids(tid1, tid3), -1); 420 + }); 421 + 422 + it('should sort TIDs correctly', () => { 423 + const tids = [ 424 + '7777777777777', 425 + '3jzfcijpj2z2a', 426 + '3zzzzzzzzzzzz', 427 + '3jzfcijpj2z2b', 428 + ]; 429 + 430 + const sorted = [...tids].sort(compareTids); 431 + 432 + assert.deepStrictEqual(sorted, [ 433 + '3jzfcijpj2z2a', 434 + '3jzfcijpj2z2b', 435 + '3zzzzzzzzzzzz', 436 + '7777777777777', 437 + ]); 438 + }); 439 + }); 440 + 441 + describe('TID Edge Cases', () => { 442 + it('should handle microsecond precision', async () => { 443 + const clock = new TidClock(new FakeClock(1234567890123456), new SilentTidLogger()); 444 + const tid = await clock.next(); 445 + const decoded = decodeTidTimestamp(tid); 446 + 447 + assert.strictEqual(decoded, 1234567890123456); 448 + }); 449 + 450 + it('should handle very large timestamps', async () => { 451 + const farFuture = new Date('2099-12-31T23:59:59.999Z').getTime() * 1000; 452 + const clock = new TidClock(new FakeClock(farFuture), new SilentTidLogger()); 453 + const tid = await clock.next(); 454 + 455 + assert.strictEqual(validateTid(tid), true); 456 + }); 457 + 458 + it('should handle rapid sequential generation', async () => { 459 + const clock = new TidClock(new RealClock(), new SilentTidLogger()); 460 + const tids: string[] = []; 461 + 462 + // Generate as fast as possible 463 + for (let i = 0; i < 1000; i++) { 464 + tids.push(await clock.next()); 465 + } 466 + 467 + const uniqueTids = new Set(tids); 468 + assert.strictEqual(uniqueTids.size, tids.length); 469 + assert.strictEqual(areMonotonic(tids), true); 470 + }); 471 + }); 472 + 473 + console.log('✓ All TID tests completed');
+3
src/types.ts
··· 57 57 reverse?: boolean; 58 58 yes?: boolean; 59 59 'dry-run'?: boolean; 60 + aggressive?: boolean; 61 + fresh?: boolean; 60 62 61 63 // Output 62 64 verbose?: boolean; ··· 92 94 MIN_BATCH_DELAY: number; // from rate limiter 93 95 RECORDS_PER_DAY_LIMIT: number; 94 96 SAFETY_MARGIN: number; 97 + AGGRESSIVE_SAFETY_MARGIN: number; 95 98 96 99 SLINGSHOT_RESOLVER: string; 97 100
+233
src/utils/import-state.ts
··· 1 + import fs from 'fs'; 2 + import path from 'path'; 3 + import os from 'os'; 4 + import crypto from 'crypto'; 5 + import type { PlayRecord } from '../types.js'; 6 + import { log } from './logger.js'; 7 + 8 + /** 9 + * Import state for resume functionality 10 + */ 11 + export interface ImportState { 12 + version: string; 13 + startedAt: string; 14 + lastUpdatedAt: string; 15 + inputFile: string; 16 + inputFileHash: string; 17 + totalRecords: number; 18 + processedRecords: number; 19 + successfulRecords: number; 20 + failedRecords: number; 21 + lastSuccessfulIndex: number; 22 + mode: 'lastfm' | 'spotify' | 'combined' | 'sync'; 23 + completed: boolean; 24 + } 25 + 26 + /** 27 + * Get the state file path for an import 28 + */ 29 + export function getStateFilePath(inputFile: string, mode: string): string { 30 + const stateDir = path.join(os.homedir(), '.lastfm-importer', 'state'); 31 + 32 + // Create state directory if it doesn't exist 33 + if (!fs.existsSync(stateDir)) { 34 + fs.mkdirSync(stateDir, { recursive: true }); 35 + } 36 + 37 + // Create a unique filename based on input file and mode 38 + const hash = crypto 39 + .createHash('md5') 40 + .update(inputFile + mode) 41 + .digest('hex') 42 + .substring(0, 8); 43 + 44 + return path.join(stateDir, `import-${hash}.json`); 45 + } 46 + 47 + /** 48 + * Calculate hash of input file for change detection 49 + */ 50 + export function calculateFileHash(filePath: string): string { 51 + if (!fs.existsSync(filePath)) { 52 + return ''; 53 + } 54 + 55 + const stats = fs.statSync(filePath); 56 + if (stats.isDirectory()) { 57 + // For directories (like Spotify exports), hash the directory name and modification time 58 + return crypto 59 + .createHash('md5') 60 + .update(filePath + stats.mtime.toISOString()) 61 + .digest('hex'); 62 + } 63 + 64 + // For files, use file size and modification time for quick comparison 65 + return crypto 66 + .createHash('md5') 67 + .update(`${stats.size}-${stats.mtime.toISOString()}`) 68 + .digest('hex'); 69 + } 70 + 71 + /** 72 + * Load import state from disk 73 + */ 74 + export function loadImportState(inputFile: string, mode: string): ImportState | null { 75 + const stateFile = getStateFilePath(inputFile, mode); 76 + 77 + if (!fs.existsSync(stateFile)) { 78 + return null; 79 + } 80 + 81 + try { 82 + const data = fs.readFileSync(stateFile, 'utf-8'); 83 + const state = JSON.parse(data) as ImportState; 84 + 85 + // Check if input file has changed 86 + const currentHash = calculateFileHash(inputFile); 87 + if (state.inputFileHash !== currentHash) { 88 + log.warn('Input file has changed since last import - starting fresh'); 89 + return null; 90 + } 91 + 92 + return state; 93 + } catch (error) { 94 + log.warn('Failed to load state file - starting fresh'); 95 + return null; 96 + } 97 + } 98 + 99 + /** 100 + * Save import state to disk 101 + */ 102 + export function saveImportState(state: ImportState): void { 103 + const stateFile = getStateFilePath(state.inputFile, state.mode); 104 + 105 + try { 106 + state.lastUpdatedAt = new Date().toISOString(); 107 + fs.writeFileSync(stateFile, JSON.stringify(state, null, 2), 'utf-8'); 108 + } catch (error) { 109 + log.error('Failed to save state file - progress may be lost on restart'); 110 + } 111 + } 112 + 113 + /** 114 + * Create initial import state 115 + */ 116 + export function createImportState( 117 + inputFile: string, 118 + mode: 'lastfm' | 'spotify' | 'combined' | 'sync', 119 + totalRecords: number 120 + ): ImportState { 121 + return { 122 + version: '1.0', 123 + startedAt: new Date().toISOString(), 124 + lastUpdatedAt: new Date().toISOString(), 125 + inputFile, 126 + inputFileHash: calculateFileHash(inputFile), 127 + totalRecords, 128 + processedRecords: 0, 129 + successfulRecords: 0, 130 + failedRecords: 0, 131 + lastSuccessfulIndex: -1, 132 + mode, 133 + completed: false, 134 + }; 135 + } 136 + 137 + /** 138 + * Update import state after batch 139 + */ 140 + export function updateImportState( 141 + state: ImportState, 142 + batchIndex: number, 143 + successCount: number, 144 + errorCount: number 145 + ): void { 146 + state.processedRecords += successCount + errorCount; 147 + state.successfulRecords += successCount; 148 + state.failedRecords += errorCount; 149 + 150 + if (successCount > 0) { 151 + state.lastSuccessfulIndex = batchIndex; 152 + } 153 + 154 + saveImportState(state); 155 + } 156 + 157 + /** 158 + * Mark import as completed 159 + */ 160 + export function completeImport(state: ImportState): void { 161 + state.completed = true; 162 + saveImportState(state); 163 + } 164 + 165 + /** 166 + * Clear import state (for fresh start) 167 + */ 168 + export function clearImportState(inputFile: string, mode: string): void { 169 + const stateFile = getStateFilePath(inputFile, mode); 170 + 171 + if (fs.existsSync(stateFile)) { 172 + fs.unlinkSync(stateFile); 173 + } 174 + } 175 + 176 + /** 177 + * Display resume information 178 + */ 179 + export function displayResumeInfo(state: ImportState): void { 180 + const elapsed = Date.now() - new Date(state.startedAt).getTime(); 181 + const elapsedHours = Math.floor(elapsed / (1000 * 60 * 60)); 182 + const elapsedMinutes = Math.floor((elapsed % (1000 * 60 * 60)) / (1000 * 60)); 183 + 184 + const remaining = state.totalRecords - state.processedRecords; 185 + const progress = ((state.processedRecords / state.totalRecords) * 100).toFixed(1); 186 + 187 + log.section('Resuming Previous Import'); 188 + log.info(`Started: ${new Date(state.startedAt).toLocaleString()}`); 189 + log.info(`Progress: ${state.processedRecords.toLocaleString()}/${state.totalRecords.toLocaleString()} (${progress}%)`); 190 + log.info(`Successful: ${state.successfulRecords.toLocaleString()}`); 191 + 192 + if (state.failedRecords > 0) { 193 + log.warn(`Failed: ${state.failedRecords.toLocaleString()}`); 194 + } 195 + 196 + log.info(`Remaining: ${remaining.toLocaleString()} records`); 197 + 198 + if (elapsedHours > 0) { 199 + log.info(`Time elapsed: ${elapsedHours}h ${elapsedMinutes}m`); 200 + } else if (elapsedMinutes > 0) { 201 + log.info(`Time elapsed: ${elapsedMinutes}m`); 202 + } 203 + 204 + log.blank(); 205 + } 206 + 207 + /** 208 + * Filter records to skip already processed ones 209 + */ 210 + export function filterUnprocessedRecords( 211 + records: PlayRecord[], 212 + state: ImportState 213 + ): PlayRecord[] { 214 + if (state.lastSuccessfulIndex < 0) { 215 + return records; 216 + } 217 + 218 + // Skip records up to and including the last successful index 219 + const startIndex = state.lastSuccessfulIndex + 1; 220 + 221 + if (startIndex >= records.length) { 222 + return []; 223 + } 224 + 225 + return records.slice(startIndex); 226 + } 227 + 228 + /** 229 + * Get the starting index for resume 230 + */ 231 + export function getResumeStartIndex(state: ImportState): number { 232 + return state.lastSuccessfulIndex + 1; 233 + }
+179
src/utils/tid-audit.ts
··· 1 + /** 2 + * TID Audit and Reporting Tools 3 + * 4 + * Utilities for auditing TID generation and producing reports 5 + */ 6 + 7 + import { validateTid, decodeTidTimestamp, decodeTidClockId, compareTids } from './tid-clock.js'; 8 + 9 + export interface TidAuditEntry { 10 + tid: string; 11 + valid: boolean; 12 + timestamp?: number; 13 + clockId?: number; 14 + date?: Date; 15 + errors: string[]; 16 + } 17 + 18 + export interface TidAuditReport { 19 + totalTids: number; 20 + validTids: number; 21 + invalidTids: number; 22 + duplicates: number; 23 + monotonic: boolean; 24 + firstTid?: TidAuditEntry; 25 + lastTid?: TidAuditEntry; 26 + entries: TidAuditEntry[]; 27 + errors: string[]; 28 + } 29 + 30 + /** 31 + * Audit a list of TIDs 32 + */ 33 + export function auditTids(tids: string[]): TidAuditReport { 34 + const entries: TidAuditEntry[] = []; 35 + const seenTids = new Set<string>(); 36 + let duplicates = 0; 37 + let validCount = 0; 38 + let invalidCount = 0; 39 + const globalErrors: string[] = []; 40 + 41 + // Audit each TID 42 + for (const tid of tids) { 43 + const entry: TidAuditEntry = { 44 + tid, 45 + valid: false, 46 + errors: [], 47 + }; 48 + 49 + // Check for duplicates 50 + if (seenTids.has(tid)) { 51 + duplicates++; 52 + entry.errors.push('Duplicate TID'); 53 + } 54 + seenTids.add(tid); 55 + 56 + // Validate format 57 + const valid = validateTid(tid); 58 + entry.valid = valid; 59 + 60 + if (valid) { 61 + validCount++; 62 + try { 63 + entry.timestamp = decodeTidTimestamp(tid); 64 + entry.clockId = decodeTidClockId(tid); 65 + entry.date = new Date(entry.timestamp / 1000); 66 + } catch (error) { 67 + entry.errors.push(`Decode error: ${error}`); 68 + } 69 + } else { 70 + invalidCount++; 71 + entry.errors.push('Invalid format'); 72 + } 73 + 74 + entries.push(entry); 75 + } 76 + 77 + // Check monotonicity 78 + let monotonic = true; 79 + for (let i = 1; i < tids.length; i++) { 80 + if (compareTids(tids[i - 1], tids[i]) >= 0) { 81 + monotonic = false; 82 + globalErrors.push(`Non-monotonic at index ${i}: ${tids[i - 1]} >= ${tids[i]}`); 83 + } 84 + } 85 + 86 + return { 87 + totalTids: tids.length, 88 + validTids: validCount, 89 + invalidTids: invalidCount, 90 + duplicates, 91 + monotonic, 92 + firstTid: entries[0], 93 + lastTid: entries[entries.length - 1], 94 + entries, 95 + errors: globalErrors, 96 + }; 97 + } 98 + 99 + /** 100 + * Format audit report as human-readable text 101 + */ 102 + export function formatAuditReport(report: TidAuditReport): string { 103 + const lines: string[] = []; 104 + 105 + lines.push('='.repeat(80)); 106 + lines.push('TID AUDIT REPORT'); 107 + lines.push('='.repeat(80)); 108 + lines.push(''); 109 + 110 + // Summary 111 + lines.push('SUMMARY'); 112 + lines.push('-'.repeat(80)); 113 + lines.push(`Total TIDs: ${report.totalTids.toLocaleString()}`); 114 + lines.push(`Valid: ${report.validTids.toLocaleString()} (${((report.validTids / report.totalTids) * 100).toFixed(1)}%)`); 115 + lines.push(`Invalid: ${report.invalidTids.toLocaleString()}`); 116 + lines.push(`Duplicates: ${report.duplicates.toLocaleString()}`); 117 + lines.push(`Monotonic: ${report.monotonic ? '✓ YES' : '✗ NO'}`); 118 + lines.push(''); 119 + 120 + // Time range 121 + if (report.firstTid && report.firstTid.date) { 122 + lines.push('TIME RANGE'); 123 + lines.push('-'.repeat(80)); 124 + lines.push(`First TID: ${report.firstTid.tid}`); 125 + lines.push(` Timestamp: ${report.firstTid.date.toISOString()}`); 126 + lines.push(` Clock ID: ${report.firstTid.clockId}`); 127 + lines.push(''); 128 + if (report.lastTid && report.lastTid.date) { 129 + lines.push(`Last TID: ${report.lastTid.tid}`); 130 + lines.push(` Timestamp: ${report.lastTid.date.toISOString()}`); 131 + lines.push(` Clock ID: ${report.lastTid.clockId}`); 132 + const durationMs = (report.lastTid.timestamp! - report.firstTid.timestamp!) / 1000; 133 + const durationDays = durationMs / (1000 * 60 * 60 * 24); 134 + lines.push(` Duration: ${durationDays.toFixed(2)} days`); 135 + lines.push(''); 136 + } 137 + } 138 + 139 + // Errors 140 + if (report.errors.length > 0) { 141 + lines.push('ERRORS'); 142 + lines.push('-'.repeat(80)); 143 + for (const error of report.errors.slice(0, 10)) { 144 + lines.push(` • ${error}`); 145 + } 146 + if (report.errors.length > 10) { 147 + lines.push(` ... and ${report.errors.length - 10} more errors`); 148 + } 149 + lines.push(''); 150 + } 151 + 152 + // Invalid TIDs 153 + const invalidEntries = report.entries.filter(e => !e.valid || e.errors.length > 0); 154 + if (invalidEntries.length > 0) { 155 + lines.push('INVALID/PROBLEM TIDs'); 156 + lines.push('-'.repeat(80)); 157 + for (const entry of invalidEntries.slice(0, 10)) { 158 + lines.push(` ${entry.tid}`); 159 + for (const error of entry.errors) { 160 + lines.push(` - ${error}`); 161 + } 162 + } 163 + if (invalidEntries.length > 10) { 164 + lines.push(` ... and ${invalidEntries.length - 10} more invalid TIDs`); 165 + } 166 + lines.push(''); 167 + } 168 + 169 + lines.push('='.repeat(80)); 170 + 171 + return lines.join('\n'); 172 + } 173 + 174 + /** 175 + * Format audit report as JSON 176 + */ 177 + export function formatAuditReportJson(report: TidAuditReport): string { 178 + return JSON.stringify(report, null, 2); 179 + }
+470
src/utils/tid-clock.ts
··· 1 + /** 2 + * TID (Timestamp Identifier) Clock for ATProto 3 + * 4 + * Implements spec-compliant, monotonic TID generation with: 5 + * - AT-Protocol format validation (13 chars, base32 alphabet) 6 + * - Strict monotonicity guarantees (even under clock drift) 7 + * - Concurrency safety (mutex-protected state) 8 + * - Deterministic mode for dry-runs 9 + * - Full observability (structured JSON logging) 10 + * - Collision resistance 11 + * 12 + * Based on AT-Protocol spec: https://atproto.com/specs/tid 13 + * Reference implementation: @atproto/common-web 14 + */ 15 + 16 + import { s32decode, s32encode } from '@atproto/common-web/dist/util.js'; 17 + import fs from 'fs'; 18 + import path from 'path'; 19 + import crypto from 'crypto'; 20 + 21 + const TID_LENGTH = 13; 22 + const TID_REGEX = /^[234567abcdefghij][234567abcdefghijklmnopqrstuvwxyz]{12}$/; 23 + 24 + /** 25 + * TID validation error 26 + */ 27 + export class InvalidTidError extends Error { 28 + constructor(message: string, public tid?: string) { 29 + super(message); 30 + this.name = 'InvalidTidError'; 31 + } 32 + } 33 + 34 + /** 35 + * TID generation modes 36 + */ 37 + export enum TidMode { 38 + PRODUCTION = 'production', // Real wall-clock time 39 + DRY_RUN = 'dry-run', // Deterministic with fixed seed 40 + REPLAY = 'replay', // Replay from logged state 41 + } 42 + 43 + /** 44 + * Clock source abstraction for testability 45 + */ 46 + export interface ClockSource { 47 + now(): number; // Returns microseconds since epoch 48 + } 49 + 50 + /** 51 + * Real wall-clock implementation 52 + */ 53 + export class RealClock implements ClockSource { 54 + now(): number { 55 + return Date.now() * 1000; // Convert ms to µs 56 + } 57 + } 58 + 59 + /** 60 + * Deterministic fake clock for testing/dry-runs 61 + */ 62 + export class FakeClock implements ClockSource { 63 + constructor(private timestamp: number) {} 64 + 65 + now(): number { 66 + return this.timestamp; 67 + } 68 + 69 + advance(microseconds: number): void { 70 + this.timestamp += microseconds; 71 + } 72 + 73 + set(microseconds: number): void { 74 + this.timestamp = microseconds; 75 + } 76 + } 77 + 78 + /** 79 + * TID generator state (for persistence and logging) 80 + */ 81 + export interface TidState { 82 + lastTimestampUs: number; // Last generated timestamp in microseconds 83 + clockId: number; // Clock identifier (0-31 for this implementation) 84 + generatedCount: number; // Total TIDs generated 85 + } 86 + 87 + /** 88 + * Metadata for each generated TID 89 + */ 90 + export interface TidMetadata { 91 + tid: string; 92 + timestampUs: number; 93 + clockId: number; 94 + generatedAt: string; // ISO8601 with microseconds 95 + validated: boolean; 96 + context?: string; 97 + } 98 + 99 + /** 100 + * Logger interface for TID operations 101 + */ 102 + export interface TidLogger { 103 + logGenerated(metadata: TidMetadata, opId: string): void; 104 + logWarning(message: string, details?: any): void; 105 + logError(message: string, details?: any): void; 106 + } 107 + 108 + /** 109 + * Default console-based logger 110 + */ 111 + export class ConsoleTidLogger implements TidLogger { 112 + logGenerated(metadata: TidMetadata, opId: string): void { 113 + const entry = { 114 + level: 'INFO', 115 + op_id: opId, 116 + ts: metadata.generatedAt, 117 + event: 'tid.generated', 118 + tid: metadata.tid, 119 + clock_id: metadata.clockId, 120 + wall_ts_us: metadata.timestampUs, 121 + generator: 'tid-clock-v1', 122 + context: metadata.context || 'unknown', 123 + validated: metadata.validated, 124 + }; 125 + console.log(JSON.stringify(entry)); 126 + } 127 + 128 + logWarning(message: string, details?: any): void { 129 + console.warn(JSON.stringify({ 130 + level: 'WARN', 131 + event: 'tid.warning', 132 + message, 133 + ...details, 134 + })); 135 + } 136 + 137 + logError(message: string, details?: any): void { 138 + console.error(JSON.stringify({ 139 + level: 'ERROR', 140 + event: 'tid.error', 141 + message, 142 + ...details, 143 + })); 144 + } 145 + } 146 + 147 + /** 148 + * Silent logger (for production when structured logging is handled elsewhere) 149 + */ 150 + export class SilentTidLogger implements TidLogger { 151 + logGenerated(): void {} 152 + logWarning(): void {} 153 + logError(): void {} 154 + } 155 + 156 + /** 157 + * Main TID Clock generator 158 + */ 159 + export class TidClock { 160 + private state: TidState; 161 + private clock: ClockSource; 162 + private logger: TidLogger; 163 + private statePath: string | null; 164 + private mutex: Promise<void> = Promise.resolve(); 165 + 166 + constructor( 167 + clock: ClockSource = new RealClock(), 168 + logger: TidLogger = new SilentTidLogger(), 169 + options: { 170 + statePath?: string; 171 + clockId?: number; 172 + initialState?: Partial<TidState>; 173 + } = {} 174 + ) { 175 + this.clock = clock; 176 + this.logger = logger; 177 + this.statePath = options.statePath || null; 178 + 179 + // Initialize or load state 180 + if (this.statePath && fs.existsSync(this.statePath)) { 181 + this.state = this.loadState(); 182 + } else { 183 + this.state = { 184 + lastTimestampUs: 0, 185 + clockId: options.clockId ?? this.generateClockId(), 186 + generatedCount: 0, 187 + ...options.initialState, 188 + }; 189 + if (this.statePath) { 190 + this.saveState(); 191 + } 192 + } 193 + } 194 + 195 + /** 196 + * Generate a cryptographically random clock ID (0-31) 197 + */ 198 + private generateClockId(): number { 199 + return crypto.randomInt(0, 32); 200 + } 201 + 202 + /** 203 + * Generate next TID with monotonicity guarantees 204 + * 205 + * Per AT-Protocol spec and reference implementation: 206 + * - Use max(currentTime, lastTimestamp) to handle clock drift 207 + * - If same as last timestamp, increment the timestamp itself (acts as sequence) 208 + * - This ensures monotonicity while keeping TID format simple 209 + */ 210 + async next(context?: string): Promise<string> { 211 + return this.withMutex(async () => { 212 + const currentTime = this.clock.now(); 213 + 214 + // Take max of current time and last timestamp (handles backwards clock drift) 215 + let timestamp = Math.max(currentTime, this.state.lastTimestampUs); 216 + 217 + // If we're at the same timestamp, increment by 1 microsecond 218 + // This acts as our sequence counter while maintaining monotonicity 219 + if (timestamp === this.state.lastTimestampUs) { 220 + timestamp = this.state.lastTimestampUs + 1; 221 + } 222 + 223 + if (currentTime < this.state.lastTimestampUs) { 224 + this.logger.logWarning('Clock moved backwards', { 225 + current: currentTime, 226 + last: this.state.lastTimestampUs, 227 + delta: this.state.lastTimestampUs - currentTime, 228 + action: 'using_incremented_timestamp', 229 + }); 230 + } 231 + 232 + // Generate TID 233 + const tid = this.encodeTid(timestamp, this.state.clockId); 234 + 235 + // Validate format 236 + const validated = this.validateTid(tid); 237 + if (!validated) { 238 + const error = new InvalidTidError('Generated invalid TID', tid); 239 + this.logger.logError('TID validation failed', { 240 + tid, 241 + timestamp, 242 + clockId: this.state.clockId, 243 + }); 244 + throw error; 245 + } 246 + 247 + // Update state 248 + this.state.lastTimestampUs = timestamp; 249 + this.state.generatedCount++; 250 + 251 + // Persist state if configured 252 + if (this.statePath) { 253 + this.saveState(); 254 + } 255 + 256 + // Log metadata 257 + const opId = crypto.randomUUID(); 258 + const metadata: TidMetadata = { 259 + tid, 260 + timestampUs: timestamp, 261 + clockId: this.state.clockId, 262 + generatedAt: this.formatMicrosecondTimestamp(timestamp), 263 + validated, 264 + context, 265 + }; 266 + this.logger.logGenerated(metadata, opId); 267 + 268 + return tid; 269 + }); 270 + } 271 + 272 + /** 273 + * Generate TID from a specific Date (for historical records) 274 + * Maintains monotonicity relative to previously generated TIDs 275 + */ 276 + async fromDate(date: Date, context?: string): Promise<string> { 277 + const timestamp = date.getTime() * 1000; // Convert ms to µs 278 + 279 + return this.withMutex(async () => { 280 + // Ensure monotonicity: use max of input timestamp and last generated 281 + let finalTimestamp = Math.max(timestamp, this.state.lastTimestampUs); 282 + 283 + // If we're at the same timestamp, increment by 1 microsecond 284 + if (finalTimestamp === this.state.lastTimestampUs) { 285 + finalTimestamp = this.state.lastTimestampUs + 1; 286 + } 287 + 288 + const tid = this.encodeTid(finalTimestamp, this.state.clockId); 289 + const validated = this.validateTid(tid); 290 + 291 + if (!validated) { 292 + throw new InvalidTidError('Generated invalid TID from date', tid); 293 + } 294 + 295 + // Update state 296 + this.state.lastTimestampUs = finalTimestamp; 297 + this.state.generatedCount++; 298 + 299 + if (this.statePath) { 300 + this.saveState(); 301 + } 302 + 303 + const opId = crypto.randomUUID(); 304 + const metadata: TidMetadata = { 305 + tid, 306 + timestampUs: finalTimestamp, 307 + clockId: this.state.clockId, 308 + generatedAt: this.formatMicrosecondTimestamp(finalTimestamp), 309 + validated, 310 + context, 311 + }; 312 + this.logger.logGenerated(metadata, opId); 313 + 314 + return tid; 315 + }); 316 + } 317 + 318 + /** 319 + * Encode timestamp and clock ID into TID format 320 + */ 321 + private encodeTid(timestampUs: number, clockId: number): string { 322 + const timestampStr = s32encode(timestampUs).padStart(11, '2'); 323 + const clockIdStr = s32encode(clockId).padStart(2, '2'); 324 + return timestampStr + clockIdStr; 325 + } 326 + 327 + /** 328 + * Validate TID format per AT-Protocol spec 329 + */ 330 + private validateTid(tid: string): boolean { 331 + if (tid.length !== TID_LENGTH) { 332 + return false; 333 + } 334 + if (!TID_REGEX.test(tid)) { 335 + return false; 336 + } 337 + return true; 338 + } 339 + 340 + /** 341 + * Format microsecond timestamp as ISO8601 with microsecond precision 342 + */ 343 + private formatMicrosecondTimestamp(timestampUs: number): string { 344 + const ms = Math.floor(timestampUs / 1000); 345 + const us = timestampUs % 1000; 346 + const date = new Date(ms); 347 + const iso = date.toISOString(); 348 + // Replace milliseconds with microseconds 349 + return iso.replace(/\.(\d{3})Z$/, `.${us.toString().padStart(3, '0')}000Z`); 350 + } 351 + 352 + /** 353 + * Load state from disk 354 + */ 355 + private loadState(): TidState { 356 + if (!this.statePath) { 357 + throw new Error('State path not configured'); 358 + } 359 + const data = fs.readFileSync(this.statePath, 'utf-8'); 360 + return JSON.parse(data); 361 + } 362 + 363 + /** 364 + * Save state to disk 365 + */ 366 + private saveState(): void { 367 + if (!this.statePath) { 368 + return; 369 + } 370 + const dir = path.dirname(this.statePath); 371 + if (!fs.existsSync(dir)) { 372 + fs.mkdirSync(dir, { recursive: true }); 373 + } 374 + fs.writeFileSync(this.statePath, JSON.stringify(this.state, null, 2), 'utf-8'); 375 + } 376 + 377 + /** 378 + * Get current state (for inspection/debugging) 379 + */ 380 + getState(): Readonly<TidState> { 381 + return { ...this.state }; 382 + } 383 + 384 + /** 385 + * Reset state (for testing) 386 + */ 387 + reset(preserveClockId: boolean = false): void { 388 + const clockId = preserveClockId ? this.state.clockId : this.generateClockId(); 389 + this.state = { 390 + lastTimestampUs: 0, 391 + clockId, 392 + generatedCount: 0, 393 + }; 394 + if (this.statePath) { 395 + this.saveState(); 396 + } 397 + } 398 + 399 + /** 400 + * Mutex for concurrent access protection 401 + */ 402 + private async withMutex<T>(fn: () => Promise<T>): Promise<T> { 403 + const currentMutex = this.mutex; 404 + let releaseMutex: () => void; 405 + this.mutex = new Promise((resolve) => { 406 + releaseMutex = resolve; 407 + }); 408 + 409 + try { 410 + await currentMutex; 411 + return await fn(); 412 + } finally { 413 + releaseMutex!(); 414 + } 415 + } 416 + } 417 + 418 + /** 419 + * Validate a TID string 420 + */ 421 + export function validateTid(tid: string): boolean { 422 + return tid.length === TID_LENGTH && TID_REGEX.test(tid); 423 + } 424 + 425 + /** 426 + * Ensure TID is valid (throws on invalid) 427 + */ 428 + export function ensureValidTid(tid: string): asserts tid is string { 429 + if (!validateTid(tid)) { 430 + throw new InvalidTidError(`Invalid TID format: ${tid}`, tid); 431 + } 432 + } 433 + 434 + /** 435 + * Decode TID to get timestamp 436 + */ 437 + export function decodeTidTimestamp(tid: string): number { 438 + ensureValidTid(tid); 439 + return s32decode(tid.slice(0, 11)); 440 + } 441 + 442 + /** 443 + * Decode TID to get clock ID 444 + */ 445 + export function decodeTidClockId(tid: string): number { 446 + ensureValidTid(tid); 447 + return s32decode(tid.slice(11, 13)); 448 + } 449 + 450 + /** 451 + * Compare two TIDs (for sorting) 452 + * Returns: -1 if a < b, 0 if equal, 1 if a > b 453 + */ 454 + export function compareTids(a: string, b: string): number { 455 + if (a < b) return -1; 456 + if (a > b) return 1; 457 + return 0; 458 + } 459 + 460 + /** 461 + * Check if TIDs are in strictly increasing order 462 + */ 463 + export function areMonotonic(tids: string[]): boolean { 464 + for (let i = 1; i < tids.length; i++) { 465 + if (tids[i] <= tids[i - 1]) { 466 + return false; 467 + } 468 + } 469 + return true; 470 + }
+97 -32
src/utils/tid.ts
··· 1 1 /** 2 2 * TID (Timestamp Identifier) generation for ATProto 3 - * Based on: https://atproto.com/specs/tid 4 3 * 5 - * Per the spec: "If the local clock has only millisecond precision, the timestamp 6 - * should be padded." Our implementation pads to ensure 11 characters for the timestamp 7 - * portion and 2 characters for the clockid, for a total of exactly 13 characters. 4 + * This module provides a high-level API for TID generation. 5 + * For the full implementation with monotonicity guarantees, see tid-clock.ts 8 6 * 9 - * This implementation uses the official ATProto s32encode function and properly handles 10 - * historical dates (like 2005 scrobbles) by padding to the required length. 7 + * Based on: https://atproto.com/specs/tid 8 + */ 9 + 10 + import { TidClock, RealClock, FakeClock, validateTid } from './tid-clock.js'; 11 + 12 + // Global TID clock instance 13 + let globalClock: TidClock | null = null; 14 + 15 + /** 16 + * Initialize the global TID clock 17 + * Should be called once at application startup 11 18 */ 19 + export function initTidClock(options: { 20 + mode?: 'production' | 'dry-run'; 21 + statePath?: string; 22 + seed?: number; 23 + clockId?: number; 24 + } = {}): void { 25 + const { mode = 'production', statePath, seed, clockId } = options; 12 26 13 - import { s32encode } from '@atproto/common-web/dist/util.js'; 27 + if (mode === 'dry-run' && seed !== undefined) { 28 + // Deterministic clock for dry-run with fixed clockId for reproducibility 29 + globalClock = new TidClock(new FakeClock(seed), undefined, { 30 + statePath, 31 + clockId: clockId ?? 0 // Use fixed clockId for deterministic TIDs 32 + }); 33 + } else { 34 + // Production real-time clock 35 + globalClock = new TidClock(new RealClock(), undefined, { statePath, clockId }); 36 + } 37 + } 14 38 15 39 /** 16 - * Generate a TID from a Date object 17 - * 18 - * TID format (13 characters total): 19 - * - Characters 0-10: timestamp in microseconds, base32-encoded 20 - * - Characters 11-12: clock ID, base32-encoded 40 + * Get or create the global TID clock 41 + */ 42 + function getClock(): TidClock { 43 + if (!globalClock) { 44 + // Auto-initialize with production settings if not explicitly initialized 45 + globalClock = new TidClock(new RealClock()); 46 + } 47 + return globalClock; 48 + } 49 + 50 + /** 51 + * Generate a TID from a Date object with monotonicity guarantees 21 52 * 22 - * The timestamp is padded with '2' (representing 0 in base32) to ensure exactly 11 characters. 23 - * The clockid is similarly padded to ensure exactly 2 characters. 53 + * This is the recommended way to generate TIDs for historical records. 54 + * TIDs are guaranteed to be: 55 + * - Spec-compliant (13 chars, valid base32) 56 + * - Strictly monotonic (even if dates are out of order) 57 + * - Collision-free (duplicate detection) 24 58 * 25 59 * @param date - The date to generate a TID from 60 + * @param context - Optional context for logging (e.g., "inject:playlist") 26 61 * @returns A valid 13-character TID string 27 62 */ 28 - export function generateTID(date: Date): string { 29 - // Convert to Unix microseconds (JS Date.getTime() returns milliseconds) 30 - // Per spec: multiply by 1000 to pad millisecond precision 31 - const unixMicros = date.getTime() * 1000; 32 - 33 - // Use a fixed clockid of 0 for deterministic TID generation from timestamps 34 - // This ensures the same playedTime always generates the same TID (important for deduplication) 35 - const clockid = 0; 36 - 37 - // Encode timestamp and clockid with proper padding 38 - // Timestamp should be 11 characters, clockid should be 2 characters 39 - // Padding character is '2' which represents 0 in base32 40 - const timestampStr = s32encode(unixMicros).padStart(11, '2'); 41 - const clockidStr = s32encode(clockid).padStart(2, '2'); 42 - 43 - return timestampStr + clockidStr; 63 + export async function generateTID(date: Date, context?: string): Promise<string> { 64 + const clock = getClock(); 65 + return await clock.fromDate(date, context); 44 66 } 45 67 46 68 /** 47 69 * Generate a TID from an ISO 8601 timestamp string 48 70 * 49 71 * @param isoString - ISO 8601 formatted datetime string 72 + * @param context - Optional context for logging 50 73 * @returns A valid 13-character TID string 51 74 */ 52 - export function generateTIDFromISO(isoString: string): string { 53 - return generateTID(new Date(isoString)); 75 + export async function generateTIDFromISO(isoString: string, context?: string): Promise<string> { 76 + return await generateTID(new Date(isoString), context); 54 77 } 78 + 79 + /** 80 + * Generate next TID using current time 81 + * 82 + * @param context - Optional context for logging 83 + * @returns A valid 13-character TID string 84 + */ 85 + export async function generateNextTID(context?: string): Promise<string> { 86 + const clock = getClock(); 87 + return await clock.next(context); 88 + } 89 + 90 + /** 91 + * Validate a TID string 92 + * 93 + * @param tid - The TID to validate 94 + * @returns true if valid, false otherwise 95 + */ 96 + export function isValidTID(tid: string): boolean { 97 + return validateTid(tid); 98 + } 99 + 100 + /** 101 + * Reset the global clock state (for testing only) 102 + */ 103 + export function resetTidClock(): void { 104 + if (globalClock) { 105 + globalClock.reset(); 106 + } 107 + } 108 + 109 + /** 110 + * Get the current clock state (for debugging/inspection) 111 + */ 112 + export function getTidClockState() { 113 + const clock = getClock(); 114 + return clock.getState(); 115 + } 116 + 117 + // Re-export types and utilities from tid-clock 118 + export { TidClock, RealClock, FakeClock, validateTid } from './tid-clock.js'; 119 + export type { TidState, TidMetadata } from './tid-clock.js';