import { readFile, access, rm } from 'fs/promises';
import { join } from 'path';
import { runBenchmarks } from './harness/benchmark.js';
import { printResults, saveResults } from './harness/reporter.js';
import { generateCharts } from './harness/chart.js';
import type { TestData, StoreBenchmarkResults, BenchmarkResult } from './harness/types.js';
import type { Url } from './generator/urls.js';
import type { Metadata } from './generator/metadata.js';
import type { ImageInfo } from './generator/images.js';
import type { DocumentInfo } from './generator/documents.js';
import { getStore, getStoreNames } from './stores/index.js';

/** Number of times each store's full benchmark is repeated before aggregating. */
const ITERATIONS = 10;
/** Directory (relative to the working directory) holding the generated test fixtures. */
const TEST_DATA_DIR = 'test-data';
/** Directory removed after every benchmark iteration. */
const STORE_DATA_DIR = 'store-data';

/**
 * Reports whether `path` can be accessed.
 *
 * @returns `true` when `access` succeeds, `false` when it rejects for any reason.
 */
async function fileExists(path: string): Promise<boolean> {
  try {
    await access(path);
    return true;
  } catch {
    return false;
  }
}

/**
 * Reads a UTF-8 JSON file from the test-data directory and parses it.
 *
 * The parsed value is trusted to match `T`; no runtime validation is done.
 */
async function readJson<T>(fileName: string): Promise<T> {
  const raw = await readFile(join(TEST_DATA_DIR, fileName), 'utf-8');
  return JSON.parse(raw) as T;
}

/**
 * Loads URLs, metadata, images and documents from the test-data directory.
 *
 * Exits the process with status 1 when `urls.json` is missing. Image files are
 * read as raw buffers, document files as UTF-8 text.
 */
async function loadTestData(): Promise<TestData> {
  console.log('Loading test data...');

  // Only urls.json is probed; its absence is treated as "nothing generated yet".
  if (!(await fileExists(join(TEST_DATA_DIR, 'urls.json')))) {
    console.error('Test data not found. Run "npm run generate" first.');
    process.exit(1);
  }

  const urls = await readJson<Url[]>('urls.json');
  console.log(` Loaded ${urls.length} URLs`);

  const metadata = await readJson<Metadata[]>('metadata.json');
  console.log(` Loaded ${metadata.length} metadata rows`);

  // The index lists the files; the payloads live under images/.
  const imageIndex = await readJson<ImageInfo[]>('images.json');
  const images: TestData['images'] = [];
  for (const img of imageIndex) {
    const data = await readFile(join(TEST_DATA_DIR, 'images', img.filename));
    images.push({ id: img.id, filename: img.filename, data });
  }
  console.log(` Loaded ${images.length} images`);

  // Same layout as images: index file plus a documents/ directory.
  const documentIndex = await readJson<DocumentInfo[]>('documents.json');
  const documents: TestData['documents'] = [];
  for (const doc of documentIndex) {
    const content = await readFile(join(TEST_DATA_DIR, 'documents', doc.filename), 'utf-8');
    documents.push({ id: doc.id, filename: doc.filename, content });
  }
  console.log(` Loaded ${documents.length} documents`);

  return { urls, metadata, images, documents };
}

/**
 * Parses the command line.
 *
 * @returns the single store named after `--store`, or every registered store
 *          when the flag is absent or has no value following it.
 */
function parseArgs(): { stores: string[] } {
  const args = process.argv.slice(2);
  const flagIndex = args.indexOf('--store');
  const requested = flagIndex === -1 ? undefined : args[flagIndex + 1];
  return { stores: requested ? [requested] : getStoreNames() };
}

/** Formats a millisecond duration as `Nms`, `N.NNs` or `N.NNm` depending on magnitude. */
function formatDuration(ms: number): string {
  if (ms < 1000) return `${ms.toFixed(0)}ms`;
  if (ms < 60000) return `${(ms / 1000).toFixed(2)}s`;
  return `${(ms / 60000).toFixed(2)}m`;
}

/**
 * Computes the median of `values` after discarding the single lowest and
 * single highest sample.
 *
 * With fewer than three samples there is nothing meaningful to trim, so the
 * plain median is returned instead (the value itself for one sample, the mean
 * for two). An empty input yields 0. The input array is not mutated.
 */
function medianExcludingExtremes(values: number[]): number {
  if (values.length === 0) return 0;

  const sorted = [...values].sort((a, b) => a - b);
  const sample = sorted.length > 2 ? sorted.slice(1, -1) : sorted;
  const mid = Math.floor(sample.length / 2);

  // The `?? 0` fallbacks are unreachable (sample is non-empty); they only
  // satisfy strict indexed-access checking.
  if (sample.length % 2 === 0) {
    return ((sample[mid - 1] ?? 0) + (sample[mid] ?? 0)) / 2;
  }
  return sample[mid] ?? 0;
}

/**
 * Collapses repeated runs of one benchmark phase into a single result.
 *
 * If at least one run succeeded, the duration is the trimmed median of the
 * successful runs and the remaining fields come from the first successful run.
 * If every run failed, the first failure is returned unchanged.
 *
 * @throws Error when `results` is empty.
 */
function aggregateBenchmarkResult(results: BenchmarkResult[]): BenchmarkResult {
  const [firstResult] = results;
  if (firstResult === undefined) {
    throw new Error('Cannot aggregate an empty list of benchmark results');
  }

  const successful = results.filter((r) => !r.failed);
  const [firstSuccess] = successful;
  if (firstSuccess === undefined) {
    // All failed - surface the first failure as-is.
    return firstResult;
  }

  return {
    name: firstSuccess.name,
    durationMs: medianExcludingExtremes(successful.map((r) => r.durationMs)),
    itemCount: firstSuccess.itemCount,
    bytesProcessed: firstSuccess.bytesProcessed,
    failed: false,
  };
}

/**
 * Collapses repeated full-suite runs for one store into a single result set,
 * aggregating each phase independently and taking the trimmed median of disk
 * usage.
 *
 * @throws Error when `runs` is empty.
 */
function aggregateStoreResults(runs: StoreBenchmarkResults[]): StoreBenchmarkResults {
  const [firstRun] = runs;
  if (firstRun === undefined) {
    throw new Error('Cannot aggregate an empty list of store runs');
  }

  // Aggregates one phase, selected from each run, across all runs.
  const phase = (select: (run: StoreBenchmarkResults) => BenchmarkResult): BenchmarkResult =>
    aggregateBenchmarkResult(runs.map(select));

  return {
    storeName: firstRun.storeName,
    init: phase((r) => r.init),
    writes: {
      allUrls: phase((r) => r.writes.allUrls),
      allImages: phase((r) => r.writes.allImages),
      allDocuments: phase((r) => r.writes.allDocuments),
      allMetadata: phase((r) => r.writes.allMetadata),
    },
    reads: {
      recentUrls: phase((r) => r.reads.recentUrls),
      randomImages: phase((r) => r.reads.randomImages),
      randomDocuments: phase((r) => r.reads.randomDocuments),
    },
    disk: {
      totalBytes: medianExcludingExtremes(runs.map((r) => r.disk.totalBytes)),
    },
  };
}

/**
 * Runs the benchmark suite: validates the requested stores, loads the test
 * data, benchmarks each store `ITERATIONS` times, aggregates the runs, then
 * prints and saves the results (plus comparison charts when more than one
 * store produced results).
 */
async function main(): Promise<void> {
  console.log('='.repeat(60));
  console.log('DATASTORE BENCHMARK SUITE');
  console.log('='.repeat(60));
  console.log();

  const { stores: storeNames } = parseArgs();
  const availableStores = getStoreNames();

  // Validate store names (case-insensitively) before doing any expensive work.
  for (const name of storeNames) {
    if (!availableStores.includes(name.toLowerCase())) {
      console.error(`Unknown store: ${name}`);
      console.error(`Available stores: ${availableStores.join(', ')}`);
      process.exit(1);
    }
  }

  console.log(`Stores to benchmark: ${storeNames.join(', ')}`);
  console.log(`Iterations per store: ${ITERATIONS} (excluding high/low, using median)\n`);

  const testData = await loadTestData();
  console.log();

  const results: StoreBenchmarkResults[] = [];

  for (const storeName of storeNames) {
    console.log(`\n${'─'.repeat(50)}`);
    console.log(`Benchmarking ${storeName} (${ITERATIONS} iterations)`);
    console.log('─'.repeat(50));

    const store = getStore(storeName);
    if (!store) {
      // Validation above is case-insensitive, so a name can pass it and still
      // fail to resolve here; say so instead of skipping silently.
      console.warn(`Skipping ${storeName}: store could not be resolved`);
      continue;
    }

    const storeRuns: StoreBenchmarkResults[] = [];

    for (let i = 0; i < ITERATIONS; i++) {
      console.log(`\n [Run ${i + 1}/${ITERATIONS}]`);

      try {
        storeRuns.push(
          await runBenchmarks(store, testData, (phase, durationMs, failed, error) => {
            if (durationMs === undefined) {
              // No duration yet: the phase is just starting.
              console.log(` → ${phase}`);
            } else if (failed) {
              console.log(` ✗ ${phase} FAILED (${formatDuration(durationMs)}) - ${error}`);
            } else {
              console.log(` ✓ ${phase} (${formatDuration(durationMs)})`);
            }
          }),
        );
      } finally {
        // Clean up store data after each iteration, even when the run threw,
        // so leftovers never leak into a later run.
        await rm(STORE_DATA_DIR, { recursive: true, force: true });
      }
    }

    // Aggregate results using median (excluding high/low).
    results.push(aggregateStoreResults(storeRuns));
    console.log(`\n Median results for ${storeName} computed from ${ITERATIONS} runs`);
  }

  printResults(results);
  const runDir = await saveResults(results);

  // Comparison charts only make sense when multiple stores were tested.
  if (results.length > 1) {
    console.log('Generating comparison charts...');
    const chartFiles = await generateCharts(results, runDir);
    console.log(`Generated ${chartFiles.length} files in ${runDir}/`);
  }
}
// Script entry point: start the suite; any rejection that escapes main() is
// logged and converted into a non-zero exit status.
main().catch((failure: unknown) => {
  console.error('Benchmark failed:', failure);
  process.exit(1);
});