// Benchmark suite entry point — testing local-first datastores.
1import { readFile, access, rm } from 'fs/promises';
2import { join } from 'path';
3import { runBenchmarks } from './harness/benchmark.js';
4import { printResults, saveResults } from './harness/reporter.js';
5import { generateCharts } from './harness/chart.js';
6import type { TestData, StoreBenchmarkResults, BenchmarkResult } from './harness/types.js';
7import type { Url } from './generator/urls.js';
8import type { Metadata } from './generator/metadata.js';
9import type { ImageInfo } from './generator/images.js';
10import type { DocumentInfo } from './generator/documents.js';
11import { getStore, getStoreNames } from './stores/index.js';
12
// Number of benchmark runs per store; results are aggregated with a
// trimmed median (highest and lowest runs are dropped).
const ITERATIONS = 10;

// Directory holding the pre-generated test fixtures (produced by "npm run generate").
const TEST_DATA_DIR = 'test-data';
16
17async function fileExists(path: string): Promise<boolean> {
18 try {
19 await access(path);
20 return true;
21 } catch {
22 return false;
23 }
24}
25
26async function loadTestData(): Promise<TestData> {
27 console.log('Loading test data...');
28
29 // Check if test data exists
30 if (!await fileExists(join(TEST_DATA_DIR, 'urls.json'))) {
31 console.error('Test data not found. Run "npm run generate" first.');
32 process.exit(1);
33 }
34
35 // Load URLs
36 const urls: Url[] = JSON.parse(
37 await readFile(join(TEST_DATA_DIR, 'urls.json'), 'utf-8')
38 );
39 console.log(` Loaded ${urls.length} URLs`);
40
41 // Load metadata
42 const metadata: Metadata[] = JSON.parse(
43 await readFile(join(TEST_DATA_DIR, 'metadata.json'), 'utf-8')
44 );
45 console.log(` Loaded ${metadata.length} metadata rows`);
46
47 // Load image index and data
48 const imageIndex: ImageInfo[] = JSON.parse(
49 await readFile(join(TEST_DATA_DIR, 'images.json'), 'utf-8')
50 );
51 const images: TestData['images'] = [];
52 for (const img of imageIndex) {
53 const data = await readFile(join(TEST_DATA_DIR, 'images', img.filename));
54 images.push({ id: img.id, filename: img.filename, data });
55 }
56 console.log(` Loaded ${images.length} images`);
57
58 // Load document index and content
59 const documentIndex: DocumentInfo[] = JSON.parse(
60 await readFile(join(TEST_DATA_DIR, 'documents.json'), 'utf-8')
61 );
62 const documents: TestData['documents'] = [];
63 for (const doc of documentIndex) {
64 const content = await readFile(join(TEST_DATA_DIR, 'documents', doc.filename), 'utf-8');
65 documents.push({ id: doc.id, filename: doc.filename, content });
66 }
67 console.log(` Loaded ${documents.length} documents`);
68
69 return { urls, metadata, images, documents };
70}
71
72function parseArgs(): { stores: string[] } {
73 const args = process.argv.slice(2);
74 const storeIndex = args.indexOf('--store');
75
76 if (storeIndex !== -1 && args[storeIndex + 1]) {
77 return { stores: [args[storeIndex + 1]] };
78 }
79
80 return { stores: getStoreNames() };
81}
82
83function formatDuration(ms: number): string {
84 if (ms < 1000) return `${ms.toFixed(0)}ms`;
85 if (ms < 60000) return `${(ms / 1000).toFixed(2)}s`;
86 return `${(ms / 60000).toFixed(2)}m`;
87}
88
89// Calculate median, excluding highest and lowest values
90function medianExcludingExtremes(values: number[]): number {
91 if (values.length <= 2) return values[0] ?? 0;
92
93 const sorted = [...values].sort((a, b) => a - b);
94 // Remove lowest and highest
95 const trimmed = sorted.slice(1, -1);
96
97 const mid = Math.floor(trimmed.length / 2);
98 if (trimmed.length % 2 === 0) {
99 return (trimmed[mid - 1] + trimmed[mid]) / 2;
100 }
101 return trimmed[mid];
102}
103
104// Aggregate multiple benchmark results into a single median result
105function aggregateBenchmarkResult(results: BenchmarkResult[]): BenchmarkResult {
106 // If any run succeeded, use successful runs for median
107 const successful = results.filter(r => !r.failed);
108
109 if (successful.length === 0) {
110 // All failed - return first failure
111 return results[0];
112 }
113
114 const durations = successful.map(r => r.durationMs);
115 const medianDuration = medianExcludingExtremes(durations);
116
117 // Use metadata from first successful result
118 const first = successful[0];
119 return {
120 name: first.name,
121 durationMs: medianDuration,
122 itemCount: first.itemCount,
123 bytesProcessed: first.bytesProcessed,
124 failed: false
125 };
126}
127
128// Aggregate multiple store benchmark results into median results
129function aggregateStoreResults(runs: StoreBenchmarkResults[]): StoreBenchmarkResults {
130 return {
131 storeName: runs[0].storeName,
132 init: aggregateBenchmarkResult(runs.map(r => r.init)),
133 writes: {
134 allUrls: aggregateBenchmarkResult(runs.map(r => r.writes.allUrls)),
135 allImages: aggregateBenchmarkResult(runs.map(r => r.writes.allImages)),
136 allDocuments: aggregateBenchmarkResult(runs.map(r => r.writes.allDocuments)),
137 allMetadata: aggregateBenchmarkResult(runs.map(r => r.writes.allMetadata)),
138 },
139 reads: {
140 recentUrls: aggregateBenchmarkResult(runs.map(r => r.reads.recentUrls)),
141 randomImages: aggregateBenchmarkResult(runs.map(r => r.reads.randomImages)),
142 randomDocuments: aggregateBenchmarkResult(runs.map(r => r.reads.randomDocuments)),
143 },
144 disk: {
145 // Use median disk usage
146 totalBytes: medianExcludingExtremes(runs.map(r => r.disk.totalBytes))
147 }
148 };
149}
150
151async function main() {
152 console.log('='.repeat(60));
153 console.log('DATASTORE BENCHMARK SUITE');
154 console.log('='.repeat(60));
155 console.log();
156
157 const { stores: storeNames } = parseArgs();
158 const availableStores = getStoreNames();
159
160 // Validate store names
161 for (const name of storeNames) {
162 if (!availableStores.includes(name.toLowerCase())) {
163 console.error(`Unknown store: ${name}`);
164 console.error(`Available stores: ${availableStores.join(', ')}`);
165 process.exit(1);
166 }
167 }
168
169 console.log(`Stores to benchmark: ${storeNames.join(', ')}`);
170 console.log(`Iterations per store: ${ITERATIONS} (excluding high/low, using median)\n`);
171
172 // Load test data
173 const testData = await loadTestData();
174 console.log();
175
176 // Run benchmarks
177 const results: StoreBenchmarkResults[] = [];
178
179 for (const storeName of storeNames) {
180 console.log(`\n${'─'.repeat(50)}`);
181 console.log(`Benchmarking ${storeName} (${ITERATIONS} iterations)`);
182 console.log('─'.repeat(50));
183
184 const store = getStore(storeName);
185 if (!store) continue;
186
187 const storeRuns: StoreBenchmarkResults[] = [];
188
189 for (let i = 0; i < ITERATIONS; i++) {
190 console.log(`\n [Run ${i + 1}/${ITERATIONS}]`);
191
192 const result = await runBenchmarks(store, testData, (phase, durationMs, failed, error) => {
193 if (durationMs !== undefined) {
194 if (failed) {
195 console.log(` ✗ ${phase} FAILED (${formatDuration(durationMs)}) - ${error}`);
196 } else {
197 console.log(` ✓ ${phase} (${formatDuration(durationMs)})`);
198 }
199 } else {
200 console.log(` → ${phase}`);
201 }
202 });
203
204 storeRuns.push(result);
205
206 // Clean up store data after each iteration
207 await rm('store-data', { recursive: true, force: true });
208 }
209
210 // Aggregate results using median (excluding high/low)
211 const aggregated = aggregateStoreResults(storeRuns);
212 results.push(aggregated);
213
214 console.log(`\n Median results for ${storeName} computed from ${ITERATIONS} runs`);
215 }
216
217 // Print and save results
218 printResults(results);
219 const runDir = await saveResults(results);
220
221 // Generate comparison charts if multiple stores were tested
222 if (results.length > 1) {
223 console.log('Generating comparison charts...');
224 const chartFiles = await generateCharts(results, runDir);
225 console.log(`Generated ${chartFiles.length} files in ${runDir}/`);
226 }
227}
228
// Entry point: run the suite; any unhandled failure is logged and the
// process exits non-zero so CI can detect it.
main().catch((err) => {
  console.error('Benchmark failed:', err);
  process.exit(1);
});