···1818 super(`missing block in store; cid=${cid}` + (def ? `; type=${def}` : ``));
1919 }
2020}
2121-2222-/**
2323- * thrown when a block's decoded object doesn't match the expected type
2424- */
2525-export class UnexpectedObjectError extends Error {
2626- constructor(
2727- public cid: string,
2828- public def: string,
2929- ) {
3030- super(`unexpected object in store; cid=${cid}; expected=${def}`);
3131- }
3232-}
+8-8
packages/utilities/mst/lib/node-wrangler.ts
···44import { NodeStore } from './node-store.js';
5566/**
77- * array helper: replaces element at index with a new value
77+ * replaces element at index with a new value
88 */
99const replaceAt = <T>(arr: readonly T[], index: number, value: T): readonly T[] => {
1010- return [...arr.slice(0, index), value, ...arr.slice(index + 1)];
1010+ return arr.with(index, value);
1111};
12121313/**
1414- * array helper: inserts element at index
1414+ * inserts element at index
1515 */
1616const insertAt = <T>(arr: readonly T[], index: number, value: T): readonly T[] => {
1717- return [...arr.slice(0, index), value, ...arr.slice(index)];
1717+ return arr.toSpliced(index, 0, value);
1818};
19192020/**
2121- * array helper: removes element at index
2121+ * removes element at index
2222 */
2323const removeAt = <T>(arr: readonly T[], index: number): readonly T[] => {
2424- return [...arr.slice(0, index), ...arr.slice(index + 1)];
2424+ return arr.toSpliced(index, 1);
2525};
26262727/**
···3131 * the external APIs take a CID (the MST root) and return a CID (the new root),
3232 * while storing any newly created nodes in the NodeStore.
3333 *
3434- * neither method should ever fail - deleting a node that doesn't exist is a nop,
3535- * and adding the same node twice with the same value is also a nop. Callers
3434+ * neither method should ever fail - deleting a node that doesn't exist is a noop,
3535+ * and adding the same node twice with the same value is also a nop. callers
3636 * can detect these cases by seeing if the initial and final CIDs changed.
3737 */
3838export class NodeWrangler {
···11-import * as CBOR from '@atcute/cbor';
22-31import { deleteMany, setMany, type BlockMap } from './blockmap.js';
44-import { MissingBlockError, UnexpectedObjectError } from './errors.js';
5263/**
74 * a read-only interface for retrieving blocks by their CID
···132129 * all writes go to the upper store only
133130 */
134131export class OverlayBlockStore implements BlockStore {
135135- /** the writable upper layer store */
132132+ /** writable upper layer store */
136133 upper: BlockStore;
137137- /** the read-only lower layer store */
134134+ /** read-only lower layer store */
138135 lower: ReadonlyBlockStore;
139136140137 /**
···195192}
196193197194/**
198198- * reads and decodes a block, validating it matches the expected type
199199- * @param store block store to read from
200200- * @param cid CID of the block to read
201201- * @param def schema definition with name and validation function
202202- * @returns the decoded and validated object
203203- * @throws {MissingBlockError} if block is not found
204204- * @throws {UnexpectedObjectError} if block doesn't match expected type
195195+ * a read-only block store wrapper that tracks all get() accesses
196196+ * useful for collecting proof nodes during MST operations
205197 */
206206-export const readObject = async <T>(store: ReadonlyBlockStore, cid: string, def: CheckDef<T>): Promise<T> => {
207207- const bytes = await store.get(cid);
208208- if (bytes === null) {
209209- throw new MissingBlockError(cid, def.name);
210210- }
198198+export class LoggingBlockStore implements ReadonlyBlockStore {
199199+ /** block store being proxied */
200200+ readonly wrapped: ReadonlyBlockStore;
201201+ /** set of CIDs that were accessed via get() or getMany() */
202202+ readonly accessed = new Set<string>();
211203212212- const decoded = CBOR.decode(bytes);
213213- if (!def.check(decoded)) {
214214- throw new UnexpectedObjectError(cid, def.name);
204204+ /**
205205+ * creates a new logging block store wrapper
206206+ * @param store the block store to wrap
207207+ */
208208+ constructor(store: ReadonlyBlockStore) {
209209+ this.wrapped = store;
215210 }
216211217217- return decoded;
218218-};
212212+ async get(cid: string): Promise<Uint8Array<ArrayBuffer> | null> {
213213+ this.accessed.add(cid);
219214220220-/**
221221- * reads and decodes a block without type validation
222222- * @param store block store to read from
223223- * @param cid CID of the block to read
224224- * @returns the decoded object
225225- * @throws {MissingBlockError} if block is not found
226226- */
227227-export const readRecord = async (store: ReadonlyBlockStore, cid: string): Promise<unknown> => {
228228- const bytes = await store.get(cid);
229229- if (bytes === null) {
230230- throw new MissingBlockError(cid, undefined);
215215+ return this.wrapped.get(cid);
231216 }
232217233233- const decoded = CBOR.decode(bytes);
218218+ async getMany(cids: string[]): Promise<{ found: BlockMap; missing: string[] }> {
219219+ const accessed = this.accessed;
234220235235- return decoded;
236236-};
221221+ for (const cid of cids) {
222222+ accessed.add(cid);
223223+ }
237224238238-/**
239239- * defines a type validator for use with readObject
240240- * combines a human-readable type name with a type guard function
241241- */
242242-export interface CheckDef<T> {
243243- /** human-readable name of the expected type */
244244- name: string;
245245- /** type guard function to validate the decoded value */
246246- check: (value: unknown) => value is T;
225225+ return this.wrapped.getMany(cids);
226226+ }
227227+228228+ async has(cid: string): Promise<boolean> {
229229+ // has() doesn't count as an access for proof purposes
230230+ return this.wrapped.has(cid);
231231+ }
247232}
+157-114
packages/utilities/mst/lib/test-suite.test.ts
···11-import { readFileSync, readdirSync, statSync } from 'node:fs';
22-import { join } from 'node:path';
33-import { describe, expect, it } from 'vitest';
11+import { beforeAll, describe, expect, it } from 'vitest';
22+import * as v from 'valibot';
33+44+import * as fs from 'node:fs/promises';
55+import * as path from 'node:path';
4657import { fromUint8Array } from '@atcute/car/v4/car-reader';
68import * as CID from '@atcute/cid';
791010+import { setMany } from './blockmap.js';
811import { DeltaType, mstDiff, recordDiff } from './diff.js';
912import { NodeStore } from './node-store.js';
1010-import { MemoryBlockStore } from './stores.js';
1313+import { NodeWrangler } from './node-wrangler.js';
1414+import { buildExclusionProof, buildInclusionProof } from './proof.js';
1515+import {
1616+ LoggingBlockStore,
1717+ MemoryBlockStore,
1818+ OverlayBlockStore,
1919+ ReadonlyMemoryBlockStore,
2020+} from './stores.js';
11211212-interface MstDiffTestCase {
1313- $type: 'mst-diff';
1414- description: string;
1515- inputs: {
1616- mst_a: string;
1717- mst_b: string;
1818- };
1919- results: {
2020- created_nodes: string[];
2121- deleted_nodes: string[];
2222- record_ops: Array<{
2323- rpath: string;
2424- old_value: string | null;
2525- new_value: string | null;
2626- }>;
2727- proof_nodes: string[];
2828- inductive_proof_nodes: string[];
2929- firehose_cids: string | string[];
3030- };
3131-}
2222+const mstDiffTestCaseSchema = v.object({
2323+ $type: v.literal('mst-diff'),
2424+ description: v.string(),
2525+ inputs: v.object({
2626+ mst_a: v.string(),
2727+ mst_b: v.string(),
2828+ }),
2929+ results: v.object({
3030+ created_nodes: v.array(v.string()),
3131+ deleted_nodes: v.array(v.string()),
3232+ record_ops: v.array(
3333+ v.object({
3434+ rpath: v.string(),
3535+ old_value: v.nullable(v.string()),
3636+ new_value: v.nullable(v.string()),
3737+ }),
3838+ ),
3939+ proof_nodes: v.array(v.string()),
4040+ inductive_proof_nodes: v.array(v.string()),
4141+ }),
4242+});
4343+4444+type MstDiffTestCase = v.InferOutput<typeof mstDiffTestCaseSchema>;
4545+4646+const testSuiteRoot = path.join(__dirname, '../mst-test-suite');
32473348/**
3449 * Load a CAR file into a MemoryBlockStore and extract the root CID
3550 */
3636-const loadCar = (carPath: string): { store: MemoryBlockStore; root: string } => {
3737- const testSuiteRoot = join(__dirname, '..', '.research', 'mst-test-suite');
3838- const fullPath = join(testSuiteRoot, carPath);
3939- const carBytes = readFileSync(fullPath);
5151+const loadCar = async (relname: string): Promise<{ store: ReadonlyMemoryBlockStore; root: string }> => {
5252+ const filename = path.join(testSuiteRoot, relname);
5353+ const bytes = await fs.readFile(filename);
40544141- const car = fromUint8Array(carBytes);
5555+ const car = fromUint8Array(bytes);
4256 const store = new MemoryBlockStore();
43574444- // Load all blocks from CAR into the store
4558 for (const entry of car) {
4659 const cidStr = CID.toCidLink(entry.cid).$link;
4747- store.blocks.set(cidStr, entry.bytes);
6060+ store.blocks.set(cidStr, entry.bytes as Uint8Array<ArrayBuffer>);
4861 }
49625050- // Extract root CID from CAR header
5163 if (car.roots.length !== 1) {
5252- throw new Error(`Expected exactly 1 root in CAR, got ${car.roots.length}`);
6464+ throw new Error(`expected exactly 1 root in CAR, got ${car.roots.length}`);
5365 }
54665567 const root = car.roots[0].$link;
5668 return { store, root };
5769};
58705959-/**
6060- * Recursively find all .json test files in a directory
6161- */
6262-const findTestFiles = (dir: string): string[] => {
6363- const results: string[] = [];
6464- const entries = readdirSync(dir);
6565-6666- for (const entry of entries) {
6767- const fullPath = join(dir, entry);
6868- const stat = statSync(fullPath);
7171+const testCases = await (async () => {
7272+ const testsDir = path.join(testSuiteRoot, 'tests');
69737070- if (stat.isDirectory()) {
7171- results.push(...findTestFiles(fullPath));
7272- } else if (entry.endsWith('.json')) {
7373- results.push(fullPath);
7474- }
7575- }
7474+ const testCases: Array<{ path: string; description: string; testCase: MstDiffTestCase }> = [];
76757777- return results;
7878-};
7979-8080-/**
8181- * Load all test cases from the test suite
8282- */
8383-const loadTestCases = (): Array<{ path: string; testCase: MstDiffTestCase }> => {
8484- const testSuiteRoot = join(__dirname, '..', '.research', 'mst-test-suite');
8585- const testsDir = join(testSuiteRoot, 'tests');
8686- const testFiles = findTestFiles(testsDir);
7676+ for await (const name of fs.glob('**/*.json', { cwd: testsDir })) {
7777+ const filename = path.join(testsDir, name);
87788888- const testCases: Array<{ path: string; testCase: MstDiffTestCase }> = [];
7979+ const raw = await fs.readFile(filename, 'utf-8');
8080+ const json = JSON.parse(raw);
89819090- for (const filePath of testFiles) {
9191- const content = readFileSync(filePath, 'utf-8');
9292- const testCase = JSON.parse(content) as MstDiffTestCase;
8282+ const testCase = v.parse(mstDiffTestCaseSchema, json);
93839494- if (testCase.$type === 'mst-diff') {
9595- testCases.push({ path: filePath, testCase });
9696- }
8484+ testCases.push({
8585+ path: filename,
8686+ description: testCase.description.replace(`procedurally generated MST diff test case `, ``),
8787+ testCase,
8888+ });
9789 }
98909991 return testCases;
100100-};
9292+})();
1019310294describe('MST Test Suite', () => {
103103- const allTestCases = loadTestCases();
9595+ describe.each(testCases)('$description', ({ testCase }) => {
9696+ let storeA: ReadonlyMemoryBlockStore;
9797+ let rootA: string;
10498105105- // Run all test cases
106106- const testCases = allTestCases;
9999+ let storeB: ReadonlyMemoryBlockStore;
100100+ let rootB: string;
107101108108- it(`should have loaded test cases (${testCases.length} total)`, () => {
109109- expect(testCases.length).toBeGreaterThan(1000); // Should have 16k+ tests
110110- });
102102+ beforeAll(async () => {
103103+ ({ store: storeA, root: rootA } = await loadCar(testCase.inputs.mst_a));
104104+ ({ store: storeB, root: rootB } = await loadCar(testCase.inputs.mst_b));
105105+ });
111106112112- describe.each(testCases)('$testCase.description', ({ testCase }) => {
113113- it('should compute correct mstDiff', async () => {
114114- // Load both CARs
115115- const { store: storeA, root: rootA } = loadCar(testCase.inputs.mst_a);
116116- const { store: storeB, root: rootB } = loadCar(testCase.inputs.mst_b);
117117-118118- // Create NodeStores (combine both block stores for access to all blocks)
119119- // We need an overlay approach since diff needs to read from both trees
107107+ it('computes the correct mstDiff', async () => {
120108 const combinedStore = new MemoryBlockStore();
121121- for (const [cid, bytes] of storeA.blocks) {
122122- combinedStore.blocks.set(cid, bytes);
123123- }
124124- for (const [cid, bytes] of storeB.blocks) {
125125- combinedStore.blocks.set(cid, bytes);
126126- }
109109+ setMany(combinedStore.blocks, storeA.blocks);
110110+ setMany(combinedStore.blocks, storeB.blocks);
127111128112 const nodeStore = new NodeStore(combinedStore);
129113130130- // Run mstDiff
131114 const [createdNodes, deletedNodes] = await mstDiff(nodeStore, rootA, rootB);
132115133133- // Compare created_nodes (as sets, order doesn't matter)
134116 const expectedCreated = new Set(testCase.results.created_nodes);
135117 expect(createdNodes).toEqual(expectedCreated);
136118137137- // Compare deleted_nodes (as sets, order doesn't matter)
138119 const expectedDeleted = new Set(testCase.results.deleted_nodes);
139120 expect(deletedNodes).toEqual(expectedDeleted);
140121 });
141122142142- it('should compute correct recordDiff', async () => {
143143- // Load both CARs
144144- const { store: storeA, root: rootA } = loadCar(testCase.inputs.mst_a);
145145- const { store: storeB, root: rootB } = loadCar(testCase.inputs.mst_b);
146146-147147- // Create combined NodeStore
123123+ it('computes the correct recordDiff', async () => {
148124 const combinedStore = new MemoryBlockStore();
149149- for (const [cid, bytes] of storeA.blocks) {
150150- combinedStore.blocks.set(cid, bytes);
151151- }
152152- for (const [cid, bytes] of storeB.blocks) {
153153- combinedStore.blocks.set(cid, bytes);
154154- }
125125+ setMany(combinedStore.blocks, storeA.blocks);
126126+ setMany(combinedStore.blocks, storeB.blocks);
155127156128 const nodeStore = new NodeStore(combinedStore);
157129158158- // Run mstDiff and recordDiff
159130 const [createdNodes, deletedNodes] = await mstDiff(nodeStore, rootA, rootB);
160131161161- const deltas = [];
162162- for await (const delta of recordDiff(nodeStore, createdNodes, deletedNodes)) {
163163- deltas.push(delta);
164164- }
132132+ const deltas = await Array.fromAsync(recordDiff(nodeStore, createdNodes, deletedNodes));
133133+ deltas.sort((a, b) => +(a.path > b.path) - +(a.path < b.path));
165134166166- // Sort both actual and expected by rpath for comparison
167167- const sortedDeltas = deltas.sort((a, b) => a.path.localeCompare(b.path));
168168- const sortedExpected = [...testCase.results.record_ops].sort((a, b) => a.rpath.localeCompare(b.rpath));
135135+ const expectance = testCase.results.record_ops.toSorted(
136136+ (a, b) => +(a.rpath > b.rpath) - +(a.rpath < b.rpath),
137137+ );
169138170170- expect(sortedDeltas.length).toBe(sortedExpected.length);
139139+ expect(deltas.length).toBe(expectance.length);
171140172172- for (let i = 0; i < sortedDeltas.length; i++) {
173173- const actual = sortedDeltas[i];
174174- const expected = sortedExpected[i];
141141+ for (let idx = 0, len = deltas.length; idx < len; idx++) {
142142+ const actual = deltas[idx];
143143+ const expected = expectance[idx];
175144176145 expect(actual.path).toBe(expected.rpath);
177146 expect(actual.priorValue?.$link ?? null).toBe(expected.old_value);
···186155 expect(actual.deltaType).toBe(DeltaType.UPDATED);
187156 }
188157 }
158158+ });
159159+160160+ it('computes the correct proof_nodes', async () => {
161161+ // create combined store
162162+ const combinedStore = new MemoryBlockStore();
163163+ setMany(combinedStore.blocks, storeA.blocks);
164164+ setMany(combinedStore.blocks, storeB.blocks);
165165+166166+ const nodeStore = new NodeStore(combinedStore);
167167+168168+ // collect proof nodes for all record operations
169169+ const proofNodes = new Set<string>();
170170+171171+ for (const op of testCase.results.record_ops) {
172172+ let proof: Set<string>;
173173+174174+ if (op.old_value === null) {
175175+ // CREATED: inclusion proof for new record in rootB
176176+ proof = await buildInclusionProof(nodeStore, rootB, op.rpath);
177177+ } else if (op.new_value === null) {
178178+ // DELETED: exclusion proof in rootB
179179+ proof = await buildExclusionProof(nodeStore, rootB, op.rpath);
180180+ } else {
181181+ // UPDATED: inclusion proof for updated record in rootB
182182+ proof = await buildInclusionProof(nodeStore, rootB, op.rpath);
183183+ }
184184+185185+ // add all proof nodes to the set
186186+ for (const cid of proof) {
187187+ proofNodes.add(cid);
188188+ }
189189+ }
190190+191191+ // compare against expected proof_nodes (as sets, order doesn't matter)
192192+ const expectedProofNodes = new Set(testCase.results.proof_nodes);
193193+ expect(proofNodes).toEqual(expectedProofNodes);
194194+ });
195195+196196+ it('computes the correct inductive_proof_nodes', async () => {
197197+ // create combined store
198198+ const combinedStore = new MemoryBlockStore();
199199+ setMany(combinedStore.blocks, storeA.blocks);
200200+ setMany(combinedStore.blocks, storeB.blocks);
201201+202202+ // inductive proofs: nodes that get READ when applying ops in REVERSE order
203203+ // this is used for MST operation inversion (verifying B→A instead of A→B)
204204+205205+ const loggingStore = new LoggingBlockStore(combinedStore);
206206+207207+ const overlayStore = new OverlayBlockStore(new MemoryBlockStore(), loggingStore);
208208+ const nodeStore = new NodeStore(overlayStore);
209209+ const wrangler = new NodeWrangler(nodeStore);
210210+211211+ // start from rootB and apply operations in REVERSE order
212212+ let currentRoot = rootB;
213213+ const reversedOps = testCase.results.record_ops.toReversed();
214214+215215+ for (const op of reversedOps) {
216216+ if (op.old_value === null) {
217217+ // was CREATE, reverse it with DELETE
218218+ currentRoot = await wrangler.deleteRecord(currentRoot, op.rpath);
219219+ } else {
220220+ // was UPDATE or DELETE, reverse with PUT of old value
221221+ currentRoot = await wrangler.putRecord(currentRoot, op.rpath, { $link: op.old_value });
222222+ }
223223+ }
224224+225225+ // after reversing all operations, we should end up back at rootA
226226+ expect(currentRoot).toBe(rootA);
227227+228228+ // the blocks that were accessed (read) are the inductive proof nodes
229229+ const inductiveProofNodes = loggingStore.accessed;
230230+ const expectedInductiveProofNodes = new Set(testCase.results.inductive_proof_nodes);
231231+ expect(inductiveProofNodes).toEqual(expectedInductiveProofNodes);
189232 });
190233 });
191234});