👁️
5
fork

Configure Feed

Select the types of activity you want to include in your feed.

let claude write a streaming json parser

+188 -2
+99 -1
scripts/download-scryfall.test.ts
/**
 * Test helper: exposes the given strings as an async stream of chunks,
 * one chunk per argument, in order.
 */
async function* chunksFrom(...strings: string[]): AsyncIterable<string> {
  yield* strings;
}

describe("parseJsonArrayStream", () => {
  it("parses a simple array", async () => {
    const source = chunksFrom('[{"a":1},{"a":2},{"a":3}]');
    const parsed = await parseJsonArrayStream<{ a: number }>(source);
    expect(parsed).toEqual([{ a: 1 }, { a: 2 }, { a: 3 }]);
  });

  it("handles nested objects", async () => {
    const source = chunksFrom('[{"a":{"b":{"c":42}}}]');
    const parsed = await parseJsonArrayStream<{ a: { b: { c: number } } }>(
      source,
    );
    expect(parsed).toEqual([{ a: { b: { c: 42 } } }]);
  });

  it("handles escaped quotes in strings", async () => {
    const source = chunksFrom('[{"name":"say \\"hello\\""}]');
    const parsed = await parseJsonArrayStream<{ name: string }>(source);
    expect(parsed).toEqual([{ name: 'say "hello"' }]);
  });

  it("handles braces inside strings", async () => {
    const source = chunksFrom('[{"text":"cost is {2}{W}"}]');
    const parsed = await parseJsonArrayStream<{ text: string }>(source);
    expect(parsed).toEqual([{ text: "cost is {2}{W}" }]);
  });

  it("handles backslashes that are not escaping quotes", async () => {
    const source = chunksFrom('[{"path":"C:\\\\Users\\\\foo"}]');
    const parsed = await parseJsonArrayStream<{ path: string }>(source);
    expect(parsed).toEqual([{ path: "C:\\Users\\foo" }]);
  });

  it("handles multi-byte UTF-8 characters", async () => {
    const source = chunksFrom('[{"name":"Séance"},{"name":"Lhurgoyf™"}]');
    const parsed = await parseJsonArrayStream<{ name: string }>(source);
    expect(parsed).toEqual([{ name: "Séance" }, { name: "Lhurgoyf™" }]);
  });

  it("handles objects split across chunks", async () => {
    // Chunk boundaries fall inside a key and inside a string value.
    const source = chunksFrom('[{"na', 'me":"split","', 'id":1}]');
    const parsed = await parseJsonArrayStream<{ name: string; id: number }>(
      source,
    );
    expect(parsed).toEqual([{ name: "split", id: 1 }]);
  });

  it("handles whitespace and newlines between elements", async () => {
    const input = `[
      {"a": 1},
      {"a": 2},
      {"a": 3}
    ]`;
    const parsed = await parseJsonArrayStream<{ a: number }>(chunksFrom(input));
    expect(parsed).toEqual([{ a: 1 }, { a: 2 }, { a: 3 }]);
  });

  it("handles empty array", async () => {
    const parsed = await parseJsonArrayStream(chunksFrom("[]"));
    expect(parsed).toEqual([]);
  });

  it("handles arrays inside objects (not confused for top-level)", async () => {
    const source = chunksFrom('[{"colors":["W","U"]},{"colors":[]}]');
    const parsed = await parseJsonArrayStream<{ colors: string[] }>(source);
    expect(parsed).toEqual([{ colors: ["W", "U"] }, { colors: [] }]);
  });

  it("handles colons and commas inside strings", async () => {
    const source = chunksFrom('[{"text":"key: value, other: thing"}]');
    const parsed = await parseJsonArrayStream<{ text: string }>(source);
    expect(parsed).toEqual([{ text: "key: value, other: thing" }]);
  });

  it("handles escaped backslash before quote (regression)", async () => {
    // \" is an escaped quote, but \\" is an escaped backslash followed by
    // the end of the string.
    const source = chunksFrom('[{"a":"ends with backslash\\\\","b":1}]');
    const parsed = await parseJsonArrayStream<{ a: string; b: number }>(source);
    expect(parsed).toEqual([{ a: "ends with backslash\\", b: 1 }]);
  });
});
+89 -1
scripts/download-scryfall.ts
/**
 * Parse a JSON array from an async iterable of string chunks, element by element.
 *
 * The scanner only tracks enough structure (string/escape state and bracket
 * depth) to find the boundaries of the top-level array's elements; each
 * element's text is then handed to `JSON.parse` on its own. This keeps the
 * largest string ever built to the size of one element rather than the
 * whole document.
 *
 * Elements may be any JSON value (objects, arrays, strings, numbers,
 * booleans, null). Anything before the first `[` is skipped, and scanning
 * stops at the matching `]`; later input is not examined. Empty slots
 * between separators (e.g. a trailing comma) are skipped.
 *
 * @param chunks - Text chunks in document order. Chunk boundaries may fall
 *   anywhere, including inside strings and escape sequences.
 * @returns The parsed elements, in document order. `T` is asserted, not
 *   validated.
 * @throws SyntaxError if an element is not valid JSON, if the input contains
 *   no top-level array, or if the input ends before the array is closed
 *   (e.g. a truncated download).
 */
export async function parseJsonArrayStream<T>(
  chunks: AsyncIterable<string>,
): Promise<T[]> {
  const results: T[] = [];
  // Text of the element in progress that arrived in earlier chunks.
  let pending = "";
  // 0 = outside the top-level array, 1 = directly inside it, >1 = nested.
  let depth = 0;
  let inString = false;
  let escaped = false;

  // Parse the element made of `pending` plus `tail`; blank slots are skipped.
  const emit = (tail: string): void => {
    const elementText = pending + tail;
    pending = "";
    if (elementText.trim() !== "") {
      results.push(JSON.parse(elementText) as T);
    }
  };

  for await (const text of chunks) {
    // Index in this chunk where the not-yet-buffered part of the current
    // element begins. Slicing whole spans avoids per-character concatenation.
    let segmentStart = 0;

    for (let i = 0; i < text.length; i++) {
      const char = text.charAt(i);

      if (depth === 0) {
        // Still looking for the opening bracket; ignore everything else.
        if (char === "[") {
          depth = 1;
          segmentStart = i + 1;
        }
        continue;
      }

      if (inString) {
        // Inside a string nothing is structural; only track where it ends.
        if (escaped) {
          escaped = false;
        } else if (char === "\\") {
          escaped = true;
        } else if (char === '"') {
          inString = false;
        }
        continue;
      }

      switch (char) {
        case '"':
          inString = true;
          break;
        case "{":
        case "[":
          depth++;
          break;
        case "}":
        case "]":
          if (depth > 1) {
            depth--;
          } else if (char === "]") {
            // Closing bracket of the top-level array: flush the last element.
            emit(text.slice(segmentStart, i));
            return results;
          }
          // A stray "}" at depth 1 stays in the element text so that
          // JSON.parse reports it.
          break;
        case ",":
          if (depth === 1) {
            emit(text.slice(segmentStart, i));
            segmentStart = i + 1;
          }
          break;
        default:
          break;
      }
    }

    if (depth >= 1) {
      // Carry the unfinished element over to the next chunk.
      pending += text.slice(segmentStart);
    }
  }

  // Reaching here means the closing "]" was never seen.
  if (depth === 0) {
    throw new SyntaxError("No top-level JSON array found in input");
  }
  throw new SyntaxError(
    `Unexpected end of input inside JSON array (after ${results.length} complete elements)`,
  );
}

/**
 * Stream-parse a JSON array file element by element.
 * Avoids loading the entire file as a single string (which exceeds V8's
 * ~512MB string length limit for large Scryfall bulk data).
 *
 * @param filePath - Path of a file containing a single JSON array; it is
 *   read as UTF-8 text.
 * @returns The parsed elements of the array, in file order.
 */
async function streamParseJsonArrayFile<T>(filePath: string): Promise<T[]> {
  const stream = createReadStream(filePath, {
    encoding: "utf-8",
    // Read in 1 MiB chunks.
    highWaterMark: 1024 * 1024,
  });
  return parseJsonArrayStream<T>(stream);
}