/**
 * @description
 * This is a modified version of parse-dictd to be used with Deno, using only
 * std dependencies.
 *
 * @reference https://github.com/nvdnkpr/parse-dictd
 */
import { TextLineStream } from "jsr:@std/streams/text-line-stream";
import { toTransformStream } from "jsr:@std/streams/to-transform-stream";

/** Digit alphabet used by the base-64 encoded numbers in the index file. */
const az = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/** Maps every character of `az` to its position, i.e. its digit value. */
const codes: Record<string, number> = {};
for (let i = 0; i < az.length; i++) codes[az.charAt(i)] = i;

/** A single dictionary entry produced while scanning the dictionary file. */
interface DictEntry {
  from: string;
  to: string[];
}

const dzUtf8Stream = (await Deno.open("spa-eng/spa-eng.dict.dz")).readable
  .pipeThrough(new DecompressionStream("gzip"))
  .pipeThrough(new TextDecoderStream());

const indexUtf8Stream = (await Deno.open("spa-eng/spa-eng.index")).readable
  .pipeThrough(new TextDecoderStream());

const r = await parse(dzUtf8Stream, indexUtf8Stream);
await Deno.writeTextFile("dict.json", JSON.stringify(r, null, "\t"));

/**
 * Builds a `word -> translations` object from a dictionary text stream and its
 * index text stream.
 *
 * The index is read first: every line with exactly three tab-separated fields
 * contributes a mapping from the decoded second field (a byte offset) to the
 * first field (the word). The dictionary is then scanned line by line while a
 * running byte offset is kept; whenever a line starts at an indexed offset,
 * that line is cleaned up and stored as the word's translations.
 *
 * @param dstream Decoded (text) stream of the uncompressed dictionary file.
 * @param istream Decoded (text) stream of the index file.
 * @returns An object keyed by word whose values are the comma-separated parts
 *   of the dictionary line found at the word's offset.
 */
async function parse(
  dstream: ReadableStream<string>,
  istream: ReadableStream<string>,
): Promise<Record<string, string[]>> {
  // Byte offset in the dictionary -> word starting at that offset.
  const offsets = new Map<number, string>();

  // Read the whole index first so that offsets are known before scanning.
  for await (const indexLine of istream.pipeThrough(new TextLineStream())) {
    const fields = indexLine.trim().split("\t");
    if (fields.length !== 3) continue;
    const [word, encodedOffset] = fields;
    const offset = decode(encodedOffset);
    // An offset containing characters outside the alphabet can never match.
    if (Number.isNaN(offset)) continue;
    offsets.set(offset, word);
  }

  const encoder = new TextEncoder();
  let pos = 0;

  const entryStream = dstream
    .pipeThrough(new TextLineStream())
    .pipeThrough(
      toTransformStream(async function* (
        src: ReadableStream<string>,
      ): AsyncGenerator<DictEntry> {
        for await (const line of src) {
          const word = offsets.get(pos);
          if (word !== undefined) {
            yield { from: word, to: toTranslations(line) };
          }
          // TextLineStream drops the line terminator, so the newline byte has
          // to be counted explicitly or every later offset drifts by one per
          // line. This assumes LF line endings in the dictionary file.
          pos += encoder.encode(line).length + 1;
        }
      }),
    );

  const results: Record<string, string[]> = {};
  for await (const entry of entryStream) {
    results[entry.from] = entry.to;
  }
  return results;
}

/**
 * Cleans a dictionary line and splits it into its comma-separated parts:
 * non-printable / non-ASCII characters are removed, a leading `word:` prefix
 * is dropped and surrounding whitespace is trimmed.
 */
function toTranslations(line: string): string[] {
  return line.trim()
    .replace(/[^\x20-\x7E]/g, "")
    .replace(/^\w+:/, "")
    .trim()
    .split(",");
}

/**
 * Decodes a big-endian base-64 number written with the `az` alphabet.
 *
 * @param s Encoded number, most significant digit first.
 * @returns The decoded value; `0` for an empty string and `NaN` when `s`
 *   contains a character that is not part of the alphabet.
 */
function decode(s: string): number {
  let index = 0;
  for (const ch of s) {
    // Horner's rule: shift the accumulated value one digit and add the next.
    index = index * 64 + (codes[ch] ?? NaN);
  }
  return index;
}