this string has no description
0
parse_dict.ts
79 lines 2.3 kB view raw
/**
 * @description
 * This is a modified version of parse-dictd to be used with Deno, using only
 * std dependencies.
 *
 * The script reads a gzip-compressed dictionary body and its index file,
 * pairs every indexed headword with the body line found at its offset, and
 * writes the resulting map to `dict.json`.
 *
 * @reference https://github.com/nvdnkpr/parse-dictd
 */
import { TextLineStream } from "jsr:@std/streams/text-line-stream";
import { toTransformStream } from "jsr:@std/streams/to-transform-stream";

/** Digits of the base-64 numbering used by the offset column of the index. */
const az = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/** Maps each base-64 digit to its numeric value (0-63). */
const codes: Record<string, number> = {};
for (let i = 0; i < az.length; i++) codes[az.charAt(i)] = i;

// Hoisted so they are not re-created for every line of the dictionary body.
const encoder = new TextEncoder();
const NON_PRINTABLE_ASCII = /[^\x20-\x7E]/g;
const LEADING_LABEL = /^\w+:/;

/** A headword together with the translations parsed from its body line. */
interface DictEntry {
  from: string;
  to: string[];
}

/**
 * Decodes a base-64 encoded number as found in the index file.
 *
 * @param s Digits drawn from {@link az}, most significant digit first.
 * @returns The decoded value; `0` for an empty string and `NaN` when `s`
 * contains a character outside the alphabet.
 */
function decode(s: string): number {
  let index = 0;
  for (const ch of s) {
    const digit = codes[ch];
    if (digit === undefined) return NaN;
    index = index * 64 + digit;
  }
  return index;
}

/**
 * Turns one raw body line into a list of translations: non-printable and
 * non-ASCII characters are dropped, a leading `word:` label is removed, and
 * the remainder is split on commas.
 */
function toTranslations(line: string): string[] {
  return line.trim()
    .replace(NON_PRINTABLE_ASCII, "")
    .replace(LEADING_LABEL, "")
    .trim()
    .split(",");
}

/**
 * Builds a headword -> translations map from a dictionary body and its index.
 *
 * The index is consumed completely first: every line with exactly three
 * tab-separated fields contributes `fields[0]` (the headword) keyed by the
 * decoded `fields[1]` (the byte offset of its body line). The body is then
 * scanned line by line while tracking the running byte offset; a line that
 * starts at an indexed offset becomes that headword's entry.
 *
 * @param dstream Decoded text of the (already decompressed) dictionary body.
 * @param istream Decoded text of the index file.
 * @returns An object mapping each matched headword to its translations. When
 * several body lines resolve to the same headword, the last one wins.
 */
async function parse(
  dstream: ReadableStream<string>,
  istream: ReadableStream<string>,
): Promise<Record<string, string[]>> {
  const offsets = new Map<number, string>();

  // Drain the index directly; nothing needs to be emitted downstream.
  for await (const line of istream.pipeThrough(new TextLineStream())) {
    const fields = line.trim().split("\t");
    if (fields.length !== 3) continue;

    const offset = decode(fields[1]);
    // An undecodable offset can never equal a byte position, so skip it.
    if (Number.isNaN(offset)) continue;

    offsets.set(offset, fields[0]);
  }

  let pos = 0;

  const entryStream = dstream
    .pipeThrough(new TextLineStream())
    .pipeThrough(toTransformStream(
      async function* (
        src: ReadableStream<string>,
      ): AsyncGenerator<DictEntry> {
        for await (const line of src) {
          const word = offsets.get(pos);
          if (word !== undefined) {
            yield { from: word, to: toTranslations(line) };
          }

          // TextLineStream hands over the line without its terminator, so the
          // newline byte has to be added back or `pos` drifts by one per line
          // and stops matching the index. Assumes "\n" line endings.
          pos += encoder.encode(line).length + 1;
        }
      },
    ));

  // Prototype-less so that a headword such as "__proto__" is stored as data.
  const results: Record<string, string[]> = Object.create(null);

  for await (const entry of entryStream) {
    results[entry.from] = entry.to;
  }

  return results;
}

const dzUtf8Stream = (await Deno.open("spa-eng/spa-eng.dict.dz")).readable
  .pipeThrough(new DecompressionStream("gzip"))
  .pipeThrough(new TextDecoderStream());

const indexUtf8Stream = (await Deno.open("spa-eng/spa-eng.index")).readable
  .pipeThrough(new TextDecoderStream());

const r = await parse(dzUtf8Stream, indexUtf8Stream);
await Deno.writeTextFile("dict.json", JSON.stringify(r, null, "\t"));