this string has no description
0
parse_dict.ts
1/**
2 * @description
3 * This is a modified version of parse-dictd to be used with Deno, using only
4 * std dependencies.
5 *
6 * @reference https://github.com/nvdnkpr/parse-dictd
7 */
8import { TextLineStream } from "jsr:@std/streams/text-line-stream";
9import { toTransformStream } from "jsr:@std/streams/to-transform-stream";
10
// Base64 digit alphabet, ordered by digit value: "A" is 0 and "/" is 63.
const az = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

// Reverse lookup from a digit character to its numeric value. Declared as a
// string-keyed record so that indexing it by character type-checks under
// strict mode; characters outside the alphabet are simply absent.
const codes: Record<string, number> = {};
for (let value = 0; value < az.length; value++) {
  codes[az.charAt(value)] = value;
}
14
// Open the gzip-compressed dictionary and expose it as a stream of decoded text.
const dictFile = await Deno.open("spa-eng/spa-eng.dict.dz");
const dzUtf8Stream = dictFile.readable
  .pipeThrough(new DecompressionStream("gzip"))
  .pipeThrough(new TextDecoderStream());

// The index file is read without decompression; it only needs text decoding.
const indexFile = await Deno.open("spa-eng/spa-eng.index");
const indexUtf8Stream = indexFile.readable.pipeThrough(new TextDecoderStream());

// Build the dictionary object, then save it as tab-indented JSON.
const dictionary = await parse(dzUtf8Stream, indexUtf8Stream);
const serialized = JSON.stringify(dictionary, null, "\t");
await Deno.writeTextFile("dict.json", serialized);
24
/**
 * Builds an object mapping index headwords to pieces of dictionary text.
 *
 * The index stream is consumed in full first: every line that splits into
 * exactly three tab-separated fields registers its first field (the headword)
 * under the number obtained by decoding its second field. The dictionary
 * stream is then read line by line while a running position counter is kept;
 * whenever the counter equals a registered number, the current line is
 * cleaned up, split on commas, and stored under that headword.
 *
 * @param dstream Decoded text of the dictionary file.
 * @param istream Decoded text of the index file.
 * @returns An object keyed by headword. When the same headword is emitted
 *   more than once, the last occurrence wins.
 */
async function parse(
  dstream: ReadableStream<string>,
  istream: ReadableStream<string>,
): Promise<Record<string, string[]>> {
  type Entry = { from: string; to: string[] };

  // Decoded offset -> headword, filled from the index before any dictionary
  // line is looked at.
  const headwordByOffset = new Map<number, string>();

  for await (
    const indexLine of istream.pipeThrough<string>(new TextLineStream())
  ) {
    const fields = indexLine.trim().split("\t");
    if (fields.length !== 3) continue; // skip blank or malformed index lines
    headwordByOffset.set(decode(fields[1]), fields[0]);
  }

  // One encoder is enough; it is only used to measure each line's byte length.
  const encoder = new TextEncoder();
  let pos = 0;

  const entries = dstream
    .pipeThrough(new TextLineStream())
    .pipeThrough<Entry>(
      toTransformStream(async function* (
        lines: ReadableStream<string>,
      ): AsyncGenerator<Entry> {
        for await (const line of lines) {
          const headword = headwordByOffset.get(pos);
          if (headword !== undefined) {
            // Drop everything outside printable ASCII, then a leading
            // "word:" prefix, and split what is left on commas.
            const to = line.trim()
              .replace(/[^\x20-\x7E]/g, "")
              .replace(/^\w+:/, "")
              .trim().split(",");
            yield { from: headword, to };
          }

          // NOTE(review): TextLineStream removes the line terminator, so this
          // adds the byte length of the line WITHOUT its newline. If the index
          // offsets are byte offsets into the raw dictionary text, `pos` will
          // fall behind by the terminator length on every line — TODO confirm
          // against the data files before changing.
          pos += encoder.encode(line).length;
        }
      }),
    );

  const results: Record<string, string[]> = {};
  for await (const { from, to } of entries) {
    results[from] = to;
  }
  return results;
}
71
/**
 * Decodes a string of base64 digits, most significant digit first, into the
 * number it represents.
 *
 * @param s Digits to decode; each character is looked up in `codes`.
 * @returns The decoded value. An empty string decodes to `0`. If any
 *   character is missing from `codes` the result is `NaN`.
 */
function decode(s: string): number {
  let index = 0;
  for (let i = 0; i < s.length; i++) {
    // Horner's rule: shift the accumulated value up by one base-64 digit and
    // add the next one. Equivalent to summing digit * 64^(remaining places).
    index = index * 64 + codes[s.charAt(i)];
  }
  return index;
}