this repo has no description
1/**
2 * Dictionary utilities: reading and interpreting the shared CJK dictionary files
3 * and user-language meaning/hint/reading files.
4 *
5 * All files live under data/lang/ and are keyed by string ids (e.g. "c-00001").
6 */
7
8import { Transliteration } from '$/enums.ts'
9import type { Reading } from '$/models/subjects.ts'
10import { readTsv } from './fs.ts'
11
/**
 * One row of a shared CJK dictionary file (characters.tsv, vocabulary.tsv, or
 * radicals.tsv), as produced by readDict.
 */
export interface Definition {
  /** Unique string id (e.g. "c-00001" for characters, "v-00001" for vocabulary). */
  id: string
  /** Simplified form, taken from the `hans` column. */
  hans: string
  /** Traditional form, taken from the `hant` column. */
  hant: string
  /**
   * Japanese-specific form (kanji col in characters.tsv, ja col in vocabulary.tsv).
   * Undefined when the row has neither value.
   */
  ja?: string
}
21
/** One row of a hint TSV (lang/en/hints/*.tsv), as produced by readHints. */
export interface Hint {
  /** Id of the subject the hint belongs to. */
  id: string
  /** Value of the `locale` column; 'ALL' marks a hint that applies regardless of target language. */
  locale: string
  /** Hint text, taken from the `en` column. */
  en: string
}
27
/**
 * Lookup table from a readings-TSV column header to the Transliteration value
 * used for readings found in that column. Headers that are not listed here have
 * no entry in the table.
 */
const COL_TYPE: Record<string, Transliteration> = {
  pinyin: Transliteration.Pinyin,
  jyutping: Transliteration.Jyutping,
  zhuyin: Transliteration.Zhuyin,
  kunyomi: Transliteration.Kunyomi,
  onyomi: Transliteration.Onyomi,
  // The generic `reading` header is stored as a hiragana reading.
  reading: Transliteration.Hiragana,
}
37
/**
 * Loads a shared CJK dictionary TSV (characters.tsv, vocabulary.tsv, or radicals.tsv)
 * and converts every row into a Definition.
 *
 * The Japanese form comes from the `kanji` column when it is non-empty, otherwise
 * from the `ja` column; when both are empty or absent, `ja` is undefined.
 *
 * @param path - Path of the TSV file, passed straight to readTsv.
 * @returns One Definition per row, in file order.
 */
export function readDict(path: string): Definition[] {
  const definitions: Definition[] = []
  for (const row of readTsv(path)) {
    const { id, hans, hant } = row
    const japanese = row.kanji || row.ja || undefined
    definitions.push({ id, hans, hant, ja: japanese })
  }
  return definitions
}
47
/**
 * Loads a dictionary TSV via readDict and indexes it by traditional (hant) form.
 * When several entries share a hant value, the one appearing last in the file wins.
 */
export function readDictByHant(path: string): Record<string, Definition> {
  const byHant: [string, Definition][] = []
  for (const definition of readDict(path)) {
    byHant.push([definition.hant, definition])
  }
  return Object.fromEntries(byHant)
}
52
/**
 * Loads a dictionary TSV via readDict and indexes it by simplified (hans) form.
 * When several entries share a hans value, the one appearing last in the file wins.
 */
export function readDictByHans(path: string): Record<string, Definition> {
  const byHans: [string, Definition][] = []
  for (const definition of readDict(path)) {
    byHans.push([definition.hans, definition])
  }
  return Object.fromEntries(byHans)
}
57
/**
 * Loads a dictionary TSV via readDict and indexes it by subject id.
 * When several entries share an id, the one appearing last in the file wins.
 */
export function readDictById(path: string): Record<string, Definition> {
  const byId: [string, Definition][] = []
  for (const definition of readDict(path)) {
    byId.push([definition.id, definition])
  }
  return Object.fromEntries(byId)
}
62
/**
 * Loads a user-language meanings file (e.g. lang/en/characters.tsv) and maps each
 * row's `id` column to its `value` column.
 *
 * Unlike the override/hint readers in this module, errors thrown by readTsv are
 * not caught here and propagate to the caller.
 *
 * @returns Map of subject id → meaning string.
 */
export function readMeanings(path: string): Record<string, string> {
  const meanings: [string, string][] = []
  for (const { id, value } of readTsv(path)) {
    meanings.push([id, value])
  }
  return Object.fromEntries(meanings)
}
70
/**
 * Loads a meaning-override TSV (e.g. lang/en/meanings/ja.characters.tsv) and maps
 * each row's `id` column to its `meaning` column (a semicolon-separated string).
 *
 * Best-effort: if reading or converting the file throws for any reason — for
 * example because the file doesn't exist — an empty map is returned instead.
 *
 * @returns Map of subject id → semicolon-separated meaning string.
 */
export function readMeaningOverrides(path: string): Record<string, string> {
  try {
    const overrides: [string, string][] = []
    for (const row of readTsv(path)) {
      overrides.push([row.id, row.meaning])
    }
    return Object.fromEntries(overrides)
  } catch {
    return {}
  }
}
83
/**
 * Reads a readings TSV (e.g. lang/zh_CN/readings.tsv, lang/ja/reading.characters.tsv)
 * and returns a map of subject id → Reading[].
 *
 * Every column other than `id` is looked up in COL_TYPE to find its Transliteration
 * type; columns without a COL_TYPE entry are ignored. A cell may hold several
 * semicolon-separated values: each one is trimmed, empty segments are dropped, and
 * every remaining value becomes its own Reading with isAcceptedAnswer set to true.
 *
 * The first reading emitted for a subject is the only one marked isPrimary, so a
 * blank or separator-only cell never uses up the primary slot. Subjects that end up
 * with no readings are omitted from the result.
 *
 * @param path - Path of the TSV file, passed straight to readTsv.
 * @returns Map of subject id → readings. Empty if readTsv throws (e.g. the file
 *   doesn't exist) or the file has no rows.
 */
export function readReadingsMap(path: string): Record<string, Reading[]> {
  const result: Record<string, Reading[]> = {}

  // Only the read itself is best-effort. Keeping the conversion outside the try
  // means a processing error can no longer return a silently truncated map.
  let rows: ReturnType<typeof readTsv>
  try {
    rows = readTsv(path)
  } catch {
    return result // file missing — return empty
  }

  const firstRow = rows[0]
  if (firstRow == null) return result

  // Resolve each column's type once instead of once per row.
  //
  // NOTE(review): the column list is reversed before iterating, so the primary
  // reading comes from the first non-empty column of the *reversed* key order.
  // Whether that is the file's first column depends on the key order readTsv
  // produces — confirm against readTsv before changing or removing the reversal.
  const typedCols: [string, Transliteration][] = []
  for (const col of Object.keys(firstRow).filter((k) => k !== 'id').reverse()) {
    const type = COL_TYPE[col]
    // Compare with undefined rather than testing truthiness: a numeric enum
    // member whose value is 0 is falsy and would otherwise be skipped.
    if (type !== undefined) typedCols.push([col, type])
  }

  for (const row of rows) {
    const readings: Reading[] = []
    for (const [col, type] of typedCols) {
      for (const piece of (row[col] ?? '').split(';')) {
        const value = piece.trim()
        if (!value) continue
        readings.push({ value, type, isAcceptedAnswer: true, isPrimary: readings.length === 0 })
      }
    }
    if (readings.length) result[row.id] = readings
  }
  return result
}
116
/**
 * Loads a hint TSV (lang/en/hints/*.tsv), keeping the `id`, `locale`, and `en`
 * columns of every row.
 *
 * Best-effort: if reading or converting the file throws for any reason — for
 * example because the file doesn't exist — an empty array is returned instead.
 */
export function readHints(path: string): Hint[] {
  try {
    const hints: Hint[] = []
    for (const { id, locale, en } of readTsv(path)) {
      hints.push({ id, locale, en })
    }
    return hints
  } catch {
    return []
  }
}
125
/**
 * Loads hints via readHints and groups them into a nested map of
 * subject id → locale → hint text. If the same id/locale pair occurs more than
 * once, the row appearing last in the file wins.
 */
export function readHintsById(path: string): Record<string, Record<string, string>> {
  const byId: Record<string, Record<string, string>> = {}
  for (const { id, locale, en } of readHints(path)) {
    // Create the per-subject bucket the first time this id is seen.
    const perLocale = (byId[id] ??= {})
    perLocale[locale] = en
  }
  return byId
}
135
/**
 * Loads hints via readHints and keeps only the rows whose locale is exactly 'ALL',
 * i.e. hints that apply regardless of target language.
 *
 * @returns Map of subject id → hint text; when an id has several 'ALL' rows, the
 *   one appearing last in the file wins.
 */
export function readAllLocaleHints(path: string): Record<string, string> {
  const universal: [string, string][] = []
  for (const hint of readHints(path)) {
    if (hint.locale !== 'ALL') continue
    universal.push([hint.id, hint.en])
  }
  return Object.fromEntries(universal)
}