/**
 * Dictionary utilities: reading and interpreting the shared CJK dictionary files
 * and user-language meaning/hint/reading files.
 *
 * All files live under data/lang/ and are keyed by string ids (e.g. "c-00001").
 */

import { Transliteration } from '$/enums.ts'
import type { Reading } from '$/models/subjects.ts'
import { readTsv } from './fs.ts'

/** A single entry from characters.tsv, vocabulary.tsv, or radicals.tsv. */
export interface Definition {
  /** Unique string id (e.g. "c-00001" for characters, "v-00001" for vocabulary). */
  id: string
  /** Value of the `hans` column (simplified form). */
  hans: string
  /** Value of the `hant` column (traditional form). */
  hant: string
  /** Japanese-specific form (kanji col in characters.tsv, ja col in vocabulary.tsv). */
  ja?: string
}

/** A single row of a hint TSV, as produced by `readHints`. */
export interface Hint {
  /** Subject id the hint belongs to. */
  id: string
  /** Value of the `locale` column; the literal 'ALL' marks hints that apply to every target language. */
  locale: string
  /** Value of the `en` column (the hint text). */
  en: string
}

/** Maps TSV column names to their Transliteration enum values. */
const COL_TYPE: Record<string, Transliteration> = {
  pinyin: Transliteration.Pinyin,
  jyutping: Transliteration.Jyutping,
  zhuyin: Transliteration.Zhuyin,
  kunyomi: Transliteration.Kunyomi,
  onyomi: Transliteration.Onyomi,
  reading: Transliteration.Hiragana,
}

/**
 * Reads a CJK dictionary TSV (characters.tsv, vocabulary.tsv, or radicals.tsv).
 *
 * @param path Path handed to `readTsv`.
 * @returns One Definition per row. `ja` is taken from the `kanji` column, falling
 *   back to the `ja` column; it is `undefined` when both are missing or empty.
 */
export function readDict(path: string): Definition[] {
  return readTsv(path).map((row) => ({
    id: row.id,
    hans: row.hans,
    hant: row.hant,
    // `||` (not `??`) on purpose: an empty cell must fall through to the next source.
    ja: row.kanji || row.ja || undefined,
  }))
}

/**
 * Returns a dictionary indexed by traditional (hant) form.
 * When several rows share the same key, the last one wins.
 */
export function readDictByHant(path: string): Record<string, Definition> {
  return Object.fromEntries(readDict(path).map((d) => [d.hant, d]))
}

/**
 * Returns a dictionary indexed by simplified (hans) form.
 * When several rows share the same key, the last one wins.
 */
export function readDictByHans(path: string): Record<string, Definition> {
  return Object.fromEntries(readDict(path).map((d) => [d.hans, d]))
}

/**
 * Returns a dictionary indexed by subject id.
 * When several rows share the same key, the last one wins.
 */
export function readDictById(path: string): Record<string, Definition> {
  return Object.fromEntries(readDict(path).map((d) => [d.id, d]))
}

/**
 * Reads a user-language meanings file (e.g. lang/en/characters.tsv).
 * Returns a map of subject id → meaning string (the row's `value` column).
 *
 * Unlike the override/readings/hints readers, errors from `readTsv` are not caught here.
 */
export function readMeanings(path: string): Record<string, string> {
  return Object.fromEntries(readTsv(path).map((row) => [row.id, row.value]))
}

/**
 * Reads a meaning-override TSV (e.g. lang/en/meanings/ja.characters.tsv).
 * Returns a map of subject id → semicolon-separated meaning string (the row's `meaning` column).
 * Returns an empty map if the file doesn't exist.
 */
export function readMeaningOverrides(path: string): Record<string, string> {
  try {
    return Object.fromEntries(readTsv(path).map((row) => [row.id, row.meaning]))
  } catch {
    // Best-effort: any error thrown by readTsv (not only a missing file) yields an empty map.
    return {}
  }
}

/**
 * Reads a readings TSV (e.g. lang/zh_CN/readings.tsv, lang/ja/reading.characters.tsv)
 * and returns a map of subject id → Reading[].
 *
 * Each non-id column maps to a Transliteration type via COL_TYPE; columns without a
 * mapping are ignored. Semicolon-separated values produce multiple readings, each
 * trimmed, with empty pieces dropped. Every reading has `isAcceptedAnswer: true`.
 *
 * Columns are visited in the REVERSE of the key order of the first row. Within a row,
 * the first value of the first visited column that actually contributes a reading is
 * marked `isPrimary`; all other readings are not. Rows that yield no readings are
 * omitted from the result.
 *
 * Returns an empty map if the file doesn't exist.
 */
export function readReadingsMap(path: string): Record<string, Reading[]> {
  const result: Record<string, Reading[]> = {}
  try {
    const rows = readTsv(path)
    const first = rows[0]
    if (first === undefined) return result

    // The column list is derived from the first row only and then reversed, so the
    // last key of that row is visited first and therefore supplies the primary reading.
    // NOTE(review): the original comment said "Reverse so isPrimary goes to the first
    // column" — that holds only if readTsv yields keys in reverse header order; confirm.
    const cols = Object.keys(first)
      .filter((k) => k !== 'id')
      .reverse()

    for (const row of rows) {
      const readings: Reading[] = []
      // Stays true until a column has contributed; skipped columns do not consume it.
      let firstCol = true
      for (const col of cols) {
        const val = row[col] || ''
        const type = COL_TYPE[col]
        // Explicit undefined check: a truthiness test would also discard an enum
        // member whose runtime value happens to be 0 or ''.
        if (type === undefined || !val) continue

        const values = val
          .split(';')
          .map((s) => s.trim())
          .filter(Boolean)
        for (const [i, value] of values.entries()) {
          readings.push({ value, type, isAcceptedAnswer: true, isPrimary: firstCol && i === 0 })
        }
        firstCol = false
      }
      if (readings.length) result[row.id] = readings
    }
  } catch {
    /* file missing — return empty (any other readTsv error is swallowed the same way) */
  }
  return result
}

/** Reads a hint TSV (lang/en/hints/*.tsv). Returns an empty array if the file doesn't exist. */
export function readHints(path: string): Hint[] {
  try {
    return readTsv(path).map((row) => ({ id: row.id, locale: row.locale, en: row.en }))
  } catch {
    // Best-effort: any error thrown by readTsv (not only a missing file) yields no hints.
    return []
  }
}

/**
 * Reads hints and returns a nested map of subject id → locale → hint text.
 * If the same (id, locale) pair occurs more than once, the last row wins.
 */
export function readHintsById(path: string): Record<string, Record<string, string>> {
  const result: Record<string, Record<string, string>> = {}
  for (const row of readHints(path)) {
    // Create the per-subject map on first sight, then reuse it.
    const byLocale = (result[row.id] ??= {})
    byLocale[row.locale] = row.en
  }
  return result
}

/**
 * Reads hints and returns only those with locale === 'ALL', keyed by subject id.
 * Used for hints that apply regardless of target language.
 */
export function readAllLocaleHints(path: string): Record<string, string> {
  return Object.fromEntries(
    readHints(path)
      .filter((row) => row.locale === 'ALL')
      .map((row) => [row.id, row.en]),
  )
}