this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

at main 146 lines 5.0 kB view raw
/**
 * Dictionary utilities: reading and interpreting the shared CJK dictionary files
 * and user-language meaning/hint/reading files.
 *
 * All files live under data/lang/ and are keyed by string ids (e.g. "c-00001").
 */

import { Transliteration } from '$/enums.ts'
import type { Reading } from '$/models/subjects.ts'
import { readTsv } from './fs.ts'

/** A single entry from characters.tsv, vocabulary.tsv, or radicals.tsv. */
export interface Definition {
  /** Unique string id (e.g. "c-00001" for characters, "v-00001" for vocabulary). */
  id: string
  /** Value of the `hans` column (simplified form). */
  hans: string
  /** Value of the `hant` column (traditional form). */
  hant: string
  /** Japanese-specific form (kanji col in characters.tsv, ja col in vocabulary.tsv). */
  ja?: string
}

/** A single row of a hint TSV. */
export interface Hint {
  /** Subject id the hint belongs to. */
  id: string
  /** Locale the hint applies to; 'ALL' is what readAllLocaleHints selects. */
  locale: string
  /** Hint text, taken from the `en` column. */
  en: string
}

/** Maps TSV column names to their Transliteration enum values. */
const COL_TYPE: Record<string, Transliteration> = {
  pinyin: Transliteration.Pinyin,
  jyutping: Transliteration.Jyutping,
  zhuyin: Transliteration.Zhuyin,
  kunyomi: Transliteration.Kunyomi,
  onyomi: Transliteration.Onyomi,
  reading: Transliteration.Hiragana,
}

/** Rows exactly as readTsv returns them. */
type TsvRows = ReturnType<typeof readTsv>

/**
 * Best-effort variant of readTsv for optional files: returns no rows when the
 * read throws (typically because the file doesn't exist). Only the read itself
 * is guarded, so errors in the caller's own processing are not hidden.
 */
function readOptionalTsv(path: string): TsvRows {
  try {
    return readTsv(path)
  } catch {
    return []
  }
}

/**
 * Reads a CJK dictionary TSV (characters.tsv, vocabulary.tsv, or radicals.tsv).
 *
 * `ja` is taken from the `kanji` column when it is non-empty, otherwise from the
 * `ja` column, and is left undefined when both are empty or absent.
 * Errors thrown by readTsv (e.g. a missing file) propagate to the caller.
 */
export function readDict(path: string): Definition[] {
  return readTsv(path).map((row) => ({
    id: row.id,
    hans: row.hans,
    hant: row.hant,
    ja: row.kanji || row.ja || undefined,
  }))
}

/**
 * Returns a dictionary indexed by traditional (hant) form.
 * When several entries share the same form, the last one read wins.
 */
export function readDictByHant(path: string): Record<string, Definition> {
  return Object.fromEntries(readDict(path).map((d) => [d.hant, d]))
}

/**
 * Returns a dictionary indexed by simplified (hans) form.
 * When several entries share the same form, the last one read wins.
 */
export function readDictByHans(path: string): Record<string, Definition> {
  return Object.fromEntries(readDict(path).map((d) => [d.hans, d]))
}

/** Returns a dictionary indexed by subject id. */
export function readDictById(path: string): Record<string, Definition> {
  return Object.fromEntries(readDict(path).map((d) => [d.id, d]))
}

/**
 * Reads a user-language meanings file (e.g. lang/en/characters.tsv).
 * Returns a map of subject id → meaning string (the `value` column).
 * Unlike the override/hint readers, errors thrown by readTsv propagate.
 */
export function readMeanings(path: string): Record<string, string> {
  return Object.fromEntries(readTsv(path).map((row) => [row.id, row.value]))
}

/**
 * Reads a meaning-override TSV (e.g. lang/en/meanings/ja.characters.tsv).
 * Returns a map of subject id → semicolon-separated meaning string.
 * Returns an empty map if the file doesn't exist.
 */
export function readMeaningOverrides(path: string): Record<string, string> {
  return Object.fromEntries(readOptionalTsv(path).map((row) => [row.id, row.meaning]))
}

/**
 * Reads a readings TSV (e.g. lang/zh_CN/readings.tsv, lang/ja/reading.characters.tsv)
 * and returns a map of subject id → Reading[].
 *
 * Each non-id column maps to a Transliteration type via COL_TYPE; columns without
 * a mapping are ignored. The column set is taken from the first row, and columns
 * are visited in the reverse of that row's key order. Semicolon-separated values
 * produce multiple readings (trimmed, empty pieces dropped). The first reading
 * produced for a row is the only one marked isPrimary. Rows that yield no
 * readings are omitted from the result.
 *
 * Returns an empty map if the file doesn't exist.
 */
export function readReadingsMap(path: string): Record<string, Reading[]> {
  const result: Record<string, Reading[]> = {}
  const rows = readOptionalTsv(path)
  const first = rows[0]
  if (!first) return result

  // Resolve column → type once; it does not depend on the row being processed.
  // Reverse so isPrimary goes to the first column.
  // NOTE(review): this relies on the key order readTsv produces — confirm that
  // reversing really puts the intended primary column first.
  const columns: Array<{ name: string; type: Transliteration }> = []
  for (const name of Object.keys(first).reverse()) {
    // Own-property check: a header such as "constructor" must not resolve to an
    // inherited Object.prototype member.
    if (name === 'id' || !Object.prototype.hasOwnProperty.call(COL_TYPE, name)) continue
    const type = COL_TYPE[name]
    // Compare against undefined rather than truthiness so a falsy enum value
    // (e.g. a numeric member equal to 0) is not mistaken for "no mapping".
    if (type !== undefined) columns.push({ name, type })
  }

  for (const row of rows) {
    const readings: Reading[] = []
    for (const { name, type } of columns) {
      const cell = row[name] || ''
      for (const piece of cell.split(';')) {
        const value = piece.trim()
        if (!value) continue
        // The first reading actually emitted is primary, so a leading cell that
        // contains only separators/whitespace cannot leave the row without one.
        readings.push({ value, type, isAcceptedAnswer: true, isPrimary: readings.length === 0 })
      }
    }
    if (readings.length) result[row.id] = readings
  }
  return result
}

/** Reads a hint TSV (lang/en/hints/*.tsv). Returns an empty array if the file doesn't exist. */
export function readHints(path: string): Hint[] {
  return readOptionalTsv(path).map((row) => ({ id: row.id, locale: row.locale, en: row.en }))
}

/**
 * Reads hints and returns a nested map of subject id → locale → hint text.
 * When the same id/locale pair occurs more than once, the last row wins.
 */
export function readHintsById(path: string): Record<string, Record<string, string>> {
  const result: Record<string, Record<string, string>> = {}
  for (const row of readHints(path)) {
    result[row.id] ??= {}
    result[row.id][row.locale] = row.en
  }
  return result
}

/**
 * Reads hints and returns only those with locale === 'ALL', keyed by subject id.
 * Used for hints that apply regardless of target language.
 */
export function readAllLocaleHints(path: string): Record<string, string> {
  return Object.fromEntries(
    readHints(path)
      .filter((row) => row.locale === 'ALL')
      .map((row) => [row.id, row.en]),
  )
}