this repo has no description
1/**
2 * Subject utilities: reading/writing compiled subject JSON files, and creating
3 * new Subject objects from dictionary + curriculum data.
4 *
5 * Compiled subject files live at www/static/gen/lang/{userLang}/{targetLang}.json.
6 */
7
8import { distinct } from '@std/collections/distinct'
9import { dirname } from '@std/path'
10import stringifyJSON from 'json-stringify-pretty-compact'
11import { Locale, SubjectType } from '$/enums.ts'
12import type { Audio, Subject } from '$/models/subjects.ts'
13import { APP_ROOT } from './fs.ts'
14import type { Definition } from './dict.ts'
15import type { Sentences } from './sentences.ts'
16
// Local aliases for the two subject types used when creating subjects.
const Character = SubjectType.Character
const Vocabulary = SubjectType.Vocabulary
18
19// ---------------------------------------------------------------------------
20// Subject I/O
21// ---------------------------------------------------------------------------
22
/**
 * Reads compiled subject JSON from `www/static/gen/`.
 *
 * This is deliberately best-effort: any failure to read or parse the file
 * yields an empty array instead of throwing. A file that parses but whose
 * top-level value is not an array is treated the same way, so callers can
 * always iterate the result.
 *
 * @param input - Path of the JSON file, appended to `APP_ROOT`.
 * @returns The parsed array, or `[]` when nothing usable could be read.
 */
export function readSubjects(input: string): Subject[] {
  try {
    const parsed: unknown = JSON.parse(Deno.readTextFileSync(APP_ROOT + input))
    // Only the top-level shape is checked here; individual entries are not validated.
    return Array.isArray(parsed) ? (parsed as Subject[]) : []
  } catch {
    return []
  }
}
31
/**
 * Loads compiled subjects via `readSubjects` and indexes them by `data.slug`.
 * Entries lacking an id or a slug are reported with `console.warn` and left out.
 * When several entries share a slug, the last one read wins.
 */
export function readSubjectsMap(input: string): Record<string, Subject> {
  return readSubjects(input).reduce<Record<string, Subject>>((bySlug, entry) => {
    const slug = entry.data?.slug
    if (entry.id && slug) {
      bySlug[slug] = entry
    } else {
      console.warn(
        `Skipping subject with missing id/slug in ${input}:`,
        JSON.stringify(entry).slice(0, 120),
      )
    }
    return bySlug
  }, {})
}
50
/**
 * Writes compiled subjects to `www/static/gen/`. Before writing, subjects are:
 * - Filtered to require id, slug, and type (corrupt entries are dropped with a warning)
 * - Remapped with a stable property order for consistent diffs; empty or missing
 *   `learnCards` / `quizCards` are replaced with defaults
 * - Sorted by level → type (Radical < Character < Vocabulary) → position, with
 *   unplaced subjects (falsy level or position) last, in their incoming order
 *
 * A warning is logged when two subjects of the same type share a level/position
 * slot, except for the `0-0` slot.
 *
 * @param output - Destination path, appended to `APP_ROOT`; parent directories are created.
 * @param subjects - Subjects to write. The array passed in is not reordered.
 * @param minify - When true, emit compact `JSON.stringify` output instead of pretty JSON.
 */
export function writeSubjects(output: string, subjects: Subject[], minify = false): void {
  const levelAndPosition = new Set<string>()

  // Hoisted out of the comparator: it is the same table for every comparison.
  // Types missing from the table rank 0.
  const typePriority: Record<string, number> = { Radical: 0, Character: 1, Vocabulary: 2 }
  const isUnplaced = (subject: Subject): boolean =>
    !subject.data.level || !subject.data.position

  const toWrite = subjects
    .filter((subject) => {
      if (!subject.id || !subject.data?.slug || !subject.data?.type) {
        console.warn(
          'Dropping invalid subject (missing id/slug/type):',
          JSON.stringify(subject).slice(0, 120),
        )
        return false
      }
      return true
    })
    .map((subject) => {
      const { data } = subject
      const levelPosition = `${data.type}-${data.level}-${data.position}`
      if (levelAndPosition.has(levelPosition) && levelPosition !== `${data.type}-0-0`) {
        console.warn(`Two subjects at same position ${levelPosition}: ${data.slug}`)
      } else {
        levelAndPosition.add(levelPosition)
      }
      // Explicit property order for stable JSON diffs
      return {
        id: subject.id,
        hiddenAt: subject.hiddenAt,
        learnCards: subject.learnCards?.length ? subject.learnCards : ['meanings'],
        quizCards: subject.quizCards?.length ? subject.quizCards : ['meanings', 'readings'],
        data: {
          audios: data.audios,
          character: data.character,
          requiredSubjects: data.requiredSubjects,
          examples: data.examples,
          level: data.level,
          meanings: data.meanings,
          meaningHint: data.meaningHint,
          meaningMnemonic: data.meaningMnemonic,
          position: data.position,
          readings: data.readings,
          readingHint: data.readingHint,
          readingMnemonic: data.readingMnemonic,
          slug: data.slug,
          srsId: data.srsId,
          type: data.type,
        },
      } as Subject
    })
    .sort((a, b) => {
      const aUnplaced = isUnplaced(a)
      const bUnplaced = isUnplaced(b)
      // Two unplaced subjects must compare equal: returning 1 for both (a, b)
      // and (b, a) makes the comparator inconsistent and the order unspecified.
      if (aUnplaced && bUnplaced) return 0
      if (aUnplaced) return 1
      if (bUnplaced) return -1
      const levelDiff = a.data.level - b.data.level
      if (levelDiff) return levelDiff
      const typeDiff = (typePriority[a.data.type] ?? 0) - (typePriority[b.data.type] ?? 0)
      if (typeDiff) return typeDiff
      return a.data.position - b.data.position
    })

  const outPath = APP_ROOT + output
  Deno.mkdirSync(dirname(outPath), { recursive: true })
  Deno.writeTextFileSync(outPath, minify ? JSON.stringify(toWrite) : stringifyJSON(toWrite))
}
119
120// ---------------------------------------------------------------------------
121// Subject creation
122// ---------------------------------------------------------------------------
123
/**
 * Indexes for fast slug/hans/ja lookups. Built lazily on first call to createSubject.
 * We defer loading so that commands that don't need subject creation (gen-progress,
 * gen-licenses) don't pay the startup cost of reading the dictionary files.
 *
 * Every index is `null` until `initDicts` has run. `charBySlug` doubles as the
 * "already initialized" flag, so the indexes are built only once and later
 * definition/audio lists passed to `initDicts` are ignored.
 */
// Character definitions keyed by their `hant` form.
let charBySlug: Record<string, Definition> | null = null
// Character definitions keyed by their `hans` form.
let charByHans: Record<string, Definition> | null = null
// Character definitions keyed by their `ja` form (only definitions that have one).
let charByJa: Record<string, Definition> | null = null
// Vocabulary definitions keyed by their `hant` form.
let vocabBySlug: Record<string, Definition> | null = null
// Vocabulary definitions keyed by their `ja` form (only definitions that have one).
let vocabByJa: Record<string, Definition> | null = null
// Audio index produced by `buildAudioIndex`: locale → id → Audio[].
let audioMeta: Record<string, Record<string, Audio[]>> | null = null
135
/**
 * Groups audio filenames into a nested lookup of locale → id → Audio[].
 *
 * Each filename has its first `.mp3` occurrence removed and is then split on `_`.
 * Two layouts are understood:
 *   {id}_{locale}_{voiceId}.mp3
 *   {id}_{locale}_{voiceId}_{col}_{reading}.mp3
 * The first hyphen in the locale segment becomes an underscore (`zh-CN` → `zh_CN`).
 * Filenames with fewer than three segments, or with an empty locale or voice id,
 * are ignored. The `{col}` segment is not used; an absent or empty reading is
 * stored as `undefined`.
 */
export function buildAudioIndex(audioFiles: string[]): Record<string, Record<string, Audio[]>> {
  const index: Record<string, Record<string, Audio[]>> = {}
  for (const url of audioFiles) {
    const segments = url.replace('.mp3', '').split('_')
    if (segments.length < 3) continue

    const idStr = segments[0]
    const locale = segments[1]?.replace('-', '_')
    const voiceId = segments[2]
    const reading = segments[4]
    if (!locale || !voiceId) continue

    const perLocale = (index[locale] ??= {})
    const clips = (perLocale[idStr] ??= [])
    clips.push({ url, voiceId, reading: reading || undefined })
  }
  return index
}
157
/**
 * Fills the module-level lookup tables and the audio index, once.
 *
 * If `charBySlug` is already set the call is a no-op, so the arguments of any
 * later call are ignored. When two definitions share a key, the later one in
 * the list wins.
 */
function initDicts(
  charDefs: Definition[],
  vocabDefs: Definition[],
  audioFiles: string[],
): void {
  // charBySlug doubles as the "already initialized" marker.
  if (charBySlug !== null) return

  const indexBy = (
    defs: Definition[],
    keyOf: (def: Definition) => string,
  ): Record<string, Definition> => Object.fromEntries(defs.map((def) => [keyOf(def), def]))
  // Only definitions that carry a `ja` form can be indexed by it.
  const havingJa = (defs: Definition[]): Definition[] => defs.filter((def) => def.ja)

  charBySlug = indexBy(charDefs, (def) => def.hant)
  charByHans = indexBy(charDefs, (def) => def.hans)
  charByJa = indexBy(havingJa(charDefs), (def) => def.ja!)
  vocabBySlug = indexBy(vocabDefs, (def) => def.hant)
  vocabByJa = indexBy(havingJa(vocabDefs), (def) => def.ja!)

  audioMeta = buildAudioIndex(audioFiles)
}
172
/**
 * Picks the written form to display for a target language:
 * `ja` (falling back to `hant` when `ja` is missing or empty) for `'ja'`,
 * `hans` for `Locale.zh_CN`, and `hant` for everything else.
 */
function getCharForLocale(targetLang: string, hans: string, hant: string, ja?: string): string {
  switch (targetLang) {
    case 'ja':
      return ja || hant
    case Locale.zh_CN:
      return hans
    default:
      return hant
  }
}
177
/**
 * Creates a new Subject from dictionary and curriculum data.
 * Used when a slug has no existing entry in the output JSON.
 *
 * A slug of one character (one Unicode code point) is looked up as a Character,
 * anything longer as Vocabulary. If no dictionary entry matches, the failure is
 * logged with `console.error` and a placeholder `{ data: {} }` is returned.
 *
 * The dictionary and audio indexes are built on the first call only, so the
 * `charDefs`, `vocabDefs` and `audioFiles` passed to later calls are ignored.
 *
 * @param slug - Character or word to create a subject for
 * @param level - Level to store on the subject
 * @param position - Position to store on the subject
 * @param targetLang - Target language; selects the displayed form, sentences and audio
 * @param charMeanings - Character meanings keyed by dictionary id, `;`-separated
 * @param vocabMeanings - Vocabulary meanings keyed by dictionary id, `;`-separated
 * @param sentences - Example-sentence indexes (`byChar`, `bySentence`)
 * @param charDefs - All character definitions (from lang/characters.tsv)
 * @param vocabDefs - All vocabulary definitions (from lang/vocabulary.tsv)
 * @param audioFiles - List of existing audio filenames (from listAudioFiles)
 * @returns The new Subject, or the `{ data: {} }` placeholder when the slug is unknown
 */
export function createSubject(
  slug: string,
  level: number,
  position: number,
  targetLang: string,
  charMeanings: Record<string, string>,
  vocabMeanings: Record<string, string>,
  sentences: Sentences,
  charDefs: Definition[],
  vocabDefs: Definition[],
  audioFiles: string[],
): Subject {
  initDicts(charDefs, vocabDefs, audioFiles)

  // Iterate by code point, not UTF-16 code unit: a single supplementary-plane
  // character has `.length === 2` and `split('')` would cut it into lone surrogates.
  const slugChars = Array.from(slug)
  const isVocab = slugChars.length > 1
  const dictEntry = isVocab
    ? (vocabBySlug![slug] || vocabByJa![slug])
    : (charBySlug![slug] || charByHans![slug] || charByJa![slug])

  if (!dictEntry) {
    console.error(`No valid dictionary entry for slug: ${slug}`)
    return { data: {} } as Subject
  }

  const { id, hans, hant, ja } = dictEntry
  // NOTE(review): when no meaning is found this is '' and produces one meaning
  // with an empty value below — confirm that is what consumers expect.
  const en = (isVocab ? vocabMeanings[id] : charMeanings[id]) || ''
  const character = getCharForLocale(targetLang, hans, hant, ja)
  const charForSentences = targetLang === Locale.zh_CN ? hans : hant

  // Sentences are indexed by character: look up the first character, and for
  // multi-character words keep only sentences that contain the whole word.
  const sentenceChars = Array.from(charForSentences)
  const sentencesForFirstChar = sentences.byChar.get(sentenceChars[0]) ?? []
  const matchingSentences = sentenceChars.length === 1
    ? sentencesForFirstChar
    : sentencesForFirstChar.filter((key) => key.includes(charForSentences))

  return {
    id,
    learnCards: ['meanings'],
    quizCards: ['meanings', 'readings'],
    data: {
      audios: audioMeta![targetLang]?.[id] ?? [],
      character,
      examples: matchingSentences
        .slice(0, 3)
        .map((value) => ({ value, translation: sentences.bySentence[value] })),
      level,
      // The first `;`-separated meaning is the primary one; all are accepted answers.
      meanings: en.split(';').map((def, i) => ({
        value: def.trim(),
        isPrimary: i === 0,
        isAcceptedAnswer: true,
      })),
      position,
      readings: [],
      // Ids of the slug's component characters, de-duplicated, excluding
      // unknown characters and the subject's own character entry.
      requiredSubjects: distinct(
        slugChars.map((c) => charBySlug![c]?.id ?? ''),
      ).filter((reqId) => reqId && reqId !== charBySlug![slug]?.id),
      slug,
      srsId: level > 2 ? 1 : 2,
      type: isVocab ? Vocabulary : Character,
    },
  } as Subject
}