/**
 * Subject utilities: reading/writing compiled subject JSON files, and creating
 * new Subject objects from dictionary + curriculum data.
 *
 * Compiled subject files live at www/static/gen/lang/{userLang}/{targetLang}.json.
 */

import { distinct } from '@std/collections/distinct'
import { dirname } from '@std/path'
import stringifyJSON from 'json-stringify-pretty-compact'
import { Locale, SubjectType } from '$/enums.ts'
import type { Audio, Subject } from '$/models/subjects.ts'
import { APP_ROOT } from './fs.ts'
import type { Definition } from './dict.ts'
import type { Sentences } from './sentences.ts'

const { Character, Vocabulary } = SubjectType

/** Nested audio lookup: locale → subject id → audio entries. */
type AudioIndex = Record<string, Record<string, Audio[]>>

/**
 * Sort rank per subject type within a level (Radical < Character < Vocabulary).
 * Types that are not listed rank as 0.
 */
const TYPE_PRIORITY: Record<string, number> = { Radical: 0, Character: 1, Vocabulary: 2 }

// ---------------------------------------------------------------------------
// Subject I/O
// ---------------------------------------------------------------------------

/**
 * Reads compiled subject JSON from `www/static/gen/`.
 *
 * @param input - Path of the JSON file, appended to `APP_ROOT`.
 * @returns The parsed subjects, or an empty array when the file cannot be read,
 *   cannot be parsed, or does not contain a JSON array.
 */
export function readSubjects(input: string): Subject[] {
  try {
    const parsed: unknown = JSON.parse(Deno.readTextFileSync(APP_ROOT + input))
    // Valid JSON that is not an array (e.g. `{}` or `null`) would otherwise be
    // handed to callers as if it were a list and crash them on iteration.
    return Array.isArray(parsed) ? (parsed as Subject[]) : []
  } catch {
    return []
  }
}

/**
 * Reads compiled subject JSON and returns a map keyed by `data.slug`.
 * Subjects with a missing id or slug are skipped with a warning.
 *
 * @param input - Path of the JSON file, appended to `APP_ROOT`.
 * @returns A record of slug → subject; later duplicates of a slug overwrite earlier ones.
 */
export function readSubjectsMap(input: string): Record<string, Subject> {
  const map: Record<string, Subject> = {}
  for (const subject of readSubjects(input)) {
    if (!subject.id || !subject.data?.slug) {
      console.warn(
        `Skipping subject with missing id/slug in ${input}:`,
        JSON.stringify(subject).slice(0, 120),
      )
      continue
    }
    map[subject.data.slug] = subject
  }
  return map
}

/**
 * Writes compiled subjects to `www/static/gen/`.
 * Before writing, subjects are:
 * - Filtered to require id, slug, and type (corrupt entries are dropped)
 * - Remapped with a stable property order for consistent diffs
 * - Sorted by level → type (Radical < Character < Vocabulary) → position;
 *   subjects without a level or position go last, in their input order
 *
 * @param output - Path of the JSON file, appended to `APP_ROOT`; parent
 *   directories are created as needed.
 * @param subjects - Subjects to write. The array itself is not mutated.
 * @param minify - When true, writes compact `JSON.stringify` output instead of
 *   the pretty-compact format.
 */
export function writeSubjects(output: string, subjects: Subject[], minify = false): void {
  const levelAndPosition = new Set<string>()
  const toWrite = subjects
    .filter((subject) => {
      if (!subject.id || !subject.data?.slug || !subject.data?.type) {
        console.warn(
          'Dropping invalid subject (missing id/slug/type):',
          JSON.stringify(subject).slice(0, 120),
        )
        return false
      }
      return true
    })
    .map((subject) => {
      const { data } = subject
      const levelPosition = `${data.type}-${data.level}-${data.position}`
      // Level 0 / position 0 is exempt from the collision warning, so any
      // number of subjects may share that slot.
      if (levelAndPosition.has(levelPosition) && levelPosition !== `${data.type}-0-0`) {
        console.warn(`Two subjects at same position ${levelPosition}: ${data.slug}`)
      } else {
        levelAndPosition.add(levelPosition)
      }
      // Explicit property order for stable JSON diffs
      return {
        id: subject.id,
        hiddenAt: subject.hiddenAt,
        learnCards: subject.learnCards?.length ? subject.learnCards : ['meanings'],
        quizCards: subject.quizCards?.length ? subject.quizCards : ['meanings', 'readings'],
        data: {
          audios: data.audios,
          character: data.character,
          requiredSubjects: data.requiredSubjects,
          examples: data.examples,
          level: data.level,
          meanings: data.meanings,
          meaningHint: data.meaningHint,
          meaningMnemonic: data.meaningMnemonic,
          position: data.position,
          readings: data.readings,
          readingHint: data.readingHint,
          readingMnemonic: data.readingMnemonic,
          slug: data.slug,
          srsId: data.srsId,
          type: data.type,
        },
      } as Subject
    })
    .sort((a, b) => {
      // Subjects without a (non-zero) level or position sort after all placed
      // ones. Two such subjects compare equal so the comparator stays
      // consistent and the stable sort keeps their input order.
      if (!a.data.level || !a.data.position) {
        return (!b.data.level || !b.data.position) ? 0 : 1
      }
      if (!b.data.level || !b.data.position) return -1

      const levelDiff = a.data.level - b.data.level
      if (levelDiff) return levelDiff

      const typeDiff = (TYPE_PRIORITY[a.data.type] ?? 0) - (TYPE_PRIORITY[b.data.type] ?? 0)
      if (typeDiff) return typeDiff

      return a.data.position - b.data.position
    })

  const outPath = APP_ROOT + output
  Deno.mkdirSync(dirname(outPath), { recursive: true })
  Deno.writeTextFileSync(outPath, minify ? JSON.stringify(toWrite) : stringifyJSON(toWrite))
}

// ---------------------------------------------------------------------------
// Subject creation
// ---------------------------------------------------------------------------

/**
 * Indexes for fast slug/hans/ja lookups. Built lazily on first call to createSubject.
 * We defer loading so that commands that don't need subject creation (gen-progress,
 * gen-licenses) don't pay the startup cost of reading the dictionary files.
 */
let charBySlug: Record<string, Definition> | null = null
let charByHans: Record<string, Definition> | null = null
let charByJa: Record<string, Definition> | null = null
let vocabBySlug: Record<string, Definition> | null = null
let vocabByJa: Record<string, Definition> | null = null
let audioMeta: AudioIndex | null = null

/**
 * Builds an audio index from a list of audio filenames.
 * Handles both filename formats:
 *   {id}_{locale}_{voiceId}.mp3
 *   {id}_{locale}_{voiceId}_{col}_{reading}.mp3
 * Filenames with fewer than three `_`-separated parts, or with an empty id,
 * locale, or voice id, are ignored.
 *
 * @param audioFiles - Audio filenames; each is stored unchanged as the entry's `url`.
 * @returns A nested map of locale → id → Audio[]. The first `-` of the locale
 *   segment is replaced by `_` (e.g. `zh-CN` → `zh_CN`).
 */
export function buildAudioIndex(audioFiles: string[]): AudioIndex {
  const meta: AudioIndex = {}
  for (const filename of audioFiles) {
    // Strip only a trailing extension; a bare string replace would remove the
    // first ".mp3" found anywhere in the name.
    const parts = filename.replace(/\.mp3$/, '').split('_')
    if (parts.length < 3) continue

    // The fourth segment ({col}) is not used.
    const [idStr, localeHyphen, voiceId, , reading] = parts
    const locale = localeHyphen?.replace('-', '_')
    if (!idStr || !locale || !voiceId) continue

    const byId = (meta[locale] ??= {})
    const audios = (byId[idStr] ??= [])
    audios.push({ url: filename, voiceId, reading: reading || undefined })
  }
  return meta
}

/**
 * Populates the module-level lookup indexes on first use.
 * Later calls return immediately, so the arguments of the first call win for
 * the lifetime of the module.
 *
 * @param charDefs - Character definitions, indexed by `hant`, `hans`, and `ja`.
 * @param vocabDefs - Vocabulary definitions, indexed by `hant` and `ja`.
 * @param audioFiles - Audio filenames passed to `buildAudioIndex`.
 */
function initDicts(
  charDefs: Definition[],
  vocabDefs: Definition[],
  audioFiles: string[],
): void {
  if (charBySlug) return // already initialized

  charBySlug = Object.fromEntries(charDefs.map((d) => [d.hant, d]))
  charByHans = Object.fromEntries(charDefs.map((d) => [d.hans, d]))
  charByJa = Object.fromEntries(charDefs.filter((d) => d.ja).map((d) => [d.ja!, d]))
  vocabBySlug = Object.fromEntries(vocabDefs.map((d) => [d.hant, d]))
  vocabByJa = Object.fromEntries(vocabDefs.filter((d) => d.ja).map((d) => [d.ja!, d]))
  audioMeta = buildAudioIndex(audioFiles)
}

/**
 * Picks the written form to display for a target language.
 *
 * @returns `ja` (falling back to `hant`) when `targetLang` is `'ja'`, `hans`
 *   when it equals `Locale.zh_CN`, and `hant` otherwise.
 */
function getCharForLocale(targetLang: string, hans: string, hant: string, ja?: string): string {
  if (targetLang === 'ja') return ja || hant
  return targetLang === Locale.zh_CN ? hans : hant
}

/**
 * Creates a new Subject from dictionary and curriculum data.
 * Used when a slug has no existing entry in the output JSON.
 *
 * A slug of more than one character (counted by Unicode code point) is treated
 * as vocabulary; a single character is treated as a character subject.
 *
 * @param slug - Written form to look up (matched against hant, then hans/ja).
 * @param level - Curriculum level stored on the subject.
 * @param position - Position within the level stored on the subject.
 * @param targetLang - Language being learned; selects the displayed form and audio.
 * @param charMeanings - Meanings for characters keyed by definition id, `;`-separated.
 * @param vocabMeanings - Meanings for vocabulary keyed by definition id, `;`-separated.
 * @param sentences - Example sentence indexes (`byChar`, `bySentence`).
 * @param charDefs - All character definitions (from lang/characters.tsv)
 * @param vocabDefs - All vocabulary definitions (from lang/vocabulary.tsv)
 * @param audioFiles - List of existing audio filenames (from listAudioFiles)
 * @returns The new subject, or `{ data: {} }` (after logging an error) when no
 *   dictionary entry matches the slug.
 */
export function createSubject(
  slug: string,
  level: number,
  position: number,
  targetLang: string,
  charMeanings: Record<string, string>,
  vocabMeanings: Record<string, string>,
  sentences: Sentences,
  charDefs: Definition[],
  vocabDefs: Definition[],
  audioFiles: string[],
): Subject {
  initDicts(charDefs, vocabDefs, audioFiles)

  // Iterate by code point: a character outside the BMP occupies two UTF-16
  // units, so `slug.length` / `slug.split('')` would misread it as two characters.
  const slugChars = [...slug]
  const isVocab = slugChars.length > 1

  const dictEntry = isVocab
    ? (vocabBySlug![slug] || vocabByJa![slug])
    : (charBySlug![slug] || charByHans![slug] || charByJa![slug])
  if (!dictEntry) {
    console.error(`No valid dictionary entry for slug: ${slug}`)
    return { data: {} } as Subject
  }

  const { id, hans, hant, ja } = dictEntry
  const en = isVocab ? (vocabMeanings[id] || '') : (charMeanings[id] || '')
  const character = getCharForLocale(targetLang, hans, hant, ja)
  const charForSentences = targetLang === Locale.zh_CN ? hans : hant

  // NOTE(review): this lookup still works on UTF-16 units because the keying of
  // `sentences.byChar` is defined outside this file — confirm before changing it.
  const matchingSentences = charForSentences.length === 1
    ? (sentences.byChar.get(charForSentences) ?? [])
    : (sentences.byChar.get(charForSentences[0]) ?? []).filter((key) =>
      key.includes(charForSentences)
    )

  // Ids of the slug's component characters, excluding the subject itself.
  const ownCharId = charBySlug![slug]?.id
  const requiredSubjects = distinct(slugChars.map((c) => charBySlug![c]?.id ?? ''))
    .filter((reqId) => reqId && reqId !== ownCharId)

  return {
    id,
    learnCards: ['meanings'],
    quizCards: ['meanings', 'readings'],
    data: {
      audios: audioMeta![targetLang]?.[id] ?? [],
      character,
      examples: matchingSentences
        .slice(0, 3)
        .map((value) => ({ value, translation: sentences.bySentence[value] })),
      level,
      // The first `;`-separated meaning is the primary one; all are accepted answers.
      meanings: en.split(';').map((def, i) => ({
        value: def.trim(),
        isPrimary: i === 0,
        isAcceptedAnswer: true,
      })),
      position,
      readings: [],
      requiredSubjects,
      slug,
      srsId: level > 2 ? 1 : 2,
      type: isVocab ? Vocabulary : Character,
    },
  } as Subject
}