/**
 * Sentence utilities: reading pre-processed example sentence files and building
 * the character-level indexes used when generating subject data.
 *
 * Sentence files live at data/lang/{userLang}/sentences/{targetLang}.tsv.
 */
import { distinct } from '@std/collections/distinct'
import { parse } from '@std/csv/parse'
import type { Locale } from '$/enums.ts'
import { DATA_ROOT } from './fs.ts'

export interface Sentences {
  /** Maps sentence text → user-language translation. */
  bySentence: Record<string, string>
  /** Maps each individual character → all sentence texts that contain it. */
  byChar: Map<string, string[]>
  /** All sentence texts, in curriculum-sorted order (simplest first). */
  sorted: string[]
}

/**
 * Reads and parses the sentence TSV for a user + target language pair.
 *
 * Shared by `readSentences` and `readSentencesForGame` so both resolve the
 * file path and parse options identically.
 *
 * @returns The raw tab-separated rows, or an empty array if the file cannot
 * be read (typically because it doesn't exist).
 */
function readSentenceRows(userLang: string, locale: string): string[][] {
  let text: string
  try {
    text = Deno.readTextFileSync(`${DATA_ROOT}lang/${userLang}/sentences/${locale}.tsv`)
  } catch {
    // Best-effort: an unreadable file is treated as "no sentences available".
    return []
  }
  return parse(text, { separator: '\t', lazyQuotes: true })
}

/**
 * Reads the pre-processed sentence TSV for a given user + target language pair.
 *
 * Each row is read as `[id, sentence, translationId, translation, ...]`; only
 * the sentence and translation columns are used. Rows without sentence text
 * are skipped. When a sentence appears more than once, `keys` keeps its first
 * position and `bySentence` keeps the translation of its last occurrence.
 *
 * @returns `{ bySentence, keys }`. Both are empty if the file doesn't exist.
 */
export function readSentences(
  userLang: string,
  locale: Locale,
): { bySentence: Record<string, string>; keys: string[] } {
  const bySentence: Record<string, string> = {}
  const values: string[] = []
  for (const [_id, value, _enId, translation] of readSentenceRows(userLang, locale)) {
    // A short or blank row has no sentence column; recording it would put
    // `undefined` into `keys` and break callers that iterate each key.
    if (!value) continue
    // A missing translation column is stored as '' so the map honours its
    // declared `string` value type.
    bySentence[value] = translation ?? ''
    values.push(value)
  }
  return { bySentence, keys: distinct(values) }
}

export interface SentenceEntry {
  value: string
  translation: string
  level: number
}

/**
 * Reads the pre-processed sentence TSV (with optional level column) for a given
 * user + target language pair, returning only sentences that have a level assigned.
 * Used to generate the sentence game data files.
 *
 * Rows missing the sentence text, the translation, or a level that parses as a
 * base-10 integer are dropped.
 *
 * @returns The matching entries in file order; empty if the file doesn't exist.
 */
export function readSentencesForGame(userLang: string, locale: string): SentenceEntry[] {
  const result: SentenceEntry[] = []
  for (const [_id, value, _enId, translation, levelStr] of readSentenceRows(userLang, locale)) {
    // Explicit radix: levels are decimal, so never interpret a "0x" prefix as hex.
    const level = Number.parseInt(levelStr ?? '', 10)
    if (value && translation && !Number.isNaN(level)) {
      result.push({ value, translation, level })
    }
  }
  return result
}

/**
 * Loads and indexes sentences for a user + target language pair.
 * Builds `byChar` for fast per-character lookup when generating subject examples.
 *
 * @returns The translation map, the per-character index, and the sentence
 * texts in the order `readSentences` returned them.
 */
export function loadSentences(userLang: string, targetLang: string): Sentences {
  const { bySentence, keys } = readSentences(userLang, targetLang as Locale)
  const byChar = new Map<string, string[]>()
  for (const sentence of keys) {
    // Visit each distinct character once so a sentence that repeats a
    // character is listed only once in that character's bucket.
    for (const char of new Set(sentence)) {
      const bucket = byChar.get(char)
      if (bucket) {
        bucket.push(sentence)
      } else {
        byChar.set(char, [sentence])
      }
    }
  }
  return { bySentence, byChar, sorted: keys }
}