this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

at main 90 lines 3.0 kB view raw
/**
 * Sentence utilities: reading pre-processed example sentence files and building
 * the character-level indexes used when generating subject data.
 *
 * Sentence files live at data/lang/{userLang}/sentences/{targetLang}.tsv.
 */

import { distinct } from '@std/collections/distinct'
import { parse } from '@std/csv/parse'
import type { Locale } from '$/enums.ts'
import { DATA_ROOT } from './fs.ts'

export interface Sentences {
  /** Maps sentence text → user-language translation. */
  bySentence: Record<string, string>
  /** Maps each individual character → all sentence texts that contain it. */
  byChar: Map<string, string[]>
  /** All sentence texts, in curriculum-sorted order (simplest first). */
  sorted: string[]
}

/**
 * Reads and parses the sentence TSV for a user + target language pair.
 *
 * Rows are returned as-is (one string array per record); columns are consumed
 * positionally by the callers as `id, sentence, enId, translation[, level]`.
 *
 * @returns The parsed rows, or an empty array if the file doesn't exist.
 * @throws Any read error other than `Deno.errors.NotFound` (e.g. missing read
 *   permission), and any parse error, so real failures are not mistaken for
 *   "no sentences".
 */
function readSentenceRows(userLang: string, locale: string): string[][] {
  let text: string
  try {
    text = Deno.readTextFileSync(`${DATA_ROOT}lang/${userLang}/sentences/${locale}.tsv`)
  } catch (error) {
    // A missing file is an expected state: the language pair simply has no sentences.
    if (error instanceof Deno.errors.NotFound) return []
    throw error
  }
  return parse(text, { separator: '\t', lazyQuotes: true })
}

/**
 * Reads the pre-processed sentence TSV for a given user + target language pair.
 * Returns `{ bySentence, keys }`. Returns empty values if the file doesn't exist.
 *
 * `keys` holds each sentence text once, in file order. When a sentence occurs
 * on several rows, the translation of the last such row is kept. Rows without
 * sentence text are skipped; a missing translation column is stored as `''`.
 */
export function readSentences(
  userLang: string,
  locale: Locale,
): { bySentence: Record<string, string>; keys: string[] } {
  const bySentence: Record<string, string> = {}
  const values: string[] = []
  for (const [_id, value, _enId, translation] of readSentenceRows(userLang, locale)) {
    // Short or blank rows would otherwise create an "undefined" / empty key.
    if (!value) continue
    bySentence[value] = translation ?? ''
    values.push(value)
  }
  return { bySentence, keys: distinct(values) }
}

export interface SentenceEntry {
  value: string
  translation: string
  level: number
}

/**
 * Reads the pre-processed sentence TSV (with optional level column) for a given
 * user + target language pair, returning only sentences that have a level assigned.
 * Used to generate the sentence game data files.
 *
 * Rows lacking a sentence, a translation, or a level that parses as a base-10
 * integer are omitted. Returns an empty array if the file doesn't exist.
 */
export function readSentencesForGame(userLang: string, locale: string): SentenceEntry[] {
  const result: SentenceEntry[] = []
  for (const [_id, value, _enId, translation, levelStr] of readSentenceRows(userLang, locale)) {
    // Explicit radix: without it a value such as "0x10" would be read as hexadecimal.
    const level = Number.parseInt(levelStr ?? '', 10)
    if (value && translation && !Number.isNaN(level)) {
      result.push({ value, translation, level })
    }
  }
  return result
}

/**
 * Loads and indexes sentences for a user + target language pair.
 * Builds `byChar` for fast per-character lookup when generating subject examples.
 *
 * Each sentence appears at most once in a character's list, even when the
 * character occurs several times in that sentence. Lists keep the order of
 * `sorted`.
 */
export function loadSentences(userLang: string, targetLang: string): Sentences {
  const raw = readSentences(userLang, targetLang as Locale)
  const byChar = new Map<string, string[]>()
  for (const key of raw.keys) {
    // Iterate distinct characters so a repeated character doesn't list the sentence twice.
    for (const char of new Set(key)) {
      const bucket = byChar.get(char)
      if (bucket) bucket.push(key)
      else byChar.set(char, [key])
    }
  }
  return { bySentence: raw.bySentence, byChar, sorted: raw.keys }
}