forked from
jollywhoppers.com/witchsky.app
Bluesky app fork with some witchin' additions 💫
1import {useEffect, useMemo, useRef, useState} from 'react'
2import {Platform, Text as RNText, View} from 'react-native'
3import {RichText} from '@atproto/api'
4import {parseLanguageString} from '@atproto/syntax'
5import {
6 guessLanguageAsync,
7 type LanguageResult,
8} from '@bsky.app/expo-guess-language'
9import {Trans, useLingui} from '@lingui/react/macro'
10import debounce from 'lodash.debounce'
11
12import {useNonReactiveCallback} from '#/lib/hooks/useNonReactiveCallback'
13import {useNonReactiveObject} from '#/lib/hooks/useNonReactiveObject'
14import {deviceLanguageCodes} from '#/locale/deviceLocales'
15import {codeToLanguageName} from '#/locale/helpers'
16import {useLanguagePrefs} from '#/state/preferences/languages'
17import {atoms as a, platform, useTheme} from '#/alf'
18import {Button, ButtonIcon} from '#/components/Button'
19import {Check_Stroke2_Corner0_Rounded as CheckIcon} from '#/components/icons/Check'
20import {Earth_Stroke2_Corner2_Rounded as EarthIcon} from '#/components/icons/Globe'
21import {TimesLarge_Stroke2_Corner0_Rounded as XIcon} from '#/components/icons/Times'
22import {Text} from '#/components/Typography'
23import {useAnalytics} from '#/analytics'
24import {IS_WEB} from '#/env'
25
/**
 * Full detection configuration: platform-level confidence bars plus
 * optional per-language replacements.
 */
type LanguageDetectionConfig = {
  /** Confidence a candidate must reach when it is NOT a device locale. */
  acceptanceThreshold: number
  /** Confidence a candidate must reach when it IS one of the device locales. */
  deviceLocaleAcceptanceThreshold: number
  /** Per-language replacements for the two bars above, keyed by language code. */
  overrides: Record<string, LanguageDetectionPerLanguageConfig>
}

/**
 * Per-language carve-out. Either bar may be omitted, in which case the
 * matching value from `LanguageDetectionConfig` applies.
 */
type LanguageDetectionPerLanguageConfig = {
  acceptanceThreshold?: number
  deviceLocaleAcceptanceThreshold?: number
}

/**
 * Candidates scoring below this confidence are dropped entirely; they are
 * neither "certain" nor "uncertain".
 */
const NOISE_FLOOR = 0.1

/**
 * Minimum length of the sanitized text before detection is attempted.
 * Web requires more text than native.
 */
const MIN_TEXT_LENGTH = IS_WEB ? 20 : 10
39
/**
 * Built-in detection thresholds, resolved once for the current platform.
 *
 * Web detection runs on `lande`, native on MLKit. `lande` spreads
 * probability across many candidates, so the web acceptance bar sits higher
 * than the native one.
 *
 * Entries in `overrides` replace the platform-level bars for individual
 * languages.
 */
const DEFAULT_CONFIG: LanguageDetectionConfig = {
  acceptanceThreshold: platform({
    default: 0.97,
    web: 0.97,
    android: 0.9,
    ios: 0.9,
  }),
  /*
   * The OS-reported device locales act as an independent prior: they tell
   * us which languages the user has installed, regardless of what the model
   * sees in the text. When a candidate is also a device locale we can
   * accept it at lower model confidence, because it is a language the user
   * actually reads. A wrong guess also costs less here — "are you writing
   * in Spanish?" is easy to dismiss for someone who knows Spanish, whereas
   * "are you writing in Japanese?" is confusing for someone who doesn't —
   * so being more aggressive is affordable.
   *
   * This relaxation is native-only. Web stays at 0.97 for two reasons:
   * (a) lande's confidence is tightly bimodal — 0.85 means "the model
   * doesn't know", not "mostly sure" — and (b) the browser locale signal
   * is noisier (navigator.languages usually lists English no matter what
   * the user actually reads).
   */
  deviceLocaleAcceptanceThreshold: platform({
    default: 0.97,
    web: 0.97,
    android: 0.8,
    ios: 0.8,
  }),
  /*
   * Per-language carve-outs for known confusable pairs / clusters. These
   * raise the acceptance bar above the platform baseline for languages the
   * detector (especially `lande` on web) is known to misclassify or
   * over-commit on.
   *
   * For most tightly-confusable pairs the device-locale bar is raised too:
   * when the user has both languages of a pair installed (common for id/ms
   * or nb/da speakers) the device-locale prior can no longer tell them
   * apart, so the bar can't be dropped as aggressively.
   *
   * Write each value as `platform({web, default})`; `default` covers
   * iOS/Android/etc., where MLKit handles these distinctions better and a
   * smaller bump over baseline is enough.
   */
  overrides: {
    // Example
    // id: {
    //   acceptanceThreshold: platform({web: 0.99, default: 0.95}),
    //   deviceLocaleAcceptanceThreshold: platform({web: 0.97, default: 0.9}),
    // },
  },
}
101
/**
 * Composer banner that suggests a post language.
 *
 * Two kinds of suggestion can be rendered:
 *  - a detected-language suggestion, produced by running the (debounced)
 *    language detector over the composed text; and
 *  - a reply-language nudge, shown before the user has interacted when the
 *    post being replied to is in a language that is not currently selected.
 *
 * Renders `null` when there is nothing to suggest or the current suggestion
 * has been declined.
 */
export function SuggestedLanguage({
  text,
  replyToLanguages: replyToLanguagesProp,
  currentLanguages,
  onAcceptSuggestedLanguage,
  onNudge,
}: {
  text: string
  /**
   * All languages associated with the post being replied to.
   */
  replyToLanguages: string[]
  /**
   * All languages currently selected for the post being composed.
   */
  currentLanguages: string[]
  /**
   * Called when the user accepts a suggested language. We only pass a single
   * language here. If the post being replied to has multiple languages, we
   * only suggest the first one.
   */
  onAcceptSuggestedLanguage: (language: string | null) => void
  /**
   * Fired when detection produced ambiguous results — no strong suggestion
   * to show, but we want to hint to the user that the detector is unsure.
   * Expected to be an incrementing counter setter on the parent so the
   * nudge can re-fire on each detection cycle.
   */
  onNudge?: () => void
}) {
  const ax = useAnalytics()
  const [hasInteracted, setHasInteracted] = useState(false)
  const [suggLang, setSuggLang] = useState<string | undefined>(undefined)
  const declinedSuggLangsRef = useRef<string[]>([])

  /*
   * Shared callbacks
   */
  const onAccept = (language: string) => {
    onAcceptSuggestedLanguage(language)
    // clear
    setSuggLang(undefined)
  }
  const onDecline = () => {
    if (suggLang) {
      // A detected-language suggestion was declined: remember it so we don't
      // offer the same language again, then clear it.
      declinedSuggLangsRef.current.push(suggLang)
      setSuggLang(undefined)
    } else {
      // No detected suggestion is showing, so the decline came from the
      // reply-language nudge. That nudge is gated on `!hasInteracted`, so
      // marking the composer as interacted-with dismisses it.
      setHasInteracted(true)
    }
  }

  /**
   * Merge in remote config (eventually)
   */
  const config = useMemo(() => DEFAULT_CONFIG, [])

  /**
   * Create non-reactive ref for debounced detection method.
   */
  const detectionPropsRef = useNonReactiveObject({
    config,
    currentLanguages,
  })

  /*
   * Held in a ref so the debounced detection closure always sees the
   * latest callback identity without rebuilding the debounce timer.
   *
   * `onNudge` is optional, so wrap it: the wrapper is always callable and
   * simply does nothing when the parent did not supply a handler.
   */
  const handleOnNudge = useNonReactiveCallback(() => onNudge?.())

  /*
   * Main language detection effect
   */
  const detectLanguage = useMemo(() => {
    return debounce(async (text: string) => {
      try {
        const currLangs = detectionPropsRef.current.currentLanguages
        const {certain, uncertain} = await guessLanguage(
          text,
          detectionPropsRef.current.config,
        )
        const topCandidate = certain.at(0)?.language
        if (
          certain.length === 1 &&
          uncertain.length === 0 &&
          topCandidate !== undefined &&
          !currLangs.includes(topCandidate) &&
          !declinedSuggLangsRef.current.includes(topCandidate)
        ) {
          // we have a single confident candidate with no competitors — show it!
          setSuggLang(topCandidate)
        } else {
          const nextBestCandidate = uncertain.at(0)?.language
          // ambiguous results — if the best uncertain candidate isn't
          // already selected or previously declined, nudge the user
          if (
            nextBestCandidate !== undefined &&
            !currLangs.includes(nextBestCandidate) &&
            !declinedSuggLangsRef.current.includes(nextBestCandidate)
          ) {
            handleOnNudge()
            ax.metric('composer:language:nudgeUser', {
              os: Platform.OS,
              suggestedLanguage: nextBestCandidate,
              currentTargetLanguages: currLangs,
              textLength: text.length,
            })
          }

          setSuggLang(undefined)
        }
      } catch (e) {
        ax.logger.error('Error detecting language', {safeMessage: e})
      }
    }, 500)
  }, [])

  useEffect(() => {
    // once the user has typed anything, stop offering the reply-language
    // nudge (it is only shown before the first interaction)
    if (text.length > 0 && !hasInteracted) {
      setHasInteracted(true)
    }

    if (ax.features.enabled(ax.features.ComposerLanguageDetectionEnable)) {
      const textTrimmed = sanitizeTextForDetection(text)

      /*
       * If text drops under the min length requirement, reset suggestions state
       * objects.
       *
       * And we don't run the language model on small posts, the results are
       * likely to be inaccurate.
       */
      if (textTrimmed.length < MIN_TEXT_LENGTH) {
        setSuggLang(undefined)
        return
      }

      void detectLanguage(textTrimmed)
    }

    // Cancel any pending debounced invocation on unmount / re-run so we
    // don't call setSuggLang after the composer has closed (or after the
    // user has already accepted a language).
    return () => {
      detectLanguage.cancel()
    }
  }, [text, hasInteracted, detectLanguage, ax])

  /*
   * This is intentionally computed based on a ref. Since we set and clear
   * `suggLang` this derivation is safe, but be aware of it
   * when making changes.
   */
  const hasDeclined = suggLang
    ? // eslint-disable-next-line react-hooks/refs
      declinedSuggLangsRef.current.includes(suggLang)
    : false

  /*
   * We've detected a language, and the user hasn't already selected it.
   */
  const hasLanguageSuggestion = suggLang && !currentLanguages.includes(suggLang)

  /*
   * We have not detected a different language, and the user is not already
   * using or has not already selected one of the languages of the post they
   * are replying to.
   */
  const replyToLanguages = replyToLanguagesProp
    .filter(Boolean)
    .map(lang => parseLanguageString(lang)?.language)
    .filter(Boolean) as string[]
  const hasSuggestedReplyLanguage =
    !hasInteracted &&
    !suggLang &&
    replyToLanguages.length &&
    !replyToLanguages.some(l => currentLanguages.includes(l))

  if (hasDeclined) {
    return null
  } else if (hasLanguageSuggestion) {
    return (
      <GuessedLanguage
        language={suggLang}
        metadata={{currentTargetLanguages: currentLanguages, rawText: text}}
        onAccept={onAccept}
        onDecline={onDecline}
      />
    )
  } else if (hasSuggestedReplyLanguage) {
    return (
      <ReplyLanguageNudge
        language={replyToLanguages[0]}
        metadata={{currentTargetLanguages: currentLanguages}}
        onAccept={onAccept}
        onDecline={onDecline}
      />
    )
  } else {
    return null
  }
}
305
/**
 * Banner asking whether the user is writing in a detected `language`.
 *
 * Emits a `suggestLanguage` metric each time a new language is shown, and an
 * accept/decline metric before forwarding the user's choice to the parent.
 *
 * @param language Detected language code to suggest.
 * @param metadata Context attached to the metrics: the currently selected
 *   languages and the raw composer text (its sanitized length is reported).
 * @param onAccept Called with `language` after the accept metric is sent.
 * @param onDecline Called after the decline metric is sent.
 */
function GuessedLanguage({
  language,
  metadata,
  onAccept: onAcceptOuter,
  onDecline: onDeclineOuter,
}: {
  language: string
  metadata: {
    currentTargetLanguages: string[]
    rawText: string
  }
  onAccept: (language: string) => void
  onDecline: () => void
}) {
  const ax = useAnalytics()
  const langPrefs = useLanguagePrefs()
  const suggestedLanguageName = codeToLanguageName(
    language,
    langPrefs.appLanguage,
  )
  const onAccept = () => {
    ax.metric('composer:language:acceptSuggestion', {
      os: Platform.OS,
      suggestedLanguage: language,
      currentTargetLanguages: metadata.currentTargetLanguages,
      textLength: sanitizeTextForDetection(metadata.rawText).length,
    })
    onAcceptOuter(language)
  }
  const onDecline = () => {
    ax.metric('composer:language:declineSuggestion', {
      os: Platform.OS,
      suggestedLanguage: language,
      currentTargetLanguages: metadata.currentTargetLanguages,
      textLength: sanitizeTextForDetection(metadata.rawText).length,
    })
    onDeclineOuter()
  }

  /*
   * The impression metric should fire once per suggested language, not on
   * every keystroke, so `metadata` is deliberately kept out of the effect's
   * dependencies. Both metadata fields are therefore read through the
   * non-reactive ref rather than from the reactive prop.
   */
  const metaRef = useNonReactiveObject(metadata)
  useEffect(() => {
    ax.metric('composer:language:suggestLanguage', {
      os: Platform.OS,
      suggestedLanguage: language,
      currentTargetLanguages: metaRef.current.currentTargetLanguages,
      textLength: sanitizeTextForDetection(metaRef.current.rawText).length,
    })
  }, [ax, language, metaRef])

  return (
    <LanguageSuggestionButton
      label={
        <RNText>
          <Trans>
            Are you writing in{' '}
            <Text style={[a.font_semi_bold]}>{suggestedLanguageName}</Text>?
          </Trans>
        </RNText>
      }
      value={language}
      onAccept={onAccept}
      onDecline={onDecline}
    />
  )
}
371
/**
 * Banner offering to switch the post language to the language of the post
 * being replied to.
 *
 * Sends a `replyNudgeAccept` / `replyNudgeDecline` metric and then forwards
 * the user's choice to the parent callbacks.
 *
 * @param language Language code of the post being replied to.
 * @param metadata Currently selected languages, reported with the metrics.
 * @param onAccept Called with `language` once the accept metric is sent.
 * @param onDecline Called once the decline metric is sent.
 */
function ReplyLanguageNudge({
  language,
  metadata,
  onAccept: onAcceptOuter,
  onDecline: onDeclineOuter,
}: {
  language: string
  metadata: {
    currentTargetLanguages: string[]
  }
  onAccept: (language: string) => void
  onDecline: () => void
}) {
  const ax = useAnalytics()
  const {appLanguage} = useLanguagePrefs()
  // NOTE: this identifier doubles as the placeholder name inside the
  // translated message below, so it must not be renamed.
  const suggestedLanguageName = codeToLanguageName(language, appLanguage)

  // Both metrics carry the same payload.
  const metricPayload = {
    replyToLanguage: language,
    currentTargetLanguages: metadata.currentTargetLanguages,
  }

  function handleAccept() {
    ax.metric('composer:language:replyNudgeAccept', metricPayload)
    onAcceptOuter(language)
  }

  function handleDecline() {
    ax.metric('composer:language:replyNudgeDecline', metricPayload)
    onDeclineOuter()
  }

  const prompt = (
    <RNText>
      <Trans>
        The post you’re replying to was marked as being written in{' '}
        {suggestedLanguageName} by its author. Would you like to reply in{' '}
        <Text style={[a.font_semi_bold]}>{suggestedLanguageName}</Text>?
      </Trans>
    </RNText>
  )

  return (
    <LanguageSuggestionButton
      label={prompt}
      value={language}
      onAccept={handleAccept}
      onDecline={handleDecline}
    />
  )
}
423
/**
 * Presentational row shared by all language suggestions: a globe icon, the
 * prompt text, and round accept / decline buttons.
 *
 * @param label Prompt content rendered next to the icon.
 * @param value Language code handed to `onAccept` when accept is pressed.
 * @param onAccept Invoked with `value` when the accept button is pressed.
 * @param onDecline Invoked when the decline button is pressed.
 */
function LanguageSuggestionButton({
  label,
  value,
  onAccept,
  onDecline,
}: {
  label: React.ReactNode
  value: string
  onAccept: (language: string | null) => void
  onDecline: () => void
}) {
  const theme = useTheme()
  const {t: l} = useLingui()

  const handleAcceptPress = () => onAccept(value)
  const handleDeclinePress = () => onDecline()

  const cardStyle = [
    a.gap_md,
    a.border,
    a.flex_row,
    a.align_center,
    a.rounded_sm,
    a.p_md,
    a.pl_lg,
    theme.atoms.bg,
    theme.atoms.border_contrast_low,
  ]
  const labelStyle = [a.leading_snug, {maxWidth: 400}]

  return (
    <View style={[a.px_lg, a.py_sm]}>
      <View style={cardStyle}>
        <EarthIcon />
        <View style={[a.flex_1]}>
          <Text style={labelStyle}>{label}</Text>
        </View>

        <Button
          size="small"
          color="primary_subtle"
          shape="round"
          onPress={handleAcceptPress}
          label={l`Accept this language suggestion`}>
          <ButtonIcon icon={CheckIcon} size="sm" />
        </Button>

        <Button
          size="small"
          color="secondary"
          shape="round"
          onPress={handleDeclinePress}
          label={l`Decline this language suggestion`}>
          <ButtonIcon icon={XIcon} size="sm" />
        </Button>
      </View>
    </View>
  )
}
486
/**
 * Runs the language detector over `text` and sorts its candidates into two
 * buckets, preserving the detector's ordering within each:
 *
 *  - `certain`: confidence meets the candidate's acceptance bar.
 *  - `uncertain`: below that bar but at or above `NOISE_FLOOR`.
 *
 * Candidates under the noise floor are discarded. Interpreting the result is
 * left to the caller: one certain candidate with nothing uncertain is a
 * strong suggestion, anything else is ambiguous.
 *
 * The acceptance bar is picked per candidate:
 *  - if the candidate is one of the device locales, the per-language
 *    `deviceLocaleAcceptanceThreshold` override is used, falling back to the
 *    config-level `deviceLocaleAcceptanceThreshold`;
 *  - otherwise the per-language `acceptanceThreshold` override is used,
 *    falling back to the config-level `acceptanceThreshold`.
 *
 * @param text Text to classify (callers pass already-sanitized text).
 * @param config Thresholds and per-language overrides to apply.
 * @returns The partitioned candidates.
 */
async function guessLanguage(
  text: string,
  config: LanguageDetectionConfig,
): Promise<{
  certain: LanguageResult[]
  uncertain: LanguageResult[]
}> {
  const candidates = await guessLanguageAsync(text)
  const certain: LanguageResult[] = []
  const uncertain: LanguageResult[] = []

  for (const candidate of candidates) {
    const {language, confidence} = candidate
    const perLanguage = config.overrides[language]

    // Resolve the bar this candidate has to clear.
    let bar: number
    if (deviceLanguageCodes.includes(language)) {
      bar =
        perLanguage?.deviceLocaleAcceptanceThreshold ??
        config.deviceLocaleAcceptanceThreshold
    } else {
      bar = perLanguage?.acceptanceThreshold ?? config.acceptanceThreshold
    }

    if (confidence >= bar) {
      certain.push(candidate)
      continue
    }
    if (confidence >= NOISE_FLOOR) {
      uncertain.push(candidate)
    }
  }

  return {certain, uncertain}
}
528
/**
 * Strip link and mention facets from the text to improve language detection
 * accuracy — URLs and handles carry no signal about the post's language.
 *
 * Tags are intentionally kept — their word content is usually in the
 * post's language and helps detection; the leading `#` is short enough
 * not to distort results.
 *
 * @param text Raw composer text.
 * @returns The text with link and mention segments removed, trimmed of
 *   leading and trailing whitespace.
 */
function sanitizeTextForDetection(text: string): string {
  const rt = new RichText({text: text.trim()})
  rt.detectFacetsWithoutResolution()

  let sanitized = ''
  for (const segment of rt.segments()) {
    // Only links and mentions are dropped; tag segments fall through and
    // contribute their text like plain segments do.
    if (segment.isLink() || segment.isMention()) {
      continue
    }
    sanitized += segment.text
  }

  return sanitized.trim()
}