src/view/com/composer/select-language/SuggestedLanguage.tsx at theme-changes

ellioth.co / witchsky.app
forked from jollywhoppers.com/witchsky.app
fork
Bluesky app fork with some witchin' additions 💫
fork
witchsky.app / src / view / com / composer / select-language / SuggestedLanguage.tsx
at theme-changes 550 lines 17 kB view raw
wrap content
Samuel Newman Replace lande with native language detection (#9974) 17d ago
1c38665d
  1import {useEffect, useMemo, useRef, useState} from 'react'
  2import {Platform, Text as RNText, View} from 'react-native'
  3import {RichText} from '@atproto/api'
  4import {parseLanguageString} from '@atproto/syntax'
  5import {
  6  guessLanguageAsync,
  7  type LanguageResult,
  8} from '@bsky.app/expo-guess-language'
  9import {Trans, useLingui} from '@lingui/react/macro'
 10import debounce from 'lodash.debounce'
 11
 12import {useNonReactiveCallback} from '#/lib/hooks/useNonReactiveCallback'
 13import {useNonReactiveObject} from '#/lib/hooks/useNonReactiveObject'
 14import {deviceLanguageCodes} from '#/locale/deviceLocales'
 15import {codeToLanguageName} from '#/locale/helpers'
 16import {useLanguagePrefs} from '#/state/preferences/languages'
 17import {atoms as a, platform, useTheme} from '#/alf'
 18import {Button, ButtonIcon} from '#/components/Button'
 19import {Check_Stroke2_Corner0_Rounded as CheckIcon} from '#/components/icons/Check'
 20import {Earth_Stroke2_Corner2_Rounded as EarthIcon} from '#/components/icons/Globe'
 21import {TimesLarge_Stroke2_Corner0_Rounded as XIcon} from '#/components/icons/Times'
 22import {Text} from '#/components/Typography'
 23import {useAnalytics} from '#/analytics'
 24import {IS_WEB} from '#/env'
 25
 26type LanguageDetectionPerLanguageConfig = {
 27  acceptanceThreshold?: number
 28  deviceLocaleAcceptanceThreshold?: number
 29}
 30
 31type LanguageDetectionConfig = {
 32  acceptanceThreshold: number
 33  deviceLocaleAcceptanceThreshold: number
 34  overrides: Record<string, LanguageDetectionPerLanguageConfig>
 35}
 36
 37const MIN_TEXT_LENGTH = IS_WEB ? 20 : 10
 38const NOISE_FLOOR = 0.1
 39
 40/**
 41 * Platform-resolved defaults. Web uses `lande` under the hood, which
 42 * spreads probability across many candidates — so both the noise floor
 43 * and the acceptance bar sit higher than on native (MLKit).
 44 *
 45 * Per-language carve-outs override the platform-level acceptance
 46 * threshold.
 47 */
 48const DEFAULT_CONFIG: LanguageDetectionConfig = {
 49  acceptanceThreshold: platform({
 50    web: 0.97,
 51    ios: 0.9,
 52    android: 0.9,
 53    default: 0.97,
 54  }),
 55  /*
 56   * Device locales are an independent prior — the OS tells us which
 57   * languages the user has installed, separate from what the model sees
 58   * in the text. Combining the two lets us accept a candidate at lower
 59   * model confidence when the language is one the user actually reads.
 60   * It also fails softer: a wrong suggestion for a language the user
 61   * knows ("are you writing in Spanish?") is easier to dismiss than one
 62   * for a language they don't ("are you writing in Japanese?"), so we
 63   * can afford to be more aggressive there.
 64   *
 65   * Native-only. On web we keep the bar at 0.97 because (a) lande's
 66   * confidence is tightly bimodal — a score of 0.85 means the model
 67   * doesn't know, not that it's "mostly sure" — and (b) the browser's
 68   * locale signal is noisier (navigator.languages usually includes
 69   * English regardless of what the user actually reads).
 70   */
 71  deviceLocaleAcceptanceThreshold: platform({
 72    web: 0.97,
 73    ios: 0.8,
 74    android: 0.8,
 75    default: 0.97,
 76  }),
 77  /*
 78   * Per-language carve-outs for known confusable pairs / clusters. The
 79   * acceptance bar is raised above the platform baseline because these
 80   * are languages the detector (especially `lande` on web) is known to
 81   * misclassify or over-commit on.
 82   *
 83   * The device-locale bar is also raised for most tightly-confusable
 84   * pairs: if the user has both languages in the pair installed (common
 85   * for id/ms or nb/da speakers), the device-locale prior no longer
 86   * discriminates between them, so we can't afford to drop the bar as
 87   * aggressively.
 88   *
 89   * Each value uses `platform({web, default})` — `default` applies to
 90   * iOS/Android/etc. (MLKit is better at these distinctions, so the
 91   * bump above baseline is smaller).
 92   */
 93  overrides: {
 94    // Example
 95    // id: {
 96    //   acceptanceThreshold: platform({web: 0.99, default: 0.95}),
 97    //   deviceLocaleAcceptanceThreshold: platform({web: 0.97, default: 0.9}),
 98    // },
 99  },
100}
101
/**
 * Composer banner that proposes a post language.
 *
 * Two kinds of prompt can be rendered:
 *   - `GuessedLanguage`, when debounced detection on the composed text
 *     yields exactly one confident candidate that is neither already
 *     selected nor previously declined.
 *   - `ReplyLanguageNudge`, before the user has interacted, when none of
 *     the languages of the post being replied to is currently selected.
 *
 * Renders `null` when neither applies.
 */
export function SuggestedLanguage({
  text,
  replyToLanguages: replyToLanguagesProp,
  currentLanguages,
  onAcceptSuggestedLanguage,
  onNudge,
}: {
  text: string
  /**
   * All languages associated with the post being replied to.
   */
  replyToLanguages: string[]
  /**
   * All languages currently selected for the post being composed.
   */
  currentLanguages: string[]
  /**
   * Called when the user accepts a suggested language. We only pass a single
   * language here. If the post being replied to has multiple languages, we
   * only suggest the first one.
   */
  onAcceptSuggestedLanguage: (language: string | null) => void
  /**
   * Fired when detection produced ambiguous results — no strong suggestion
   * to show, but we want to hint to the user that the detector is unsure.
   * Expected to be an incrementing counter setter on the parent so the
   * nudge can re-fire on each detection cycle.
   */
  onNudge?: () => void
}) {
  const ax = useAnalytics()
  const [hasInteracted, setHasInteracted] = useState(false)
  const [suggLang, setSuggLang] = useState<string | undefined>(undefined)
  const declinedSuggLangsRef = useRef<string[]>([])

  /*
   * Shared callbacks
   */
  const onAccept = (language: string) => {
    onAcceptSuggestedLanguage(language)
    // clear
    setSuggLang(undefined)
  }
  const onDecline = () => {
    if (suggLang) {
      // Declining a detected language: remember it so it is not suggested
      // again, then clear the banner.
      declinedSuggLangsRef.current.push(suggLang)
      // clear
      setSuggLang(undefined)
    } else {
      // Declining the reply-language nudge. That prompt is only rendered
      // while `suggLang` is unset and `hasInteracted` is false, so without
      // this branch the decline button would do nothing and the banner
      // could never be dismissed. Marking the composer as interacted hides
      // it, same as when the user starts typing.
      setHasInteracted(true)
    }
  }

  /**
   * Merge in remote config (eventually)
   */
  const config = useMemo(() => DEFAULT_CONFIG, [])

  /**
   * Create non-reactive ref for debounced detection method.
   */
  const detectionPropsRef = useNonReactiveObject({
    config,
    currentLanguages,
  })

  /*
   * Held in a ref so the debounced detection closure always sees the
   * latest callback identity without rebuilding the debounce timer.
   */
  const handleOnNudge = useNonReactiveCallback(onNudge)

  /*
   * Main language detection routine, debounced by 500ms. Created once;
   * anything that changes between renders is read through refs.
   */
  const detectLanguage = useMemo(() => {
    return debounce(async (text: string) => {
      try {
        const currLangs = detectionPropsRef.current.currentLanguages
        const {certain, uncertain} = await guessLanguage(
          text,
          detectionPropsRef.current.config,
        )
        const topCandidate = certain.at(0)?.language
        if (
          certain.length === 1 &&
          uncertain.length === 0 &&
          topCandidate !== undefined &&
          !currLangs.includes(topCandidate) &&
          !declinedSuggLangsRef.current.includes(topCandidate)
        ) {
          // we have a single confident candidate with no competitors — show it!
          setSuggLang(topCandidate)
        } else {
          const nextBestCandidate = uncertain.at(0)?.language
          // ambiguous results — if the first uncertain candidate isn't
          // already selected or previously declined, nudge the user
          if (
            nextBestCandidate !== undefined &&
            !currLangs.includes(nextBestCandidate) &&
            !declinedSuggLangsRef.current.includes(nextBestCandidate)
          ) {
            handleOnNudge()
            ax.metric('composer:language:nudgeUser', {
              os: Platform.OS,
              suggestedLanguage: nextBestCandidate,
              currentTargetLanguages: currLangs,
              textLength: text.length,
            })
          }

          setSuggLang(undefined)
        }
      } catch (e) {
        ax.logger.error('Error detecting language', {safeMessage: e})
      }
    }, 500)
  }, [])

  useEffect(() => {
    // Once any text has been entered, record the interaction. This
    // permanently hides the reply-language nudge, which is only shown
    // before the user has interacted.
    if (text.length > 0 && !hasInteracted) {
      setHasInteracted(true)
    }

    if (ax.features.enabled(ax.features.ComposerLanguageDetectionEnable)) {
      const textTrimmed = sanitizeTextForDetection(text)

      /*
       * If text drops under the min length requirement, reset suggestions state
       * objects.
       *
       * And we don't run the language model on small posts, the results are
       * likely to be inaccurate.
       */
      if (textTrimmed.length < MIN_TEXT_LENGTH) {
        setSuggLang(undefined)
        return
      }

      void detectLanguage(textTrimmed)
    }

    // Cancel any pending debounced invocation on unmount / re-run so we
    // don't call setSuggLang after the composer has closed (or after the
    // user has already accepted a language).
    return () => {
      detectLanguage.cancel()
    }
  }, [text, hasInteracted, detectLanguage, ax])

  /*
   * This is intentionally computed based on a ref. Since we set and clear
   * `suggLang` this derivation is safe, but be aware of it
   * when making changes.
   */
  const hasDeclined = suggLang
    ? // eslint-disable-next-line react-hooks/refs
      declinedSuggLangsRef.current.includes(suggLang)
    : false

  /*
   * We've detected a language, and the user hasn't already selected it.
   */
  const hasLanguageSuggestion = suggLang && !currentLanguages.includes(suggLang)

  /*
   * We have not detected a different language, and the user is not already
   * using or has not already selected one of the languages of the post they
   * are replying to.
   */
  const replyToLanguages = replyToLanguagesProp
    .filter(Boolean)
    .map(lang => parseLanguageString(lang)?.language)
    .filter(Boolean) as string[]
  const hasSuggestedReplyLanguage =
    !hasInteracted &&
    !suggLang &&
    replyToLanguages.length &&
    !replyToLanguages.some(l => currentLanguages.includes(l))

  if (hasDeclined) {
    return null
  } else if (hasLanguageSuggestion) {
    return (
      <GuessedLanguage
        language={suggLang}
        metadata={{currentTargetLanguages: currentLanguages, rawText: text}}
        onAccept={onAccept}
        onDecline={onDecline}
      />
    )
  } else if (hasSuggestedReplyLanguage) {
    return (
      <ReplyLanguageNudge
        language={replyToLanguages[0]}
        metadata={{currentTargetLanguages: currentLanguages}}
        onAccept={onAccept}
        onDecline={onDecline}
      />
    )
  } else {
    return null
  }
}
305
/**
 * Prompt shown when detection produced a confident language guess.
 *
 * Emits a `suggestLanguage` metric when mounted and whenever `language`
 * changes, and an accept/decline metric before forwarding the user's
 * choice to the outer callbacks.
 *
 * @param language Detected language code being suggested.
 * @param metadata Context attached to analytics events: the languages
 *   currently selected for the post and the raw composer text.
 * @param onAccept Called with `language` after the accept metric is sent.
 * @param onDecline Called after the decline metric is sent.
 */
function GuessedLanguage({
  language,
  metadata,
  onAccept: onAcceptOuter,
  onDecline: onDeclineOuter,
}: {
  language: string
  metadata: {
    currentTargetLanguages: string[]
    rawText: string
  }
  onAccept: (language: string) => void
  onDecline: () => void
}) {
  const ax = useAnalytics()
  const langPrefs = useLanguagePrefs()
  const suggestedLanguageName = codeToLanguageName(
    language,
    langPrefs.appLanguage,
  )
  const onAccept = () => {
    ax.metric('composer:language:acceptSuggestion', {
      os: Platform.OS,
      suggestedLanguage: language,
      currentTargetLanguages: metadata.currentTargetLanguages,
      textLength: sanitizeTextForDetection(metadata.rawText).length,
    })
    onAcceptOuter(language)
  }
  const onDecline = () => {
    ax.metric('composer:language:declineSuggestion', {
      os: Platform.OS,
      suggestedLanguage: language,
      currentTargetLanguages: metadata.currentTargetLanguages,
      textLength: sanitizeTextForDetection(metadata.rawText).length,
    })
    onDeclineOuter()
  }

  /*
   * The impression metric should fire once per suggested language, not on
   * every keystroke, so all metadata is read through a non-reactive ref
   * rather than being listed as an effect dependency.
   */
  const metaRef = useNonReactiveObject(metadata)
  useEffect(() => {
    ax.metric('composer:language:suggestLanguage', {
      os: Platform.OS,
      suggestedLanguage: language,
      currentTargetLanguages: metaRef.current.currentTargetLanguages,
      textLength: sanitizeTextForDetection(metaRef.current.rawText).length,
    })
  }, [ax, language])

  return (
    <LanguageSuggestionButton
      label={
        <RNText>
          <Trans>
            Are you writing in{' '}
            <Text style={[a.font_semi_bold]}>{suggestedLanguageName}</Text>?
          </Trans>
        </RNText>
      }
      value={language}
      onAccept={onAccept}
      onDecline={onDecline}
    />
  )
}
371
/**
 * Prompt offering to reply in the language of the post being replied to.
 *
 * Sends a `replyNudgeAccept` / `replyNudgeDecline` metric and then forwards
 * the user's choice to the outer callbacks.
 *
 * @param language Language code of the post being replied to.
 * @param metadata Languages currently selected for the post, attached to
 *   the analytics events.
 * @param onAccept Called with `language` after the accept metric is sent.
 * @param onDecline Called after the decline metric is sent.
 */
function ReplyLanguageNudge({
  language,
  metadata,
  onAccept: onAcceptOuter,
  onDecline: onDeclineOuter,
}: {
  language: string
  metadata: {
    currentTargetLanguages: string[]
  }
  onAccept: (language: string) => void
  onDecline: () => void
}) {
  const ax = useAnalytics()
  const {appLanguage} = useLanguagePrefs()
  const suggestedLanguageName = codeToLanguageName(language, appLanguage)

  // Both events share the same payload shape; build it fresh per event.
  const buildEventPayload = () => ({
    replyToLanguage: language,
    currentTargetLanguages: metadata.currentTargetLanguages,
  })

  function handleAccept() {
    ax.metric('composer:language:replyNudgeAccept', buildEventPayload())
    onAcceptOuter(language)
  }

  function handleDecline() {
    ax.metric('composer:language:replyNudgeDecline', buildEventPayload())
    onDeclineOuter()
  }

  const promptLabel = (
    <RNText>
      <Trans>
        The post you’re replying to was marked as being written in{' '}
        {suggestedLanguageName} by its author. Would you like to reply in{' '}
        <Text style={[a.font_semi_bold]}>{suggestedLanguageName}</Text>?
      </Trans>
    </RNText>
  )

  return (
    <LanguageSuggestionButton
      label={promptLabel}
      value={language}
      onAccept={handleAccept}
      onDecline={handleDecline}
    />
  )
}
423
/**
 * Shared banner layout for language prompts: a globe icon, the prompt
 * label, and round accept / decline buttons.
 *
 * @param label Content rendered as the prompt text.
 * @param value Language code handed to `onAccept` when the accept button
 *   is pressed.
 * @param onAccept Invoked with `value` on accept.
 * @param onDecline Invoked with no arguments on decline.
 */
function LanguageSuggestionButton({
  label,
  value,
  onAccept,
  onDecline,
}: {
  label: React.ReactNode
  value: string
  onAccept: (language: string | null) => void
  onDecline: () => void
}) {
  const theme = useTheme()
  const {t: l} = useLingui()

  const handleAcceptPress = () => onAccept(value)
  const handleDeclinePress = () => onDecline()

  const cardStyle = [
    a.gap_md,
    a.border,
    a.flex_row,
    a.align_center,
    a.rounded_sm,
    a.p_md,
    a.pl_lg,
    theme.atoms.bg,
    theme.atoms.border_contrast_low,
  ]
  const labelStyle = [a.leading_snug, {maxWidth: 400}]

  return (
    <View style={[a.px_lg, a.py_sm]}>
      <View style={cardStyle}>
        <EarthIcon />
        <View style={[a.flex_1]}>
          <Text style={labelStyle}>{label}</Text>
        </View>

        <Button
          size="small"
          color="primary_subtle"
          shape="round"
          onPress={handleAcceptPress}
          label={l`Accept this language suggestion`}>
          <ButtonIcon icon={CheckIcon} size="sm" />
        </Button>

        <Button
          size="small"
          color="secondary"
          shape="round"
          onPress={handleDeclinePress}
          label={l`Decline this language suggestion`}>
          <ButtonIcon icon={XIcon} size="sm" />
        </Button>
      </View>
    </View>
  )
}
486
/**
 * Runs the detector on `text` and sorts its candidates into two buckets:
 *   - `certain`: confidence meets the candidate's acceptance bar.
 *   - `uncertain`: below the bar but at or above `NOISE_FLOOR`.
 * Candidates below the noise floor are dropped. Candidate order within
 * each bucket follows the detector's output order.
 *
 * The acceptance bar for a candidate is resolved as follows:
 *   - If the candidate is one of the device's languages, use the
 *     per-language `deviceLocaleAcceptanceThreshold` override when present,
 *     otherwise the config-level `deviceLocaleAcceptanceThreshold`.
 *   - Otherwise, use the per-language `acceptanceThreshold` override when
 *     present, otherwise the config-level `acceptanceThreshold`.
 *
 * Callers decide what the shape means: a single certain candidate with no
 * uncertain competitors is a strong suggestion; anything else is ambiguous.
 */
async function guessLanguage(
  text: string,
  config: LanguageDetectionConfig,
): Promise<{
  certain: LanguageResult[]
  uncertain: LanguageResult[]
}> {
  const certain: LanguageResult[] = []
  const uncertain: LanguageResult[] = []

  const candidates = await guessLanguageAsync(text)
  for (const candidate of candidates) {
    const {language, confidence} = candidate
    const onDevice = deviceLanguageCodes.includes(language)
    const perLanguage = config.overrides[language]

    let bar: number
    if (onDevice) {
      bar =
        perLanguage?.deviceLocaleAcceptanceThreshold ??
        config.deviceLocaleAcceptanceThreshold
    } else {
      bar = perLanguage?.acceptanceThreshold ?? config.acceptanceThreshold
    }

    if (confidence >= bar) {
      certain.push(candidate)
      continue
    }
    if (confidence >= NOISE_FLOOR) {
      uncertain.push(candidate)
    }
  }

  return {certain, uncertain}
}
528
/**
 * Strip link and mention facets from the text to improve language
 * detection accuracy.
 *
 * Tags are intentionally kept — their word content is usually in the
 * post's language and helps detection; the leading `#` is short enough
 * not to distort results.
 *
 * @param text Raw composer text.
 * @returns The text with link and mention segments removed, trimmed of
 *   leading and trailing whitespace.
 */
function sanitizeTextForDetection(text: string): string {
  const rt = new RichText({text: text.trim()})
  rt.detectFacetsWithoutResolution()

  let sanitized = ''
  for (const segment of rt.segments()) {
    // Only links and mentions are dropped. Tag segments fall through and
    // are kept, as described above.
    if (segment.isLink() || segment.isMention()) {
      continue
    }
    sanitized += segment.text
  }

  return sanitized.trim()
}
Configure Feed

Configure Feed