Bluesky app fork with some witchin' additions 💫
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Replace lande with native language detection (#9974)

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Co-authored-by: DS Boyce <260543580+ds-boyce@users.noreply.github.com>
Co-authored-by: Eric Bailey <git@esb.lol>

+704 -139
+7 -3
jest/jestSetup.js
··· 61 61 usePermissions: jest.fn(() => [true]), 62 62 })) 63 63 64 - jest.mock('lande', () => ({ 65 - __esModule: true, // this property makes it work 66 - default: jest.fn().mockReturnValue([['eng']]), 64 + jest.mock('@bsky.app/expo-guess-language', () => ({ 65 + guessLanguageSync: jest 66 + .fn() 67 + .mockReturnValue([{language: 'en', confidence: 1}]), 68 + guessLanguageAsync: jest 69 + .fn() 70 + .mockResolvedValue([{language: 'en', confidence: 1}]), 67 71 })) 68 72 69 73 jest.mock('sentry-expo', () => ({
+2
package.json
··· 82 82 }, 83 83 "dependencies": { 84 84 "@atproto/api": "^0.19.10", 85 + "@atproto/syntax": "0.5.2", 85 86 "@bitdrift/react-native": "^0.6.8", 86 87 "@braintree/sanitize-url": "^6.0.2", 87 88 "@bsky.app/alf": "^0.1.7", 89 + "@bsky.app/expo-guess-language": "^0.2.8", 88 90 "@bsky.app/expo-image-crop-tool": "^0.5.0", 89 91 "@bsky.app/expo-scroll-edge-effect": "^0.1.4", 90 92 "@bsky.app/expo-translate-text": "^0.2.9",
+2
src/analytics/features/types.ts
··· 14 14 GroupChatsEnable = 'group_chats:enable', 15 15 GroupChatsHasBeenReleased = 'group_chats:has_been_released', 16 16 DmsNewMessageComposerEnable = 'dms:new_message_composer:enable', 17 + ComposerLanguageDetectionEnable = 'composer:language_detection:enable', 17 18 KlipyGifProviderEnable = 'klipy_gif_provider:enable', 18 19 PostGalleryEmbedEnable = 'post_gallery_embed:enable', 20 + 19 21 AATest = 'aa-test', 20 22 }
+94
src/analytics/metrics/types.ts
··· 800 800 */ 801 801 resultSourceLanguage: string 802 802 } 803 + 'composer:language:suggestLanguage': { 804 + os: Platform['OS'] 805 + /** 806 + * The language we detected and suggested to the user as an override for the 807 + * expected target language. 808 + */ 809 + suggestedLanguage: string | undefined 810 + /** 811 + * This is the user's current composer languages, which are always defined. 812 + */ 813 + currentTargetLanguages: string[] 814 + /** 815 + * The length of the text being analyzed. We assume shorter texts are 816 + * more likely to produce inaccurate detections. 817 + */ 818 + textLength: number 819 + } 820 + 'composer:language:acceptSuggestion': { 821 + os: Platform['OS'] 822 + /** 823 + * The language we detected and suggested to the user as an override for the 824 + * expected target language. 825 + */ 826 + suggestedLanguage: string | undefined 827 + /** 828 + * This is the user's current composer languages, which are always defined. 829 + */ 830 + currentTargetLanguages: string[] 831 + /** 832 + * The length of the text being analyzed. We assume shorter texts are 833 + * more likely to produce inaccurate detections. 834 + */ 835 + textLength: number 836 + } 837 + 'composer:language:declineSuggestion': { 838 + os: Platform['OS'] 839 + /** 840 + * The language we detected and suggested to the user as an override for the 841 + * expected target language. 842 + */ 843 + suggestedLanguage: string | undefined 844 + /** 845 + * This is the user's current composer languages, which are always defined. 846 + */ 847 + currentTargetLanguages: string[] 848 + /** 849 + * The length of the text being analyzed. We assume shorter texts are 850 + * more likely to produce inaccurate detections. 851 + */ 852 + textLength: number 853 + } 854 + 'composer:language:replyNudgeAccept': { 855 + /** 856 + * The language of the post the user is replying to. 
857 + */ 858 + replyToLanguage: string 859 + /** 860 + * This is the user's current composer languages, which are always defined. 861 + */ 862 + currentTargetLanguages: string[] 863 + } 864 + 'composer:language:replyNudgeDecline': { 865 + /** 866 + * The language of the post the user is replying to. 867 + */ 868 + replyToLanguage: string 869 + /** 870 + * This is the user's current composer languages, which are always defined. 871 + */ 872 + currentTargetLanguages: string[] 873 + } 874 + 'composer:language:nudgeUser': { 875 + os: Platform['OS'] 876 + /** 877 + * The best below-threshold candidate from detection. It is not shown as a 878 + * suggestion; the user is only nudged toward the language selector. 879 + */ 880 + suggestedLanguage: string | undefined 881 + /** 882 + * This is the user's current composer languages, which are always defined. 883 + */ 884 + currentTargetLanguages: string[] 885 + /** 886 + * The length of the text being analyzed. We assume shorter texts are 887 + * more likely to produce inaccurate detections. 888 + */ 889 + textLength: number 890 + } 891 + 'composer:language:langSelectorPressed': { 892 + /** 893 + * If the user was nudged by our language detection to update their language 894 + */ 895 + wasNudged: boolean 896 + } 803 897 804 898 'postMenu:openMuteWordsDialog': { 805 899 uri: string
+19 -11
src/lib/hooks/useNonReactiveCallback.ts
··· 1 1 import {useCallback, useInsertionEffect, useRef} from 'react' 2 2 3 - // This should be used sparingly. It erases reactivity, i.e. when the inputs 4 - // change, the function itself will remain the same. This means that if you 5 - // use this at a higher level of your tree, and then some state you read in it 6 - // changes, there is no mechanism for anything below in the tree to "react" 7 - // to this change (e.g. by knowing to call your function again). 8 - // 9 - // Also, you should avoid calling the returned function during rendering 10 - // since the values captured by it are going to lag behind. 11 - export function useNonReactiveCallback<T extends Function>(fn: T): T { 12 - const ref = useRef(fn) 3 + const noop = () => {} 4 + 5 + /** 6 + * This should be used sparingly. It erases reactivity, i.e. when the inputs 7 + * change, the function itself will remain the same. This means that if you use 8 + * this at a higher level of your tree, and then some state you read in it 9 + * changes, there is no mechanism for anything below in the tree to "react" to 10 + * this change (e.g. by knowing to call your function again). 11 + * 12 + * Also, you should avoid calling the returned function during rendering since 13 + * the values captured by it are going to lag behind. 14 + * 15 + * For objects, see `useNonReactiveObject` instead. 16 + */ 17 + export function useNonReactiveCallback<T extends Function = () => void>( 18 + fn?: T, 19 + ): T { 20 + const ref = useRef<T>((fn ?? noop) as T) 13 21 useInsertionEffect(() => { 14 - ref.current = fn 22 + ref.current = (fn ?? noop) as T 15 23 }, [fn]) 16 24 return useCallback( 17 25 (...args: any) => {
+20
src/lib/hooks/useNonReactiveObject.ts
··· 1 + import {useInsertionEffect, useRef} from 'react' 2 + 3 + /** 4 + * This should be used sparingly. It erases reactivity, i.e. when the inputs 5 + * change, the returned ref itself will remain the same. This means that if 6 + * you use this at a higher level of your tree, and then some state you read in 7 + * it changes, there is no mechanism for anything below in the tree to "react" 8 + * to this change (e.g. by knowing to read `ref.current` again). 9 + * 10 + * For callbacks, see `useNonReactiveCallback` instead. 11 + */ 12 + export function useNonReactiveObject<T extends Record<string, unknown>>( 13 + o: T, 14 + ): React.RefObject<T> { 15 + const ref = useRef(o) 16 + useInsertionEffect(() => { 17 + ref.current = o 18 + }, [o]) 19 + return ref 20 + }
+19
src/view/com/composer/Composer.tsx
··· 268 268 setReplyToLanguages([]) 269 269 } 270 270 271 + /** 272 + * Timestamp (ms) of the last honored nudge from language detection. 273 + * Used to rate-limit the pulse animation: we ignore back-to-back 274 + * nudges that arrive within NUDGE_COOLDOWN_MS. Consumers key an effect 275 + * on this value — it only changes when we actually want to re-pulse. 276 + */ 277 + const [languageNudgeAt, setLanguageNudgeAt] = useState(0) 278 + const onLanguageNudge = () => { 279 + const now = Date.now() 280 + // ignore back-to-back nudges within 10s; only update state (and 281 + // therefore re-pulse) once the cooldown has elapsed 282 + setLanguageNudgeAt(prev => (now - prev > 10_000 ? now : prev)) 283 + } 284 + 271 285 const [composerState, composerDispatch] = useReducer( 272 286 composerReducer, 273 287 { ··· 1146 1160 replyToLanguages={replyToLanguages} 1147 1161 currentLanguages={currentLanguages} 1148 1162 onAcceptSuggestedLanguage={setAcceptedLanguageSuggestion} 1163 + onNudge={onLanguageNudge} 1149 1164 /> 1150 1165 <ComposerPills 1151 1166 isReply={!!replyTo} ··· 1169 1184 }} 1170 1185 currentLanguages={currentLanguages} 1171 1186 onSelectLanguage={onSelectLanguage} 1187 + languageNudgeAt={languageNudgeAt} 1172 1188 openGallery={openGallery} 1173 1189 textInputRef={textInputRef} 1174 1190 /> ··· 1873 1889 onAddPost, 1874 1890 currentLanguages, 1875 1891 onSelectLanguage, 1892 + languageNudgeAt, 1876 1893 openGallery, 1877 1894 textInputRef, 1878 1895 }: { ··· 1884 1901 onAddPost: () => void 1885 1902 currentLanguages: string[] 1886 1903 onSelectLanguage?: (language: string) => void 1904 + languageNudgeAt: number 1887 1905 openGallery?: boolean 1888 1906 textInputRef: React.RefObject<TextInputRef | null> 1889 1907 }) { ··· 2049 2067 <PostLanguageSelect 2050 2068 currentLanguages={currentLanguages} 2051 2069 onSelectLanguage={onSelectLanguage} 2070 + nudgeAt={languageNudgeAt} 2052 2071 /> 2053 2072 <CharProgress 2054 2073 count={post.shortenedGraphemeLength}
+103 -29
src/view/com/composer/select-language/PostLanguageSelect.tsx
··· 1 + import {useEffect} from 'react' 2 + import Animated, { 3 + Easing, 4 + useAnimatedStyle, 5 + useSharedValue, 6 + withSequence, 7 + withTiming, 8 + } from 'react-native-reanimated' 1 9 import {msg} from '@lingui/core/macro' 2 10 import {useLingui} from '@lingui/react' 3 11 import {Trans} from '@lingui/react/macro' ··· 17 25 import {Globe_Stroke2_Corner0_Rounded as GlobeIcon} from '#/components/icons/Globe' 18 26 import * as Menu from '#/components/Menu' 19 27 import {Text} from '#/components/Typography' 28 + import {useAnalytics} from '#/analytics' 20 29 21 30 export function PostLanguageSelect({ 22 31 currentLanguages: currentLanguagesProp, 23 32 onSelectLanguage, 33 + nudgeAt = 0, 24 34 }: { 25 35 currentLanguages?: string[] 26 36 onSelectLanguage?: (language: string) => void 37 + /** 38 + * Timestamp (ms) of the last honored language-detection nudge. Each 39 + * time this changes, the button flashes a transient hint and fades. 40 + * The parent rate-limits updates, so successive detector firings inside 41 + * the cooldown won't re-flash. The initial `0` on mount is intentionally 42 + * ignored. 
43 + */ 44 + nudgeAt?: number 27 45 }) { 28 46 const {_} = useLingui() 29 47 const langPrefs = useLanguagePrefs() ··· 52 70 ) { 53 71 return ( 54 72 <> 55 - <LanguageBtn onPress={languageDialogControl.open} /> 73 + <LanguageBtn onPress={languageDialogControl.open} nudgeAt={nudgeAt} /> 56 74 <LanguageSelectDialog 57 75 titleText={<Trans>Choose post languages</Trans>} 58 76 subtitleText={ ··· 72 90 <Menu.Root> 73 91 <Menu.Trigger label={_(msg`Select post language`)}> 74 92 {({props}) => ( 75 - <LanguageBtn currentLanguages={currentLanguages} {...props} /> 93 + <LanguageBtn 94 + currentLanguages={currentLanguages} 95 + nudgeAt={nudgeAt} 96 + {...props} 97 + /> 76 98 )} 77 99 </Menu.Trigger> 78 100 <Menu.Outer> ··· 122 144 ) 123 145 } 124 146 125 - function LanguageBtn( 126 - props: Omit<ButtonProps, 'label' | 'children'> & { 127 - currentLanguages?: string[] 128 - }, 129 - ) { 147 + const PULSE_FADE_IN_MS = 300 148 + const PULSE_FADE_OUT_MS = 500 149 + 150 + function LanguageBtn({ 151 + currentLanguages: currentLanguagesProp, 152 + nudgeAt = 0, 153 + ...props 154 + }: Omit<ButtonProps, 'label' | 'children'> & { 155 + currentLanguages?: string[] 156 + nudgeAt?: number 157 + }) { 158 + const t = useTheme() 159 + const ax = useAnalytics() 130 160 const {_} = useLingui() 131 161 const langPrefs = useLanguagePrefs() 132 - const t = useTheme() 133 162 134 163 const postLanguagesPref = toPostLanguages(langPrefs.postLanguage) 135 - const currentLanguages = props.currentLanguages ?? postLanguagesPref 164 + const currentLanguages = currentLanguagesProp ?? postLanguagesPref 165 + 166 + /* 167 + * Stays at 0 when idle; each nudge runs two pulses with a faster 168 + * fade-in and slower fade-out, ease-in-out throughout. Reassigning 169 + * `value` cancels any prior sequence, so rapid re-nudges cleanly 170 + * restart. 
171 + */ 172 + const nudgePulse = useSharedValue(0) 173 + useEffect(() => { 174 + if (nudgeAt === 0) return 175 + const easing = Easing.inOut(Easing.quad) 176 + const fadeIn = {duration: PULSE_FADE_IN_MS, easing} 177 + const fadeOut = {duration: PULSE_FADE_OUT_MS, easing} 178 + nudgePulse.value = withSequence( 179 + withTiming(1, fadeIn), 180 + withTiming(0, fadeOut), 181 + withTiming(1, fadeIn), 182 + withTiming(0, fadeOut), 183 + ) 184 + }, [nudgeAt, nudgePulse]) 185 + const pulseStyle = useAnimatedStyle(() => ({ 186 + opacity: nudgePulse.value, 187 + })) 136 188 137 189 return ( 138 190 <Button ··· 146 198 }), 147 199 )} 148 200 accessibilityHint={_(msg`Opens post language settings`)} 149 - style={[a.mr_xs]} 150 - {...props}> 201 + style={[a.mr_xs, a.overflow_hidden]} 202 + {...props} 203 + onPress={e => { 204 + props.onPress?.(e) 205 + ax.metric('composer:language:langSelectorPressed', { 206 + wasNudged: nudgeAt > 0, 207 + }) 208 + }}> 151 209 {({pressed, hovered}) => { 152 210 const color = 153 211 pressed || hovered ? t.palette.primary_300 : t.palette.primary_500 154 - if (currentLanguages.length > 0) { 155 - return ( 156 - <Text 212 + return ( 213 + <> 214 + <Animated.View 215 + pointerEvents="none" 157 216 style={[ 158 - {color}, 159 - a.font_semi_bold, 160 - a.text_sm, 161 - a.leading_snug, 162 - {maxWidth: 100}, 217 + a.absolute, 218 + { 219 + top: 0, 220 + right: 0, 221 + bottom: 0, 222 + left: 0, 223 + backgroundColor: t.atoms.bg_contrast_50.backgroundColor, 224 + }, 225 + pulseStyle, 163 226 ]} 164 - numberOfLines={1} 165 - maxFontSizeMultiplier={1.5}> 166 - {currentLanguages 167 - .map(lang => codeToLanguageName(lang, langPrefs.appLanguage)) 168 - .join(', ')} 169 - </Text> 170 - ) 171 - } else { 172 - return <GlobeIcon size="xs" style={{color}} /> 173 - } 227 + /> 228 + {currentLanguages.length > 0 ? 
( 229 + <Text 230 + style={[ 231 + {color}, 232 + a.font_semi_bold, 233 + a.text_sm, 234 + a.leading_snug, 235 + {maxWidth: 100}, 236 + ]} 237 + numberOfLines={1} 238 + maxFontSizeMultiplier={1.5}> 239 + {currentLanguages 240 + .map(lang => codeToLanguageName(lang, langPrefs.appLanguage)) 241 + .join(', ')} 242 + </Text> 243 + ) : ( 244 + <GlobeIcon size="xs" style={{color}} /> 245 + )} 246 + </> 247 + ) 174 248 }} 175 249 </Button> 176 250 )
+430 -95
src/view/com/composer/select-language/SuggestedLanguage.tsx
··· 1 - import {useEffect, useState} from 'react' 2 - import {Text as RNText, View} from 'react-native' 3 - import {parseLanguage} from '@atproto/api' 4 - import {msg} from '@lingui/core/macro' 5 - import {useLingui} from '@lingui/react' 6 - import {Trans} from '@lingui/react/macro' 7 - import lande from 'lande' 1 + import {useEffect, useMemo, useRef, useState} from 'react' 2 + import {Platform, Text as RNText, View} from 'react-native' 3 + import {RichText} from '@atproto/api' 4 + import {parseLanguageString} from '@atproto/syntax' 5 + import { 6 + guessLanguageAsync, 7 + type LanguageResult, 8 + } from '@bsky.app/expo-guess-language' 9 + import {Trans, useLingui} from '@lingui/react/macro' 10 + import debounce from 'lodash.debounce' 8 11 9 - import {code3ToCode2Strict, codeToLanguageName} from '#/locale/helpers' 12 + import {useNonReactiveCallback} from '#/lib/hooks/useNonReactiveCallback' 13 + import {useNonReactiveObject} from '#/lib/hooks/useNonReactiveObject' 14 + import {deviceLanguageCodes} from '#/locale/deviceLocales' 15 + import {codeToLanguageName} from '#/locale/helpers' 10 16 import {useLanguagePrefs} from '#/state/preferences/languages' 11 - import {atoms as a, useTheme} from '#/alf' 12 - import {Button, ButtonText} from '#/components/Button' 17 + import {atoms as a, platform, useTheme} from '#/alf' 18 + import {Button, ButtonIcon} from '#/components/Button' 19 + import {Check_Stroke2_Corner0_Rounded as CheckIcon} from '#/components/icons/Check' 13 20 import {Earth_Stroke2_Corner2_Rounded as EarthIcon} from '#/components/icons/Globe' 21 + import {TimesLarge_Stroke2_Corner0_Rounded as XIcon} from '#/components/icons/Times' 14 22 import {Text} from '#/components/Typography' 23 + import {useAnalytics} from '#/analytics' 24 + import {IS_WEB} from '#/env' 15 25 16 - // fallbacks for safari 17 - const onIdle = 18 - globalThis.requestIdleCallback || ((cb: () => void) => setTimeout(cb, 1)) 19 - const cancelIdle = globalThis.cancelIdleCallback || clearTimeout 
26 + type LanguageDetectionPerLanguageConfig = { 27 + acceptanceThreshold?: number 28 + deviceLocaleAcceptanceThreshold?: number 29 + } 30 + 31 + type LanguageDetectionConfig = { 32 + acceptanceThreshold: number 33 + deviceLocaleAcceptanceThreshold: number 34 + overrides: Record<string, LanguageDetectionPerLanguageConfig> 35 + } 36 + 37 + const MIN_TEXT_LENGTH = IS_WEB ? 20 : 10 38 + const NOISE_FLOOR = 0.1 39 + 40 + /** 41 + * Platform-resolved defaults. Web uses `lande` under the hood, which 42 + * spreads probability across many candidates — so both the noise floor 43 + * and the acceptance bar sit higher than on native (MLKit). 44 + * 45 + * Per-language carve-outs override the platform-level acceptance 46 + * threshold. 47 + */ 48 + const DEFAULT_CONFIG: LanguageDetectionConfig = { 49 + acceptanceThreshold: platform({ 50 + web: 0.97, 51 + ios: 0.9, 52 + android: 0.9, 53 + default: 0.97, 54 + }), 55 + /* 56 + * Device locales are an independent prior — the OS tells us which 57 + * languages the user has installed, separate from what the model sees 58 + * in the text. Combining the two lets us accept a candidate at lower 59 + * model confidence when the language is one the user actually reads. 60 + * It also fails softer: a wrong suggestion for a language the user 61 + * knows ("are you writing in Spanish?") is easier to dismiss than one 62 + * for a language they don't ("are you writing in Japanese?"), so we 63 + * can afford to be more aggressive there. 64 + * 65 + * Native-only. On web we keep the bar at 0.97 because (a) lande's 66 + * confidence is tightly bimodal — a score of 0.85 means the model 67 + * doesn't know, not that it's "mostly sure" — and (b) the browser's 68 + * locale signal is noisier (navigator.languages usually includes 69 + * English regardless of what the user actually reads). 
70 + */ 71 + deviceLocaleAcceptanceThreshold: platform({ 72 + web: 0.97, 73 + ios: 0.8, 74 + android: 0.8, 75 + default: 0.97, 76 + }), 77 + /* 78 + * Per-language carve-outs for known confusable pairs / clusters. The 79 + * acceptance bar is raised above the platform baseline because these 80 + * are languages the detector (especially `lande` on web) is known to 81 + * misclassify or over-commit on. 82 + * 83 + * The device-locale bar is also raised for most tightly-confusable 84 + * pairs: if the user has both languages in the pair installed (common 85 + * for id/ms or nb/da speakers), the device-locale prior no longer 86 + * discriminates between them, so we can't afford to drop the bar as 87 + * aggressively. 88 + * 89 + * Each value uses `platform({web, default})` — `default` applies to 90 + * iOS/Android/etc. (MLKit is better at these distinctions, so the 91 + * bump above baseline is smaller). 92 + */ 93 + overrides: { 94 + // Example 95 + // id: { 96 + // acceptanceThreshold: platform({web: 0.99, default: 0.95}), 97 + // deviceLocaleAcceptanceThreshold: platform({web: 0.97, default: 0.9}), 98 + // }, 99 + }, 100 + } 20 101 21 102 export function SuggestedLanguage({ 22 103 text, 23 104 replyToLanguages: replyToLanguagesProp, 24 105 currentLanguages, 25 106 onAcceptSuggestedLanguage, 107 + onNudge, 26 108 }: { 27 109 text: string 28 110 /** ··· 39 121 * only suggest the first one. 40 122 */ 41 123 onAcceptSuggestedLanguage: (language: string | null) => void 124 + /** 125 + * Fired when detection produced ambiguous results — no strong suggestion 126 + * to show, but we want to hint to the user that the detector is unsure. 127 + * May fire on every detection cycle; the parent is expected to rate-limit 128 + * it (e.g. record a timestamp and ignore calls inside a cooldown window). 
129 + */ 130 + onNudge?: () => void 42 131 }) { 43 - const langPrefs = useLanguagePrefs() 44 - const replyToLanguages = replyToLanguagesProp 45 - .map(lang => cleanUpLanguage(lang)) 46 - .filter(Boolean) as string[] 132 + const ax = useAnalytics() 47 133 const [hasInteracted, setHasInteracted] = useState(false) 48 - const [suggestedLanguage, setSuggestedLanguage] = useState< 49 - string | undefined 50 - >(undefined) 134 + const [suggLang, setSuggLang] = useState<string | undefined>(undefined) 135 + const declinedSuggLangsRef = useRef<string[]>([]) 136 + 137 + /* 138 + * Shared callbacks 139 + */ 140 + const onAccept = (language: string) => { 141 + onAcceptSuggestedLanguage(language) 142 + // clear 143 + setSuggLang(undefined) 144 + } 145 + const onDecline = () => { 146 + if (suggLang) { 147 + declinedSuggLangsRef.current.push(suggLang) 148 + // clear 149 + setSuggLang(undefined) 150 + } 151 + } 152 + 153 + /** 154 + * Merge in remote config (eventually) 155 + */ 156 + const config = useMemo(() => DEFAULT_CONFIG, []) 157 + 158 + /** 159 + * Create non-reactive ref for debounced detection method. 160 + */ 161 + const detectionPropsRef = useNonReactiveObject({ 162 + config, 163 + currentLanguages, 164 + }) 165 + 166 + /* 167 + * Held in a ref so the debounced detection closure always sees the 168 + * latest callback identity without rebuilding the debounce timer. 
169 + */ 170 + const handleOnNudge = useNonReactiveCallback(onNudge) 171 + 172 + /* 173 + * Main language detection effect 174 + */ 175 + const detectLanguage = useMemo(() => { 176 + return debounce(async (text: string) => { 177 + try { 178 + const currLangs = detectionPropsRef.current.currentLanguages 179 + const {certain, uncertain} = await guessLanguage( 180 + text, 181 + detectionPropsRef.current.config, 182 + ) 183 + const topCandidate = certain.at(0)?.language 184 + if ( 185 + certain.length === 1 && 186 + uncertain.length === 0 && 187 + topCandidate !== undefined && 188 + !currLangs.includes(topCandidate) && 189 + !declinedSuggLangsRef.current.includes(topCandidate) 190 + ) { 191 + // we have a single confident candidate with no competitors — show it! 192 + setSuggLang(topCandidate) 193 + } else { 194 + const nextBestCandidate = uncertain.at(0)?.language 195 + // ambiguous results — if the top candidate isn't already 196 + // selected or previously declined, nudge the user 197 + if ( 198 + nextBestCandidate !== undefined && 199 + !currLangs.includes(nextBestCandidate) && 200 + !declinedSuggLangsRef.current.includes(nextBestCandidate) 201 + ) { 202 + handleOnNudge() 203 + ax.metric('composer:language:nudgeUser', { 204 + os: Platform.OS, 205 + suggestedLanguage: nextBestCandidate, 206 + currentTargetLanguages: currLangs, 207 + textLength: text.length, 208 + }) 209 + } 210 + 211 + setSuggLang(undefined) 212 + } 213 + } catch (e) { 214 + ax.logger.error('Error detecting language', {safeMessage: e}) 215 + } 216 + }, 500) 217 + }, []) 51 218 52 219 useEffect(() => { 220 + // show reply prompt if there's not enough text to start using the model 53 221 if (text.length > 0 && !hasInteracted) { 54 222 setHasInteracted(true) 55 223 } 56 - }, [text, hasInteracted]) 57 224 58 - useEffect(() => { 59 - const textTrimmed = text.trim() 225 + if (ax.features.enabled(ax.features.ComposerLanguageDetectionEnable)) { 226 + const textTrimmed = sanitizeTextForDetection(text) 227 + 
228 + /* 229 + * If text drops under the min length requirement, reset suggestions state 230 + * objects. 231 + * 232 + * And we don't run the language model on small posts, the results are 233 + * likely to be inaccurate. 234 + */ 235 + if (textTrimmed.length < MIN_TEXT_LENGTH) { 236 + setSuggLang(undefined) 237 + return 238 + } 60 239 61 - // Don't run the language model on small posts, the results are likely 62 - // to be inaccurate anyway. 63 - if (textTrimmed.length < 40) { 64 - setSuggestedLanguage(undefined) 65 - return 240 + void detectLanguage(textTrimmed) 66 241 } 67 242 68 - const idle = onIdle(() => { 69 - setSuggestedLanguage(guessLanguage(textTrimmed)) 70 - }) 243 + // Cancel any pending debounced invocation on unmount / re-run so we 244 + // don't call setSuggLang after the composer has closed (or after the 245 + // user has already accepted a language). 246 + return () => { 247 + detectLanguage.cancel() 248 + } 249 + }, [text, hasInteracted, detectLanguage, ax]) 71 250 72 - return () => cancelIdle(idle) 73 - }, [text]) 251 + /* 252 + * This is intentionally computed based on a ref. Since we set and clear 253 + * `suggLang` this derivation is safe, but be aware of it 254 + * when making changes. 255 + */ 256 + const hasDeclined = suggLang 257 + ? // eslint-disable-next-line react-hooks/refs 258 + declinedSuggLangsRef.current.includes(suggLang) 259 + : false 74 260 75 261 /* 76 262 * We've detected a language, and the user hasn't already selected it. 77 263 */ 78 - const hasLanguageSuggestion = 79 - suggestedLanguage && !currentLanguages.includes(suggestedLanguage) 264 + const hasLanguageSuggestion = suggLang && !currentLanguages.includes(suggLang) 265 + 80 266 /* 81 267 * We have not detected a different language, and the user is not already 82 268 * using or has not already selected one of the languages of the post they 83 269 * are replying to. 
84 270 */ 271 + const replyToLanguages = replyToLanguagesProp 272 + .filter(Boolean) 273 + .map(lang => parseLanguageString(lang)?.language) 274 + .filter(Boolean) as string[] 85 275 const hasSuggestedReplyLanguage = 86 276 !hasInteracted && 87 - !suggestedLanguage && 277 + !suggLang && 88 278 replyToLanguages.length && 89 279 !replyToLanguages.some(l => currentLanguages.includes(l)) 90 280 91 - if (hasLanguageSuggestion) { 92 - const suggestedLanguageName = codeToLanguageName( 93 - suggestedLanguage, 94 - langPrefs.appLanguage, 95 - ) 96 - 281 + if (hasDeclined) { 282 + return null 283 + } else if (hasLanguageSuggestion) { 97 284 return ( 98 - <LanguageSuggestionButton 99 - label={ 100 - <RNText> 101 - <Trans> 102 - Are you writing in{' '} 103 - <Text style={[a.font_bold]}>{suggestedLanguageName}</Text>? 104 - </Trans> 105 - </RNText> 106 - } 107 - value={suggestedLanguage} 108 - onAccept={onAcceptSuggestedLanguage} 285 + <GuessedLanguage 286 + language={suggLang} 287 + metadata={{currentTargetLanguages: currentLanguages, rawText: text}} 288 + onAccept={onAccept} 289 + onDecline={onDecline} 109 290 /> 110 291 ) 111 292 } else if (hasSuggestedReplyLanguage) { 112 - const suggestedLanguageName = codeToLanguageName( 113 - replyToLanguages[0], 114 - langPrefs.appLanguage, 115 - ) 116 - 117 293 return ( 118 - <LanguageSuggestionButton 119 - label={ 120 - <RNText> 121 - <Trans> 122 - The post you're replying to was marked as being written in{' '} 123 - {suggestedLanguageName} by its author. Would you like to reply in{' '} 124 - <Text style={[a.font_bold]}>{suggestedLanguageName}</Text>? 
125 - </Trans> 126 - </RNText> 127 - } 128 - value={replyToLanguages[0]} 129 - onAccept={onAcceptSuggestedLanguage} 294 + <ReplyLanguageNudge 295 + language={replyToLanguages[0]} 296 + metadata={{currentTargetLanguages: currentLanguages}} 297 + onAccept={onAccept} 298 + onDecline={onDecline} 130 299 /> 131 300 ) 132 301 } else { ··· 134 303 } 135 304 } 136 305 306 + function GuessedLanguage({ 307 + language, 308 + metadata, 309 + onAccept: onAcceptOuter, 310 + onDecline: onDeclineOuter, 311 + }: { 312 + language: string 313 + metadata: { 314 + currentTargetLanguages: string[] 315 + rawText: string 316 + } 317 + onAccept: (language: string) => void 318 + onDecline: () => void 319 + }) { 320 + const ax = useAnalytics() 321 + const langPrefs = useLanguagePrefs() 322 + const suggestedLanguageName = codeToLanguageName( 323 + language, 324 + langPrefs.appLanguage, 325 + ) 326 + const onAccept = () => { 327 + ax.metric('composer:language:acceptSuggestion', { 328 + os: Platform.OS, 329 + suggestedLanguage: language, 330 + currentTargetLanguages: metadata.currentTargetLanguages, 331 + textLength: sanitizeTextForDetection(metadata.rawText).length, 332 + }) 333 + onAcceptOuter(language) 334 + } 335 + const onDecline = () => { 336 + ax.metric('composer:language:declineSuggestion', { 337 + os: Platform.OS, 338 + suggestedLanguage: language, 339 + currentTargetLanguages: metadata.currentTargetLanguages, 340 + textLength: sanitizeTextForDetection(metadata.rawText).length, 341 + }) 342 + onDeclineOuter() 343 + } 344 + 345 + const metaRef = useNonReactiveObject(metadata) 346 + useEffect(() => { 347 + ax.metric('composer:language:suggestLanguage', { 348 + os: Platform.OS, 349 + suggestedLanguage: language, 350 + currentTargetLanguages: metaRef.current.currentTargetLanguages, 351 + textLength: sanitizeTextForDetection(metadata.rawText).length, 352 + }) 353 + }, [ax, language]) 354 + 355 + return ( 356 + <LanguageSuggestionButton 357 + label={ 358 + <RNText> 359 + <Trans> 360 + Are 
you writing in{' '} 361 + <Text style={[a.font_semi_bold]}>{suggestedLanguageName}</Text>? 362 + </Trans> 363 + </RNText> 364 + } 365 + value={language} 366 + onAccept={onAccept} 367 + onDecline={onDecline} 368 + /> 369 + ) 370 + } 371 + 372 + function ReplyLanguageNudge({ 373 + language, 374 + metadata, 375 + onAccept: onAcceptOuter, 376 + onDecline: onDeclineOuter, 377 + }: { 378 + language: string 379 + metadata: { 380 + currentTargetLanguages: string[] 381 + } 382 + onAccept: (language: string) => void 383 + onDecline: () => void 384 + }) { 385 + const ax = useAnalytics() 386 + const langPrefs = useLanguagePrefs() 387 + const suggestedLanguageName = codeToLanguageName( 388 + language, 389 + langPrefs.appLanguage, 390 + ) 391 + const onAccept = () => { 392 + ax.metric('composer:language:replyNudgeAccept', { 393 + replyToLanguage: language, 394 + currentTargetLanguages: metadata.currentTargetLanguages, 395 + }) 396 + onAcceptOuter(language) 397 + } 398 + const onDecline = () => { 399 + ax.metric('composer:language:replyNudgeDecline', { 400 + replyToLanguage: language, 401 + currentTargetLanguages: metadata.currentTargetLanguages, 402 + }) 403 + onDeclineOuter() 404 + } 405 + 406 + return ( 407 + <LanguageSuggestionButton 408 + label={ 409 + <RNText> 410 + <Trans> 411 + The post you’re replying to was marked as being written in{' '} 412 + {suggestedLanguageName} by its author. Would you like to reply in{' '} 413 + <Text style={[a.font_semi_bold]}>{suggestedLanguageName}</Text>? 
414 + </Trans> 415 + </RNText> 416 + } 417 + value={language} 418 + onAccept={onAccept} 419 + onDecline={onDecline} 420 + /> 421 + ) 422 + } 423 + 137 424 function LanguageSuggestionButton({ 138 425 label, 139 426 value, 140 427 onAccept, 428 + onDecline, 141 429 }: { 142 430 label: React.ReactNode 143 431 value: string 144 432 onAccept: (language: string | null) => void 433 + onDecline: () => void 145 434 }) { 146 435 const t = useTheme() 147 - const {_} = useLingui() 436 + const {t: l} = useLingui() 148 437 149 438 return ( 150 439 <View style={[a.px_lg, a.py_sm]}> ··· 175 464 176 465 <Button 177 466 size="small" 467 + color="primary_subtle" 468 + shape="round" 469 + onPress={() => onAccept(value)} 470 + label={l`Accept this language suggestion`}> 471 + <ButtonIcon icon={CheckIcon} size="sm" /> 472 + </Button> 473 + 474 + <Button 475 + size="small" 178 476 color="secondary" 179 - onPress={() => onAccept(value)} 180 - label={_(msg`Accept this language suggestion`)}> 181 - <ButtonText> 182 - <Trans>Yes</Trans> 183 - </ButtonText> 477 + shape="round" 478 + onPress={() => onDecline()} 479 + label={l`Decline this language suggestion`}> 480 + <ButtonIcon icon={XIcon} size="sm" /> 184 481 </Button> 185 482 </View> 186 483 </View> ··· 188 485 } 189 486 190 487 /** 191 - * This function is using the lande language model to attempt to detect the language 192 - * We want to only make suggestions when we feel a high degree of certainty 193 - * The magic numbers are based on debugging sessions against some test strings 488 + * Run detection and partition candidates into "certain" (confident enough 489 + * to suggest on their own) and "uncertain" (above the noise floor but not 490 + * confident enough to suggest). Callers decide what to do with the shape: 491 + * a single certain candidate with no uncertain competitors is a strong 492 + * suggestion; everything else is ambiguous. 
493 + * 494 + * The acceptance threshold is resolved per candidate with this precedence: 495 + * 1. Per-language override (e.g. maybe `id` requires higher confidence) 496 + * 2. Device-locale bar (lower on native — the user likely writes in a 497 + * language they have installed) 498 + * 3. Platform-level bar 194 499 */ 195 - function guessLanguage(text: string): string | undefined { 196 - const scores = lande(text).filter(([_lang, value]) => value >= 0.0002) 197 - // if the model has multiple items with a score higher than 0.0002, it isn't certain enough 198 - if (scores.length !== 1) { 199 - return undefined 500 + async function guessLanguage( 501 + text: string, 502 + config: LanguageDetectionConfig, 503 + ): Promise<{ 504 + certain: LanguageResult[] 505 + uncertain: LanguageResult[] 506 + }> { 507 + const suggestions = await guessLanguageAsync(text) 508 + const certain: LanguageResult[] = [] 509 + const uncertain: LanguageResult[] = [] 510 + 511 + for (const suggestion of suggestions) { 512 + const isDeviceLocale = deviceLanguageCodes.includes(suggestion.language) 513 + const override = config.overrides[suggestion.language] 514 + const threshold = isDeviceLocale 515 + ? (override?.deviceLocaleAcceptanceThreshold ?? 516 + config.deviceLocaleAcceptanceThreshold) 517 + : (override?.acceptanceThreshold ?? 
config.acceptanceThreshold) 518 + 519 + if (suggestion.confidence >= threshold) { 520 + certain.push(suggestion) 521 + } else if (suggestion.confidence >= NOISE_FLOOR) { 522 + uncertain.push(suggestion) 523 + } 200 524 } 201 - const [lang, value] = scores[0] 202 - // if the model doesn't give a score of 0.97 or above, it isn't certain enough 203 - if (value < 0.97) { 204 - return undefined 205 - } 206 - return code3ToCode2Strict(lang) 525 + 526 + return {certain, uncertain} 207 527 } 208 528 209 - function cleanUpLanguage(text: string | undefined): string | undefined { 210 - if (!text) { 211 - return undefined 529 + /** 530 + * Strip any detected facets from the text to improve language detection 531 + * accuracy. For example, URLs and mentions. 532 + * 533 + * Tags are intentionally kept — their word content is usually in the 534 + * post's language and helps detection; the leading `#` is short enough 535 + * not to distort results. 536 + */ 537 + function sanitizeTextForDetection(text: string): string { 538 + const rt = new RichText({text: text.trim()}) 539 + rt.detectFacetsWithoutResolution() 540 + 541 + let sanitized = '' 542 + for (const segment of rt.segments()) { 543 + if (segment.isLink() || segment.isMention() || segment.isTag()) { 544 + continue 545 + } 546 + sanitized += segment.text 212 547 } 213 548 214 - return parseLanguage(text)?.language 549 + return sanitized.trim() 215 550 }
+8 -1
yarn.lock
··· 101 101 multiformats "^9.9.0" 102 102 zod "^3.23.8" 103 103 104 - "@atproto/syntax@^0.5.0", "@atproto/syntax@^0.5.1": 104 + "@atproto/syntax@0.5.2", "@atproto/syntax@^0.5.0", "@atproto/syntax@^0.5.1": 105 105 version "0.5.2" 106 106 resolved "https://registry.yarnpkg.com/@atproto/syntax/-/syntax-0.5.2.tgz#d4b32c9feb421ceeb5ade1fa80bc42764d51e52e" 107 107 integrity sha512-W41szOnkppoHr0iCUrzL8gy3OD6qmDyp1UvUgmTx2oFQfgbudpz51T/gznesiCcqiUT5obfHdx4PJ+WdlEOE7Q== ··· 2423 2423 integrity sha512-/Qe3fTDaInfZdwXKk6kkDd7xoAWUmUqrlAbuU1ydEzSzjMFa+icfeEaRmbQ8cFIv3XJiU+iuvgBKjJSw0msypQ== 2424 2424 dependencies: 2425 2425 react-responsive "^10.0.1" 2426 + 2427 + "@bsky.app/expo-guess-language@^0.2.8": 2428 + version "0.2.8" 2429 + resolved "https://registry.yarnpkg.com/@bsky.app/expo-guess-language/-/expo-guess-language-0.2.8.tgz#e1c2d03b8852eb5fb7397316b0ec8cd7c4f98747" 2430 + integrity sha512-krcQfMSJn39kaFRpaOWxLUW9rT04reoBqjQviu2fTGQWXWEImG25SJondSObVNyGXlmRMrltt72Sc+aRPpQeog== 2431 + dependencies: 2432 + lande "^1.0.10" 2426 2433 2427 2434 "@bsky.app/expo-image-crop-tool@^0.5.0": 2428 2435 version "0.5.0"