source dump of claude code
23
fork

Configure Feed

Select the types of activity you want to include in your feed.

at main 222 lines 7.2 kB view raw
import emojiRegex from 'emoji-regex'
import { eastAsianWidth } from 'get-east-asian-width'
import stripAnsi from 'strip-ansi'
import { getGraphemeSegmenter } from '../utils/intl.js'

// Emoji matcher built once at module load and shared by every call.
// stringWidthJavaScript resets lastIndex before each test() — presumably
// because the regex is global/stateful; confirm against emoji-regex.
const EMOJI_REGEX = emojiRegex()

/**
 * Fallback JavaScript implementation of stringWidth when Bun.stringWidth is not available.
 *
 * Get the display width of a string as it would appear in a terminal.
 *
 * This is a more accurate alternative to the string-width package that correctly handles
 * characters like ⚠ (U+26A0) which string-width incorrectly reports as width 2.
 *
 * The implementation uses eastAsianWidth directly with ambiguousAsWide: false,
 * which correctly treats ambiguous-width characters as narrow (width 1) as
 * recommended by the Unicode standard for Western contexts.
 *
 * Evaluation order (cheapest check first):
 * 1. Non-string or empty input returns 0.
 * 2. If every UTF-16 code unit is below 127 and none is ESC (0x1b), the result
 *    is the number of code units above 0x1f.
 * 3. Otherwise, if ESC is present, the string is passed through stripAnsi; an
 *    empty result returns 0.
 * 4. If needsSegmentation() is false, the eastAsianWidth of each code point
 *    that is not zero-width is summed.
 * 5. Otherwise the string is walked grapheme by grapheme: graphemes matched by
 *    EMOJI_REGEX are measured by getEmojiWidth(); any other grapheme
 *    contributes the width of its first non-zero-width code point only.
 *
 * @param str - Text to measure; may contain ANSI escape sequences.
 * @returns The computed width in terminal columns.
 */
function stringWidthJavaScript(str: string): number {
  // Runtime guard: the static type says string, but non-string values are
  // still tolerated and measured as 0.
  if (typeof str !== 'string' || str.length === 0) {
    return 0
  }

  // Fast path: pure ASCII string (no ANSI codes, no wide chars)
  let isPureAscii = true
  for (let i = 0; i < str.length; i++) {
    const code = str.charCodeAt(i)
    // Check for non-ASCII or ANSI escape (0x1b)
    // DEL (127) also leaves the fast path; isZeroWidth() handles it later.
    if (code >= 127 || code === 0x1b) {
      isPureAscii = false
      break
    }
  }
  if (isPureAscii) {
    // Count printable characters (exclude control chars)
    let width = 0
    for (let i = 0; i < str.length; i++) {
      const code = str.charCodeAt(i)
      if (code > 0x1f) {
        width++
      }
    }
    return width
  }

  // Strip ANSI if escape character is present
  if (str.includes('\x1b')) {
    str = stripAnsi(str)
    // The string consisted solely of escape sequences.
    if (str.length === 0) {
      return 0
    }
  }

  // Fast path: simple Unicode (no emoji, variation selectors, or joiners)
  if (!needsSegmentation(str)) {
    let width = 0
    // for...of iterates by code point, so surrogate pairs arrive as one char.
    for (const char of str) {
      const codePoint = char.codePointAt(0)!
      if (!isZeroWidth(codePoint)) {
        width += eastAsianWidth(codePoint, { ambiguousAsWide: false })
      }
    }
    return width
  }

  let width = 0

  for (const { segment: grapheme } of getGraphemeSegmenter().segment(str)) {
    // Check for emoji first (most emoji sequences are width 2)
    // Reset lastIndex so the shared regex always tests from the start.
    EMOJI_REGEX.lastIndex = 0
    if (EMOJI_REGEX.test(grapheme)) {
      width += getEmojiWidth(grapheme)
      continue
    }

    // Calculate width for non-emoji graphemes
    // For grapheme clusters (like Devanagari conjuncts with virama+ZWJ), only count
    // the first non-zero-width character's width since the cluster renders as one glyph
    for (const char of grapheme) {
      const codePoint = char.codePointAt(0)!
      if (!isZeroWidth(codePoint)) {
        width += eastAsianWidth(codePoint, { ambiguousAsWide: false })
        break
      }
    }
  }

  return width
}

/**
 * Decide whether a string must be measured grapheme by grapheme.
 *
 * @param str - Text to scan (by code point).
 * @returns true as soon as any code point falls in one of the emoji ranges
 *   below, is a variation selector (U+FE00–U+FE0F), or is U+200D (ZWJ);
 *   false if none does.
 */
function needsSegmentation(str: string): boolean {
  for (const char of str) {
    const cp = char.codePointAt(0)!
    // Emoji ranges
    if (cp >= 0x1f300 && cp <= 0x1faff) return true
    if (cp >= 0x2600 && cp <= 0x27bf) return true
    // Regional indicators (same range getEmojiWidth special-cases)
    if (cp >= 0x1f1e6 && cp <= 0x1f1ff) return true
    // Variation selectors, ZWJ
    if (cp >= 0xfe00 && cp <= 0xfe0f) return true
    if (cp === 0x200d) return true
  }
  return false
}

/**
 * Width of a single grapheme that EMOJI_REGEX has already matched.
 *
 * @param grapheme - One grapheme cluster; the caller guarantees it is non-empty.
 * @returns 1 for a lone regional indicator or for a digit/'#'/'*' followed
 *   only by U+FE0F; 2 for everything else.
 */
function getEmojiWidth(grapheme: string): number {
  // Regional indicators: single = 1, pair = 2
  const first = grapheme.codePointAt(0)!
  if (first >= 0x1f1e6 && first <= 0x1f1ff) {
    // Count code points (not UTF-16 units) in the grapheme.
    let count = 0
    for (const _ of grapheme) count++
    return count === 1 ? 1 : 2
  }

  // Incomplete keycap: digit/symbol + VS16 without U+20E3
  // length === 2 means exactly two UTF-16 units, so index 1 is the second char.
  if (grapheme.length === 2) {
    const second = grapheme.codePointAt(1)
    if (
      second === 0xfe0f &&
      ((first >= 0x30 && first <= 0x39) || first === 0x23 || first === 0x2a)
    ) {
      return 1
    }
  }

  return 2
}

/**
 * Report whether a code point contributes no columns to the measured width.
 *
 * The checks run in order, with the two most common ranges handled first so
 * typical text returns early. Ranges not listed here fall through to false.
 *
 * @param codePoint - Unicode code point to classify.
 * @returns true for control characters, zero-width/invisible characters,
 *   variation selectors, the listed combining-mark ranges, the listed Arabic
 *   formatting characters, surrogates and tag characters; false otherwise.
 */
function isZeroWidth(codePoint: number): boolean {
  // Fast path for common printable range
  if (codePoint >= 0x20 && codePoint < 0x7f) return false
  // U+00A0–U+02FF: only U+00AD (soft hyphen) is treated as zero-width.
  if (codePoint >= 0xa0 && codePoint < 0x0300) return codePoint === 0x00ad

  // Control characters
  if (codePoint <= 0x1f || (codePoint >= 0x7f && codePoint <= 0x9f)) return true

  // Zero-width and invisible characters
  if (
    (codePoint >= 0x200b && codePoint <= 0x200d) || // ZW space/joiner
    codePoint === 0xfeff || // BOM
    (codePoint >= 0x2060 && codePoint <= 0x2064) // Word joiner etc.
  ) {
    return true
  }

  // Variation selectors
  if (
    (codePoint >= 0xfe00 && codePoint <= 0xfe0f) ||
    (codePoint >= 0xe0100 && codePoint <= 0xe01ef)
  ) {
    return true
  }

  // Combining diacritical marks
  if (
    (codePoint >= 0x0300 && codePoint <= 0x036f) ||
    (codePoint >= 0x1ab0 && codePoint <= 0x1aff) ||
    (codePoint >= 0x1dc0 && codePoint <= 0x1dff) ||
    (codePoint >= 0x20d0 && codePoint <= 0x20ff) ||
    (codePoint >= 0xfe20 && codePoint <= 0xfe2f)
  ) {
    return true
  }

  // Indic script combining marks (covers Devanagari through Malayalam)
  if (codePoint >= 0x0900 && codePoint <= 0x0d4f) {
    // Signs and vowel marks at start of each script block
    // The low 7 bits give the position inside a 0x80-aligned window, so the
    // same offset tests are applied to every window in the range.
    const offset = codePoint & 0x7f
    if (offset <= 0x03) return true // Signs at block start
    if (offset >= 0x3a && offset <= 0x4f) return true // Vowel signs, virama
    if (offset >= 0x51 && offset <= 0x57) return true // Stress signs
    if (offset >= 0x62 && offset <= 0x63) return true // Vowel signs
  }

  // Thai/Lao combining marks
  // Note: U+0E32 (SARA AA), U+0E33 (SARA AM), U+0EB2, U+0EB3 are spacing vowels (width 1), not combining marks
  if (
    codePoint === 0x0e31 || // Thai MAI HAN-AKAT
    (codePoint >= 0x0e34 && codePoint <= 0x0e3a) || // Thai vowel signs (skip U+0E32, U+0E33)
    (codePoint >= 0x0e47 && codePoint <= 0x0e4e) || // Thai vowel signs and marks
    codePoint === 0x0eb1 || // Lao MAI KAN
    (codePoint >= 0x0eb4 && codePoint <= 0x0ebc) || // Lao vowel signs (skip U+0EB2, U+0EB3)
    (codePoint >= 0x0ec8 && codePoint <= 0x0ecd) // Lao tone marks
  ) {
    return true
  }

  // Arabic formatting
  if (
    (codePoint >= 0x0600 && codePoint <= 0x0605) ||
    codePoint === 0x06dd ||
    codePoint === 0x070f ||
    codePoint === 0x08e2
  ) {
    return true
  }

  // Surrogates, tag characters
  if (codePoint >= 0xd800 && codePoint <= 0xdfff) return true
  if (codePoint >= 0xe0000 && codePoint <= 0xe007f) return true

  return false
}

// Note: complex-script graphemes like Devanagari क्ष (ka+virama+ZWJ+ssa) render
// as a single ligature glyph but occupy 2 terminal cells (wcwidth sums the base
// consonants). Bun.stringWidth=2 matches terminal cell allocation, which is what
// we need for cursor positioning — the JS fallback's grapheme-cluster width of 1
// would desync Ink's layout from the terminal.
//
// Bun.stringWidth is resolved once at module scope rather than checked on every
// call — typeof guards deopt property access and this is a hot path (~100k calls/frame).
const bunStringWidth =
  typeof Bun !== 'undefined' && typeof Bun.stringWidth === 'function'
    ? Bun.stringWidth
    : null

// Options object allocated once and reused for every Bun.stringWidth call.
const BUN_STRING_WIDTH_OPTS = { ambiguousIsNarrow: true } as const

/**
 * Display width of a string in terminal columns.
 *
 * Bound once at module load: delegates to Bun.stringWidth (with
 * ambiguousIsNarrow: true) when that function exists, otherwise to the
 * JavaScript fallback stringWidthJavaScript.
 *
 * @param str - Text to measure.
 * @returns The width reported by whichever implementation was selected.
 */
export const stringWidth: (str: string) => number = bunStringWidth
  ? str => bunStringWidth(str, BUN_STRING_WIDTH_OPTS)
  : stringWidthJavaScript