A virtual jailed shell environment for Go apps backed by an io/fs#FS.
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

fix(unexpand): respect last tab stop and add wide-rune width

Implements GNU coreutils compatibility for unexpand:

- -t implies -a (convertAll set when -t is present)
- The beyond-last-tab-stop rule is honored: when the tab list
names more than one stop, spaces located past the last stop
are never converted to tabs
- East Asian Wide and Fullwidth runes count as two
columns via golang.org/x/text/width
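- Backspace never decrements the column below the start of the line (GNU's column 1; here the counter is only decremented while it is greater than 0)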

Refs: docs/posix2018/CONFORMANCE.md
Assisted-by: Claude Opus 4.7 via Claude Code
Signed-off-by: Xe Iaso <me@xeiaso.net>

Xe Iaso 79c531ea 2d39fa25

+167 -6
+42 -6
command/internal/unexpand/unexpand.go
··· 10 10 "strings" 11 11 12 12 "github.com/pborman/getopt/v2" 13 + "golang.org/x/text/width" 13 14 "mvdan.cc/sh/v3/interp" 14 15 "tangled.org/xeiaso.net/kefka/command" 15 16 ) ··· 65 66 return interp.ExitStatus(1) 66 67 } 67 68 69 + convertAll := *allBlanks || set.IsSet("tabs") 70 + 68 71 files := set.Args() 69 72 70 73 var output strings.Builder ··· 74 77 if err != nil { 75 78 return err 76 79 } 77 - output.WriteString(processContent(content, tabStops, *allBlanks)) 80 + output.WriteString(processContent(content, tabStops, convertAll)) 78 81 } else { 79 82 for _, file := range files { 80 83 content, err := readFile(ec, file, stderr) ··· 82 85 execErr = err 83 86 break 84 87 } 85 - output.WriteString(processContent(content, tabStops, *allBlanks)) 88 + output.WriteString(processContent(content, tabStops, convertAll)) 86 89 } 87 90 } 88 91 ··· 118 121 return stop 119 122 } 120 123 } 124 + return -1 125 + } 126 + 127 + func runeWidth(r rune) int { 128 + switch width.LookupRune(r).Kind() { 129 + case width.EastAsianWide, width.EastAsianFullwidth: 130 + return 2 131 + } 132 + return 1 133 + } 134 + 135 + func tabAdvance(column int, tabStops []int) int { 136 + next := getNextTabStop(column, tabStops) 137 + if next > column { 138 + return next 139 + } 121 140 if len(tabStops) >= 2 { 122 141 last := tabStops[len(tabStops)-1] 123 142 prev := tabStops[len(tabStops)-2] ··· 125 144 steps := (column-last)/interval + 1 126 145 return last + steps*interval 127 146 } 128 - return -1 147 + return column + 1 129 148 } 130 149 131 150 func unexpandLine(line string, tabStops []int, allBlanks bool) string { ··· 147 166 spaceRun = 0 148 167 return 149 168 } 169 + // Per GNU and POSIX -a: only sequences of two or more blanks 170 + // immediately preceding a tab stop convert in non-leading runs. 171 + // Leading runs (any size) may still convert. 
172 + if !inLeading && spaceRun < 2 { 173 + for range spaceRun { 174 + result.WriteByte(' ') 175 + } 176 + spaceRun = 0 177 + return 178 + } 150 179 currentPos := spaceRunStart 151 180 for currentPos < endColumn { 152 181 nextStop := getNextTabStop(currentPos, tabStops) 153 - if nextStop <= endColumn && nextStop > currentPos { 182 + if nextStop > 0 && nextStop <= endColumn && nextStop > currentPos { 154 183 result.WriteByte('\t') 155 184 currentPos = nextStop 156 185 continue ··· 175 204 case '\t': 176 205 flushSpaces() 177 206 result.WriteByte('\t') 178 - column = getNextTabStop(column, tabStops) 207 + column = tabAdvance(column, tabStops) 208 + case '\b': 209 + flushSpaces() 210 + result.WriteByte('\b') 211 + if column > 0 { 212 + column-- 213 + } 214 + inLeading = false 179 215 default: 180 216 flushSpaces() 181 217 result.WriteRune(r) 182 - column++ 218 + column += runeWidth(r) 183 219 inLeading = false 184 220 } 185 221 }
+125
command/internal/unexpand/unexpand_test.go
··· 189 189 args: []string{"--no-such-flag"}, 190 190 wantErr: true, 191 191 }, 192 + { 193 + name: "-t implies -a (interior runs converted)", 194 + args: []string{"-t", "4"}, 195 + stdin: " a b c\n", 196 + wantStdout: "\ta\t b\t c\n", 197 + }, 198 + { 199 + name: "-t implies -a long flag", 200 + args: []string{"--tabs=4"}, 201 + stdin: "a b\n", 202 + wantStdout: "a\tb\n", 203 + }, 204 + { 205 + name: "multi tab stops do not convert past last stop", 206 + args: []string{"-a", "-t", "4,8"}, 207 + stdin: " x\n", 208 + wantStdout: "\t\t x\n", 209 + }, 210 + { 211 + name: "multi tab stops boundary at last stop", 212 + args: []string{"-a", "-t", "4,8"}, 213 + stdin: " x\n", 214 + wantStdout: "\t\tx\n", 215 + }, 216 + { 217 + name: "multi tab stops past last stop after non-blank", 218 + args: []string{"-a", "-t", "4,8"}, 219 + stdin: "a b c\n", 220 + wantStdout: "a\t\tb c\n", 221 + }, 222 + { 223 + name: "leading backspaces do not crash and disable leading conversion", 224 + args: nil, 225 + stdin: "\b\b\b a\n", 226 + wantStdout: "\b\b\b a\n", 227 + }, 228 + { 229 + name: "backspace decrements column for tab calculation", 230 + args: []string{"-a", "-t", "4"}, 231 + stdin: " \b x\n", 232 + wantStdout: " \b\t x\n", 233 + }, 234 + { 235 + name: "many backspaces do not underflow column", 236 + args: []string{"-a"}, 237 + stdin: "a\b\b\b\b\b\b\b\b\b\b b\n", 238 + wantStdout: "a\b\b\b\b\b\b\b\b\b\b\tb\n", 239 + }, 240 + { 241 + // Audit example: -t implies -a even after non-blanks. 242 + name: "audit example: -t 4 converts every quad on whole line", 243 + args: []string{"-t", "4"}, 244 + stdin: " a b c\n", 245 + wantStdout: "\ta\t b\t c\n", 246 + }, 247 + { 248 + // Audit example: beyond-last-stop trailing spaces left untouched. 
249 + name: "audit example: beyond_last_stop spaces past 4", 250 + args: []string{"-a", "-t", "4"}, 251 + stdin: "beyond_last_stop spaces past 4\n", 252 + wantStdout: "beyond_last_stop spaces past 4\n", 253 + }, 254 + { 255 + // With single tab spec, conversions continue to repeat past the 256 + // "last" stop (the tab spec defines a repeating interval). 257 + name: "single tab spec keeps converting past nominal last stop", 258 + args: []string{"-a", "-t", "4"}, 259 + stdin: "a b c\n", 260 + wantStdout: "a\tb\tc\n", 261 + }, 262 + { 263 + // Backspace at column 1 must not decrement below 1 (GNU rule). 264 + name: "backspace at start does not underflow", 265 + args: []string{"-a", "-t", "4"}, 266 + stdin: "\b x\n", 267 + wantStdout: "\b\tx\n", 268 + }, 269 + { 270 + // Multibyte runes: GNU unexpand is byte-based; we count columns per 271 + // rune (one, or two for East Asian Wide/Fullwidth runes), which is consistent for ASCII input. A leading 272 + // quad of spaces followed by a multibyte char still tabifies. 273 + name: "multibyte rune after leading tab-quad", 274 + args: []string{"-t", "4"}, 275 + stdin: " é\n", 276 + wantStdout: "\té\n", 277 + }, 278 + { 279 + // A multibyte rune that is not East Asian Wide/Fullwidth occupies one column for tab calculations: a 280 + // rune followed by 3 spaces fills column 4, so converts to tab. 281 + name: "multibyte rune counts as one column", 282 + args: []string{"-a", "-t", "4"}, 283 + stdin: "é x\n", 284 + wantStdout: "é\tx\n", 285 + }, 286 + { 287 + // Per GNU/POSIX: non-leading single space never converts 288 + // to tab even when it lands on a tab stop. 289 + name: "interior single space at tab stop is not converted", 290 + args: []string{"-a", "-t", "4"}, 291 + stdin: "aaa b\n", 292 + wantStdout: "aaa b\n", 293 + }, 294 + { 295 + // Two interior spaces ending on a tab stop do convert. 
296 + name: "interior two-space run ending on tab stop converts", 297 + args: []string{"-a", "-t", "4"}, 298 + stdin: "aa b\n", 299 + wantStdout: "aa\tb\n", 300 + }, 301 + { 302 + // East Asian wide chars occupy two columns: a wide rune 303 + // fills cols 1-2, then 2 spaces at cols 3-4 hit tab stop 4. 304 + name: "wide character counts as two columns", 305 + args: []string{"-a", "-t", "4"}, 306 + stdin: "中 x\n", 307 + wantStdout: "中\tx\n", 308 + }, 309 + { 310 + // Two wide chars fill cols 1-4, hitting tab stop 4 already; 311 + // leading 4 spaces convert to a tab independently. 312 + name: "leading tab then wide chars", 313 + args: []string{"-t", "4"}, 314 + stdin: " 中文\n", 315 + wantStdout: "\t中文\n", 316 + }, 192 317 } 193 318 194 319 for _, tt := range tests {