A virtual jailed shell environment for Go apps backed by an io/fs#FS.
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat(cut): add -b/-n, reject conflicts, fix dedup by position

Implements GNU coreutils compatibility for cut:

- -b list (byte-cutting) and -n multibyte-safety modifier
- -b/-c/-f rejected as mutually exclusive at parse time
- Repeated indices (-f 1,1) are deduplicated by position, not by value,
  so equal values at different positions stay distinct

Refs: docs/posix2018/CONFORMANCE.md
Assisted-by: Claude Opus 4.7 via Claude Code
Signed-off-by: Xe Iaso <me@xeiaso.net>

Xe Iaso d8787802 70482602

+242 -28
+147 -24
command/internal/cut/cut.go
··· 8 8 "path" 9 9 "strconv" 10 10 "strings" 11 + "unicode/utf8" 11 12 12 13 "github.com/pborman/getopt/v2" 13 14 "mvdan.cc/sh/v3/interp" ··· 37 38 usage := func() { 38 39 fmt.Fprint(stderr, "Usage: cut [OPTION]... [FILE]...\n") 39 40 fmt.Fprint(stderr, "Remove sections from each line of FILE(s).\n\n") 41 + fmt.Fprint(stderr, " -b LIST select only these bytes\n") 40 42 fmt.Fprint(stderr, " -c LIST select only these characters\n") 41 43 fmt.Fprint(stderr, " -d DELIM use DELIM instead of TAB for field delimiter\n") 42 44 fmt.Fprint(stderr, " -f LIST select only these fields\n") 45 + fmt.Fprint(stderr, " -n with -b: do not split multibyte characters\n") 43 46 fmt.Fprint(stderr, " -s, --only-delimited do not print lines without delimiters\n") 44 47 fmt.Fprint(stderr, " --help display this help and exit\n") 45 48 } 46 49 set.SetUsage(usage) 47 50 51 + byteSpec := set.String('b', "", "select only these bytes") 48 52 charSpec := set.String('c', "", "select only these characters") 49 53 delim := set.String('d', "\t", "use DELIM instead of TAB for field delimiter") 50 54 fieldSpec := set.String('f', "", "select only these fields") 55 + noSplit := set.Bool('n', "with -b: do not split multibyte characters") 51 56 suppressNoDelim := set.BoolLong("only-delimited", 's', "do not print lines without delimiters") 52 57 help := set.BoolLong("help", 0, "display this help and exit") 53 58 ··· 61 66 return nil 62 67 } 63 68 64 - if *fieldSpec == "" && *charSpec == "" { 69 + modes := 0 70 + if *byteSpec != "" { 71 + modes++ 72 + } 73 + if *charSpec != "" { 74 + modes++ 75 + } 76 + if *fieldSpec != "" { 77 + modes++ 78 + } 79 + if modes > 1 { 80 + fmt.Fprint(stderr, "cut: only one type of list may be specified\n") 81 + return interp.ExitStatus(1) 82 + } 83 + if modes == 0 { 65 84 fmt.Fprint(stderr, "cut: you must specify a list of bytes, characters, or fields\n") 66 85 return interp.ExitStatus(1) 67 86 } ··· 72 91 return err 73 92 } 74 93 75 - spec := *fieldSpec 76 - if spec == "" { 94 + 
var spec string 95 + switch { 96 + case *byteSpec != "": 97 + spec = *byteSpec 98 + case *charSpec != "": 77 99 spec = *charSpec 78 - } 79 - if spec == "" { 80 - spec = "1" 100 + default: 101 + spec = *fieldSpec 81 102 } 82 103 ranges := parseRanges(spec) 83 104 ··· 93 114 94 115 var out strings.Builder 95 116 for _, line := range lines { 96 - if *charSpec != "" { 97 - chars := []rune(line) 98 - var selected []rune 99 - for _, r := range ranges { 100 - start := r.start - 1 101 - end := r.end 102 - if r.toEnd { 103 - end = len(chars) 104 - } 105 - for i := start; i < end && i < len(chars); i++ { 106 - if i >= 0 { 107 - selected = append(selected, chars[i]) 108 - } 109 - } 117 + switch { 118 + case *byteSpec != "": 119 + lineBytes := []byte(line) 120 + rs := ranges 121 + if *noSplit { 122 + rs = adjustRangesNoSplit(lineBytes, ranges) 110 123 } 124 + selected := extractBytes(lineBytes, rs) 125 + out.Write(selected) 126 + out.WriteString("\n") 127 + case *charSpec != "": 128 + chars := []rune(line) 129 + selected := extractRunes(chars, ranges) 111 130 out.WriteString(string(selected)) 112 131 out.WriteString("\n") 113 - } else { 132 + default: 114 133 if *suppressNoDelim && !strings.Contains(line, d) { 115 134 continue 116 135 } ··· 166 185 167 186 func extractByRanges(items []string, ranges []cutRange) []string { 168 187 var result []string 169 - seen := make(map[string]struct{}) 188 + seen := make(map[int]struct{}) 170 189 for _, r := range ranges { 171 190 start := r.start - 1 172 191 end := r.end ··· 175 194 } 176 195 for i := start; i < end && i < len(items); i++ { 177 196 if i >= 0 { 178 - if _, ok := seen[items[i]]; !ok { 179 - seen[items[i]] = struct{}{} 197 + if _, ok := seen[i]; !ok { 198 + seen[i] = struct{}{} 180 199 result = append(result, items[i]) 181 200 } 182 201 } 183 202 } 184 203 } 185 204 return result 205 + } 206 + 207 + func extractRunes(chars []rune, ranges []cutRange) []rune { 208 + var result []rune 209 + seen := make(map[int]struct{}) 210 + 
for _, r := range ranges { 211 + start := r.start - 1 212 + end := r.end 213 + if r.toEnd { 214 + end = len(chars) 215 + } 216 + for i := start; i < end && i < len(chars); i++ { 217 + if i >= 0 { 218 + if _, ok := seen[i]; !ok { 219 + seen[i] = struct{}{} 220 + result = append(result, chars[i]) 221 + } 222 + } 223 + } 224 + } 225 + return result 226 + } 227 + 228 + func extractBytes(b []byte, ranges []cutRange) []byte { 229 + var result []byte 230 + seen := make(map[int]struct{}) 231 + for _, r := range ranges { 232 + start := r.start - 1 233 + end := r.end 234 + if r.toEnd { 235 + end = len(b) 236 + } 237 + for i := start; i < end && i < len(b); i++ { 238 + if i >= 0 { 239 + if _, ok := seen[i]; !ok { 240 + seen[i] = struct{}{} 241 + result = append(result, b[i]) 242 + } 243 + } 244 + } 245 + } 246 + return result 247 + } 248 + 249 + // adjustRangesNoSplit applies the POSIX -n algorithm to byte ranges. For each 250 + // range low-high: if low is not the first byte of a character, decrement low 251 + // to the character start; if high is not the last byte of a character, 252 + // decrement high to the last byte of the prior character (or zero). Drop the 253 + // range if high becomes zero or low exceeds high. 
254 + func adjustRangesNoSplit(b []byte, ranges []cutRange) []cutRange { 255 + if len(b) == 0 { 256 + return ranges 257 + } 258 + starts, ends := charBoundaries(b) 259 + var out []cutRange 260 + for _, r := range ranges { 261 + low := r.start 262 + high := r.end 263 + if r.toEnd { 264 + high = len(b) 265 + } 266 + if low < 1 { 267 + low = 1 268 + } 269 + if high > len(b) { 270 + high = len(b) 271 + } 272 + if low > len(b) { 273 + out = append(out, cutRange{start: 0, end: 0}) 274 + continue 275 + } 276 + if !starts[low-1] { 277 + for low > 1 && !starts[low-1] { 278 + low-- 279 + } 280 + } 281 + if high >= 1 && high <= len(b) && !ends[high-1] { 282 + for high > 0 && !ends[high-1] { 283 + high-- 284 + } 285 + } 286 + if high < low || high == 0 { 287 + out = append(out, cutRange{start: 0, end: 0}) 288 + continue 289 + } 290 + out = append(out, cutRange{start: low, end: high}) 291 + } 292 + return out 293 + } 294 + 295 + func charBoundaries(b []byte) (starts, ends []bool) { 296 + starts = make([]bool, len(b)) 297 + ends = make([]bool, len(b)) 298 + i := 0 299 + for i < len(b) { 300 + _, size := utf8.DecodeRune(b[i:]) 301 + if size <= 0 { 302 + size = 1 303 + } 304 + starts[i] = true 305 + ends[i+size-1] = true 306 + i += size 307 + } 308 + return starts, ends 186 309 } 187 310 188 311 func readInput(ec *command.ExecContext, files []string, stderr io.Writer) (string, error) {
+95 -4
command/internal/cut/cut_test.go
··· 123 123 wantStdout: "def\njkl\n", 124 124 }, 125 125 { 126 - name: "char allows duplicate codepoints", 126 + name: "char dedups repeated position selectors", 127 127 args: []string{"-c", "1,1,2"}, 128 128 stdin: "abc\n", 129 - wantStdout: "aab\n", 129 + wantStdout: "ab\n", 130 130 }, 131 131 { 132 132 name: "suppress lines without delimiter", ··· 167 167 wantStdout: "a\n", 168 168 }, 169 169 { 170 - name: "duplicate field values are deduped", 170 + name: "duplicate field values at distinct positions are not deduped", 171 171 args: []string{"-d", ",", "-f", "1-3"}, 172 172 stdin: "a,a,b\n", 173 - wantStdout: "a,b\n", 173 + wantStdout: "a,a,b\n", 174 + }, 175 + { 176 + name: "repeated position selector emits field once", 177 + args: []string{"-d", ",", "-f", "1,1"}, 178 + stdin: "a,b,c\n", 179 + wantStdout: "a\n", 180 + }, 181 + { 182 + name: "repeated position with same value at distinct positions", 183 + args: []string{"-d", "X", "-f", "1,1"}, 184 + stdin: "aXa\n", 185 + wantStdout: "a\n", 186 + }, 187 + { 188 + name: "byte select single", 189 + args: []string{"-b", "1"}, 190 + stdin: "a\tb\tc\n", 191 + wantStdout: "a\n", 192 + }, 193 + { 194 + name: "byte select range", 195 + args: []string{"-b", "1-3"}, 196 + stdin: "a\tb\tc\n", 197 + wantStdout: "a\tb\n", 198 + }, 199 + { 200 + name: "char is rune-indexed for multibyte", 201 + args: []string{"-c", "1"}, 202 + stdin: "λa\n", 203 + wantStdout: "λ\n", 204 + }, 205 + { 206 + name: "byte is byte-indexed for multibyte", 207 + args: []string{"-b", "1"}, 208 + stdin: "λa\n", 209 + wantStdout: "\xce\n", 210 + }, 211 + { 212 + name: "byte with -n trims when range straddles char", 213 + args: []string{"-b", "1", "-n"}, 214 + stdin: "λa\n", 215 + wantStdout: "\n", 216 + }, 217 + { 218 + name: "byte with -n keeps fully covered char", 219 + args: []string{"-b", "1-2", "-n"}, 220 + stdin: "λa\n", 221 + wantStdout: "λ\n", 222 + }, 223 + { 224 + name: "byte with -n extends low to char start when high is char end", 225 + 
args: []string{"-b", "2", "-n"}, 226 + stdin: "λa\n", 227 + wantStdout: "λ\n", 228 + }, 229 + { 230 + name: "byte with -n drops range entirely inside one char", 231 + args: []string{"-b", "2", "-n"}, 232 + stdin: "λλ\n", 233 + wantStdout: "λ\n", 234 + }, 235 + { 236 + name: "byte with -n drops mid-char single byte in 3-byte char", 237 + args: []string{"-b", "2", "-n"}, 238 + stdin: "你好\n", 239 + wantStdout: "\n", 240 + }, 241 + { 242 + name: "both -c and -f rejected", 243 + args: []string{"-c", "1", "-f", "1"}, 244 + wantErrSub: "only one type of list", 245 + wantErr: true, 246 + }, 247 + { 248 + name: "both -b and -c rejected", 249 + args: []string{"-b", "1", "-c", "1"}, 250 + wantErrSub: "only one type of list", 251 + wantErr: true, 252 + }, 253 + { 254 + name: "both -b and -f rejected", 255 + args: []string{"-b", "1", "-f", "1"}, 256 + wantErrSub: "only one type of list", 257 + wantErr: true, 258 + }, 259 + { 260 + name: "no selector errors", 261 + args: []string{}, 262 + stdin: "a\tb\n", 263 + wantErrSub: "you must specify", 264 + wantErr: true, 174 265 }, 175 266 { 176 267 name: "double dash terminator",