A virtual jailed shell environment for Go apps backed by an io/fs#FS.
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat(expr): translate POSIX BRE to Go regex for : operator

Implements GNU coreutils compatibility for expr:

- New breToGoRegex translator handles the BRE/Go regex
  differences for the : (regex match) operator:
  - \( and \) for grouping; bare ( and ) literal
  - \{n,m\} intervals; bare { and } literal
  - leading * literal
  - bracket expressions verbatim with leading ]/^ rules
- When the pattern contains a capture group and nothing matches, the result is an empty string, per GNU expr

Refs: docs/posix2018/CONFORMANCE.md
Assisted-by: Claude Opus 4.7 via Claude Code
Signed-off-by: Xe Iaso <me@xeiaso.net>

Xe Iaso ece1a334 d7c2346a

+190 -8
+162 -6
command/internal/expr/expr.go
··· 348 348 } 349 349 350 350 func matchAnchored(s, pattern string) (string, error) { 351 - re, err := regexp.Compile("^" + pattern) 351 + goPattern, err := breToGoRegex(pattern) 352 + if err != nil { 353 + return "", fmt.Errorf("invalid regular expression: %s", pattern) 354 + } 355 + re, err := regexp.Compile("^(?:" + goPattern + ")") 352 356 if err != nil { 353 357 return "", fmt.Errorf("invalid regular expression: %s", pattern) 354 358 } 355 359 idx := re.FindStringSubmatchIndex(s) 356 360 if idx == nil { 361 + // When the pattern contains a capture group and no match 362 + // occurred, GNU expr prints an empty string with exit 1. 363 + if re.NumSubexp() > 0 { 364 + return "", nil 365 + } 357 366 return "0", nil 358 367 } 359 - if len(idx) >= 4 && idx[2] >= 0 { 360 - return s[idx[2]:idx[3]], nil 368 + if re.NumSubexp() > 0 { 369 + if len(idx) >= 4 && idx[2] >= 0 { 370 + return s[idx[2]:idx[3]], nil 371 + } 372 + return "", nil 361 373 } 362 374 return strconv.Itoa(idx[1] - idx[0]), nil 363 375 } 364 376 365 377 func matchUnanchored(s, pattern string) (string, error) { 366 - re, err := regexp.Compile(pattern) 378 + goPattern, err := breToGoRegex(pattern) 379 + if err != nil { 380 + return "", fmt.Errorf("invalid regular expression: %s", pattern) 381 + } 382 + re, err := regexp.Compile(goPattern) 367 383 if err != nil { 368 384 return "", fmt.Errorf("invalid regular expression: %s", pattern) 369 385 } 370 386 idx := re.FindStringSubmatchIndex(s) 371 387 if idx == nil { 388 + if re.NumSubexp() > 0 { 389 + return "", nil 390 + } 372 391 return "0", nil 373 392 } 374 - if len(idx) >= 4 && idx[2] >= 0 { 375 - return s[idx[2]:idx[3]], nil 393 + if re.NumSubexp() > 0 { 394 + if len(idx) >= 4 && idx[2] >= 0 { 395 + return s[idx[2]:idx[3]], nil 396 + } 397 + return "", nil 376 398 } 377 399 return strconv.Itoa(idx[1] - idx[0]), nil 400 + } 401 + 402 + // breToGoRegex translates a POSIX Basic Regular Expression (BRE) into the 403 + // equivalent Go (RE2) regular expression 
syntax. BRE differs from Go regex in 404 + // the following ways that this translator handles: 405 + // 406 + // - `\(` and `\)` denote grouping; `(` and `)` are literal. 407 + // - `\{n,m\}` denotes an interval; `{` and `}` are literal. 408 + // - A leading `*` (at the start of the pattern or just after `\(`) is 409 + // literal, not a quantifier. 410 + // - `\|`, `\+`, `\?` are not standard BRE alternation/quantifiers and are 411 + // kept as their literal escaped forms in Go regex. 412 + // - `\1`..`\9` backreferences are not supported by Go's RE2 engine and 413 + // surface as a translation error. 414 + // - `.`, `^`, `$`, and bracket expressions `[...]` retain their meaning. 415 + func breToGoRegex(bre string) (string, error) { 416 + var out strings.Builder 417 + atStart := true 418 + // A "group start" is the position immediately after `\(`, where a 419 + // leading `*` is also literal per POSIX. 420 + afterGroupStart := false 421 + for i := 0; i < len(bre); i++ { 422 + c := bre[i] 423 + switch c { 424 + case '\\': 425 + if i+1 >= len(bre) { 426 + // Trailing backslash: keep it as a literal backslash escape. 427 + out.WriteString(`\\`) 428 + atStart = false 429 + afterGroupStart = false 430 + continue 431 + } 432 + next := bre[i+1] 433 + switch next { 434 + case '(': 435 + out.WriteByte('(') 436 + i++ 437 + atStart = false 438 + afterGroupStart = true 439 + continue 440 + case ')': 441 + out.WriteByte(')') 442 + i++ 443 + atStart = false 444 + afterGroupStart = false 445 + continue 446 + case '{': 447 + // Interval: copy `{...\}` as `{...}`. 
448 + j := i + 2 449 + out.WriteByte('{') 450 + for j < len(bre) { 451 + if bre[j] == '\\' && j+1 < len(bre) && bre[j+1] == '}' { 452 + out.WriteByte('}') 453 + j += 2 454 + break 455 + } 456 + out.WriteByte(bre[j]) 457 + j++ 458 + } 459 + i = j - 1 460 + atStart = false 461 + afterGroupStart = false 462 + continue 463 + case '1', '2', '3', '4', '5', '6', '7', '8', '9': 464 + return "", fmt.Errorf("backreferences not supported") 465 + case '.', '*', '[', ']', '^', '$', '\\': 466 + // Pass through as escaped literal in Go regex too. 467 + out.WriteByte('\\') 468 + out.WriteByte(next) 469 + i++ 470 + atStart = false 471 + afterGroupStart = false 472 + continue 473 + default: 474 + // Other `\X` sequences: pass through unchanged. This covers 475 + // character classes like `\b` etc., which Go regex supports. 476 + out.WriteByte('\\') 477 + out.WriteByte(next) 478 + i++ 479 + atStart = false 480 + afterGroupStart = false 481 + continue 482 + } 483 + case '(', ')': 484 + // Literal parens in BRE — escape for Go regex. 485 + out.WriteByte('\\') 486 + out.WriteByte(c) 487 + atStart = false 488 + afterGroupStart = false 489 + case '{', '}': 490 + // Literal braces in BRE — escape for Go regex. 491 + out.WriteByte('\\') 492 + out.WriteByte(c) 493 + atStart = false 494 + afterGroupStart = false 495 + case '*': 496 + if atStart || afterGroupStart { 497 + // Literal `*` at start of expression or just after `\(`. 498 + out.WriteString(`\*`) 499 + } else { 500 + out.WriteByte('*') 501 + } 502 + atStart = false 503 + afterGroupStart = false 504 + case '[': 505 + // Copy bracket expression verbatim. Handle a leading `]` 506 + // (which is a literal in POSIX brackets) and a leading `^`. 
507 + out.WriteByte('[') 508 + j := i + 1 509 + if j < len(bre) && bre[j] == '^' { 510 + out.WriteByte('^') 511 + j++ 512 + } 513 + if j < len(bre) && bre[j] == ']' { 514 + out.WriteByte(']') 515 + j++ 516 + } 517 + for j < len(bre) && bre[j] != ']' { 518 + out.WriteByte(bre[j]) 519 + j++ 520 + } 521 + if j < len(bre) { 522 + out.WriteByte(']') 523 + } 524 + i = j 525 + atStart = false 526 + afterGroupStart = false 527 + default: 528 + out.WriteByte(c) 529 + atStart = false 530 + afterGroupStart = false 531 + } 532 + } 533 + return out.String(), nil 378 534 } 379 535 380 536 // jsParseInt mimics JavaScript's parseInt(s, 10): skip leading whitespace,
+28 -2
command/internal/expr/expr_test.go
··· 124 124 wantExit: 2, 125 125 }, 126 126 { 127 + name: "addition with non-integer right operand", 128 + args: []string{"1", "+", "foo"}, 129 + wantStderr: "expr: non-integer argument\n", 130 + wantExit: 2, 131 + }, 132 + { 127 133 name: "numeric equality true", 128 134 args: []string{"3", "=", "3"}, 129 135 wantStdout: "1\n", ··· 199 205 }, 200 206 { 201 207 name: "match anchored capture group", 202 - args: []string{"abc123", ":", "abc([0-9]+)"}, 208 + args: []string{"abc123", ":", `abc\([0-9]+\)`}, 203 209 wantStdout: "123\n", 204 210 }, 205 211 { 212 + name: "match anchored BRE grouping returns capture", 213 + args: []string{"abc", ":", `a\(b\)c`}, 214 + wantStdout: "b\n", 215 + }, 216 + { 217 + name: "match anchored BRE dot returns whole match length", 218 + args: []string{"abc", ":", "a.c"}, 219 + wantStdout: "3\n", 220 + }, 221 + { 222 + name: "match anchored BRE leading star is literal", 223 + args: []string{"*foo", ":", "*foo"}, 224 + wantStdout: "4\n", 225 + }, 226 + { 227 + name: "match anchored BRE interval", 228 + args: []string{"aaa", ":", `a\{2\}`}, 229 + wantStdout: "2\n", 230 + }, 231 + { 206 232 name: "match function unanchored returns length", 207 233 args: []string{"match", "abcdef", "cd"}, 208 234 wantStdout: "2\n", 209 235 }, 210 236 { 211 237 name: "match function with capture", 212 - args: []string{"match", "hello world", "(w[a-z]+)"}, 238 + args: []string{"match", "hello world", `\(w[a-z]+\)`}, 213 239 wantStdout: "world\n", 214 240 }, 215 241 {