this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

encoding/jsonschema: detect Perl regexps

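We want to treat Perl-only regexp syntax (which Go's regexp
implementation does not support) as a missing feature, reported
only when strict features are enabled, and make genuinely invalid
regular expressions an error unconditionally.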

Signed-off-by: Roger Peppe <rogpeppe@gmail.com>
Change-Id: I187be5f8846e02c9af514ec808fa19a8598e41ce
Reviewed-on: https://review.gerrithub.io/c/cue-lang/cue/+/1201127
TryBot-Result: CUEcueckoo <cueckoo@cuelang.org>
Reviewed-by: Daniel Martí <mvdan@mvdan.cc>

+93 -50
+3 -10
encoding/jsonschema/constraints_string.go
··· 15 15 package jsonschema 16 16 17 17 import ( 18 - "regexp" 19 18 "sync" 20 19 21 20 "cuelang.org/go/cue" ··· 50 49 } 51 50 52 51 func constraintPattern(key string, n cue.Value, s *state) { 53 - str, _ := s.strValue(n) 54 - if _, err := regexp.Compile(str); err != nil { 55 - if s.cfg.StrictFeatures { 56 - // TODO check if the error is only because of an unsupported 57 - // regexp feature (e.g. perl regexp) or because the regexp is just 58 - // bad. If the latter, this should be an error even if Strict is false. 59 - s.errf(n, "unsupported regexp: %v", err) 60 - } 52 + str, ok := s.regexpValue(n) 53 + if !ok { 61 54 return 62 55 } 63 - s.add(n, stringType, &ast.UnaryExpr{Op: token.MAT, X: s.string(n)}) 56 + s.add(n, stringType, &ast.UnaryExpr{Op: token.MAT, X: str}) 64 57 } 65 58 66 59 type formatFuncInfo struct {
+26
encoding/jsonschema/decode.go
··· 22 22 "fmt" 23 23 "math" 24 24 "net/url" 25 + "regexp/syntax" 25 26 "sort" 26 27 "strconv" 27 28 "strings" ··· 254 255 return "", false 255 256 } 256 257 return s, true 258 + } 259 + 260 + func (d *decoder) regexpValue(n cue.Value) (ast.Expr, bool) { 261 + s, ok := d.strValue(n) 262 + if !ok { 263 + return nil, false 264 + } 265 + _, err := syntax.Parse(s, syntax.Perl) 266 + if err == nil { 267 + return d.string(n), true 268 + } 269 + var regErr *syntax.Error 270 + if errors.As(err, &regErr) && regErr.Code == syntax.ErrInvalidPerlOp { 271 + // It's Perl syntax that we'll never support because the CUE evaluation 272 + // engine uses Go's regexp implementation and because the missing 273 + // features are usually not there for good reason (e.g. exponential 274 + // runtime). In other words, this is a missing feature but not an invalid 275 + // regular expression as such. 276 + if d.cfg.StrictFeatures { 277 + d.errf(n, "unsupported Perl regexp syntax in %q: %v", s, err) 278 + } 279 + return nil, false 280 + } 281 + d.errf(n, "invalid regexp %q: %v", s, err) 282 + return nil, false 257 283 } 258 284 259 285 // const draftCutoff = 5
+8 -8
encoding/jsonschema/testdata/external/tests/draft2019-09/optional/ecmascript-regex.json
··· 47 47 "pattern": "^\\cC$" 48 48 }, 49 49 "skip": { 50 - "v2": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`", 51 - "v3": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`" 50 + "v2": "extract error: invalid regexp \"^\\\\cC$\": error parsing regexp: invalid escape sequence: `\\c`", 51 + "v3": "extract error: invalid regexp \"^\\\\cC$\": error parsing regexp: invalid escape sequence: `\\c`" 52 52 }, 53 53 "tests": [ 54 54 { ··· 79 79 "pattern": "^\\cc$" 80 80 }, 81 81 "skip": { 82 - "v2": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`", 83 - "v3": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`" 82 + "v2": "extract error: invalid regexp \"^\\\\cc$\": error parsing regexp: invalid escape sequence: `\\c`", 83 + "v3": "extract error: invalid regexp \"^\\\\cc$\": error parsing regexp: invalid escape sequence: `\\c`" 84 84 }, 85 85 "tests": [ 86 86 { ··· 370 370 "pattern": "\\p{Letter}cole" 371 371 }, 372 372 "skip": { 373 - "v2": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{Letter}`", 374 - "v3": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{Letter}`" 373 + "v2": "extract error: invalid regexp \"\\\\p{Letter}cole\": error parsing regexp: invalid character class range: `\\p{Letter}`", 374 + "v3": "extract error: invalid regexp \"\\\\p{Letter}cole\": error parsing regexp: invalid character class range: `\\p{Letter}`" 375 375 }, 376 376 "tests": [ 377 377 { ··· 496 496 "pattern": "^\\p{digit}+$" 497 497 }, 498 498 "skip": { 499 - "v2": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{digit}`", 500 - "v3": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{digit}`" 499 + "v2": "extract error: invalid regexp 
\"^\\\\p{digit}+$\": error parsing regexp: invalid character class range: `\\p{digit}`", 500 + "v3": "extract error: invalid regexp \"^\\\\p{digit}+$\": error parsing regexp: invalid character class range: `\\p{digit}`" 501 501 }, 502 502 "tests": [ 503 503 {
+8 -8
encoding/jsonschema/testdata/external/tests/draft2020-12/optional/ecmascript-regex.json
··· 47 47 "pattern": "^\\cC$" 48 48 }, 49 49 "skip": { 50 - "v2": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`", 51 - "v3": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`" 50 + "v2": "extract error: invalid regexp \"^\\\\cC$\": error parsing regexp: invalid escape sequence: `\\c`", 51 + "v3": "extract error: invalid regexp \"^\\\\cC$\": error parsing regexp: invalid escape sequence: `\\c`" 52 52 }, 53 53 "tests": [ 54 54 { ··· 79 79 "pattern": "^\\cc$" 80 80 }, 81 81 "skip": { 82 - "v2": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`", 83 - "v3": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`" 82 + "v2": "extract error: invalid regexp \"^\\\\cc$\": error parsing regexp: invalid escape sequence: `\\c`", 83 + "v3": "extract error: invalid regexp \"^\\\\cc$\": error parsing regexp: invalid escape sequence: `\\c`" 84 84 }, 85 85 "tests": [ 86 86 { ··· 370 370 "pattern": "\\p{Letter}cole" 371 371 }, 372 372 "skip": { 373 - "v2": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{Letter}`", 374 - "v3": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{Letter}`" 373 + "v2": "extract error: invalid regexp \"\\\\p{Letter}cole\": error parsing regexp: invalid character class range: `\\p{Letter}`", 374 + "v3": "extract error: invalid regexp \"\\\\p{Letter}cole\": error parsing regexp: invalid character class range: `\\p{Letter}`" 375 375 }, 376 376 "tests": [ 377 377 { ··· 514 514 "pattern": "^\\p{digit}+$" 515 515 }, 516 516 "skip": { 517 - "v2": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{digit}`", 518 - "v3": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{digit}`" 517 + "v2": "extract error: invalid regexp 
\"^\\\\p{digit}+$\": error parsing regexp: invalid character class range: `\\p{digit}`", 518 + "v3": "extract error: invalid regexp \"^\\\\p{digit}+$\": error parsing regexp: invalid character class range: `\\p{digit}`" 519 519 }, 520 520 "tests": [ 521 521 {
+8 -8
encoding/jsonschema/testdata/external/tests/draft4/optional/ecmascript-regex.json
··· 44 44 "pattern": "^\\cC$" 45 45 }, 46 46 "skip": { 47 - "v2": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`", 48 - "v3": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`" 47 + "v2": "extract error: invalid regexp \"^\\\\cC$\": error parsing regexp: invalid escape sequence: `\\c`", 48 + "v3": "extract error: invalid regexp \"^\\\\cC$\": error parsing regexp: invalid escape sequence: `\\c`" 49 49 }, 50 50 "tests": [ 51 51 { ··· 75 75 "pattern": "^\\cc$" 76 76 }, 77 77 "skip": { 78 - "v2": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`", 79 - "v3": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`" 78 + "v2": "extract error: invalid regexp \"^\\\\cc$\": error parsing regexp: invalid escape sequence: `\\c`", 79 + "v3": "extract error: invalid regexp \"^\\\\cc$\": error parsing regexp: invalid escape sequence: `\\c`" 80 80 }, 81 81 "tests": [ 82 82 { ··· 359 359 "pattern": "\\p{Letter}cole" 360 360 }, 361 361 "skip": { 362 - "v2": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{Letter}`", 363 - "v3": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{Letter}`" 362 + "v2": "extract error: invalid regexp \"\\\\p{Letter}cole\": error parsing regexp: invalid character class range: `\\p{Letter}`", 363 + "v3": "extract error: invalid regexp \"\\\\p{Letter}cole\": error parsing regexp: invalid character class range: `\\p{Letter}`" 364 364 }, 365 365 "tests": [ 366 366 { ··· 481 481 "pattern": "^\\p{digit}+$" 482 482 }, 483 483 "skip": { 484 - "v2": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{digit}`", 485 - "v3": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{digit}`" 484 + "v2": "extract error: invalid regexp 
\"^\\\\p{digit}+$\": error parsing regexp: invalid character class range: `\\p{digit}`", 485 + "v3": "extract error: invalid regexp \"^\\\\p{digit}+$\": error parsing regexp: invalid character class range: `\\p{digit}`" 486 486 }, 487 487 "tests": [ 488 488 {
+8 -8
encoding/jsonschema/testdata/external/tests/draft6/optional/ecmascript-regex.json
··· 44 44 "pattern": "^\\cC$" 45 45 }, 46 46 "skip": { 47 - "v2": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`", 48 - "v3": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`" 47 + "v2": "extract error: invalid regexp \"^\\\\cC$\": error parsing regexp: invalid escape sequence: `\\c`", 48 + "v3": "extract error: invalid regexp \"^\\\\cC$\": error parsing regexp: invalid escape sequence: `\\c`" 49 49 }, 50 50 "tests": [ 51 51 { ··· 75 75 "pattern": "^\\cc$" 76 76 }, 77 77 "skip": { 78 - "v2": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`", 79 - "v3": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`" 78 + "v2": "extract error: invalid regexp \"^\\\\cc$\": error parsing regexp: invalid escape sequence: `\\c`", 79 + "v3": "extract error: invalid regexp \"^\\\\cc$\": error parsing regexp: invalid escape sequence: `\\c`" 80 80 }, 81 81 "tests": [ 82 82 { ··· 359 359 "pattern": "\\p{Letter}cole" 360 360 }, 361 361 "skip": { 362 - "v2": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{Letter}`", 363 - "v3": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{Letter}`" 362 + "v2": "extract error: invalid regexp \"\\\\p{Letter}cole\": error parsing regexp: invalid character class range: `\\p{Letter}`", 363 + "v3": "extract error: invalid regexp \"\\\\p{Letter}cole\": error parsing regexp: invalid character class range: `\\p{Letter}`" 364 364 }, 365 365 "tests": [ 366 366 { ··· 481 481 "pattern": "^\\p{digit}+$" 482 482 }, 483 483 "skip": { 484 - "v2": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{digit}`", 485 - "v3": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{digit}`" 484 + "v2": "extract error: invalid regexp 
\"^\\\\p{digit}+$\": error parsing regexp: invalid character class range: `\\p{digit}`", 485 + "v3": "extract error: invalid regexp \"^\\\\p{digit}+$\": error parsing regexp: invalid character class range: `\\p{digit}`" 486 486 }, 487 487 "tests": [ 488 488 {
+8 -8
encoding/jsonschema/testdata/external/tests/draft7/optional/ecmascript-regex.json
··· 44 44 "pattern": "^\\cC$" 45 45 }, 46 46 "skip": { 47 - "v2": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`", 48 - "v3": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`" 47 + "v2": "extract error: invalid regexp \"^\\\\cC$\": error parsing regexp: invalid escape sequence: `\\c`", 48 + "v3": "extract error: invalid regexp \"^\\\\cC$\": error parsing regexp: invalid escape sequence: `\\c`" 49 49 }, 50 50 "tests": [ 51 51 { ··· 75 75 "pattern": "^\\cc$" 76 76 }, 77 77 "skip": { 78 - "v2": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`", 79 - "v3": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`" 78 + "v2": "extract error: invalid regexp \"^\\\\cc$\": error parsing regexp: invalid escape sequence: `\\c`", 79 + "v3": "extract error: invalid regexp \"^\\\\cc$\": error parsing regexp: invalid escape sequence: `\\c`" 80 80 }, 81 81 "tests": [ 82 82 { ··· 359 359 "pattern": "\\p{Letter}cole" 360 360 }, 361 361 "skip": { 362 - "v2": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{Letter}`", 363 - "v3": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{Letter}`" 362 + "v2": "extract error: invalid regexp \"\\\\p{Letter}cole\": error parsing regexp: invalid character class range: `\\p{Letter}`", 363 + "v3": "extract error: invalid regexp \"\\\\p{Letter}cole\": error parsing regexp: invalid character class range: `\\p{Letter}`" 364 364 }, 365 365 "tests": [ 366 366 { ··· 481 481 "pattern": "^\\p{digit}+$" 482 482 }, 483 483 "skip": { 484 - "v2": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{digit}`", 485 - "v3": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{digit}`" 484 + "v2": "extract error: invalid regexp 
\"^\\\\p{digit}+$\": error parsing regexp: invalid character class range: `\\p{digit}`", 485 + "v3": "extract error: invalid regexp \"^\\\\p{digit}+$\": error parsing regexp: invalid character class range: `\\p{digit}`" 486 486 }, 487 487 "tests": [ 488 488 {
+11
encoding/jsonschema/testdata/txtar/perl_pattern.txtar
··· 1 + Unsupported Perl regexp syntax should not result in failure when #strictFeatures 2 + isn't enabled. 3 + 4 + -- schema.json -- 5 + { 6 + "$schema": "https://json-schema.org/draft/2020-12/schema", 7 + "pattern": "^(?![ \\t\\n]*\\(default(.*)\\))[\\s\\S]*" 8 + } 9 + -- out/decode/extract -- 10 + @jsonschema(schema="https://json-schema.org/draft/2020-12/schema") 11 + _
+13
encoding/jsonschema/testdata/txtar/perl_pattern_strict.txtar
··· 1 + Unsupported Perl regexp syntax should result in failure when #strictFeatures 2 + is enabled. 3 + #strictFeatures 4 + 5 + -- schema.json -- 6 + { 7 + "$schema": "https://json-schema.org/draft/2020-12/schema", 8 + "pattern": "^(?![ \\t\\n]*\\(default(.*)\\))[\\s\\S]*" 9 + } 10 + -- out/decode/extract -- 11 + ERROR: 12 + unsupported Perl regexp syntax in "^(?![ \\t\\n]*\\(default(.*)\\))[\\s\\S]*": error parsing regexp: invalid or unsupported Perl syntax: `(?!`: 13 + schema.json:3:5