this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

encoding/jsonschema: fix issue 3176

https://cuelang.org/issue/3176 describes a situation where a higher
level constraint omitted a necessary term from its disjunction because
of a `oneOf`. On inspection, the current logic for `usedTypes` and
`allowedTypes` seems flaky.

Notably, `usedTypes` does not seem to fit its purported purpose, as
described in this comment [1] by Marcel:

> Used means that the type [is] implied somewhere (e.g. by a constraint
> or in the "all" bucket). "types" means that this type should be
> represented (and if it hasn't yet up till now, it needs an explicit
> type).

- various constraint types that adjust `usedTypes` (e.g. `minLength`) do
_not_ imply a particular type, as they only apply if the object is that
particular type.
- the `state.value` logic also sets `usedTypes` but that's also
incorrect because it sets it for nested values and also inappropriately
when the value is part of an enum (when it's in an enum, we know the
union of the types in the enum, not their intersection).

We fix this by making the invariants simpler (and renaming `usedTypes`
to `knownTypes` to hopefully make its meaning clearer). Namely,
`allowedTypes` holds the full set of types that the result is allowed to
be; `knownTypes` holds the full set of types that the non-type-specific
constraints are known to imply.

For example:

{"enum": [1, null]}

would result in knownTypes={number,null}, allowedTypes={number,null}

but:

{"types": ["number", "null"]}

would result in: knownTypes=allTypes, allowedTypes={number,null} because
the latter adds no elements to state.all that imply the types where the
former does.

None of this logic is entirely ideal: it would be considerably nicer if
we could emit much more naive CUE and let the evaluator simplify it for
us, but we're not there yet.

Note: this does improve the output in some of the existing test cases.

Fixes #3176.

[1] https://cue-review.googlesource.com/c/cue/+/6302/10..14/encoding/jsonschema/decode.go#b433

Signed-off-by: Roger Peppe <rogpeppe@gmail.com>
Change-Id: I8fc29b3de035d1ecad62b5c1bf6a8ef136660dc9
Reviewed-on: https://review.gerrithub.io/c/cue-lang/cue/+/1199309
Unity-Result: CUE porcuepine <cue.porcuepine@gmail.com>
Reviewed-by: Daniel Martí <mvdan@mvdan.cc>
TryBot-Result: CUEcueckoo <cueckoo@cuelang.org>

+120 -104
+31 -31
encoding/jsonschema/constraints.go
··· 192 192 193 193 p1("enum", func(key string, n cue.Value, s *state) { 194 194 var a []ast.Expr 195 + var types cue.Kind 195 196 for _, x := range s.listItems("enum", n, true) { 197 + if (s.allowedTypes & x.Kind()) == 0 { 198 + // Enum value is redundant because it's 199 + // not in the allowed type set. 200 + continue 201 + } 196 202 a = append(a, s.value(x)) 203 + types |= x.Kind() 197 204 } 205 + s.knownTypes &= types 206 + s.allowedTypes &= types 198 207 s.all.add(n, ast.NewBinExpr(token.OR, a...)) 199 208 }), 200 209 ··· 207 216 208 217 p1d("const", 6, func(key string, n cue.Value, s *state) { 209 218 s.all.add(n, s.value(n)) 219 + s.allowedTypes &= n.Kind() 220 + s.knownTypes &= n.Kind() 210 221 }), 211 222 212 223 p1("default", func(key string, n cue.Value, s *state) { ··· 249 260 p1("$defs", addDefinitions), 250 261 p1("definitions", addDefinitions), 251 262 p1("$ref", func(key string, n cue.Value, s *state) { 252 - s.usedTypes = allTypes 253 - 254 263 u := s.resolveURI(n) 255 264 256 265 fragmentParts, err := splitFragment(u) ··· 288 297 289 298 p2("allOf", func(key string, n cue.Value, s *state) { 290 299 var a []ast.Expr 300 + var knownTypes cue.Kind 291 301 for _, v := range s.listItems("allOf", n, false) { 292 302 x, sub := s.schemaState(v, s.allowedTypes, nil, true) 293 303 s.allowedTypes &= sub.allowedTypes 294 - s.usedTypes |= sub.usedTypes 295 304 if sub.hasConstraints() { 305 + // This might seem a little odd, since the actual 306 + // types are the intersection of the known types 307 + // of the allOf members. However, knownTypes 308 + // is really there to avoid adding redundant disjunctions. 309 + // So if we have (int & string) & (disjunction) 310 + // we definitely don't have to add int or string to 311 + // disjunction. 312 + knownTypes |= sub.knownTypes 296 313 a = append(a, x) 297 314 } 298 315 } 316 + // TODO maybe give an error/warning if s.allowedTypes == 0 317 + // as that's a known-impossible assertion? 299 318 if len(a) > 0 { 319 + s.knownTypes &= knownTypes 300 320 s.all.add(n, ast.NewBinExpr(token.AND, a...)) 301 321 } 302 322 }), ··· 304 324 p2("anyOf", func(key string, n cue.Value, s *state) { 305 325 var types cue.Kind 306 326 var a []ast.Expr 327 + var knownTypes cue.Kind 307 328 for _, v := range s.listItems("anyOf", n, false) { 308 329 x, sub := s.schemaState(v, s.allowedTypes, nil, true) 309 330 types |= sub.allowedTypes 331 + knownTypes |= sub.knownTypes 310 332 a = append(a, x) 311 333 } 312 334 s.allowedTypes &= types 313 335 if len(a) > 0 { 336 + s.knownTypes &= knownTypes 314 337 s.all.add(n, ast.NewBinExpr(token.OR, a...)) 315 338 } 316 339 }), 317 340 318 341 p2("oneOf", func(key string, n cue.Value, s *state) { 319 342 var types cue.Kind 343 + var knownTypes cue.Kind 320 344 var a []ast.Expr 321 345 hasSome := false 322 346 for _, v := range s.listItems("oneOf", n, false) { ··· 329 353 } 330 354 331 355 if !isAny(x) { 356 + knownTypes |= sub.knownTypes 332 357 a = append(a, x) 333 358 } 334 359 } 360 + // TODO if there are no elements in the oneOf, validation 361 + // should fail. 335 362 s.allowedTypes &= types 336 363 if len(a) > 0 && hasSome { 337 - s.usedTypes = allTypes 364 + s.knownTypes &= knownTypes 338 365 s.all.add(n, ast.NewBinExpr(token.OR, a...)) 339 366 } 340 367 ··· 352 379 } 353 380 return 354 381 } 355 - s.usedTypes |= cue.StringKind 356 382 s.add(n, stringType, &ast.UnaryExpr{Op: token.MAT, X: s.string(n)}) 357 383 }), 358 384 359 385 p1("minLength", func(key string, n cue.Value, s *state) { 360 - s.usedTypes |= cue.StringKind 361 386 min := s.number(n) 362 387 strings := s.addImport(n, "strings") 363 388 s.add(n, stringType, ast.NewCall(ast.NewSel(strings, "MinRunes"), min)) 364 389 }), 365 390 366 391 p1("maxLength", func(key string, n cue.Value, s *state) { 367 - s.usedTypes |= cue.StringKind 368 392 max := s.number(n) 369 393 strings := s.addImport(n, "strings") 370 394 s.add(n, stringType, ast.NewCall(ast.NewSel(strings, "MaxRunes"), max)) ··· 372 396 373 397 p1d("contentMediaType", 7, func(key string, n cue.Value, s *state) { 374 398 // TODO: only mark as used if it generates something. 375 - // s.usedTypes |= cue.StringKind 376 399 }), 377 400 378 401 p1d("contentEncoding", 7, func(key string, n cue.Value, s *state) { 379 402 // TODO: only mark as used if it generates something. 380 - // s.usedTypes |= cue.StringKind 381 403 // 7bit, 8bit, binary, quoted-printable and base64. 382 404 // RFC 2054, part 6.1. 383 405 // https://tools.ietf.org/html/rfc2045 ··· 387 409 // Number constraints 388 410 389 411 p2("minimum", func(key string, n cue.Value, s *state) { 390 - s.usedTypes |= cue.NumberKind 391 412 op := token.GEQ 392 413 if s.exclusiveMin { 393 414 op = token.GTR ··· 400 421 s.exclusiveMin = true 401 422 return 402 423 } 403 - s.usedTypes |= cue.NumberKind 404 424 s.add(n, numType, &ast.UnaryExpr{Op: token.GTR, X: s.number(n)}) 405 425 }), 406 426 407 427 p2("maximum", func(key string, n cue.Value, s *state) { 408 - s.usedTypes |= cue.NumberKind 409 428 op := token.LEQ 410 429 if s.exclusiveMax { 411 430 op = token.LSS ··· 418 437 s.exclusiveMax = true 419 438 return 420 439 } 421 - s.usedTypes |= cue.NumberKind 422 440 s.add(n, numType, &ast.UnaryExpr{Op: token.LSS, X: s.number(n)}) 423 441 }), 424 442 425 443 p1("multipleOf", func(key string, n cue.Value, s *state) { 426 - s.usedTypes |= cue.NumberKind 427 444 multiple := s.number(n) 428 445 var x big.Int 429 446 _, _ = n.MantExp(&x) ··· 437 454 // Object constraints 438 455 439 456 p1("properties", func(key string, n cue.Value, s *state) { 440 - s.usedTypes |= cue.StructKind 441 457 obj := s.object(n) 442 458 443 459 if n.Kind() != cue.StructKind { ··· 475 491 return 476 492 } 477 493 478 - s.usedTypes |= cue.StructKind 479 - 480 494 // TODO: detect that properties is defined somewhere. 481 495 // s.errf(n, `"required" without a "properties" field`) 482 496 obj := s.object(n) ··· 518 532 p1d("propertyNames", 6, func(key string, n cue.Value, s *state) { 519 533 // [=~pattern]: _ 520 534 if names, _ := s.schemaState(n, cue.StringKind, nil, false); !isAny(names) { 521 - s.usedTypes |= cue.StructKind 522 535 x := ast.NewStruct(ast.NewList(names), ast.NewIdent("_")) 523 536 s.add(n, objectType, x) 524 537 } ··· 526 539 527 540 // TODO: reenable when we have proper non-monotonic contraint validation. 528 541 // p1("minProperties", func(key string, n cue.Value, s *state) { 529 - // s.usedTypes |= cue.StructKind 530 542 531 543 // pkg := s.addImport(n, "struct") 532 544 // s.addConjunct(n, ast.NewCall(ast.NewSel(pkg, "MinFields"), s.uint(n))) 533 545 // }), 534 546 535 547 p1("maxProperties", func(key string, n cue.Value, s *state) { 536 - s.usedTypes |= cue.StructKind 537 - 538 548 pkg := s.addImport(n, "struct") 539 549 x := ast.NewCall(ast.NewSel(pkg, "MaxFields"), s.uint(n)) 540 550 s.add(n, objectType, x) 541 551 }), 542 552 543 553 p1("dependencies", func(key string, n cue.Value, s *state) { 544 - s.usedTypes |= cue.StructKind 545 - 546 554 // Schema and property dependencies. 547 555 // TODO: the easiest implementation is with comprehensions. 548 556 // The nicer implementation is with disjunctions. This has to be done ··· 556 564 }), 557 565 558 566 p2("patternProperties", func(key string, n cue.Value, s *state) { 559 - s.usedTypes |= cue.StructKind 560 567 if n.Kind() != cue.StructKind { 561 568 s.errf(n, `value of "patternProperties" must be an object, found %v`, n.Kind()) 562 569 } ··· 583 590 s.closeStruct = !s.boolValue(n) 584 591 585 592 case cue.StructKind: 586 - s.usedTypes |= cue.StructKind 587 593 s.closeStruct = true 588 594 obj := s.object(n) 589 595 if len(obj.Elts) == 0 { ··· 609 615 // Array constraints. 610 616 611 617 p1("items", func(key string, n cue.Value, s *state) { 612 - s.usedTypes |= cue.ListKind 613 618 switch n.Kind() { 614 619 case cue.StructKind: 615 620 elem := s.schema(n) ··· 638 643 639 644 case cue.StructKind: 640 645 if s.list != nil { 641 - s.usedTypes |= cue.ListKind 642 646 elem := s.schema(n) 643 647 s.list.Elts = append(s.list.Elts, &ast.Ellipsis{Type: elem}) 644 648 } ··· 649 653 }), 650 654 651 655 p1("contains", func(key string, n cue.Value, s *state) { 652 - s.usedTypes |= cue.ListKind 653 656 list := s.addImport(n, "list") 654 657 // TODO: Passing non-concrete values is not yet supported in CUE. 655 658 if x := s.schema(n); !isAny(x) { ··· 661 664 // TODO: min/maxContains 662 665 663 666 p1("minItems", func(key string, n cue.Value, s *state) { 664 - s.usedTypes |= cue.ListKind 665 667 a := []ast.Expr{} 666 668 p, err := n.Uint64() 667 669 if err != nil { ··· 678 680 }), 679 681 680 682 p1("maxItems", func(key string, n cue.Value, s *state) { 681 - s.usedTypes |= cue.ListKind 682 683 list := s.addImport(n, "list") 683 684 x := ast.NewCall(ast.NewSel(list, "MaxItems"), clearPos(s.uint(n))) 684 685 s.add(n, arrayType, x) 685 686 }), 686 687 687 688 p1("uniqueItems", func(key string, n cue.Value, s *state) { 688 - s.usedTypes |= cue.ListKind 689 689 if s.boolValue(n) { 690 690 list := s.addImport(n, "list") 691 691 s.add(n, arrayType, ast.NewCall(ast.NewSel(list, "UniqueItems")))
+78 -60
encoding/jsonschema/decode.go
··· 333 333 all constraintInfo // values and oneOf etc. 334 334 nullable *ast.BasicLit // nullable 335 335 336 - usedTypes cue.Kind 336 + // allowedTypes holds the set of types that 337 + // this node is allowed to be. 337 338 allowedTypes cue.Kind 339 + 340 + // knownTypes holds the set of types that this node 341 + // is known to be one of by virtue of the constraints inside 342 + // all. This is used to avoid adding redundant elements 343 + // to the disjunction created by [state.finalize]. 344 + knownTypes cue.Kind 338 345 339 346 default_ ast.Expr 340 347 examples []ast.Expr ··· 414 421 415 422 // finalize constructs a CUE type from the collected constraints. 416 423 func (s *state) finalize() (e ast.Expr) { 424 + if s.allowedTypes == 0 { 425 + // Nothing is possible. 426 + s.addErr(errors.Newf(s.pos.Pos(), "constraints are not possible to satisfy")) 427 + return &ast.BottomLit{} 428 + } 429 + 417 430 conjuncts := []ast.Expr{} 418 431 disjuncts := []ast.Expr{} 419 - 420 - types := s.allowedTypes &^ s.usedTypes 421 - if types == allTypes { 422 - disjuncts = append(disjuncts, ast.NewIdent("_")) 423 - types = 0 424 - } 425 432 426 433 // Sort literal structs and list last for nicer formatting. 427 434 sort.SliceStable(s.types[arrayType].constraints, func(i, j int) bool { ··· 438 445 typIndex int 439 446 } 440 447 var excluded []excludeInfo 441 - npossible := 0 442 - nexcluded := 0 443 - for i, t := range s.types { 444 - k := coreToCUE[i] 445 - isAllowed := s.allowedTypes&k != 0 446 - if len(t.constraints) > 0 { 447 - npossible++ 448 - if t.typ == nil && !isAllowed { 449 - // TODO this implies a redundant constraint, which is technically allowed, 450 - // but in practice probably represents a mistake. We could provide 451 - // a mode that warns about such likely errors. 452 - nexcluded++ 453 - for _, c := range t.constraints { 454 - excluded = append(excluded, excludeInfo{c.Pos(), i}) 455 - } 456 - continue 457 - } 458 - x := ast.NewBinExpr(token.AND, t.constraints...) 459 - disjuncts = append(disjuncts, x) 460 - } else if s.usedTypes&k != 0 { 461 - npossible++ 462 - continue 463 - } else if t.typ != nil { 464 - npossible++ 465 - if !isAllowed { 466 - // TODO this implies a redundant type constraint, which is technically allowed, 467 - // but in practice probably represents a mistake. We could provide 468 - // a mode that warns about such likely errors. 469 - nexcluded++ 470 - excluded = append(excluded, excludeInfo{t.typ.Pos(), i}) 471 - continue 448 + 449 + needsTypeDisjunction := s.allowedTypes != s.knownTypes 450 + if !needsTypeDisjunction { 451 + for i, t := range s.types { 452 + k := coreToCUE[i] 453 + if len(t.constraints) > 0 && s.allowedTypes&k != 0 { 454 + // We need to include at least one type-specific 455 + // constraint in the disjunction. 456 + needsTypeDisjunction = true 457 + break 472 458 } 473 - disjuncts = append(disjuncts, t.typ) 474 - } else if types&k != 0 { 475 - npossible++ 476 - x := kindToAST(k) 477 - if x != nil { 459 + } 460 + } 461 + 462 + if needsTypeDisjunction { 463 + npossible := 0 464 + nexcluded := 0 465 + for i, t := range s.types { 466 + k := coreToCUE[i] 467 + allowed := s.allowedTypes&k != 0 468 + switch { 469 + case len(t.constraints) > 0: 470 + npossible++ 471 + if !allowed { 472 + nexcluded++ 473 + for _, c := range t.constraints { 474 + excluded = append(excluded, excludeInfo{c.Pos(), i}) 475 + } 476 + continue 477 + } 478 + x := ast.NewBinExpr(token.AND, t.constraints...) 478 479 disjuncts = append(disjuncts, x) 480 + case allowed: 481 + npossible++ 482 + if s.knownTypes&k != 0 { 483 + disjuncts = append(disjuncts, kindToAST(k)) 484 + } 479 485 } 480 486 } 481 - } 482 - if nexcluded == npossible { 483 - // All possibilities have been excluded: this is an impossible 484 - // schema. 485 - for _, e := range excluded { 486 - s.addErr(errors.Newf(e.pos, 487 - "constraint not allowed because type %s is excluded", 488 - coreTypeName[e.typIndex], 489 - )) 487 + if nexcluded == npossible { 488 + // All possibilities have been excluded: this is an impossible 489 + // schema. 490 + for _, e := range excluded { 491 + s.addErr(errors.Newf(e.pos, 492 + "constraint not allowed because type %s is excluded", 493 + coreTypeName[e.typIndex], 494 + )) 495 + } 490 496 } 491 497 } 492 498 conjuncts = append(conjuncts, s.all.constraints...) 493 - 494 499 obj := s.obj 495 500 if obj == nil { 496 501 obj, _ = s.types[objectType].typ.(*ast.StructLit) ··· 507 512 } 508 513 509 514 if len(conjuncts) == 0 { 510 - e = &ast.BottomLit{} 515 + // There are no conjuncts, which can only happen when there 516 + // are no disjuncts, which can only happen when the entire 517 + // set of disjuncts is redundant with respect to the types 518 + // already implied by s.all. As we've already checked that 519 + // s.allowedTypes is non-zero (so we know that 520 + // it's not bottom) and we need _some_ expression 521 + // to be part of the subequent syntax, we use top. 522 + e = ast.NewIdent("_") 511 523 } else { 512 524 e = ast.NewBinExpr(token.AND, conjuncts...) 513 525 } ··· 523 535 // check conditions where default can be skipped. 524 536 switch x := s.default_.(type) { 525 537 case *ast.ListLit: 526 - if s.usedTypes == cue.ListKind && len(x.Elts) == 0 { 538 + if s.allowedTypes == cue.ListKind && len(x.Elts) == 0 { 527 539 break outer 528 540 } 529 541 } ··· 556 568 } 557 569 } 558 570 571 + // Now that we've expressed the schema as actual syntax, 572 + // all the allowed types are actually explicit and will not 573 + // need to be mentioned again. 574 + s.knownTypes = s.allowedTypes 559 575 return e 560 576 } 561 577 ··· 594 610 return expr 595 611 } 596 612 597 - // schemaState is a low-level API for schema. isLogical specifies whether the 598 - // caller is a logical operator like anyOf, allOf, oneOf, or not. 599 - func (s *state) schemaState(n cue.Value, types cue.Kind, idRef []label, isLogical bool) (ast.Expr, *state) { 613 + // schemaState returns a new state value derived from s. 614 + // n holds the JSONSchema node to translate to a schema. 615 + // types holds the set of possible types that the value can hold. 616 + // idRef holds the path to the value. 617 + // isLogical specifies whether the caller is a logical operator like anyOf, allOf, oneOf, or not. 618 + func (s *state) schemaState(n cue.Value, types cue.Kind, idRef []label, isLogical bool) (_e ast.Expr, _ *state) { 600 619 state := &state{ 601 620 up: s, 602 621 schemaVersion: s.schemaVersion, 603 622 isSchema: s.isSchema, 604 623 decoder: s.decoder, 605 624 allowedTypes: types, 625 + knownTypes: allTypes, 606 626 path: s.path, 607 627 idRef: idRef, 608 628 pos: n, ··· 638 658 639 659 func (s *state) value(n cue.Value) ast.Expr { 640 660 k := n.Kind() 641 - s.usedTypes |= k 642 - s.allowedTypes &= k 643 661 switch k { 644 662 case cue.ListKind: 645 663 a := []ast.Expr{}
+1 -1
encoding/jsonschema/testdata/emptyobjinanyof.txtar
··· 23 23 -- out/decode/cue -- 24 24 _ 25 25 26 - #shell: (string | ("bash" | "sh" | "cmd" | "powershell")) & string 26 + #shell: string | ("bash" | "sh" | "cmd" | "powershell")
+4 -2
encoding/jsonschema/testdata/impossibleallof.txtar
··· 6 6 ] 7 7 } 8 8 -- out/decode/err -- 9 - constraint not allowed because type string is excluded: 10 - type.json:4:11 9 + constraints are not possible to satisfy: 10 + type.json:1:1 11 + constraints are not possible to satisfy: 12 + type.json:4:9
+2 -2
encoding/jsonschema/testdata/issue3176.txtar
··· 20 20 21 21 ({ 22 22 ... 23 - } | strings.MaxRunes(3)) & { 23 + } | strings.MaxRunes(3)) & (string | { 24 24 {[=~"^x-" & !~"^()$"]: string} 25 25 ... 26 - } 26 + })
+1 -1
encoding/jsonschema/testdata/issue3351.txtar
··· 72 72 $let?: [string]: null | bool | number | string | [...] | { 73 73 [string]: _schema_1 74 74 } 75 - $sort?: (_schema_5 | [...number]) & _ 75 + $sort?: _schema_5 | [...number] 76 76 {[!~"^($else|$let|$sort)$"]: #["jsone-value"]} 77 77 78 78 #: "jsone-value": _schema_A | [..._schema_8]
+2 -2
encoding/jsonschema/testdata/typedis.txtar
··· 40 40 // Main schema 41 41 intOrString1?: int | string 42 42 intOrString2?: int | string 43 - intOrString3?: (int | string) & (number | string) 44 - disjunction?: (int | string) & (number | string) | int & >=3 43 + intOrString3?: int | string 44 + disjunction?: int | string | int & >=3 45 45 ...
+1 -5
encoding/jsonschema/testdata/typeexcluded.txtar
··· 120 120 e4?: [...string] 121 121 e5?: int & >=0 122 122 e6?: [...=~"^[A-Za-z0-9 _.-]+$"] 123 - e7?: (true | { 124 - ... 125 - } | { 123 + e7?: true | { 126 124 disableFix?: bool 127 125 ... 128 126 } & { 129 127 ignoreMediaFeatureNames?: list.UniqueItems() & [_, ...] & [...string] 130 - ... 131 - }) & { 132 128 ... 133 129 } 134 130 ...