Native OCaml Rego/OPA policy engine
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

ocaml-rego: more OPA conformance fixes

Parser:
- assign IN explicit precedence (between AMPERSAND and EQEQ) so
arithmetic/relational operators bind tighter than membership; this
drops the 32 shift/reduce conflicts the IN production used to cause.
- accept patterns (not just identifiers) on the value/key positions of
[some ... in ...], so [some "foo" in [...]] and [some {"a": x} in
data.array] parse against the [SomeIn] AST shape.
- add a [ref_brack_head] alternative for the legacy [p[100]] body-less
partial-set form.

Evaluator:
- [eval_some_in] uses [bind_value_pattern] so [some] handles literal
patterns, array/object destructuring, and bound vars.
- [object.get] with an [Array] path walks nested objects/arrays and
returns the default when any segment is missing; non-object subjects
now error.
- glob.match recognises [{a,b,...}] alternation and [[!...]] negated
classes; default delimiter is [.] (matching gobwas/glob), and an
empty array is treated like the default.
- regex.* switched to the Re.Pcre flavour so backslash escapes ([\[],
[\d], etc.) match Go's regexp; regex.replace now substitutes [$N]
group references.

+167 -47
+148 -39
lib/eval.ml
··· 478 478 479 479 (* ── Glob ──────────────────────────────────────────────────────────────── *) 480 480 481 - (** [glob_match ~delims pattern s] matches a glob pattern against a string. [*] 482 - matches any sequence of non-delimiter chars; [**] matches anything; [?] 483 - matches one non-delimiter char; [[...]] matches a character class. Used by 484 - the [glob.match] builtin (see OPA's policy reference). *) 485 - let glob_match ~delims pattern s = 481 + (** [glob_match ~delims pattern s] matches a glob pattern against a string. 482 + Recognised: [*]/[**] (segment / cross-segment wildcard), [?] (one 483 + non-delimiter char), [[...]] / [[!...]] (character class with optional 484 + negation, supports [a-z] ranges), [{a,b}] (alternation), and 485 + backslash-escaping. Mirrors OPA's gobwas/glob semantics. *) 486 + let rec glob_match ~delims pattern s = 486 487 let is_delim c = String.contains delims c in 487 488 let plen = String.length pattern in 488 489 let slen = String.length s in 489 - let rec parse_class i = 490 - if i >= plen then raise (Eval_error "glob: unterminated character class"); 491 - if pattern.[i] = ']' then ([], i + 1) 492 - else 493 - let lo = pattern.[i] in 494 - let hi, next = 495 - if i + 2 < plen && pattern.[i + 1] = '-' && pattern.[i + 2] <> ']' then 496 - (pattern.[i + 2], i + 3) 497 - else (lo, i + 1) 498 - in 499 - let rest, end_i = parse_class next in 500 - ((lo, hi) :: rest, end_i) 490 + let parse_class start = 491 + let neg, content_start = 492 + if start < plen && pattern.[start] = '!' then (true, start + 1) 493 + else (false, start) 494 + in 495 + let rec loop i = 496 + if i >= plen then raise (Eval_error "glob: unterminated character class") 497 + else if pattern.[i] = ']' then ([], i + 1) 498 + else 499 + let lo = pattern.[i] in 500 + let hi, next = 501 + if i + 2 < plen && pattern.[i + 1] = '-' && pattern.[i + 2] <> ']' 502 + then (pattern.[i + 2], i + 3) 503 + else (lo, i + 1) 504 + in 505 + let rest, end_i = loop next in 506 + ((lo, hi) :: rest, end_i) 507 + in 508 + let ranges, end_i = loop content_start in 509 + (neg, ranges, end_i) 501 510 in 502 - let class_match ranges c = 503 - List.exists (fun (lo, hi) -> c >= lo && c <= hi) ranges 511 + let class_match (neg, ranges, _) c = 512 + let hit = List.exists (fun (lo, hi) -> c >= lo && c <= hi) ranges in 513 + if neg then not hit else hit 514 + in 515 + (* Split a [{a,b,c}] body at top-level commas. Nested braces are 516 + preserved literally inside an alternation branch. *) 517 + let parse_alternation start = 518 + let buf = Buffer.create 8 in 519 + let rec loop i depth alts = 520 + if i >= plen then raise (Eval_error "glob: unterminated alternation") 521 + else 522 + match pattern.[i] with 523 + | '}' when depth = 0 -> (List.rev (Buffer.contents buf :: alts), i + 1) 524 + | ',' when depth = 0 -> 525 + let alts = Buffer.contents buf :: alts in 526 + Buffer.clear buf; 527 + loop (i + 1) depth alts 528 + | '{' -> 529 + Buffer.add_char buf pattern.[i]; 530 + loop (i + 1) (depth + 1) alts 531 + | '}' -> 532 + Buffer.add_char buf pattern.[i]; 533 + loop (i + 1) (depth - 1) alts 534 + | c -> 535 + Buffer.add_char buf c; 536 + loop (i + 1) depth alts 537 + in 538 + loop start 0 [] 504 539 in 505 540 let rec m pi si = 506 541 if pi = plen then si = slen ··· 523 558 try_ si 524 559 | '?' -> si < slen && (not (is_delim s.[si])) && m (pi + 1) (si + 1) 525 560 | '[' -> 526 - let ranges, next_pi = parse_class (pi + 1) in 527 - si < slen && class_match ranges s.[si] && m next_pi (si + 1) 561 + let neg, ranges, next_pi = parse_class (pi + 1) in 562 + si < slen 563 + && class_match (neg, ranges, next_pi) s.[si] 564 + && m next_pi (si + 1) 565 + | '{' -> 566 + let alts, next_pi = parse_alternation (pi + 1) in 567 + let suffix = String.sub pattern next_pi (plen - next_pi) in 568 + let tail = String.sub s si (slen - si) in 569 + List.exists (fun alt -> glob_match ~delims (alt ^ suffix) tail) alts 528 570 | '\\' when pi + 1 < plen -> 529 571 si < slen && s.[si] = pattern.[pi + 1] && m (pi + 2) (si + 1) 530 572 | _ -> si < slen && c = s.[si] && m (pi + 1) (si + 1) ··· 1606 1648 eval_expr env bnds col_expr 1607 1649 |> List.concat_map (fun (col, b) -> 1608 1650 collection_pairs col 1609 - |> List.filter_map (fun (k, v) -> 1610 - match bind_pattern b val_expr v with 1611 - | None -> None 1612 - | Some b1 -> ( 1651 + |> List.concat_map (fun (k, v) -> 1652 + bind_value_pattern env b val_expr v 1653 + |> List.concat_map (fun b1 -> 1613 1654 match key_opt with 1614 - | None -> Some b1 1615 - | Some k_expr -> bind_pattern b1 k_expr k))) 1655 + | None -> [ b1 ] 1656 + | Some k_expr -> bind_value_pattern env b1 k_expr k))) 1616 1657 1617 1658 and collection_pairs = function 1618 1659 | Value.Array xs -> ··· 2171 2212 xs)) 2172 2213 | "sprintf", [ Value.String fmt; Value.Array vs ] -> 2173 2214 Value.String (rego_sprintf fmt vs) 2174 - (* Regex *) 2175 - | "regex.match", [ Value.String pat; Value.String s ] -> 2176 - Value.Bool (Re.execp (Re.Posix.compile_pat pat) s) 2215 + (* Regex — OPA's regex builtins use Go's [regexp] syntax (RE2). The 2216 + [Re.Pcre] flavour is the closest match Re ships, accepting the 2217 + Perl-style escapes ([\d], [\s], [\[], etc.) that the OPA test 2218 + suite relies on. *) 2219 + | "regex.match", [ Value.String pat; Value.String s ] -> ( 2220 + try Value.Bool (Re.execp (Re.Pcre.re pat |> Re.compile) s) 2221 + with _ -> raise (Eval_error "regex.match: invalid pattern")) 2177 2222 | "regex.is_valid", [ Value.String pat ] -> ( 2178 2223 try 2179 - let _ = Re.Posix.compile_pat pat in 2224 + let _ = Re.Pcre.re pat |> Re.compile in 2180 2225 Value.Bool true 2181 2226 with _ -> Value.Bool false) 2182 2227 | "regex.split", [ Value.String pat; Value.String s ] -> 2183 - Value.Array 2184 - (List.map 2185 - (fun p -> Value.String p) 2186 - (Re.split (Re.Posix.compile_pat pat) s)) 2228 + let re = 2229 + try Re.Pcre.re pat |> Re.compile 2230 + with _ -> raise (Eval_error "regex.split: invalid pattern") 2231 + in 2232 + Value.Array (List.map (fun p -> Value.String p) (Re.split re s)) 2187 2233 | "regex.find_n", [ Value.String pat; Value.String s; Value.Number n ] -> 2188 - let re = Re.Posix.compile_pat pat in 2234 + let re = 2235 + try Re.Pcre.re pat |> Re.compile 2236 + with _ -> raise (Eval_error "regex.find_n: invalid pattern") 2237 + in 2189 2238 let max_n = if n < 0. then max_int else int_of_float n in 2190 2239 let all = Re.all re s in 2191 2240 let rec take i = function ··· 2195 2244 in 2196 2245 Value.Array (List.map (fun s -> Value.String s) (take 0 all)) 2197 2246 | "regex.replace", [ Value.String s; Value.String pat; Value.String repl ] -> 2198 - let re = Re.Posix.compile_pat pat in 2199 - Value.String (Re.replace_string re ~by:repl s) 2247 + let re = 2248 + try Re.Pcre.re pat |> Re.compile 2249 + with _ -> raise (Eval_error "regex.replace: invalid pattern") 2250 + in 2251 + (* Translate Go-style group references [$1] / [${name}] into the 2252 + backslash form Re expects. *) 2253 + let by groups = 2254 + let buf = Buffer.create (String.length repl) in 2255 + let n = String.length repl in 2256 + let i = ref 0 in 2257 + while !i < n do 2258 + let c = repl.[!i] in 2259 + if c = '$' && !i + 1 < n then 2260 + let next = repl.[!i + 1] in 2261 + if next >= '0' && next <= '9' then ( 2262 + let g = Char.code next - Char.code '0' in 2263 + (try Buffer.add_string buf (Re.Group.get groups g) 2264 + with Not_found -> ()); 2265 + i := !i + 2) 2266 + else if next = '{' then ( 2267 + let stop = ref (!i + 2) in 2268 + while !stop < n && repl.[!stop] <> '}' do 2269 + incr stop 2270 + done; 2271 + let _name = String.sub repl (!i + 2) (!stop - !i - 2) in 2272 + (* Named groups not supported by Re; emit empty string. *) 2273 + i := !stop + 1) 2274 + else ( 2275 + Buffer.add_char buf c; 2276 + incr i) 2277 + else ( 2278 + Buffer.add_char buf c; 2279 + incr i) 2280 + done; 2281 + Buffer.contents buf 2282 + in 2283 + Value.String (Re.replace re ~f:by s) 2200 2284 (* Base64 *) 2201 2285 | "base64.encode", [ Value.String s ] -> 2202 2286 Value.String (base64_encode_alpha b64_alpha true s) ··· 2277 2361 | "glob.match", [ Value.String pattern; delims_v; Value.String value ] -> 2278 2362 let delims = 2279 2363 match delims_v with 2280 - | Value.Null -> "/" 2364 + | Value.Null -> "." 2365 + | Value.Array [] | Value.Set [] -> "." 2281 2366 | Value.Array xs | Value.Set xs -> 2282 2367 String.concat "" 2283 2368 (List.map ··· 2425 2510 | "object.keys", [ Value.Object pairs ] -> 2426 2511 Value.Set (List.sort_uniq Value.compare (List.map fst pairs)) 2427 2512 | "object.values", [ Value.Object pairs ] -> Value.Array (List.map snd pairs) 2513 + | "object.get", [ Value.Object pairs; Value.Array path; default ] -> 2514 + (* Array-form path: walk segments through nested objects/arrays. *) 2515 + let rec walk v = function 2516 + | [] -> v 2517 + | seg :: rest -> ( 2518 + match v with 2519 + | Value.Object kvs -> ( 2520 + match List.assoc_opt seg kvs with 2521 + | Some sub -> walk sub rest 2522 + | None -> Value.Undefined) 2523 + | Value.Array xs -> ( 2524 + match seg with 2525 + | Value.Number n 2526 + when Float.is_integer n 2527 + && int_of_float n >= 0 2528 + && int_of_float n < List.length xs -> 2529 + walk (List.nth xs (int_of_float n)) rest 2530 + | _ -> Value.Undefined) 2531 + | _ -> Value.Undefined) 2532 + in 2533 + let result = walk (Value.Object pairs) path in 2534 + if Value.equal result Value.Undefined then default else result 2428 2535 | "object.get", [ Value.Object pairs; key; default ] -> ( 2429 2536 match List.assoc_opt key pairs with Some v -> v | None -> default) 2537 + | "object.get", [ _; _; _ ] -> 2538 + raise (Eval_error "object.get: first argument must be an object") 2430 2539 | "object.remove", [ Value.Object pairs; keys_v ] -> 2431 2540 let keys = 2432 2541 match keys_v with
+19 -8
lib/parser.mly
··· 40 40 %nonassoc ASSIGN COLONEQ 41 41 %left PIPE 42 42 %left AMPERSAND 43 - %nonassoc IN_OP 43 + %nonassoc IN 44 44 %nonassoc EQEQ BANGEQ LT GT LE GE 45 45 %left PLUS MINUS 46 46 %left STAR SLASH PERCENT ··· 111 111 { RuleSpec (sp, RuleHeadCompr (sp, r, None), b :: es) } 112 112 | r=ref_expr IF b=rule_body es=else_chain 113 113 { RuleSpec (sp, RuleHeadCompr (sp, r, None), b :: es) } 114 + | r=ref_brack_head 115 + { RuleSpec (sp, RuleHeadCompr (sp, r, None), 116 + [{ rb_span = sp; rb_assign = None; 117 + rb_query = { q_span = sp; q_stmts = [] } }]) } 114 118 | r=ref_expr op=assign_op v=expr b=rule_body es=else_chain 115 119 { RuleSpec (sp, RuleHeadCompr (sp, r, 116 120 Some { ra_span = sp; ra_op = op; ra_value = v }), b :: es) } ··· 188 192 | NOT e=expr { NotExpr (sp, e) } 189 193 | SOME vs=separated_nonempty_list(COMMA, IDENT) 190 194 { SomeVars (sp, vs) } 191 - | SOME v=IDENT IN e=expr 192 - { SomeIn (sp, None, Var (sp, v), e) } 193 - | SOME k=IDENT COMMA v=IDENT IN e=expr 194 - { SomeIn (sp, Some (Var (sp, k)), Var (sp, v), e) } 195 + | SOME v=expr_no_in IN e=expr 196 + { SomeIn (sp, None, v, e) } 197 + | SOME k=expr_no_in COMMA v=expr_no_in IN e=expr 198 + { SomeIn (sp, Some k, v, e) } 195 199 | k=expr_no_in COMMA v=expr_no_in IN c=expr 196 200 { Expr (sp, Membership (sp, Some k, v, c)) } 197 201 | EVERY v=IDENT IN e=expr b=rule_body ··· 234 238 | l=infix_expr PIPE r=infix_expr { BinExpr (sp, Union, l, r) } 235 239 | l=infix_expr ASSIGN r=infix_expr { AssignExpr (sp, Assign, l, r) } 236 240 | l=infix_expr COLONEQ r=infix_expr { AssignExpr (sp, ColonEq, l, r) } 237 - | l=infix_expr IN r=infix_expr %prec IN_OP { Membership (sp, None, l, r) } 241 + | l=infix_expr IN r=infix_expr { Membership (sp, None, l, r) } 238 242 ; 239 243 240 244 (* Same as [infix_expr] but without the IN production. Used as the left 241 245 and right sides of the keyed [k, v in c] form so that an IN there 242 246 never gets greedily consumed into a value position. *) 243 247 infix_expr_no_in: 244 - | t=term_expr { t } %prec below_LPAREN 248 + | t=term_expr { t } 245 249 | l=infix_expr_no_in PLUS r=infix_expr_no_in { ArithExpr (sp, Add, l, r) } 246 250 | l=infix_expr_no_in MINUS r=infix_expr_no_in { ArithExpr (sp, Sub, l, r) } 247 251 | l=infix_expr_no_in STAR r=infix_expr_no_in { ArithExpr (sp, Mul, l, r) } ··· 285 289 | l=no_pipe_infix AMPERSAND r=no_pipe_infix { BinExpr (sp, Intersection, l, r) } 286 290 | l=no_pipe_infix ASSIGN r=no_pipe_infix { AssignExpr (sp, Assign, l, r) } 287 291 | l=no_pipe_infix COLONEQ r=no_pipe_infix { AssignExpr (sp, ColonEq, l, r) } 288 - | l=no_pipe_infix IN r=no_pipe_infix %prec IN_OP { Membership (sp, None, l, r) } 292 + | l=no_pipe_infix IN r=no_pipe_infix { Membership (sp, None, l, r) } 289 293 ; 290 294 291 295 (* ── Terms ─────────────────────────────────────────────────────────────── *) ··· 326 330 | t=atom { t } 327 331 | t=ref_expr DOT i=IDENT { RefDot (sp, t, i) } 328 332 | t=ref_expr LBRACK e=expr RBRACK { RefBrack (sp, t, e) } 333 + ; 334 + 335 + (* ref_expr that ends in a bracketed key, used for the legacy v0 336 + body-less partial-set form [p[k]]. Restricting to the bracketed 337 + shape avoids matching plain identifiers as no-body rules. *) 338 + ref_brack_head: 339 + | b=ref_expr LBRACK e=expr RBRACK { RefBrack (sp, b, e) } 329 340 ; 330 341 331 342 (* ── Atoms ─────────────────────────────────────────────────────────────── *)