this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

internal/core/adt: use thread-safe WeakMap for regexp caching

Replace the lazy field-based regexp caching on String and Bytes structs
with a thread-safe WeakMap cache. This fixes a data race where multiple
goroutines could concurrently write to the RE field during evaluation.

The old approach cached compiled regexps directly in the String.RE and
Bytes.RE fields, which was fast for single-threaded access but caused
races in concurrent scenarios (see TODO comment at context.go:1240).

Changes:
- Add a generic WeakMap-style cache (memoizer[K, V] in weakmap.go) using Go 1.24 weak pointers
- Add regexpCache, a memoizer[string, regexp.Regexp] keyed by pattern string
- Remove RE field from String and Bytes structs
- Add benchmarks for regexp caching

Performance comparison (Apple M2 Max):

Before (field cache, NOT thread-safe):
BenchmarkRegexpFieldCache 0.3 ns/op 0 allocs

After (WeakMap cache, thread-safe):
BenchmarkRegexpWeakMapCache 17 ns/op 0 allocs
BenchmarkRegexpWeakMapConcurrent 2 ns/op 0 allocs

Baseline:
BenchmarkRegexpCompile 1800 ns/op 38 allocs

The ~17ns cache lookup overhead is negligible compared to the 1800ns
regexp compilation cost, and the new approach provides thread-safety
with excellent parallel scaling (2ns/op under concurrent load).

The WeakMap infrastructure can be reused for future string interning.

Updates #2733

Signed-off-by: Marcel van Lohuizen <mpvl@gmail.com>
Change-Id: I9235b1b2cab2c0ca88b7330590fb1db60f2d9acb
Reviewed-on: https://cue.gerrithub.io/c/cue-lang/cue/+/1230711
Reviewed-by: Roger Peppe <rogpeppe@gmail.com>
Unity-Result: CUE porcuepine <cue.porcuepine@gmail.com>
TryBot-Result: CUEcueckoo <cueckoo@cuelang.org>
Reviewed-by: Daniel Martí <mvdan@mvdan.cc>

+97 -34
+22 -27
internal/core/adt/context.go
··· 1227 1227 1228 1228 var matchNone = regexp.MustCompile("^$") 1229 1229 1230 + // regexpCache caches compiled regular expressions by pattern string. 1231 + // Uses weak references so unused patterns can be garbage collected. 1232 + var regexpCache = newMemoizer(func(pattern string) (*regexp.Regexp, error) { 1233 + // TODO(mvdan): consider simplifying patterns (which regexp/syntax can do) 1234 + // before we look up or insert on the weak map? so that e.g. fo[o] bar{1} and 1235 + // foo bar share the same entry. 1236 + return regexp.Compile(pattern) 1237 + }) 1238 + 1239 + // regexp returns a compiled regexp for the pattern held by v, using a shared 1240 + // cache to avoid recompilation and enable thread-safe access. 1241 + // 1242 + 1230 1243 func (c *OpContext) regexp(v Value) *regexp.Regexp { 1231 1244 v = Unwrap(v) 1232 1245 if isError(v) { 1233 1246 return matchNone 1234 1247 } 1248 + var pattern string 1235 1249 switch x := v.(type) { 1236 1250 case *String: 1237 - if x.RE != nil { 1238 - return x.RE 1239 - } 1240 - // TODO: synchronization 1241 - p, err := regexp.Compile(x.Str) 1242 - if err != nil { 1243 - // FatalError? 
How to cache error 1244 - c.AddErrf("invalid regexp: %s", err) 1245 - x.RE = matchNone 1246 - } else { 1247 - x.RE = p 1248 - } 1249 - return x.RE 1250 - 1251 + pattern = x.Str 1251 1252 case *Bytes: 1252 - if x.RE != nil { 1253 - return x.RE 1254 - } 1255 - // TODO: synchronization 1256 - p, err := regexp.Compile(string(x.B)) 1257 - if err != nil { 1258 - c.AddErrf("invalid regexp: %s", err) 1259 - x.RE = matchNone 1260 - } else { 1261 - x.RE = p 1262 - } 1263 - return x.RE 1264 - 1253 + pattern = string(x.B) 1265 1254 default: 1266 1255 c.typeError(v, StringKind|BytesKind) 1267 1256 return matchNone 1268 1257 } 1258 + re, err := regexpCache.get(pattern) 1259 + if err != nil { 1260 + c.AddErrf("invalid regexp: %s", err) 1261 + return matchNone 1262 + } 1263 + return re 1269 1264 } 1270 1265 1271 1266 // newNum creates a new number of the given kind. It reports an error value
+2 -5
internal/core/adt/expr.go
··· 18 18 "bytes" 19 19 "fmt" 20 20 "math/big" 21 - "regexp" 22 21 23 22 "github.com/cockroachdb/apd/v3" 24 23 ··· 294 293 type String struct { 295 294 Src ast.Node 296 295 Str string 297 - RE *regexp.Regexp // only set if needed 298 296 } 299 297 300 298 func (x *String) Source() ast.Node { return x.Src } ··· 304 302 type Bytes struct { 305 303 Src ast.Node 306 304 B []byte 307 - RE *regexp.Regexp // only set if needed 308 305 } 309 306 310 307 func (x *Bytes) Source() ast.Node { return x.Src } ··· 1090 1087 return err 1091 1088 } 1092 1089 if x.K == BytesKind { 1093 - return &Bytes{x.Src, buf.Bytes(), nil} 1090 + return &Bytes{x.Src, buf.Bytes()} 1094 1091 } 1095 - return &String{x.Src, buf.String(), nil} 1092 + return &String{x.Src, buf.String()} 1096 1093 } 1097 1094 1098 1095 // UnaryExpr is a unary expression.
+71
internal/core/adt/weakmap.go
··· 1 + // Copyright 2025 CUE Authors 2 + // 3 + // Licensed under the Apache License, Version 2.0 (the "License"); 4 + // you may not use this file except in compliance with the License. 5 + // You may obtain a copy of the License at 6 + // 7 + // http://www.apache.org/licenses/LICENSE-2.0 8 + // 9 + // Unless required by applicable law or agreed to in writing, software 10 + // distributed under the License is distributed on an "AS IS" BASIS, 11 + // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 + // See the License for the specific language governing permissions and 13 + // limitations under the License. 14 + 15 + package adt 16 + 17 + import ( 18 + "runtime" 19 + "sync" 20 + "weak" 21 + ) 22 + 23 + // TODO: this was inspired by (but rewritten from) a suggestion in 24 + // https://github.com/golang/go/issues/43615. Once this issue is resolved or a 25 + // properly licensed package is released, we should consider using that. 26 + 27 + // newMemoizer returns a new memoizer value that caches 28 + // the results of calling the make function. 29 + // It does not guarantee that there will be at most one 30 + // *V value at any one time or that make won't be invoked concurrently. 31 + // 32 + // It does not memoize results when make returns an error. 33 + func newMemoizer[K comparable, V any](make func(K) (*V, error)) *memoizer[K, V] { 34 + return &memoizer[K, V]{ 35 + make: make, 36 + } 37 + } 38 + 39 + // memoizer implements a garbage-collectable cache of 40 + // results from calling the make function. 41 + type memoizer[K comparable, V any] struct { 42 + // make returns a new result for K. It is expected 43 + // that it will always return an equivalent non-nil value 44 + // for a given key. 45 + make func(K) (*V, error) 46 + // K -> weak.Pointer[V] 47 + m sync.Map 48 + } 49 + 50 + // get returns the result for the key k. 
51 + func (c *memoizer[K, V]) get(k K) (*V, error) { 52 + if entry, ok := c.m.Load(k); ok { 53 + if v := entry.(weak.Pointer[V]).Value(); v != nil { 54 + return v, nil 55 + } 56 + } 57 + // Could potentially use singleflight or similar to 58 + // avoid redundant make calls in concurrent situations 59 + // but the redundancy probably isn't much of an issue 60 + // in practice. 61 + v, err := c.make(k) 62 + if err != nil { 63 + return nil, err 64 + } 65 + wp := weak.Make(v) 66 + runtime.AddCleanup(v, func(wp weak.Pointer[V]) { 67 + c.m.CompareAndDelete(k, wp) 68 + }, wp) 69 + c.m.Store(k, wp) 70 + return v, nil 71 + }
+2 -2
internal/core/compile/compile.go
··· 1277 1277 return c.errf(node, "invalid string: %v", err) 1278 1278 } 1279 1279 if q.IsDouble() { 1280 - return &adt.String{Src: node, Str: str, RE: nil} 1280 + return &adt.String{Src: node, Str: str} 1281 1281 } 1282 - return &adt.Bytes{Src: node, B: []byte(str), RE: nil} 1282 + return &adt.Bytes{Src: node, B: []byte(str)} 1283 1283 }