this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

cue/load: avoid repeatedly loading parent directories by caching

In order to load a cue package, the `cue.load.loader` performs various
CPU/IO heavy operations:

- readDir: in order to load a cue instance, all ancestors of the
instance directory are loaded as well, causing multiple dir
reads. In case multiple instances are loaded, such as in
`cue fmt ./...`, the same directory may be read many times as
we descend into subdirectories
- each file is loaded as a `*build.File` and its source parsed as
an `*ast.File`

We now maintain two separate caches of the above work:

- a cache of directory path to []*build.File
- a cache of file path to []*ast.File

After this change we can see the following speedup on my monorepo:

before:

cue fmt --check ./... 2.50s user 0.54s system 83% cpu 3.661 total

after:

cue fmt --check ./... 0.72s user 0.21s system 51% cpu 1.815 total

Signed-off-by: Noam Dolovich <noam.tzvi.dolovich@gmail.com>
Change-Id: I95472ad22eb124df730f6711af730425106a85e8
Reviewed-on: https://review.gerrithub.io/c/cue-lang/cue/+/1193678
TryBot-Result: CUEcueckoo <cueckoo@cuelang.org>
Reviewed-by: Daniel Martí <mvdan@mvdan.cc>

authored by

Noam Dolovich and committed by
Daniel Martí
d9c6c752 b7c58005

+90 -41
+41 -26
cue/load/import.go
··· 140 140 // Walk the parent directories up to the module root to add their files as well, 141 141 // since a package foo/bar/baz inherits from parent packages foo/bar and foo. 142 142 // See https://cuelang.org/docs/concept/modules-packages-instances/#instances. 143 - // 144 - // TODO(mvdan): Note that the work below, most notably readDir and ParseFile, 145 - // is not cached or reused in any way. This causes slow-downs on big repos, 146 - // for example `cue fmt --check ./...` on the cue repo inspects 147 - // top-level files like README.md and LICENSE many dozens of times. 148 143 for _, d := range dirs { 149 144 for dir := filepath.Clean(d[1]); ctxt.isDir(dir); { 150 - files, err := ctxt.readDir(dir) 151 - if err != nil && !os.IsNotExist(err) { 152 - return retErr(errors.Wrapf(err, pos, "import failed reading dir %v", dirs[0][1])) 145 + sd, ok := l.dirCachedBuildFiles[dir] 146 + if !ok { 147 + sd = l.scanDir(dir) 148 + l.dirCachedBuildFiles[dir] = sd 149 + } 150 + if err := sd.err; err != nil { 151 + return retErr(errors.Wrapf(err, token.NoPos, "import failed reading dir %v", dir)) 153 152 } 154 - for _, f := range files { 155 - if f.IsDir() { 156 - continue 157 - } 158 - if f.Name() == "-" { 159 - if _, err := cfg.fileSystem.stat("-"); !os.IsNotExist(err) { 160 - continue 161 - } 162 - } 163 - file, err := filetypes.ParseFile(f.Name(), filetypes.Input) 164 - if err != nil { 165 - p.UnknownFiles = append(p.UnknownFiles, &build.File{ 166 - Filename: f.Name(), 167 - ExcludeReason: errors.Newf(token.NoPos, "unknown filetype"), 168 - }) 169 - continue // skip unrecognized file types 170 - } 171 - fp.add(dir, file, importComment) 153 + p.UnknownFiles = append(p.UnknownFiles, sd.unknownFiles...) 154 + for _, f := range sd.buildFiles { 155 + bf := *f 156 + fp.add(dir, &bf, importComment) 172 157 } 173 158 174 159 if p.PkgName == "" || !inModule || l.cfg.isRoot(dir) || dir == d[0] { ··· 218 203 return cmp.Compare(a.PkgName, b.PkgName) 219 204 }) 220 205 return all 206 + } 207 + 208 + func (l *loader) scanDir(dir string) cachedFileFiles { 209 + sd := cachedFileFiles{} 210 + files, err := l.cfg.fileSystem.readDir(dir) 211 + if err != nil && !os.IsNotExist(err) { 212 + sd.err = err 213 + return sd 214 + } 215 + for _, f := range files { 216 + if f.IsDir() { 217 + continue 218 + } 219 + if f.Name() == "-" { 220 + if _, err := l.cfg.fileSystem.stat("-"); !os.IsNotExist(err) { 221 + continue 222 + } 223 + } 224 + file, err := filetypes.ParseFile(f.Name(), filetypes.Input) 225 + if err != nil { 226 + sd.unknownFiles = append(sd.unknownFiles, &build.File{ 227 + Filename: f.Name(), 228 + ExcludeReason: errors.Newf(token.NoPos, "unknown filetype"), 229 + }) 230 + continue // skip unrecognized file types 231 + } 232 + 233 + sd.buildFiles = append(sd.buildFiles, file) 234 + } 235 + return sd 221 236 } 222 237 223 238 // _loadFunc is the method used for the value of l.loadFunc.
+2 -2
cue/load/import_test.go
··· 38 38 if err != nil { 39 39 return nil, fmt.Errorf("unexpected error on Config.complete: %v", err) 40 40 } 41 - l := loader{cfg: c} 41 + l := newLoader(c, nil, nil) 42 42 inst := l.newRelInstance(token.NoPos, pkg, c.Package) 43 43 p := l.importPkg(token.NoPos, inst)[0] 44 44 return p, p.Err ··· 51 51 if err != nil { 52 52 t.Fatal(err) 53 53 } 54 - l := loader{cfg: c} 54 + l := newLoader(c, nil, nil) 55 55 inst := l.newInstance(token.NoPos, "") 56 56 p := l.importPkg(token.NoPos, inst)[0] 57 57 if p.Err == nil {
+47 -13
cue/load/loader.go
··· 22 22 import ( 23 23 "path/filepath" 24 24 25 + "cuelang.org/go/cue/ast" 25 26 "cuelang.org/go/cue/build" 26 27 "cuelang.org/go/cue/cuecontext" 27 28 "cuelang.org/go/cue/errors" ··· 42 43 stk importStack 43 44 loadFunc build.LoadFunc 44 45 pkgs *modpkgload.Packages 46 + 47 + // When we descend into subdirectories to load patterns such as ./... 48 + // we often end up loading parent directories many times over; cache that work by directory. 49 + dirCachedBuildFiles map[string]cachedFileFiles 50 + 51 + // The same file may be decoded into an *ast.File multiple times, e.g. when it is present in 52 + // multiple different build instances in the same directory hierarchy; cache that work by file name. 53 + fileCachedSyntaxFiles map[string]cachedSyntaxFiles 45 54 } 46 55 56 + type ( 57 + cachedFileFiles struct { 58 + err errors.Error 59 + buildFiles []*build.File 60 + unknownFiles []*build.File 61 + } 62 + 63 + cachedSyntaxFiles struct { 64 + err error 65 + files []*ast.File 66 + } 67 + ) 68 + 47 69 func newLoader(c *Config, tg *tagger, pkgs *modpkgload.Packages) *loader { 48 70 l := &loader{ 49 - cfg: c, 50 - tagger: tg, 51 - pkgs: pkgs, 71 + cfg: c, 72 + tagger: tg, 73 + pkgs: pkgs, 74 + dirCachedBuildFiles: map[string]cachedFileFiles{}, 75 + fileCachedSyntaxFiles: map[string]cachedSyntaxFiles{}, 52 76 } 53 77 l.loadFunc = l._loadFunc 54 78 return l ··· 137 161 } 138 162 139 163 func (l *loader) addFiles(dir string, p *build.Instance) { 140 - for _, f := range p.BuildFiles { 141 - // TODO(mvdan): reuse the same context for an entire loader 142 - d := encoding.NewDecoder(cuecontext.New(), f, &encoding.Config{ 143 - Stdin: l.cfg.stdin(), 144 - ParseFile: l.cfg.ParseFile, 145 - }) 146 - for ; !d.Done(); d.Next() { 147 - _ = p.AddSyntax(d.File()) 164 + for _, bf := range p.BuildFiles { 165 + syntax, ok := l.fileCachedSyntaxFiles[bf.Filename] 166 + if !ok { 167 + syntax = cachedSyntaxFiles{} 168 + // TODO(mvdan): reuse the same context for an entire loader 169 + d := encoding.NewDecoder(cuecontext.New(), bf, &encoding.Config{ 170 + Stdin: l.cfg.stdin(), 171 + ParseFile: l.cfg.ParseFile, 172 + }) 173 + for ; !d.Done(); d.Next() { 174 + syntax.files = append(syntax.files, d.File()) 175 + } 176 + syntax.err = d.Err() 177 + d.Close() 178 + l.fileCachedSyntaxFiles[bf.Filename] = syntax 148 179 } 149 - if err := d.Err(); err != nil { 180 + 181 + if err := syntax.err; err != nil { 150 182 p.ReportError(errors.Promote(err, "load")) 151 183 } 152 - d.Close() 184 + for _, f := range syntax.files { 185 + _ = p.AddSyntax(f) 186 + } 153 187 } 154 188 }