Monorepo for Tangled tangled.org
858
fork

Configure Feed

Select the types of activity you want to include in your feed.

Fix language detection problems #306

open opened by atixnotfound.tngl.sh targeting master from atixnotfound.tngl.sh/core: master

This fixes language detection issues for large and asset-heavy repositories. It raises the tight language-analysis timeouts (from 1s to 5s in the two XRPC handlers, and from 2s to 10s in post_receive.go), adds a 10,000-file cap to prevent knot starvation on massive codebases, and excludes files whose detected language type is neither programming nor markup (which includes enry.Unknown files) from the language calculations. This exclusion prevents massive game assets (like Godot .tscn files) from washing out actual code languages like GDScript in the breakdown.

Labels

None yet.

assignee

None yet.

Participants 1
AT URI
at://did:plc:3hv3pvzhalkhjnc3g7wfmnvb/sh.tangled.repo.pull/3mkx7tkbdgd22
+24 -12
Diff #0
+1 -1
go.mod
··· 28 28 github.com/dustin/go-humanize v1.0.1 29 29 github.com/gliderlabs/ssh v0.3.8 30 30 github.com/go-chi/chi/v5 v5.2.0 31 - github.com/go-enry/go-enry/v2 v2.9.2 31 + github.com/go-enry/go-enry/v2 v2.9.6 32 32 github.com/go-git/go-git/v5 v5.14.0 33 33 github.com/google/uuid v1.6.0 34 34 github.com/gorilla/feeds v1.2.0
+2 -2
go.sum
··· 214 214 github.com/gliderlabs/ssh v0.3.8/go.mod h1:xYoytBv1sV0aL3CavoDuJIQNURXkkfPA/wxQ1pL1fAU= 215 215 github.com/go-chi/chi/v5 v5.2.0 h1:Aj1EtB0qR2Rdo2dG4O94RIU35w2lvQSj6BRA4+qwFL0= 216 216 github.com/go-chi/chi/v5 v5.2.0/go.mod h1:DslCQbL2OYiznFReuXYUmQ2hGd1aDpCnlMNITLSKoi8= 217 - github.com/go-enry/go-enry/v2 v2.9.2 h1:giOQAtCgBX08kosrX818DCQJTCNtKwoPBGu0qb6nKTY= 218 - github.com/go-enry/go-enry/v2 v2.9.2/go.mod h1:9yrj4ES1YrbNb1Wb7/PWYr2bpaCXUGRt0uafN0ISyG8= 217 + github.com/go-enry/go-enry/v2 v2.9.6 h1:np63eOtMV56zfYDHnFVgpEVOk8fr2kmylcMnAZUDbSs= 218 + github.com/go-enry/go-enry/v2 v2.9.6/go.mod h1:9yrj4ES1YrbNb1Wb7/PWYr2bpaCXUGRt0uafN0ISyG8= 219 219 github.com/go-enry/go-oniguruma v1.2.1 h1:k8aAMuJfMrqm/56SG2lV9Cfti6tC4x8673aHCcBk+eo= 220 220 github.com/go-enry/go-oniguruma v1.2.1/go.mod h1:bWDhYP+S6xZQgiRL7wlTScFYBe023B6ilRZbCAD5Hf4= 221 221 github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 h1:+zs/tPmkDkHx3U66DAb0lQFJrpS6731Oaa12ikc+DiI=
+1 -1
knotmirror/xrpc/git_list_languages.go
··· 51 51 return nil, &atclient.APIError{StatusCode: http.StatusNotFound, Name: "RepoNotFound", Message: "failed to find git repo"} 52 52 } 53 53 54 - ctx, cancel := context.WithTimeout(ctx, 1*time.Second) 54 + ctx, cancel := context.WithTimeout(ctx, 5*time.Second) 55 55 defer cancel() 56 56 57 57 sizes, err := gr.AnalyzeLanguages(ctx)
+18 -6
knotserver/git/language.go
··· 11 11 12 12 type LangBreakdown map[string]int64 13 13 14 + // maxAnalyzeFiles caps how many files are inspected to avoid runaway walks on 15 + // extremely large repositories. 16 + const maxAnalyzeFiles = 10_000 17 + 14 18 func (g *GitRepo) AnalyzeLanguages(ctx context.Context) (LangBreakdown, error) { 15 19 sizes := make(map[string]int64) 20 + fileCount := 0 21 + 16 22 err := g.Walk(ctx, "", func(node object.TreeEntry, parent *object.Tree, root string) error { 23 + if fileCount >= maxAnalyzeFiles { 24 + return TerminateWalk 25 + } 26 + fileCount++ 27 + 17 28 filepath := path.Join(root, node.Name) 18 29 19 - content, err := g.FileContentN(filepath, 16*1024) // 16KB 30 + content, err := g.FileContentN(filepath, 16*1024) // 16 KB 20 31 if err != nil { 21 32 return nil 22 33 } ··· 30 41 31 42 32 43 44 + } 33 45 34 - 35 - 36 - 37 - 38 - 46 + langType := enry.GetLanguageType(language) 47 + if langType != enry.Programming && langType != enry.Markup { 48 + return nil 49 + } 39 50 40 51 41 52 ··· 67 78 68 79 return enry.GetLanguage(node.Name, content) 69 80 } 81 +
+1 -1
knotserver/git/post_receive.go
··· 67 67 isDefaultRef, err := g.isDefaultBranch(line) 68 68 errors.Join(errs, err) 69 69 70 - ctx, cancel := context.WithTimeout(context.Background(), time.Second*2) 70 + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) 71 71 defer cancel() 72 72 breakdown, err := g.AnalyzeLanguages(ctx) 73 73 errors.Join(errs, err)
+1 -1
knotserver/xrpc/repo_languages.go
··· 28 28 return 29 29 } 30 30 31 - ctx, cancel := context.WithTimeout(r.Context(), 1*time.Second) 31 + ctx, cancel := context.WithTimeout(r.Context(), 5*time.Second) 32 32 defer cancel() 33 33 34 34 sizes, err := gr.AnalyzeLanguages(ctx)

History

1 round 0 comments
sign up or login to add to the discussion
3 commits
expand
chore(deps): bump go-enry to v2.9.6
An attempt to fix the language detection problem
fix: exclude unknown languages from detection
merge conflicts detected
expand
  • go.mod:28
  • go.sum:214
expand 0 comments