This fixes language detection issues for large and asset-heavy repositories. It raises the tight language-analysis timeouts (from 1s to 5s in the knotmirror XRPC handler, and from 2s to 10s in the post-receive hook), adds a 10,000-file cap to prevent knot starvation on massive codebases, bumps go-enry from v2.9.2 to v2.9.6, and drops files whose enry language type is neither Programming nor Markup (which includes enry.Unknown) from the language calculations. This exclusion prevents massive game assets (like Godot .tscn files) from washing out actual code languages like GDScript in the breakdown.
+24
-12
Diff
round #0
+1
-1
go.mod
+1
-1
go.mod
···
28
28
github.com/dustin/go-humanize v1.0.1
29
29
github.com/gliderlabs/ssh v0.3.8
30
30
github.com/go-chi/chi/v5 v5.2.0
31
-
github.com/go-enry/go-enry/v2 v2.9.2
31
+
github.com/go-enry/go-enry/v2 v2.9.6
32
32
github.com/go-git/go-git/v5 v5.14.0
33
33
github.com/google/uuid v1.6.0
34
34
github.com/gorilla/feeds v1.2.0
+2
-2
go.sum
+2
-2
go.sum
···
214
214
github.com/gliderlabs/ssh v0.3.8/go.mod h1:xYoytBv1sV0aL3CavoDuJIQNURXkkfPA/wxQ1pL1fAU=
215
215
github.com/go-chi/chi/v5 v5.2.0 h1:Aj1EtB0qR2Rdo2dG4O94RIU35w2lvQSj6BRA4+qwFL0=
216
216
github.com/go-chi/chi/v5 v5.2.0/go.mod h1:DslCQbL2OYiznFReuXYUmQ2hGd1aDpCnlMNITLSKoi8=
217
-
github.com/go-enry/go-enry/v2 v2.9.2 h1:giOQAtCgBX08kosrX818DCQJTCNtKwoPBGu0qb6nKTY=
218
-
github.com/go-enry/go-enry/v2 v2.9.2/go.mod h1:9yrj4ES1YrbNb1Wb7/PWYr2bpaCXUGRt0uafN0ISyG8=
217
+
github.com/go-enry/go-enry/v2 v2.9.6 h1:np63eOtMV56zfYDHnFVgpEVOk8fr2kmylcMnAZUDbSs=
218
+
github.com/go-enry/go-enry/v2 v2.9.6/go.mod h1:9yrj4ES1YrbNb1Wb7/PWYr2bpaCXUGRt0uafN0ISyG8=
219
219
github.com/go-enry/go-oniguruma v1.2.1 h1:k8aAMuJfMrqm/56SG2lV9Cfti6tC4x8673aHCcBk+eo=
220
220
github.com/go-enry/go-oniguruma v1.2.1/go.mod h1:bWDhYP+S6xZQgiRL7wlTScFYBe023B6ilRZbCAD5Hf4=
221
221
github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 h1:+zs/tPmkDkHx3U66DAb0lQFJrpS6731Oaa12ikc+DiI=
+1
-1
knotmirror/xrpc/git_list_languages.go
+1
-1
knotmirror/xrpc/git_list_languages.go
···
51
51
return nil, &atclient.APIError{StatusCode: http.StatusNotFound, Name: "RepoNotFound", Message: "failed to find git repo"}
52
52
}
53
53
54
-
ctx, cancel := context.WithTimeout(ctx, 1*time.Second)
54
+
ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
55
55
defer cancel()
56
56
57
57
sizes, err := gr.AnalyzeLanguages(ctx)
+18
-6
knotserver/git/language.go
+18
-6
knotserver/git/language.go
···
11
11
12
12
type LangBreakdown map[string]int64
13
13
14
+
// maxAnalyzeFiles caps how many files are inspected to avoid runaway walks on
15
+
// extremely large repositories.
16
+
const maxAnalyzeFiles = 10_000
17
+
14
18
func (g *GitRepo) AnalyzeLanguages(ctx context.Context) (LangBreakdown, error) {
15
19
sizes := make(map[string]int64)
20
+
fileCount := 0
21
+
16
22
err := g.Walk(ctx, "", func(node object.TreeEntry, parent *object.Tree, root string) error {
23
+
if fileCount >= maxAnalyzeFiles {
24
+
return TerminateWalk
25
+
}
26
+
fileCount++
27
+
17
28
filepath := path.Join(root, node.Name)
18
29
19
-
content, err := g.FileContentN(filepath, 16*1024) // 16KB
30
+
content, err := g.FileContentN(filepath, 16*1024) // 16 KB
20
31
if err != nil {
21
32
return nil
22
33
}
···
30
41
31
42
32
43
44
+
}
33
45
34
-
35
-
36
-
37
-
38
-
46
+
langType := enry.GetLanguageType(language)
47
+
if langType != enry.Programming && langType != enry.Markup {
48
+
return nil
49
+
}
39
50
40
51
41
52
···
67
78
68
79
return enry.GetLanguage(node.Name, content)
69
80
}
81
+
+1
-1
knotserver/git/post_receive.go
+1
-1
knotserver/git/post_receive.go
···
67
67
isDefaultRef, err := g.isDefaultBranch(line)
68
68
errors.Join(errs, err)
69
69
70
-
ctx, cancel := context.WithTimeout(context.Background(), time.Second*2)
70
+
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
71
71
defer cancel()
72
72
breakdown, err := g.AnalyzeLanguages(ctx)
73
73
errors.Join(errs, err)
History
1 round
0 comments
atixnotfound.tngl.sh
submitted
#0
3 commits
expand
collapse
chore(deps): bump go-enry to v2.9.6
An attempt to fix the language detection problem
Signed-off-by: atixnotfound.tngl.sh <atsharma623@gmail.com>
fix: exclude unknown languages from detection
Signed-off-by: atixnotfound.tngl.sh <atsharma623@gmail.com>
merge conflicts detected
expand
collapse
expand
collapse
- go.mod:28
- go.sum:214