loading up the forgejo repo on tangled to test page performance
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Optimization of labels handling in issue_search (#4228)

This PR optimizes the SQL query and de-duplicates the label IDs when generating the query string on the issue page.

<hr/>

### Background

Some time ago, BingBot and some other crawlers brought my instance to its knees with requests containing a lot of label IDs, like this one:

```
[07/Aug/2023:11:28:37 +0200] "GET /Dolibarr/sendrecurringinvoicebymail/issues?q=&type=all&sort=&state=closed&labels=1%2c1%2c1%2c1%2c1%2c1%2c1%2c1%2c1%2c1%2c1%2c1%2c1%2c1%2c1%2c1%2c1%2c1%2c1%2c1%2c2%2c10%2c2%2c1%2c1%2c10%2c10%2c7%2c6%2c10%2c10%2c3%2c2%2c1%2c5%2c10%2c1%2c6%2c2%2c7%2c3%2c7%2c6%2c10%2c1%2c10%2c1%2c1%2c7%2c7%2c1%2c1%2c1%2c1%2c10%2c10%2c1%2c2%2c1%2c1%2c1%2c1%2c1%2c1%2c1%2c1%2c1%2c1%2c2%2c1%2c12%2c6%2c6%2c10&milestone=0&project=-1&poster=0 HTTP/1.1" 499 0 "-" "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm) Chrome/103.0.5060.134 Safari/537.36"
```

Since each of the label IDs implies a join, the query grows exponentially more expensive for the database engine (at least on PostgreSQL, though SQLite suffers a little too).

Thus, this PR proposes two enhancements:

* rewrite the database query to use only one squashed condition,
* deduplicate the label ids when generating the URL.

### Performance comparison

Here are some timings on PostgreSQL-backed Forgejo 7.0.4 instances:
```sh
$ time curl -s -o /dev/null "http://localhost:3000/toto/tata/issues?q=&type=all&sort=&labels=19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25&state=open&milestone=0&project=0&assignee=0&poster=0"

real 0m10,491s
user 0m0,017s
sys 0m0,008s
```
...and with the patch:
```sh
$ time curl -s -o /dev/null "http://localhost:3000/toto/tata/issues?q=&type=all&sort=&labels=19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25%2c19%2c25&state=open&milestone=0&project=0&assignee=0&poster=0"

real 0m0,094s
user 0m0,012s
sys 0m0,013s
```

### Annex

This change was originally proposed to [Gitea](https://github.com/go-gitea/gitea/pull/26460) but didn't get much attention, and I switched to Forgejo in the meantime :)

Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/4228
Reviewed-by: Earl Warren <earl-warren@noreply.codeberg.org>
Co-authored-by: Chl <chl@xlii.si>
Co-committed-by: Chl <chl@xlii.si>

authored by

Chl
Chl
and committed by
Earl Warren
544cbc6f 2121a29f

+80 -16
+22 -4
models/issues/issue_search.go
··· 6 6 import ( 7 7 "context" 8 8 "fmt" 9 + "strconv" 9 10 "strings" 10 11 11 12 "code.gitea.io/gitea/models/db" ··· 13 14 repo_model "code.gitea.io/gitea/models/repo" 14 15 "code.gitea.io/gitea/models/unit" 15 16 user_model "code.gitea.io/gitea/models/user" 17 + "code.gitea.io/gitea/modules/container" 16 18 "code.gitea.io/gitea/modules/optional" 17 19 18 20 "xorm.io/builder" ··· 116 118 if opts.LabelIDs[0] == 0 { 117 119 sess.Where("issue.id NOT IN (SELECT issue_id FROM issue_label)") 118 120 } else { 119 - for i, labelID := range opts.LabelIDs { 121 + // deduplicate the label IDs for inclusion and exclusion 122 + includedLabelIDs := make(container.Set[int64]) 123 + excludedLabelIDs := make(container.Set[int64]) 124 + for _, labelID := range opts.LabelIDs { 120 125 if labelID > 0 { 121 - sess.Join("INNER", fmt.Sprintf("issue_label il%d", i), 122 - fmt.Sprintf("issue.id = il%[1]d.issue_id AND il%[1]d.label_id = %[2]d", i, labelID)) 126 + includedLabelIDs.Add(labelID) 123 127 } else if labelID < 0 { // 0 is not supported here, so just ignore it 124 - sess.Where("issue.id not in (select issue_id from issue_label where label_id = ?)", -labelID) 128 + excludedLabelIDs.Add(-labelID) 125 129 } 130 + } 131 + // ... and use them in a subquery of the form : 132 + // where (select count(*) from issue_label where issue_id=issue.id and label_id in (2, 4, 6)) = 3 133 + // This equality is guaranteed thanks to unique index (issue_id,label_id) on table issue_label. 134 + if len(includedLabelIDs) > 0 { 135 + subQuery := builder.Select("count(*)").From("issue_label").Where(builder.Expr("issue_id = issue.id")). 136 + And(builder.In("label_id", includedLabelIDs.Values())) 137 + sess.Where(builder.Eq{strconv.Itoa(len(includedLabelIDs)): subQuery}) 138 + } 139 + // or (select count(*)...) = 0 for excluded labels 140 + if len(excludedLabelIDs) > 0 { 141 + subQuery := builder.Select("count(*)").From("issue_label").Where(builder.Expr("issue_id = issue.id")). 
142 + And(builder.In("label_id", excludedLabelIDs.Values())) 143 + sess.Where(builder.Eq{"0": subQuery}) 126 144 } 127 145 } 128 146 }
+13
models/issues/issue_test.go
··· 193 193 }, 194 194 { 195 195 issues_model.IssuesOptions{ 196 + LabelIDs: []int64{-1, 2}, 197 + }, 198 + []int64{5}, // issue without label 1 but with label 2. 199 + }, 200 + { 201 + issues_model.IssuesOptions{ 202 + RepoCond: builder.In("repo_id", 1), 203 + LabelIDs: []int64{0}, 204 + }, 205 + []int64{11, 3}, // issues without any label (ordered by creation date desc.)(note: 11 is a pull request) 206 + }, 207 + { 208 + issues_model.IssuesOptions{ 196 209 MilestoneIDs: []int64{1}, 197 210 }, 198 211 []int64{2},
+23 -12
models/issues/label.go
··· 7 7 import ( 8 8 "context" 9 9 "fmt" 10 + "slices" 10 11 "strconv" 11 12 "strings" 12 13 ··· 142 143 143 144 // LoadSelectedLabelsAfterClick calculates the set of selected labels when a label is clicked 144 145 func (l *Label) LoadSelectedLabelsAfterClick(currentSelectedLabels []int64, currentSelectedExclusiveScopes []string) { 145 - var labelQuerySlice []string 146 + labelQuerySlice := []int64{} 146 147 labelSelected := false 147 - labelID := strconv.FormatInt(l.ID, 10) 148 - labelScope := l.ExclusiveScope() 149 - for i, s := range currentSelectedLabels { 150 - if s == l.ID { 148 + exclusiveScope := l.ExclusiveScope() 149 + for i, curSel := range currentSelectedLabels { 150 + if curSel == l.ID { 151 151 labelSelected = true 152 - } else if -s == l.ID { 152 + } else if -curSel == l.ID { 153 153 labelSelected = true 154 154 l.IsExcluded = true 155 - } else if s != 0 { 155 + } else if curSel != 0 { 156 156 // Exclude other labels in the same scope from selection 157 - if s < 0 || labelScope == "" || labelScope != currentSelectedExclusiveScopes[i] { 158 - labelQuerySlice = append(labelQuerySlice, strconv.FormatInt(s, 10)) 157 + if curSel < 0 || exclusiveScope == "" || exclusiveScope != currentSelectedExclusiveScopes[i] { 158 + labelQuerySlice = append(labelQuerySlice, curSel) 159 159 } 160 160 } 161 161 } 162 + 162 163 if !labelSelected { 163 - labelQuerySlice = append(labelQuerySlice, labelID) 164 + labelQuerySlice = append(labelQuerySlice, l.ID) 164 165 } 165 166 l.IsSelected = labelSelected 166 - l.QueryString = strings.Join(labelQuerySlice, ",") 167 + 168 + // Sort and deduplicate the ids to avoid the crawlers asking for the 169 + // same thing with simply a different order of parameters 170 + slices.Sort(labelQuerySlice) 171 + labelQuerySlice = slices.Compact(labelQuerySlice) 172 + // Quick conversion (strings.Join() doesn't accept slices of Int64) 173 + labelQuerySliceStrings := make([]string, len(labelQuerySlice)) 174 + for i, x := range labelQuerySlice { 
175 + labelQuerySliceStrings[i] = strconv.FormatInt(x, 10) 176 + } 177 + l.QueryString = strings.Join(labelQuerySliceStrings, ",") 167 178 } 168 179 169 180 // BelongsToOrg returns true if label is an organization label ··· 176 187 return l.RepoID > 0 177 188 } 178 189 179 - // Return scope substring of label name, or empty string if none exists 190 + // ExclusiveScope returns scope substring of label name, or empty string if none exists 180 191 func (l *Label) ExclusiveScope() string { 181 192 if !l.Exclusive { 182 193 return ""
+22
models/issues/label_test.go
··· 23 23 assert.EqualValues(t, 2, label.NumOpenIssues) 24 24 } 25 25 26 + func TestLabel_LoadSelectedLabelsAfterClick(t *testing.T) { 27 + assert.NoError(t, unittest.PrepareTestDatabase()) 28 + // Loading the label id:8 (scope/label2) which have a scope and an 29 + // exclusivity with id:7 (scope/label1) 30 + label := unittest.AssertExistsAndLoadBean(t, &issues_model.Label{ID: 8}) 31 + 32 + // First test : with negative and scope 33 + label.LoadSelectedLabelsAfterClick([]int64{1, -8}, []string{"", "scope"}) 34 + assert.Equal(t, "1", label.QueryString) 35 + assert.Equal(t, true, label.IsSelected) 36 + 37 + // Second test : with duplicates 38 + label.LoadSelectedLabelsAfterClick([]int64{1, 7, 1, 7, 7}, []string{"", "scope", "", "scope", "scope"}) 39 + assert.Equal(t, "1,8", label.QueryString) 40 + assert.Equal(t, false, label.IsSelected) 41 + 42 + // Third test : empty set 43 + label.LoadSelectedLabelsAfterClick([]int64{}, []string{}) 44 + assert.False(t, label.IsSelected) 45 + assert.Equal(t, "8", label.QueryString) 46 + } 47 + 26 48 func TestLabel_ExclusiveScope(t *testing.T) { 27 49 assert.NoError(t, unittest.PrepareTestDatabase()) 28 50 label := unittest.AssertExistsAndLoadBean(t, &issues_model.Label{ID: 7})