loading up the forgejo repo on tangled to test page performance
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge pull request '[Port] Sync branches to DB immediately when handle git hook calling gitea#29493' (#2684) from oliverpool/forgejo:sync_branch into forgejo

Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/2684
Reviewed-by: Otto <otto@codeberg.org>
Reviewed-by: Earl Warren <earl-warren@noreply.codeberg.org>
Reviewed-by: Gusted <gusted@noreply.codeberg.org>

+280 -44
+5
models/git/branch.go
··· 162 162 return &branch, nil 163 163 } 164 164 165 + func GetBranches(ctx context.Context, repoID int64, branchNames []string) ([]*Branch, error) { 166 + branches := make([]*Branch, 0, len(branchNames)) 167 + return branches, db.GetEngine(ctx).Where("repo_id=?", repoID).In("name", branchNames).Find(&branches) 168 + } 169 + 165 170 func AddBranches(ctx context.Context, branches []*Branch) error { 166 171 for _, branch := range branches { 167 172 if _, err := db.GetEngine(ctx).Insert(branch); err != nil {
+63 -1
routers/private/hook_post_receive.go
··· 8 8 "net/http" 9 9 "strconv" 10 10 11 + git_model "code.gitea.io/gitea/models/git" 11 12 issues_model "code.gitea.io/gitea/models/issues" 12 13 repo_model "code.gitea.io/gitea/models/repo" 13 14 "code.gitea.io/gitea/modules/git" 15 + "code.gitea.io/gitea/modules/gitrepo" 14 16 "code.gitea.io/gitea/modules/log" 15 17 "code.gitea.io/gitea/modules/private" 16 18 repo_module "code.gitea.io/gitea/modules/repository" ··· 27 29 28 30 // We don't rely on RepoAssignment here because: 29 31 // a) we don't need the git repo in this function 32 + // OUT OF DATE: we do need the git repo to sync the branch to the db now. 30 33 // b) our update function will likely change the repository in the db so we will need to refresh it 31 34 // c) we don't always need the repo 32 35 ··· 34 37 repoName := ctx.Params(":repo") 35 38 36 39 // defer getting the repository at this point - as we should only retrieve it if we're going to call update 37 - var repo *repo_model.Repository 40 + var ( 41 + repo *repo_model.Repository 42 + gitRepo *git.Repository 43 + ) 44 + defer gitRepo.Close() // it's safe to call Close on a nil pointer 38 45 39 46 updates := make([]*repo_module.PushUpdateOptions, 0, len(opts.OldCommitIDs)) 40 47 wasEmpty := false ··· 75 82 } 76 83 77 84 if repo != nil && len(updates) > 0 { 85 + branchesToSync := make([]*repo_module.PushUpdateOptions, 0, len(updates)) 86 + for _, update := range updates { 87 + if !update.RefFullName.IsBranch() { 88 + continue 89 + } 90 + if repo == nil { 91 + repo = loadRepository(ctx, ownerName, repoName) 92 + if ctx.Written() { 93 + return 94 + } 95 + wasEmpty = repo.IsEmpty 96 + } 97 + 98 + if update.IsDelRef() { 99 + if err := git_model.AddDeletedBranch(ctx, repo.ID, update.RefFullName.BranchName(), update.PusherID); err != nil { 100 + log.Error("Failed to add deleted branch: %s/%s Error: %v", ownerName, repoName, err) 101 + ctx.JSON(http.StatusInternalServerError, private.HookPostReceiveResult{ 102 + Err: fmt.Sprintf("Failed to add deleted branch: %s/%s Error: %v", ownerName, repoName, err), 103 + }) 104 + return 105 + } 106 + } else { 107 + branchesToSync = append(branchesToSync, update) 108 + } 109 + } 110 + if len(branchesToSync) > 0 { 111 + if gitRepo == nil { 112 + var err error 113 + gitRepo, err = gitrepo.OpenRepository(ctx, repo) 114 + if err != nil { 115 + log.Error("Failed to open repository: %s/%s Error: %v", ownerName, repoName, err) 116 + ctx.JSON(http.StatusInternalServerError, private.HookPostReceiveResult{ 117 + Err: fmt.Sprintf("Failed to open repository: %s/%s Error: %v", ownerName, repoName, err), 118 + }) 119 + return 120 + } 121 + } 122 + 123 + var ( 124 + branchNames = make([]string, 0, len(branchesToSync)) 125 + commitIDs = make([]string, 0, len(branchesToSync)) 126 + ) 127 + for _, update := range branchesToSync { 128 + branchNames = append(branchNames, update.RefFullName.BranchName()) 129 + commitIDs = append(commitIDs, update.NewCommitID) 130 + } 131 + 132 + if err := repo_service.SyncBranchesToDB(ctx, repo.ID, opts.UserID, branchNames, commitIDs, gitRepo.GetCommit); err != nil { 133 + ctx.JSON(http.StatusInternalServerError, private.HookPostReceiveResult{ 134 + Err: fmt.Sprintf("Failed to sync branch to DB in repository: %s/%s Error: %v", ownerName, repoName, err), 135 + }) 136 + return 137 + } 138 + } 139 + 78 140 if err := repo_service.PushUpdates(updates); err != nil { 79 141 log.Error("Failed to Update: %s/%s Total Updates: %d", ownerName, repoName, len(updates)) 80 142 for i, update := range updates {
+81 -34
services/repository/branch.go
··· 225 225 return err 226 226 } 227 227 228 - // syncBranchToDB sync the branch information in the database. It will try to update the branch first, 229 - // if updated success with affect records > 0, then all are done. Because that means the branch has been in the database. 230 - // If no record is affected, that means the branch does not exist in database. So there are two possibilities. 231 - // One is this is a new branch, then we just need to insert the record. Another is the branches haven't been synced, 232 - // then we need to sync all the branches into database. 233 - func syncBranchToDB(ctx context.Context, repoID, pusherID int64, branchName string, commit *git.Commit) error { 234 - cnt, err := git_model.UpdateBranch(ctx, repoID, pusherID, branchName, commit) 235 - if err != nil { 236 - return fmt.Errorf("git_model.UpdateBranch %d:%s failed: %v", repoID, branchName, err) 228 + // SyncBranchesToDB sync the branch information in the database. 229 + // It will check whether the branches of the repository have never been synced before. 230 + // If so, it will sync all branches of the repository. 231 + // Otherwise, it will sync the branches that need to be updated. 232 + func SyncBranchesToDB(ctx context.Context, repoID, pusherID int64, branchNames, commitIDs []string, getCommit func(commitID string) (*git.Commit, error)) error { 233 + // Some designs that make the code look strange but are made for performance optimization purposes: 234 + // 1. Sync branches in a batch to reduce the number of DB queries. 235 + // 2. Lazy load commit information since it may be not necessary. 236 + // 3. Exit early if synced all branches of git repo when there's no branch in DB. 237 + // 4. Check the branches in DB if they are already synced. 238 + // 239 + // If the user pushes many branches at once, the Git hook will call the internal API in batches, rather than all at once. 240 + // See https://github.com/go-gitea/gitea/blob/cb52b17f92e2d2293f7c003649743464492bca48/cmd/hook.go#L27 241 + // For the first batch, it will hit optimization 3. 242 + // For other batches, it will hit optimization 4. 243 + 244 + if len(branchNames) != len(commitIDs) { 245 + return fmt.Errorf("branchNames and commitIDs length not match") 237 246 } 238 - if cnt > 0 { // This means branch does exist, so it's a normal update. It also means the branch has been synced. 239 - return nil 240 - } 247 + 248 + return db.WithTx(ctx, func(ctx context.Context) error { 249 + branches, err := git_model.GetBranches(ctx, repoID, branchNames) 250 + if err != nil { 251 + return fmt.Errorf("git_model.GetBranches: %v", err) 252 + } 253 + 254 + if len(branches) == 0 { 255 + // if user haven't visit UI but directly push to a branch after upgrading from 1.20 -> 1.21, 256 + // we cannot simply insert the branch but need to check we have branches or not 257 + hasBranch, err := db.Exist[git_model.Branch](ctx, git_model.FindBranchOptions{ 258 + RepoID: repoID, 259 + IsDeletedBranch: optional.Some(false), 260 + }.ToConds()) 261 + if err != nil { 262 + return err 263 + } 264 + if !hasBranch { 265 + if _, err = repo_module.SyncRepoBranches(ctx, repoID, pusherID); err != nil { 266 + return fmt.Errorf("repo_module.SyncRepoBranches %d failed: %v", repoID, err) 267 + } 268 + return nil 269 + } 270 + } 271 + 272 + branchMap := make(map[string]*git_model.Branch, len(branches)) 273 + for _, branch := range branches { 274 + branchMap[branch.Name] = branch 275 + } 276 + 277 + newBranches := make([]*git_model.Branch, 0, len(branchNames)) 278 + 279 + for i, branchName := range branchNames { 280 + commitID := commitIDs[i] 281 + branch, exist := branchMap[branchName] 282 + if exist && branch.CommitID == commitID && !branch.IsDeleted { 283 + continue 284 + } 285 + 286 + commit, err := getCommit(commitID) 287 + if err != nil { 288 + return fmt.Errorf("get commit of %s failed: %v", branchName, err) 289 + } 290 + 291 + if exist { 292 + if _, err := git_model.UpdateBranch(ctx, repoID, pusherID, branchName, commit); err != nil { 293 + return fmt.Errorf("git_model.UpdateBranch %d:%s failed: %v", repoID, branchName, err) 294 + } 295 + return nil 296 + } 297 + 298 + // if database have branches but not this branch, it means this is a new branch 299 + newBranches = append(newBranches, &git_model.Branch{ 300 + RepoID: repoID, 301 + Name: branchName, 302 + CommitID: commit.ID.String(), 303 + CommitMessage: commit.Summary(), 304 + PusherID: pusherID, 305 + CommitTime: timeutil.TimeStamp(commit.Committer.When.Unix()), 306 + }) 307 + } 241 308 242 - // if user haven't visit UI but directly push to a branch after upgrading from 1.20 -> 1.21, 243 - // we cannot simply insert the branch but need to check we have branches or not 244 - hasBranch, err := db.Exist[git_model.Branch](ctx, git_model.FindBranchOptions{ 245 - RepoID: repoID, 246 - IsDeletedBranch: optional.Some(false), 247 - }.ToConds()) 248 - if err != nil { 249 - return err 250 - } 251 - if !hasBranch { 252 - if _, err = repo_module.SyncRepoBranches(ctx, repoID, pusherID); err != nil { 253 - return fmt.Errorf("repo_module.SyncRepoBranches %d:%s failed: %v", repoID, branchName, err) 309 + if len(newBranches) > 0 { 310 + return db.Insert(ctx, newBranches) 254 311 } 255 312 return nil 256 - } 257 - 258 - // if database have branches but not this branch, it means this is a new branch 259 - return db.Insert(ctx, &git_model.Branch{ 260 - RepoID: repoID, 261 - Name: branchName, 262 - CommitID: commit.ID.String(), 263 - CommitMessage: commit.Summary(), 264 - PusherID: pusherID, 265 - CommitTime: timeutil.TimeStamp(commit.Committer.When.Unix()), 266 313 }) 267 314 } 268 315
-9
services/repository/push.go
··· 11 11 "time" 12 12 13 13 "code.gitea.io/gitea/models/db" 14 - git_model "code.gitea.io/gitea/models/git" 15 14 repo_model "code.gitea.io/gitea/models/repo" 16 15 user_model "code.gitea.io/gitea/models/user" 17 16 "code.gitea.io/gitea/modules/cache" ··· 259 258 commits.Commits = commits.Commits[:setting.UI.FeedMaxCommitNum] 260 259 } 261 260 262 - if err = syncBranchToDB(ctx, repo.ID, opts.PusherID, branch, newCommit); err != nil { 263 - return fmt.Errorf("git_model.UpdateBranch %s:%s failed: %v", repo.FullName(), branch, err) 264 - } 265 - 266 261 notify_service.PushCommits(ctx, pusher, repo, opts, commits) 267 262 268 263 // Cache for big repository ··· 274 269 if err = pull_service.CloseBranchPulls(ctx, pusher, repo.ID, branch); err != nil { 275 270 // close all related pulls 276 271 log.Error("close related pull request failed: %v", err) 277 - } 278 - 279 - if err := git_model.AddDeletedBranch(ctx, repo.ID, branch, pusher.ID); err != nil { 280 - return fmt.Errorf("AddDeletedBranch %s:%s failed: %v", repo.FullName(), branch, err) 281 272 } 282 273 } 283 274
+131
tests/integration/git_push_test.go
··· 1 + // Copyright 2024 The Gitea Authors. All rights reserved. 2 + // SPDX-License-Identifier: MIT 3 + 4 + package integration 5 + 6 + import ( 7 + "fmt" 8 + "net/url" 9 + "testing" 10 + 11 + "code.gitea.io/gitea/models/db" 12 + git_model "code.gitea.io/gitea/models/git" 13 + "code.gitea.io/gitea/models/unittest" 14 + user_model "code.gitea.io/gitea/models/user" 15 + "code.gitea.io/gitea/modules/git" 16 + repo_service "code.gitea.io/gitea/services/repository" 17 + 18 + "github.com/stretchr/testify/assert" 19 + "github.com/stretchr/testify/require" 20 + ) 21 + 22 + func TestGitPush(t *testing.T) { 23 + onGiteaRun(t, testGitPush) 24 + } 25 + 26 + func testGitPush(t *testing.T, u *url.URL) { 27 + t.Run("Push branches at once", func(t *testing.T) { 28 + runTestGitPush(t, u, func(t *testing.T, gitPath string) (pushed, deleted []string) { 29 + for i := 0; i < 100; i++ { 30 + branchName := fmt.Sprintf("branch-%d", i) 31 + pushed = append(pushed, branchName) 32 + doGitCreateBranch(gitPath, branchName)(t) 33 + } 34 + pushed = append(pushed, "master") 35 + doGitPushTestRepository(gitPath, "origin", "--all")(t) 36 + return pushed, deleted 37 + }) 38 + }) 39 + 40 + t.Run("Push branches one by one", func(t *testing.T) { 41 + runTestGitPush(t, u, func(t *testing.T, gitPath string) (pushed, deleted []string) { 42 + for i := 0; i < 100; i++ { 43 + branchName := fmt.Sprintf("branch-%d", i) 44 + doGitCreateBranch(gitPath, branchName)(t) 45 + doGitPushTestRepository(gitPath, "origin", branchName)(t) 46 + pushed = append(pushed, branchName) 47 + } 48 + return pushed, deleted 49 + }) 50 + }) 51 + 52 + t.Run("Delete branches", func(t *testing.T) { 53 + runTestGitPush(t, u, func(t *testing.T, gitPath string) (pushed, deleted []string) { 54 + doGitPushTestRepository(gitPath, "origin", "master")(t) // make sure master is the default branch instead of a branch we are going to delete 55 + pushed = append(pushed, "master") 56 + 57 + for i := 0; i < 100; i++ { 58 + branchName := fmt.Sprintf("branch-%d", i) 59 + pushed = append(pushed, branchName) 60 + doGitCreateBranch(gitPath, branchName)(t) 61 + } 62 + doGitPushTestRepository(gitPath, "origin", "--all")(t) 63 + 64 + for i := 0; i < 10; i++ { 65 + branchName := fmt.Sprintf("branch-%d", i) 66 + doGitPushTestRepository(gitPath, "origin", "--delete", branchName)(t) 67 + deleted = append(deleted, branchName) 68 + } 69 + return pushed, deleted 70 + }) 71 + }) 72 + } 73 + 74 + func runTestGitPush(t *testing.T, u *url.URL, gitOperation func(t *testing.T, gitPath string) (pushed, deleted []string)) { 75 + user := unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: 2}) 76 + repo, err := repo_service.CreateRepository(db.DefaultContext, user, user, repo_service.CreateRepoOptions{ 77 + Name: "repo-to-push", 78 + Description: "test git push", 79 + AutoInit: false, 80 + DefaultBranch: "main", 81 + IsPrivate: false, 82 + }) 83 + require.NoError(t, err) 84 + require.NotEmpty(t, repo) 85 + 86 + gitPath := t.TempDir() 87 + 88 + doGitInitTestRepository(gitPath)(t) 89 + 90 + oldPath := u.Path 91 + oldUser := u.User 92 + defer func() { 93 + u.Path = oldPath 94 + u.User = oldUser 95 + }() 96 + u.Path = repo.FullName() + ".git" 97 + u.User = url.UserPassword(user.LowerName, userPassword) 98 + 99 + doGitAddRemote(gitPath, "origin", u)(t) 100 + 101 + gitRepo, err := git.OpenRepository(git.DefaultContext, gitPath) 102 + require.NoError(t, err) 103 + defer gitRepo.Close() 104 + 105 + pushedBranches, deletedBranches := gitOperation(t, gitPath) 106 + 107 + dbBranches := make([]*git_model.Branch, 0) 108 + require.NoError(t, db.GetEngine(db.DefaultContext).Where("repo_id=?", repo.ID).Find(&dbBranches)) 109 + assert.Equalf(t, len(pushedBranches), len(dbBranches), "mismatched number of branches in db") 110 + dbBranchesMap := make(map[string]*git_model.Branch, len(dbBranches)) 111 + for _, branch := range dbBranches { 112 + dbBranchesMap[branch.Name] = branch 113 + } 114 + 115 + deletedBranchesMap := make(map[string]bool, len(deletedBranches)) 116 + for _, branchName := range deletedBranches { 117 + deletedBranchesMap[branchName] = true 118 + } 119 + 120 + for _, branchName := range pushedBranches { 121 + branch, ok := dbBranchesMap[branchName] 122 + deleted := deletedBranchesMap[branchName] 123 + assert.True(t, ok, "branch %s not found in database", branchName) 124 + assert.Equal(t, deleted, branch.IsDeleted, "IsDeleted of %s is %v, but it's expected to be %v", branchName, branch.IsDeleted, deleted) 125 + commitID, err := gitRepo.GetBranchCommitID(branchName) 126 + require.NoError(t, err) 127 + assert.Equal(t, commitID, branch.CommitID) 128 + } 129 + 130 + require.NoError(t, repo_service.DeleteRepositoryDirectly(db.DefaultContext, user, repo.ID)) 131 + }