@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.) hq.recaptime.dev/wiki/Phorge
phorge phabricator
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Improve performance of repository discovery in repositories with >65K refs

Summary:
Ref T13593. The commit cache in this Engine has a fixed maximum size (currently 65,535 entries).

If we execute discovery in a repository with more refs than this (e.g., 180K), we get fast lookups for the first 65,535 refs and slow lookups for the remaining refs.

Instead, divide the refs into chunks no larger than the cache size, and perform an explicit cache fill before each chunk is processed.

Test Plan:
- Created a repository with 1K refs. Set cache size to 256. Ran discovery.
  - Before patch: saw one large cache fill and then ~750 single-gets.
  - After patch: saw four large cache fills.
- Compared `bin/repository discover ... --verbose` output before and after patch for overall effect; saw no differences.

Maniphest Tasks: T13593

Differential Revision: https://secure.phabricator.com/D21521

+64 -39
+64 -39
src/applications/repository/engine/PhabricatorRepositoryDiscoveryEngine.php
··· 142 142 return array(); 143 143 } 144 144 145 - $heads = $this->sortRefs($heads); 146 - $head_commits = mpull($heads, 'getCommitIdentifier'); 147 - 148 145 $this->log( 149 146 pht( 150 147 'Discovering commits in repository "%s".', 151 148 $repository->getDisplayName())); 152 149 153 - $this->fillCommitCache($head_commits); 150 + $ref_lists = array(); 151 + 152 + $head_groups = $this->getRefGroupsForDiscovery($heads); 153 + foreach ($head_groups as $head_group) { 154 154 155 - $refs = array(); 156 - foreach ($heads as $ref) { 157 - $name = $ref->getShortName(); 158 - $commit = $ref->getCommitIdentifier(); 155 + $group_identifiers = mpull($head_group, 'getCommitIdentifier'); 156 + $group_identifiers = array_fuse($group_identifiers); 157 + $this->fillCommitCache($group_identifiers); 158 + 159 + foreach ($head_group as $ref) { 160 + $name = $ref->getShortName(); 161 + $commit = $ref->getCommitIdentifier(); 162 + 163 + $this->log( 164 + pht( 165 + 'Examining "%s" (%s) at "%s".', 166 + $name, 167 + $ref->getRefType(), 168 + $commit)); 169 + 170 + if (!$repository->shouldTrackRef($ref)) { 171 + $this->log(pht('Skipping, ref is untracked.')); 172 + continue; 173 + } 174 + 175 + if ($this->isKnownCommit($commit)) { 176 + $this->log(pht('Skipping, HEAD is known.')); 177 + continue; 178 + } 179 + 180 + // In Git, it's possible to tag anything. We just skip tags that don't 181 + // point to a commit. See T11301. 
182 + $fields = $ref->getRawFields(); 183 + $ref_type = idx($fields, 'objecttype'); 184 + $tag_type = idx($fields, '*objecttype'); 185 + if ($ref_type != 'commit' && $tag_type != 'commit') { 186 + $this->log(pht('Skipping, this is not a commit.')); 187 + continue; 188 + } 189 + 190 + $this->log(pht('Looking for new commits.')); 191 + 192 + $head_refs = $this->discoverStreamAncestry( 193 + new PhabricatorGitGraphStream($repository, $commit), 194 + $commit, 195 + $publisher->isPermanentRef($ref)); 159 196 160 - $this->log( 161 - pht( 162 - 'Examining "%s" (%s) at "%s".', 163 - $name, 164 - $ref->getRefType(), 165 - $commit)); 197 + $this->didDiscoverRefs($head_refs); 166 198 167 - if (!$repository->shouldTrackRef($ref)) { 168 - $this->log(pht('Skipping, ref is untracked.')); 169 - continue; 199 + $ref_lists[] = $head_refs; 170 200 } 201 + } 171 202 172 - if ($this->isKnownCommit($commit)) { 173 - $this->log(pht('Skipping, HEAD is known.')); 174 - continue; 175 - } 203 + $refs = array_mergev($ref_lists); 176 204 177 - // In Git, it's possible to tag anything. We just skip tags that don't 178 - // point to a commit. See T11301. 179 - $fields = $ref->getRawFields(); 180 - $ref_type = idx($fields, 'objecttype'); 181 - $tag_type = idx($fields, '*objecttype'); 182 - if ($ref_type != 'commit' && $tag_type != 'commit') { 183 - $this->log(pht('Skipping, this is not a commit.')); 184 - continue; 185 - } 205 + return $refs; 206 + } 186 207 187 - $this->log(pht('Looking for new commits.')); 208 + /** 209 + * @task git 210 + */ 211 + private function getRefGroupsForDiscovery(array $heads) { 212 + $heads = $this->sortRefs($heads); 188 213 189 - $head_refs = $this->discoverStreamAncestry( 190 - new PhabricatorGitGraphStream($repository, $commit), 191 - $commit, 192 - $publisher->isPermanentRef($ref)); 214 + // See T13593. We hold a commit cache with a fixed maximum size. 
Split the 215 + // refs into chunks no larger than the cache size, so we don't overflow the 216 + // cache when testing them. 193 217 194 - $this->didDiscoverRefs($head_refs); 218 + $array_iterator = new ArrayIterator($heads); 195 219 196 - $refs[] = $head_refs; 197 - } 220 + $chunk_iterator = new PhutilChunkedIterator( 221 + $array_iterator, 222 + self::MAX_COMMIT_CACHE_SIZE); 198 223 199 - return array_mergev($refs); 224 + return $chunk_iterator; 200 225 } 201 226 202 227