@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.) hq.recaptime.dev/wiki/Phorge
phorge phabricator
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at recaptime-dev/main 931 lines 27 kB view raw
<?php

/**
 * Discovers commits which exist in a repository's version control system but
 * are not yet recorded in the database, records them, and queues import
 * tasks for them.
 *
 * @task discover   Discovering Repositories
 * @task svn        Discovering Subversion Repositories
 * @task git        Discovering Git Repositories
 * @task hg         Discovering Mercurial Repositories
 * @task internal   Internals
 */
final class PhabricatorRepositoryDiscoveryEngine
  extends PhabricatorRepositoryEngine {

  private $repairMode;

  // Map of commit identifiers known to exist in the database. Bounded to
  // MAX_COMMIT_CACHE_SIZE entries by fillCommitCache(), oldest first.
  private $commitCache = array();

  // Map of commit identifiers discovered during the current run.
  private $workingSet = array();

  const MAX_COMMIT_CACHE_SIZE = 65535;


/* -(  Discovering Repositories  )------------------------------------------- */


  /**
   * Enable or disable repair mode. In repair mode the database is not
   * consulted when deciding whether a commit is already known.
   *
   * @param bool $repair_mode True to enable repair mode.
   * @return $this
   * @task discover
   */
  public function setRepairMode($repair_mode) {
    $this->repairMode = $repair_mode;
    return $this;
  }


  /**
   * @return bool|null Current repair mode setting.
   * @task discover
   */
  public function getRepairMode() {
    return $this->repairMode;
  }


  /**
   * Discover new commits while holding the repository discovery lock.
   *
   * @return list<PhabricatorRepositoryCommitRef> Newly discovered refs.
   * @throws DiffusionDaemonLockException If the lock can not be acquired.
   * @task discover
   */
  public function discoverCommits() {
    $repository = $this->getRepository();

    $lock = $this->newRepositoryLock($repository, 'repo.look', false);

    try {
      $lock->lock();
    } catch (PhutilLockException $ex) {
      throw new DiffusionDaemonLockException(
        pht(
          'Another process is currently discovering repository "%s", '.
          'skipping discovery.',
          $repository->getDisplayName()));
    }

    // Release the lock on every exit path, including when an Error (rather
    // than an Exception) escapes discovery.
    try {
      return $this->discoverCommitsWithLock();
    } finally {
      $lock->unlock();
    }
  }

  /**
   * Run discovery for the repository's VCS, record every discovered commit,
   * then mark commits which are no longer reachable.
   *
   * @return list<PhabricatorRepositoryCommitRef> Newly discovered refs.
   * @task discover
   */
  private function discoverCommitsWithLock() {
    $repository = $this->getRepository();
    $viewer = $this->getViewer();

    $vcs = $repository->getVersionControlSystem();
    switch ($vcs) {
      case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN:
        $refs = $this->discoverSubversionCommits();
        break;
      case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL:
        $refs = $this->discoverMercurialCommits();
        break;
      case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT:
        $refs = $this->discoverGitCommits();
        break;
      default:
        throw new Exception(pht("Unknown VCS '%s'!", $vcs));
    }

    if ($this->isInitialImport($refs)) {
      $this->log(
        pht(
          'Discovered more than %s commits in an empty repository, '.
          'marking repository as importing.',
          new PhutilNumber(PhabricatorRepository::IMPORT_THRESHOLD)));

      $repository->markImporting();
    }

    // Clear the working set cache.
    $this->workingSet = array();

    $task_priority = $this->getImportTaskPriority($repository, $refs);

    // Record discovered commits and mark them in the cache.
    foreach ($refs as $ref) {
      $this->recordCommit(
        $repository,
        $ref->getIdentifier(),
        $ref->getEpoch(),
        $ref->getIsPermanent(),
        $ref->getParents(),
        $task_priority);

      $this->commitCache[$ref->getIdentifier()] = true;
    }

    $this->markUnreachableCommits($repository);

    $version = $this->getObservedVersion($repository);
    if ($version !== null) {
      id(new DiffusionRepositoryClusterEngine())
        ->setViewer($viewer)
        ->setRepository($repository)
        ->synchronizeWorkingCopyAfterDiscovery($version);
    }

    return $refs;
  }


/* -(  Discovering Git Repositories  )--------------------------------------- */


  /**
   * Walk the ancestry of every tracked, unknown Git ref head and collect the
   * commits which are not yet known.
   *
   * @return list<PhabricatorRepositoryCommitRef> Newly discovered refs.
   * @task git
   */
  private function discoverGitCommits() {
    $repository = $this->getRepository();
    $publisher = $repository->newPublisher();

    $heads = id(new DiffusionLowLevelGitRefQuery())
      ->setRepository($repository)
      ->execute();

    if (!$heads) {
      // This repository has no heads at all, so we don't need to do
      // anything. Generally, this means the repository is empty.
      return array();
    }

    $this->log(
      pht(
        'Discovering commits in repository "%s".',
        $repository->getDisplayName()));

    $ref_lists = array();

    $head_groups = $this->getRefGroupsForDiscovery($heads);
    foreach ($head_groups as $head_group) {

      $group_identifiers = mpull($head_group, 'getCommitIdentifier');
      $group_identifiers = array_fuse($group_identifiers);
      $this->fillCommitCache($group_identifiers);

      foreach ($head_group as $ref) {
        $name = $ref->getShortName();
        $commit = $ref->getCommitIdentifier();

        $this->log(
          pht(
            'Examining "%s" (%s) at "%s".',
            $name,
            $ref->getRefType(),
            $commit));

        if (!$repository->shouldTrackRef($ref)) {
          $this->log(pht('Skipping, ref is untracked.'));
          continue;
        }

        if ($this->isKnownCommit($commit)) {
          $this->log(pht('Skipping, HEAD is known.'));
          continue;
        }

        // In Git, it's possible to tag anything. We just skip tags that don't
        // point to a commit. See T11301.
        $fields = $ref->getRawFields();
        $ref_type = idx($fields, 'objecttype');
        $tag_type = idx($fields, '*objecttype');
        if ($ref_type != 'commit' && $tag_type != 'commit') {
          $this->log(pht('Skipping, this is not a commit.'));
          continue;
        }

        $this->log(pht('Looking for new commits.'));

        $head_refs = $this->discoverStreamAncestry(
          new PhabricatorGitGraphStream($repository, $commit),
          $commit,
          $publisher->isPermanentRef($ref));

        $this->didDiscoverRefs($head_refs);

        $ref_lists[] = $head_refs;
      }
    }

    $refs = array_mergev($ref_lists);

    return $refs;
  }

  /**
   * Sort refs and split them into chunks which each fit in the commit cache.
   *
   * @param list<DiffusionRepositoryRef> $heads Ref heads to examine.
   * @return PhutilChunkedIterator Iterator over groups of refs.
   * @task git
   */
  private function getRefGroupsForDiscovery(array $heads) {
    $heads = $this->sortRefs($heads);

    // See T13593. We hold a commit cache with a fixed maximum size. Split the
    // refs into chunks no larger than the cache size, so we don't overflow the
    // cache when testing them.

    $array_iterator = new ArrayIterator($heads);

    $chunk_iterator = new PhutilChunkedIterator(
      $array_iterator,
      self::MAX_COMMIT_CACHE_SIZE);

    return $chunk_iterator;
  }


/* -(  Discovering Subversion Repositories  )-------------------------------- */


  /**
   * Page backwards through `svn log` from HEAD until a known revision (or
   * the start of history) is reached, collecting unknown revisions.
   *
   * @return list<PhabricatorRepositoryCommitRef> Newly discovered refs,
   *   ordered from oldest to newest revision.
   * @task svn
   */
  private function discoverSubversionCommits() {
    $repository = $this->getRepository();

    if (!$repository->isHosted()) {
      $this->verifySubversionRoot($repository);
    }

    $upper_bound = null;
    $limit = 1;
    $refs = array();
    do {
      // Find all the unknown commits on this path. Note that we permit
      // importing an SVN subdirectory rather than the entire repository, so
      // commits may be nonsequential.

      if ($upper_bound === null) {
        $at_rev = 'HEAD';
      } else {
        $at_rev = ($upper_bound - 1);
      }

      try {
        list($xml, $stderr) = $repository->execxRemoteCommand(
          'log --xml --quiet --limit %d %s',
          $limit,
          $repository->getSubversionBaseURI($at_rev));
      } catch (CommandException $ex) {
        $stderr = $ex->getStderr();
        if (preg_match('/(path|File) not found/', $stderr)) {
          // We've gone all the way back through history and this path was not
          // affected by earlier commits.
          break;
        }
        throw $ex;
      }

      $xml = phutil_utf8ize($xml);
      $log = new SimpleXMLElement($xml);
      foreach ($log->logentry as $entry) {
        $identifier = (int)$entry['revision'];
        $epoch = (int)strtotime((string)$entry->date[0]);
        $refs[$identifier] = id(new PhabricatorRepositoryCommitRef())
          ->setIdentifier($identifier)
          ->setEpoch($epoch)
          ->setIsPermanent(true);

        if ($upper_bound === null) {
          $upper_bound = $identifier;
        } else {
          $upper_bound = min($upper_bound, $identifier);
        }
      }

      // Discover 2, 4, 8, ... 256 logs at a time. This allows us to initially
      // import large repositories fairly quickly, while pulling only as much
      // data as we need in the common case (when we've already imported the
      // repository and are just grabbing one commit at a time).
      $limit = min($limit * 2, 256);

    } while ($upper_bound > 1 && !$this->isKnownCommit($upper_bound));

    // Order newest first, drop already-known revisions from the old end, then
    // flip so the oldest unknown revision comes first.
    krsort($refs);
    while ($refs && $this->isKnownCommit(last($refs)->getIdentifier())) {
      array_pop($refs);
    }
    $refs = array_reverse($refs);

    $this->didDiscoverRefs($refs);

    return $refs;
  }


  /**
   * Verify that the configured Subversion URI points at the repository root
   * reported by `svn info`.
   *
   * @param PhabricatorRepository $repository Repository to verify.
   * @return void
   * @throws Exception If the normalized URIs differ.
   * @task svn
   */
  private function verifySubversionRoot(PhabricatorRepository $repository) {
    list($xml) = $repository->execxRemoteCommand(
      'info --xml %s',
      $repository->getSubversionPathURI());

    $xml = phutil_utf8ize($xml);
    $xml = new SimpleXMLElement($xml);

    $remote_root = (string)($xml->entry[0]->repository[0]->root[0]);
    $expect_root = $repository->getSubversionPathURI();

    $normal_type_svn = ArcanistRepositoryURINormalizer::TYPE_SVN;

    $remote_normal = id(new ArcanistRepositoryURINormalizer(
      $normal_type_svn,
      $remote_root))->getNormalizedPath();

    $expect_normal = id(new ArcanistRepositoryURINormalizer(
      $normal_type_svn,
      $expect_root))->getNormalizedPath();

    if ($remote_normal != $expect_normal) {
      throw new Exception(
        pht(
          'Repository "%s" does not have a correctly configured remote URI. '.
          'The remote URI for a Subversion repository MUST point at the '.
          'repository root. The root for this repository is "%s", but the '.
          'configured URI is "%s". To resolve this error, set the remote URI '.
          'to point at the repository root. If you want to import only part '.
          'of a Subversion repository, use the "Import Only" option.',
          $repository->getDisplayName(),
          $remote_root,
          $expect_root));
    }
  }


/* -(  Discovering Mercurial Repositories  )--------------------------------- */


  /**
   * Walk the ancestry of every tracked, unknown Mercurial branch head and
   * collect the commits which are not yet known.
   *
   * @return list<PhabricatorRepositoryCommitRef> Newly discovered refs.
   * @task hg
   */
  private function discoverMercurialCommits() {
    $repository = $this->getRepository();

    $branches = id(new DiffusionLowLevelMercurialBranchesQuery())
      ->setRepository($repository)
      ->execute();

    $this->fillCommitCache(mpull($branches, 'getCommitIdentifier'));

    $refs = array();
    foreach ($branches as $branch) {
      // NOTE: Mercurial branches may have multiple heads, so the names may
      // not be unique.
      $name = $branch->getShortName();
      $commit = $branch->getCommitIdentifier();

      $this->log(pht('Examining branch "%s" head "%s".', $name, $commit));
      if (!$repository->shouldTrackBranch($name)) {
        $this->log(pht('Skipping, branch is untracked.'));
        continue;
      }

      if ($this->isKnownCommit($commit)) {
        $this->log(pht('Skipping, this head is a known commit.'));
        continue;
      }

      $this->log(pht('Looking for new commits.'));

      $branch_refs = $this->discoverStreamAncestry(
        new PhabricatorMercurialGraphStream($repository, $commit),
        $commit,
        $is_permanent = true);

      $this->didDiscoverRefs($branch_refs);

      $refs[] = $branch_refs;
    }

    return array_mergev($refs);
  }


/* -(  Internals  )---------------------------------------------------------- */


  /**
   * Find every unknown ancestor reachable from a commit and return them in
   * topological order, most ancestral first.
   *
   * @param PhabricatorRepositoryGraphStream $stream Source of parent and
   *   date information.
   * @param string $commit Identifier of the head commit to start from.
   * @param bool $is_permanent Permanence flag applied to every ref built.
   * @return list<PhabricatorRepositoryCommitRef> Unknown commits.
   * @task internal
   */
  private function discoverStreamAncestry(
    PhabricatorRepositoryGraphStream $stream,
    $commit,
    $is_permanent) {

    $discover = array($commit);
    $graph = array();
    $seen = array();

    // Find all the reachable, undiscovered commits. Build a graph of the
    // edges.
    while ($discover) {
      $target = array_pop($discover);

      if (empty($graph[$target])) {
        $graph[$target] = array();
      }

      $parents = $stream->getParents($target);
      foreach ($parents as $parent) {
        if ($this->isKnownCommit($parent)) {
          continue;
        }

        $graph[$target][$parent] = true;

        if (empty($seen[$parent])) {
          $seen[$parent] = true;
          $discover[] = $parent;
        }
      }
    }

    // Now, sort them topologically.
    $commits = $this->reduceGraph($graph);

    $refs = array();
    foreach ($commits as $commit) {
      $epoch = $stream->getCommitDate($commit);

      // If the epoch doesn't fit into a uint32, treat it as though it stores
      // the current time. For discussion, see T11537.
      if ($epoch > 0xFFFFFFFF) {
        $epoch = PhabricatorTime::getNow();
      }

      // If the epoch is not present at all, treat it as though it stores the
      // value "0". For discussion, see T12062. This behavior is consistent
      // with the behavior of "git show". Test for null explicitly first:
      // passing null to strlen() is deprecated in newer PHP versions.
      if ($epoch === null || !strlen($epoch)) {
        $epoch = 0;
      }

      $refs[] = id(new PhabricatorRepositoryCommitRef())
        ->setIdentifier($commit)
        ->setEpoch($epoch)
        ->setIsPermanent($is_permanent)
        ->setParents($stream->getParents($commit));
    }

    return $refs;
  }


  /**
   * Topologically sort a commit graph so ancestors precede descendants.
   *
   * @param map<string, map<string, bool>> $edges Map from each commit to the
   *   set of its unknown parents.
   * @return list<string> Commit identifiers, most ancestral first.
   * @task internal
   */
  private function reduceGraph(array $edges) {
    foreach ($edges as $commit => $parents) {
      $edges[$commit] = array_keys($parents);
    }

    $graph = new PhutilDirectedScalarGraph();
    $graph->addNodes($edges);

    $commits = $graph->getNodesInTopologicalOrder();

    // NOTE: We want the most ancestral nodes first, so we need to reverse the
    // list we get out of AbstractDirectedGraph.
    $commits = array_reverse($commits);

    return $commits;
  }


  /**
   * Test whether a commit is already known, consulting the commit cache, the
   * working set, and finally the database (via @{method:fillCommitCache}).
   *
   * @param string|int $identifier Commit identifier.
   * @return bool True if the commit is known.
   * @task internal
   */
  private function isKnownCommit($identifier) {
    if (isset($this->commitCache[$identifier])) {
      return true;
    }

    if (isset($this->workingSet[$identifier])) {
      return true;
    }

    $this->fillCommitCache(array($identifier));

    return isset($this->commitCache[$identifier]);
  }

  /**
   * Load the given identifiers from the database and remember which of them
   * exist, evicting the oldest cache entries if the cache grows too large.
   *
   * @param list<string|int> $identifiers Commit identifiers to look up.
   * @return void
   * @task internal
   */
  private function fillCommitCache(array $identifiers) {
    if (!$identifiers) {
      return;
    }

    if ($this->repairMode) {
      // In repair mode, rediscover the entire repository, ignoring the
      // database state. The engine still maintains a local cache (the
      // "Working Set") but we just give up before looking in the database.
      return;
    }

    $max_size = self::MAX_COMMIT_CACHE_SIZE;

    // If we're filling more identifiers than would fit in the cache, ignore
    // the ones that don't fit. Because the cache is FIFO, overfilling it can
    // cause the entire cache to miss. See T12296.
    if (count($identifiers) > $max_size) {
      $identifiers = array_slice($identifiers, 0, $max_size);
    }

    // When filling the cache we ignore commits which have been marked as
    // unreachable, treating them as though they do not exist. When recording
    // commits later we'll revive commits that exist but are unreachable.

    $commits = id(new PhabricatorRepositoryCommit())->loadAllWhere(
      'repositoryID = %d AND commitIdentifier IN (%Ls)
        AND (importStatus & %d) != %d',
      $this->getRepository()->getID(),
      $identifiers,
      PhabricatorRepositoryCommit::IMPORTED_UNREACHABLE,
      PhabricatorRepositoryCommit::IMPORTED_UNREACHABLE);

    foreach ($commits as $commit) {
      $this->commitCache[$commit->getCommitIdentifier()] = true;
    }

    // Evict the oldest entries in one pass. Keys must be preserved here:
    // array_shift() renumbers integer keys, which would corrupt entries
    // keyed by integer identifiers such as Subversion revisions.
    $excess = count($this->commitCache) - $max_size;
    if ($excess > 0) {
      $this->commitCache = array_slice(
        $this->commitCache,
        $excess,
        null,
        true);
    }
  }

  /**
   * Sort refs so we process permanent refs first. This makes the whole import
   * process a little cheaper, since we can publish these commits the first
   * time through rather than catching them in the refs step.
   *
   * @task internal
   *
   * @param   list<DiffusionRepositoryRef> $refs List of refs.
   * @return  list<DiffusionRepositoryRef> Sorted list of refs.
   */
  private function sortRefs(array $refs) {
    $repository = $this->getRepository();
    $publisher = $repository->newPublisher();

    $head_refs = array();
    $tail_refs = array();
    foreach ($refs as $ref) {
      if ($publisher->isPermanentRef($ref)) {
        $head_refs[] = $ref;
      } else {
        $tail_refs[] = $ref;
      }
    }

    return array_merge($head_refs, $tail_refs);
  }


  /**
   * Record one discovered commit: revive it if it already exists, otherwise
   * insert the commit, its data row and its parent edges in a transaction.
   *
   * @param PhabricatorRepository $repository Repository being discovered.
   * @param string|int $commit_identifier Commit identifier.
   * @param int $epoch Commit epoch.
   * @param bool $is_permanent Whether to flag the commit as permanent.
   * @param list<string> $parents Parent commit identifiers.
   * @param int $task_priority Priority for the queued import task.
   * @return void
   * @throws Exception If a parent commit can not be found in the database.
   * @task internal
   */
  private function recordCommit(
    PhabricatorRepository $repository,
    $commit_identifier,
    $epoch,
    $is_permanent,
    array $parents,
    $task_priority) {

    $commit = new PhabricatorRepositoryCommit();
    $conn_w = $repository->establishConnection('w');

    // First, try to revive an existing unreachable commit (if one exists) by
    // removing the "unreachable" flag. If we succeed, we don't need to do
    // anything else: we already discovered this commit some time ago.
    queryfx(
      $conn_w,
      'UPDATE %T SET importStatus = (importStatus & ~%d)
        WHERE repositoryID = %d AND commitIdentifier = %s',
      $commit->getTableName(),
      PhabricatorRepositoryCommit::IMPORTED_UNREACHABLE,
      $repository->getID(),
      $commit_identifier);
    if ($conn_w->getAffectedRows()) {
      $commit = $commit->loadOneWhere(
        'repositoryID = %d AND commitIdentifier = %s',
        $repository->getID(),
        $commit_identifier);

      // After reviving a commit, schedule new daemons for it.
      $this->didDiscoverCommit($repository, $commit, $epoch, $task_priority);
      return;
    }

    $commit->setRepositoryID($repository->getID());
    $commit->setCommitIdentifier($commit_identifier);
    $commit->setEpoch($epoch);
    if ($is_permanent) {
      $commit->setImportStatus(PhabricatorRepositoryCommit::IMPORTED_PERMANENT);
    }

    $data = new PhabricatorRepositoryCommitData();

    try {
      // If this commit has parents, look up their IDs. The parent commits
      // should always exist already.

      $parent_ids = array();
      if ($parents) {
        $parent_rows = queryfx_all(
          $conn_w,
          'SELECT id, commitIdentifier FROM %T
            WHERE commitIdentifier IN (%Ls) AND repositoryID = %d',
          $commit->getTableName(),
          $parents,
          $repository->getID());

        $parent_map = ipull($parent_rows, 'id', 'commitIdentifier');

        foreach ($parents as $parent) {
          if (empty($parent_map[$parent])) {
            throw new Exception(
              pht('Unable to identify parent "%s"!', $parent));
          }
          $parent_ids[] = $parent_map[$parent];
        }
      } else {
        // Write an explicit 0 so we can distinguish between "really no
        // parents" and "data not available".
        if (!$repository->isSVN()) {
          $parent_ids = array(0);
        }
      }

      $commit->openTransaction();
        $commit->save();

        $data->setCommitID($commit->getID());
        $data->save();

        foreach ($parent_ids as $parent_id) {
          queryfx(
            $conn_w,
            'INSERT IGNORE INTO %T (childCommitID, parentCommitID)
              VALUES (%d, %d)',
            PhabricatorRepository::TABLE_PARENTS,
            $commit->getID(),
            $parent_id);
        }
      $commit->saveTransaction();

      $this->didDiscoverCommit($repository, $commit, $epoch, $task_priority);

      if ($this->repairMode) {
        // Normally, the query should throw a duplicate key exception. If we
        // reach this in repair mode, we've actually performed a repair.
        $this->log(pht('Repaired commit "%s".', $commit_identifier));
      }

      PhutilEventEngine::dispatchEvent(
        new PhabricatorEvent(
          PhabricatorEventType::TYPE_DIFFUSION_DIDDISCOVERCOMMIT,
          array(
            'repository' => $repository,
            'commit' => $commit,
          )));

    } catch (AphrontDuplicateKeyQueryException $ex) {
      $commit->killTransaction();
      // Ignore. This can happen because we discover the same new commit
      // more than once when looking at history, or because of races or
      // data inconsistency or cosmic radiation; in any case, we're still
      // in a good state if we ignore the failure.
    }
  }

  /**
   * Queue an import task for a commit and fold the commit into the
   * repository summary table.
   *
   * @param PhabricatorRepository $repository Repository being discovered.
   * @param PhabricatorRepositoryCommit $commit Recorded or revived commit.
   * @param int $epoch Commit epoch.
   * @param int $task_priority Priority for the queued import task.
   * @return void
   * @task internal
   */
  private function didDiscoverCommit(
    PhabricatorRepository $repository,
    PhabricatorRepositoryCommit $commit,
    $epoch,
    $task_priority) {

    $this->queueCommitImportTask(
      $repository,
      $commit->getPHID(),
      $task_priority,
      $via = 'discovery');

    // Update the repository summary table.
    queryfx(
      $commit->establishConnection('w'),
      'INSERT INTO %T (repositoryID, size, lastCommitID, epoch)
        VALUES (%d, 1, %d, %d)
        ON DUPLICATE KEY UPDATE
          size = size + 1,
          lastCommitID =
            IF(VALUES(epoch) > epoch, VALUES(lastCommitID), lastCommitID),
          epoch = IF(VALUES(epoch) > epoch, VALUES(epoch), epoch)',
      PhabricatorRepository::TABLE_SUMMARY,
      $repository->getID(),
      $commit->getID(),
      $epoch);
  }

  /**
   * Add discovered refs to the working set so later ancestry walks in the
   * same run treat them as known.
   *
   * @param list<PhabricatorRepositoryCommitRef> $refs Discovered refs.
   * @return void
   * @task internal
   */
  private function didDiscoverRefs(array $refs) {
    foreach ($refs as $ref) {
      $this->workingSet[$ref->getIdentifier()] = true;
    }
  }

  /**
   * Decide whether this discovery looks like the initial import of a
   * repository: many commits were found and none are recorded yet.
   *
   * @param list<PhabricatorRepositoryCommitRef> $refs Discovered refs.
   * @return bool True if this appears to be an initial import.
   * @task internal
   */
  private function isInitialImport(array $refs) {
    $commit_count = count($refs);

    if ($commit_count <= PhabricatorRepository::IMPORT_THRESHOLD) {
      // If we fetched a small number of commits, assume it's an initial
      // commit or a stack of a few initial commits.
      return false;
    }

    $viewer = $this->getViewer();
    $repository = $this->getRepository();

    $any_commits = id(new DiffusionCommitQuery())
      ->setViewer($viewer)
      ->withRepository($repository)
      ->setLimit(1)
      ->execute();

    if ($any_commits) {
      // If the repository already has commits, this isn't an import.
      return false;
    }

    return true;
  }


  /**
   * Get the observed version of a repository. Only non-hosted Git
   * repositories produce a version; everything else yields null.
   *
   * @param PhabricatorRepository $repository Repository to inspect.
   * @return int|null Observed version, or null if not applicable.
   * @task internal
   */
  private function getObservedVersion(PhabricatorRepository $repository) {
    if ($repository->isHosted()) {
      return null;
    }

    if ($repository->isGit()) {
      return $this->getGitObservedVersion($repository);
    }

    return null;
  }

  /**
   * Compute the observed version of a Git repository as the largest commit
   * ID among the commits at the current ref heads.
   *
   * @param PhabricatorRepository $repository Repository to inspect.
   * @return int|null Observed version, or null if there are no refs.
   * @task internal
   */
  private function getGitObservedVersion(PhabricatorRepository $repository) {
    $refs = id(new DiffusionLowLevelGitRefQuery())
     ->setRepository($repository)
     ->execute();
    if (!$refs) {
      return null;
    }

    // In Git, the observed version is the most recently discovered commit
    // at any repository HEAD. It's possible for this to regress temporarily
    // if a branch is pushed and then deleted. This is acceptable because it
    // doesn't do anything meaningfully bad and will fix itself on the next
    // push.

    $ref_identifiers = mpull($refs, 'getCommitIdentifier');
    $ref_identifiers = array_fuse($ref_identifiers);

    $version = queryfx_one(
      $repository->establishConnection('w'),
      'SELECT MAX(id) version FROM %T WHERE repositoryID = %d
        AND commitIdentifier IN (%Ls)',
      id(new PhabricatorRepositoryCommit())->getTableName(),
      $repository->getID(),
      $ref_identifiers);

    // NOTE(review): an aggregate query presumably still yields one row (with
    // a NULL version) when nothing matches, in which case this check does
    // not fire and 0 is returned below -- confirm this is intended.
    if (!$version) {
      return null;
    }

    return (int)$version['version'];
  }

  /**
   * Process pending old refs for Git and Mercurial repositories, marking
   * commits which are no longer reachable, then delete each processed ref.
   *
   * @param PhabricatorRepository $repository Repository being discovered.
   * @return void
   * @task internal
   */
  private function markUnreachableCommits(PhabricatorRepository $repository) {
    if (!$repository->isGit() && !$repository->isHg()) {
      return;
    }

    // Find older versions of refs which we haven't processed yet. We're going
    // to make sure their commits are still reachable.
    $old_refs = id(new PhabricatorRepositoryOldRef())->loadAllWhere(
      'repositoryPHID = %s',
      $repository->getPHID());

    // If we don't have any refs to update, bail out before building a graph
    // stream. In particular, this improves behavior in empty repositories,
    // where `git log` exits with an error.
    if (!$old_refs) {
      return;
    }

    // We can share a single graph stream across all the checks we need to do.
    // The guard above guarantees the repository is either Git or Mercurial.
    if ($repository->isGit()) {
      $stream = new PhabricatorGitGraphStream($repository);
    } else {
      $stream = new PhabricatorMercurialGraphStream($repository);
    }

    foreach ($old_refs as $old_ref) {
      $identifier = $old_ref->getCommitIdentifier();
      $this->markUnreachableFrom($repository, $stream, $identifier);

      // If nothing threw an exception, we're all done with this ref.
      $old_ref->delete();
    }
  }

  /**
   * Starting at one commit, walk recorded ancestry and flag every commit the
   * graph stream can no longer resolve as unreachable.
   *
   * @param PhabricatorRepository $repository Repository being discovered.
   * @param PhabricatorRepositoryGraphStream $stream Stream used to test
   *   whether a commit still exists in the repository.
   * @param string $identifier Commit identifier to start from.
   * @return void
   * @task internal
   */
  private function markUnreachableFrom(
    PhabricatorRepository $repository,
    PhabricatorRepositoryGraphStream $stream,
    $identifier) {

    $unreachable = array();

    $commit = id(new PhabricatorRepositoryCommit())->loadOneWhere(
      'repositoryID = %d AND commitIdentifier = %s',
      $repository->getID(),
      $identifier);
    if (!$commit) {
      return;
    }

    $look = array($commit);
    $seen = array();
    while ($look) {
      $target = array_pop($look);

      // If we've already checked this commit (for example, because history
      // branches and then merges) we don't need to check it again.
      $target_identifier = $target->getCommitIdentifier();
      if (isset($seen[$target_identifier])) {
        continue;
      }

      $seen[$target_identifier] = true;

      // See PHI1688. If this commit is already marked as unreachable, we don't
      // need to consider its ancestors. This may skip a lot of work if many
      // branches with a lot of shared ancestry are deleted at the same time.
      if ($target->isUnreachable()) {
        continue;
      }

      // A commit counts as reachable if the stream can produce its date
      // without throwing.
      try {
        $stream->getCommitDate($target_identifier);
        $reachable = true;
      } catch (Exception $ex) {
        $reachable = false;
      }

      if ($reachable) {
        // This commit is reachable, so we don't need to go any further
        // down this road.
        continue;
      }

      $unreachable[] = $target;

      // Find the commit's parents and check them for reachability, too. We
      // have to look in the database since we may no longer have the commit
      // in the repository.
      $rows = queryfx_all(
        $commit->establishConnection('w'),
        'SELECT commit.* FROM %T commit
          JOIN %T parents ON commit.id = parents.parentCommitID
          WHERE parents.childCommitID = %d',
        $commit->getTableName(),
        PhabricatorRepository::TABLE_PARENTS,
        $target->getID());
      if (!$rows) {
        continue;
      }

      $parents = id(new PhabricatorRepositoryCommit())
        ->loadAllFromArray($rows);
      foreach ($parents as $parent) {
        $look[] = $parent;
      }
    }

    $unreachable = array_reverse($unreachable);

    $flag = PhabricatorRepositoryCommit::IMPORTED_UNREACHABLE;
    foreach ($unreachable as $unreachable_commit) {
      $unreachable_commit->writeImportStatusFlag($flag);
    }

    // If anything was unreachable, just rebuild the whole summary table.
    // We can't really update it incrementally when a commit becomes
    // unreachable.
    if ($unreachable) {
      $this->rebuildSummaryTable($repository);
    }
  }

  /**
   * Recompute the repository summary row (size, last commit ID, epoch) from
   * the commits which are not flagged unreachable.
   *
   * @param PhabricatorRepository $repository Repository to summarize.
   * @return void
   * @task internal
   */
  private function rebuildSummaryTable(PhabricatorRepository $repository) {
    $conn_w = $repository->establishConnection('w');

    $data = queryfx_one(
      $conn_w,
      'SELECT COUNT(*) N, MAX(id) id, MAX(epoch) epoch
        FROM %T WHERE repositoryID = %d AND (importStatus & %d) != %d',
      id(new PhabricatorRepositoryCommit())->getTableName(),
      $repository->getID(),
      PhabricatorRepositoryCommit::IMPORTED_UNREACHABLE,
      PhabricatorRepositoryCommit::IMPORTED_UNREACHABLE);

    queryfx(
      $conn_w,
      'INSERT INTO %T (repositoryID, size, lastCommitID, epoch)
        VALUES (%d, %d, %d, %d)
        ON DUPLICATE KEY UPDATE
          size = VALUES(size),
          lastCommitID = VALUES(lastCommitID),
          epoch = VALUES(epoch)',
      PhabricatorRepository::TABLE_SUMMARY,
      $repository->getID(),
      $data['N'],
      $data['id'],
      $data['epoch']);
  }

}