@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.) hq.recaptime.dev/wiki/Phorge
phorge phabricator
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Move all cluster locking logic to a separate class

Summary: Ref T10860. This doesn't change anything, it just separates all this stuff out of `PhabricatorRepository` since I'm planning to add a bit more state to it and it's already pretty big and fairly separable.

Test Plan: Pulled, pushed, browsed Diffusion.

Reviewers: chad

Reviewed By: chad

Maniphest Tasks: T10860

Differential Revision: https://secure.phabricator.com/D15790

+479 -393
+2
src/__phutil_library_map__.php
··· 745 745 'DiffusionRenameHistoryQuery' => 'applications/diffusion/query/DiffusionRenameHistoryQuery.php', 746 746 'DiffusionRepositoryBasicsManagementPanel' => 'applications/diffusion/management/DiffusionRepositoryBasicsManagementPanel.php', 747 747 'DiffusionRepositoryByIDRemarkupRule' => 'applications/diffusion/remarkup/DiffusionRepositoryByIDRemarkupRule.php', 748 + 'DiffusionRepositoryClusterEngine' => 'applications/diffusion/protocol/DiffusionRepositoryClusterEngine.php', 748 749 'DiffusionRepositoryClusterManagementPanel' => 'applications/diffusion/management/DiffusionRepositoryClusterManagementPanel.php', 749 750 'DiffusionRepositoryController' => 'applications/diffusion/controller/DiffusionRepositoryController.php', 750 751 'DiffusionRepositoryCreateController' => 'applications/diffusion/controller/DiffusionRepositoryCreateController.php', ··· 4953 4954 'DiffusionRenameHistoryQuery' => 'Phobject', 4954 4955 'DiffusionRepositoryBasicsManagementPanel' => 'DiffusionRepositoryManagementPanel', 4955 4956 'DiffusionRepositoryByIDRemarkupRule' => 'PhabricatorObjectRemarkupRule', 4957 + 'DiffusionRepositoryClusterEngine' => 'Phobject', 4956 4958 'DiffusionRepositoryClusterManagementPanel' => 'DiffusionRepositoryManagementPanel', 4957 4959 'DiffusionRepositoryController' => 'DiffusionController', 4958 4960 'DiffusionRepositoryCreateController' => 'DiffusionRepositoryEditController',
+7 -3
src/applications/diffusion/conduit/DiffusionQueryCommitsConduitAPIMethod.php
··· 29 29 protected function execute(ConduitAPIRequest $request) { 30 30 $need_messages = $request->getValue('needMessages'); 31 31 $bypass_cache = $request->getValue('bypassCache'); 32 + $viewer = $request->getUser(); 32 33 33 34 $query = id(new DiffusionCommitQuery()) 34 - ->setViewer($request->getUser()) 35 + ->setViewer($viewer) 35 36 ->needCommitData(true); 36 37 37 38 $repository_phid = $request->getValue('repositoryPHID'); 38 39 if ($repository_phid) { 39 40 $repository = id(new PhabricatorRepositoryQuery()) 40 - ->setViewer($request->getUser()) 41 + ->setViewer($viewer) 41 42 ->withPHIDs(array($repository_phid)) 42 43 ->executeOne(); 43 44 if ($repository) { 44 45 $query->withRepository($repository); 45 46 if ($bypass_cache) { 46 - $repository->synchronizeWorkingCopyBeforeRead(); 47 + id(new DiffusionRepositoryClusterEngine()) 48 + ->setViewer($viewer) 49 + ->setRepository($repository) 50 + ->synchronizeWorkingCopyBeforeRead(); 47 51 } 48 52 } 49 53 }
+6 -2
src/applications/diffusion/conduit/DiffusionQueryConduitAPIMethod.php
··· 124 124 // to prevent infinite recursion. 125 125 126 126 $is_cluster_request = $request->getIsClusterRequest(); 127 + $viewer = $request->getUser(); 127 128 128 129 $repository = $drequest->getRepository(); 129 130 $client = $repository->newConduitClient( 130 - $request->getUser(), 131 + $viewer, 131 132 $is_cluster_request); 132 133 if ($client) { 133 134 // We're proxying, so just make an intracluster call. ··· 149 150 // fetching the most up-to-date data? Synchronization can be slow, and a 150 151 // lot of web reads are probably fine if they're a few seconds out of 151 152 // date. 152 - $repository->synchronizeWorkingCopyBeforeRead(); 153 + id(new DiffusionRepositoryClusterEngine()) 154 + ->setViewer($viewer) 155 + ->setRepository($repository) 156 + ->synchronizeWorkingCopyBeforeRead(); 153 157 154 158 return $this->getResult($request); 155 159 }
+7 -3
src/applications/diffusion/controller/DiffusionServeController.php
··· 540 540 541 541 $unguarded = AphrontWriteGuard::beginScopedUnguardedWrites(); 542 542 543 + $cluster_engine = id(new DiffusionRepositoryClusterEngine()) 544 + ->setViewer($viewer) 545 + ->setRepository($repository); 546 + 543 547 $did_write_lock = false; 544 548 if ($this->isReadOnlyRequest($repository)) { 545 - $repository->synchronizeWorkingCopyBeforeRead(); 549 + $cluster_engine->synchronizeWorkingCopyBeforeRead(); 546 550 } else { 547 551 $did_write_lock = true; 548 - $repository->synchronizeWorkingCopyBeforeWrite($viewer); 552 + $cluster_engine->synchronizeWorkingCopyBeforeWrite(); 549 553 } 550 554 551 555 $caught = null; ··· 559 563 } 560 564 561 565 if ($did_write_lock) { 562 - $repository->synchronizeWorkingCopyAfterWrite(); 566 + $cluster_engine->synchronizeWorkingCopyAfterWrite(); 563 567 } 564 568 565 569 unset($unguarded);
+435
src/applications/diffusion/protocol/DiffusionRepositoryClusterEngine.php
<?php

/**
 * Manages repository synchronization for cluster repositories.
 *
 * Callers configure the engine with a repository and a viewer, then call one
 * of the `synchronizeWorkingCopy...()` methods around a read or a write. A
 * write is bracketed by @{method:synchronizeWorkingCopyBeforeWrite} and
 * @{method:synchronizeWorkingCopyAfterWrite} on the same engine instance,
 * because the write lock and the pre-write version are held on the instance
 * between the two calls.
 *
 * Every synchronization method is a no-op when
 * @{method:shouldEnableSynchronization} returns `false`.
 *
 * @task config Configuring Synchronization
 * @task sync Cluster Synchronization
 * @task internal Internals
 */
final class DiffusionRepositoryClusterEngine extends Phobject {

  private $repository;
  private $viewer;

  // Write lock acquired by synchronizeWorkingCopyBeforeWrite() and released
  // by synchronizeWorkingCopyAfterWrite().
  private $clusterWriteLock;

  // Repository version observed when the write lock was acquired.
  private $clusterWriteVersion;


/* -(  Configuring Synchronization  )---------------------------------------- */


  /**
   * Set the repository to synchronize.
   *
   * @param PhabricatorRepository Repository to operate on.
   * @return this
   * @task config
   */
  public function setRepository(PhabricatorRepository $repository) {
    $this->repository = $repository;
    return $this;
  }

  /**
   * @return PhabricatorRepository|null Repository set on this engine, if any.
   * @task config
   */
  public function getRepository() {
    return $this->repository;
  }

  /**
   * Set the viewer. The viewer's PHID is recorded as the writer when a write
   * begins.
   *
   * @param PhabricatorUser Acting user.
   * @return this
   * @task config
   */
  public function setViewer(PhabricatorUser $viewer) {
    $this->viewer = $viewer;
    return $this;
  }

  /**
   * @return PhabricatorUser|null Viewer set on this engine, if any.
   * @task config
   */
  public function getViewer() {
    return $this->viewer;
  }


/* -(  Cluster Synchronization  )-------------------------------------------- */


  /**
   * Synchronize repository version information after creating a repository.
   *
   * This initializes working copy versions for all currently bound devices to
   * 0, so that we don't get stuck making an ambiguous choice about which
   * devices are leaders when we later synchronize before a read.
   *
   * @return this
   * @throws Exception If the repository cluster service can not be loaded.
   * @task sync
   */
  public function synchronizeWorkingCopyAfterCreation() {
    if (!$this->shouldEnableSynchronization()) {
      // Return the engine on this path too, so the method has one consistent
      // return type regardless of whether synchronization is enabled.
      return $this;
    }

    $repository = $this->getRepository();
    $repository_phid = $repository->getPHID();

    $service = $repository->loadAlmanacService();
    if (!$service) {
      throw new Exception(pht('Failed to load repository cluster service.'));
    }

    $bindings = $service->getActiveBindings();
    foreach ($bindings as $binding) {
      PhabricatorRepositoryWorkingCopyVersion::updateVersion(
        $repository_phid,
        $binding->getDevicePHID(),
        0);
    }

    return $this;
  }


  /**
   * Bring the working copy on this device up to date before reading from it.
   *
   * Takes the per-device read lock, synchronizes from a leader if this device
   * is behind, and releases the lock again. The lock is released on both the
   * success path and the failure path.
   *
   * @return int|null Repository version this device holds after
   *   synchronizing, or `null` if synchronization is not enabled.
   * @throws Exception If the device can not be synchronized.
   * @task sync
   */
  public function synchronizeWorkingCopyBeforeRead() {
    if (!$this->shouldEnableSynchronization()) {
      return;
    }

    $repository = $this->getRepository();
    $repository_phid = $repository->getPHID();

    $device = AlmanacKeys::getLiveDevice();
    $device_phid = $device->getPHID();

    $read_lock = PhabricatorRepositoryWorkingCopyVersion::getReadLock(
      $repository_phid,
      $device_phid);

    // TODO: Raise a more useful exception if we fail to grab this lock.
    $read_lock->lock(phutil_units('2 minutes in seconds'));

    // Everything from here on can throw (failed fetch, missing service,
    // ambiguous leader), so make sure the read lock never outlives this call.
    try {
      $result_version = $this->synchronizeWorkingCopyWithReadLock($device);
    } catch (Exception $ex) {
      $read_lock->unlock();
      throw $ex;
    }

    $read_lock->unlock();

    return $result_version;
  }


  /**
   * Prepare this device for a write: take the cluster write lock, refuse to
   * continue if a previous write was interrupted, synchronize the working
   * copy, and mark this device as writing.
   *
   * On success the write lock stays held until
   * @{method:synchronizeWorkingCopyAfterWrite} is called on this same engine.
   * On failure the write lock is released before the exception propagates.
   *
   * @return void
   * @throws Exception If a previous write was interrupted or the working copy
   *   can not be synchronized.
   * @task sync
   */
  public function synchronizeWorkingCopyBeforeWrite() {
    if (!$this->shouldEnableSynchronization()) {
      return;
    }

    $repository = $this->getRepository();
    $viewer = $this->getViewer();

    $repository_phid = $repository->getPHID();

    $device = AlmanacKeys::getLiveDevice();
    $device_phid = $device->getPHID();

    $write_lock = PhabricatorRepositoryWorkingCopyVersion::getWriteLock(
      $repository_phid);

    // TODO: Raise a more useful exception if we fail to grab this lock.
    $write_lock->lock(phutil_units('2 minutes in seconds'));

    // Any failure between acquiring the lock and handing it over to the
    // instance must release it, otherwise nothing else will.
    try {
      $versions = PhabricatorRepositoryWorkingCopyVersion::loadVersions(
        $repository_phid);
      foreach ($versions as $version) {
        if (!$version->getIsWriting()) {
          continue;
        }

        throw new Exception(
          pht(
            'A previous write to this repository was interrupted; '.
            'refusing new writes. This issue requires operator '.
            'intervention to resolve, see "Write Interruptions" in the '.
            '"Cluster: Repositories" documentation for instructions.'));
      }

      $max_version = $this->synchronizeWorkingCopyBeforeRead();

      PhabricatorRepositoryWorkingCopyVersion::willWrite(
        $repository_phid,
        $device_phid,
        array(
          'userPHID' => $viewer->getPHID(),
          'epoch' => PhabricatorTime::getNow(),
          'devicePHID' => $device_phid,
        ));
    } catch (Exception $ex) {
      $write_lock->unlock();
      throw $ex;
    }

    $this->clusterWriteVersion = $max_version;
    $this->clusterWriteLock = $write_lock;
  }


  /**
   * Finish a write started with @{method:synchronizeWorkingCopyBeforeWrite}:
   * record the new repository version for this device and release the write
   * lock.
   *
   * The new version is the ID of the push event returned by the query below.
   * If the repository has no push events at all, the version recorded before
   * the write is kept.
   *
   * @return void
   * @throws Exception If no write lock is held by this engine.
   * @task sync
   */
  public function synchronizeWorkingCopyAfterWrite() {
    if (!$this->shouldEnableSynchronization()) {
      return;
    }

    if (!$this->clusterWriteLock) {
      throw new Exception(
        pht(
          'Trying to synchronize after write, but not holding a write '.
          'lock!'));
    }

    $write_lock = $this->clusterWriteLock;

    $repository = $this->getRepository();
    $repository_phid = $repository->getPHID();

    $device = AlmanacKeys::getLiveDevice();
    $device_phid = $device->getPHID();

    try {
      // NOTE: This means we're still bumping the version when pushes fail. We
      // could select only un-rejected events instead to bump a little less
      // often.

      $new_log = id(new PhabricatorRepositoryPushEventQuery())
        ->setViewer(PhabricatorUser::getOmnipotentUser())
        ->withRepositoryPHIDs(array($repository_phid))
        ->setLimit(1)
        ->executeOne();

      $old_version = $this->clusterWriteVersion;
      if ($new_log) {
        $new_version = $new_log->getID();
      } else {
        // No push event exists for this repository, so there is nothing to
        // bump the version to. Keep the pre-write version.
        $new_version = $old_version;
      }

      PhabricatorRepositoryWorkingCopyVersion::didWrite(
        $repository_phid,
        $device_phid,
        $old_version,
        $new_version);
    } catch (Exception $ex) {
      // Forget the lock before releasing it so a failure here can not leave
      // the engine believing it still holds a lock.
      $this->clusterWriteLock = null;
      $write_lock->unlock();
      throw $ex;
    }

    $this->clusterWriteLock = null;
    $write_lock->unlock();
  }


/* -(  Internals  )---------------------------------------------------------- */


  /**
   * Decide whether cluster synchronization applies to this repository.
   *
   * Synchronization is enabled only when the repository has an Almanac
   * service PHID, is a Git repository, is hosted, and the current host
   * resolves to a live Almanac device.
   *
   * @return bool True if synchronization should be performed.
   * @task internal
   */
  private function shouldEnableSynchronization() {
    $repository = $this->getRepository();

    $service_phid = $repository->getAlmanacServicePHID();
    if (!$service_phid) {
      return false;
    }

    // TODO: For now, this is only supported for Git.
    if (!$repository->isGit()) {
      return false;
    }

    // TODO: It may eventually make sense to try to version and synchronize
    // observed repositories (so that daemons don't do reads against out-of
    // date hosts), but don't bother for now.
    if (!$repository->isHosted()) {
      return false;
    }

    $device = AlmanacKeys::getLiveDevice();
    if (!$device) {
      return false;
    }

    return true;
  }


  /**
   * Do the actual pre-read synchronization. The caller is responsible for
   * acquiring the read lock before calling this and releasing it afterwards.
   *
   * @param object Live device this process is running on, as returned by
   *   `AlmanacKeys::getLiveDevice()`.
   * @return int Repository version this device holds after synchronizing.
   * @throws Exception If no leader can be identified or fetched from.
   * @task internal
   */
  private function synchronizeWorkingCopyWithReadLock($device) {
    $repository = $this->getRepository();
    $repository_phid = $repository->getPHID();
    $device_phid = $device->getPHID();

    $versions = PhabricatorRepositoryWorkingCopyVersion::loadVersions(
      $repository_phid);
    $versions = mpull($versions, null, 'getDevicePHID');

    $this_version = idx($versions, $device_phid);
    if ($this_version) {
      $this_version = (int)$this_version->getRepositoryVersion();
    } else {
      // No record for this device: treat it as older than any real version.
      $this_version = -1;
    }

    if ($versions) {
      // This is the normal case, where we have some version information and
      // can identify which nodes are leaders. If the current node is not a
      // leader, we want to fetch from a leader and then update our version.

      $max_version = (int)max(mpull($versions, 'getRepositoryVersion'));
      if ($max_version > $this_version) {
        $fetchable = array();
        foreach ($versions as $version) {
          if ($version->getRepositoryVersion() == $max_version) {
            $fetchable[] = $version->getDevicePHID();
          }
        }

        $this->synchronizeWorkingCopyFromDevices($fetchable);

        PhabricatorRepositoryWorkingCopyVersion::updateVersion(
          $repository_phid,
          $device_phid,
          $max_version);
      }

      return $max_version;
    }

    // If no version records exist yet, we need to be careful, because we
    // can not tell which nodes are leaders.

    // There might be several nodes with arbitrary existing data, and we have
    // no way to tell which one has the "right" data. If we pick wrong, we
    // might erase some or all of the data in the repository.

    // Since this is dangerous, we refuse to guess unless there is only one
    // device. If we're the only device in the group, we obviously must be
    // a leader.

    $service = $repository->loadAlmanacService();
    if (!$service) {
      throw new Exception(pht('Failed to load repository cluster service.'));
    }

    $bindings = $service->getActiveBindings();
    $device_map = array();
    foreach ($bindings as $binding) {
      $device_map[$binding->getDevicePHID()] = true;
    }

    if (count($device_map) > 1) {
      throw new Exception(
        pht(
          'Repository "%s" exists on more than one device, but no device '.
          'has any repository version information. Phabricator can not '.
          'guess which copy of the existing data is authoritative. Remove '.
          'all but one device from service to mark the remaining device '.
          'as the authority.',
          $repository->getDisplayName()));
    }

    if (empty($device_map[$device_phid])) {
      throw new Exception(
        pht(
          'Repository "%s" is being synchronized on device "%s", but '.
          'this device is not bound to the corresponding cluster '.
          'service ("%s").',
          $repository->getDisplayName(),
          $device->getName(),
          $service->getName()));
    }

    // The current device is the only device in service, so it must be a
    // leader. We can safely have any future nodes which come online read
    // from it.
    PhabricatorRepositoryWorkingCopyVersion::updateVersion(
      $repository_phid,
      $device_phid,
      0);

    return 0;
  }


  /**
   * Fetch into the local working copy from the first reachable device among
   * the given up-to-date devices.
   *
   * Only devices with an active binding whose `protocol` property is `ssh`
   * are tried. Bindings are tried in order; the first successful fetch wins.
   * If every attempt fails, the exception from the last attempt is rethrown.
   *
   * @param list<string> PHIDs of devices which hold the newest version.
   * @return void
   * @throws Exception If no candidate is fetchable or all fetches fail.
   * @task internal
   */
  private function synchronizeWorkingCopyFromDevices(array $device_phids) {
    $repository = $this->getRepository();

    $service = $repository->loadAlmanacService();
    if (!$service) {
      throw new Exception(pht('Failed to load repository cluster service.'));
    }

    $device_map = array_fuse($device_phids);
    $bindings = $service->getActiveBindings();

    $fetchable = array();
    foreach ($bindings as $binding) {
      // We can't fetch from nodes which don't have the newest version.
      $device_phid = $binding->getDevicePHID();
      if (empty($device_map[$device_phid])) {
        continue;
      }

      // TODO: For now, only fetch over SSH. We could support fetching over
      // HTTP eventually.
      if ($binding->getAlmanacPropertyValue('protocol') != 'ssh') {
        continue;
      }

      $fetchable[] = $binding;
    }

    if (!$fetchable) {
      throw new Exception(
        pht(
          'Leader lost: no up-to-date nodes in repository cluster are '.
          'fetchable.'));
    }

    $caught = null;
    foreach ($fetchable as $binding) {
      try {
        $this->synchronizeWorkingCopyFromBinding($binding);
        // A later binding succeeded, so discard any earlier failure.
        $caught = null;
        break;
      } catch (Exception $ex) {
        $caught = $ex;
      }
    }

    if ($caught) {
      throw $caught;
    }
  }


  /**
   * Fetch all refs from the repository copy behind a single binding into the
   * local working copy, pruning refs which no longer exist remotely.
   *
   * @param object Binding to fetch from (passed on to
   *   `getClusterRepositoryURIFromBinding()`).
   * @return void
   * @throws Exception If the repository is not Git, has no local working copy
   *   yet, or the fetch command fails.
   * @task internal
   */
  private function synchronizeWorkingCopyFromBinding($binding) {
    $repository = $this->getRepository();

    $fetch_uri = $repository->getClusterRepositoryURIFromBinding($binding);
    $local_path = $repository->getLocalPath();

    if ($repository->isGit()) {
      if (!Filesystem::pathExists($local_path)) {
        $device = AlmanacKeys::getLiveDevice();
        throw new Exception(
          pht(
            'Repository "%s" does not have a working copy on this device '.
            'yet, so it can not be synchronized. Wait for the daemons to '.
            'construct one or run `bin/repository update %s` on this host '.
            '("%s") to build it explicitly.',
            $repository->getDisplayName(),
            $repository->getMonogram(),
            $device->getName()));
      }

      $argv = array(
        'fetch --prune -- %s %s',
        $fetch_uri,
        '+refs/*:refs/*',
      );
    } else {
      throw new Exception(pht('Binding sync only supported for git!'));
    }

    $future = DiffusionCommandEngine::newCommandEngine($repository)
      ->setArgv($argv)
      ->setConnectAsDevice(true)
      ->setSudoAsDaemon(true)
      ->setProtocol($fetch_uri->getProtocol())
      ->newFuture();

    $future->setCWD($local_path);

    $future->resolvex();
  }

}
+7 -4
src/applications/diffusion/ssh/DiffusionGitReceivePackSSHWorkflow.php
··· 15 15 16 16 protected function executeRepositoryOperations() { 17 17 $repository = $this->getRepository(); 18 + $viewer = $this->getViewer(); 18 19 19 20 // This is a write, and must have write access. 20 21 $this->requireWriteAccess(); 21 22 23 + $cluster_engine = id(new DiffusionRepositoryClusterEngine()) 24 + ->setViewer($viewer) 25 + ->setRepository($repository); 26 + 22 27 if ($this->shouldProxy()) { 23 28 $command = $this->getProxyCommand(); 24 29 $did_synchronize = false; 25 30 } else { 26 31 $command = csprintf('git-receive-pack %s', $repository->getLocalPath()); 27 - 28 32 $did_synchronize = true; 29 - $viewer = $this->getUser(); 30 - $repository->synchronizeWorkingCopyBeforeWrite($viewer); 33 + $cluster_engine->synchronizeWorkingCopyBeforeWrite(); 31 34 } 32 35 33 36 $caught = null; ··· 40 43 // We've committed the write (or rejected it), so we can release the lock 41 44 // without waiting for the client to receive the acknowledgement. 42 45 if ($did_synchronize) { 43 - $repository->synchronizeWorkingCopyAfterWrite(); 46 + $cluster_engine->synchronizeWorkingCopyAfterWrite(); 44 47 } 45 48 46 49 if ($caught) {
+5 -1
src/applications/diffusion/ssh/DiffusionGitUploadPackSSHWorkflow.php
··· 15 15 16 16 protected function executeRepositoryOperations() { 17 17 $repository = $this->getRepository(); 18 + $viewer = $this->getUser(); 18 19 19 20 $skip_sync = $this->shouldSkipReadSynchronization(); 20 21 ··· 23 24 } else { 24 25 $command = csprintf('git-upload-pack -- %s', $repository->getLocalPath()); 25 26 if (!$skip_sync) { 26 - $repository->synchronizeWorkingCopyBeforeRead(); 27 + $cluster_engine = id(new DiffusionRepositoryClusterEngine()) 28 + ->setViewer($viewer) 29 + ->setRepository($repository) 30 + ->synchronizeWorkingCopyBeforeRead(); 27 31 } 28 32 } 29 33 $command = PhabricatorDaemon::sudoCommandAsDaemonUser($command);
+4 -1
src/applications/repository/editor/PhabricatorRepositoryEditor.php
··· 684 684 } 685 685 686 686 if ($this->getIsNewObject()) { 687 - $object->synchronizeWorkingCopyAfterCreation(); 687 + id(new DiffusionRepositoryClusterEngine()) 688 + ->setViewer($this->getActor()) 689 + ->setRepository($object) 690 + ->synchronizeWorkingCopyAfterCreation(); 688 691 } 689 692 690 693 return $xactions;
+5 -1
src/applications/repository/engine/PhabricatorRepositoryPullEngine.php
··· 23 23 24 24 public function pullRepository() { 25 25 $repository = $this->getRepository(); 26 + $viewer = PhabricatorUser::getOmnipotentUser(); 26 27 27 28 $is_hg = false; 28 29 $is_git = false; ··· 96 97 } 97 98 98 99 if ($repository->isHosted()) { 99 - $repository->synchronizeWorkingCopyBeforeRead(); 100 + id(new DiffusionRepositoryClusterEngine()) 101 + ->setViewer($viewer) 102 + ->setRepository($repository) 103 + ->synchronizeWorkingCopyBeforeRead(); 100 104 101 105 if ($is_git) { 102 106 $this->installGitHook();
+1 -378
src/applications/repository/storage/PhabricatorRepository.php
··· 68 68 private $projectPHIDs = self::ATTACHABLE; 69 69 private $uris = self::ATTACHABLE; 70 70 71 - private $clusterWriteLock; 72 - private $clusterWriteVersion; 73 - 74 71 75 72 public static function initializeNewRepository(PhabricatorUser $actor) { 76 73 $app = id(new PhabricatorApplicationQuery()) ··· 2193 2190 } 2194 2191 2195 2192 2196 - /* -( Cluster Synchronization )-------------------------------------------- */ 2197 - 2198 - 2199 - private function shouldEnableSynchronization() { 2200 - $service_phid = $this->getAlmanacServicePHID(); 2201 - if (!$service_phid) { 2202 - return false; 2203 - } 2204 - 2205 - // TODO: For now, this is only supported for Git. 2206 - if (!$this->isGit()) { 2207 - return false; 2208 - } 2209 - 2210 - // TODO: It may eventually make sense to try to version and synchronize 2211 - // observed repositories (so that daemons don't do reads against out-of 2212 - // date hosts), but don't bother for now. 2213 - if (!$this->isHosted()) { 2214 - return false; 2215 - } 2216 - 2217 - $device = AlmanacKeys::getLiveDevice(); 2218 - if (!$device) { 2219 - return false; 2220 - } 2221 - 2222 - return true; 2223 - } 2224 - 2225 - 2226 - /** 2227 - * Synchronize repository version information after creating a repository. 2228 - * 2229 - * This initializes working copy versions for all currently bound devices to 2230 - * 0, so that we don't get stuck making an ambiguous choice about which 2231 - * devices are leaders when we later synchronize before a read. 
2232 - * 2233 - * @task sync 2234 - */ 2235 - public function synchronizeWorkingCopyAfterCreation() { 2236 - if (!$this->shouldEnableSynchronization()) { 2237 - return; 2238 - } 2239 - 2240 - $repository_phid = $this->getPHID(); 2241 - 2242 - $service = $this->loadAlmanacService(); 2243 - if (!$service) { 2244 - throw new Exception(pht('Failed to load repository cluster service.')); 2245 - } 2246 - 2247 - $bindings = $service->getActiveBindings(); 2248 - foreach ($bindings as $binding) { 2249 - PhabricatorRepositoryWorkingCopyVersion::updateVersion( 2250 - $repository_phid, 2251 - $binding->getDevicePHID(), 2252 - 0); 2253 - } 2254 - } 2255 - 2256 - 2257 - /** 2258 - * @task sync 2259 - */ 2260 - public function synchronizeWorkingCopyBeforeRead() { 2261 - if (!$this->shouldEnableSynchronization()) { 2262 - return; 2263 - } 2264 - 2265 - $repository_phid = $this->getPHID(); 2266 - 2267 - $device = AlmanacKeys::getLiveDevice(); 2268 - $device_phid = $device->getPHID(); 2269 - 2270 - $read_lock = PhabricatorRepositoryWorkingCopyVersion::getReadLock( 2271 - $repository_phid, 2272 - $device_phid); 2273 - 2274 - // TODO: Raise a more useful exception if we fail to grab this lock. 2275 - $read_lock->lock(phutil_units('2 minutes in seconds')); 2276 - 2277 - $versions = PhabricatorRepositoryWorkingCopyVersion::loadVersions( 2278 - $repository_phid); 2279 - $versions = mpull($versions, null, 'getDevicePHID'); 2280 - 2281 - $this_version = idx($versions, $device_phid); 2282 - if ($this_version) { 2283 - $this_version = (int)$this_version->getRepositoryVersion(); 2284 - } else { 2285 - $this_version = -1; 2286 - } 2287 - 2288 - if ($versions) { 2289 - // This is the normal case, where we have some version information and 2290 - // can identify which nodes are leaders. If the current node is not a 2291 - // leader, we want to fetch from a leader and then update our version. 
2292 - 2293 - $max_version = (int)max(mpull($versions, 'getRepositoryVersion')); 2294 - if ($max_version > $this_version) { 2295 - $fetchable = array(); 2296 - foreach ($versions as $version) { 2297 - if ($version->getRepositoryVersion() == $max_version) { 2298 - $fetchable[] = $version->getDevicePHID(); 2299 - } 2300 - } 2301 - 2302 - $this->synchronizeWorkingCopyFromDevices($fetchable); 2303 - 2304 - PhabricatorRepositoryWorkingCopyVersion::updateVersion( 2305 - $repository_phid, 2306 - $device_phid, 2307 - $max_version); 2308 - } 2309 - 2310 - $result_version = $max_version; 2311 - } else { 2312 - // If no version records exist yet, we need to be careful, because we 2313 - // can not tell which nodes are leaders. 2314 - 2315 - // There might be several nodes with arbitrary existing data, and we have 2316 - // no way to tell which one has the "right" data. If we pick wrong, we 2317 - // might erase some or all of the data in the repository. 2318 - 2319 - // Since this is dangeorus, we refuse to guess unless there is only one 2320 - // device. If we're the only device in the group, we obviously must be 2321 - // a leader. 2322 - 2323 - $service = $this->loadAlmanacService(); 2324 - if (!$service) { 2325 - throw new Exception(pht('Failed to load repository cluster service.')); 2326 - } 2327 - 2328 - $bindings = $service->getActiveBindings(); 2329 - $device_map = array(); 2330 - foreach ($bindings as $binding) { 2331 - $device_map[$binding->getDevicePHID()] = true; 2332 - } 2333 - 2334 - if (count($device_map) > 1) { 2335 - throw new Exception( 2336 - pht( 2337 - 'Repository "%s" exists on more than one device, but no device '. 2338 - 'has any repository version information. Phabricator can not '. 2339 - 'guess which copy of the existing data is authoritative. Remove '. 2340 - 'all but one device from service to mark the remaining device '. 
2341 - 'as the authority.', 2342 - $this->getDisplayName())); 2343 - } 2344 - 2345 - if (empty($device_map[$device->getPHID()])) { 2346 - throw new Exception( 2347 - pht( 2348 - 'Repository "%s" is being synchronized on device "%s", but '. 2349 - 'this device is not bound to the corresponding cluster '. 2350 - 'service ("%s").', 2351 - $this->getDisplayName(), 2352 - $device->getName(), 2353 - $service->getName())); 2354 - } 2355 - 2356 - // The current device is the only device in service, so it must be a 2357 - // leader. We can safely have any future nodes which come online read 2358 - // from it. 2359 - PhabricatorRepositoryWorkingCopyVersion::updateVersion( 2360 - $repository_phid, 2361 - $device_phid, 2362 - 0); 2363 - 2364 - $result_version = 0; 2365 - } 2366 - 2367 - $read_lock->unlock(); 2368 - 2369 - return $result_version; 2370 - } 2371 - 2372 - 2373 - /** 2374 - * @task sync 2375 - */ 2376 - public function synchronizeWorkingCopyBeforeWrite( 2377 - PhabricatorUser $actor) { 2378 - if (!$this->shouldEnableSynchronization()) { 2379 - return; 2380 - } 2381 - 2382 - $repository_phid = $this->getPHID(); 2383 - 2384 - $device = AlmanacKeys::getLiveDevice(); 2385 - $device_phid = $device->getPHID(); 2386 - 2387 - $write_lock = PhabricatorRepositoryWorkingCopyVersion::getWriteLock( 2388 - $repository_phid); 2389 - 2390 - // TODO: Raise a more useful exception if we fail to grab this lock. 2391 - $write_lock->lock(phutil_units('2 minutes in seconds')); 2392 - 2393 - $versions = PhabricatorRepositoryWorkingCopyVersion::loadVersions( 2394 - $repository_phid); 2395 - foreach ($versions as $version) { 2396 - if (!$version->getIsWriting()) { 2397 - continue; 2398 - } 2399 - 2400 - throw new Exception( 2401 - pht( 2402 - 'An previous write to this repository was interrupted; refusing '. 2403 - 'new writes. This issue resolves operator intervention to resolve, '. 2404 - 'see "Write Interruptions" in the "Cluster: Repositories" in the '. 
2405 - 'documentation for instructions.')); 2406 - } 2407 - 2408 - try { 2409 - $max_version = $this->synchronizeWorkingCopyBeforeRead(); 2410 - } catch (Exception $ex) { 2411 - $write_lock->unlock(); 2412 - throw $ex; 2413 - } 2414 - 2415 - PhabricatorRepositoryWorkingCopyVersion::willWrite( 2416 - $repository_phid, 2417 - $device_phid, 2418 - array( 2419 - 'userPHID' => $actor->getPHID(), 2420 - 'epoch' => PhabricatorTime::getNow(), 2421 - 'devicePHID' => $device_phid, 2422 - )); 2423 - 2424 - $this->clusterWriteVersion = $max_version; 2425 - $this->clusterWriteLock = $write_lock; 2426 - } 2427 - 2428 - 2429 - /** 2430 - * @task sync 2431 - */ 2432 - public function synchronizeWorkingCopyAfterWrite() { 2433 - if (!$this->shouldEnableSynchronization()) { 2434 - return; 2435 - } 2436 - 2437 - if (!$this->clusterWriteLock) { 2438 - throw new Exception( 2439 - pht( 2440 - 'Trying to synchronize after write, but not holding a write '. 2441 - 'lock!')); 2442 - } 2443 - 2444 - $repository_phid = $this->getPHID(); 2445 - 2446 - $device = AlmanacKeys::getLiveDevice(); 2447 - $device_phid = $device->getPHID(); 2448 - 2449 - // NOTE: This means we're still bumping the version when pushes fail. We 2450 - // could select only un-rejected events instead to bump a little less 2451 - // often. 
2452 - 2453 - $new_log = id(new PhabricatorRepositoryPushEventQuery()) 2454 - ->setViewer(PhabricatorUser::getOmnipotentUser()) 2455 - ->withRepositoryPHIDs(array($repository_phid)) 2456 - ->setLimit(1) 2457 - ->executeOne(); 2458 - 2459 - $old_version = $this->clusterWriteVersion; 2460 - if ($new_log) { 2461 - $new_version = $new_log->getID(); 2462 - } else { 2463 - $new_version = $old_version; 2464 - } 2465 - 2466 - PhabricatorRepositoryWorkingCopyVersion::didWrite( 2467 - $repository_phid, 2468 - $device_phid, 2469 - $this->clusterWriteVersion, 2470 - $new_log->getID()); 2471 - 2472 - $this->clusterWriteLock->unlock(); 2473 - $this->clusterWriteLock = null; 2474 - } 2475 - 2476 - 2477 - /** 2478 - * @task sync 2479 - */ 2480 - private function synchronizeWorkingCopyFromDevices(array $device_phids) { 2481 - $service = $this->loadAlmanacService(); 2482 - if (!$service) { 2483 - throw new Exception(pht('Failed to load repository cluster service.')); 2484 - } 2485 - 2486 - $device_map = array_fuse($device_phids); 2487 - $bindings = $service->getActiveBindings(); 2488 - 2489 - $fetchable = array(); 2490 - foreach ($bindings as $binding) { 2491 - // We can't fetch from nodes which don't have the newest version. 2492 - $device_phid = $binding->getDevicePHID(); 2493 - if (empty($device_map[$device_phid])) { 2494 - continue; 2495 - } 2496 - 2497 - // TODO: For now, only fetch over SSH. We could support fetching over 2498 - // HTTP eventually. 2499 - if ($binding->getAlmanacPropertyValue('protocol') != 'ssh') { 2500 - continue; 2501 - } 2502 - 2503 - $fetchable[] = $binding; 2504 - } 2505 - 2506 - if (!$fetchable) { 2507 - throw new Exception( 2508 - pht( 2509 - 'Leader lost: no up-to-date nodes in repository cluster are '. 
2510 - 'fetchable.')); 2511 - } 2512 - 2513 - $caught = null; 2514 - foreach ($fetchable as $binding) { 2515 - try { 2516 - $this->synchronizeWorkingCopyFromBinding($binding); 2517 - $caught = null; 2518 - break; 2519 - } catch (Exception $ex) { 2520 - $caught = $ex; 2521 - } 2522 - } 2523 - 2524 - if ($caught) { 2525 - throw $caught; 2526 - } 2527 - } 2528 - 2529 - private function synchronizeWorkingCopyFromBinding($binding) { 2530 - $fetch_uri = $this->getClusterRepositoryURIFromBinding($binding); 2531 - $local_path = $this->getLocalPath(); 2532 - 2533 - if ($this->isGit()) { 2534 - if (!Filesystem::pathExists($local_path)) { 2535 - $device = AlmanacKeys::getLiveDevice(); 2536 - throw new Exception( 2537 - pht( 2538 - 'Repository "%s" does not have a working copy on this device '. 2539 - 'yet, so it can not be synchronized. Wait for the daemons to '. 2540 - 'construct one or run `bin/repository update %s` on this host '. 2541 - '("%s") to build it explicitly.', 2542 - $this->getDisplayName(), 2543 - $this->getMonogram(), 2544 - $device->getName())); 2545 - } 2546 - 2547 - $argv = array( 2548 - 'fetch --prune -- %s %s', 2549 - $fetch_uri, 2550 - '+refs/*:refs/*', 2551 - ); 2552 - } else { 2553 - throw new Exception(pht('Binding sync only supported for git!')); 2554 - } 2555 - 2556 - $future = DiffusionCommandEngine::newCommandEngine($this) 2557 - ->setArgv($argv) 2558 - ->setConnectAsDevice(true) 2559 - ->setSudoAsDaemon(true) 2560 - ->setProtocol($fetch_uri->getProtocol()) 2561 - ->newFuture(); 2562 - 2563 - $future->setCWD($local_path); 2564 - 2565 - $future->resolvex(); 2566 - } 2567 - 2568 - private function getClusterRepositoryURIFromBinding( 2193 + public function getClusterRepositoryURIFromBinding( 2569 2194 AlmanacBinding $binding) { 2570 2195 $protocol = $binding->getAlmanacPropertyValue('protocol'); 2571 2196 if ($protocol === null) { ··· 2611 2236 2612 2237 return $service; 2613 2238 } 2614 - 2615 - 2616 2239 2617 2240 2618 2241 /* -( Symbols 
)-------------------------------------------------------------*/