@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.) hq.recaptime.dev/wiki/Phorge
phorge phabricator
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Move toward multi-master replicated repositories

Summary:
Ref T4292. This mostly implements the locking/versioning logic for multi-master repositories. It is only active on Git SSH pathways, and doesn't actually do anything useful yet: it just does bookkeeping so far.

When we read (e.g., `git fetch`) the logic goes like this:

- Get the read lock (unique to device + repository).
- Read all the versions of the repository on every other device.
- If any node has a newer version:
- Fetch the newer version.
- Increment our version to be the same as the version we fetched.
- Release the read lock.
- Actually do the fetch.

This makes sure that any time you do a read, you always read the most recently acknowledged write. You may have to wait for an internal fetch to happen (this isn't actually implemented yet) but the operation will always work like you expect it to.

When we write (e.g., `git push`) the logic goes like this:

- Get the write lock (unique to the repository).
- Do all the read steps so we're up to date.
- Mark a write pending.
- Do the actual write.
- Bump our version and mark our write finished.
- Release the write lock.

This allows you to write to any replica. Again, you might have to wait for a fetch first, but everything will work like you expect.

There's one notable failure mode here: if the network connection between the repository node and the database fails during the write, the write lock might be released even though a write is ongoing.

The "isWriting" column protects against that, by staying locked if we lose our connection to the database. This will currently "freeze" the repository (prevent any new writes) until an administrator can sort things out, since it'd be dangerous to continue doing writes (we may lose data).

(Since we won't actually acknowledge the write, I think, we could probably smooth this out a bit and make it self-healing //most// of the time: basically, have the broken node rewind itself by updating from another good node. But that's a little more complex.)

Test Plan:
- Pushed changes to a cluster-mode repository.
- Viewed web interface, saw "writing" flag and version changes.
- Pulled changes.
- Faked various failures, got sensible states.

Reviewers: chad

Reviewed By: chad

Maniphest Tasks: T4292

Differential Revision: https://secure.phabricator.com/D15688

+361 -1
+8
resources/sql/autopatches/20160411.repo.1.version.sql
··· 1 + CREATE TABLE {$NAMESPACE}_repository.repository_workingcopyversion ( 2 + id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, 3 + repositoryPHID VARBINARY(64) NOT NULL, 4 + devicePHID VARBINARY(64) NOT NULL, 5 + repositoryVersion INT UNSIGNED NOT NULL, 6 + isWriting BOOL NOT NULL, 7 + UNIQUE KEY `key_workingcopy` (repositoryPHID, devicePHID) 8 + ) ENGINE=InnoDB, COLLATE {$COLLATE_TEXT};
+2
src/__phutil_library_map__.php
··· 3210 3210 'PhabricatorRepositoryURITestCase' => 'applications/repository/storage/__tests__/PhabricatorRepositoryURITestCase.php', 3211 3211 'PhabricatorRepositoryVCSPassword' => 'applications/repository/storage/PhabricatorRepositoryVCSPassword.php', 3212 3212 'PhabricatorRepositoryVersion' => 'applications/repository/constants/PhabricatorRepositoryVersion.php', 3213 + 'PhabricatorRepositoryWorkingCopyVersion' => 'applications/repository/storage/PhabricatorRepositoryWorkingCopyVersion.php', 3213 3214 'PhabricatorRequestExceptionHandler' => 'aphront/handler/PhabricatorRequestExceptionHandler.php', 3214 3215 'PhabricatorResourceSite' => 'aphront/site/PhabricatorResourceSite.php', 3215 3216 'PhabricatorRobotsController' => 'applications/system/controller/PhabricatorRobotsController.php', ··· 7854 7855 'PhabricatorRepositoryURITestCase' => 'PhabricatorTestCase', 7855 7856 'PhabricatorRepositoryVCSPassword' => 'PhabricatorRepositoryDAO', 7856 7857 'PhabricatorRepositoryVersion' => 'Phobject', 7858 + 'PhabricatorRepositoryWorkingCopyVersion' => 'PhabricatorRepositoryDAO', 7857 7859 'PhabricatorRequestExceptionHandler' => 'AphrontRequestExceptionHandler', 7858 7860 'PhabricatorResourceSite' => 'PhabricatorSite', 7859 7861 'PhabricatorRobotsController' => 'PhabricatorController',
+38 -1
src/applications/diffusion/management/DiffusionRepositoryClusterManagementPanel.php
··· 44 44 $bindings = $service->getBindings(); 45 45 $bindings = mgroup($bindings, 'getDevicePHID'); 46 46 47 + // This is an unusual read which always comes from the master. 48 + if (PhabricatorEnv::isReadOnly()) { 49 + $versions = array(); 50 + } else { 51 + $versions = PhabricatorRepositoryWorkingCopyVersion::loadVersions( 52 + $repository->getPHID()); 53 + } 54 + 55 + $versions = mpull($versions, null, 'getDevicePHID'); 56 + 47 57 foreach ($bindings as $binding_group) { 48 58 $all_disabled = true; 49 59 foreach ($binding_group as $binding) { ··· 73 83 74 84 $device = $any_binding->getDevice(); 75 85 86 + $version = idx($versions, $device->getPHID()); 87 + if ($version) { 88 + $version_number = $version->getRepositoryVersion(); 89 + $version_number = phutil_tag( 90 + 'a', 91 + array( 92 + 'href' => "/diffusion/pushlog/view/{$version_number}/", 93 + ), 94 + $version_number); 95 + } else { 96 + $version_number = '-'; 97 + } 98 + 99 + if ($version && $version->getIsWriting()) { 100 + $is_writing = id(new PHUIIconView()) 101 + ->setIcon('fa-pencil green'); 102 + } else { 103 + $is_writing = id(new PHUIIconView()) 104 + ->setIcon('fa-pencil grey'); 105 + } 106 + 76 107 $rows[] = array( 77 108 $binding_icon, 78 109 phutil_tag( ··· 81 112 'href' => $device->getURI(), 82 113 ), 83 114 $device->getName()), 115 + $version_number, 116 + $is_writing, 84 117 ); 85 118 } 86 119 } ··· 91 124 array( 92 125 null, 93 126 pht('Device'), 127 + pht('Version'), 128 + pht('Writing'), 94 129 )) 95 130 ->setColumnClasses( 96 131 array( 97 132 null, 98 - 'wide', 133 + null, 134 + null, 135 + 'right wide', 99 136 )); 100 137 101 138 $doc_href = PhabricatorEnv::getDoclink('Cluster: Repositories');
+8
src/applications/diffusion/ssh/DiffusionGitReceivePackSSHWorkflow.php
··· 21 21 22 22 if ($this->shouldProxy()) { 23 23 $command = $this->getProxyCommand(); 24 + $is_proxy = true; 24 25 } else { 25 26 $command = csprintf('git-receive-pack %s', $repository->getLocalPath()); 27 + $is_proxy = false; 28 + 29 + $repository->synchronizeWorkingCopyBeforeWrite(); 26 30 } 27 31 $command = PhabricatorDaemon::sudoCommandAsDaemonUser($command); 28 32 ··· 39 43 PhabricatorRepositoryStatusMessage::TYPE_NEEDS_UPDATE, 40 44 PhabricatorRepositoryStatusMessage::CODE_OKAY); 41 45 $this->waitForGitClient(); 46 + } 47 + 48 + if (!$is_proxy) { 49 + $repository->synchronizeWorkingCopyAfterWrite(); 42 50 } 43 51 44 52 return $err;
+1
src/applications/diffusion/ssh/DiffusionGitUploadPackSSHWorkflow.php
··· 20 20 $command = $this->getProxyCommand(); 21 21 } else { 22 22 $command = csprintf('git-upload-pack -- %s', $repository->getLocalPath()); 23 + $repository->synchronizeWorkingCopyBeforeRead(); 23 24 } 24 25 $command = PhabricatorDaemon::sudoCommandAsDaemonUser($command); 25 26
+159
src/applications/repository/storage/PhabricatorRepository.php
/* -( Cluster Synchronization )-------------------------------------------- */

// Members added to PhabricatorRepository (class body excerpt) to support
// cluster synchronization of working copies.

/**
 * Write lock acquired by synchronizeWorkingCopyBeforeWrite() and released by
 * synchronizeWorkingCopyAfterWrite().
 */
private $clusterWriteLock;

/**
 * Repository version observed by synchronizeWorkingCopyBeforeWrite(), used
 * as the expected "old" version when the write is recorded.
 */
private $clusterWriteVersion;


/**
 * Bring this device's version record up to date before serving a read.
 *
 * Takes the per-device read lock, loads every device's version record for
 * this repository, and if any device reports a newer version than this one,
 * records that newer version for this device. The actual fetch from the
 * newer node is not implemented yet (see TODO below).
 *
 * @return int|null Highest known repository version, or null if this host
 *   is not an Almanac device (in which case nothing is done).
 *
 * @task sync
 */
public function synchronizeWorkingCopyBeforeRead() {
  $device = AlmanacKeys::getLiveDevice();
  if (!$device) {
    return;
  }

  $repository_phid = $this->getPHID();
  $device_phid = $device->getPHID();

  $read_lock = PhabricatorRepositoryWorkingCopyVersion::getReadLock(
    $repository_phid,
    $device_phid);

  // TODO: Raise a more useful exception if we fail to grab this lock.
  $read_lock->lock(phutil_units('2 minutes in seconds'));

  try {
    $versions = PhabricatorRepositoryWorkingCopyVersion::loadVersions(
      $repository_phid);
    $versions = mpull($versions, null, 'getDevicePHID');

    $this_version = idx($versions, $device_phid);
    if ($this_version) {
      $this_version = (int)$this_version->getRepositoryVersion();
    } else {
      $this_version = 0;
    }

    if ($versions) {
      $max_version = (int)max(mpull($versions, 'getRepositoryVersion'));
    } else {
      $max_version = 0;
    }

    if ($max_version > $this_version) {
      // Devices which hold the newest version; these are the candidates to
      // fetch from once fetching is implemented.
      $fetchable = array();
      foreach ($versions as $version) {
        if ($version->getRepositoryVersion() == $max_version) {
          $fetchable[] = $version->getDevicePHID();
        }
      }

      // TODO: Actually fetch the newer version from one of the nodes which
      // has it.

      PhabricatorRepositoryWorkingCopyVersion::updateVersion(
        $repository_phid,
        $device_phid,
        $max_version);
    }
  } catch (Exception $ex) {
    // Don't leave the read lock held if bookkeeping fails.
    $read_lock->unlock();
    throw $ex;
  }

  $read_lock->unlock();

  return $max_version;
}


/**
 * Prepare this device for a write to the repository.
 *
 * Takes the repository-wide write lock, refuses to continue if any device
 * still has an unfinished write recorded, performs the read synchronization
 * steps, and then marks a write as pending for this device. On success the
 * write lock stays held until synchronizeWorkingCopyAfterWrite() is called.
 *
 * Does nothing if this host is not an Almanac device.
 *
 * @return void
 * @throws Exception If a previous write to this repository was left
 *   incomplete.
 *
 * @task sync
 */
public function synchronizeWorkingCopyBeforeWrite() {
  $device = AlmanacKeys::getLiveDevice();
  if (!$device) {
    return;
  }

  $repository_phid = $this->getPHID();
  $device_phid = $device->getPHID();

  $write_lock = PhabricatorRepositoryWorkingCopyVersion::getWriteLock(
    $repository_phid);

  // TODO: Raise a more useful exception if we fail to grab this lock.
  $write_lock->lock(phutil_units('2 minutes in seconds'));

  try {
    $versions = PhabricatorRepositoryWorkingCopyVersion::loadVersions(
      $repository_phid);
    foreach ($versions as $version) {
      if (!$version->getIsWriting()) {
        continue;
      }

      // TODO: This should provide more help so users can resolve the issue.
      throw new Exception(
        pht(
          'An incomplete write was previously performed to this repository; '.
          'refusing new writes.'));
    }

    $max_version = $this->synchronizeWorkingCopyBeforeRead();

    PhabricatorRepositoryWorkingCopyVersion::willWrite(
      $repository_phid,
      $device_phid);
  } catch (Exception $ex) {
    // We are not going to write, so release the write lock before failing.
    $write_lock->unlock();
    throw $ex;
  }

  $this->clusterWriteVersion = $max_version;
  $this->clusterWriteLock = $write_lock;
}


/**
 * Record the result of a write and release the write lock.
 *
 * The new version is the ID of the most recent push event for this
 * repository. If no push event exists, the version recorded before the
 * write is kept, so the pending-write flag is still cleared.
 *
 * @return void
 * @throws Exception If no write lock is held, or this host is not an
 *   Almanac device.
 *
 * @task sync
 */
public function synchronizeWorkingCopyAfterWrite() {
  if (!$this->clusterWriteLock) {
    throw new Exception(
      pht(
        'Trying to synchronize after write, but not holding a write '.
        'lock!'));
  }

  $device = AlmanacKeys::getLiveDevice();
  if (!$device) {
    throw new Exception(
      pht(
        'Trying to synchronize after write, but this host is not an '.
        'Almanac device.'));
  }

  $repository_phid = $this->getPHID();
  $device_phid = $device->getPHID();

  // NOTE: This means we're still bumping the version when pushes fail. We
  // could select only un-rejected events instead to bump a little less
  // often.

  $new_log = id(new PhabricatorRepositoryPushEventQuery())
    ->setViewer(PhabricatorUser::getOmnipotentUser())
    ->withRepositoryPHIDs(array($repository_phid))
    ->setLimit(1)
    ->executeOne();

  $old_version = $this->clusterWriteVersion;
  if ($new_log) {
    $new_version = $new_log->getID();
  } else {
    // No push event was found: keep the old version rather than
    // dereferencing a missing log.
    $new_version = $old_version;
  }

  PhabricatorRepositoryWorkingCopyVersion::didWrite(
    $repository_phid,
    $device_phid,
    $old_version,
    $new_version);

  $this->clusterWriteLock->unlock();
  $this->clusterWriteLock = null;
}
+145
src/applications/repository/storage/PhabricatorRepositoryWorkingCopyVersion.php
<?php

/**
 * Per-device bookkeeping record for a clustered repository.
 *
 * Each row stores, for one (repository, device) pair, the repository version
 * that device holds and whether that device has a write in progress.
 */
final class PhabricatorRepositoryWorkingCopyVersion
  extends PhabricatorRepositoryDAO {

  protected $repositoryPHID;
  protected $devicePHID;
  protected $repositoryVersion;
  protected $isWriting;

  protected function getConfiguration() {
    return array(
      self::CONFIG_TIMESTAMPS => false,
      self::CONFIG_COLUMN_SCHEMA => array(
        'repositoryVersion' => 'uint32',
        'isWriting' => 'bool',
      ),
      self::CONFIG_KEY_SCHEMA => array(
        'key_workingcopy' => array(
          'columns' => array('repositoryPHID', 'devicePHID'),
          'unique' => true,
        ),
      ),
    ) + parent::getConfiguration();
  }

  /**
   * Load the version records of every device for a repository.
   *
   * @param string $repository_phid Repository PHID.
   * @return list<PhabricatorRepositoryWorkingCopyVersion> Version records.
   */
  public static function loadVersions($repository_phid) {
    $version = new self();
    $conn_w = $version->establishConnection('w');
    $table = $version->getTableName();

    // This is a normal read, but force it to come from the master by using
    // the write connection.
    $rows = queryfx_all(
      $conn_w,
      'SELECT * FROM %T WHERE repositoryPHID = %s',
      $table,
      $repository_phid);

    return $version->loadAllFromArray($rows);
  }

  /**
   * Build the read lock, which is unique to a (repository, device) pair.
   *
   * @param string $repository_phid Repository PHID.
   * @param string $device_phid Device PHID.
   * @return PhabricatorGlobalLock Lock object; not yet acquired.
   */
  public static function getReadLock($repository_phid, $device_phid) {
    $repository_hash = PhabricatorHash::digestForIndex($repository_phid);
    $device_hash = PhabricatorHash::digestForIndex($device_phid);
    $lock_key = "repo.read({$repository_hash}, {$device_hash})";

    return PhabricatorGlobalLock::newLock($lock_key);
  }

  /**
   * Build the write lock, which is unique to a repository (shared by all
   * devices).
   *
   * @param string $repository_phid Repository PHID.
   * @return PhabricatorGlobalLock Lock object; not yet acquired.
   */
  public static function getWriteLock($repository_phid) {
    $repository_hash = PhabricatorHash::digestForIndex($repository_phid);
    $lock_key = "repo.write({$repository_hash})";

    return PhabricatorGlobalLock::newLock($lock_key);
  }


  /**
   * Before a write, set the "isWriting" flag.
   *
   * This allows us to detect when we lose a node partway through a write and
   * may have committed and acknowledged a write on a node that lost the lock
   * partway through the write and is no longer reachable.
   *
   * In particular, if a node loses its connection to the database the global
   * lock is released by default. This is a durable lock which stays locked
   * by default.
   *
   * If the device has no row yet, one is created at version 0. This matches
   * the "no version recorded" value that
   * PhabricatorRepository::synchronizeWorkingCopyBeforeRead() reports, so the
   * version guard in didWrite() matches after the first write. If a row
   * already exists, only the flag is changed.
   *
   * @param string $repository_phid Repository PHID.
   * @param string $device_phid Device PHID.
   * @return void
   */
  public static function willWrite($repository_phid, $device_phid) {
    $version = new self();
    $conn_w = $version->establishConnection('w');
    $table = $version->getTableName();

    queryfx(
      $conn_w,
      'INSERT INTO %T
        (repositoryPHID, devicePHID, repositoryVersion, isWriting)
        VALUES
        (%s, %s, %d, %d)
        ON DUPLICATE KEY UPDATE
          isWriting = VALUES(isWriting)',
      $table,
      $repository_phid,
      $device_phid,
      0,
      1);
  }


  /**
   * After a write, update the version and release the "isWriting" lock.
   *
   * The update only applies to a row which is still at the expected old
   * version and still flagged as writing; otherwise nothing is changed.
   *
   * @param string $repository_phid Repository PHID.
   * @param string $device_phid Device PHID.
   * @param int $old_version Version the device held before the write.
   * @param int $new_version Version to record after the write.
   * @return void
   */
  public static function didWrite(
    $repository_phid,
    $device_phid,
    $old_version,
    $new_version) {
    $version = new self();
    $conn_w = $version->establishConnection('w');
    $table = $version->getTableName();

    queryfx(
      $conn_w,
      'UPDATE %T SET repositoryVersion = %d, isWriting = 0
        WHERE
          repositoryPHID = %s AND
          devicePHID = %s AND
          repositoryVersion = %d AND
          isWriting = 1',
      $table,
      $new_version,
      $repository_phid,
      $device_phid,
      $old_version);
  }


  /**
   * After a fetch, set the local version to the fetched version.
   *
   * Creates the device's row (not writing) if it does not exist; otherwise
   * only the version is changed and the "isWriting" flag is left alone.
   *
   * @param string $repository_phid Repository PHID.
   * @param string $device_phid Device PHID.
   * @param int $new_version Version the device now holds.
   * @return void
   */
  public static function updateVersion(
    $repository_phid,
    $device_phid,
    $new_version) {
    $version = new self();
    $conn_w = $version->establishConnection('w');
    $table = $version->getTableName();

    queryfx(
      $conn_w,
      'INSERT INTO %T
        (repositoryPHID, devicePHID, repositoryVersion, isWriting)
        VALUES
        (%s, %s, %d, %d)
        ON DUPLICATE KEY UPDATE
          repositoryVersion = VALUES(repositoryVersion)',
      $table,
      $repository_phid,
      $device_phid,
      $new_version,
      0);
  }


}