@recaptime-dev's fork of Phorge (a community fork of Phabricator), carrying our working patches. The upstream development and stable branches are at upstream/main and upstream/stable, respectively. See hq.recaptime.dev/wiki/Phorge
phorge phabricator
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Generalize repository proxy retry logic to writes

Summary:
Ref T13286. The current (very safe / conservative) rules for retrying git reads generalize to git writes, so we can use the same ruleset in both cases.

Normally, the writable list converges rapidly to having only good nodes at its head, so this change has less impact than the similar change to reads. It still improves consistency, and it allows us to assert that writes which can be served will be served.

Test Plan:
- In a cluster with an up node and a down node, pushed changes.
- Saw a push to the down node fail, retry, and succeed.
- Did some pulls, saw appropriate retries and success.
- Note that once one write goes through, the node which received the write always ends up at the head of the writable list, so nodes need to be explicitly thawed to reproduce the failure/retry behavior.

Maniphest Tasks: T13286

Differential Revision: https://secure.phabricator.com/D20778

+137 -151
+24 -37
src/applications/diffusion/ssh/DiffusionGitReceivePackSSHWorkflow.php
··· 14 14 } 15 15 16 16 protected function executeRepositoryOperations() { 17 + // This is a write, and must have write access. 18 + $this->requireWriteAccess(); 19 + 20 + $is_proxy = $this->shouldProxy(); 21 + if ($is_proxy) { 22 + return $this->executeRepositoryProxyOperations($for_write = true); 23 + } 24 + 17 25 $host_wait_start = microtime(true); 18 26 19 27 $repository = $this->getRepository(); 20 28 $viewer = $this->getSSHUser(); 21 29 $device = AlmanacKeys::getLiveDevice(); 22 30 23 - // This is a write, and must have write access. 24 - $this->requireWriteAccess(); 25 - 26 31 $cluster_engine = id(new DiffusionRepositoryClusterEngine()) 27 32 ->setViewer($viewer) 28 33 ->setRepository($repository) 29 34 ->setLog($this); 30 35 31 - $is_proxy = $this->shouldProxy(); 32 - if ($is_proxy) { 33 - $command = $this->getProxyCommand(true); 34 - $did_write = false; 36 + $command = csprintf('git-receive-pack %s', $repository->getLocalPath()); 37 + $cluster_engine->synchronizeWorkingCopyBeforeWrite(); 35 38 36 - if ($device) { 37 - $this->writeClusterEngineLogMessage( 38 - pht( 39 - "# Push received by \"%s\", forwarding to cluster host.\n", 40 - $device->getName())); 41 - } 42 - } else { 43 - $command = csprintf('git-receive-pack %s', $repository->getLocalPath()); 44 - $did_write = true; 45 - $cluster_engine->synchronizeWorkingCopyBeforeWrite(); 46 - 47 - if ($device) { 48 - $this->writeClusterEngineLogMessage( 49 - pht( 50 - "# Ready to receive on cluster host \"%s\".\n", 51 - $device->getName())); 52 - } 39 + if ($device) { 40 + $this->writeClusterEngineLogMessage( 41 + pht( 42 + "# Ready to receive on cluster host \"%s\".\n", 43 + $device->getName())); 53 44 } 54 45 55 46 $log = $this->newProtocolLog($is_proxy); ··· 71 62 72 63 // We've committed the write (or rejected it), so we can release the lock 73 64 // without waiting for the client to receive the acknowledgement. 
74 - if ($did_write) { 75 - $cluster_engine->synchronizeWorkingCopyAfterWrite(); 76 - } 65 + $cluster_engine->synchronizeWorkingCopyAfterWrite(); 77 66 78 67 if ($caught) { 79 68 throw $caught; ··· 85 74 // When a repository is clustered, we reach this cleanup code on both 86 75 // the proxy and the actual final endpoint node. Don't do more cleanup 87 76 // or logging than we need to. 88 - if ($did_write) { 89 - $repository->writeStatusMessage( 90 - PhabricatorRepositoryStatusMessage::TYPE_NEEDS_UPDATE, 91 - PhabricatorRepositoryStatusMessage::CODE_OKAY); 77 + $repository->writeStatusMessage( 78 + PhabricatorRepositoryStatusMessage::TYPE_NEEDS_UPDATE, 79 + PhabricatorRepositoryStatusMessage::CODE_OKAY); 92 80 93 - $host_wait_end = microtime(true); 81 + $host_wait_end = microtime(true); 94 82 95 - $this->updatePushLogWithTimingInformation( 96 - $this->getClusterEngineLogProperty('writeWait'), 97 - $this->getClusterEngineLogProperty('readWait'), 98 - ($host_wait_end - $host_wait_start)); 99 - } 83 + $this->updatePushLogWithTimingInformation( 84 + $this->getClusterEngineLogProperty('writeWait'), 85 + $this->getClusterEngineLogProperty('readWait'), 86 + ($host_wait_end - $host_wait_start)); 100 87 } 101 88 102 89 return $err;
+112
src/applications/diffusion/ssh/DiffusionGitSSHWorkflow.php
··· 10 10 private $wireProtocol; 11 11 private $ioBytesRead = 0; 12 12 private $ioBytesWritten = 0; 13 + private $requestAttempts = 0; 14 + private $requestFailures = 0; 13 15 14 16 protected function writeError($message) { 15 17 // Git assumes we'll add our own newlines. ··· 144 146 145 147 final protected function getIOBytesWritten() { 146 148 return $this->ioBytesWritten; 149 + } 150 + 151 + final protected function executeRepositoryProxyOperations($for_write) { 152 + $device = AlmanacKeys::getLiveDevice(); 153 + 154 + $refs = $this->getAlmanacServiceRefs($for_write); 155 + $err = 1; 156 + 157 + while (true) { 158 + $ref = head($refs); 159 + 160 + $command = $this->getProxyCommandForServiceRef($ref); 161 + 162 + if ($device) { 163 + $this->writeClusterEngineLogMessage( 164 + pht( 165 + "# Request received by \"%s\", forwarding to cluster ". 166 + "host \"%s\".\n", 167 + $device->getName(), 168 + $ref->getDeviceName())); 169 + } 170 + 171 + $command = PhabricatorDaemon::sudoCommandAsDaemonUser($command); 172 + 173 + $future = id(new ExecFuture('%C', $command)) 174 + ->setEnv($this->getEnvironment()); 175 + 176 + $this->didBeginRequest(); 177 + 178 + $err = $this->newPassthruCommand() 179 + ->setIOChannel($this->getIOChannel()) 180 + ->setCommandChannelFromExecFuture($future) 181 + ->execute(); 182 + 183 + // TODO: Currently, when proxying, we do not write an event log on the 184 + // proxy. Perhaps we should write a "proxy log". This is not very useful 185 + // for statistics or auditing, but could be useful for diagnostics. 186 + // Marking the proxy logs as proxied (and recording devicePHID on all 187 + // logs) would make differentiating between these use cases easier. 188 + 189 + if (!$err) { 190 + $this->waitForGitClient(); 191 + return $err; 192 + } 193 + 194 + // Throw away this service: the request failed and we're treating the 195 + // failure as persistent, so we don't want to retry another request to 196 + // the same host. 
197 + array_shift($refs); 198 + 199 + $should_retry = $this->shouldRetryRequest($refs); 200 + if (!$should_retry) { 201 + return $err; 202 + } 203 + 204 + // If we haven't bailed out yet, we'll retry the request with the next 205 + // service. 206 + } 207 + 208 + throw new Exception(pht('Reached an unreachable place.')); 209 + } 210 + 211 + private function didBeginRequest() { 212 + $this->requestAttempts++; 213 + return $this; 214 + } 215 + 216 + private function shouldRetryRequest(array $remaining_refs) { 217 + $this->requestFailures++; 218 + 219 + if ($this->requestFailures > $this->requestAttempts) { 220 + throw new Exception( 221 + pht( 222 + "Workflow has recorded more failures than attempts; there is a ". 223 + "missing call to \"didBeginRequest()\".\n")); 224 + } 225 + 226 + if (!$remaining_refs) { 227 + $this->writeClusterEngineLogMessage( 228 + pht( 229 + "# All available services failed to serve the request, ". 230 + "giving up.\n")); 231 + return false; 232 + } 233 + 234 + $read_len = $this->getIOBytesRead(); 235 + if ($read_len) { 236 + $this->writeClusterEngineLogMessage( 237 + pht( 238 + "# Client already read from service (%s bytes), unable to retry.\n", 239 + new PhutilNumber($read_len))); 240 + return false; 241 + } 242 + 243 + $write_len = $this->getIOBytesWritten(); 244 + if ($write_len) { 245 + $this->writeClusterEngineLogMessage( 246 + pht( 247 + "# Client already wrote to service (%s bytes), unable to retry.\n", 248 + new PhutilNumber($write_len))); 249 + return false; 250 + } 251 + 252 + $this->writeClusterEngineLogMessage( 253 + pht( 254 + "# Service request failed, retrying (making attempt %s of %s).\n", 255 + new PhutilNumber($this->requestAttempts + 1), 256 + new PhutilNumber($this->requestAttempts + count($remaining_refs)))); 257 + 258 + return true; 147 259 } 148 260 149 261 }
+1 -114
src/applications/diffusion/ssh/DiffusionGitUploadPackSSHWorkflow.php
··· 3 3 final class DiffusionGitUploadPackSSHWorkflow 4 4 extends DiffusionGitSSHWorkflow { 5 5 6 - private $requestAttempts = 0; 7 - private $requestFailures = 0; 8 - 9 6 protected function didConstruct() { 10 7 $this->setName('git-upload-pack'); 11 8 $this->setArguments( ··· 20 17 protected function executeRepositoryOperations() { 21 18 $is_proxy = $this->shouldProxy(); 22 19 if ($is_proxy) { 23 - return $this->executeRepositoryProxyOperations(); 20 + return $this->executeRepositoryProxyOperations($for_write = false); 24 21 } 25 22 26 23 $viewer = $this->getSSHUser(); ··· 92 89 } 93 90 94 91 return $err; 95 - } 96 - 97 - private function executeRepositoryProxyOperations() { 98 - $device = AlmanacKeys::getLiveDevice(); 99 - $for_write = false; 100 - 101 - $refs = $this->getAlmanacServiceRefs($for_write); 102 - $err = 1; 103 - 104 - while (true) { 105 - $ref = head($refs); 106 - 107 - $command = $this->getProxyCommandForServiceRef($ref); 108 - 109 - if ($device) { 110 - $this->writeClusterEngineLogMessage( 111 - pht( 112 - "# Fetch received by \"%s\", forwarding to cluster host \"%s\".\n", 113 - $device->getName(), 114 - $ref->getDeviceName())); 115 - } 116 - 117 - $command = PhabricatorDaemon::sudoCommandAsDaemonUser($command); 118 - 119 - $future = id(new ExecFuture('%C', $command)) 120 - ->setEnv($this->getEnvironment()); 121 - 122 - $this->didBeginRequest(); 123 - 124 - $err = $this->newPassthruCommand() 125 - ->setIOChannel($this->getIOChannel()) 126 - ->setCommandChannelFromExecFuture($future) 127 - ->execute(); 128 - 129 - // TODO: Currently, when proxying, we do not write an event log on the 130 - // proxy. Perhaps we should write a "proxy log". This is not very useful 131 - // for statistics or auditing, but could be useful for diagnostics. 132 - // Marking the proxy logs as proxied (and recording devicePHID on all 133 - // logs) would make differentiating between these use cases easier. 
134 - 135 - if (!$err) { 136 - $this->waitForGitClient(); 137 - return $err; 138 - } 139 - 140 - // Throw away this service: the request failed and we're treating the 141 - // failure as persistent, so we don't want to retry another request to 142 - // the same host. 143 - array_shift($refs); 144 - 145 - $should_retry = $this->shouldRetryRequest($refs); 146 - if (!$should_retry) { 147 - return $err; 148 - } 149 - 150 - // If we haven't bailed out yet, we'll retry the request with the next 151 - // service. 152 - } 153 - 154 - throw new Exception(pht('Reached an unreachable place.')); 155 - } 156 - 157 - private function didBeginRequest() { 158 - $this->requestAttempts++; 159 - return $this; 160 - } 161 - 162 - private function shouldRetryRequest(array $remaining_refs) { 163 - $this->requestFailures++; 164 - 165 - if ($this->requestFailures > $this->requestAttempts) { 166 - throw new Exception( 167 - pht( 168 - "Workflow has recorded more failures than attempts; there is a ". 169 - "missing call to \"didBeginRequest()\".\n")); 170 - } 171 - 172 - if (!$remaining_refs) { 173 - $this->writeClusterEngineLogMessage( 174 - pht( 175 - "# All available services failed to serve the request, ". 
176 - "giving up.\n")); 177 - return false; 178 - } 179 - 180 - $read_len = $this->getIOBytesRead(); 181 - if ($read_len) { 182 - $this->writeClusterEngineLogMessage( 183 - pht( 184 - "# Client already read from service (%s bytes), unable to retry.\n", 185 - new PhutilNumber($read_len))); 186 - return false; 187 - } 188 - 189 - $write_len = $this->getIOBytesWritten(); 190 - if ($write_len) { 191 - $this->writeClusterEngineLogMessage( 192 - pht( 193 - "# Client already wrote to service (%s bytes), unable to retry.\n", 194 - new PhutilNumber($write_len))); 195 - return false; 196 - } 197 - 198 - $this->writeClusterEngineLogMessage( 199 - pht( 200 - "# Service request failed, retrying (making attempt %s of %s).\n", 201 - new PhutilNumber($this->requestAttempts + 1), 202 - new PhutilNumber($this->requestAttempts + count($remaining_refs)))); 203 - 204 - return true; 205 92 } 206 93 207 94 }