@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.) hq.recaptime.dev/wiki/Phorge
phorge phabricator
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

When we fail to acquire a repository lock, try to provide a hint about why

Summary:
Ref T13202. See PHI889. If the lock log is enabled, we can try to offer more details about lock holders.

When we fail to acquire a lock:

- check for recent acquisitions and, if any are found, suggest that this is a bottleneck issue;
- if there are no recent acquisitions, check for the last acquisition and print details about it (what process, how long ago, whether or not we believe it was released).

Test Plan:
- Enabled the lock log.
- Changed the lock wait time to 1 second.
- Added a `sleep(10)` after grabbing the lock.
- In one window, ran a Conduit call or a `git fetch`.
- In another window, ran another operation.
- Got useful/sensible errors for both SSH and web lock holders, for example:

> PhutilProxyException: Failed to acquire read lock after waiting 1 second(s). You may be able to retry later. (This lock was most recently acquired by a process (pid=12609, host=orbital-3.local, sapi=apache2handler, controller=PhabricatorConduitAPIController, method=diffusion.rawdiffquery) 3 second(s) ago. There is no record of this lock being released.)

> PhutilProxyException: Failed to acquire read lock after waiting 1 second(s). You may be able to retry later. (This lock was most recently acquired by a process (pid=65251, host=orbital-3.local, sapi=cli, argv=/Users/epriestley/dev/core/lib/phabricator/bin/ssh-exec --phabricator-ssh-device local.phacility.net --phabricator-ssh-key 2) 2 second(s) ago. There is no record of this lock being released.)

Reviewers: amckinley

Reviewed By: amckinley

Maniphest Tasks: T13202

Differential Revision: https://secure.phabricator.com/D19702

+160 -14
+8 -6
src/applications/diffusion/protocol/DiffusionRepositoryClusterEngine.php
··· 170 170 pht( 171 171 'Acquired read lock immediately.')); 172 172 } 173 - } catch (Exception $ex) { 173 + } catch (PhutilLockException $ex) { 174 174 throw new PhutilProxyException( 175 175 pht( 176 176 'Failed to acquire read lock after waiting %s second(s). You '. 177 - 'may be able to retry later.', 178 - new PhutilNumber($lock_wait)), 177 + 'may be able to retry later. (%s)', 178 + new PhutilNumber($lock_wait), 179 + $ex->getHint()), 179 180 $ex); 180 181 } 181 182 ··· 349 350 pht( 350 351 'Acquired write lock immediately.')); 351 352 } 352 - } catch (Exception $ex) { 353 + } catch (PhutilLockException $ex) { 353 354 throw new PhutilProxyException( 354 355 pht( 355 356 'Failed to acquire write lock after waiting %s second(s). You '. 356 - 'may be able to retry later.', 357 - new PhutilNumber($lock_wait)), 357 + 'may be able to retry later. (%s)', 358 + new PhutilNumber($lock_wait), 359 + $ex->getHint()), 358 360 $ex); 359 361 } 360 362
+152 -8
src/infrastructure/util/PhabricatorGlobalLock.php
··· 144 144 145 145 $ok = head($result); 146 146 if (!$ok) { 147 - throw new PhutilLockException($lock_name); 147 + throw id(new PhutilLockException($lock_name)) 148 + ->setHint($this->newHint($lock_name, $wait)); 148 149 } 149 150 150 151 $conn->rememberLock($lock_name); ··· 152 153 $this->conn = $conn; 153 154 154 155 if ($this->shouldLogLock()) { 155 - global $argv; 156 - 157 - $lock_context = array( 158 - 'pid' => getmypid(), 159 - 'host' => php_uname('n'), 160 - 'argv' => $argv, 161 - ); 156 + $lock_context = $this->newLockContext(); 162 157 163 158 $log = id(new PhabricatorDaemonLockLog()) 164 159 ->setLockName($lock_name) ··· 226 221 } 227 222 228 223 return true; 224 + } 225 + 226 + private function newLockContext() { 227 + $context = array( 228 + 'pid' => getmypid(), 229 + 'host' => php_uname('n'), 230 + 'sapi' => php_sapi_name(), 231 + ); 232 + 233 + global $argv; 234 + if ($argv) { 235 + $context['argv'] = $argv; 236 + } 237 + 238 + $access_log = null; 239 + 240 + // TODO: There's currently no cohesive way to get the parameterized access 241 + // log for the current request across different request types. Web requests 242 + // have an "AccessLog", SSH requests have an "SSHLog", and other processes 243 + // (like scripts) have no log. But there's no method to say "give me any 244 + // log you've got". For now, just test if we have a web request and use the 245 + // "AccessLog" if we do, since that's the only one we actually read any 246 + // parameters from. 247 + 248 + // NOTE: "PhabricatorStartup" is only available from web requests, not 249 + // from CLI scripts. 
250 + if (class_exists('PhabricatorStartup', false)) { 251 + $access_log = PhabricatorAccessLog::getLog(); 252 + } 253 + 254 + if ($access_log) { 255 + $controller = $access_log->getData('C'); 256 + if ($controller) { 257 + $context['controller'] = $controller; 258 + } 259 + 260 + $method = $access_log->getData('m'); 261 + if ($method) { 262 + $context['method'] = $method; 263 + } 264 + } 265 + 266 + return $context; 267 + } 268 + 269 + private function newHint($lock_name, $wait) { 270 + if (!$this->shouldLogLock()) { 271 + return pht( 272 + 'Enable the lock log for more detailed information about '. 273 + 'which process is holding this lock.'); 274 + } 275 + 276 + $now = PhabricatorTime::getNow(); 277 + 278 + // First, look for recent logs. If other processes have been acquiring and 279 + // releasing this lock while we've been waiting, this is more likely to be 280 + // a contention/throughput issue than an issue with something hung while 281 + // holding the lock. 282 + $limit = 100; 283 + $logs = id(new PhabricatorDaemonLockLog())->loadAllWhere( 284 + 'lockName = %s AND dateCreated >= %d ORDER BY id ASC LIMIT %d', 285 + $lock_name, 286 + ($now - $wait), 287 + $limit); 288 + 289 + if ($logs) { 290 + if (count($logs) === $limit) { 291 + return pht( 292 + 'During the last %s second(s) spent waiting for the lock, more '. 293 + 'than %s other process(es) acquired it, so this is likely a '. 294 + 'bottleneck. Use "bin/lock log --name %s" to review log activity.', 295 + new PhutilNumber($wait), 296 + new PhutilNumber($limit), 297 + $lock_name); 298 + } else { 299 + return pht( 300 + 'During the last %s second(s) spent waiting for the lock, %s '. 301 + 'other process(es) acquired it, so this is likely a '. 302 + 'bottleneck. 
Use "bin/lock log --name %s" to review log activity.', 303 + new PhutilNumber($wait), 304 + phutil_count($logs), 305 + $lock_name); 306 + } 307 + } 308 + 309 + $last_log = id(new PhabricatorDaemonLockLog())->loadOneWhere( 310 + 'lockName = %s ORDER BY id DESC LIMIT 1', 311 + $lock_name); 312 + 313 + if ($last_log) { 314 + $info = array(); 315 + 316 + $acquired = $last_log->getDateCreated(); 317 + $context = $last_log->getLockContext(); 318 + 319 + $process_info = array(); 320 + 321 + $pid = idx($context, 'pid'); 322 + if ($pid) { 323 + $process_info[] = 'pid='.$pid; 324 + } 325 + 326 + $host = idx($context, 'host'); 327 + if ($host) { 328 + $process_info[] = 'host='.$host; 329 + } 330 + 331 + $sapi = idx($context, 'sapi'); 332 + if ($sapi) { 333 + $process_info[] = 'sapi='.$sapi; 334 + } 335 + 336 + $argv = idx($context, 'argv'); 337 + if ($argv) { 338 + $process_info[] = 'argv='.(string)csprintf('%LR', $argv); 339 + } 340 + 341 + $controller = idx($context, 'controller'); 342 + if ($controller) { 343 + $process_info[] = 'controller='.$controller; 344 + } 345 + 346 + $method = idx($context, 'method'); 347 + if ($method) { 348 + $process_info[] = 'method='.$method; 349 + } 350 + 351 + $process_info = implode(', ', $process_info); 352 + 353 + $info[] = pht( 354 + 'This lock was most recently acquired by a process (%s) '. 355 + '%s second(s) ago.', 356 + $process_info, 357 + new PhutilNumber($now - $acquired)); 358 + 359 + $released = $last_log->getLockReleased(); 360 + if ($released) { 361 + $info[] = pht( 362 + 'This lock was released %s second(s) ago.', 363 + new PhutilNumber($now - $released)); 364 + } else { 365 + $info[] = pht('There is no record of this lock being released.'); 366 + } 367 + 368 + return implode(' ', $info); 369 + } 370 + 371 + return pht( 372 + 'Found no records of processes acquiring or releasing this lock.'); 229 373 } 230 374 231 375 }