@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.) hq.recaptime.dev/wiki/Phorge
phorge phabricator
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

When we "discover" new fatal setup issues, stop serving traffic

Summary:
Ref T10759. We may "discover" the presence of a fatal setup error later, after starting Phabricator.

This can happen in a few ways, but most are unlikely. The one I'm immediately concerned about is:

- Phabricator starts up during a disaster with some databases unreachable.
- We start with warnings (unreachable databases are generally not fatal, since it's OK for some subset of hosts to be down in replicated/partitioned setups).
- The unreachable databases later recover and become accessible again.
- When we run checks against them, we discover that they are misconfigured.

Currently, "fatal" setup issues are not truly fatal if we're "in flight" -- we've survived setup checks at least once in the past. This is bad in the scenario above.

Especially with partitioning, it could lead to mangled data in a disaster scenario where operations staff makes a small configuration mistake while trying to get things running again.

Instead, if we "discover" a fatal error while already "in flight", reset the whole setup process as though the webserver had just restarted. Don't serve requests again until we can make it through setup without hitting fatals.

Test Plan:
- Started Phabricator with multiple masters, one of which was down and broken.
- Got a warning about the bad master.
- Revived the master.
- Before: Phabricator detects the fatal, but keeps serving requests.
- After: Phabricator detects the fatal, resets the webserver, and stops serving requests until the fatal is resolved.

Reviewers: chad

Reviewed By: chad

Maniphest Tasks: T10759

Differential Revision: https://secure.phabricator.com/D16903

+123 -22
+2
src/__phutil_library_map__.php
··· 3706 3706 'PhabricatorSettingsTimezoneController' => 'applications/settings/controller/PhabricatorSettingsTimezoneController.php', 3707 3707 'PhabricatorSetupCheck' => 'applications/config/check/PhabricatorSetupCheck.php', 3708 3708 'PhabricatorSetupCheckTestCase' => 'applications/config/check/__tests__/PhabricatorSetupCheckTestCase.php', 3709 + 'PhabricatorSetupEngine' => 'applications/config/engine/PhabricatorSetupEngine.php', 3709 3710 'PhabricatorSetupIssue' => 'applications/config/issue/PhabricatorSetupIssue.php', 3710 3711 'PhabricatorSetupIssueUIExample' => 'applications/uiexample/examples/PhabricatorSetupIssueUIExample.php', 3711 3712 'PhabricatorSetupIssueView' => 'applications/config/view/PhabricatorSetupIssueView.php', ··· 8868 8869 'PhabricatorSettingsTimezoneController' => 'PhabricatorController', 8869 8870 'PhabricatorSetupCheck' => 'Phobject', 8870 8871 'PhabricatorSetupCheckTestCase' => 'PhabricatorTestCase', 8872 + 'PhabricatorSetupEngine' => 'Phobject', 8871 8873 'PhabricatorSetupIssue' => 'Phobject', 8872 8874 'PhabricatorSetupIssueUIExample' => 'PhabricatorUIExample', 8873 8875 'PhabricatorSetupIssueView' => 'AphrontView',
+37 -7
src/applications/config/check/PhabricatorSetupCheck.php
··· 68 68 return $cache->getKey('phabricator.setup.issue-keys'); 69 69 } 70 70 71 + final public static function resetSetupState() { 72 + $cache = PhabricatorCaches::getSetupCache(); 73 + $cache->deleteKey('phabricator.setup.issue-keys'); 74 + 75 + $server_cache = PhabricatorCaches::getServerStateCache(); 76 + $server_cache->deleteKey('phabricator.in-flight'); 77 + 78 + $use_scope = AphrontWriteGuard::isGuardActive(); 79 + if ($use_scope) { 80 + $unguarded = AphrontWriteGuard::beginScopedUnguardedWrites(); 81 + } else { 82 + AphrontWriteGuard::allowDangerousUnguardedWrites(true); 83 + } 84 + 85 + $caught = null; 86 + try { 87 + $db_cache = new PhabricatorKeyValueDatabaseCache(); 88 + $db_cache->deleteKey('phabricator.setup.issue-keys'); 89 + } catch (Exception $ex) { 90 + $caught = $ex; 91 + } 92 + 93 + if ($use_scope) { 94 + unset($unguarded); 95 + } else { 96 + AphrontWriteGuard::allowDangerousUnguardedWrites(false); 97 + } 98 + 99 + if ($caught) { 100 + throw $caught; 101 + } 102 + } 103 + 71 104 final public static function setOpenSetupIssueKeys( 72 105 array $keys, 73 106 $update_database) { ··· 161 194 final public static function willProcessRequest() { 162 195 $issue_keys = self::getOpenSetupIssueKeys(); 163 196 if ($issue_keys === null) { 164 - $issues = self::runNormalChecks(); 165 - foreach ($issues as $issue) { 166 - if ($issue->getIsFatal()) { 167 - return self::newIssueResponse($issue); 168 - } 197 + $engine = new PhabricatorSetupEngine(); 198 + $response = $engine->execute(); 199 + if ($response) { 200 + return $response; 169 201 } 170 - $issue_keys = self::getUnignoredIssueKeys($issues); 171 - self::setOpenSetupIssueKeys($issue_keys, $update_database = true); 172 202 } else if ($issue_keys) { 173 203 // If Phabricator is configured in a cluster with multiple web devices, 174 204 // we can end up with setup issues cached on every device. This can cause
+6 -4
src/applications/config/controller/PhabricatorConfigIssueListController.php
··· 9 9 $nav = $this->buildSideNavView(); 10 10 $nav->selectFilter('issue/'); 11 11 12 - $issues = PhabricatorSetupCheck::runNormalChecks(); 13 - PhabricatorSetupCheck::setOpenSetupIssueKeys( 14 - PhabricatorSetupCheck::getUnignoredIssueKeys($issues), 15 - $update_database = true); 12 + $engine = new PhabricatorSetupEngine(); 13 + $response = $engine->execute(); 14 + if ($response) { 15 + return $response; 16 + } 17 + $issues = $engine->getIssues(); 16 18 17 19 $important = $this->buildIssueList( 18 20 $issues,
+8 -7
src/applications/config/controller/PhabricatorConfigIssuePanelController.php
··· 5 5 6 6 public function handleRequest(AphrontRequest $request) { 7 7 $viewer = $request->getViewer(); 8 - $open_items = PhabricatorSetupCheck::getOpenSetupIssueKeys(); 9 - $issues = PhabricatorSetupCheck::runNormalChecks(); 10 - PhabricatorSetupCheck::setOpenSetupIssueKeys( 11 - PhabricatorSetupCheck::getUnignoredIssueKeys($issues), 12 - $update_database = true); 8 + 9 + $engine = new PhabricatorSetupEngine(); 10 + $response = $engine->execute(); 11 + if ($response) { 12 + return $response; 13 + } 14 + $issues = $engine->getIssues(); 15 + $unresolved_count = count($engine->getUnresolvedIssues()); 13 16 14 17 if ($issues) { 15 18 require_celerity_resource('phabricator-notification-menu-css'); ··· 54 57 ), 55 58 pht('Unresolved Setup Issues')), 56 59 $content); 57 - 58 - $unresolved_count = count($open_items); 59 60 60 61 $json = array( 61 62 'content' => $content,
+6 -4
src/applications/config/controller/PhabricatorConfigIssueViewController.php
··· 7 7 $viewer = $request->getViewer(); 8 8 $issue_key = $request->getURIData('key'); 9 9 10 - $issues = PhabricatorSetupCheck::runNormalChecks(); 11 - PhabricatorSetupCheck::setOpenSetupIssueKeys( 12 - PhabricatorSetupCheck::getUnignoredIssueKeys($issues), 13 - $update_database = true); 10 + $engine = new PhabricatorSetupEngine(); 11 + $response = $engine->execute(); 12 + if ($response) { 13 + return $response; 14 + } 15 + $issues = $engine->getIssues(); 14 16 15 17 $nav = $this->buildSideNavView(); 16 18 $nav->selectFilter('issue/');
+64
src/applications/config/engine/PhabricatorSetupEngine.php
<?php

/**
 * Runs the normal setup checks and either produces a fatal issue response
 * or records the discovered issues for later inspection.
 *
 * Call execute() first. If it returns a response, return that response to
 * the client instead of continuing. If it returns null, the issue accessors
 * on this object become available.
 */
final class PhabricatorSetupEngine
  extends Phobject {

  // Issues from the last execute() call which completed without finding a
  // fatal issue. Remains null until then.
  private $issues;

  /**
   * Get the issues found by the setup checks.
   *
   * @return wild The value produced by
   *   PhabricatorSetupCheck::runNormalChecks() during execute().
   * @throws PhutilInvalidStateException If no issues have been recorded,
   *   either because execute() has not been called or because it has only
   *   ever ended in a fatal issue response.
   */
  public function getIssues() {
    if ($this->issues === null) {
      throw new PhutilInvalidStateException('execute');
    }

    return $this->issues;
  }

  /**
   * Get the recorded issues which are still unresolved.
   *
   * "Unresolved" is defined by
   * PhabricatorSetupCheck::getUnignoredIssueKeys(): only issues whose keys
   * that method returns are included in the result.
   *
   * @return map<string, wild> Unresolved issues, keyed by issue key.
   * @throws PhutilInvalidStateException Under the same conditions as
   *   getIssues().
   */
  public function getUnresolvedIssues() {
    $issues = $this->getIssues();
    $issues = mpull($issues, null, 'getIssueKey');

    $unresolved_keys = PhabricatorSetupCheck::getUnignoredIssueKeys($issues);

    return array_select_keys($issues, $unresolved_keys);
  }

  /**
   * Run the normal setup checks.
   *
   * If any issue is fatal, the cached setup state is reset and a response
   * describing the first fatal issue is returned; no issues are recorded on
   * this object in that case. Otherwise, the open issue keys are stored
   * (including the database update) and the issues are recorded.
   *
   * @return wild|null Response for the first fatal issue, or null if no
   *   fatal issue was found.
   */
  public function execute() {
    $issues = PhabricatorSetupCheck::runNormalChecks();

    $fatal_issue = null;
    foreach ($issues as $issue) {
      if ($issue->getIsFatal()) {
        $fatal_issue = $issue;
        break;
      }
    }

    if ($fatal_issue) {
      // If we've discovered a fatal, we reset any in-flight state to push
      // web hosts out of service.

      // This can happen if Phabricator starts during a disaster and some
      // databases can not be reached. We allow Phabricator to start up in
      // this situation, since it may still be able to usefully serve requests
      // without risk to data.

      // However, if databases later become reachable and we learn that they
      // are fatally misconfigured, we want to tear the world down again
      // because data may be at risk.
      PhabricatorSetupCheck::resetSetupState();

      // Build the response from the issue we explicitly selected, rather
      // than from whatever value the loop variable happens to hold after
      // the loop ends.
      return PhabricatorSetupCheck::newIssueResponse($fatal_issue);
    }

    $issue_keys = PhabricatorSetupCheck::getUnignoredIssueKeys($issues);

    PhabricatorSetupCheck::setOpenSetupIssueKeys(
      $issue_keys,
      $update_database = true);

    $this->issues = $issues;

    return null;
  }

}