@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.) hq.recaptime.dev/wiki/Phorge
phorge phabricator
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

When documents are indexed, record the indexer version (versus the object version) and index epoch

Summary:
Ref T13587. D21495 makes significant changes to the ngram indexer, which may contain bugs.

Make it easier to reindex a subset of documents (based on the date when the index was built, and/or the software version which generated the index).

This is in addition to the existing versioning, which is focused on object versions.

Test Plan: Ran `bin/search index` with various old and new arguments. Spot-checked the `IndexVersion` table.

Subscribers: PHID-OPKG-gm6ozazyms6q6i22gyam

Maniphest Tasks: T13587

Differential Revision: https://secure.phabricator.com/D21560

+279 -85
+2
resources/sql/autopatches/20210216.index.01.version.sql
··· 1 + ALTER TABLE {$NAMESPACE}_search.search_indexversion 2 + ADD indexVersion BINARY(12) NOT NULL;
+2
resources/sql/autopatches/20210216.index.02.epoch.sql
··· 1 + ALTER TABLE {$NAMESPACE}_search.search_indexversion 2 + ADD indexEpoch INT UNSIGNED NOT NULL;
+22 -7
src/applications/search/index/PhabricatorIndexEngine.php
··· 109 109 110 110 $rows = queryfx_all( 111 111 $conn_r, 112 - 'SELECT * FROM %T WHERE objectPHID = %s AND extensionKey IN (%Ls)', 113 - $table->getTableName(), 112 + 'SELECT version, extensionKey 113 + FROM %R 114 + WHERE objectPHID = %s AND extensionKey IN (%Ls)', 115 + $table, 114 116 $object_phid, 115 117 $extension_keys); 116 118 ··· 128 130 $table = new PhabricatorSearchIndexVersion(); 129 131 $conn_w = $table->establishConnection('w'); 130 132 133 + $now = PhabricatorTime::getNow(); 134 + 135 + // See T13587. For now, this is just a marker to make it easy to reindex 136 + // documents if some version of the indexing code is later discovered to 137 + // be questionable. 138 + $index_version = '2021-02-16-A'; 139 + 131 140 $sql = array(); 132 141 foreach ($versions as $key => $version) { 133 142 $sql[] = qsprintf( 134 143 $conn_w, 135 - '(%s, %s, %s)', 144 + '(%s, %s, %s, %s, %d)', 136 145 $object_phid, 137 146 $key, 138 - $version); 147 + $version, 148 + $index_version, 149 + $now); 139 150 } 140 151 141 152 queryfx( 142 153 $conn_w, 143 - 'INSERT INTO %T (objectPHID, extensionKey, version) 154 + 'INSERT INTO %R (objectPHID, extensionKey, version, 155 + indexVersion, indexEpoch) 144 156 VALUES %LQ 145 - ON DUPLICATE KEY UPDATE version = VALUES(version)', 146 - $table->getTableName(), 157 + ON DUPLICATE KEY UPDATE 158 + version = VALUES(version), 159 + indexVersion = VALUES(indexVersion), 160 + indexEpoch = VALUES(indexEpoch)', 161 + $table, 147 162 $sql); 148 163 } 149 164
+245 -78
src/applications/search/management/PhabricatorSearchManagementIndexWorkflow.php
··· 8 8 ->setName('index') 9 9 ->setSynopsis(pht('Build or rebuild search indexes.')) 10 10 ->setExamples( 11 - "**index** D123\n". 12 - "**index** --type task\n". 13 - "**index** --all") 11 + implode( 12 + "\n", 13 + array( 14 + '**index** D123', 15 + '**index** --all', 16 + '**index** [--type __task__] [--version __version__] ...', 17 + ))) 14 18 ->setArguments( 15 19 array( 16 20 array( ··· 20 24 array( 21 25 'name' => 'type', 22 26 'param' => 'type', 27 + 'repeat' => true, 23 28 'help' => pht( 24 29 'Object types to reindex, like "task", "commit" or "revision".'), 25 30 ), ··· 38 43 'incremental update.'), 39 44 ), 40 45 array( 46 + 'name' => 'version', 47 + 'param' => 'version', 48 + 'repeat' => true, 49 + 'help' => pht( 50 + 'Reindex objects previously indexed with a particular '. 51 + 'version of the indexer.'), 52 + ), 53 + array( 54 + 'name' => 'min-index-date', 55 + 'param' => 'date', 56 + 'help' => pht( 57 + 'Reindex objects previously indexed on or after a '. 58 + 'given date.'), 59 + ), 60 + array( 61 + 'name' => 'max-index-date', 62 + 'param' => 'date', 63 + 'help' => pht( 64 + 'Reindex objects previously indexed on or before a '. 
65 + 'given date.'), 66 + ), 67 + array( 41 68 'name' => 'objects', 42 69 'wildcard' => true, 43 70 ), ··· 47 74 public function execute(PhutilArgumentParser $args) { 48 75 $this->validateClusterSearchConfig(); 49 76 50 - $console = PhutilConsole::getConsole(); 51 - 52 77 $is_all = $args->getArg('all'); 53 - $is_type = $args->getArg('type'); 54 78 $is_force = $args->getArg('force'); 55 79 56 - $obj_names = $args->getArg('objects'); 80 + $object_types = $args->getArg('type'); 81 + $index_versions = $args->getArg('version'); 82 + 83 + $min_epoch = $args->getArg('min-index-date'); 84 + if ($min_epoch !== null) { 85 + $min_epoch = $this->parseTimeArgument($min_epoch); 86 + } 57 87 58 - if ($obj_names && ($is_all || $is_type)) { 88 + $max_epoch = $args->getArg('max-index-date'); 89 + if ($max_epoch !== null) { 90 + $max_epoch = $this->parseTimeArgument($max_epoch); 91 + } 92 + 93 + $object_names = $args->getArg('objects'); 94 + 95 + $any_constraints = 96 + ($object_names) || 97 + ($object_types) || 98 + ($index_versions) || 99 + ($min_epoch) || 100 + ($max_epoch); 101 + 102 + if ($is_all && $any_constraints) { 59 103 throw new PhutilArgumentUsageException( 60 104 pht( 61 - "You can not name objects to index alongside the '%s' or '%s' flags.", 62 - '--all', 63 - '--type')); 64 - } else if (!$obj_names && !($is_all || $is_type)) { 105 + 'You can not use query constraint flags (like "--version", '. 106 + '"--type", or a list of specific objects) with "--all".')); 107 + } 108 + 109 + if (!$is_all && !$any_constraints) { 65 110 throw new PhutilArgumentUsageException( 66 111 pht( 67 - "Provide one of '%s', '%s' or a list of object names.", 68 - '--all', 69 - '--type')); 112 + 'Provide a list of objects to index (like "D123"), or a set of '. 113 + 'query constraint flags (like "--type"), or "--all" to index '. 
114 + 'all objects.')); 70 115 } 71 116 72 - if ($obj_names) { 73 - $phids = $this->loadPHIDsByNames($obj_names); 74 - } else { 75 - $phids = $this->loadPHIDsByTypes($is_type); 76 - } 77 - 78 - if (!$phids) { 79 - throw new PhutilArgumentUsageException(pht('Nothing to index!')); 80 - } 81 117 82 118 if ($args->getArg('background')) { 83 119 $is_background = true; ··· 87 123 } 88 124 89 125 if (!$is_background) { 90 - echo tsprintf( 91 - "**<bg:blue> %s </bg>** %s\n", 126 + $this->logInfo( 92 127 pht('NOTE'), 93 128 pht( 94 - 'Run this workflow with "%s" to queue tasks for the daemon workers.', 95 - '--background')); 129 + 'Run this workflow with "--background" to queue tasks for the '. 130 + 'daemon workers.')); 131 + } 132 + 133 + $this->logInfo( 134 + pht('SELECT'), 135 + pht('Selecting objects to index...')); 136 + 137 + $object_phids = null; 138 + if ($object_names) { 139 + $object_phids = $this->loadPHIDsByNames($object_names); 140 + $object_phids = array_fuse($object_phids); 141 + } 142 + 143 + $type_phids = null; 144 + if ($is_all || $object_types) { 145 + $object_map = $this->getIndexableObjectsByTypes($object_types); 146 + $type_phids = array(); 147 + foreach ($object_map as $object) { 148 + $iterator = new LiskMigrationIterator($object); 149 + foreach ($iterator as $o) { 150 + $type_phids[] = $o->getPHID(); 151 + } 152 + } 153 + $type_phids = array_fuse($type_phids); 154 + } 155 + 156 + $index_phids = null; 157 + if ($index_versions || $min_epoch || $max_epoch) { 158 + $index_phids = $this->loadPHIDsByIndexConstraints( 159 + $index_versions, 160 + $min_epoch, 161 + $max_epoch); 162 + $index_phids = array_fuse($index_phids); 163 + } 164 + 165 + $working_set = null; 166 + $filter_sets = array( 167 + $object_phids, 168 + $type_phids, 169 + $index_phids, 170 + ); 171 + 172 + foreach ($filter_sets as $filter_set) { 173 + if ($filter_set === null) { 174 + continue; 175 + } 176 + 177 + if ($working_set === null) { 178 + $working_set = $filter_set; 179 + 
continue; 180 + } 181 + 182 + $working_set = array_intersect_key($working_set, $filter_set); 96 183 } 97 184 98 - $groups = phid_group_by_type($phids); 99 - foreach ($groups as $group_type => $group) { 100 - $console->writeOut( 101 - "%s\n", 102 - pht('Indexing %d object(s) of type %s.', count($group), $group_type)); 185 + $phids = array_keys($working_set); 186 + 187 + if (!$phids) { 188 + $this->logWarn( 189 + pht('NO OBJECTS'), 190 + pht('No objects selected to index.')); 191 + return 0; 103 192 } 193 + 194 + $this->logInfo( 195 + pht('INDEXING'), 196 + pht( 197 + 'Indexing %s object(s).', 198 + phutil_count($phids))); 104 199 105 200 $bar = id(new PhutilConsoleProgressBar()) 106 201 ->setTotal(count($phids)); ··· 166 261 167 262 if ($track_skips) { 168 263 if ($count_updated) { 169 - echo tsprintf( 170 - "**<bg:green> %s </bg>** %s\n", 264 + $this->logOkay( 171 265 pht('DONE'), 172 266 pht( 173 267 'Updated search indexes for %s document(s).', ··· 175 269 } 176 270 177 271 if ($count_skipped) { 178 - echo tsprintf( 179 - "**<bg:yellow> %s </bg>** %s\n", 272 + $this->logWarn( 180 273 pht('SKIP'), 181 274 pht( 182 275 'Skipped %s documents(s) which have not updated since they were '. 
183 276 'last indexed.', 184 277 new PhutilNumber($count_skipped))); 185 - echo tsprintf( 186 - "**<bg:blue> %s </bg>** %s\n", 278 + $this->logInfo( 187 279 pht('NOTE'), 188 280 pht( 189 281 'Use "--force" to force the index to update these documents.')); 190 282 } 191 283 } else if ($is_background) { 192 - echo tsprintf( 193 - "**<bg:green> %s </bg>** %s\n", 284 + $this->logOkay( 194 285 pht('DONE'), 195 286 pht( 196 287 'Queued %s document(s) for background indexing.', 197 288 new PhutilNumber(count($phids)))); 198 289 } else { 199 - echo tsprintf( 200 - "**<bg:green> %s </bg>** %s\n", 290 + $this->logOkay( 201 291 pht('DONE'), 202 292 pht( 203 293 'Forced search index updates for %s document(s).', ··· 224 314 return mpull($objects, 'getPHID'); 225 315 } 226 316 227 - private function loadPHIDsByTypes($type) { 317 + private function getIndexableObjectsByTypes(array $types) { 228 318 $objects = id(new PhutilClassMapQuery()) 229 319 ->setAncestorClass('PhabricatorIndexableInterface') 230 320 ->execute(); 231 321 232 - $normalized_type = phutil_utf8_strtolower($type); 322 + $type_map = array(); 323 + $normal_map = array(); 324 + foreach ($types as $type) { 325 + $normalized_type = phutil_utf8_strtolower($type); 326 + $type_map[$type] = $normalized_type; 233 327 234 - $matches = array(); 328 + if (isset($normal_map[$normalized_type])) { 329 + $old_type = $normal_map[$normalized_type]; 330 + throw new PhutilArgumentUsageException( 331 + pht( 332 + 'Type specification "%s" duplicates type specification "%s". '. 
333 + 'Specify each type only once.', 334 + $type, 335 + $old_type)); 336 + } 337 + 338 + $normal_map[$normalized_type] = $type; 339 + } 340 + 341 + $object_matches = array(); 342 + 343 + $matches_map = array(); 344 + $exact_map = array(); 235 345 foreach ($objects as $object) { 236 346 $object_class = get_class($object); 347 + 348 + if (!$types) { 349 + $object_matches[$object_class] = $object; 350 + continue; 351 + } 352 + 237 353 $normalized_class = phutil_utf8_strtolower($object_class); 238 354 239 - if ($normalized_class === $normalized_type) { 240 - $matches = array($object_class => $object); 241 - break; 355 + // If a specified type is exactly the name of this class, match it. 356 + if (isset($normal_map[$normalized_class])) { 357 + $object_matches[$object_class] = $object; 358 + $matching_type = $normal_map[$normalized_class]; 359 + $matches_map[$matching_type] = array($object_class); 360 + $exact_map[$matching_type] = true; 361 + continue; 242 362 } 243 363 244 - if (!strlen($type) || 245 - strpos($normalized_class, $normalized_type) !== false) { 246 - $matches[$object_class] = $object; 364 + foreach ($type_map as $type => $normalized_type) { 365 + // If we already have an exact match for this type, don't match it 366 + // as a substring. An indexable "MothObject" should be selectable 367 + // exactly without also selecting "MammothObject". 368 + if (isset($exact_map[$type])) { 369 + continue; 370 + } 371 + 372 + // If the selector isn't a substring of the class name, continue. 
373 + if (strpos($normalized_class, $normalized_type) === false) { 374 + continue; 375 + } 247 376 377 + $matches_map[$type][] = $object_class; 378 + $object_matches[$object_class] = $object; 248 379 } 249 380 } 250 381 251 - if (!$matches) { 252 - $all_types = array(); 253 - foreach ($objects as $object) { 254 - $all_types[] = get_class($object); 255 - } 256 - sort($all_types); 257 - 258 - throw new PhutilArgumentUsageException( 259 - pht( 260 - 'Type "%s" matches no indexable objects. Supported types are: %s.', 261 - $type, 262 - implode(', ', $all_types))); 382 + $all_types = array(); 383 + foreach ($objects as $object) { 384 + $all_types[] = get_class($object); 263 385 } 386 + sort($all_types); 387 + $type_list = implode(', ', $all_types); 264 388 265 - if ((count($matches) > 1) && strlen($type)) { 266 - throw new PhutilArgumentUsageException( 267 - pht( 268 - 'Type "%s" matches multiple indexable objects. Use a more '. 269 - 'specific string. Matching object types are: %s.', 270 - $type, 271 - implode(', ', array_keys($matches)))); 272 - } 389 + foreach ($type_map as $type => $normalized_type) { 390 + $matches = idx($matches_map, $type); 391 + if (!$matches) { 392 + throw new PhutilArgumentUsageException( 393 + pht( 394 + 'Type "%s" matches no indexable objects. '. 395 + 'Supported types are: %s.', 396 + $type, 397 + $type_list)); 398 + } 273 399 274 - $phids = array(); 275 - foreach ($matches as $match) { 276 - $iterator = new LiskMigrationIterator($match); 277 - foreach ($iterator as $object) { 278 - $phids[] = $object->getPHID(); 400 + if (count($matches) > 1) { 401 + throw new PhutilArgumentUsageException( 402 + pht( 403 + 'Type "%s" matches multiple indexable objects. Use a more '. 404 + 'specific string. 
Matching objects are: %s.', 405 + $type, 406 + implode(', ', $matches))); 279 407 } 280 408 } 281 409 282 - return $phids; 410 + return $object_matches; 283 411 } 284 412 285 413 private function loadIndexVersions($phid) { ··· 292 420 ORDER BY extensionKey, version', 293 421 $table->getTableName(), 294 422 $phid); 423 + } 424 + 425 + private function loadPHIDsByIndexConstraints( 426 + array $index_versions, 427 + $min_date, 428 + $max_date) { 429 + 430 + $table = new PhabricatorSearchIndexVersion(); 431 + $conn = $table->establishConnection('r'); 432 + 433 + $where = array(); 434 + if ($index_versions) { 435 + $where[] = qsprintf( 436 + $conn, 437 + 'indexVersion IN (%Ls)', 438 + $index_versions); 439 + } 440 + 441 + if ($min_date !== null) { 442 + $where[] = qsprintf( 443 + $conn, 444 + 'indexEpoch >= %d', 445 + $min_date); 446 + } 447 + 448 + if ($max_date !== null) { 449 + $where[] = qsprintf( 450 + $conn, 451 + 'indexEpoch <= %d', 452 + $max_date); 453 + } 454 + 455 + $rows = queryfx_all( 456 + $conn, 457 + 'SELECT DISTINCT objectPHID FROM %R WHERE %LA', 458 + $table, 459 + $where); 460 + 461 + return ipull($rows, 'objectPHID'); 295 462 } 296 463 297 464 }
+8
src/applications/search/storage/PhabricatorSearchIndexVersion.php
··· 6 6 protected $objectPHID; 7 7 protected $extensionKey; 8 8 protected $version; 9 + protected $indexVersion; 10 + protected $indexEpoch; 9 11 10 12 protected function getConfiguration() { 11 13 return array( ··· 13 15 self::CONFIG_COLUMN_SCHEMA => array( 14 16 'extensionKey' => 'text64', 15 17 'version' => 'text128', 18 + 'indexVersion' => 'bytes12', 19 + 'indexEpoch' => 'epoch', 16 20 ), 17 21 self::CONFIG_KEY_SCHEMA => array( 18 22 'key_object' => array( 19 23 'columns' => array('objectPHID', 'extensionKey'), 20 24 'unique' => true, 21 25 ), 26 + 27 + // NOTE: "bin/search index" may query this table by "indexVersion" or 28 + // "indexEpoch", but this is rare and scanning the table seems fine. 29 + 22 30 ), 23 31 ) + parent::getConfiguration(); 24 32 }