@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.) hq.recaptime.dev/wiki/Phorge
phorge phabricator
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at recaptime-dev/main 544 lines 16 kB view raw
<?php

/**
 * Fulltext storage engine which indexes and searches documents through an
 * Elasticsearch service over HTTP.
 *
 * The index name, request timeout and Elasticsearch major version are read
 * from the service configuration in @{method:setService}.
 */
class PhabricatorElasticFulltextStorageEngine
  extends PhabricatorFulltextStorageEngine {

  private $index;
  private $timeout;
  private $version;

  /**
   * Attach the search service and read engine settings from its config.
   *
   * Reads `path` (default `/phabricator`, slashes stripped to form the index
   * name), `timeout` (default 15) and `version` (default 5, cast to int).
   *
   * @param PhabricatorSearchService $service Service to attach.
   * @return this
   */
  public function setService(PhabricatorSearchService $service) {
    $this->service = $service;
    $config = $service->getConfig();
    $index = idx($config, 'path', '/phabricator');
    $this->index = str_replace('/', '', $index);
    $this->timeout = idx($config, 'timeout', 15);
    $this->version = (int)idx($config, 'version', 5);
    return $this;
  }

  /**
   * @return string Identifier of this engine, always `elasticsearch`.
   */
  public function getEngineIdentifier() {
    return 'elasticsearch';
  }

  /**
   * Name of the document field which stores the modification time.
   *
   * @return string `_timestamp` when the configured version is below 2,
   *   otherwise `lastModified`.
   */
  public function getTimestampField() {
    return $this->version < 2 ?
      '_timestamp' : 'lastModified';
  }

  /**
   * Mapping type used for analyzed text fields.
   *
   * @return string `text` when the configured version is 5 or newer,
   *   otherwise `string`.
   */
  public function getTextFieldType() {
    return $this->version >= 5
      ? 'text' : 'string';
  }

  /**
   * @return PhabricatorElasticsearchHost New host object bound to this
   *   engine.
   */
  public function getHostType() {
    return new PhabricatorElasticsearchHost($this);
  }

  /**
   * @return PhabricatorElasticsearchHost Any host with the `read` role.
   */
  public function getHostForRead() {
    return $this->getService()->getAnyHostForRole('read');
  }

  /**
   * @return PhabricatorElasticsearchHost Any host with the `write` role.
   */
  public function getHostForWrite() {
    return $this->getService()->getAnyHostForRole('write');
  }

  /**
   * Override the request timeout read from the service configuration.
   *
   * @param int $timeout Timeout passed to the HTTP future; a falsey value
   *   means no timeout is set on requests.
   * @return this
   */
  public function setTimeout($timeout) {
    $this->timeout = $timeout;
    return $this;
  }

  /**
   * @return int Currently configured request timeout.
   */
  public function getTimeout() {
    return $this->timeout;
  }

  /**
   * List the distinct values of all constants declared on a class.
   *
   * @param string $class Name of the class to inspect.
   * @return array Unique constant values. Keys are those left over by
   *   `array_unique()` and are not guaranteed to be contiguous.
   */
  public function getTypeConstants($class) {
    $relationship_class = new ReflectionClass($class);
    $typeconstants = $relationship_class->getConstants();
    return array_unique(array_values($typeconstants));
  }

  /**
   * Write (or overwrite) one abstract document in the index.
   *
   * The document is sent with `PUT` to `/<type>/<phid>/` on a write host.
   *
   * @param PhabricatorSearchAbstractDocument $doc Document to index.
   * @return void
   */
  public function reindexAbstractDocument(
    PhabricatorSearchAbstractDocument $doc) {

    $host = $this->getHostForWrite();

    $type = $doc->getDocumentType();
    $phid = $doc->getPHID();

    $timestamp_key = $this->getTimestampField();

    $spec = array(
      'title' => $doc->getDocumentTitle(),
      'dateCreated' => $doc->getDocumentCreated(),
      $timestamp_key => $doc->getDocumentModified(),
    );

    // Each field may appear several times; collect every corpus (and any
    // auxiliary value) for a field name into a single list.
    foreach ($doc->getFieldData() as $field) {
      list($field_name, $corpus, $aux) = $field;
      if (!isset($spec[$field_name])) {
        $spec[$field_name] = array($corpus);
      } else {
        $spec[$field_name][] = $corpus;
      }
      if ($aux != null) {
        $spec[$field_name][] = $aux;
      }
    }

    // Relationships are stored as lists of related PHIDs. When a relationship
    // carries a time, it is stored in a sibling "<name>_ts" field; a later
    // relationship with the same name overwrites an earlier time.
    foreach ($doc->getRelationshipData() as $field) {
      list($field_name, $related_phid, $rtype, $time) = $field;
      if (!isset($spec[$field_name])) {
        $spec[$field_name] = array($related_phid);
      } else {
        $spec[$field_name][] = $related_phid;
      }
      if ($time) {
        $spec[$field_name.'_ts'] = $time;
      }
    }

    $this->executeRequest($host, "/{$type}/{$phid}/", $spec, 'PUT');
  }

  /**
   * Translate a saved query into an Elasticsearch search request body.
   *
   * @param PhabricatorSavedQuery $query Query to translate.
   * @return array Request body with `_source`, `query`, `from`, `size` and,
   *   when there is no query string, `sort`.
   * @throws Exception When offset plus limit exceeds 10000.
   */
  private function buildSpec(PhabricatorSavedQuery $query) {
    $q = new PhabricatorElasticsearchQueryBuilder();
    $query_string = $query->getParameter('query');

    // Guard against null before strlen(): passing null to strlen() is
    // deprecated since PHP 8.1. The same flag drives the sort decision below
    // so that a query such as "0" is consistently treated as a text query.
    $has_query = ($query_string !== null && strlen($query_string));

    if ($has_query) {
      // Build a simple_query_string query over all fields that must match all
      // of the words in the search string.
      $q->addMustClause(array(
        'simple_query_string' => array(
          'query' => $query_string,
          'fields' => array(
            PhabricatorSearchDocumentFieldType::FIELD_TITLE.'.*',
            PhabricatorSearchDocumentFieldType::FIELD_BODY.'.*',
            PhabricatorSearchDocumentFieldType::FIELD_COMMENT.'.*',
          ),
          'default_operator' => 'AND',
        ),
      ));

      // This second query clause is "SHOULD" so it only affects ranking of
      // documents which already matched the "MUST" clause. This amplifies the
      // score of documents which have an exact match on title, body
      // or comments.
      $q->addShouldClause(array(
        'simple_query_string' => array(
          'query' => $query_string,
          'fields' => array(
            '*.raw',
            PhabricatorSearchDocumentFieldType::FIELD_TITLE.'^4',
            PhabricatorSearchDocumentFieldType::FIELD_BODY.'^3',
            PhabricatorSearchDocumentFieldType::FIELD_COMMENT.'^1.2',
          ),
          'analyzer' => 'english_exact',
          'default_operator' => 'and',
        ),
      ));

    }

    $exclude = $query->getParameter('exclude');
    if ($exclude) {
      // NOTE(review): the "not" query form may not be accepted by newer
      // Elasticsearch versions -- confirm against the targeted versions.
      $q->addFilterClause(array(
        'not' => array(
          'ids' => array(
            'values' => array($exclude),
          ),
        ),
      ));
    }

    $relationship_map = array(
      PhabricatorSearchRelationship::RELATIONSHIP_AUTHOR =>
        $query->getParameter('authorPHIDs', array()),
      PhabricatorSearchRelationship::RELATIONSHIP_SUBSCRIBER =>
        $query->getParameter('subscriberPHIDs', array()),
      PhabricatorSearchRelationship::RELATIONSHIP_PROJECT =>
        $query->getParameter('projectPHIDs', array()),
      PhabricatorSearchRelationship::RELATIONSHIP_REPOSITORY =>
        $query->getParameter('repositoryPHIDs', array()),
    );

    $statuses = $query->getParameter('statuses', array());
    $statuses = array_fuse($statuses);

    $rel_open = PhabricatorSearchRelationship::RELATIONSHIP_OPEN;
    $rel_closed = PhabricatorSearchRelationship::RELATIONSHIP_CLOSED;
    $rel_unowned = PhabricatorSearchRelationship::RELATIONSHIP_UNOWNED;

    $include_open = !empty($statuses[$rel_open]);
    $include_closed = !empty($statuses[$rel_closed]);

    // Only constrain by status when exactly one of open/closed is requested;
    // requesting both or neither leaves the status unconstrained.
    if ($include_open && !$include_closed) {
      $q->addExistsClause($rel_open);
    } else if (!$include_open && $include_closed) {
      $q->addExistsClause($rel_closed);
    }

    if ($query->getParameter('withUnowned')) {
      $q->addExistsClause($rel_unowned);
    }

    $rel_owner = PhabricatorSearchRelationship::RELATIONSHIP_OWNER;
    if ($query->getParameter('withAnyOwner')) {
      $q->addExistsClause($rel_owner);
    } else {
      $owner_phids = $query->getParameter('ownerPHIDs', array());
      if (count($owner_phids)) {
        $q->addTermsClause($rel_owner, $owner_phids);
      }
    }

    foreach ($relationship_map as $field => $phids) {
      if (is_array($phids) && !empty($phids)) {
        $q->addTermsClause($field, $phids);
      }
    }

    // Without any "must" clause, match every document so the remaining
    // clauses still have something to apply to.
    if (!$q->getClauseCount('must')) {
      $q->addMustClause(array('match_all' => array('boost' => 1 )));
    }

    $spec = array(
      '_source' => false,
      'query' => array(
        'bool' => $q->toArray(),
      ),
    );

    // With no query string there is no relevance to rank by, so order by
    // creation date, newest first.
    if (!$has_query) {
      $spec['sort'] = array(
        array('dateCreated' => 'desc'),
      );
    }

    $offset = (int)$query->getParameter('offset', 0);
    $limit = (int)$query->getParameter('limit', 101);
    if ($offset + $limit > 10000) {
      throw new Exception(pht(
        'Query offset is too large. offset+limit=%s (max=%s)',
        $offset + $limit,
        10000));
    }
    $spec['from'] = $offset;
    $spec['size'] = $limit;

    return $spec;
  }

  /**
   * Execute a saved query and return the PHIDs of matching documents.
   *
   * Each host with the `read` role is tried in turn; the first successful
   * response wins.
   *
   * @param PhabricatorSavedQuery $query Query to execute.
   * @return array Values of `_id` pulled from the response hits.
   * @throws PhutilAggregateException When every read host raises an
   *   exception (or there are no read hosts).
   */
  public function executeSearch(PhabricatorSavedQuery $query) {
    $types = $query->getParameter('types');
    if (!$types) {
      $types = array_keys(
        PhabricatorSearchApplicationSearchEngine::getIndexableDocumentTypes());
    }

    // Don't use '/_search' for the case that there is something
    // else in the index (for example if 'phabricator' is only an alias to
    // some bigger index). Use '/$types/_search' instead.
    $uri = '/'.implode(',', $types).'/_search';

    $spec = $this->buildSpec($query);
    $exceptions = array();

    foreach ($this->service->getAllHostsForRole('read') as $host) {
      try {
        $response = $this->executeRequest($host, $uri, $spec);
        $phids = ipull($response['hits']['hits'], '_id');
        return $phids;
      } catch (Exception $e) {
        $exceptions[] = $e;
      }
    }
    throw new PhutilAggregateException(pht('All Fulltext Search hosts failed:'),
      $exceptions);
  }

  /**
   * Test whether the configured index exists on a host.
   *
   * @param PhabricatorElasticsearchHost|null $host Host to ask; defaults to
   *   any read host.
   * @return bool True if the index appears to exist. A 404 response is
   *   reported as false.
   * @throws HTTPFutureHTTPResponseStatus For HTTP errors other than 404.
   */
  public function indexExists(?PhabricatorElasticsearchHost $host = null) {
    if (!$host) {
      $host = $this->getHostForRead();
    }
    try {
      if ($this->version >= 5) {
        $uri = '/_stats/';
        $res = $this->executeRequest($host, $uri, array());
        // Look up the configured index name rather than a hard-coded
        // default, so installs with a custom "path" are detected correctly.
        return isset($res['indices'][$this->index]);
      } else if ($this->version >= 2) {
        $uri = '';
      } else {
        $uri = '/_status/';
      }
      return (bool)$this->executeRequest($host, $uri, array());
    } catch (HTTPFutureHTTPResponseStatus $e) {
      if ($e->getStatusCode() == 404) {
        return false;
      }
      throw $e;
    }
  }

  /**
   * Build the settings and mappings this engine expects the index to have.
   *
   * @return array Map with `settings` (analysis filters and analyzers) and
   *   `mappings` (one property map per indexable document type).
   */
  private function getIndexConfiguration() {
    $data = array();
    $data['settings'] = array(
      'index' => array(
        'auto_expand_replicas' => '0-2',
        'analysis' => array(
          'filter' => array(
            'english_stop' => array(
              'type' => 'stop',
              'stopwords' => '_english_',
            ),
            'english_stemmer' => array(
              'type' => 'stemmer',
              'language' => 'english',
            ),
            'english_possessive_stemmer' => array(
              'type' => 'stemmer',
              'language' => 'possessive_english',
            ),
          ),
          'analyzer' => array(
            'english_exact' => array(
              'tokenizer' => 'standard',
              'filter' => array('lowercase'),
            ),
            'letter_stop' => array(
              'tokenizer' => 'letter',
              'filter' => array('lowercase', 'english_stop'),
            ),
            'english_stem' => array(
              'tokenizer' => 'standard',
              'filter' => array(
                'english_possessive_stemmer',
                'lowercase',
                'english_stop',
                'english_stemmer',
              ),
            ),
          ),
        ),
      ),
    );

    $fields = $this->getTypeConstants('PhabricatorSearchDocumentFieldType');
    $relationships = $this->getTypeConstants('PhabricatorSearchRelationship');

    $doc_types = array_keys(
      PhabricatorSearchApplicationSearchEngine::getIndexableDocumentTypes());

    $text_type = $this->getTextFieldType();

    foreach ($doc_types as $type) {
      $properties = array();
      foreach ($fields as $field) {
        // Use the custom analyzer for the corpus of text
        $properties[$field] = array(
          'type' => $text_type,
          'fields' => array(
            'raw' => array(
              'type' => $text_type,
              'analyzer' => 'english_exact',
              'search_analyzer' => 'english',
              'search_quote_analyzer' => 'english_exact',
            ),
            'keywords' => array(
              'type' => $text_type,
              'analyzer' => 'letter_stop',
            ),
            'stems' => array(
              'type' => $text_type,
              'analyzer' => 'english_stem',
            ),
          ),
        );
      }

      // Relationship fields hold PHIDs and are stored unanalyzed: as
      // not_analyzed strings before version 5, as keywords from 5 onward.
      if ($this->version < 5) {
        foreach ($relationships as $rel) {
          $properties[$rel] = array(
            'type' => 'string',
            'index' => 'not_analyzed',
            'include_in_all' => false,
          );
          $properties[$rel.'_ts'] = array(
            'type' => 'date',
            'include_in_all' => false,
          );
        }
      } else {
        foreach ($relationships as $rel) {
          $properties[$rel] = array(
            'type' => 'keyword',
            'include_in_all' => false,
            'doc_values' => false,
          );
          $properties[$rel.'_ts'] = array(
            'type' => 'date',
            'include_in_all' => false,
          );
        }
      }

      // Ensure we have dateCreated since the default query requires it
      $properties['dateCreated']['type'] = 'date';
      $properties['lastModified']['type'] = 'date';

      $data['mappings'][$type]['properties'] = $properties;
    }
    return $data;
  }

  /**
   * Test whether the index exists and contains the expected configuration.
   *
   * Fetches the current mappings and settings, merges the entries for the
   * configured index, and verifies that everything returned by
   * @{method:getIndexConfiguration} is present in the result.
   *
   * @param PhabricatorElasticsearchHost|null $host Host to ask; defaults to
   *   any read host.
   * @return bool True if the index exists and matches the expected
   *   configuration.
   */
  public function indexIsSane(?PhabricatorElasticsearchHost $host = null) {
    if (!$host) {
      $host = $this->getHostForRead();
    }
    if (!$this->indexExists($host)) {
      return false;
    }
    $cur_mapping = $this->executeRequest($host, '/_mapping/', array());
    $cur_settings = $this->executeRequest($host, '/_settings/', array());
    $actual = array_merge($cur_settings[$this->index],
      $cur_mapping[$this->index]);

    $res = $this->check($actual, $this->getIndexConfiguration());
    return $res;
  }

  /**
   * Recursively check that an actual Elasticsearch configuration contains
   * everything in a required configuration.
   *
   * The comparison is one-directional: keys present in `$actual` but absent
   * from `$required` are ignored. Scalar values are compared loosely after
   * normalizing booleans to the strings 'true' and 'false'.
   *
   * @param array $actual Configuration reported by the server.
   * @param array $required Configuration this engine expects.
   * @param string $path Dotted path of the keys descended so far.
   * @return bool True if every required key is present with an equal value.
   */
  private function check($actual, $required, $path = '') {
    foreach ($required as $key => $value) {
      if (!array_key_exists($key, $actual)) {
        if ($key === '_all') {
          // The _all field never comes back so we just have to assume it
          // is set correctly.
          continue;
        }
        return false;
      }
      if (is_array($value)) {
        if (!is_array($actual[$key])) {
          return false;
        }
        if (!$this->check($actual[$key], $value, $path.'.'.$key)) {
          return false;
        }
        continue;
      }

      $actual[$key] = self::normalizeConfigValue($actual[$key]);
      $value = self::normalizeConfigValue($value);
      if ($actual[$key] != $value) {
        return false;
      }
    }
    return true;
  }

  /**
   * Normalize a config value for comparison. Elasticsearch accepts all kinds
   * of config values but it tends to throw back 'true' for true and 'false'
   * for false so we normalize everything. Sometimes, oddly, it'll throw back
   * false for false....
   *
   * @param mixed $value Config value.
   * @return mixed The string 'true' or 'false' for booleans; any other value
   *   is returned unchanged.
   */
  private static function normalizeConfigValue($value) {
    if ($value === true) {
      return 'true';
    } else if ($value === false) {
      return 'false';
    }
    return $value;
  }

  /**
   * (Re)create the index with the expected configuration.
   *
   * If the index already exists it is deleted first, which discards all
   * indexed documents.
   *
   * @return void
   */
  public function initIndex() {
    $host = $this->getHostForWrite();
    if ($this->indexExists()) {
      $this->executeRequest($host, '/', array(), 'DELETE');
    }
    $data = $this->getIndexConfiguration();
    $this->executeRequest($host, '/', $data, 'PUT');
  }

  /**
   * Fetch human-readable statistics about the configured index.
   *
   * @param PhabricatorElasticsearchHost|null $host Host to ask; defaults to
   *   any read host.
   * @return array|false Map of translated labels to values, or false when
   *   the configured version is below 2.
   */
  public function getIndexStats(?PhabricatorElasticsearchHost $host = null) {
    if ($this->version < 2) {
      return false;
    }
    if (!$host) {
      $host = $this->getHostForRead();
    }
    $uri = '/_stats/';

    $res = $this->executeRequest($host, $uri, array());
    $stats = $res['indices'][$this->index];
    return array(
      pht('Queries') =>
        idxv($stats, array('primaries', 'search', 'query_total')),
      pht('Documents') =>
        idxv($stats, array('total', 'docs', 'count')),
      pht('Deleted') =>
        idxv($stats, array('total', 'docs', 'deleted')),
      pht('Storage Used') =>
        phutil_format_bytes(idxv($stats,
          array('total', 'store', 'size_in_bytes'))),
    );
  }

  /**
   * Send a JSON request to an Elasticsearch host.
   *
   * The host is marked unhealthy when the request times out, when the server
   * answers with a status above 499, or when a `GET` response is not valid
   * JSON. It is marked healthy after a `GET` response decodes successfully.
   *
   * @param PhabricatorElasticsearchHost $host Host to send the request to.
   * @param string $path Path handed to the host to build the request URI.
   * @param array $data Request payload; JSON-encoded before sending.
   * @param string $method HTTP method, `GET` by default.
   * @return mixed|null Decoded response body for `GET` requests, null for
   *   any other method.
   * @throws HTTPFutureResponseStatus When the request fails.
   * @throws Exception When a `GET` response body is not valid JSON.
   */
  private function executeRequest(PhabricatorElasticsearchHost $host, $path,
    array $data, $method = 'GET') {

    $uri = $host->getURI($path);
    $data = phutil_json_encode($data);
    $future = new HTTPSFuture($uri, $data);
    $future->addHeader('Content-Type', 'application/json');

    if ($method != 'GET') {
      $future->setMethod($method);
    }
    if ($this->getTimeout()) {
      $future->setTimeout($this->getTimeout());
    }
    try {
      list($body) = $future->resolvex();
    } catch (HTTPFutureResponseStatus $ex) {
      if ($ex->isTimeout() || (int)$ex->getStatusCode() > 499) {
        $host->didHealthCheck(false);
      }
      throw $ex;
    }

    // Responses to write requests are not inspected.
    if ($method != 'GET') {
      return null;
    }

    try {
      $data = phutil_json_decode($body);
      $host->didHealthCheck(true);
      return $data;
    } catch (PhutilJSONParserException $ex) {
      $host->didHealthCheck(false);
      throw new Exception(
        pht('Elasticsearch server returned invalid JSON!'),
        0,
        $ex);
    }

  }

}