@recaptime-dev's working patches for, and fork of, Phorge, a community fork of Phabricator. (The upstream development and stable branches are tracked at upstream/main and upstream/stable, respectively.) hq.recaptime.dev/wiki/Phorge
phorge phabricator
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Make "mysql" mean "Ferret engine" in Fulltext search

Summary: Ref T12819. Swaps constants so existing configurations that use a "mysql" engine now use the Ferret engine, not an InnoDB/MyISAM FULLTEXT engine.

Test Plan: Swapped my local config back to "mysql" (the default) and saw Ferret engine results in the UI.

Reviewers: chad

Reviewed By: chad

Maniphest Tasks: T12819

Differential Revision: https://secure.phabricator.com/D18590

+1 -507
-2
src/__phutil_library_map__.php
··· 3197 3197 'PhabricatorMustVerifyEmailController' => 'applications/auth/controller/PhabricatorMustVerifyEmailController.php', 3198 3198 'PhabricatorMySQLConfigOptions' => 'applications/config/option/PhabricatorMySQLConfigOptions.php', 3199 3199 'PhabricatorMySQLFileStorageEngine' => 'applications/files/engine/PhabricatorMySQLFileStorageEngine.php', 3200 - 'PhabricatorMySQLFulltextStorageEngine' => 'applications/search/fulltextstorage/PhabricatorMySQLFulltextStorageEngine.php', 3201 3200 'PhabricatorMySQLSearchHost' => 'infrastructure/cluster/search/PhabricatorMySQLSearchHost.php', 3202 3201 'PhabricatorMySQLSetupCheck' => 'applications/config/check/PhabricatorMySQLSetupCheck.php', 3203 3202 'PhabricatorNamedQuery' => 'applications/search/storage/PhabricatorNamedQuery.php', ··· 8588 8587 'PhabricatorMustVerifyEmailController' => 'PhabricatorAuthController', 8589 8588 'PhabricatorMySQLConfigOptions' => 'PhabricatorApplicationConfigOptions', 8590 8589 'PhabricatorMySQLFileStorageEngine' => 'PhabricatorFileStorageEngine', 8591 - 'PhabricatorMySQLFulltextStorageEngine' => 'PhabricatorFulltextStorageEngine', 8592 8590 'PhabricatorMySQLSearchHost' => 'PhabricatorSearchHost', 8593 8591 'PhabricatorMySQLSetupCheck' => 'PhabricatorSetupCheck', 8594 8592 'PhabricatorNamedQuery' => array(
+1 -1
src/applications/search/fulltextstorage/PhabricatorFerretFulltextStorageEngine.php
··· 7 7 private $engineLimits; 8 8 9 9 public function getEngineIdentifier() { 10 - return 'ferret'; 10 + return 'mysql'; 11 11 } 12 12 13 13 public function getHostType() {
-504
src/applications/search/fulltextstorage/PhabricatorMySQLFulltextStorageEngine.php
··· 1 - <?php 2 - 3 - final class PhabricatorMySQLFulltextStorageEngine 4 - extends PhabricatorFulltextStorageEngine { 5 - 6 - private $fulltextTokens = array(); 7 - private $engineLimits; 8 - 9 - public function getEngineIdentifier() { 10 - return 'mysql'; 11 - } 12 - 13 - public function getHostType() { 14 - return new PhabricatorMySQLSearchHost($this); 15 - } 16 - 17 - public function reindexAbstractDocument( 18 - PhabricatorSearchAbstractDocument $doc) { 19 - 20 - $phid = $doc->getPHID(); 21 - if (!$phid) { 22 - throw new Exception(pht('Document has no PHID!')); 23 - } 24 - 25 - $store = new PhabricatorSearchDocument(); 26 - $store->setPHID($doc->getPHID()); 27 - $store->setDocumentType($doc->getDocumentType()); 28 - $store->setDocumentTitle($doc->getDocumentTitle()); 29 - $store->setDocumentCreated($doc->getDocumentCreated()); 30 - $store->setDocumentModified($doc->getDocumentModified()); 31 - $store->replace(); 32 - 33 - $conn_w = $store->establishConnection('w'); 34 - 35 - $stemmer = new PhutilSearchStemmer(); 36 - 37 - $field_dao = new PhabricatorSearchDocumentField(); 38 - queryfx( 39 - $conn_w, 40 - 'DELETE FROM %T WHERE phid = %s', 41 - $field_dao->getTableName(), 42 - $phid); 43 - foreach ($doc->getFieldData() as $field) { 44 - list($ftype, $corpus, $aux_phid) = $field; 45 - 46 - $stemmed_corpus = $stemmer->stemCorpus($corpus); 47 - 48 - queryfx( 49 - $conn_w, 50 - 'INSERT INTO %T 51 - (phid, phidType, field, auxPHID, corpus, stemmedCorpus) '. 
52 - 'VALUES (%s, %s, %s, %ns, %s, %s)', 53 - $field_dao->getTableName(), 54 - $phid, 55 - $doc->getDocumentType(), 56 - $ftype, 57 - $aux_phid, 58 - $corpus, 59 - $stemmed_corpus); 60 - } 61 - 62 - 63 - $sql = array(); 64 - foreach ($doc->getRelationshipData() as $relationship) { 65 - list($rtype, $to_phid, $to_type, $time) = $relationship; 66 - $sql[] = qsprintf( 67 - $conn_w, 68 - '(%s, %s, %s, %s, %d)', 69 - $phid, 70 - $to_phid, 71 - $rtype, 72 - $to_type, 73 - $time); 74 - } 75 - 76 - $rship_dao = new PhabricatorSearchDocumentRelationship(); 77 - queryfx( 78 - $conn_w, 79 - 'DELETE FROM %T WHERE phid = %s', 80 - $rship_dao->getTableName(), 81 - $phid); 82 - if ($sql) { 83 - queryfx( 84 - $conn_w, 85 - 'INSERT INTO %T '. 86 - '(phid, relatedPHID, relation, relatedType, relatedTime) '. 87 - 'VALUES %Q', 88 - $rship_dao->getTableName(), 89 - implode(', ', $sql)); 90 - } 91 - 92 - } 93 - 94 - public function executeSearch(PhabricatorSavedQuery $query) { 95 - $table = new PhabricatorSearchDocument(); 96 - $document_table = $table->getTableName(); 97 - $conn = $table->establishConnection('r'); 98 - 99 - $subquery = $this->newFulltextSubquery($query, $conn); 100 - 101 - $offset = (int)$query->getParameter('offset', 0); 102 - $limit = (int)$query->getParameter('limit', 25); 103 - 104 - // NOTE: We must JOIN the subquery in order to apply a limit. 
105 - $results = queryfx_all( 106 - $conn, 107 - 'SELECT 108 - documentPHID, 109 - MAX(fieldScore) AS documentScore 110 - FROM (%Q) query 111 - JOIN %T root ON query.documentPHID = root.phid 112 - GROUP BY documentPHID 113 - ORDER BY documentScore DESC 114 - LIMIT %d, %d', 115 - $subquery, 116 - $document_table, 117 - $offset, 118 - $limit); 119 - 120 - return ipull($results, 'documentPHID'); 121 - } 122 - 123 - private function newFulltextSubquery( 124 - PhabricatorSavedQuery $query, 125 - AphrontDatabaseConnection $conn) { 126 - 127 - $field = new PhabricatorSearchDocumentField(); 128 - $field_table = $field->getTableName(); 129 - 130 - $document = new PhabricatorSearchDocument(); 131 - $document_table = $document->getTableName(); 132 - 133 - $select = array(); 134 - $select[] = 'document.phid AS documentPHID'; 135 - 136 - $join = array(); 137 - $where = array(); 138 - 139 - $title_field = PhabricatorSearchDocumentFieldType::FIELD_TITLE; 140 - $title_boost = 1024; 141 - 142 - $stemmer = new PhutilSearchStemmer(); 143 - 144 - $raw_query = $query->getParameter('query'); 145 - $raw_query = trim($raw_query); 146 - if (strlen($raw_query)) { 147 - $compiler = PhabricatorSearchDocument::newQueryCompiler() 148 - ->setStemmer($stemmer); 149 - 150 - $tokens = $compiler->newTokens($raw_query); 151 - 152 - list($min_length, $stopword_list) = $this->getEngineLimits($conn); 153 - 154 - // Process all the parts of the user's query so we can show them which 155 - // parts we searched for and which ones we ignored. 156 - $fulltext_tokens = array(); 157 - foreach ($tokens as $key => $token) { 158 - $fulltext_token = id(new PhabricatorFulltextToken()) 159 - ->setToken($token); 160 - 161 - $fulltext_tokens[$key] = $fulltext_token; 162 - 163 - $value = $token->getValue(); 164 - 165 - // If the value is unquoted, we'll stem it in the query, so stem it 166 - // here before performing filtering tests. See T12596. 
167 - if (!$token->isQuoted()) { 168 - $value = $stemmer->stemToken($value); 169 - } 170 - 171 - if ($this->isShortToken($value, $min_length)) { 172 - $fulltext_token->setIsShort(true); 173 - continue; 174 - } 175 - 176 - if (isset($stopword_list[phutil_utf8_strtolower($value)])) { 177 - $fulltext_token->setIsStopword(true); 178 - continue; 179 - } 180 - } 181 - $this->fulltextTokens = $fulltext_tokens; 182 - 183 - // Remove tokens which aren't queryable from the query. This is mostly 184 - // a workaround for the peculiar behaviors described in T12137. 185 - foreach ($this->fulltextTokens as $key => $fulltext_token) { 186 - if (!$fulltext_token->isQueryable()) { 187 - unset($tokens[$key]); 188 - } 189 - } 190 - 191 - if (!$tokens) { 192 - throw new PhutilSearchQueryCompilerSyntaxException( 193 - pht( 194 - 'All of your search terms are too short or too common to '. 195 - 'appear in the search index. Search for longer or more '. 196 - 'distinctive terms.')); 197 - } 198 - 199 - $queries = array(); 200 - $queries[] = $compiler->compileLiteralQuery($tokens); 201 - $queries[] = $compiler->compileStemmedQuery($tokens); 202 - $compiled_query = implode(' ', array_filter($queries)); 203 - } else { 204 - $compiled_query = null; 205 - } 206 - 207 - if (strlen($compiled_query)) { 208 - $select[] = qsprintf( 209 - $conn, 210 - 'IF(field.field = %s, %d, 0) + 211 - MATCH(corpus, stemmedCorpus) AGAINST (%s IN BOOLEAN MODE) 212 - AS fieldScore', 213 - $title_field, 214 - $title_boost, 215 - $compiled_query); 216 - 217 - $join[] = qsprintf( 218 - $conn, 219 - '%T field ON field.phid = document.phid', 220 - $field_table); 221 - 222 - $where[] = qsprintf( 223 - $conn, 224 - 'MATCH(corpus, stemmedCorpus) AGAINST (%s IN BOOLEAN MODE)', 225 - $compiled_query); 226 - 227 - if ($query->getParameter('field')) { 228 - $where[] = qsprintf( 229 - $conn, 230 - 'field.field = %s', 231 - $field); 232 - } 233 - } else { 234 - $select[] = qsprintf( 235 - $conn, 236 - 'document.documentCreated AS 
fieldScore'); 237 - } 238 - 239 - $exclude = $query->getParameter('exclude'); 240 - if ($exclude) { 241 - $where[] = qsprintf( 242 - $conn, 243 - 'document.phid != %s', 244 - $exclude); 245 - } 246 - 247 - $types = $query->getParameter('types'); 248 - if ($types) { 249 - if (strlen($compiled_query)) { 250 - $where[] = qsprintf( 251 - $conn, 252 - 'field.phidType IN (%Ls)', 253 - $types); 254 - } 255 - 256 - $where[] = qsprintf( 257 - $conn, 258 - 'document.documentType IN (%Ls)', 259 - $types); 260 - } 261 - 262 - $join[] = $this->joinRelationship( 263 - $conn, 264 - $query, 265 - 'authorPHIDs', 266 - PhabricatorSearchRelationship::RELATIONSHIP_AUTHOR); 267 - 268 - $statuses = $query->getParameter('statuses', array()); 269 - $statuses = array_fuse($statuses); 270 - $open_rel = PhabricatorSearchRelationship::RELATIONSHIP_OPEN; 271 - $closed_rel = PhabricatorSearchRelationship::RELATIONSHIP_CLOSED; 272 - $include_open = !empty($statuses[$open_rel]); 273 - $include_closed = !empty($statuses[$closed_rel]); 274 - 275 - if ($include_open && !$include_closed) { 276 - $join[] = $this->joinRelationship( 277 - $conn, 278 - $query, 279 - 'statuses', 280 - $open_rel, 281 - true); 282 - } else if ($include_closed && !$include_open) { 283 - $join[] = $this->joinRelationship( 284 - $conn, 285 - $query, 286 - 'statuses', 287 - $closed_rel, 288 - true); 289 - } 290 - 291 - if ($query->getParameter('withAnyOwner')) { 292 - $join[] = $this->joinRelationship( 293 - $conn, 294 - $query, 295 - 'withAnyOwner', 296 - PhabricatorSearchRelationship::RELATIONSHIP_OWNER, 297 - true); 298 - } else if ($query->getParameter('withUnowned')) { 299 - $join[] = $this->joinRelationship( 300 - $conn, 301 - $query, 302 - 'withUnowned', 303 - PhabricatorSearchRelationship::RELATIONSHIP_UNOWNED, 304 - true); 305 - } else { 306 - $join[] = $this->joinRelationship( 307 - $conn, 308 - $query, 309 - 'ownerPHIDs', 310 - PhabricatorSearchRelationship::RELATIONSHIP_OWNER); 311 - } 312 - 313 - $join[] = 
$this->joinRelationship( 314 - $conn, 315 - $query, 316 - 'subscriberPHIDs', 317 - PhabricatorSearchRelationship::RELATIONSHIP_SUBSCRIBER); 318 - 319 - $join[] = $this->joinRelationship( 320 - $conn, 321 - $query, 322 - 'projectPHIDs', 323 - PhabricatorSearchRelationship::RELATIONSHIP_PROJECT); 324 - 325 - $join[] = $this->joinRelationship( 326 - $conn, 327 - $query, 328 - 'repository', 329 - PhabricatorSearchRelationship::RELATIONSHIP_REPOSITORY); 330 - 331 - $select = implode(', ', $select); 332 - 333 - $join = array_filter($join); 334 - foreach ($join as $key => $clause) { 335 - $join[$key] = ' JOIN '.$clause; 336 - } 337 - $join = implode(' ', $join); 338 - 339 - if ($where) { 340 - $where = 'WHERE '.implode(' AND ', $where); 341 - } else { 342 - $where = ''; 343 - } 344 - 345 - if (strlen($compiled_query)) { 346 - $order = ''; 347 - } else { 348 - // When not executing a query, order by document creation date. This 349 - // is the default view in object browser dialogs, like "Close Duplicate". 
350 - $order = qsprintf( 351 - $conn, 352 - 'ORDER BY document.documentCreated DESC'); 353 - } 354 - 355 - return qsprintf( 356 - $conn, 357 - 'SELECT %Q FROM %T document %Q %Q %Q LIMIT 1000', 358 - $select, 359 - $document_table, 360 - $join, 361 - $where, 362 - $order); 363 - } 364 - 365 - protected function joinRelationship( 366 - AphrontDatabaseConnection $conn, 367 - PhabricatorSavedQuery $query, 368 - $field, 369 - $type, 370 - $is_existence = false) { 371 - 372 - $sql = qsprintf( 373 - $conn, 374 - '%T AS %C ON %C.phid = document.phid AND %C.relation = %s', 375 - id(new PhabricatorSearchDocumentRelationship())->getTableName(), 376 - $field, 377 - $field, 378 - $field, 379 - $type); 380 - 381 - if (!$is_existence) { 382 - $phids = $query->getParameter($field, array()); 383 - if (!$phids) { 384 - return null; 385 - } 386 - $sql .= qsprintf( 387 - $conn, 388 - ' AND %C.relatedPHID in (%Ls)', 389 - $field, 390 - $phids); 391 - } 392 - 393 - return $sql; 394 - } 395 - 396 - public function indexExists() { 397 - return true; 398 - } 399 - 400 - public function getIndexStats() { 401 - return false; 402 - } 403 - 404 - public function getFulltextTokens() { 405 - return $this->fulltextTokens; 406 - } 407 - 408 - private function getEngineLimits(AphrontDatabaseConnection $conn) { 409 - if ($this->engineLimits === null) { 410 - $this->engineLimits = $this->newEngineLimits($conn); 411 - } 412 - return $this->engineLimits; 413 - } 414 - 415 - private function newEngineLimits(AphrontDatabaseConnection $conn) { 416 - // First, try InnoDB. Some database may not have both table engines, so 417 - // selecting variables from missing table engines can fail and throw. 
418 - 419 - try { 420 - $result = queryfx_one( 421 - $conn, 422 - 'SELECT @@innodb_ft_min_token_size innodb_max, 423 - @@innodb_ft_server_stopword_table innodb_stopword_config'); 424 - } catch (AphrontQueryException $ex) { 425 - $result = null; 426 - } 427 - 428 - if ($result) { 429 - $min_len = $result['innodb_max']; 430 - 431 - $stopword_config = $result['innodb_stopword_config']; 432 - if (preg_match('(/)', $stopword_config)) { 433 - // If the setting is nonempty and contains a slash, query the 434 - // table the user has configured. 435 - $parts = explode('/', $stopword_config); 436 - list($stopword_database, $stopword_table) = $parts; 437 - } else { 438 - // Otherwise, query the InnoDB default stopword table. 439 - $stopword_database = 'INFORMATION_SCHEMA'; 440 - $stopword_table = 'INNODB_FT_DEFAULT_STOPWORD'; 441 - } 442 - 443 - $stopwords = queryfx_all( 444 - $conn, 445 - 'SELECT * FROM %T.%T', 446 - $stopword_database, 447 - $stopword_table); 448 - $stopwords = ipull($stopwords, 'value'); 449 - $stopwords = array_fuse($stopwords); 450 - 451 - return array($min_len, $stopwords); 452 - } 453 - 454 - // If InnoDB fails, try MyISAM. 455 - $result = queryfx_one( 456 - $conn, 457 - 'SELECT 458 - @@ft_min_word_len myisam_max, 459 - @@ft_stopword_file myisam_stopwords'); 460 - 461 - $min_len = $result['myisam_max']; 462 - 463 - $file = $result['myisam_stopwords']; 464 - if (preg_match('(/resources/sql/stopwords\.txt\z)', $file)) { 465 - // If this is set to something that looks like the Phabricator 466 - // stopword file, read that. 467 - $file = 'stopwords.txt'; 468 - } else { 469 - // Otherwise, just use the default stopwords. This might be wrong 470 - // but we can't read the actual value dynamically and reading 471 - // whatever file the variable is set to could be a big headache 472 - // to get right from a security perspective. 
473 - $file = 'stopwords_myisam.txt'; 474 - } 475 - 476 - $root = dirname(phutil_get_library_root('phabricator')); 477 - $data = Filesystem::readFile($root.'/resources/sql/'.$file); 478 - $stopwords = explode("\n", $data); 479 - $stopwords = array_filter($stopwords); 480 - $stopwords = array_fuse($stopwords); 481 - 482 - return array($min_len, $stopwords); 483 - } 484 - 485 - private function isShortToken($value, $min_length) { 486 - // NOTE: The engine tokenizes internally on periods, so terms in the form 487 - // "ab.cd", where short substrings are separated by periods, do not produce 488 - // any queryable tokens. These terms are meaningful if at least one 489 - // substring is longer than the minimum length, like "example.py". See 490 - // T12928. This also applies to words with intermediate apostrophes, like 491 - // "to's". 492 - 493 - $parts = preg_split('/[.\']+/', $value); 494 - 495 - foreach ($parts as $part) { 496 - if (phutil_utf8_strlen($part) >= $min_length) { 497 - return false; 498 - } 499 - } 500 - 501 - return true; 502 - } 503 - 504 - }