@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.) hq.recaptime.dev/wiki/Phorge
phorge phabricator
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Use stemming in the MySQL fulltext search engine

Summary:
Ref T6740. When we index a document, also save a copy of the stemmed version.

When querying, search the combined corpus for the terms.

(We may need to tune this a bit later since it's possible for literal, quoted terms to match in the stemmed section, but I think this will rarely cause issues in practice.)

A downside here is that search sort of breaks if you upgrade into this and don't reindex. I wasn't able to find a way to issue the query that remained compatible with older indexes and didn't have awful performance, so my plan is:

- Put this on `secure`.
- Rebuild the index.
- If things look good after a couple of days, add a setup warning that tells people they need to rebuild the search index.

We might get some reports between now and then, but if this is super awful we should know by the end of the weekend.

Test Plan:
WOW AMAZING

{F2021466}

Reviewers: chad

Reviewed By: chad

Maniphest Tasks: T6740

Differential Revision: https://secure.phabricator.com/D16947

+29 -11
+2
resources/sql/autopatches/20161125.search.01.stemmed.sql
··· 1 + ALTER TABLE {$NAMESPACE}_search.search_documentfield 2 + ADD stemmedCorpus LONGTEXT COLLATE {$COLLATE_FULLTEXT};
+23 -9
src/applications/search/fulltextstorage/PhabricatorMySQLFulltextStorageEngine.php
··· 33 33 34 34 $conn_w = $store->establishConnection('w'); 35 35 36 + $stemmer = new PhutilSearchStemmer(); 37 + 36 38 $field_dao = new PhabricatorSearchDocumentField(); 37 39 queryfx( 38 40 $conn_w, ··· 41 43 $phid); 42 44 foreach ($doc->getFieldData() as $field) { 43 45 list($ftype, $corpus, $aux_phid) = $field; 46 + 47 + $stemmed_corpus = $stemmer->stemCorpus($corpus); 48 + 44 49 queryfx( 45 50 $conn_w, 46 - 'INSERT INTO %T (phid, phidType, field, auxPHID, corpus) '. 47 - 'VALUES (%s, %s, %s, %ns, %s)', 51 + 'INSERT INTO %T 52 + (phid, phidType, field, auxPHID, corpus, stemmedCorpus) '. 53 + 'VALUES (%s, %s, %s, %ns, %s, %s)', 48 54 $field_dao->getTableName(), 49 55 $phid, 50 56 $doc->getDocumentType(), 51 57 $ftype, 52 58 $aux_phid, 53 - $corpus); 59 + $corpus, 60 + $stemmed_corpus); 54 61 } 55 62 56 63 ··· 205 212 if (strlen($compiled_query)) { 206 213 $select[] = qsprintf( 207 214 $conn, 208 - 'IF(field.field = %s, %d, 0) + '. 209 - 'MATCH(corpus) AGAINST (%s IN BOOLEAN MODE) AS fieldScore', 215 + 'IF(field.field = %s, %d, 0) + 216 + MATCH(corpus, stemmedCorpus) AGAINST (%s IN BOOLEAN MODE) 217 + AS fieldScore', 210 218 $title_field, 211 219 $title_boost, 212 220 $compiled_query); ··· 218 226 219 227 $where[] = qsprintf( 220 228 $conn, 221 - 'MATCH(corpus) AGAINST (%s IN BOOLEAN MODE)', 229 + 'MATCH(corpus, stemmedCorpus) AGAINST (%s IN BOOLEAN MODE)', 222 230 $compiled_query); 223 231 224 232 if ($query->getParameter('field')) { ··· 380 388 } 381 389 382 390 private function compileQuery($raw_query) { 383 - $compiler = PhabricatorSearchDocument::newQueryCompiler(); 391 + $stemmer = new PhutilSearchStemmer(); 384 392 385 - return $compiler 393 + $compiler = PhabricatorSearchDocument::newQueryCompiler() 386 394 ->setQuery($raw_query) 387 - ->compileQuery(); 395 + ->setStemmer($stemmer); 396 + 397 + $queries = array(); 398 + $queries[] = $compiler->compileLiteralQuery(); 399 + $queries[] = $compiler->compileStemmedQuery(); 400 + 401 + return implode(' ', 
array_filter($queries)); 388 402 } 389 403 390 404 public function indexExists() {
+4 -2
src/applications/search/storage/document/PhabricatorSearchDocumentField.php
··· 6 6 protected $field; 7 7 protected $auxPHID; 8 8 protected $corpus; 9 + protected $stemmedCorpus; 9 10 10 11 protected function getConfiguration() { 11 12 return array( ··· 16 17 'field' => 'text4', 17 18 'auxPHID' => 'phid?', 18 19 'corpus' => 'fulltext?', 20 + 'stemmedCorpus' => 'fulltext?', 19 21 ), 20 22 self::CONFIG_KEY_SCHEMA => array( 21 23 'key_phid' => null, 22 24 'phid' => array( 23 25 'columns' => array('phid'), 24 26 ), 25 - 'corpus' => array( 26 - 'columns' => array('corpus'), 27 + 'key_corpus' => array( 28 + 'columns' => array('corpus', 'stemmedCorpus'), 27 29 'type' => 'FULLTEXT', 28 30 ), 29 31 ),