@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.) hq.recaptime.dev/wiki/Phorge
phorge phabricator
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Build a prototype fulltext engine ("Ferret") using only basic MySQL primitives

Summary:
Ref T12819. I gave this stuff a sweet code name because all the terms related to "fulltext" and "search" already mean 5 different things. It, uh, ferrets out documents for you?

I'm building this to work a lot like the existing ngram index, which seems to work pretty well. If this sticks, it will auto-resolve the join issue (in T12443) by letting us do the entire thing locally in a JOIN and thus dodge a lot of mess.

This index gets built alongside other indexes, but only shows up in the UI if you have prototypes enabled. If you do, it appears under the existing fulltext field in Maniphest. No existing functionality is affected or disrupted.

NOTE: The query engine half of this is still EXTREMELY primitive, and this probably performs worse than the existing field for now. If this doesn't show obvious signs of being awful on `secure` I'll improve that in followup changes.

Test Plan:
Indexed my tasks, ran some simple queries, got the results I wanted, even for queries "ko", "k", "v0.1".

{F5147746}

Reviewers: chad

Reviewed By: chad

Maniphest Tasks: T12819, T12443

Differential Revision: https://secure.phabricator.com/D18484

+571
+9
resources/sql/autopatches/20170828.ferret.01.taskdoc.sql
/* Schema patch: creates the table backing ManiphestTaskFerretDocument
   (application "maniphest", index key "task", suffix "_fdocument").
   authorPHID and ownerPHID are the only nullable columns. */
CREATE TABLE {$NAMESPACE}_maniphest.maniphest_task_fdocument (
  id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
  objectPHID VARBINARY(64) NOT NULL,
  isClosed BOOL NOT NULL,
  authorPHID VARBINARY(64),
  ownerPHID VARBINARY(64),
  epochCreated INT UNSIGNED NOT NULL,
  epochModified INT UNSIGNED NOT NULL
) ENGINE=InnoDB, COLLATE {$COLLATE_TEXT};
+7
resources/sql/autopatches/20170828.ferret.02.taskfield.sql
/* Schema patch: creates the table backing ManiphestTaskFerretField
   (application "maniphest", index key "task", suffix "_ffield").
   Each row stores one field of a document: the raw corpus and a normalized
   copy of it, both using the sort collation. */
CREATE TABLE {$NAMESPACE}_maniphest.maniphest_task_ffield (
  id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
  documentID INT UNSIGNED NOT NULL,
  fieldKey VARCHAR(4) NOT NULL COLLATE {$COLLATE_TEXT},
  rawCorpus LONGTEXT NOT NULL COLLATE {$COLLATE_SORT},
  normalCorpus LONGTEXT NOT NULL COLLATE {$COLLATE_SORT}
) ENGINE=InnoDB, COLLATE {$COLLATE_TEXT};
+5
resources/sql/autopatches/20170828.ferret.03.taskngrams.sql
/* Schema patch: creates the table backing ManiphestTaskFerretNgrams
   (application "maniphest", index key "task", suffix "_fngrams").
   Each row associates one three-character ngram with a document ID. */
CREATE TABLE {$NAMESPACE}_maniphest.maniphest_task_fngrams (
  id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
  documentID INT UNSIGNED NOT NULL,
  ngram CHAR(3) NOT NULL COLLATE {$COLLATE_TEXT}
) ENGINE=InnoDB, COLLATE {$COLLATE_TEXT};
+22
src/__phutil_library_map__.php
··· 1533 1533 'ManiphestTaskEditBulkJobType' => 'applications/maniphest/bulk/ManiphestTaskEditBulkJobType.php', 1534 1534 'ManiphestTaskEditController' => 'applications/maniphest/controller/ManiphestTaskEditController.php', 1535 1535 'ManiphestTaskEditEngineLock' => 'applications/maniphest/editor/ManiphestTaskEditEngineLock.php', 1536 + 'ManiphestTaskFerretDocument' => 'applications/maniphest/storage/ManiphestTaskFerretDocument.php', 1537 + 'ManiphestTaskFerretEngine' => 'applications/maniphest/search/ManiphestTaskFerretEngine.php', 1538 + 'ManiphestTaskFerretField' => 'applications/maniphest/storage/ManiphestTaskFerretField.php', 1539 + 'ManiphestTaskFerretNgrams' => 'applications/maniphest/storage/ManiphestTaskFerretNgrams.php', 1536 1540 'ManiphestTaskFulltextEngine' => 'applications/maniphest/search/ManiphestTaskFulltextEngine.php', 1537 1541 'ManiphestTaskGraph' => 'infrastructure/graph/ManiphestTaskGraph.php', 1538 1542 'ManiphestTaskHasCommitEdgeType' => 'applications/maniphest/edge/ManiphestTaskHasCommitEdgeType.php', ··· 2828 2832 'PhabricatorFeedStoryNotification' => 'applications/notification/storage/PhabricatorFeedStoryNotification.php', 2829 2833 'PhabricatorFeedStoryPublisher' => 'applications/feed/PhabricatorFeedStoryPublisher.php', 2830 2834 'PhabricatorFeedStoryReference' => 'applications/feed/storage/PhabricatorFeedStoryReference.php', 2835 + 'PhabricatorFerretDocument' => 'applications/search/ferret/PhabricatorFerretDocument.php', 2836 + 'PhabricatorFerretEngine' => 'applications/search/ferret/PhabricatorFerretEngine.php', 2837 + 'PhabricatorFerretField' => 'applications/search/ferret/PhabricatorFerretField.php', 2838 + 'PhabricatorFerretFulltextEngineExtension' => 'applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php', 2839 + 'PhabricatorFerretInterface' => 'applications/search/ferret/PhabricatorFerretInterface.php', 2840 + 'PhabricatorFerretNgrams' => 'applications/search/ferret/PhabricatorFerretNgrams.php', 2831 
2841 'PhabricatorFile' => 'applications/files/storage/PhabricatorFile.php', 2832 2842 'PhabricatorFileAES256StorageFormat' => 'applications/files/format/PhabricatorFileAES256StorageFormat.php', 2833 2843 'PhabricatorFileBundleLoader' => 'applications/files/query/PhabricatorFileBundleLoader.php', ··· 3195 3205 'PhabricatorNamedQueryQuery' => 'applications/search/query/PhabricatorNamedQueryQuery.php', 3196 3206 'PhabricatorNavigationRemarkupRule' => 'infrastructure/markup/rule/PhabricatorNavigationRemarkupRule.php', 3197 3207 'PhabricatorNeverTriggerClock' => 'infrastructure/daemon/workers/clock/PhabricatorNeverTriggerClock.php', 3208 + 'PhabricatorNgramEngine' => 'applications/search/ngrams/PhabricatorNgramEngine.php', 3198 3209 'PhabricatorNgramsIndexEngineExtension' => 'applications/search/engineextension/PhabricatorNgramsIndexEngineExtension.php', 3199 3210 'PhabricatorNgramsInterface' => 'applications/search/interface/PhabricatorNgramsInterface.php', 3200 3211 'PhabricatorNotificationBuilder' => 'applications/notification/builder/PhabricatorNotificationBuilder.php', ··· 6659 6670 'PhabricatorSpacesInterface', 6660 6671 'PhabricatorConduitResultInterface', 6661 6672 'PhabricatorFulltextInterface', 6673 + 'PhabricatorFerretInterface', 6662 6674 'DoorkeeperBridgedObjectInterface', 6663 6675 'PhabricatorEditEngineSubtypeInterface', 6664 6676 'PhabricatorEditEngineLockableInterface', ··· 6682 6694 'ManiphestTaskEditBulkJobType' => 'PhabricatorWorkerBulkJobType', 6683 6695 'ManiphestTaskEditController' => 'ManiphestController', 6684 6696 'ManiphestTaskEditEngineLock' => 'PhabricatorEditEngineLock', 6697 + 'ManiphestTaskFerretDocument' => 'PhabricatorFerretDocument', 6698 + 'ManiphestTaskFerretEngine' => 'PhabricatorFerretEngine', 6699 + 'ManiphestTaskFerretField' => 'PhabricatorFerretField', 6700 + 'ManiphestTaskFerretNgrams' => 'PhabricatorFerretNgrams', 6685 6701 'ManiphestTaskFulltextEngine' => 'PhabricatorFulltextEngine', 6686 6702 'ManiphestTaskGraph' => 
'PhabricatorObjectGraph', 6687 6703 'ManiphestTaskHasCommitEdgeType' => 'PhabricatorEdgeType', ··· 8147 8163 'PhabricatorFeedStoryNotification' => 'PhabricatorFeedDAO', 8148 8164 'PhabricatorFeedStoryPublisher' => 'Phobject', 8149 8165 'PhabricatorFeedStoryReference' => 'PhabricatorFeedDAO', 8166 + 'PhabricatorFerretDocument' => 'PhabricatorSearchDAO', 8167 + 'PhabricatorFerretEngine' => 'Phobject', 8168 + 'PhabricatorFerretField' => 'PhabricatorSearchDAO', 8169 + 'PhabricatorFerretFulltextEngineExtension' => 'PhabricatorFulltextEngineExtension', 8170 + 'PhabricatorFerretNgrams' => 'PhabricatorSearchDAO', 8150 8171 'PhabricatorFile' => array( 8151 8172 'PhabricatorFileDAO', 8152 8173 'PhabricatorApplicationTransactionInterface', ··· 8565 8586 'PhabricatorNamedQueryQuery' => 'PhabricatorCursorPagedPolicyAwareQuery', 8566 8587 'PhabricatorNavigationRemarkupRule' => 'PhutilRemarkupRule', 8567 8588 'PhabricatorNeverTriggerClock' => 'PhabricatorTriggerClock', 8589 + 'PhabricatorNgramEngine' => 'Phobject', 8568 8590 'PhabricatorNgramsIndexEngineExtension' => 'PhabricatorIndexEngineExtension', 8569 8591 'PhabricatorNgramsInterface' => 'PhabricatorIndexableInterface', 8570 8592 'PhabricatorNotificationBuilder' => 'Phobject',
+13
src/applications/maniphest/query/ManiphestTaskSearchEngine.php
··· 49 49 $subtype_map = id(new ManiphestTask())->newEditEngineSubtypeMap(); 50 50 $hide_subtypes = (count($subtype_map) == 1); 51 51 52 + $hide_ferret = !PhabricatorEnv::getEnvConfig('phabricator.show-prototypes'); 53 + 52 54 return array( 53 55 id(new PhabricatorOwnersSearchField()) 54 56 ->setLabel(pht('Assigned To')) ··· 89 91 id(new PhabricatorSearchTextField()) 90 92 ->setLabel(pht('Contains Words')) 91 93 ->setKey('fulltext'), 94 + id(new PhabricatorSearchTextField()) 95 + ->setLabel(pht('Matches (Prototype)')) 96 + ->setKey('ferret') 97 + ->setIsHidden($hide_ferret), 92 98 id(new PhabricatorSearchThreeStateField()) 93 99 ->setLabel(pht('Open Parents')) 94 100 ->setKey('hasParents') ··· 145 151 'priorities', 146 152 'subtypes', 147 153 'fulltext', 154 + 'ferret', 148 155 'hasParents', 149 156 'hasSubtasks', 150 157 'parentIDs', ··· 222 229 223 230 if (strlen($map['fulltext'])) { 224 231 $query->withFullTextSearch($map['fulltext']); 232 + } 233 + 234 + if (strlen($map['ferret'])) { 235 + $query->withFerretConstraint( 236 + id(new ManiphestTask())->newFerretEngine(), 237 + $map['ferret']); 225 238 } 226 239 227 240 if ($map['parentIDs']) {
+18
src/applications/maniphest/search/ManiphestTaskFerretEngine.php
<?php

/**
 * Ferret engine for Maniphest tasks.
 *
 * Supplies the task-specific storage objects (ngrams, document and field
 * tables) that the generic Ferret indexing and query code operates on.
 */
final class ManiphestTaskFerretEngine
  extends PhabricatorFerretEngine {

  /**
   * @return ManiphestTaskFerretNgrams New, unsaved ngrams storage object.
   */
  public function newNgramsObject() {
    return new ManiphestTaskFerretNgrams();
  }

  /**
   * @return ManiphestTaskFerretDocument New, unsaved document storage object.
   */
  public function newDocumentObject() {
    return new ManiphestTaskFerretDocument();
  }

  /**
   * @return ManiphestTaskFerretField New, unsaved field storage object.
   */
  public function newFieldObject() {
    return new ManiphestTaskFerretField();
  }

}
+9
src/applications/maniphest/storage/ManiphestTask.php
··· 16 16 PhabricatorSpacesInterface, 17 17 PhabricatorConduitResultInterface, 18 18 PhabricatorFulltextInterface, 19 + PhabricatorFerretInterface, 19 20 DoorkeeperBridgedObjectInterface, 20 21 PhabricatorEditEngineSubtypeInterface, 21 22 PhabricatorEditEngineLockableInterface { ··· 601 602 602 603 public function newEditEngineLock() { 603 604 return new ManiphestTaskEditEngineLock(); 605 + } 606 + 607 + 608 + /* -( PhabricatorFerretInterface )----------------------------------------- */ 609 + 610 + 611 + public function newFerretEngine() { 612 + return new ManiphestTaskFerretEngine(); 604 613 } 605 614 606 615 }
+14
src/applications/maniphest/storage/ManiphestTaskFerretDocument.php
<?php

/**
 * Ferret document storage for Maniphest tasks.
 *
 * The application name and index key returned here are combined by
 * PhabricatorFerretDocument::getTableName() into the table name
 * "maniphest_task_fdocument".
 */
final class ManiphestTaskFerretDocument
  extends PhabricatorFerretDocument {

  /**
   * @return string Application name component of the table name.
   */
  public function getApplicationName() {
    return 'maniphest';
  }

  /**
   * @return string Index key component of the table name.
   */
  public function getIndexKey() {
    return 'task';
  }

}
+14
src/applications/maniphest/storage/ManiphestTaskFerretField.php
<?php

/**
 * Ferret field storage for Maniphest tasks.
 *
 * The application name and index key returned here are combined by
 * PhabricatorFerretField::getTableName() into the table name
 * "maniphest_task_ffield".
 */
final class ManiphestTaskFerretField
  extends PhabricatorFerretField {

  /**
   * @return string Application name component of the table name.
   */
  public function getApplicationName() {
    return 'maniphest';
  }

  /**
   * @return string Index key component of the table name.
   */
  public function getIndexKey() {
    return 'task';
  }

}
+14
src/applications/maniphest/storage/ManiphestTaskFerretNgrams.php
<?php

/**
 * Ferret ngram storage for Maniphest tasks.
 *
 * The application name and index key returned here are combined by
 * PhabricatorFerretNgrams::getTableName() into the table name
 * "maniphest_task_fngrams".
 */
final class ManiphestTaskFerretNgrams
  extends PhabricatorFerretNgrams {

  /**
   * @return string Application name component of the table name.
   */
  public function getApplicationName() {
    return 'maniphest';
  }

  /**
   * @return string Index key component of the table name.
   */
  public function getIndexKey() {
    return 'task';
  }

}
+126
src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php
<?php

/**
 * Fulltext engine extension which mirrors each indexed document into the
 * prototype "Ferret" index.
 *
 * For every object implementing @{interface:PhabricatorFerretInterface}, this
 * writes one document row, one field row per non-empty corpus, and one ngram
 * row per distinct ngram of the combined raw corpus.
 */
final class PhabricatorFerretFulltextEngineExtension
  extends PhabricatorFulltextEngineExtension {

  const EXTENSIONKEY = 'ferret';


  public function getExtensionName() {
    return pht('Ferret Fulltext Engine');
  }


  /**
   * Only objects which can provide a Ferret engine are indexed.
   */
  public function shouldIndexFulltextObject($object) {
    return ($object instanceof PhabricatorFerretInterface);
  }


  /**
   * Rebuild the Ferret index rows for one object.
   *
   * Any previously indexed rows for the object are deleted and the new rows
   * are written inside a single transaction. If any write fails, the
   * transaction is killed before the exception propagates, so it is not left
   * open.
   *
   * @param PhabricatorFerretInterface Object being indexed.
   * @param PhabricatorSearchAbstractDocument Abstract document which provides
   *   the PHID and the list of field corpora.
   * @return void
   */
  public function indexFulltextObject(
    $object,
    PhabricatorSearchAbstractDocument $document) {

    $phid = $document->getPHID();
    $engine = $object->newFerretEngine();

    // The closed flag and both epochs are written as constant placeholders
    // here; nothing in this method derives them from the object.
    $ferret_document = $engine->newDocumentObject()
      ->setObjectPHID($phid)
      ->setIsClosed(0)
      ->setEpochCreated(0)
      ->setEpochModified(0);

    $stemmer = new PhutilSearchStemmer();

    $ferret_fields = array();
    $ngrams_source = array();
    foreach ($document->getFieldData() as $field) {
      list($key, $raw_corpus) = $field;

      // Cast first so a null corpus is treated as empty without handing a
      // non-string to strlen().
      if (!strlen((string)$raw_corpus)) {
        continue;
      }

      $normal_corpus = $stemmer->stemCorpus($raw_corpus);

      $ferret_fields[] = $engine->newFieldObject()
        ->setFieldKey($key)
        ->setRawCorpus($raw_corpus)
        ->setNormalCorpus($normal_corpus);

      $ngrams_source[] = $raw_corpus;
    }
    $ngrams_source = implode(' ', $ngrams_source);

    // Ngrams are computed from the raw (unstemmed) text of all fields.
    $ngrams = id(new PhabricatorNgramEngine())
      ->getNgramsFromString($ngrams_source, 'index');

    $ferret_document->openTransaction();
    try {
      $this->deleteOldDocument($engine, $object, $document);

      $ferret_document->save();

      // Field and ngram rows refer to the document by its new row ID.
      $document_id = $ferret_document->getID();
      foreach ($ferret_fields as $ferret_field) {
        $ferret_field
          ->setDocumentID($document_id)
          ->save();
      }

      $ferret_ngrams = $engine->newNgramsObject();
      $conn = $ferret_ngrams->establishConnection('w');

      $sql = array();
      foreach ($ngrams as $ngram) {
        $sql[] = qsprintf(
          $conn,
          '(%d, %s)',
          $document_id,
          $ngram);
      }

      // Insert in chunks rather than as one statement per ngram.
      foreach (PhabricatorLiskDAO::chunkSQL($sql) as $chunk) {
        queryfx(
          $conn,
          'INSERT INTO %T (documentID, ngram) VALUES %Q',
          $ferret_ngrams->getTableName(),
          $chunk);
      }
    } catch (Exception $ex) {
      $ferret_document->killTransaction();
      throw $ex;
    } catch (Throwable $ex) {
      // Errors which are not Exceptions must also roll back.
      $ferret_document->killTransaction();
      throw $ex;
    }
    $ferret_document->saveTransaction();
  }


  /**
   * Delete the existing document row for an object, if there is one, along
   * with the field and ngram rows which reference it.
   *
   * @param PhabricatorFerretEngine Engine which provides the storage objects.
   * @param object Object being indexed (currently unused).
   * @param PhabricatorSearchAbstractDocument Document whose PHID identifies
   *   the rows to remove.
   * @return void
   */
  private function deleteOldDocument(
    PhabricatorFerretEngine $engine,
    $object,
    PhabricatorSearchAbstractDocument $document) {

    $old_document = $engine->newDocumentObject()->loadOneWhere(
      'objectPHID = %s',
      $document->getPHID());
    if (!$old_document) {
      return;
    }

    $conn = $old_document->establishConnection('w');
    $old_id = $old_document->getID();

    queryfx(
      $conn,
      'DELETE FROM %T WHERE id = %d',
      $engine->newDocumentObject()->getTableName(),
      $old_id);

    queryfx(
      $conn,
      'DELETE FROM %T WHERE documentID = %d',
      $engine->newFieldObject()->getTableName(),
      $old_id);

    queryfx(
      $conn,
      'DELETE FROM %T WHERE documentID = %d',
      $engine->newNgramsObject()->getTableName(),
      $old_id);
  }

}
+40
src/applications/search/ferret/PhabricatorFerretDocument.php
<?php

/**
 * Base storage class for Ferret document rows.
 *
 * Subclasses name the application and index key; together they select the
 * "<application>_<key>_fdocument" table.
 */
abstract class PhabricatorFerretDocument
  extends PhabricatorSearchDAO {

  protected $objectPHID;
  protected $isClosed;
  protected $authorPHID;
  protected $ownerPHID;
  protected $epochCreated;
  protected $epochModified;

  /**
   * @return string Index key used as the middle part of the table name.
   */
  abstract public function getIndexKey();

  protected function getConfiguration() {
    $column_schema = array(
      'isClosed' => 'bool',
      'authorPHID' => 'phid?',
      'ownerPHID' => 'phid?',
      'epochCreated' => 'epoch',
      'epochModified' => 'epoch',
    );

    // At most one document row may exist per object PHID.
    $key_schema = array(
      'key_object' => array(
        'columns' => array('objectPHID'),
        'unique' => true,
      ),
    );

    $configuration = array(
      self::CONFIG_TIMESTAMPS => false,
      self::CONFIG_COLUMN_SCHEMA => $column_schema,
      self::CONFIG_KEY_SCHEMA => $key_schema,
    );

    // Keys defined above take precedence over the parent configuration.
    return $configuration + parent::getConfiguration();
  }

  public function getTableName() {
    return sprintf(
      '%s_%s_fdocument',
      $this->getApplicationName(),
      $this->getIndexKey());
  }

}
+9
src/applications/search/ferret/PhabricatorFerretEngine.php
<?php

/**
 * Describes the storage used by a Ferret index for one type of object.
 *
 * Concrete engines return new instances of the three storage classes
 * (ngrams, document and field) for their object type.
 */
abstract class PhabricatorFerretEngine extends Phobject {

  /**
   * @return PhabricatorFerretNgrams New ngrams storage object.
   */
  abstract public function newNgramsObject();

  /**
   * @return PhabricatorFerretDocument New document storage object.
   */
  abstract public function newDocumentObject();

  /**
   * @return PhabricatorFerretField New field storage object.
   */
  abstract public function newFieldObject();

}
+36
src/applications/search/ferret/PhabricatorFerretField.php
<?php

/**
 * Base storage class for Ferret field rows: one corpus of text, in raw and
 * normalized form, belonging to a document.
 *
 * Subclasses name the application and index key; together they select the
 * "<application>_<key>_ffield" table.
 */
abstract class PhabricatorFerretField
  extends PhabricatorSearchDAO {

  protected $documentID;
  protected $fieldKey;
  protected $rawCorpus;
  protected $normalCorpus;

  /**
   * @return string Index key used as the middle part of the table name.
   */
  abstract public function getIndexKey();

  protected function getConfiguration() {
    $column_schema = array(
      'documentID' => 'uint32',
      'fieldKey' => 'text4',
      'rawCorpus' => 'sort',
      'normalCorpus' => 'sort',
    );

    $key_schema = array(
      'key_document' => array(
        'columns' => array('documentID', 'fieldKey'),
      ),
    );

    $configuration = array(
      self::CONFIG_TIMESTAMPS => false,
      self::CONFIG_COLUMN_SCHEMA => $column_schema,
      self::CONFIG_KEY_SCHEMA => $key_schema,
    );

    // Keys defined above take precedence over the parent configuration.
    return $configuration + parent::getConfiguration();
  }

  public function getTableName() {
    return sprintf(
      '%s_%s_ffield',
      $this->getApplicationName(),
      $this->getIndexKey());
  }

}
+7
src/applications/search/ferret/PhabricatorFerretInterface.php
<?php

/**
 * Implemented by objects which can be indexed by the Ferret fulltext engine.
 */
interface PhabricatorFerretInterface {

  /**
   * Build the engine which describes Ferret storage for this object.
   *
   * @return PhabricatorFerretEngine New engine for this type of object.
   */
  public function newFerretEngine();

}
+35
src/applications/search/ferret/PhabricatorFerretNgrams.php
<?php

/**
 * Base storage class for Ferret ngram rows: one three-character ngram
 * associated with a document.
 *
 * Subclasses name the application and index key; together they select the
 * "<application>_<key>_fngrams" table.
 */
abstract class PhabricatorFerretNgrams
  extends PhabricatorSearchDAO {

  protected $documentID;
  protected $ngram;

  /**
   * @return string Index key used as the middle part of the table name.
   */
  abstract public function getIndexKey();

  protected function getConfiguration() {
    $column_schema = array(
      'documentID' => 'uint32',
      'ngram' => 'char3',
    );

    // One key leads with the ngram and one with the document ID.
    $key_schema = array(
      'key_ngram' => array(
        'columns' => array('ngram', 'documentID'),
      ),
      'key_object' => array(
        'columns' => array('documentID'),
      ),
    );

    $configuration = array(
      self::CONFIG_TIMESTAMPS => false,
      self::CONFIG_COLUMN_SCHEMA => $column_schema,
      self::CONFIG_KEY_SCHEMA => $key_schema,
    );

    // Keys defined above take precedence over the parent configuration.
    return $configuration + parent::getConfiguration();
  }

  public function getTableName() {
    return sprintf(
      '%s_%s_fngrams',
      $this->getApplicationName(),
      $this->getIndexKey());
  }

}
+41
src/applications/search/ngrams/PhabricatorNgramEngine.php
<?php

/**
 * Splits text into tokens and derives the three-character ngrams used to
 * build and query ngram indexes.
 */
final class PhabricatorNgramEngine extends Phobject {

  /**
   * Split a string into tokens on runs of whitespace.
   *
   * This uses the same separator pattern that Ferret query strings are split
   * with, so words next to newlines or tabs get the same boundaries when
   * indexed as when queried. Empty tokens are never returned.
   *
   * @param string Text to tokenize.
   * @return list<string> Tokens, in order of appearance.
   */
  public function tokenizeString($value) {
    return preg_split('/\s+/', (string)$value, -1, PREG_SPLIT_NO_EMPTY);
  }

  /**
   * Compute the distinct three-character ngrams of a string.
   *
   * Each token is lowercased and then padded according to the mode:
   *
   *   - `query`: no padding, so ngrams match anywhere inside a word.
   *   - `index`: a space on both sides, so word boundaries are indexed.
   *   - `prefix`: a leading space only, so ngrams match the start of a word.
   *
   * Any other mode is treated like `query`.
   *
   * Ngrams are cut by character, not by byte, so multibyte UTF-8 characters
   * are never split into invalid fragments.
   *
   * NOTE(review): phutil_utf8v() is expected to reject invalid UTF-8; confirm
   * that callers only pass valid UTF-8 text.
   *
   * @param string Text to extract ngrams from.
   * @param string One of `query`, `index` or `prefix`.
   * @return list<string> Distinct ngrams, sorted as strings.
   */
  public function getNgramsFromString($value, $mode) {
    $tokens = $this->tokenizeString($value);

    $ngrams = array();
    foreach ($tokens as $token) {
      $token = phutil_utf8_strtolower($token);

      switch ($mode) {
        case 'query':
          break;
        case 'index':
          $token = ' '.$token.' ';
          break;
        case 'prefix':
          $token = ' '.$token;
          break;
      }

      $characters = phutil_utf8v($token);

      // A token of N characters has N - 2 ngrams; shorter tokens have none.
      $len = (count($characters) - 2);
      for ($ii = 0; $ii < $len; $ii++) {
        $ngram = implode('', array_slice($characters, $ii, 3));
        $ngrams[$ngram] = $ngram;
      }
    }

    // Read the values, not the keys: PHP converts all-digit array keys such
    // as "123" to integers, but the values are still strings.
    $ngrams = array_values($ngrams);
    sort($ngrams, SORT_STRING);

    return $ngrams;
  }

}
+152
src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php
// NOTE: Only diff hunks of PhabricatorCursorPagedPolicyAwareQuery are
// visible in this view. Besides the members below, the change hooks Ferret
// into three existing methods whose bodies are not fully shown:
//
//   - the list of JOIN clause parts gains
//       $joins[] = $this->buildFerretJoinClause($conn);
//   - the list of WHERE clause parts gains
//       $where[] = $this->buildFerretWhereClause($conn);
//   - the check for grouping result rows gains
//       if ($this->shouldGroupFerretResultRows()) { return true; }

private $ferretEngine;
private $ferretConstraints;


/* -( Ferret )------------------------------------------------------------- */


/**
 * Constrain results to objects matching a Ferret fulltext query.
 *
 * The raw query is split on whitespace into terms. A query with no terms
 * (empty, or whitespace only) adds no constraint.
 *
 * @param PhabricatorFerretEngine Engine describing the index storage.
 * @param string Raw query string entered by the user.
 * @return this
 * @throws Exception If a Ferret constraint has already been applied.
 */
public function withFerretConstraint(
  PhabricatorFerretEngine $engine,
  $raw_query) {

  if ($this->ferretEngine) {
    throw new Exception(
      pht(
        'Query may not have multiple fulltext constraints.'));
  }

  if (!strlen($raw_query)) {
    return $this;
  }

  // Discard empty terms produced by leading or trailing whitespace. An empty
  // term would cost a join and add a LIKE which matches every corpus.
  $terms = preg_split('/\s+/', $raw_query, -1, PREG_SPLIT_NO_EMPTY);
  if (!$terms) {
    return $this;
  }

  $this->ferretEngine = $engine;
  $this->ferretConstraints = $terms;

  return $this;
}

/**
 * Build the JOINs which narrow results using the Ferret ngram index.
 *
 * The document table is joined on the object PHID, then one ngram table
 * join is added per selected ngram, then the field table is joined so the
 * WHERE clause can test the actual text.
 *
 * @param AphrontDatabaseConnection Connection used for escaping.
 * @return list<string> JOIN clauses, or an empty list if no Ferret
 *   constraint is set.
 */
protected function buildFerretJoinClause(AphrontDatabaseConnection $conn) {
  if (!$this->ferretEngine) {
    return array();
  }

  $engine = $this->ferretEngine;
  $ngram_engine = new PhabricatorNgramEngine();

  $ngram_table = $engine->newNgramsObject();
  $ngram_table_name = $ngram_table->getTableName();

  $flat = array();
  foreach ($this->ferretConstraints as $term) {
    $length = count(phutil_utf8v($term));

    if ($length >= 3) {
      // Long enough to contain at least one full ngram.
      $ngrams = $ngram_engine->getNgramsFromString($term, 'query');
      $prefix = false;
    } else if ($length == 2) {
      // Pad with a leading space so the term matches the start of a word.
      $ngrams = $ngram_engine->getNgramsFromString($term, 'prefix');
      $prefix = false;
    } else {
      // Too short for a full ngram: match any ngram which starts a word
      // with this character. The ngram engine lowercases every ngram it
      // produces, so lowercase this one too.
      $ngrams = array(' '.phutil_utf8_strtolower($term));
      $prefix = true;
    }

    foreach ($ngrams as $ngram) {
      // Key by match type and ngram so an ngram shared by several terms is
      // joined only once.
      $flat_key = ($prefix ? 'prefix:' : 'exact:').$ngram;
      $flat[$flat_key] = array(
        'table' => $ngram_table_name,
        'ngram' => $ngram,
        'prefix' => $prefix,
      );
    }
  }
  $flat = array_values($flat);

  // MySQL only allows us to join a maximum of 61 tables per query. Each
  // ngram is going to cost us a join toward that limit, so if the user
  // specified a very long query string, just pick 16 of the ngrams
  // at random.
  if (count($flat) > 16) {
    shuffle($flat);
    $flat = array_slice($flat, 0, 16);
  }

  $primary_alias = $this->getPrimaryTableAlias();
  if ($primary_alias) {
    $phid_column = qsprintf($conn, '%T.%T', $primary_alias, 'phid');
  } else {
    $phid_column = qsprintf($conn, '%T', 'phid');
  }

  $document_table = $engine->newDocumentObject();
  $field_table = $engine->newFieldObject();

  $joins = array();
  $joins[] = qsprintf(
    $conn,
    'JOIN %T ftdoc ON ftdoc.objectPHID = %Q',
    $document_table->getTableName(),
    $phid_column);

  $idx = 1;
  foreach ($flat as $spec) {
    $table = $spec['table'];
    $ngram = $spec['ngram'];
    $prefix = $spec['prefix'];

    // Each ngram join gets its own alias: ft1, ft2, ...
    $ngram_alias = 'ft'.$idx++;

    if ($prefix) {
      // "%>" builds a prefix LIKE pattern from the value.
      $joins[] = qsprintf(
        $conn,
        'JOIN %T %T ON %T.documentID = ftdoc.id AND %T.ngram LIKE %>',
        $table,
        $ngram_alias,
        $ngram_alias,
        $ngram_alias,
        $ngram);
    } else {
      $joins[] = qsprintf(
        $conn,
        'JOIN %T %T ON %T.documentID = ftdoc.id AND %T.ngram = %s',
        $table,
        $ngram_alias,
        $ngram_alias,
        $ngram_alias,
        $ngram);
    }
  }

  $joins[] = qsprintf(
    $conn,
    'JOIN %T ftfield ON ftdoc.id = ftfield.documentID',
    $field_table->getTableName());

  return $joins;
}

/**
 * Build the WHERE clauses which confirm that each term actually appears in
 * the joined field row, in either the raw or the normalized corpus.
 *
 * The ngram joins only narrow the candidate documents; these substring
 * tests ("%~" builds a substring LIKE pattern) do the real matching.
 *
 * NOTE(review): the field table is joined once, so the clauses for all terms
 * test the same field row; presumably the caller combines them with AND.
 *
 * @param AphrontDatabaseConnection Connection used for escaping.
 * @return list<string> WHERE clauses, or an empty list if no Ferret
 *   constraint is set.
 */
protected function buildFerretWhereClause(AphrontDatabaseConnection $conn) {
  if (!$this->ferretEngine) {
    return array();
  }

  $where = array();
  foreach ($this->ferretConstraints as $constraint) {
    $where[] = qsprintf(
      $conn,
      '(ftfield.rawCorpus LIKE %~ OR ftfield.normalCorpus LIKE %~)',
      $constraint,
      $constraint);
  }

  return $where;
}

/**
 * The Ferret joins can produce several rows per object, so result rows
 * should be grouped whenever a Ferret constraint is present.
 *
 * @return bool True if a Ferret constraint is set.
 */
protected function shouldGroupFerretResultRows() {
  return (bool)$this->ferretConstraints;
}