@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.) hq.recaptime.dev/wiki/Phorge
phorge phabricator
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Implement basic ngram search for Owners Package names

Summary:
Ref T9979. This uses ngrams (specifically, trigrams) to build a reasonably efficient index for substring matching. Specifically, for a package like "Example", with ID 123, we store rows like this:

```
< ex, 123>
<exa, 123>
<xam, 123>
<amp, 123>
<mpl, 123>
<ple, 123>
<le , 123>
```

When the user searches for `exam`, we join this table for packages with tokens `exa` and `xam`. MySQL can do this a lot more efficiently than it can process a `LIKE "%exam%"` query against a huge table.

When the user searches for a one-letter or two-letter string, we only search the beginnings of words. This is probably what they want, the only thing we can do quickly, and a reasonable/expected behavior for typeaheads.

Test Plan:
- Ran storage upgrades and search indexer.
- Searched for stuff with "name contains".
- Used typeahead and got sensible results.
- Searched for `aabbccddeeffgghhiijjkkllmmnnooppqqrrssttuuvvwwxxyyzz` and saw only 16 joins.

Reviewers: chad

Reviewed By: chad

Maniphest Tasks: T9979

Differential Revision: https://secure.phabricator.com/D14846

+457 -28
+7
resources/sql/autopatches/20151221.search.2.ownersngrams.sql
CREATE TABLE {$NAMESPACE}_owners.owners_name_ngrams (
  id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
  objectID INT UNSIGNED NOT NULL,
  ngram CHAR(3) NOT NULL COLLATE {$COLLATE_TEXT},
  KEY `key_object` (objectID),
  KEY `key_ngram` (ngram, objectID)
) ENGINE=InnoDB, COLLATE {$COLLATE_TEXT};
+11
resources/sql/autopatches/20151221.search.3.reindex.php
<?php

// Queue every Owners package for indexing, passing the 'force' flag, so the
// search indexes are rebuilt for packages that already exist.
$packages = new LiskMigrationIterator(new PhabricatorOwnersPackage());

foreach ($packages as $package) {
  $parameters = array(
    'force' => true,
  );

  PhabricatorSearchWorker::queueDocumentForIndexing(
    $package->getPHID(),
    $parameters);
}
+13
src/__phutil_library_map__.php
··· 2548 2548 'PhabricatorNamedQueryQuery' => 'applications/search/query/PhabricatorNamedQueryQuery.php', 2549 2549 'PhabricatorNavigationRemarkupRule' => 'infrastructure/markup/rule/PhabricatorNavigationRemarkupRule.php', 2550 2550 'PhabricatorNeverTriggerClock' => 'infrastructure/daemon/workers/clock/PhabricatorNeverTriggerClock.php', 2551 + 'PhabricatorNgramsIndexEngineExtension' => 'applications/search/engineextension/PhabricatorNgramsIndexEngineExtension.php', 2552 + 'PhabricatorNgramsInterface' => 'applications/search/interface/PhabricatorNgramsInterface.php', 2551 2553 'PhabricatorNotificationBuilder' => 'applications/notification/builder/PhabricatorNotificationBuilder.php', 2552 2554 'PhabricatorNotificationClearController' => 'applications/notification/controller/PhabricatorNotificationClearController.php', 2553 2555 'PhabricatorNotificationClient' => 'applications/notification/client/PhabricatorNotificationClient.php', ··· 2636 2638 'PhabricatorOwnersPackage' => 'applications/owners/storage/PhabricatorOwnersPackage.php', 2637 2639 'PhabricatorOwnersPackageDatasource' => 'applications/owners/typeahead/PhabricatorOwnersPackageDatasource.php', 2638 2640 'PhabricatorOwnersPackageEditEngine' => 'applications/owners/editor/PhabricatorOwnersPackageEditEngine.php', 2641 + 'PhabricatorOwnersPackageFulltextEngine' => 'applications/owners/query/PhabricatorOwnersPackageFulltextEngine.php', 2639 2642 'PhabricatorOwnersPackageFunctionDatasource' => 'applications/owners/typeahead/PhabricatorOwnersPackageFunctionDatasource.php', 2643 + 'PhabricatorOwnersPackageNameNgrams' => 'applications/owners/storage/PhabricatorOwnersPackageNameNgrams.php', 2640 2644 'PhabricatorOwnersPackageOwnerDatasource' => 'applications/owners/typeahead/PhabricatorOwnersPackageOwnerDatasource.php', 2641 2645 'PhabricatorOwnersPackagePHIDType' => 'applications/owners/phid/PhabricatorOwnersPackagePHIDType.php', 2642 2646 'PhabricatorOwnersPackageQuery' => 
'applications/owners/query/PhabricatorOwnersPackageQuery.php', ··· 3047 3051 'PhabricatorSearchManagementIndexWorkflow' => 'applications/search/management/PhabricatorSearchManagementIndexWorkflow.php', 3048 3052 'PhabricatorSearchManagementInitWorkflow' => 'applications/search/management/PhabricatorSearchManagementInitWorkflow.php', 3049 3053 'PhabricatorSearchManagementWorkflow' => 'applications/search/management/PhabricatorSearchManagementWorkflow.php', 3054 + 'PhabricatorSearchNgrams' => 'applications/search/ngrams/PhabricatorSearchNgrams.php', 3055 + 'PhabricatorSearchNgramsDestructionEngineExtension' => 'applications/search/engineextension/PhabricatorSearchNgramsDestructionEngineExtension.php', 3050 3056 'PhabricatorSearchOrderController' => 'applications/search/controller/PhabricatorSearchOrderController.php', 3051 3057 'PhabricatorSearchOrderField' => 'applications/search/field/PhabricatorSearchOrderField.php', 3052 3058 'PhabricatorSearchPreferencesSettingsPanel' => 'applications/settings/panel/PhabricatorSearchPreferencesSettingsPanel.php', ··· 6802 6808 'PhabricatorNamedQueryQuery' => 'PhabricatorCursorPagedPolicyAwareQuery', 6803 6809 'PhabricatorNavigationRemarkupRule' => 'PhutilRemarkupRule', 6804 6810 'PhabricatorNeverTriggerClock' => 'PhabricatorTriggerClock', 6811 + 'PhabricatorNgramsIndexEngineExtension' => 'PhabricatorIndexEngineExtension', 6805 6812 'PhabricatorNotificationBuilder' => 'Phobject', 6806 6813 'PhabricatorNotificationClearController' => 'PhabricatorNotificationController', 6807 6814 'PhabricatorNotificationClient' => 'Phobject', ··· 6907 6914 'PhabricatorCustomFieldInterface', 6908 6915 'PhabricatorDestructibleInterface', 6909 6916 'PhabricatorConduitResultInterface', 6917 + 'PhabricatorFulltextInterface', 6918 + 'PhabricatorNgramsInterface', 6910 6919 ), 6911 6920 'PhabricatorOwnersPackageDatasource' => 'PhabricatorTypeaheadDatasource', 6912 6921 'PhabricatorOwnersPackageEditEngine' => 'PhabricatorEditEngine', 6922 + 
'PhabricatorOwnersPackageFulltextEngine' => 'PhabricatorFulltextEngine', 6913 6923 'PhabricatorOwnersPackageFunctionDatasource' => 'PhabricatorTypeaheadCompositeDatasource', 6924 + 'PhabricatorOwnersPackageNameNgrams' => 'PhabricatorSearchNgrams', 6914 6925 'PhabricatorOwnersPackageOwnerDatasource' => 'PhabricatorTypeaheadCompositeDatasource', 6915 6926 'PhabricatorOwnersPackagePHIDType' => 'PhabricatorPHIDType', 6916 6927 'PhabricatorOwnersPackageQuery' => 'PhabricatorCursorPagedPolicyAwareQuery', ··· 7414 7425 'PhabricatorSearchManagementIndexWorkflow' => 'PhabricatorSearchManagementWorkflow', 7415 7426 'PhabricatorSearchManagementInitWorkflow' => 'PhabricatorSearchManagementWorkflow', 7416 7427 'PhabricatorSearchManagementWorkflow' => 'PhabricatorManagementWorkflow', 7428 + 'PhabricatorSearchNgrams' => 'PhabricatorSearchDAO', 7429 + 'PhabricatorSearchNgramsDestructionEngineExtension' => 'PhabricatorDestructionEngineExtension', 7417 7430 'PhabricatorSearchOrderController' => 'PhabricatorSearchBaseController', 7418 7431 'PhabricatorSearchOrderField' => 'PhabricatorSearchField', 7419 7432 'PhabricatorSearchPreferencesSettingsPanel' => 'PhabricatorSettingsPanel',
+7 -1
src/applications/config/schema/PhabricatorConfigSchemaSpec.php
··· 201 201 202 202 $is_binary = ($this->getUTF8Charset() == 'binary'); 203 203 $matches = null; 204 - if (preg_match('/^(fulltext|sort|text)(\d+)?\z/', $data_type, $matches)) { 204 + $pattern = '/^(fulltext|sort|text|char)(\d+)?\z/'; 205 + if (preg_match($pattern, $data_type, $matches)) { 205 206 206 207 // Limit the permitted column lengths under the theory that it would 207 208 // be nice to eventually reduce this to a small set of standard lengths. ··· 220 221 'text8' => true, 221 222 'text4' => true, 222 223 'text' => true, 224 + 'char3' => true, 223 225 'sort255' => true, 224 226 'sort128' => true, 225 227 'sort64' => true, ··· 266 268 // the majority of cases. 267 269 $column_type = 'longtext'; 268 270 break; 271 + case 'char': 272 + $column_type = 'char('.$size.')'; 273 + break; 269 274 } 270 275 271 276 switch ($type) { 272 277 case 'text': 278 + case 'char': 273 279 if ($is_binary) { 274 280 // We leave collation and character set unspecified in order to 275 281 // generate valid SQL.
+4
src/applications/owners/editor/PhabricatorOwnersPackageTransactionEditor.php
··· 334 334 return $body; 335 335 } 336 336 337 + protected function supportsSearch() { 338 + return true; 339 + } 340 + 337 341 }
+26
src/applications/owners/query/PhabricatorOwnersPackageFulltextEngine.php
<?php

/**
 * Fulltext engine for Owners packages.
 *
 * Fills in the abstract search document for a package: its title, its
 * created/modified timestamps and an open/closed relationship derived from
 * whether the package is archived.
 */
final class PhabricatorOwnersPackageFulltextEngine
  extends PhabricatorFulltextEngine {

  /**
   * Populate the search document for one package.
   *
   * @param PhabricatorSearchAbstractDocument Document to fill in.
   * @param object The package being indexed.
   * @return void
   */
  protected function buildAbstractDocument(
    PhabricatorSearchAbstractDocument $document,
    $object) {

    $package = $object;
    $document->setDocumentTitle($package->getName());

    // TODO: These are bogus, but not currently stored on packages.
    $document->setDocumentCreated(PhabricatorTime::getNow());
    $document->setDocumentModified(PhabricatorTime::getNow());

    // Archived packages are indexed as "closed", everything else as "open".
    if ($package->isArchived()) {
      $relationship = PhabricatorSearchRelationship::RELATIONSHIP_CLOSED;
    } else {
      $relationship = PhabricatorSearchRelationship::RELATIONSHIP_OPEN;
    }

    $document->addRelationship(
      $relationship,
      $package->getPHID(),
      PhabricatorOwnersPackagePHIDType::TYPECONST,
      PhabricatorTime::getNow());
  }

}
+4 -13
src/applications/owners/query/PhabricatorOwnersPackageQuery.php
··· 9 9 private $authorityPHIDs; 10 10 private $repositoryPHIDs; 11 11 private $paths; 12 - private $namePrefix; 13 12 private $statuses; 14 13 15 14 private $controlMap = array(); ··· 78 77 return $this; 79 78 } 80 79 81 - public function withNamePrefix($prefix) { 82 - $this->namePrefix = $prefix; 83 - return $this; 80 + public function withNameNgrams($ngrams) { 81 + return $this->withNgramsConstraint( 82 + new PhabricatorOwnersPackageNameNgrams(), 83 + $ngrams); 84 84 } 85 85 86 86 public function needPaths($need_paths) { ··· 206 206 $conn, 207 207 'p.status IN (%Ls)', 208 208 $this->statuses); 209 - } 210 - 211 - if (strlen($this->namePrefix)) { 212 - // NOTE: This is a hacky mess, but this column is currently case 213 - // sensitive and unique. 214 - $where[] = qsprintf( 215 - $conn, 216 - 'LOWER(p.name) LIKE %>', 217 - phutil_utf8_strtolower($this->namePrefix)); 218 209 } 219 210 220 211 if ($this->controlMap) {
+8
src/applications/owners/query/PhabricatorOwnersPackageSearchEngine.php
··· 25 25 ->setDescription( 26 26 pht('Search for packages with specific owners.')) 27 27 ->setDatasource(new PhabricatorProjectOrUserDatasource()), 28 + id(new PhabricatorSearchTextField()) 29 + ->setLabel(pht('Name Contains')) 30 + ->setKey('name') 31 + ->setDescription(pht('Search for packages by name substrings.')), 28 32 id(new PhabricatorSearchDatasourceField()) 29 33 ->setLabel(pht('Repositories')) 30 34 ->setKey('repositoryPHIDs') ··· 67 71 68 72 if ($map['statuses']) { 69 73 $query->withStatuses($map['statuses']); 74 + } 75 + 76 + if (strlen($map['name'])) { 77 + $query->withNameNgrams($map['name']); 70 78 } 71 79 72 80 return $query;
+23 -13
src/applications/owners/storage/PhabricatorOwnersPackage.php
··· 7 7 PhabricatorApplicationTransactionInterface, 8 8 PhabricatorCustomFieldInterface, 9 9 PhabricatorDestructibleInterface, 10 - PhabricatorConduitResultInterface { 10 + PhabricatorConduitResultInterface, 11 + PhabricatorFulltextInterface, 12 + PhabricatorNgramsInterface { 11 13 12 14 protected $name; 13 15 protected $originalName; ··· 46 48 self::CONFIG_TIMESTAMPS => false, 47 49 self::CONFIG_AUX_PHID => true, 48 50 self::CONFIG_COLUMN_SCHEMA => array( 49 - 'name' => 'text128', 51 + 'name' => 'sort128', 50 52 'originalName' => 'text255', 51 53 'description' => 'text', 52 54 'primaryOwnerPHID' => 'phid?', 53 55 'auditingEnabled' => 'bool', 54 56 'mailKey' => 'bytes20', 55 57 'status' => 'text32', 56 - ), 57 - self::CONFIG_KEY_SCHEMA => array( 58 - 'key_phid' => null, 59 - 'phid' => array( 60 - 'columns' => array('phid'), 61 - 'unique' => true, 62 - ), 63 - 'name' => array( 64 - 'columns' => array('name'), 65 - 'unique' => true, 66 - ), 67 58 ), 68 59 ) + parent::getConfiguration(); 69 60 } ··· 430 421 return array( 431 422 id(new PhabricatorOwnersPathsSearchEngineAttachment()) 432 423 ->setAttachmentKey('paths'), 424 + ); 425 + } 426 + 427 + 428 + /* -( PhabricatorFulltextInterface )--------------------------------------- */ 429 + 430 + 431 + public function newFulltextEngine() { 432 + return new PhabricatorOwnersPackageFulltextEngine(); 433 + } 434 + 435 + 436 + /* -( PhabricatorNgramInterface )------------------------------------------ */ 437 + 438 + 439 + public function newNgrams() { 440 + return array( 441 + id(new PhabricatorOwnersPackageNameNgrams()) 442 + ->setValue($this->getName()), 433 443 ); 434 444 } 435 445
+18
src/applications/owners/storage/PhabricatorOwnersPackageNameNgrams.php
<?php

/**
 * Ngram index definition for the names of Owners packages.
 *
 * The three values returned here are what the base class and the query
 * layer use to locate the index: the application name and ngram key are
 * combined into the table name, and the column name identifies the package
 * column the ngrams were derived from.
 */
final class PhabricatorOwnersPackageNameNgrams
  extends PhabricatorSearchNgrams {

  /**
   * @return string Application name used as the table name prefix.
   */
  public function getApplicationName() {
    return 'owners';
  }

  /**
   * @return string Key identifying this index within the application.
   */
  public function getNgramKey() {
    return 'name';
  }

  /**
   * @return string Column on the package table which is being indexed.
   */
  public function getColumnName() {
    return 'name';
  }

}
+8
src/applications/owners/storage/PhabricatorOwnersPackageTransaction.php
··· 27 27 28 28 switch ($this->getTransactionType()) { 29 29 case self::TYPE_OWNERS: 30 + if (!is_array($old)) { 31 + $old = array(); 32 + } 33 + 34 + if (!is_array($new)) { 35 + $new = array(); 36 + } 37 + 30 38 $add = array_diff($new, $old); 31 39 foreach ($add as $phid) { 32 40 $phids[] = $phid;
+1 -1
src/applications/owners/typeahead/PhabricatorOwnersPackageDatasource.php
··· 22 22 $results = array(); 23 23 24 24 $query = id(new PhabricatorOwnersPackageQuery()) 25 - ->withNamePrefix($raw_query) 25 + ->withNameNgrams($raw_query) 26 26 ->setOrder('name'); 27 27 28 28 $packages = $this->executeQuery($query);
+3
src/applications/search/engineextension/PhabricatorFulltextIndexEngineExtension.php
··· 65 65 66 66 try { 67 67 $comment = $xaction->getApplicationTransactionCommentObject(); 68 + if (!$comment) { 69 + return 'none'; 70 + } 68 71 } catch (Exception $ex) { 69 72 return 'none'; 70 73 }
+34
src/applications/search/engineextension/PhabricatorNgramsIndexEngineExtension.php
<?php

/**
 * Index engine extension which writes ngram rows for objects implementing
 * @{interface:PhabricatorNgramsInterface}.
 */
final class PhabricatorNgramsIndexEngineExtension
  extends PhabricatorIndexEngineExtension {

  const EXTENSIONKEY = 'ngrams';

  public function getExtensionName() {
    return pht('Ngrams Engine');
  }

  /**
   * Compute a version digest from the object's indexed values.
   *
   * The digest covers a map of ngram key to raw value, sorted by key so the
   * result does not depend on the order in which the object lists its
   * indexes.
   *
   * @param object Object being indexed.
   * @return string Digest of the serialized value map.
   */
  public function getIndexVersion($object) {
    $values_by_key = mpull($object->newNgrams(), 'getValue', 'getNgramKey');
    ksort($values_by_key);

    return PhabricatorHash::digestForIndex(serialize($values_by_key));
  }

  /**
   * Only objects which declare ngram indexes are handled here.
   */
  public function shouldIndexObject($object) {
    return ($object instanceof PhabricatorNgramsInterface);
  }

  /**
   * Write the rows for each ngram index the object declares.
   *
   * @param PhabricatorIndexEngine Engine driving the index run.
   * @param object Object being indexed.
   * @return void
   */
  public function indexObject(
    PhabricatorIndexEngine $engine,
    $object) {

    $indexes = $object->newNgrams();
    foreach ($indexes as $index) {
      $index->writeNgram($object->getID());
    }
  }

}
+31
src/applications/search/engineextension/PhabricatorSearchNgramsDestructionEngineExtension.php
<?php

/**
 * Destruction engine extension which removes ngram index rows when an
 * object implementing @{interface:PhabricatorNgramsInterface} is destroyed.
 */
final class PhabricatorSearchNgramsDestructionEngineExtension
  extends PhabricatorDestructionEngineExtension {

  const EXTENSIONKEY = 'search.ngrams';

  public function getExtensionName() {
    return pht('Search Ngram');
  }

  /**
   * Only objects which declare ngram indexes have rows to remove.
   */
  public function canDestroyObject(
    PhabricatorDestructionEngine $engine,
    $object) {
    return ($object instanceof PhabricatorNgramsInterface);
  }

  /**
   * Delete the object's rows from each ngram table it declares.
   *
   * @param PhabricatorDestructionEngine Engine driving the destruction.
   * @param object Object being destroyed.
   * @return void
   */
  public function destroyObject(
    PhabricatorDestructionEngine $engine,
    $object) {

    $indexes = $object->newNgrams();
    foreach ($indexes as $index) {
      $conn_w = $index->establishConnection('w');
      $table_name = $index->getTableName();

      queryfx(
        $conn_w,
        'DELETE FROM %T WHERE objectID = %d',
        $table_name,
        $object->getID());
    }
  }

}
+7
src/applications/search/interface/PhabricatorNgramsInterface.php
<?php

/**
 * Implemented by objects which have one or more ngram (substring search)
 * indexes.
 */
interface PhabricatorNgramsInterface {

  /**
   * Build the ngram index objects for this object.
   *
   * @return list<PhabricatorSearchNgrams> One entry per index, each with the
   *   value to be indexed already set.
   */
  public function newNgrams();

}
+113
src/applications/search/ngrams/PhabricatorSearchNgrams.php
<?php

/**
 * Base storage class for trigram ("ngram") substring indexes.
 *
 * A subclass names the application, the ngram key and the indexed column.
 * Each row of the backing table pairs one three-character ngram with the ID
 * of an object whose indexed value contains it.
 */
abstract class PhabricatorSearchNgrams
  extends PhabricatorSearchDAO {

  protected $objectID;
  protected $ngram;

  // Raw string to be indexed. This is not a stored column.
  private $value;

  /**
   * @return string Key identifying this index; part of the table name.
   */
  abstract public function getNgramKey();

  /**
   * @return string Name of the column the indexed value comes from.
   */
  abstract public function getColumnName();

  /**
   * Set the raw string which @{method:writeNgram} will index.
   *
   * @param string Value to index.
   * @return this
   */
  final public function setValue($value) {
    $this->value = $value;
    return $this;
  }

  /**
   * @return string|null Raw value set with @{method:setValue}.
   */
  final public function getValue() {
    return $this->value;
  }

  protected function getConfiguration() {
    return array(
      self::CONFIG_TIMESTAMPS => false,
      self::CONFIG_COLUMN_SCHEMA => array(
        'objectID' => 'uint32',
        'ngram' => 'char3',
      ),
      self::CONFIG_KEY_SCHEMA => array(
        'key_ngram' => array(
          'columns' => array('ngram', 'objectID'),
        ),
        'key_object' => array(
          'columns' => array('objectID'),
        ),
      ),
    ) + parent::getConfiguration();
  }

  /**
   * @return string Table name, built as "<application>_<key>_ngrams".
   */
  public function getTableName() {
    $application = $this->getApplicationName();
    $key = $this->getNgramKey();
    return "{$application}_{$key}_ngrams";
  }

  /**
   * Split a string into tokens on runs of spaces.
   *
   * Leading and trailing spaces are removed first. An empty input yields a
   * single empty token.
   *
   * @param string String to tokenize.
   * @return list<string> Tokens.
   */
  final public function tokenizeString($value) {
    $value = trim($value, ' ');
    $value = preg_split('/ +/', $value);
    return $value;
  }

  /**
   * Extract the unique three-character ngrams from a string.
   *
   * Each token is lowercased and then padded according to the mode:
   *
   *   - `query`: no padding, so ngrams match anywhere inside a word.
   *   - `index`: a space on both sides, so word starts and ends are indexed.
   *   - `prefix`: a leading space only, so ngrams match at word starts.
   *
   * Tokens are sliced by UTF-8 character rather than by byte, so a
   * multibyte character is never cut in half and every ngram is exactly
   * three characters long.
   *
   * NOTE(review): `phutil_utf8v()` presumably rejects invalid UTF-8; confirm
   * that values reaching this method are always valid UTF-8.
   *
   * @param string String to extract ngrams from.
   * @param string One of `query`, `index` or `prefix`.
   * @return list<string> Unique ngrams, sorted.
   */
  final public function getNgramsFromString($value, $mode) {
    $tokens = $this->tokenizeString($value);

    $ngrams = array();
    foreach ($tokens as $token) {
      $token = phutil_utf8_strtolower($token);

      switch ($mode) {
        case 'query':
          break;
        case 'index':
          $token = ' '.$token.' ';
          break;
        case 'prefix':
          $token = ' '.$token;
          break;
      }

      // Work on characters, not bytes: strlen() and substr() would split
      // multibyte sequences.
      $characters = phutil_utf8v($token);

      $len = (count($characters) - 2);
      for ($ii = 0; $ii < $len; $ii++) {
        $ngram = implode('', array_slice($characters, $ii, 3));
        $ngrams[$ngram] = $ngram;
      }
    }

    ksort($ngrams);

    // Return the values rather than the keys: PHP converts all-digit string
    // keys like "123" to integers, and callers expect strings.
    return array_values($ngrams);
  }

  /**
   * Replace the stored ngrams for an object with ngrams of the current
   * value.
   *
   * Existing rows for the object are deleted first. New rows are inserted
   * only if the value produces at least one ngram.
   *
   * @param int ID of the object the value belongs to.
   * @return this
   */
  final public function writeNgram($object_id) {
    $ngrams = $this->getNgramsFromString($this->getValue(), 'index');
    $conn_w = $this->establishConnection('w');

    $sql = array();
    foreach ($ngrams as $ngram) {
      $sql[] = qsprintf(
        $conn_w,
        '(%d, %s)',
        $object_id,
        $ngram);
    }

    queryfx(
      $conn_w,
      'DELETE FROM %T WHERE objectID = %d',
      $this->getTableName(),
      $object_id);

    if ($sql) {
      queryfx(
        $conn_w,
        'INSERT INTO %T (objectID, ngram) VALUES %Q',
        $this->getTableName(),
        implode(', ', $sql));
    }

    return $this;
  }

}
+139
src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php
··· 26 26 private $edgeLogicConstraintsAreValid = false; 27 27 private $spacePHIDs; 28 28 private $spaceIsArchived; 29 + private $ngrams = array(); 29 30 30 31 protected function getPageCursors(array $page) { 31 32 return array( ··· 253 254 $joins = array(); 254 255 $joins[] = $this->buildEdgeLogicJoinClause($conn); 255 256 $joins[] = $this->buildApplicationSearchJoinClause($conn); 257 + $joins[] = $this->buildNgramsJoinClause($conn); 256 258 return $joins; 257 259 } 258 260 ··· 274 276 $where[] = $this->buildPagingClause($conn); 275 277 $where[] = $this->buildEdgeLogicWhereClause($conn); 276 278 $where[] = $this->buildSpacesWhereClause($conn); 279 + $where[] = $this->buildNgramsWhereClause($conn); 277 280 return $where; 278 281 } 279 282 ··· 321 324 } 322 325 323 326 if ($this->getApplicationSearchMayJoinMultipleRows()) { 327 + return true; 328 + } 329 + 330 + if ($this->shouldGroupNgramResultRows()) { 324 331 return true; 325 332 } 326 333 ··· 1342 1349 protected function isCustomFieldOrderKey($key) { 1343 1350 $prefix = 'custom:'; 1344 1351 return !strncmp($key, $prefix, strlen($prefix)); 1352 + } 1353 + 1354 + 1355 + /* -( Ngrams )------------------------------------------------------------- */ 1356 + 1357 + 1358 + protected function withNgramsConstraint( 1359 + PhabricatorSearchNgrams $index, 1360 + $value) { 1361 + 1362 + if (strlen($value)) { 1363 + $this->ngrams[] = array( 1364 + 'index' => $index, 1365 + 'value' => $value, 1366 + 'length' => count(phutil_utf8v($value)), 1367 + ); 1368 + } 1369 + 1370 + return $this; 1371 + } 1372 + 1373 + 1374 + protected function buildNgramsJoinClause(AphrontDatabaseConnection $conn) { 1375 + $flat = array(); 1376 + foreach ($this->ngrams as $spec) { 1377 + $index = $spec['index']; 1378 + $value = $spec['value']; 1379 + $length = $spec['length']; 1380 + 1381 + if ($length >= 3) { 1382 + $ngrams = $index->getNgramsFromString($value, 'query'); 1383 + $prefix = false; 1384 + } else if ($length == 2) { 1385 + $ngrams = 
$index->getNgramsFromString($value, 'prefix'); 1386 + $prefix = false; 1387 + } else { 1388 + $ngrams = array(' '.$value); 1389 + $prefix = true; 1390 + } 1391 + 1392 + foreach ($ngrams as $ngram) { 1393 + $flat[] = array( 1394 + 'table' => $index->getTableName(), 1395 + 'ngram' => $ngram, 1396 + 'prefix' => $prefix, 1397 + ); 1398 + } 1399 + } 1400 + 1401 + // MySQL only allows us to join a maximum of 61 tables per query. Each 1402 + // ngram is going to cost us a join toward that limit, so if the user 1403 + // specified a very long query string, just pick 16 of the ngrams 1404 + // at random. 1405 + if (count($flat) > 16) { 1406 + shuffle($flat); 1407 + $flat = array_slice($flat, 0, 16); 1408 + } 1409 + 1410 + $alias = $this->getPrimaryTableAlias(); 1411 + if ($alias) { 1412 + $id_column = qsprintf($conn, '%T.%T', $alias, 'id'); 1413 + } else { 1414 + $id_column = qsprintf($conn, '%T', 'id'); 1415 + } 1416 + 1417 + $idx = 1; 1418 + $joins = array(); 1419 + foreach ($flat as $spec) { 1420 + $table = $spec['table']; 1421 + $ngram = $spec['ngram']; 1422 + $prefix = $spec['prefix']; 1423 + 1424 + $alias = 'ngm'.$idx++; 1425 + 1426 + if ($prefix) { 1427 + $joins[] = qsprintf( 1428 + $conn, 1429 + 'JOIN %T %T ON %T.objectID = %Q AND %T.ngram LIKE %>', 1430 + $table, 1431 + $alias, 1432 + $alias, 1433 + $id_column, 1434 + $alias, 1435 + $ngram); 1436 + } else { 1437 + $joins[] = qsprintf( 1438 + $conn, 1439 + 'JOIN %T %T ON %T.objectID = %Q AND %T.ngram = %s', 1440 + $table, 1441 + $alias, 1442 + $alias, 1443 + $id_column, 1444 + $alias, 1445 + $ngram); 1446 + } 1447 + } 1448 + 1449 + return $joins; 1450 + } 1451 + 1452 + 1453 + protected function buildNgramsWhereClause(AphrontDatabaseConnection $conn) { 1454 + $where = array(); 1455 + 1456 + foreach ($this->ngrams as $ngram) { 1457 + $index = $ngram['index']; 1458 + $value = $ngram['value']; 1459 + 1460 + $column = $index->getColumnName(); 1461 + $alias = $this->getPrimaryTableAlias(); 1462 + if ($alias) { 1463 + 
$column = qsprintf($conn, '%T.%T', $alias, $column); 1464 + } else { 1465 + $column = qsprintf($conn, '%T', $column); 1466 + } 1467 + 1468 + $tokens = $index->tokenizeString($value); 1469 + foreach ($tokens as $token) { 1470 + $where[] = qsprintf( 1471 + $conn, 1472 + '%Q LIKE %~', 1473 + $column, 1474 + $token); 1475 + } 1476 + } 1477 + 1478 + return $where; 1479 + } 1480 + 1481 + 1482 + protected function shouldGroupNgramResultRows() { 1483 + return (bool)$this->ngrams; 1345 1484 } 1346 1485 1347 1486