@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.) hq.recaptime.dev/wiki/Phorge
phorge phabricator
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at recaptime-dev/main 450 lines 12 kB view raw
<?php

/**
 * Fulltext engine extension which writes indexable documents into the
 * "Ferret" engine tables (document, field and ngram tables) for objects
 * implementing PhabricatorFerretInterface.
 */
final class PhabricatorFerretFulltextEngineExtension
  extends PhabricatorFulltextEngineExtension {

  const EXTENSIONKEY = 'ferret';


  /**
   * @return string Human-readable name of this extension.
   */
  public function getExtensionName() {
    return pht('Ferret Fulltext Engine');
  }


  /**
   * Only objects which implement PhabricatorFerretInterface are indexed.
   *
   * @param object $object Candidate object.
   * @return bool True if this extension should index the object.
   */
  public function shouldIndexFulltextObject($object) {
    return ($object instanceof PhabricatorFerretInterface);
  }


  /**
   * Write (or update) the Ferret index rows for an object.
   *
   * The method derives the closed/author/owner state from the document
   * relationships, builds raw, term and stemmed ("normal") corpora for each
   * field, computes ngrams (excluding ngrams listed in the common ngrams
   * table), and then inserts or updates the document, field and ngram rows
   * inside a single transaction.
   *
   * @param object $object Object being indexed; it must provide
   *   newFerretEngine(), establishConnection(), getPHID() and the
   *   transaction methods used below.
   * @param PhabricatorSearchAbstractDocument $document Abstract document
   *   carrying the field and relationship data to index.
   * @return void
   */
  public function indexFulltextObject(
    $object,
    PhabricatorSearchAbstractDocument $document) {

    $engine = $object->newFerretEngine();

    $is_closed = 0;
    $author_phid = null;
    $owner_phid = null;
    foreach ($document->getRelationshipData() as $relationship) {
      list($related_type, $related_phid) = $relationship;
      switch ($related_type) {
        case PhabricatorSearchRelationship::RELATIONSHIP_OPEN:
          $is_closed = 0;
          break;
        case PhabricatorSearchRelationship::RELATIONSHIP_CLOSED:
          $is_closed = 1;
          break;
        case PhabricatorSearchRelationship::RELATIONSHIP_OWNER:
          $owner_phid = $related_phid;
          break;
        case PhabricatorSearchRelationship::RELATIONSHIP_UNOWNED:
          $owner_phid = null;
          break;
        case PhabricatorSearchRelationship::RELATIONSHIP_AUTHOR:
          $author_phid = $related_phid;
          break;
      }
    }

    $stemmer = $engine->newStemmer();

    // Copy all of the "title" and "body" fields to create new "core" fields.
    // This allows users to search "in title or body" with the "core:" prefix.
    // Every field is additionally copied into the "all" field.
    $document_fields = $document->getFieldData();
    $virtual_fields = array();
    foreach ($document_fields as $field) {
      $virtual_fields[] = $field;

      list($key, $raw_corpus) = $field;
      switch ($key) {
        case PhabricatorSearchDocumentFieldType::FIELD_TITLE:
        case PhabricatorSearchDocumentFieldType::FIELD_BODY:
          $virtual_fields[] = array(
            PhabricatorSearchDocumentFieldType::FIELD_CORE,
            $raw_corpus,
          );
          break;
      }

      $virtual_fields[] = array(
        PhabricatorSearchDocumentFieldType::FIELD_ALL,
        $raw_corpus,
      );
    }

    $empty_template = array(
      'raw' => array(),
      'term' => array(),
      'normal' => array(),
    );

    $ferret_corpus_map = array();

    foreach ($virtual_fields as $field) {
      list($key, $raw_corpus) = $field;

      // Skip fields with no content. The null check is explicit because
      // passing null to strlen() is deprecated as of PHP 8.1.
      if ($raw_corpus === null || !strlen($raw_corpus)) {
        continue;
      }

      $term_corpus = $engine->newTermsCorpus($raw_corpus);

      $normal_corpus = $stemmer->stemCorpus($raw_corpus);
      $normal_corpus = $engine->newTermsCorpus($normal_corpus);

      if (!isset($ferret_corpus_map[$key])) {
        $ferret_corpus_map[$key] = $empty_template;
      }

      $ferret_corpus_map[$key]['raw'][] = $raw_corpus;
      $ferret_corpus_map[$key]['term'][] = $term_corpus;
      $ferret_corpus_map[$key]['normal'][] = $normal_corpus;
    }

    // Collapse the per-field corpus lists into one row per field key, and
    // collect every non-empty corpus as input for ngram extraction.
    $ferret_fields = array();
    $ngrams_source = array();
    foreach ($ferret_corpus_map as $key => $fields) {
      $raw_corpus = $fields['raw'];
      $raw_corpus = implode("\n", $raw_corpus);
      if (strlen($raw_corpus)) {
        $ngrams_source[] = $raw_corpus;
      }

      $normal_corpus = $fields['normal'];
      $normal_corpus = implode("\n", $normal_corpus);
      if (strlen($normal_corpus)) {
        $ngrams_source[] = $normal_corpus;
      }

      $term_corpus = $fields['term'];
      $term_corpus = implode("\n", $term_corpus);
      if (strlen($term_corpus)) {
        $ngrams_source[] = $term_corpus;
      }

      $ferret_fields[] = array(
        'fieldKey' => $key,
        'rawCorpus' => $raw_corpus,
        'termCorpus' => $term_corpus,
        'normalCorpus' => $normal_corpus,
      );
    }
    $ngrams_source = implode("\n", $ngrams_source);

    $ngram_engine = new PhabricatorSearchNgramEngine();
    $ngrams = $ngram_engine->getTermNgramsFromString($ngrams_source);

    $conn = $object->establishConnection('w');

    // Drop any ngrams which appear in the common ngrams table.
    if ($ngrams) {
      $common = queryfx_all(
        $conn,
        'SELECT ngram FROM %T WHERE ngram IN (%Ls)',
        $engine->getCommonNgramsTableName(),
        $ngrams);
      $common = ipull($common, 'ngram', 'ngram');

      foreach ($ngrams as $key => $ngram) {
        if (isset($common[$ngram])) {
          unset($ngrams[$key]);
          continue;
        }

        // NOTE: MySQL discards trailing whitespace in CHAR(X) columns.
        $trimmed_ngram = rtrim($ngram, ' ');
        if (isset($common[$trimmed_ngram])) {
          unset($ngrams[$key]);
          continue;
        }
      }
    }

    $object->openTransaction();

    try {
      // See T13587. If this document already exists in the index, we try to
      // update the existing rows to avoid leaving the ngrams table heavily
      // fragmented.

      $old_document = queryfx_one(
        $conn,
        'SELECT id FROM %T WHERE objectPHID = %s',
        $engine->getDocumentTableName(),
        $object->getPHID());
      if ($old_document) {
        $old_document_id = (int)$old_document['id'];
      } else {
        $old_document_id = null;
      }

      if ($old_document_id === null) {
        queryfx(
          $conn,
          'INSERT INTO %T (objectPHID, isClosed, epochCreated, epochModified,
            authorPHID, ownerPHID) VALUES (%s, %d, %d, %d, %ns, %ns)',
          $engine->getDocumentTableName(),
          $object->getPHID(),
          $is_closed,
          $document->getDocumentCreated(),
          $document->getDocumentModified(),
          $author_phid,
          $owner_phid);
        $document_id = $conn->getInsertID();

        $is_new = true;
      } else {
        $document_id = $old_document_id;
        queryfx(
          $conn,
          'UPDATE %T
            SET
              isClosed = %d,
              epochCreated = %d,
              epochModified = %d,
              authorPHID = %ns,
              ownerPHID = %ns
            WHERE id = %d',
          $engine->getDocumentTableName(),
          $is_closed,
          $document->getDocumentCreated(),
          $document->getDocumentModified(),
          $author_phid,
          $owner_phid,
          $document_id);

        $is_new = false;
      }

      $this->updateStoredFields(
        $conn,
        $is_new,
        $document_id,
        $engine,
        $ferret_fields);

      $this->updateStoredNgrams(
        $conn,
        $is_new,
        $document_id,
        $engine,
        $ngrams);

    } catch (Exception $ex) {
      $object->killTransaction();
      throw $ex;
    } catch (Throwable $ex) {
      $object->killTransaction();
      throw $ex;
    }

    $object->saveTransaction();
  }

  /**
   * Synchronize the stored field rows of a document with a new field list.
   *
   * Rows whose field key no longer appears are deleted, rows whose corpora
   * changed are updated in place, unchanged rows are left alone, and rows
   * for new field keys are inserted.
   *
   * @param AphrontDatabaseConnection $conn Connection to write with.
   * @param bool $is_new True if the document row was just inserted; in that
   *   case no existing field rows are loaded.
   * @param int $document_id ID of the document row.
   * @param PhabricatorFerretEngine $engine Engine providing table names.
   * @param list<map<string, string>> $new_fields Field rows with the keys
   *   "fieldKey", "rawCorpus", "termCorpus" and "normalCorpus".
   * @return void
   */
  private function updateStoredFields(
    AphrontDatabaseConnection $conn,
    $is_new,
    $document_id,
    PhabricatorFerretEngine $engine,
    $new_fields) {

    if (!$is_new) {
      $old_fields = queryfx_all(
        $conn,
        'SELECT * FROM %T WHERE documentID = %d',
        $engine->getFieldTableName(),
        $document_id);
    } else {
      $old_fields = array();
    }

    $old_fields = ipull($old_fields, null, 'fieldKey');
    $new_fields = ipull($new_fields, null, 'fieldKey');

    $delete_rows = array();
    $insert_rows = array();
    $update_rows = array();

    foreach ($old_fields as $field_key => $old_field) {
      if (!isset($new_fields[$field_key])) {
        $delete_rows[] = $old_field;
      }
    }

    $compare_keys = array(
      'rawCorpus',
      'termCorpus',
      'normalCorpus',
    );

    foreach ($new_fields as $field_key => $new_field) {
      if (!isset($old_fields[$field_key])) {
        $insert_rows[] = $new_field;
        continue;
      }

      $old_field = $old_fields[$field_key];

      $same_row = true;
      foreach ($compare_keys as $compare_key) {
        if ($old_field[$compare_key] !== $new_field[$compare_key]) {
          $same_row = false;
          break;
        }
      }

      if ($same_row) {
        continue;
      }

      // Reuse the existing row ID so the row is updated in place.
      $new_field['id'] = $old_field['id'];
      $update_rows[] = $new_field;
    }

    if ($delete_rows) {
      queryfx(
        $conn,
        'DELETE FROM %T WHERE id IN (%Ld)',
        $engine->getFieldTableName(),
        ipull($delete_rows, 'id'));
    }

    foreach ($update_rows as $update_row) {
      queryfx(
        $conn,
        'UPDATE %T
          SET
            rawCorpus = %s,
            termCorpus = %s,
            normalCorpus = %s
          WHERE id = %d',
        $engine->getFieldTableName(),
        $update_row['rawCorpus'],
        $update_row['termCorpus'],
        $update_row['normalCorpus'],
        $update_row['id']);
    }

    foreach ($insert_rows as $insert_row) {
      queryfx(
        $conn,
        'INSERT INTO %T (documentID, fieldKey, rawCorpus, termCorpus,
          normalCorpus) VALUES (%d, %s, %s, %s, %s)',
        $engine->getFieldTableName(),
        $document_id,
        $insert_row['fieldKey'],
        $insert_row['rawCorpus'],
        $insert_row['termCorpus'],
        $insert_row['normalCorpus']);
    }
  }

  /**
   * Synchronize the stored ngram rows of a document with a new ngram list.
   *
   * Stored ngrams which are no longer present are deleted and ngrams which
   * are not yet stored are inserted; both writes are issued in chunks.
   *
   * @param AphrontDatabaseConnection $conn Connection to write with.
   * @param bool $is_new True if the document row was just inserted; in that
   *   case no existing ngram rows are loaded.
   * @param int $document_id ID of the document row.
   * @param PhabricatorFerretEngine $engine Engine providing table names.
   * @param list<string> $new_ngrams Ngrams the document should have.
   * @return void
   */
  private function updateStoredNgrams(
    AphrontDatabaseConnection $conn,
    $is_new,
    $document_id,
    PhabricatorFerretEngine $engine,
    $new_ngrams) {

    if ($is_new) {
      $old_ngrams = array();
    } else {
      $old_ngrams = queryfx_all(
        $conn,
        'SELECT id, ngram FROM %T WHERE documentID = %d',
        $engine->getNgramsTableName(),
        $document_id);
    }

    $old_ngrams = ipull($old_ngrams, 'id', 'ngram');
    $new_ngrams = array_fuse($new_ngrams);

    $delete_ids = array();
    $insert_ngrams = array();

    // NOTE: MySQL discards trailing whitespace in CHAR(X) columns. A stored
    // ngram is therefore also matched against the new ngram with one
    // trailing space appended, and a new ngram against its right-trimmed
    // form.

    foreach ($old_ngrams as $ngram => $id) {
      if (isset($new_ngrams[$ngram])) {
        continue;
      }

      $untrimmed_ngram = $ngram.' ';
      if (isset($new_ngrams[$untrimmed_ngram])) {
        continue;
      }

      $delete_ids[] = $id;
    }

    foreach ($new_ngrams as $ngram) {
      if (isset($old_ngrams[$ngram])) {
        continue;
      }

      $trimmed_ngram = rtrim($ngram, ' ');
      if (isset($old_ngrams[$trimmed_ngram])) {
        continue;
      }

      $insert_ngrams[] = $ngram;
    }

    if ($delete_ids) {
      $sql = array();
      foreach ($delete_ids as $id) {
        $sql[] = qsprintf(
          $conn,
          '%d',
          $id);
      }

      foreach (PhabricatorLiskDAO::chunkSQL($sql) as $chunk) {
        queryfx(
          $conn,
          'DELETE FROM %T WHERE id IN (%LQ)',
          $engine->getNgramsTableName(),
          $chunk);
      }
    }

    if ($insert_ngrams) {
      $sql = array();
      foreach ($insert_ngrams as $ngram) {
        $sql[] = qsprintf(
          $conn,
          '(%d, %s)',
          $document_id,
          $ngram);
      }

      foreach (PhabricatorLiskDAO::chunkSQL($sql) as $chunk) {
        queryfx(
          $conn,
          'INSERT INTO %T (documentID, ngram) VALUES %LQ',
          $engine->getNgramsTableName(),
          $chunk);
      }
    }
  }

  /**
   * Build the search functions ("all", "title", "body", "core" and
   * "comment") which this extension provides, each bound to the
   * corresponding document field key.
   *
   * @return list<FerretConfigurableSearchFunction> Search functions.
   */
  public function newFerretSearchFunctions() {
    return array(
      id(new FerretConfigurableSearchFunction())
        ->setFerretFunctionName('all')
        ->setFerretFieldKey(PhabricatorSearchDocumentFieldType::FIELD_ALL),
      id(new FerretConfigurableSearchFunction())
        ->setFerretFunctionName('title')
        ->setFerretFieldKey(PhabricatorSearchDocumentFieldType::FIELD_TITLE),
      id(new FerretConfigurableSearchFunction())
        ->setFerretFunctionName('body')
        ->setFerretFieldKey(PhabricatorSearchDocumentFieldType::FIELD_BODY),
      id(new FerretConfigurableSearchFunction())
        ->setFerretFunctionName('core')
        ->setFerretFieldKey(PhabricatorSearchDocumentFieldType::FIELD_CORE),
      id(new FerretConfigurableSearchFunction())
        ->setFerretFunctionName('comment')
        ->setFerretFieldKey(PhabricatorSearchDocumentFieldType::FIELD_COMMENT),
    );
  }

}