@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.) hq.recaptime.dev/wiki/Phorge
phorge phabricator
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Execute fulltext queries using a subquery instead of by ordering the entire result set

Summary:
Ref T6740. Currently, we issue fulltext queries with an "ORDER BY <score>" on the entire result set.

For very large result sets, this can require MySQL to do a lot of work. However, this work is generally useless: if you search for a common word like "diff" or "internet" and match 4,000 documents, the chance that we rank the document you were thinking of at the top of the result set is nearly zero. It's more useful to return quickly and let the user see that they need to narrow their query to get useful results.

Instead of doing all that work, let MySQL find up to 1,000 matches in an unordered subquery, then sort only those matches by score and pick the best ones out of them.

This actual change is a little flimsy, since our index isn't really big enough to suffer indexing issues. However, searching for common terms on my local install (where I have some large repositories imported and indexed) drops from ~40ms to ~10ms.

My hope is to improve downstream performance for queries like "translatewiki" here, particularly:

<https://phabricator.wikimedia.org/T143863>

That query matches about 300 trillion documents but there's a ~0% chance that the one the user wants is at the top. It takes a couple of seconds to execute, for me. Better to return quickly and let the user refine their results.

I think this will also make some other changes related to stemming easier.

This also removes the "list users first" ordering on the query, which made performance more complicated and seems irrelevant now that we have the typeahead.

Test Plan:
- Searched for some common terms like "code" locally, saw similar results with better performance.
- Searched for useful queries (i.e., ones with a small result set), got identical results.

Reviewers: chad

Reviewed By: chad

Maniphest Tasks: T6740

Differential Revision: https://secure.phabricator.com/D16944

+83 -62
+83 -62
src/applications/search/fulltextstorage/PhabricatorMySQLFulltextStorageEngine.php
··· 153 153 } 154 154 155 155 public function executeSearch(PhabricatorSavedQuery $query) { 156 - $where = array(); 157 - $join = array(); 158 - $order = 'ORDER BY documentCreated DESC'; 156 + $table = new PhabricatorSearchDocument(); 157 + $document_table = $table->getTableName(); 158 + $conn = $table->establishConnection('r'); 159 + 160 + $subquery = $this->newFulltextSubquery($query, $conn); 161 + 162 + $offset = (int)$query->getParameter('offset', 0); 163 + $limit = (int)$query->getParameter('limit', 25); 164 + 165 + // NOTE: We must JOIN the subquery in order to apply a limit. 166 + $results = queryfx_all( 167 + $conn, 168 + 'SELECT 169 + documentPHID, 170 + MAX(fieldScore) AS documentScore 171 + FROM (%Q) query 172 + JOIN %T root ON query.documentPHID = root.phid 173 + GROUP BY documentPHID 174 + ORDER BY documentScore DESC 175 + LIMIT %d, %d', 176 + $subquery, 177 + $document_table, 178 + $offset, 179 + $limit); 180 + 181 + return ipull($results, 'documentPHID'); 182 + } 183 + 184 + private function newFulltextSubquery( 185 + PhabricatorSavedQuery $query, 186 + AphrontDatabaseConnection $conn) { 187 + 188 + $field = new PhabricatorSearchDocumentField(); 189 + $field_table = $field->getTableName(); 159 190 160 - $dao_doc = new PhabricatorSearchDocument(); 161 - $dao_field = new PhabricatorSearchDocumentField(); 191 + $document = new PhabricatorSearchDocument(); 192 + $document_table = $document->getTableName(); 162 193 163 - $t_doc = $dao_doc->getTableName(); 164 - $t_field = $dao_field->getTableName(); 194 + $select = array(); 195 + $select[] = 'document.phid AS documentPHID'; 165 196 166 - $conn_r = $dao_doc->establishConnection('r'); 197 + $join = array(); 198 + $where = array(); 167 199 168 200 $raw_query = $query->getParameter('query'); 169 - $q = $this->compileQuery($raw_query); 201 + $compiled_query = $this->compileQuery($raw_query); 202 + if (strlen($compiled_query)) { 203 + $select[] = qsprintf( 204 + $conn, 205 + 'MATCH(corpus) AGAINST (%s IN 
BOOLEAN MODE) AS fieldScore', 206 + $compiled_query); 170 207 171 - if (strlen($q)) { 172 - $join[] = qsprintf( 173 - $conn_r, 208 + $join[] = qsprintf( 209 + $conn, 174 210 '%T field ON field.phid = document.phid', 175 - $t_field); 211 + $field_table); 212 + 176 213 $where[] = qsprintf( 177 - $conn_r, 214 + $conn, 178 215 'MATCH(corpus) AGAINST (%s IN BOOLEAN MODE)', 179 - $q); 216 + $compiled_query); 180 217 181 - // When searching for a string, promote user listings above other 182 - // listings. 183 - $order = qsprintf( 184 - $conn_r, 185 - 'ORDER BY 186 - IF(documentType = %s, 0, 1) ASC, 187 - MAX(MATCH(corpus) AGAINST (%s)) DESC', 188 - 'USER', 189 - $q); 190 - 191 - $field = $query->getParameter('field'); 192 - if ($field) { 218 + if ($query->getParameter('field')) { 193 219 $where[] = qsprintf( 194 - $conn_r, 220 + $conn, 195 221 'field.field = %s', 196 222 $field); 197 223 } 224 + } else { 225 + $select[] = qsprintf( 226 + $conn, 227 + 'document.dateCreated AS fieldScore'); 198 228 } 199 229 200 230 $exclude = $query->getParameter('exclude'); 201 231 if ($exclude) { 202 - $where[] = qsprintf($conn_r, 'document.phid != %s', $exclude); 232 + $where[] = qsprintf( 233 + $conn, 234 + 'document.phid != %s', 235 + $exclude); 203 236 } 204 237 205 238 $types = $query->getParameter('types'); 206 239 if ($types) { 207 - if (strlen($q)) { 240 + if (strlen($compiled_query)) { 208 241 $where[] = qsprintf( 209 - $conn_r, 242 + $conn, 210 243 'field.phidType IN (%Ls)', 211 244 $types); 212 245 } 246 + 213 247 $where[] = qsprintf( 214 - $conn_r, 248 + $conn, 215 249 'document.documentType IN (%Ls)', 216 250 $types); 217 251 } 218 252 219 253 $join[] = $this->joinRelationship( 220 - $conn_r, 254 + $conn, 221 255 $query, 222 256 'authorPHIDs', 223 257 PhabricatorSearchRelationship::RELATIONSHIP_AUTHOR); ··· 231 265 232 266 if ($include_open && !$include_closed) { 233 267 $join[] = $this->joinRelationship( 234 - $conn_r, 268 + $conn, 235 269 $query, 236 270 'statuses', 237 
271 $open_rel, 238 272 true); 239 273 } else if ($include_closed && !$include_open) { 240 274 $join[] = $this->joinRelationship( 241 - $conn_r, 275 + $conn, 242 276 $query, 243 277 'statuses', 244 278 $closed_rel, ··· 247 281 248 282 if ($query->getParameter('withAnyOwner')) { 249 283 $join[] = $this->joinRelationship( 250 - $conn_r, 284 + $conn, 251 285 $query, 252 286 'withAnyOwner', 253 287 PhabricatorSearchRelationship::RELATIONSHIP_OWNER, 254 288 true); 255 289 } else if ($query->getParameter('withUnowned')) { 256 290 $join[] = $this->joinRelationship( 257 - $conn_r, 291 + $conn, 258 292 $query, 259 293 'withUnowned', 260 294 PhabricatorSearchRelationship::RELATIONSHIP_UNOWNED, 261 295 true); 262 296 } else { 263 297 $join[] = $this->joinRelationship( 264 - $conn_r, 298 + $conn, 265 299 $query, 266 300 'ownerPHIDs', 267 301 PhabricatorSearchRelationship::RELATIONSHIP_OWNER); 268 302 } 269 303 270 304 $join[] = $this->joinRelationship( 271 - $conn_r, 305 + $conn, 272 306 $query, 273 307 'subscriberPHIDs', 274 308 PhabricatorSearchRelationship::RELATIONSHIP_SUBSCRIBER); 275 309 276 310 $join[] = $this->joinRelationship( 277 - $conn_r, 311 + $conn, 278 312 $query, 279 313 'projectPHIDs', 280 314 PhabricatorSearchRelationship::RELATIONSHIP_PROJECT); 281 315 282 316 $join[] = $this->joinRelationship( 283 - $conn_r, 317 + $conn, 284 318 $query, 285 319 'repository', 286 320 PhabricatorSearchRelationship::RELATIONSHIP_REPOSITORY); 287 321 288 - $join = array_filter($join); 322 + $select = implode(', ', $select); 289 323 324 + $join = array_filter($join); 290 325 foreach ($join as $key => $clause) { 291 326 $join[$key] = ' JOIN '.$clause; 292 327 } ··· 298 333 $where = ''; 299 334 } 300 335 301 - $offset = (int)$query->getParameter('offset', 0); 302 - $limit = (int)$query->getParameter('limit', 25); 303 - 304 - $hits = queryfx_all( 305 - $conn_r, 306 - 'SELECT 307 - document.phid 308 - FROM %T document 309 - %Q 310 - %Q 311 - GROUP BY document.phid 312 - %Q 313 - 
LIMIT %d, %d', 314 - $t_doc, 336 + return qsprintf( 337 + $conn, 338 + 'SELECT %Q FROM %T document %Q %Q LIMIT 1000', 339 + $select, 340 + $document_table, 315 341 $join, 316 - $where, 317 - $order, 318 - $offset, 319 - $limit); 320 - 321 - return ipull($hits, 'phid'); 342 + $where); 322 343 } 323 344 324 345 protected function joinRelationship(