@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.) hq.recaptime.dev/wiki/Phorge
phorge phabricator
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Pull Git filesize logic into a separate LowLevel query and use more Iterators

Summary:
Depends on D19829. Ref T13216. See PHI908. The current implementation is a lot of logic to keep in `CommitHookEngine`, and it will likely fail if `git diff-tree` produces more than 2GB of output.

Pull it out and make it slightly more robust against enormous commits. It's probably limited by this, now:

```
implode("\n", $every_path)
```

We could replace that with some `PhutilReverseRopeSource` primitive or something similar, but since we don't have one of those and it seems unlikely that we'll hit this case in practice, I left it as-is for now and converted only the easy parts to be stream-oriented.

Test Plan:
Used this script to test the query against various commits, got good results:

```
<?php

require_once 'scripts/init/init-script.php';

$viewer = PhabricatorUser::getOmnipotentUser();

$repository = id(new PhabricatorRepositoryQuery())
->setViewer($viewer)
->withCallsigns(array('P'))
->executeOne();

var_dump(
id(new DiffusionLowLevelFilesizeQuery())
->setRepository($repository)
->withIdentifier($argv[1])
->execute());
```

Used this to find large commits in history and pull filesizes (worked great, although our largest commit only touches a couple thousand paths):

```
for hash in `git log --format=%H`; do echo -n $hash; echo -n ' '; git diff-tree -r --no-commit-id $hash | wc -l | awk '{print $1}'; done | awk '{print $2 " " $1}' | sort -n
```

Reviewers: amckinley

Reviewed By: amckinley

Maniphest Tasks: T13216

Differential Revision: https://secure.phabricator.com/D19830

+131 -90
+2
src/__phutil_library_map__.php
··· 839 839 'DiffusionLookSoonConduitAPIMethod' => 'applications/diffusion/conduit/DiffusionLookSoonConduitAPIMethod.php', 840 840 'DiffusionLowLevelCommitFieldsQuery' => 'applications/diffusion/query/lowlevel/DiffusionLowLevelCommitFieldsQuery.php', 841 841 'DiffusionLowLevelCommitQuery' => 'applications/diffusion/query/lowlevel/DiffusionLowLevelCommitQuery.php', 842 + 'DiffusionLowLevelFilesizeQuery' => 'applications/diffusion/query/lowlevel/DiffusionLowLevelFilesizeQuery.php', 842 843 'DiffusionLowLevelGitRefQuery' => 'applications/diffusion/query/lowlevel/DiffusionLowLevelGitRefQuery.php', 843 844 'DiffusionLowLevelMercurialBranchesQuery' => 'applications/diffusion/query/lowlevel/DiffusionLowLevelMercurialBranchesQuery.php', 844 845 'DiffusionLowLevelMercurialPathsQuery' => 'applications/diffusion/query/lowlevel/DiffusionLowLevelMercurialPathsQuery.php', ··· 6250 6251 'DiffusionLookSoonConduitAPIMethod' => 'DiffusionConduitAPIMethod', 6251 6252 'DiffusionLowLevelCommitFieldsQuery' => 'DiffusionLowLevelQuery', 6252 6253 'DiffusionLowLevelCommitQuery' => 'DiffusionLowLevelQuery', 6254 + 'DiffusionLowLevelFilesizeQuery' => 'DiffusionLowLevelQuery', 6253 6255 'DiffusionLowLevelGitRefQuery' => 'DiffusionLowLevelQuery', 6254 6256 'DiffusionLowLevelMercurialBranchesQuery' => 'DiffusionLowLevelQuery', 6255 6257 'DiffusionLowLevelMercurialPathsQuery' => 'DiffusionLowLevelQuery',
+4 -90
src/applications/diffusion/engine/DiffusionCommitHookEngine.php
··· 1305 1305 1306 1306 public function loadFileSizesForCommit($identifier) { 1307 1307 $repository = $this->getRepository(); 1308 - $vcs = $repository->getVersionControlSystem(); 1309 1308 1310 - $path_sizes = array(); 1311 - 1312 - switch ($vcs) { 1313 - case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT: 1314 - list($paths_raw) = $repository->execxLocalCommand( 1315 - 'diff-tree -z -r --no-commit-id %s --', 1316 - $identifier); 1317 - 1318 - // With "-z" we get "<fields>\0<filename>\0" for each line. Group the 1319 - // delimited text into "<fields>, <filename>" pairs. 1320 - $paths_raw = trim($paths_raw, "\0"); 1321 - $paths_raw = explode("\0", $paths_raw); 1322 - if (count($paths_raw) % 2) { 1323 - throw new Exception( 1324 - pht( 1325 - 'Unexpected number of output lines from "git diff-tree" when '. 1326 - 'processing commit ("%s"): got %s lines, expected an even '. 1327 - 'number.', 1328 - $identifier, 1329 - phutil_count($paths_raw))); 1330 - } 1331 - $paths_raw = array_chunk($paths_raw, 2); 1332 - 1333 - $paths = array(); 1334 - foreach ($paths_raw as $path_raw) { 1335 - list($fields, $pathname) = $path_raw; 1336 - $fields = explode(' ', $fields); 1337 - 1338 - // Fields are: 1339 - // 1340 - // :100644 100644 aaaa bbbb M 1341 - // 1342 - // [0] Old file mode. 1343 - // [1] New file mode. 1344 - // [2] Old object hash. 1345 - // [3] New object hash. 1346 - // [4] Change mode. 
1347 - 1348 - $paths[] = array( 1349 - 'path' => $pathname, 1350 - 'newHash' => $fields[3], 1351 - ); 1352 - } 1353 - 1354 - if ($paths) { 1355 - $check_paths = array(); 1356 - foreach ($paths as $path) { 1357 - if ($path['newHash'] === self::EMPTY_HASH) { 1358 - $path_sizes[$path['path']] = 0; 1359 - continue; 1360 - } 1361 - $check_paths[$path['newHash']][] = $path['path']; 1362 - } 1363 - 1364 - if ($check_paths) { 1365 - $future = $repository->getLocalCommandFuture( 1366 - 'cat-file --batch-check=%s', 1367 - '%(objectsize)') 1368 - ->write(implode("\n", array_keys($check_paths))); 1369 - 1370 - list($sizes) = $future->resolvex(); 1371 - $sizes = trim($sizes); 1372 - $sizes = phutil_split_lines($sizes, false); 1373 - if (count($sizes) !== count($check_paths)) { 1374 - throw new Exception( 1375 - pht( 1376 - 'Unexpected number of output lines from "git cat-file" when '. 1377 - 'processing commit ("%s"): got %s lines, expected %s.', 1378 - $identifier, 1379 - phutil_count($sizes), 1380 - phutil_count($check_paths))); 1381 - } 1382 - 1383 - foreach ($check_paths as $object_hash => $path_names) { 1384 - $object_size = (int)array_shift($sizes); 1385 - foreach ($path_names as $path_name) { 1386 - $path_sizes[$path_name] = $object_size; 1387 - } 1388 - } 1389 - } 1390 - } 1391 - break; 1392 - default: 1393 - throw new Exception( 1394 - pht( 1395 - 'File size limits are not supported for this VCS.')); 1396 - } 1397 - 1398 - return $path_sizes; 1309 + return id(new DiffusionLowLevelFilesizeQuery()) 1310 + ->setRepository($repository) 1311 + ->withIdentifier($identifier) 1312 + ->execute(); 1399 1313 } 1400 1314 1401 1315 public function loadCommitRefForCommit($identifier) {
+125
src/applications/diffusion/query/lowlevel/DiffusionLowLevelFilesizeQuery.php
<?php

/**
 * Low-level query which loads the size of every path touched by a commit.
 *
 * Only Git repositories are supported; any other repository type raises an
 * exception. The result is a map from path name to object size, as reported
 * by the "%(objectsize)" format of "git cat-file --batch-check".
 */
final class DiffusionLowLevelFilesizeQuery
  extends DiffusionLowLevelQuery {

  private $identifier;

  /**
   * Select the commit to examine.
   *
   * @param string Commit identifier, passed to "git diff-tree".
   * @return this
   */
  public function withIdentifier($identifier) {
    $this->identifier = $identifier;
    return $this;
  }

  /**
   * Dispatch to the implementation for the repository's version control
   * system.
   *
   * @return map<string, int> Map from path name to size.
   * @throws PhutilInvalidStateException If no identifier was provided.
   * @throws Exception If the repository type is not supported.
   */
  protected function executeQuery() {
    // Test for null explicitly: passing null to strlen() is deprecated in
    // PHP 8.1 and newer.
    if ($this->identifier === null || !strlen($this->identifier)) {
      throw new PhutilInvalidStateException('withIdentifier');
    }

    $type = $this->getRepository()->getVersionControlSystem();
    switch ($type) {
      case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT:
        $result = $this->loadGitFilesizes();
        break;
      default:
        throw new Exception(pht('Unsupported repository type "%s"!', $type));
    }

    return $result;
  }

  /**
   * Load path sizes for a Git commit.
   *
   * Runs "git diff-tree" to list the changed paths and their new object
   * hashes, then feeds the distinct hashes to "git cat-file --batch-check"
   * to read the object sizes. The output of both commands is consumed as a
   * stream rather than being buffered as a single string.
   *
   * @return map<string, int> Map from path name to size.
   * @throws Exception If either command produces output with an unexpected
   *   shape.
   */
  private function loadGitFilesizes() {
    $repository = $this->getRepository();
    $identifier = $this->identifier;

    $paths_future = $repository->getLocalCommandFuture(
      'diff-tree -z -r --no-commit-id %s --',
      $identifier);

    // With "-z" we get "<fields>\0<filename>\0" for each line. Process the
    // delimited text as "<fields>, <filename>" pairs.

    $path_lines = id(new LinesOfALargeExecFuture($paths_future))
      ->setDelimiter("\0");

    $paths = array();

    $path_pairs = new PhutilChunkedIterator($path_lines, 2);
    foreach ($path_pairs as $path_pair) {
      if (count($path_pair) != 2) {
        throw new Exception(
          pht(
            'Unexpected number of output lines from "git diff-tree" when '.
            'processing commit ("%s"): expected an even number of lines.',
            $identifier));
      }

      list($raw_fields, $pathname) = array_values($path_pair);
      $fields = explode(' ', $raw_fields);

      // Fields are:
      //
      //   :100644 100644 aaaa bbbb M
      //
      // [0] Old file mode.
      // [1] New file mode.
      // [2] Old object hash.
      // [3] New object hash.
      // [4] Change mode.

      // Refuse records which are too short to contain the new object hash,
      // instead of reading an undefined offset below.
      if (count($fields) < 4) {
        throw new Exception(
          pht(
            'Unexpected output from "git diff-tree" when processing commit '.
            '("%s"): expected at least 4 fields, got "%s".',
            $identifier,
            $raw_fields));
      }

      $paths[] = array(
        'path' => $pathname,
        'newHash' => $fields[3],
      );
    }

    $path_sizes = array();

    if (!$paths) {
      return $path_sizes;
    }

    // Group paths by object hash so each distinct object is only looked up
    // once, even if several paths point at it.
    $check_paths = array();
    foreach ($paths as $path) {
      // Paths whose new hash is the empty hash are recorded with size 0 and
      // are not looked up. (Presumably these are deletions; confirm against
      // the definition of EMPTY_HASH.)
      if ($path['newHash'] === DiffusionCommitHookEngine::EMPTY_HASH) {
        $path_sizes[$path['path']] = 0;
        continue;
      }
      $check_paths[$path['newHash']][] = $path['path'];
    }

    if (!$check_paths) {
      return $path_sizes;
    }

    $future = $repository->getLocalCommandFuture(
      'cat-file --batch-check=%s',
      '%(objectsize)');

    $future->write(implode("\n", array_keys($check_paths)));

    // Output lines are matched to hashes purely by position: each line read
    // consumes the next pending hash from the front of "$check_paths".
    $size_lines = id(new LinesOfALargeExecFuture($future))
      ->setDelimiter("\n");
    foreach ($size_lines as $line) {
      // If every requested hash already has a size, this line is surplus
      // output. Fail explicitly rather than indexing an empty map.
      if (!$check_paths) {
        throw new Exception(
          pht(
            'Unexpected number of output lines from "git cat-file" when '.
            'processing commit ("%s"): got more lines than objects '.
            'requested.',
            $identifier));
      }

      // NOTE(review): Git documents that unresolvable objects are reported
      // as "<object> missing" regardless of the format, which this cast
      // would turn into a meaningless integer. Confirm whether that case
      // (for example, submodule entries) needs explicit handling.
      $object_size = (int)$line;

      $object_hash = head_key($check_paths);
      $path_names = $check_paths[$object_hash];
      unset($check_paths[$object_hash]);

      foreach ($path_names as $path_name) {
        $path_sizes[$path_name] = $object_size;
      }
    }

    // Any hashes still pending mean we got fewer lines than we asked for.
    if ($check_paths) {
      throw new Exception(
        pht(
          'Unexpected number of output lines from "git cat-file" when '.
          'processing commit ("%s").',
          $identifier));
    }

    return $path_sizes;
  }

}