@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.) hq.recaptime.dev/wiki/Phorge
phorge phabricator
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Separate changeset analysis code from DifferentialDiff and provide a standalone `rebuild-changesets` workflow

Summary:
Ref T13137. The "analyze/cache data about changesets" step is becoming more involved. We recently added detection for generated code to support "Ignore generated changes" in Owners, and I now plan to hash the new file content so we can hide changes which have no effect.

Before adding this new hashing step, pull the "detect copied code" and "detect generated code" logic out and move it to a separate `DifferentialChangesetEngine`. Then support doing a changeset rebuild directly with `bin/differential rebuild-changesets`.

This simplifies things a bit and makes testing easier since you don't need to keep creating new revisions to re-run copy/generated/hash logic.

Test Plan: Ran `bin/differential rebuild-changesets --revision Dxxx`, saw changesets rebuild. See also next change.

Reviewers: amckinley

Reviewed By: amckinley

Maniphest Tasks: T13137

Differential Revision: https://secure.phabricator.com/D19452

+323 -216
+4
src/__phutil_library_map__.php
··· 432 432 'DifferentialChangesSinceLastUpdateField' => 'applications/differential/customfield/DifferentialChangesSinceLastUpdateField.php', 433 433 'DifferentialChangeset' => 'applications/differential/storage/DifferentialChangeset.php', 434 434 'DifferentialChangesetDetailView' => 'applications/differential/view/DifferentialChangesetDetailView.php', 435 + 'DifferentialChangesetEngine' => 'applications/differential/engine/DifferentialChangesetEngine.php', 435 436 'DifferentialChangesetFileTreeSideNavBuilder' => 'applications/differential/view/DifferentialChangesetFileTreeSideNavBuilder.php', 436 437 'DifferentialChangesetHTMLRenderer' => 'applications/differential/render/DifferentialChangesetHTMLRenderer.php', 437 438 'DifferentialChangesetListController' => 'applications/differential/controller/DifferentialChangesetListController.php', ··· 2869 2870 'PhabricatorDifferentialExtractWorkflow' => 'applications/differential/management/PhabricatorDifferentialExtractWorkflow.php', 2870 2871 'PhabricatorDifferentialManagementWorkflow' => 'applications/differential/management/PhabricatorDifferentialManagementWorkflow.php', 2871 2872 'PhabricatorDifferentialMigrateHunkWorkflow' => 'applications/differential/management/PhabricatorDifferentialMigrateHunkWorkflow.php', 2873 + 'PhabricatorDifferentialRebuildChangesetsWorkflow' => 'applications/differential/management/PhabricatorDifferentialRebuildChangesetsWorkflow.php', 2872 2874 'PhabricatorDifferentialRevisionTestDataGenerator' => 'applications/differential/lipsum/PhabricatorDifferentialRevisionTestDataGenerator.php', 2873 2875 'PhabricatorDiffusionApplication' => 'applications/diffusion/application/PhabricatorDiffusionApplication.php', 2874 2876 'PhabricatorDiffusionBlameSetting' => 'applications/settings/setting/PhabricatorDiffusionBlameSetting.php', ··· 5729 5731 'PhabricatorDestructibleInterface', 5730 5732 ), 5731 5733 'DifferentialChangesetDetailView' => 'AphrontView', 5734 + 'DifferentialChangesetEngine' => 
'Phobject', 5732 5735 'DifferentialChangesetFileTreeSideNavBuilder' => 'Phobject', 5733 5736 'DifferentialChangesetHTMLRenderer' => 'DifferentialChangesetRenderer', 5734 5737 'DifferentialChangesetListController' => 'DifferentialController', ··· 8530 8533 'PhabricatorDifferentialExtractWorkflow' => 'PhabricatorDifferentialManagementWorkflow', 8531 8534 'PhabricatorDifferentialManagementWorkflow' => 'PhabricatorManagementWorkflow', 8532 8535 'PhabricatorDifferentialMigrateHunkWorkflow' => 'PhabricatorDifferentialManagementWorkflow', 8536 + 'PhabricatorDifferentialRebuildChangesetsWorkflow' => 'PhabricatorDifferentialManagementWorkflow', 8533 8537 'PhabricatorDifferentialRevisionTestDataGenerator' => 'PhabricatorTestDataGenerator', 8534 8538 'PhabricatorDiffusionApplication' => 'PhabricatorApplication', 8535 8539 'PhabricatorDiffusionBlameSetting' => 'PhabricatorInternalSetting',
+223
src/applications/differential/engine/DifferentialChangesetEngine.php
<?php

/**
 * Computes derived analysis data for changesets: whether a changeset looks
 * like generated code, and which added lines appear to have been copied or
 * moved from elsewhere in the same set of changesets.
 *
 * Results are written onto the changeset objects which are passed in
 * (changeset attributes and the "copy:lines" metadata key). This class does
 * not save the changesets itself.
 */
final class DifferentialChangesetEngine extends Phobject {

  /**
   * Run every analysis step over a list of changesets.
   *
   * Generated-code detection runs per changeset. Copy detection runs once
   * over the whole list, because a copied block may originate in a different
   * changeset than the one it was added to.
   *
   * @param list<DifferentialChangeset> Changesets to analyze; they are
   *   modified in place.
   * @return void
   */
  public function rebuildChangesets(array $changesets) {
    assert_instances_of($changesets, 'DifferentialChangeset');

    foreach ($changesets as $changeset) {
      $this->detectGeneratedCode($changeset);
    }

    $this->detectCopiedCode($changesets);
  }


/* -(  Generated Code  )----------------------------------------------------- */


  /**
   * Flag a changeset as generated code, via the trusted and/or untrusted
   * "generated" attribute.
   *
   * Attributes are only written when detection is positive. This method never
   * clears an attribute which is already set on the changeset.
   */
  private function detectGeneratedCode(DifferentialChangeset $changeset) {
    $is_generated_trusted = $this->isTrustedGeneratedCode($changeset);
    if ($is_generated_trusted) {
      $changeset->setTrustedChangesetAttribute(
        DifferentialChangeset::ATTRIBUTE_GENERATED,
        $is_generated_trusted);
    }

    $is_generated_untrusted = $this->isUntrustedGeneratedCode($changeset);
    if ($is_generated_untrusted) {
      $changeset->setUntrustedChangesetAttribute(
        DifferentialChangeset::ATTRIBUTE_GENERATED,
        $is_generated_untrusted);
    }
  }

  /**
   * Test whether the changeset filename matches any of the regular
   * expressions configured in `differential.generated-paths`.
   *
   * @return bool True if at least one configured pattern matches.
   */
  private function isTrustedGeneratedCode(DifferentialChangeset $changeset) {

    $filename = $changeset->getFilename();

    $paths = PhabricatorEnv::getEnvConfig('differential.generated-paths');
    foreach ($paths as $regexp) {
      if (preg_match($regexp, $filename)) {
        return true;
      }
    }

    return false;
  }

  /**
   * Test whether the new file content contains the "generated" marker
   * string. Changesets with no hunks are never considered generated here.
   *
   * @return bool True if the marker appears anywhere in the new file.
   */
  private function isUntrustedGeneratedCode(DifferentialChangeset $changeset) {

    if ($changeset->getHunks()) {
      $new_data = $changeset->makeNewFile();
      // The marker is assembled from two pieces, presumably so that this
      // source file does not itself contain the literal marker.
      if (strpos($new_data, '@'.'generated') !== false) {
        return true;
      }
    }

    return false;
  }


/* -(  Copied Code  )-------------------------------------------------------- */


  /**
   * Detect blocks of added lines which match lines in the old side of any
   * changeset in the list, and record them as copies/moves.
   *
   * For each changeset with at least one detected block, the "copy:lines"
   * metadata key is set to a map from new-file line number to a tuple of
   * `(source file, source line, source line type)`. The source file is the
   * empty string when the source is the changeset's own file.
   *
   * Only lines of at least 30 bytes (after trimming) seed a match, and only
   * blocks of at least 3 lines are recorded.
   *
   * @param list<DifferentialChangeset> Changesets to analyze; they are
   *   modified in place.
   * @return void
   */
  private function detectCopiedCode(array $changesets) {
    $min_width = 30;
    $min_lines = 3;

    // Index the old side of every changeset:
    //   $map:   trimmed line text => list of (file, line) where it occurs
    //   $files: file => line => trimmed line text
    //   $types: file => line => line type
    $map = array();
    $files = array();
    $types = array();
    foreach ($changesets as $changeset) {
      $file = $changeset->getFilename();
      foreach ($changeset->getHunks() as $hunk) {
        $lines = $hunk->getStructuredOldFile();
        foreach ($lines as $line => $info) {
          $type = $info['type'];
          if ($type == '\\') {
            continue;
          }
          $types[$file][$line] = $type;

          $text = $info['text'];
          $text = trim($text);
          $files[$file][$line] = $text;

          if (strlen($text) >= $min_width) {
            $map[$text][] = array($file, $line);
          }
        }
      }
    }

    foreach ($changesets as $changeset) {
      $copies = array();
      foreach ($changeset->getHunks() as $hunk) {
        $added = $hunk->getStructuredNewFile();
        $atype = array();

        foreach ($added as $line => $info) {
          $atype[$line] = $info['type'];
          $added[$line] = trim($info['text']);
        }

        $skip_lines = 0;
        foreach ($added as $line => $code) {
          if ($skip_lines) {
            // We're skipping lines that we already processed because we
            // extended a block above them downward to include them.
            $skip_lines--;
            continue;
          }

          if ($atype[$line] !== '+') {
            // This line hasn't been changed in the new file, so don't try
            // to figure out where it came from.
            continue;
          }

          if (empty($map[$code])) {
            // This line was too short to trigger copy/move detection.
            continue;
          }

          if (count($map[$code]) > 16) {
            // If there are a large number of identical lines in this diff,
            // don't try to figure out where this block came from: the analysis
            // is O(N^2), since we need to compare every line against every
            // other line. Even if we arrive at a result, it is unlikely to be
            // meaningful. See T5041.
            continue;
          }

          // The $best_* variables below are always assigned by the first
          // candidate, because every candidate has a length of at least 1.
          $best_length = 0;

          // Explore all candidates.
          foreach ($map[$code] as $val) {
            list($file, $orig_line) = $val;
            $length = 1;

            // Search backward and forward to find all of the adjacent lines
            // which match.
            foreach (array(-1, 1) as $direction) {
              $offset = $direction;
              while (true) {
                if (isset($copies[$line + $offset])) {
                  // If we run into a block above us which we've already
                  // attributed to a move or copy from elsewhere, stop
                  // looking.
                  break;
                }

                if (!isset($added[$line + $offset])) {
                  // If we've run off the beginning or end of the new file,
                  // stop looking.
                  break;
                }

                if (!isset($files[$file][$orig_line + $offset])) {
                  // If we've run off the beginning or end of the original
                  // file, we also stop looking.
                  break;
                }

                $old = $files[$file][$orig_line + $offset];
                $new = $added[$line + $offset];
                if ($old !== $new) {
                  // If the old line doesn't match the new line, stop
                  // looking.
                  break;
                }

                $length++;
                $offset += $direction;
              }
            }

            if ($length < $best_length) {
              // If we already know of a better source (more matching lines)
              // for this move/copy, stick with that one. We prefer long
              // copies/moves which match a lot of context over short ones.
              continue;
            }

            if ($length == $best_length) {
              if (idx($types[$file], $orig_line) != '-') {
                // If we already know of an equally good source (same number
                // of matching lines) and this isn't a move, stick with the
                // other one. We prefer moves over copies.
                continue;
              }
            }

            $best_length = $length;
            // ($offset - 1) contains number of forward matching lines.
            $best_offset = $offset - 1;
            $best_file = $file;
            $best_line = $orig_line;
          }

          // Compare filenames strictly: with a loose comparison, PHP treats
          // two numeric-looking names (for example "007" and "7") as equal,
          // which would mislabel a cross-file copy as a same-file copy.
          $file = ($best_file === $changeset->getFilename() ? '' : $best_file);
          for ($i = $best_length; $i--; ) {
            $type = idx($types[$best_file], $best_line + $best_offset - $i);
            $copies[$line + $best_offset - $i] = ($best_length < $min_lines
              ? array() // Ignore short blocks.
              : array($file, $best_line + $best_offset - $i, $type));
          }

          $skip_lines = $best_offset;
        }
      }

      // Short blocks were recorded as empty arrays so that later searches
      // stop at them; drop those placeholders before storing the result.
      $copies = array_filter($copies);
      if ($copies) {
        $metadata = $changeset->getMetadata();
        $metadata['copy:lines'] = $copies;
        $changeset->setMetadata($metadata);
      }
    }

  }

}
+92
src/applications/differential/management/PhabricatorDifferentialRebuildChangesetsWorkflow.php
<?php

/**
 * Management workflow behind `bin/differential rebuild-changesets`: loads
 * every changeset of every diff attached to one revision and re-runs the
 * changeset analysis engine over them, one diff at a time.
 */
final class PhabricatorDifferentialRebuildChangesetsWorkflow
  extends PhabricatorDifferentialManagementWorkflow {

  /**
   * Declare the workflow name, help text and the `--revision` argument.
   */
  protected function didConstruct() {
    $this
      ->setName('rebuild-changesets')
      ->setExamples('**rebuild-changesets** --revision __revision__')
      ->setSynopsis(pht('Rebuild changesets for a revision.'))
      ->setArguments(
        array(
          array(
            'name' => 'revision',
            'param' => 'revision',
            'help' => pht('Revision to rebuild changesets for.'),
          ),
        ));
  }

  /**
   * Resolve the revision named by `--revision` and rebuild its changesets.
   *
   * The identifier is first looked up as an object name; if that finds
   * nothing, it is looked up as a revision ID.
   *
   * @param PhutilArgumentParser Parsed command line arguments.
   * @throws PhutilArgumentUsageException If `--revision` is missing, names
   *   an object which is not a revision, or matches no revision.
   */
  public function execute(PhutilArgumentParser $args) {
    $viewer = $this->getViewer();

    $revision_identifier = $args->getArg('revision');
    if (!$revision_identifier) {
      throw new PhutilArgumentUsageException(
        pht('Specify a revision to rebuild changesets for with "--revision".'));
    }

    $revision = id(new PhabricatorObjectQuery())
      ->setViewer($viewer)
      ->withNames(array($revision_identifier))
      ->executeOne();
    if ($revision) {
      if (!($revision instanceof DifferentialRevision)) {
        // The message has a "%s" placeholder, so the identifier must be
        // passed along with it.
        throw new PhutilArgumentUsageException(
          pht(
            'Object "%s" specified by "--revision" must be a Differential '.
            'revision.',
            $revision_identifier));
      }
    } else {
      $revision = id(new DifferentialRevisionQuery())
        ->setViewer($viewer)
        ->withIDs(array($revision_identifier))
        ->executeOne();
    }

    if (!$revision) {
      throw new PhutilArgumentUsageException(
        pht(
          'No revision "%s" exists.',
          $revision_identifier));
    }

    $diffs = id(new DifferentialDiffQuery())
      ->setViewer($viewer)
      ->withRevisionIDs(array($revision->getID()))
      ->execute();

    $changesets = id(new DifferentialChangesetQuery())
      ->setViewer($viewer)
      ->withDiffs($diffs)
      ->needHunks(true)
      ->execute();

    // Rebuild one diff at a time, so copy detection only compares changesets
    // which belong to the same diff.
    $changeset_groups = mgroup($changesets, 'getDiffID');

    foreach ($changeset_groups as $diff_id => $group_changesets) {
      echo tsprintf(
        "%s\n",
        pht(
          'Rebuilding %s changeset(s) for diff ID %d.',
          phutil_count($group_changesets),
          $diff_id));

      foreach ($group_changesets as $changeset) {
        echo tsprintf(
          " %s\n",
          $changeset->getFilename());
      }

      // NOTE(review): nothing visible in this workflow saves the changesets
      // after the engine updates them; confirm whether persistence is meant
      // to happen here.
      id(new DifferentialChangesetEngine())
        ->rebuildChangesets($group_changesets);

      echo tsprintf(
        "%s\n",
        pht('Done.'));
    }
  }


}
-161
src/applications/differential/parser/DifferentialChangesetParser.php
··· 1419 1419 return sprintf('%d%%', 100 * ($covered / ($covered + $not_covered))); 1420 1420 } 1421 1421 1422 - public function detectCopiedCode( 1423 - array $changesets, 1424 - $min_width = 30, 1425 - $min_lines = 3) { 1426 - 1427 - assert_instances_of($changesets, 'DifferentialChangeset'); 1428 - 1429 - $map = array(); 1430 - $files = array(); 1431 - $types = array(); 1432 - foreach ($changesets as $changeset) { 1433 - $file = $changeset->getFilename(); 1434 - foreach ($changeset->getHunks() as $hunk) { 1435 - $lines = $hunk->getStructuredOldFile(); 1436 - foreach ($lines as $line => $info) { 1437 - $type = $info['type']; 1438 - if ($type == '\\') { 1439 - continue; 1440 - } 1441 - $types[$file][$line] = $type; 1442 - 1443 - $text = $info['text']; 1444 - $text = trim($text); 1445 - $files[$file][$line] = $text; 1446 - 1447 - if (strlen($text) >= $min_width) { 1448 - $map[$text][] = array($file, $line); 1449 - } 1450 - } 1451 - } 1452 - } 1453 - 1454 - foreach ($changesets as $changeset) { 1455 - $copies = array(); 1456 - foreach ($changeset->getHunks() as $hunk) { 1457 - $added = $hunk->getStructuredNewFile(); 1458 - $atype = array(); 1459 - 1460 - foreach ($added as $line => $info) { 1461 - $atype[$line] = $info['type']; 1462 - $added[$line] = trim($info['text']); 1463 - } 1464 - 1465 - $skip_lines = 0; 1466 - foreach ($added as $line => $code) { 1467 - if ($skip_lines) { 1468 - // We're skipping lines that we already processed because we 1469 - // extended a block above them downward to include them. 1470 - $skip_lines--; 1471 - continue; 1472 - } 1473 - 1474 - if ($atype[$line] !== '+') { 1475 - // This line hasn't been changed in the new file, so don't try 1476 - // to figure out where it came from. 1477 - continue; 1478 - } 1479 - 1480 - if (empty($map[$code])) { 1481 - // This line was too short to trigger copy/move detection. 
1482 - continue; 1483 - } 1484 - 1485 - if (count($map[$code]) > 16) { 1486 - // If there are a large number of identical lines in this diff, 1487 - // don't try to figure out where this block came from: the analysis 1488 - // is O(N^2), since we need to compare every line against every 1489 - // other line. Even if we arrive at a result, it is unlikely to be 1490 - // meaningful. See T5041. 1491 - continue; 1492 - } 1493 - 1494 - $best_length = 0; 1495 - 1496 - // Explore all candidates. 1497 - foreach ($map[$code] as $val) { 1498 - list($file, $orig_line) = $val; 1499 - $length = 1; 1500 - 1501 - // Search backward and forward to find all of the adjacent lines 1502 - // which match. 1503 - foreach (array(-1, 1) as $direction) { 1504 - $offset = $direction; 1505 - while (true) { 1506 - if (isset($copies[$line + $offset])) { 1507 - // If we run into a block above us which we've already 1508 - // attributed to a move or copy from elsewhere, stop 1509 - // looking. 1510 - break; 1511 - } 1512 - 1513 - if (!isset($added[$line + $offset])) { 1514 - // If we've run off the beginning or end of the new file, 1515 - // stop looking. 1516 - break; 1517 - } 1518 - 1519 - if (!isset($files[$file][$orig_line + $offset])) { 1520 - // If we've run off the beginning or end of the original 1521 - // file, we also stop looking. 1522 - break; 1523 - } 1524 - 1525 - $old = $files[$file][$orig_line + $offset]; 1526 - $new = $added[$line + $offset]; 1527 - if ($old !== $new) { 1528 - // If the old line doesn't match the new line, stop 1529 - // looking. 1530 - break; 1531 - } 1532 - 1533 - $length++; 1534 - $offset += $direction; 1535 - } 1536 - } 1537 - 1538 - if ($length < $best_length) { 1539 - // If we already know of a better source (more matching lines) 1540 - // for this move/copy, stick with that one. We prefer long 1541 - // copies/moves which match a lot of context over short ones. 
1542 - continue; 1543 - } 1544 - 1545 - if ($length == $best_length) { 1546 - if (idx($types[$file], $orig_line) != '-') { 1547 - // If we already know of an equally good source (same number 1548 - // of matching lines) and this isn't a move, stick with the 1549 - // other one. We prefer moves over copies. 1550 - continue; 1551 - } 1552 - } 1553 - 1554 - $best_length = $length; 1555 - // ($offset - 1) contains number of forward matching lines. 1556 - $best_offset = $offset - 1; 1557 - $best_file = $file; 1558 - $best_line = $orig_line; 1559 - } 1560 - 1561 - $file = ($best_file == $changeset->getFilename() ? '' : $best_file); 1562 - for ($i = $best_length; $i--; ) { 1563 - $type = idx($types[$best_file], $best_line + $best_offset - $i); 1564 - $copies[$line + $best_offset - $i] = ($best_length < $min_lines 1565 - ? array() // Ignore short blocks. 1566 - : array($file, $best_line + $best_offset - $i, $type)); 1567 - } 1568 - 1569 - $skip_lines = $best_offset; 1570 - } 1571 - } 1572 - 1573 - $copies = array_filter($copies); 1574 - if ($copies) { 1575 - $metadata = $changeset->getMetadata(); 1576 - $metadata['copy:lines'] = $copies; 1577 - $changeset->setMetadata($metadata); 1578 - } 1579 - } 1580 - return $changesets; 1581 - } 1582 - 1583 1422 /** 1584 1423 * Build maps from lines comments appear on to actual lines. 1585 1424 */
+4 -55
src/applications/differential/storage/DifferentialDiff.php
··· 226 226 $changeset->setAddLines($add_lines); 227 227 $changeset->setDelLines($del_lines); 228 228 229 - self::detectGeneratedCode($changeset); 230 - 231 229 $diff->addUnsavedChangeset($changeset); 232 230 } 233 231 $diff->setLineCount($lines); 234 232 235 - $parser = new DifferentialChangesetParser(); 236 - $changesets = $parser->detectCopiedCode( 237 - $diff->getChangesets(), 238 - $min_width = 30, 239 - $min_lines = 3); 240 - $diff->attachChangesets($changesets); 233 + $changesets = $diff->getChangesets(); 234 + 235 + id(new DifferentialChangesetEngine()) 236 + ->rebuildChangesets($changesets); 241 237 242 238 return $diff; 243 239 } 244 - 245 240 246 241 public function getDiffDict() { 247 242 $dict = array( ··· 821 816 id(new DifferentialCommitsSearchEngineAttachment()) 822 817 ->setAttachmentKey('commits'), 823 818 ); 824 - } 825 - 826 - private static function detectGeneratedCode( 827 - DifferentialChangeset $changeset) { 828 - 829 - $is_generated_trusted = self::isTrustedGeneratedCode($changeset); 830 - if ($is_generated_trusted) { 831 - $changeset->setTrustedChangesetAttribute( 832 - DifferentialChangeset::ATTRIBUTE_GENERATED, 833 - $is_generated_trusted); 834 - } 835 - 836 - $is_generated_untrusted = self::isUntrustedGeneratedCode($changeset); 837 - if ($is_generated_untrusted) { 838 - $changeset->setUntrustedChangesetAttribute( 839 - DifferentialChangeset::ATTRIBUTE_GENERATED, 840 - $is_generated_untrusted); 841 - } 842 - } 843 - 844 - private static function isTrustedGeneratedCode( 845 - DifferentialChangeset $changeset) { 846 - 847 - $filename = $changeset->getFilename(); 848 - 849 - $paths = PhabricatorEnv::getEnvConfig('differential.generated-paths'); 850 - foreach ($paths as $regexp) { 851 - if (preg_match($regexp, $filename)) { 852 - return true; 853 - } 854 - } 855 - 856 - return false; 857 - } 858 - 859 - private static function isUntrustedGeneratedCode( 860 - DifferentialChangeset $changeset) { 861 - 862 - if ($changeset->getHunks()) { 863 - 
$new_data = $changeset->makeNewFile(); 864 - if (strpos($new_data, '@'.'generated') !== false) { 865 - return true; 866 - } 867 - } 868 - 869 - return false; 870 819 } 871 820 872 821 }