@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.) hq.recaptime.dev/wiki/Phorge
phorge phabricator
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Simplify hunk parsing

Summary:
Simplify whitespace-ignoring diffing:

- Currently, we generate two diffs (one ignoring whitespace, one normal) and then copy the //text content// from the normal one to the whitespace-ignoring one.
- Instead, copy the //change types// from the ignoring one to the normal one.
- This is cheaper and much simpler.
- It means that we have the right change types in reparseHunksForSpecialAttributes(), and can simplify some other things.

Simplify whitespace changes, unchanged files, and deleted file detections:

- Currently, we do this inline with a bunch of other stuff, in the reparse step.
- Instead, do it separately. This is simpler.

Simplify intraline whitespace handling:

- Currently, this is a complicated mess that makes roughly zero sense.
- Instead, do it in reparse in a straightforward way.

Partially fix handling of files changed only by changing whitespace.
Partially fix handling of unchanged files.

Test Plan:
- Ran unit tests.
- Created context-less diffs, verified they rendered reasonably.
- Generated a diff with prefix whitespace, suffix whitespace, intraline whitespace, and non-whitespace changes. Verified changes rendered correctly in "ignore most" and "show all" modes.
- Verified unchanged files and files with only whitspace changes render with the correct masks.

Reviewers: btrahan

Reviewed By: btrahan

CC: aran

Maniphest Tasks: T2009

Differential Revision: https://secure.phabricator.com/D4398

+203 -232
+26 -58
src/applications/differential/parser/DifferentialChangesetParser.php
··· 40 40 private $markupEngine; 41 41 private $highlightErrors; 42 42 43 - const CACHE_VERSION = 9; 43 + const CACHE_VERSION = 10; 44 44 const CACHE_MAX_SIZE = 8e6; 45 45 46 46 const ATTR_GENERATED = 'attr:generated'; ··· 497 497 } 498 498 } 499 499 500 - $old_text = array(); 501 - $new_text = array(); 502 - $is_unchanged = null; 503 - $whitelines = null; 504 - if ($ignore_all) { 500 + $hunk_parser = new DifferentialHunkParser(); 501 + $hunk_parser->setWhitespaceMode($whitespace_mode); 502 + $hunk_parser->parseHunksForLineData($changeset->getHunks()); 505 503 506 - // Huge mess. Generate a "-bw" (ignore all whitespace changes) diff, 507 - // parse it out, and then play a shell game with the parsed format 508 - // later so we highlight only changed lines but render 509 - // whitespace differences. If we don't do this, we either fail to 510 - // render whitespace changes (which is incredibly confusing, 511 - // especially for python) or often produce a much larger set of 512 - // differences than necessary. 513 - 504 + // Depending on the whitespace mode, we may need to compute a different 505 + // set of changes than the set of changes in the hunk data (specificaly, 506 + // we might want to consider changed lines which have only whitespace 507 + // changes as unchanged). 508 + if ($ignore_all) { 514 509 $engine = new PhabricatorDifferenceEngine(); 515 510 $engine->setIgnoreWhitespace(true); 516 511 $no_whitespace_changeset = $engine->generateChangesetFromFileContent( 517 512 $old_file, 518 513 $new_file); 519 514 520 - $hunk_parser = new DifferentialHunkParser(); 521 - $hunk_parser->setWhitespaceMode($this->whitespaceMode); 522 - $hunk_parser->parseHunksForLineData($changeset->getHunks()); 523 - $hunk_parser->reparseHunksForSpecialAttributes(); 524 - $is_unchanged = $hunk_parser->getIsUnchanged(); 525 - $whitelines = $hunk_parser->getHasWhiteLines(); 515 + $type_parser = new DifferentialHunkParser(); 516 + $type_parser->parseHunksForLineData($no_whitespace_changeset->getHunks()); 526 517 527 - // While we aren't updating $this->changeset (since it has a bunch 528 - // of metadata we need to preserve, so that headers like "this file 529 - // was moved" render correctly), we're overwriting the local 530 - // $changeset so that the block below will choose the synthetic 531 - // hunks we've built instead of the original hunks. 532 - $changeset = $no_whitespace_changeset; 533 - 534 - // let the games continue - pull out the proper text so we can 535 - // later accurately display the diff 536 - $old_text = ipull($hunk_parser->getOldLines(), 'text', 'line'); 537 - $new_text = ipull($hunk_parser->getNewLines(), 'text', 'line'); 518 + $hunk_parser->setOldLineTypeMap($type_parser->getOldLineTypeMap()); 519 + $hunk_parser->setNewLineTypeMap($type_parser->getNewLineTypeMap()); 538 520 } 539 521 540 - // This either uses the real hunks, or synthetic hunks we built above. 541 - // $is_unchanged, $whitelines, $old_text and $new_text are populated 542 - // for synthetic hunks, otherwise they are default values. 543 - $hunk_parser = new DifferentialHunkParser(); 544 - $hunk_parser->setWhitespaceMode($this->whitespaceMode); 545 - $hunk_parser->parseHunksForLineData($changeset->getHunks()); 546 522 $hunk_parser->reparseHunksForSpecialAttributes(); 547 523 548 524 $unchanged = false; 549 - // i.e. if we didn't have to play horrendous games above 550 - if ($is_unchanged === null) { 551 - if ($hunk_parser->getIsUnchanged()) { 552 - $filetype = $this->changeset->getFileType(); 553 - if ($filetype == DifferentialChangeType::FILE_TEXT || 554 - $filetype == DifferentialChangeType::FILE_SYMLINK) { 555 - $unchanged = true; 556 - } 525 + if (!$hunk_parser->getHasAnyChanges()) { 526 + $filetype = $this->changeset->getFileType(); 527 + if ($filetype == DifferentialChangeType::FILE_TEXT || 528 + $filetype == DifferentialChangeType::FILE_SYMLINK) { 529 + $unchanged = true; 557 530 } 558 - $whitelines = $hunk_parser->getHasWhiteLines(); 559 - } else { 560 - $unchanged = $is_unchanged; 561 531 } 532 + 562 533 $changetype = $this->changeset->getChangeType(); 563 534 if ($changetype == DifferentialChangeType::TYPE_MOVE_AWAY) { 564 535 // sometimes we show moved files as unchanged, sometimes deleted, ··· 567 538 // omit the 'not changed' notice if this is the source of a move 568 539 $unchanged = false; 569 540 } 541 + 570 542 $this->setSpecialAttributes(array( 571 543 self::ATTR_UNCHANGED => $unchanged, 572 544 self::ATTR_DELETED => $hunk_parser->getIsDeleted(), 573 - self::ATTR_WHITELINES => $whitelines 545 + self::ATTR_WHITELINES => !$hunk_parser->getHasTextChanges(), 574 546 )); 575 547 576 - $hunk_parser->updateParsedHunksText($old_text, $new_text); 577 548 $hunk_parser->generateIntraLineDiffs(); 578 549 $hunk_parser->generateVisibileLinesMask(); 579 550 ··· 722 693 'need to be reviewed.'), 723 694 true); 724 695 } else if ($this->isUnchanged()) { 725 - if ($this->isWhitespaceOnly()) { 726 - $shield = $renderer->renderShield( 727 - pht( 728 - 'This file was changed only by adding or removing trailing '. 729 - 'whitespace.'), 730 - false); 731 - } else { 732 696 $shield = $renderer->renderShield( 733 697 pht("The contents of this file were not changed."), 734 698 false); 735 - } 699 + } else if ($this->isWhitespaceOnly()) { 700 + $shield = $renderer->renderShield( 701 + pht( 702 + 'This file was changed only by adding or removing whitespace.'), 703 + false); 736 704 } else if ($this->isDeleted()) { 737 705 $shield = $renderer->renderShield( 738 706 pht("This file was completely deleted."),
+177 -174
src/applications/differential/parser/DifferentialHunkParser.php
··· 2 2 3 3 final class DifferentialHunkParser { 4 4 5 - private $isUnchanged; 6 - private $hasWhiteLines; 7 - private $isDeleted; 8 5 private $oldLines; 9 6 private $newLines; 10 - private $skipIntraLines; 11 - private $whitespaceMode; 12 7 private $intraLineDiffs; 13 8 private $visibleLinesMask; 9 + private $whitespaceMode; 14 10 15 11 /** 16 12 * Get a map of lines on which hunks start, other than line 1. This ··· 59 55 return $this->intraLineDiffs; 60 56 } 61 57 62 - public function setWhitespaceMode($white_space_mode) { 63 - $this->whitespaceMode = $white_space_mode; 64 - return $this; 65 - } 66 - private function getWhitespaceMode() { 67 - if ($this->whitespaceMode === null) { 68 - throw new Exception( 69 - 'You must setWhitespaceMode before accessing this data.' 70 - ); 71 - } 72 - return $this->whitespaceMode; 73 - } 74 - 75 - private function setSkipIntraLines($skip_intra_lines) { 76 - $this->skipIntraLines = $skip_intra_lines; 77 - return $this; 78 - } 79 - public function getSkipIntraLines() { 80 - if ($this->skipIntraLines === null) { 81 - throw new Exception( 82 - 'You must reparseHunksForSpecialAttributes before accessing this data.' 83 - ); 84 - } 85 - return $this->skipIntraLines; 86 - } 87 - 88 58 private function setNewLines($new_lines) { 89 59 $this->newLines = $new_lines; 90 60 return $this; ··· 111 81 return $this->oldLines; 112 82 } 113 83 114 - private function setIsDeleted($is_deleted) { 115 - $this->isDeleted = $is_deleted; 84 + public function getOldLineTypeMap() { 85 + $map = array(); 86 + $old = $this->getOldLines(); 87 + foreach ($old as $o) { 88 + if (!$o) { 89 + continue; 90 + } 91 + $map[$o['line']] = $o['type']; 92 + } 93 + return $map; 94 + } 95 + 96 + public function setOldLineTypeMap(array $map) { 97 + $lines = $this->getOldLines(); 98 + foreach ($lines as $key => $data) { 99 + $lines[$key]['type'] = $map[$data['line']]; 100 + } 101 + $this->oldLines = $lines; 116 102 return $this; 117 103 } 118 - public function getIsDeleted() { 119 - return $this->isDeleted; 104 + 105 + public function getNewLineTypeMap() { 106 + $map = array(); 107 + $new = $this->getNewLines(); 108 + foreach ($new as $n) { 109 + if (!$n) { 110 + continue; 111 + } 112 + $map[$n['line']] = $n['type']; 113 + } 114 + return $map; 120 115 } 121 116 122 - private function setHasWhiteLines($has_white_lines) { 123 - $this->hasWhiteLines = $has_white_lines; 117 + public function setNewLineTypeMap(array $map) { 118 + $lines = $this->getNewLines(); 119 + foreach ($lines as $key => $data) { 120 + $lines[$key]['type'] = $map[$data['line']]; 121 + } 122 + $this->newLines = $lines; 124 123 return $this; 125 124 } 126 - public function getHasWhiteLines() { 127 - return $this->hasWhiteLines; 128 - } 129 125 130 - public function setIsUnchanged($is_unchanged) { 131 - $this->isUnchanged = $is_unchanged; 126 + 127 + public function setWhitespaceMode($white_space_mode) { 128 + $this->whitespaceMode = $white_space_mode; 132 129 return $this; 133 130 } 134 - public function getIsUnchanged() { 135 - return $this->isUnchanged; 131 + 132 + private function getWhitespaceMode() { 133 + if ($this->whitespaceMode === null) { 134 + throw new Exception( 135 + 'You must setWhitespaceMode before accessing this data.' 136 + ); 137 + } 138 + return $this->whitespaceMode; 139 + } 140 + 141 + public function getIsDeleted() { 142 + foreach ($this->getNewLines() as $line) { 143 + if ($line) { 144 + // At least one new line, so the entire file wasn't deleted. 145 + return false; 146 + } 147 + } 148 + 149 + foreach ($this->getOldLines() as $line) { 150 + if ($line) { 151 + // No new lines, at least one old line; the entire file was deleted. 152 + return true; 153 + } 154 + } 155 + 156 + // This is an empty file. 157 + return false; 136 158 } 137 159 138 160 /** 161 + * Returns true if the hunks change any text, not just whitespace. 162 + */ 163 + public function getHasTextChanges() { 164 + return $this->getHasChanges('text'); 165 + } 166 + 167 + /** 168 + * Returns true if the hunks change anything, including whitespace. 169 + */ 170 + public function getHasAnyChanges() { 171 + return $this->getHasChanges('any'); 172 + } 173 + 174 + private function getHasChanges($filter) { 175 + if ($filter !== 'any' && $filter !== 'text') { 176 + throw new Exception("Unknown change filter '{$filter}'."); 177 + } 178 + 179 + $old = $this->getOldLines(); 180 + $new = $this->getNewLines(); 181 + 182 + $is_any = ($filter === 'any'); 183 + 184 + foreach ($old as $key => $o) { 185 + $n = $new[$key]; 186 + if ($o === null || $n === null) { 187 + // One side is missing, and it's impossible for both sides to be null, 188 + // so the other side must have something, and thus the two sides are 189 + // different and the file has been changed under any type of filter. 190 + return true; 191 + } 192 + 193 + if ($o['type'] !== $n['type']) { 194 + // The types are different, so either the underlying text is actually 195 + // different or whatever whitespace rules we're using consider them 196 + // different. 197 + return true; 198 + } 199 + 200 + if ($o['text'] !== $n['text']) { 201 + if ($is_any) { 202 + // The text is different, so there's a change. 203 + return true; 204 + } else if (trim($o['text']) !== trim($n['text'])) { 205 + return true; 206 + } 207 + } 208 + } 209 + 210 + // No changes anywhere in the file. 211 + return false; 212 + } 213 + 214 + 215 + /** 139 216 * This function takes advantage of the parsing work done in 140 217 * @{method:parseHunksForLineData} and continues the struggle to hammer this 141 218 * data into something we can display to a user. 142 219 * 143 220 * In particular, this function re-parses the hunks to make them equivalent 144 221 * in length for easy rendering, adding `null` as necessary to pad the 145 - * length. Further, this re-parsing stage figures out various special 146 - * properties about the changes such as if the change is a delete, has any 147 - * whitelines, or has any changes whatsoever. Finally, this function 148 - * calculates what lines - if any - should be skipped within a diff display, 149 - * ostensibly because they don't have anything to do with the current set 150 - * of changes with respect to display options. 222 + * length. 151 223 * 152 224 * Anyhoo, this function is not particularly well-named but I try. 153 225 * 154 226 * NOTE: this function must be called after 155 227 * @{method:parseHunksForLineData}. 156 - * NOTE: you must @{method:setWhitespaceMode} before calling this method. 157 228 */ 158 229 public function reparseHunksForSpecialAttributes() { 159 230 $rebuild_old = array(); 160 231 $rebuild_new = array(); 161 - $skip_intra = array(); 162 232 163 233 $old_lines = array_reverse($this->getOldLines()); 164 234 $new_lines = array_reverse($this->getNewLines()); 165 235 166 - $whitelines = false; 167 - $changed = false; 168 - 169 236 while (count($old_lines) || count($new_lines)) { 170 237 $old_line_data = array_pop($old_lines); 171 238 $new_line_data = array_pop($new_lines); ··· 188 255 if ($new_line_data) { 189 256 array_push($new_lines, $new_line_data); 190 257 } 191 - $changed = true; 192 258 continue; 193 259 } 194 260 ··· 198 264 if ($old_line_data) { 199 265 array_push($old_lines, $old_line_data); 200 266 } 201 - $changed = true; 202 267 continue; 203 268 } 204 269 205 - if ($this->getWhitespaceMode() != 206 - DifferentialChangesetParser::WHITESPACE_SHOW_ALL) { 207 - $similar = false; 208 - switch ($this->getWhitespaceMode()) { 209 - case DifferentialChangesetParser::WHITESPACE_IGNORE_TRAILING: 210 - if (rtrim($old_line_data['text']) == 211 - rtrim($new_line_data['text'])) { 212 - if ($old_line_data['type']) { 213 - // If we're converting this into an unchanged line because of 214 - // a trailing whitespace difference, mark it as a whitespace 215 - // change so we can show "This file was modified only by 216 - // adding or removing trailing whitespace." instead of 217 - // "This file was not modified.". 218 - $whitelines = true; 219 - } 220 - $similar = true; 221 - } 222 - break; 223 - default: 224 - // In this case, the lines are similar if there is no change type 225 - // (that is, just trust the diff algorithm). 226 - if (!$old_line_data['type']) { 227 - $similar = true; 228 - } 229 - break; 230 - } 231 - if ($similar) { 232 - if ($old_line_data['type'] == '\\') { 233 - // These are similar because they're "No newline at end of file" 234 - // comments. 235 - } else { 236 - $old_line_data['type'] = null; 237 - $new_line_data['type'] = null; 238 - $skip_intra[count($rebuild_old)] = true; 239 - } 240 - } else { 241 - $changed = true; 242 - } 243 - } else { 244 - $changed = true; 245 - } 246 - 247 270 $rebuild_old[] = $old_line_data; 248 271 $rebuild_new[] = $new_line_data; 249 272 } ··· 251 274 $this->setOldLines($rebuild_old); 252 275 $this->setNewLines($rebuild_new); 253 276 254 - $this->setIsUnchanged(!$changed); 255 - $this->setHasWhiteLines($whitelines); 256 - $this->setIsDeleted(array_filter($this->getOldLines()) && 257 - !array_filter($this->getNewLines())); 258 - $this->setSkipIntraLines($skip_intra); 277 + $this->updateChangeTypesForWhitespaceMode(); 259 278 260 279 return $this; 261 280 } 262 281 263 - public function updateParsedHunksText($old_text, $new_text) { 264 - if ($old_text || $new_text) { 282 + private function updateChangeTypesForWhitespaceMode() { 283 + $mode = $this->getWhitespaceMode(); 265 284 266 - // Use this parser's side-by-side line information -- notably, the 267 - // change types -- but replace all the line text. 268 - // This lets us render whitespace-only changes without marking them as 269 - // different. 285 + $mode_show_all = DifferentialChangesetParser::WHITESPACE_SHOW_ALL; 286 + if ($mode === $mode_show_all) { 287 + // If we're showing all whitespace, we don't need to perform any updates. 288 + return; 289 + } 270 290 271 - $old = $this->getOldLines(); 272 - $new = $this->getNewLines(); 291 + $mode_trailing = DifferentialChangesetParser::WHITESPACE_IGNORE_TRAILING; 292 + $is_trailing = ($mode === $mode_trailing); 273 293 274 - foreach ($old as $k => $desc) { 275 - if (empty($desc)) { 276 - continue; 277 - } 278 - $old[$k]['text'] = idx($old_text, $desc['line']); 294 + $new = $this->getNewLines(); 295 + $old = $this->getOldLines(); 296 + foreach ($old as $key => $o) { 297 + $n = $new[$key]; 298 + 299 + if (!$o || !$n) { 300 + continue; 279 301 } 280 - $skip_intra = $this->getSkipIntraLines(); 281 - foreach ($new as $k => $desc) { 282 - if (empty($desc)) { 283 - continue; 284 - } 285 - $new[$k]['text'] = idx($new_text, $desc['line']); 286 302 287 - if ($this->whitespaceMode == 288 - DifferentialChangesetParser::WHITESPACE_IGNORE_FORCE) { 289 - // Under forced ignore mode, ignore even internal whitespace 290 - // changes. 291 - continue; 303 + if ($is_trailing) { 304 + // In "trailing" mode, we need to identify lines which are marked 305 + // changed but differ only by trailing whitespace. We mark these lines 306 + // unchanged. 307 + if ($o['type'] != $n['type']) { 308 + if (rtrim($o['text']) === rtrim($n['text'])) { 309 + $old[$key]['type'] = null; 310 + $new[$key]['type'] = null; 311 + } 292 312 } 293 - 294 - // If there's a corresponding "old" text and the line is marked as 295 - // unchanged, test if there are internal whitespace changes between 296 - // non-whitespace characters, e.g. spaces added to a string or spaces 297 - // added around operators. If we find internal spaces, mark the line 298 - // as changed. 299 - // 300 - // We only need to do this for "new" lines because any line that is 301 - // missing either "old" or "new" text certainly can not have internal 302 - // whitespace changes without also having non-whitespace changes, 303 - // because characters had to be either added or removed to create the 304 - // possibility of internal whitespace. 305 - if (isset($old[$k]['text']) && empty($new[$k]['type'])) { 306 - if (trim($old[$k]['text']) != trim($new[$k]['text'])) { 307 - // The strings aren't the same when trimmed, so there are internal 308 - // whitespace changes. Mark this line changed. 309 - $old[$k]['type'] = '-'; 310 - $new[$k]['type'] = '+'; 311 - 312 - // Re-mark this line for intraline diffing. 313 - unset($skip_intra[$k]); 313 + } else { 314 + // In "ignore most" and "ignore all" modes, we need to identify lines 315 + // which are marked unchanged but have internal whitespace changes. 316 + // We want to ignore leading and trailing whitespace changes only, not 317 + // internal whitespace changes (`diff` doesn't have a mode for this, so 318 + // we have to fix it here). If the text is marked unchanged but the 319 + // old and new text differs by internal space, mark the lines changed. 320 + if ($o['type'] === null && $n['type'] === null) { 321 + if ($o['text'] !== $n['text']) { 322 + if (trim($o['text']) !== trim($n['text'])) { 323 + $old[$key]['type'] = '-'; 324 + $new[$key]['type'] = '+'; 325 + } 314 326 } 315 327 } 316 328 } 329 + } 317 330 318 - $this->setSkipIntraLines($skip_intra); 319 - $this->setOldLines($old); 320 - $this->setNewLines($new); 321 - } 331 + $this->setOldLines($old); 332 + $this->setNewLines($new); 322 333 323 334 return $this; 324 335 } ··· 326 337 public function generateIntraLineDiffs() { 327 338 $old = $this->getOldLines(); 328 339 $new = $this->getNewLines(); 329 - $skip_intra = $this->getSkipIntraLines(); 330 - $intra_line_diffs = array(); 331 340 332 - $min_length = min(count($old), count($new)); 333 - for ($ii = 0; $ii < $min_length; $ii++) { 334 - if ($old[$ii] || $new[$ii]) { 335 - if (isset($old[$ii]['text'])) { 336 - $otext = $old[$ii]['text']; 337 - } else { 338 - $otext = ''; 339 - } 340 - if (isset($new[$ii]['text'])) { 341 - $ntext = $new[$ii]['text']; 342 - } else { 343 - $ntext = ''; 344 - } 345 - if ($otext != $ntext && empty($skip_intra[$ii])) { 346 - $intra_line_diffs[$ii] = ArcanistDiffUtils::generateIntralineDiff( 347 - $otext, 348 - $ntext); 349 - } 341 + $diffs = array(); 342 + foreach ($old as $key => $o) { 343 + $n = $new[$key]; 344 + 345 + if (!$o || !$n) { 346 + continue; 347 + } 348 + 349 + if ($o['type'] != $n['type']) { 350 + $diffs[$key] = ArcanistDiffUtils::generateIntralineDiff( 351 + $o['text'], 352 + $n['text']); 350 353 } 351 354 } 352 355 353 - $this->setIntraLineDiffs($intra_line_diffs); 356 + $this->setIntraLineDiffs($diffs); 354 357 355 358 return $this; 356 359 }