@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.) hq.recaptime.dev/wiki/Phorge
phorge phabricator
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

In prose diffs, use hash-and-diff for coarse "level 0" diffing to scale better

Summary: Depends on D20838. Fixes T13414. Instead of doing coarse diffing with "PhutilEditDistanceMatrix", use hash-and-diff with "DocumentEngine".

Test Plan:
- On a large document (~3K top level blocks), saw a more sensible diff, instead of the whole thing falling back to "everything changed" mode.
- On a small document, still saw a sensible granular diff.

{F6888249}

Maniphest Tasks: T13414

Differential Revision: https://secure.phabricator.com/D20839

+130 -48
+1 -1
src/__phutil_library_map__.php
··· 12402 12402 'PhutilPHPCodeSnippetContextFreeGrammar' => 'PhutilCLikeCodeSnippetContextFreeGrammar', 12403 12403 'PhutilPhabricatorAuthAdapter' => 'PhutilOAuthAuthAdapter', 12404 12404 'PhutilProseDiff' => 'Phobject', 12405 - 'PhutilProseDiffTestCase' => 'PhutilTestCase', 12405 + 'PhutilProseDiffTestCase' => 'PhabricatorTestCase', 12406 12406 'PhutilProseDifferenceEngine' => 'Phobject', 12407 12407 'PhutilQueryString' => 'Phobject', 12408 12408 'PhutilRealNameContextFreeGrammar' => 'PhutilContextFreeGrammar',
+19 -4
src/applications/files/diff/PhabricatorDocumentEngineBlocks.php
··· 50 50 51 51 if ($old_line) { 52 52 $old_hash = rtrim($old_line['text'], "\n"); 53 - $old_block = array_shift($old_map[$old_hash]); 54 - $old_block->setDifferenceType($old_line['type']); 53 + if (!strlen($old_hash)) { 54 + // This can happen when one of the sources has no blocks. 55 + $old_block = null; 56 + } else { 57 + $old_block = array_shift($old_map[$old_hash]); 58 + $old_block->setDifferenceType($old_line['type']); 59 + } 55 60 } else { 56 61 $old_block = null; 57 62 } 58 63 59 64 if ($new_line) { 60 65 $new_hash = rtrim($new_line['text'], "\n"); 61 - $new_block = array_shift($new_map[$new_hash]); 62 - $new_block->setDifferenceType($new_line['type']); 66 + if (!strlen($new_hash)) { 67 + $new_block = null; 68 + } else { 69 + $new_block = array_shift($new_map[$new_hash]); 70 + $new_block->setDifferenceType($new_line['type']); 71 + } 63 72 } else { 64 73 $new_block = null; 74 + } 75 + 76 + // If both lists are empty, we may generate a row which has two empty 77 + // blocks. 78 + if (!$old_block && !$new_block) { 79 + continue; 65 80 } 66 81 67 82 $rows[] = array(
+108 -42
src/infrastructure/diff/prose/PhutilProseDifferenceEngine.php
··· 10 10 $u_parts = $this->splitCorpus($u, $level); 11 11 $v_parts = $this->splitCorpus($v, $level); 12 12 13 - $matrix = id(new PhutilEditDistanceMatrix()) 14 - ->setMaximumLength(128) 15 - ->setSequences($u_parts, $v_parts) 16 - ->setComputeString(true); 17 - 18 - // For word-level and character-level changes, smooth the output string 19 - // to reduce the choppiness of the diff. 20 - if ($level > 1) { 21 - $matrix->setApplySmoothing(PhutilEditDistanceMatrix::SMOOTHING_FULL); 22 - } 23 - 24 - $u_pos = 0; 25 - $v_pos = 0; 26 - 27 - $edits = $matrix->getEditString(); 28 - $edits_length = strlen($edits); 29 - 30 - $diff = new PhutilProseDiff(); 31 - for ($ii = 0; $ii < $edits_length; $ii++) { 32 - $c = $edits[$ii]; 33 - if ($c == 's') { 34 - $diff->addPart('=', $u_parts[$u_pos]); 35 - $u_pos++; 36 - $v_pos++; 37 - } else if ($c == 'd') { 38 - $diff->addPart('-', $u_parts[$u_pos]); 39 - $u_pos++; 40 - } else if ($c == 'i') { 41 - $diff->addPart('+', $v_parts[$v_pos]); 42 - $v_pos++; 43 - } else if ($c == 'x') { 44 - $diff->addPart('-', $u_parts[$u_pos]); 45 - $diff->addPart('+', $v_parts[$v_pos]); 46 - $u_pos++; 47 - $v_pos++; 48 - } else { 49 - throw new Exception( 50 - pht( 51 - 'Unexpected character ("%s") in edit string.', 52 - $c)); 53 - } 13 + if ($level === 0) { 14 + $diff = $this->newHashDiff($u_parts, $v_parts); 15 + $too_large = false; 16 + } else { 17 + list($diff, $too_large) = $this->newEditDistanceMatrixDiff( 18 + $u_parts, 19 + $v_parts, 20 + $level); 54 21 } 55 22 56 23 $diff->reorderParts(); ··· 119 86 } else if (!strlen($new)) { 120 87 $result->addPart('-', $old); 121 88 } else { 122 - if ($matrix->didReachMaximumLength()) { 89 + if ($too_large) { 123 90 // If this text was too big to diff, don't try to subdivide it. 124 91 $result->addPart('-', $old); 125 92 $result->addPart('+', $new); ··· 204 171 } 205 172 206 173 return $results; 174 + } 175 + 176 + private function newEditDistanceMatrixDiff( 177 + array $u_parts, 178 + array $v_parts, 179 + $level) { 180 + 181 + $matrix = id(new PhutilEditDistanceMatrix()) 182 + ->setMaximumLength(128) 183 + ->setSequences($u_parts, $v_parts) 184 + ->setComputeString(true); 185 + 186 + // For word-level and character-level changes, smooth the output string 187 + // to reduce the choppiness of the diff. 188 + if ($level > 1) { 189 + $matrix->setApplySmoothing(PhutilEditDistanceMatrix::SMOOTHING_FULL); 190 + } 191 + 192 + $u_pos = 0; 193 + $v_pos = 0; 194 + 195 + $edits = $matrix->getEditString(); 196 + $edits_length = strlen($edits); 197 + 198 + $diff = new PhutilProseDiff(); 199 + for ($ii = 0; $ii < $edits_length; $ii++) { 200 + $c = $edits[$ii]; 201 + if ($c == 's') { 202 + $diff->addPart('=', $u_parts[$u_pos]); 203 + $u_pos++; 204 + $v_pos++; 205 + } else if ($c == 'd') { 206 + $diff->addPart('-', $u_parts[$u_pos]); 207 + $u_pos++; 208 + } else if ($c == 'i') { 209 + $diff->addPart('+', $v_parts[$v_pos]); 210 + $v_pos++; 211 + } else if ($c == 'x') { 212 + $diff->addPart('-', $u_parts[$u_pos]); 213 + $diff->addPart('+', $v_parts[$v_pos]); 214 + $u_pos++; 215 + $v_pos++; 216 + } else { 217 + throw new Exception( 218 + pht( 219 + 'Unexpected character ("%s") in edit string.', 220 + $c)); 221 + } 222 + } 223 + 224 + return array($diff, $matrix->didReachMaximumLength()); 225 + } 226 + 227 + private function newHashDiff(array $u_parts, array $v_parts) { 228 + 229 + $u_ref = new PhabricatorDocumentRef(); 230 + $v_ref = new PhabricatorDocumentRef(); 231 + 232 + $u_blocks = $this->newDocumentEngineBlocks($u_parts); 233 + $v_blocks = $this->newDocumentEngineBlocks($v_parts); 234 + 235 + $rows = id(new PhabricatorDocumentEngineBlocks()) 236 + ->addBlockList($u_ref, $u_blocks) 237 + ->addBlockList($v_ref, $v_blocks) 238 + ->newTwoUpLayout(); 239 + 240 + $diff = new PhutilProseDiff(); 241 + foreach ($rows as $row) { 242 + list($u_block, $v_block) = $row; 243 + 244 + if ($u_block && $v_block) { 245 + if ($u_block->getDifferenceType() === '-') { 246 + $diff->addPart('-', $u_block->getContent()); 247 + $diff->addPart('+', $v_block->getContent()); 248 + } else { 249 + $diff->addPart('=', $u_block->getContent()); 250 + } 251 + } else if ($u_block) { 252 + $diff->addPart('-', $u_block->getContent()); 253 + } else { 254 + $diff->addPart('+', $v_block->getContent()); 255 + } 256 + } 257 + 258 + return $diff; 259 + } 260 + 261 + private function newDocumentEngineBlocks(array $parts) { 262 + $blocks = array(); 263 + 264 + foreach ($parts as $part) { 265 + $hash = PhabricatorHash::digestForIndex($part); 266 + 267 + $blocks[] = id(new PhabricatorDocumentEngineBlock()) 268 + ->setContent($part) 269 + ->setDifferenceHash($hash); 270 + } 271 + 272 + return $blocks; 207 273 } 208 274 209 275 }
+2 -1
src/infrastructure/diff/prose/__tests__/PhutilProseDiffTestCase.php
··· 1 1 <?php 2 2 3 - final class PhutilProseDiffTestCase extends PhutilTestCase { 3 + final class PhutilProseDiffTestCase 4 + extends PhabricatorTestCase { 4 5 5 6 public function testProseDiffsDistance() { 6 7 $this->assertProseParts(