@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.) hq.recaptime.dev/wiki/Phorge
phorge phabricator
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Fix an issue where prose diffing may fail after hitting the PCRE backtracking limit

Summary:
Fixes T13554. For certain prose diff inputs and PCRE backtracking limits, this regular expression may backtrack too often and fail.

A characteristic input is "x x x x ...", i.e. many sequences where `(.*?)\s*\z` looks like it may be able to match but actually can not.

I think writing an expression which has all the behavior we'd like without this backtracking issue isn't trivial (at least, I don't think I know how to do it offhand); just use a strategy based on "trim()" instead, which avoids any PCRE complexities here.

Test Plan: Locally, this passes the "x x x ..." test which the previous code failed. I'm not including that test because it won't reproduce across values of "pcre.backtrack_limit", PCRE versions, etc.

Maniphest Tasks: T13554

Differential Revision: https://secure.phabricator.com/D21422

+68 -16
+35 -16
src/infrastructure/diff/prose/PhutilProseDifferenceEngine.php
··· 142 142 } 143 143 144 144 if ($level < 2) { 145 - // Split pieces into separate text and whitespace sections: make one 146 - // piece out of all the whitespace at the beginning, one piece out of 147 - // all the actual text in the middle, and one piece out of all the 148 - // whitespace at the end. 149 - 150 - $matches = null; 151 - preg_match('/^(\s*)(.*?)(\s*)\z/s', $result, $matches); 152 - 153 - if (strlen($matches[1])) { 154 - $results[] = $matches[1]; 155 - } 156 - if (strlen($matches[2])) { 157 - $results[] = $matches[2]; 158 - } 159 - if (strlen($matches[3])) { 160 - $results[] = $matches[3]; 145 + $trimmed_pieces = $this->trimApart($result); 146 + foreach ($trimmed_pieces as $trimmed_piece) { 147 + $results[] = $trimmed_piece; 161 148 } 162 149 } else { 163 150 $results[] = $result; ··· 270 257 } 271 258 272 259 return $blocks; 260 + } 261 + 262 + public static function trimApart($input) { 263 + // Split pieces into separate text and whitespace sections: make one 264 + // piece out of all the whitespace at the beginning, one piece out of 265 + // all the actual text in the middle, and one piece out of all the 266 + // whitespace at the end. 267 + 268 + $parts = array(); 269 + 270 + $length = strlen($input); 271 + 272 + $corpus = ltrim($input); 273 + $l_length = strlen($corpus); 274 + if ($l_length !== $length) { 275 + $parts[] = substr($input, 0, $length - $l_length); 276 + } 277 + 278 + $corpus = rtrim($corpus); 279 + $lr_length = strlen($corpus); 280 + 281 + if ($lr_length) { 282 + $parts[] = $corpus; 283 + } 284 + 285 + if ($lr_length !== $l_length) { 286 + // NOTE: This will be a negative value; we're slicing from the end of 287 + // the input string. 288 + $parts[] = substr($input, $lr_length - $l_length); 289 + } 290 + 291 + return $parts; 273 292 } 274 293 275 294 }
+33
src/infrastructure/diff/prose/__tests__/PhutilProseDiffTestCase.php
··· 3 3 final class PhutilProseDiffTestCase 4 4 extends PhabricatorTestCase { 5 5 6 + public function testTrimApart() { 7 + $map = array( 8 + '' => array(), 9 + 'a' => array('a'), 10 + ' a ' => array( 11 + ' ', 12 + 'a', 13 + ' ', 14 + ), 15 + ' a' => array( 16 + ' ', 17 + 'a', 18 + ), 19 + 'a ' => array( 20 + 'a', 21 + ' ', 22 + ), 23 + ' a b ' => array( 24 + ' ', 25 + 'a b', 26 + ' ', 27 + ), 28 + ); 29 + 30 + foreach ($map as $input => $expect) { 31 + $actual = PhutilProseDifferenceEngine::trimApart($input); 32 + $this->assertEqual( 33 + $expect, 34 + $actual, 35 + pht('Trim Apart: %s', $input)); 36 + } 37 + } 38 + 6 39 public function testProseDiffsDistance() { 7 40 $this->assertProseParts( 8 41 '',