@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.) hq.recaptime.dev/wiki/Phorge
phorge phabricator
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Detect copied code by own algorithm

Summary:
Required for D2321.
Deprecates D2320.
Uses algorithm described at D2320#16.

In most cases, the complexity of this algorithm is `O(N)`, where `N` is the number of lines.
The worst case is `O(A*F)`, where `A` is the number of added lines and `F` is the number of colliding lines, but it should be pretty rare. A real-world example is 100 modified files with a moved license block (15 lines) in each. This requires 1500*100 comparisons because the algorithm tries to find the longest block in each file.

Test Plan:
`arc diff --only` on commit with copied code.
More tests on standalone algorithm.

Reviewers: epriestley

Reviewed By: epriestley

CC: aran, Koolvin

Differential Revision: https://secure.phabricator.com/D2333

vrana 7affae93 6a9ef778

+83
+66
src/applications/differential/storage/diff/DifferentialDiff.php
// ··· (excerpt: the method that ends just below starts above this point)
    }
    $diff->setLineCount($lines);

    $diff->detectCopiedCode();

    return $diff;
  }

  /**
   * Detect added lines that duplicate old-side code anywhere in this diff and
   * record them in each changeset's metadata.
   *
   * The old side of every hunk is indexed by line text. An added line that is
   * at least $min_width bytes long and also occurs on the old side seeds a
   * candidate match. Each candidate is grown backwards and forwards over the
   * neighbouring added lines (of any length) for as long as they are identical
   * to the corresponding old lines. The longest candidate wins; on a tie the
   * first one found is kept.
   *
   * The result is stored under the 'copy:lines' metadata key of each
   * changeset as a map from new line number to array($file, $old_line), where
   * $file is '' when the source is the changeset's own file. Blocks shorter
   * than $min_lines are not reported, but their lines are still marked as
   * consumed so later matches do not grow over them.
   *
   * @param int $min_width Minimum byte length of a line that may seed a match.
   * @param int $min_lines Minimum number of lines in a reported block.
   * @return void
   */
  private function detectCopiedCode($min_width = 40, $min_lines = 3) {
    // $files: filename => (old line number => text) for every old-side line.
    // $map: text => list of array(filename, old line number), long lines only.
    $map = array();
    $files = array();
    foreach ($this->changesets as $changeset) {
      $file = $changeset->getFilename();
      foreach ($changeset->getHunks() as $hunk) {
        $line = $hunk->getOldOffset();
        foreach (explode("\n", $hunk->makeOldFile()) as $code) {
          $files[$file][$line] = $code;
          if (strlen($code) >= $min_width) {
            $map[$code][] = array($file, $line);
          }
          $line++;
        }
      }
    }

    foreach ($this->changesets as $changeset) {
      $copies = array();
      foreach ($changeset->getHunks() as $hunk) {
        $added = $hunk->getAddedLines();

        // Walk the added lines by position rather than with each()/next():
        // each() already advances the array pointer, so pairing it with
        // next() skipped every other line, and each() no longer exists in
        // PHP 8.
        $added_lines = array_keys($added);
        $added_count = count($added_lines);
        for ($pos = 0; $pos < $added_count; $pos++) {
          $line = $added_lines[$pos];
          $code = $added[$line];
          if (!isset($map[$code])) {
            continue;
          }

          // We found a long matching line. Explore all candidates and keep
          // the longest one.
          $best_length = 0;
          $best_offset = 0;
          $best_file = null;
          $best_line = null;
          foreach ($map[$code] as $val) {
            list($file, $orig_line) = $val;
            $length = 1;
            // Search also backwards for short lines.
            foreach (array(-1, 1) as $direction) {
              $offset = $direction;
              // The old line has to be looked up again for every offset;
              // reading it once before the loop compared every neighbour
              // against the same old line.
              while (!isset($copies[$line + $offset]) &&
                  isset($added[$line + $offset]) &&
                  idx($files[$file], $orig_line + $offset) ===
                    $added[$line + $offset]) {
                $length++;
                $offset += $direction;
              }
            }
            // Strict ">" keeps the first candidate among equally long ones.
            if ($length > $best_length) {
              $best_length = $length;
              // The forward pass ran last, so ($offset - 1) is the number of
              // forward matching lines.
              $best_offset = $offset - 1;
              $best_file = $file;
              $best_line = $orig_line;
            }
          }

          $save_file = ($best_file === $changeset->getFilename()
            ? ''
            : $best_file);
          for ($i = $best_length; $i--; ) {
            $copies[$line + $best_offset - $i] = ($best_length < $min_lines
              ? array() // Ignore short blocks.
              : array($save_file, $best_line + $best_offset - $i));
          }

          // The forward matching lines are the next entries of $added and
          // are already recorded, so skip them.
          $pos += $best_offset;
        }
      }

      // array_filter() drops the empty placeholders left by short blocks.
      $metadata = $changeset->getMetadata();
      $metadata['copy:lines'] = array_filter($copies);
      $changeset->setMetadata($metadata);
    }
  }

  public function getDiffDict() {
+17
src/applications/differential/storage/hunk/DifferentialHunk.php
// ··· (excerpt from the class body; earlier members are outside this view)
  protected $newOffset;
  protected $newLen;

  /**
   * Collect the lines this hunk adds.
   *
   * Walks the hunk text in $this->changes one diff line at a time, counting
   * new-side line numbers from $this->newOffset.
   *
   * @return array Map from new-side line number to the added line's text,
   *               without the leading '+'.
   */
  public function getAddedLines() {
    $lines = array();
    $n = $this->newOffset;
    foreach (explode("\n", $this->changes) as $diff_line) {
      // Skip empty pieces and lines starting with a backslash; neither
      // advances the new-side line counter.
      // NOTE(review): backslash lines are presumably the
      // "\ No newline at end of file" marker - confirm.
      if ($diff_line === '' || $diff_line[0] === '\\') {
        continue;
      }
      if ($diff_line[0] === '+') {
        // The cast matters on PHP < 7.0, where substr('+', 1) returns false
        // instead of '', so a blank added line would never compare
        // identical (===) to a blank line taken from the old file.
        $lines[$n] = (string)substr($diff_line, 1);
      }
      // Every line except a removed one occupies a line in the new file.
      if ($diff_line[0] !== '-') {
        $n++;
      }
    }
    return $lines;
  }

  /**
   * Return the result of makeContent() with '-' as the excluded marker.
   * NOTE(review): presumably this is the new-side text of the hunk; confirm
   * against makeContent(), which is outside this excerpt.
   */
  public function makeNewFile() {
    return $this->makeContent($exclude = '-');
  }