@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.) hq.recaptime.dev/wiki/Phorge
phorge phabricator
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

As Harbormaster logs are processed, build a sparse map of byte offsets to line numbers

Summary:
Depends on D19138. Ref T13088. When we want to read the last part of a logfile //and show accurate line numbers//, we need to be able to get from byte offsets to line numbers somehow.

Our fundamental unit must remain byte offsets, because a test can emit an arbitrarily long line, and we should accommodate it cleanly if a test emits 2GB of the letter "A".

To support going from byte offsets to line numbers, compute a map with periodic line markers throughout the offsets of the file. From here, we can figure out the line numbers for arbitrary positions in the file with only a constant amount of work.

Test Plan: Added unit tests; ran unit tests.

Subscribers: PHID-OPKG-gm6ozazyms6q6i22gyam

Maniphest Tasks: T13088

Differential Revision: https://secure.phabricator.com/D19139

+260 -7
+2
resources/sql/autopatches/20180223.log.04.linemap.sql
··· 1 + ALTER TABLE {$NAMESPACE}_harbormaster.harbormaster_buildlog 2 + ADD lineMap LONGTEXT NOT NULL COLLATE {$COLLATE_TEXT};
+2
resources/sql/autopatches/20180223.log.05.linemapdefault.sql
··· 1 + UPDATE {$NAMESPACE}_harbormaster.harbormaster_buildlog 2 + SET lineMap = '[]' WHERE lineMap = '';
+2
src/__phutil_library_map__.php
··· 1230 1230 'HarbormasterBuildLogDownloadController' => 'applications/harbormaster/controller/HarbormasterBuildLogDownloadController.php', 1231 1231 'HarbormasterBuildLogPHIDType' => 'applications/harbormaster/phid/HarbormasterBuildLogPHIDType.php', 1232 1232 'HarbormasterBuildLogQuery' => 'applications/harbormaster/query/HarbormasterBuildLogQuery.php', 1233 + 'HarbormasterBuildLogTestCase' => 'applications/harbormaster/__tests__/HarbormasterBuildLogTestCase.php', 1233 1234 'HarbormasterBuildLogView' => 'applications/harbormaster/view/HarbormasterBuildLogView.php', 1234 1235 'HarbormasterBuildLogViewController' => 'applications/harbormaster/controller/HarbormasterBuildLogViewController.php', 1235 1236 'HarbormasterBuildMessage' => 'applications/harbormaster/storage/HarbormasterBuildMessage.php', ··· 6518 6519 'HarbormasterBuildLogDownloadController' => 'HarbormasterController', 6519 6520 'HarbormasterBuildLogPHIDType' => 'PhabricatorPHIDType', 6520 6521 'HarbormasterBuildLogQuery' => 'PhabricatorCursorPagedPolicyAwareQuery', 6522 + 'HarbormasterBuildLogTestCase' => 'PhabricatorTestCase', 6521 6523 'HarbormasterBuildLogView' => 'AphrontView', 6522 6524 'HarbormasterBuildLogViewController' => 'HarbormasterController', 6523 6525 'HarbormasterBuildMessage' => array(
+117
src/applications/harbormaster/__tests__/HarbormasterBuildLogTestCase.php
<?php

/**
 * Unit tests for the sparse byte-offset-to-line-number map which
 * HarbormasterBuildLog maintains as log data is appended.
 */
final class HarbormasterBuildLogTestCase
  extends PhabricatorTestCase {

  /**
   * Feed each fixture to a fresh log, one part at a time, and compare the
   * list of markers it produces against the expected list.
   */
  public function testBuildLogLineMaps() {
    foreach ($this->newLineMapFixtures() as $name => $fixture) {
      list($marker_distance, $chunks, $expected_markers) = $fixture;

      $build_log = new HarbormasterBuildLog();
      $build_log->setByteLength(0);

      foreach ($chunks as $chunk) {
        $build_log->updateLineMap($chunk, $marker_distance);
      }

      // Only the first element of the stored line map (the marker list) is
      // compared; the remaining elements are ignored by this test.
      $line_map = $build_log->getLineMap();
      list($actual_markers) = $line_map;

      $message = pht('Line Map for "%s"', $name);
      $this->assertEqual($expected_markers, $actual_markers, $message);
    }
  }

  /**
   * Build the fixture table for the line map test.
   *
   * Each entry maps a label to a list of three items: the marker distance
   * to pass to updateLineMap(), the list of data parts to append in order,
   * and the expected list of array(offset, count) markers.
   *
   * @return map<string, list<wild>> Fixtures keyed by label.
   */
  private function newLineMapFixtures() {
    $snowman = "\xE2\x98\x83";

    $fixtures = array();

    // 256 bytes with no newlines at all, appended as one single part.
    $fixtures['no_newlines.log'] = array(
      64,
      array(
        str_repeat('AAAAAAAA', 32),
      ),
      array(
        array(64, 0),
        array(128, 0),
        array(192, 0),
        array(255, 0),
      ),
    );

    // The same 256 bytes, appended as 32 separate 8-byte parts.
    $fixtures['no_newlines_updated.log'] = array(
      64,
      array_fill(0, 32, 'AAAAAAAA'),
      array(
        array(64, 0),
        array(128, 0),
        array(192, 0),
      ),
    );

    // A single "\n" between two 128-byte runs.
    $fixtures['one_newline.log'] = array(
      64,
      array(
        str_repeat('AAAAAAAA', 16),
        "\n",
        str_repeat('AAAAAAAA', 16),
      ),
      array(
        array(64, 0),
        array(127, 0),
        array(191, 1),
        array(255, 1),
      ),
    );

    // Twelve short lines, each terminated by "\n".
    $fixtures['several_newlines.log'] = array(
      64,
      array_fill(0, 12, "AAAAAAAAAAAAAAAAAA\n"),
      array(
        array(56, 2),
        array(113, 5),
        array(170, 8),
        array(227, 11),
      ),
    );

    // "\r", "\r\n" and "\n" line endings mixed together.
    $fixtures['mixed_newlines.log'] = array(
      64,
      array(
        str_repeat('A', 63)."\r",
        str_repeat('A', 63)."\r\n",
        str_repeat('A', 63)."\n",
        str_repeat('A', 63),
      ),
      array(
        array(63, 0),
        array(127, 1),
        array(191, 2),
        array(255, 3),
      ),
    );

    // As above, but with the second part one byte shorter.
    $fixtures['more_mixed_newlines.log'] = array(
      64,
      array(
        str_repeat('A', 63)."\r",
        str_repeat('A', 62)."\r\n",
        str_repeat('A', 63)."\n",
        str_repeat('A', 63),
      ),
      array(
        array(63, 0),
        array(128, 2),
        array(191, 2),
        array(254, 3),
      ),
    );

    // 64 snowmen, each a three-byte UTF8 sequence, with no newlines.
    $fixtures['emoji.log'] = array(
      64,
      array(
        str_repeat($snowman, 64),
      ),
      array(
        array(63, 0),
        array(126, 0),
        array(189, 0),
      ),
    );

    return $fixtures;
  }

}
+130 -1
src/applications/harbormaster/storage/build/HarbormasterBuildLog.php
··· 14 14 protected $filePHID; 15 15 protected $byteLength; 16 16 protected $chunkFormat; 17 + protected $lineMap = array(); 17 18 18 19 private $buildTarget = self::ATTACHABLE; 19 20 private $rope; ··· 64 65 protected function getConfiguration() { 65 66 return array( 66 67 self::CONFIG_AUX_PHID => true, 68 + self::CONFIG_SERIALIZATION => array( 69 + 'lineMap' => self::SERIALIZATION_JSON, 70 + ), 67 71 self::CONFIG_COLUMN_SCHEMA => array( 68 72 // T6203/NULLABILITY 69 73 // It seems like these should be non-nullable? All logs should have a ··· 369 373 $this->writeChunk($encoding_text, $data_size, $append_data); 370 374 } 371 375 372 - $this->byteLength += $data_size; 376 + $this->updateLineMap($append_data); 377 + 373 378 $this->save(); 374 379 $this->saveTransaction(); 375 380 376 381 $rope->removeBytesFromHead($data_size); 377 382 } 383 + } 384 + 385 + public function updateLineMap($append_data, $marker_distance = null) { 386 + $this->byteLength += strlen($append_data); 387 + 388 + if (!$marker_distance) { 389 + $marker_distance = (self::CHUNK_BYTE_LIMIT / 2); 390 + } 391 + 392 + if (!$this->lineMap) { 393 + $this->lineMap = array( 394 + array(), 395 + 0, 396 + 0, 397 + null, 398 + ); 399 + } 400 + 401 + list($map, $map_bytes, $line_count, $prefix) = $this->lineMap; 402 + 403 + $buffer = $append_data; 404 + 405 + if ($prefix) { 406 + $prefix = base64_decode($prefix); 407 + $buffer = $prefix.$buffer; 408 + } 409 + 410 + if ($map) { 411 + list($last_marker, $last_count) = last($map); 412 + } else { 413 + $last_marker = 0; 414 + $last_count = 0; 415 + } 416 + 417 + $max_utf8_width = 8; 418 + $next_marker = $last_marker + $marker_distance; 419 + 420 + $pos = 0; 421 + $len = strlen($buffer); 422 + while (true) { 423 + // If we only have a few bytes left in the buffer, leave it as a prefix 424 + // for next time. 
425 + if (($len - $pos) <= ($max_utf8_width * 2)) { 426 + $prefix = substr($buffer, $pos); 427 + break; 428 + } 429 + 430 + // The next slice we're going to look at is the smaller of: 431 + // 432 + // - the number of bytes we need to make it to the next marker; or 433 + // - all the bytes we have left, minus one. 434 + 435 + $slice_length = min( 436 + ($marker_distance - $map_bytes), 437 + ($len - $pos) - 1); 438 + 439 + // We don't slice all the way to the end for two reasons. 440 + 441 + // First, we want to avoid slicing immediately after a "\r" if we don't 442 + // know what the next character is, because we want to make sure to 443 + // count "\r\n" as a single newline, rather than counting the "\r" as 444 + // a newline and then later counting the "\n" as another newline. 445 + 446 + // Second, we don't want to slice in the middle of a UTF8 character if 447 + // we can help it. We may not be able to avoid this, since the whole 448 + // buffer may just be binary data, but in most cases we can backtrack 449 + // a little bit and try to make it out of emoji or other legitimate 450 + // multibyte UTF8 characters which appear in the log. 451 + 452 + $min_width = max(1, $slice_length - $max_utf8_width); 453 + while ($slice_length >= $min_width) { 454 + $here = $buffer[$pos + ($slice_length - 1)]; 455 + $next = $buffer[$pos + ($slice_length - 1) + 1]; 456 + 457 + // If this is "\r" and the next character is "\n", extend the slice 458 + // to include the "\n". Otherwise, we're fine to slice here since we 459 + // know we're not in the middle of a UTF8 character. 460 + if ($here === "\r") { 461 + if ($next === "\n") { 462 + $slice_length++; 463 + } 464 + break; 465 + } 466 + 467 + // If the next character is 0x7F or lower, or between 0xC2 and 0xF4, 468 + // we're not slicing in the middle of a UTF8 character. 
469 + $ord = ord($next); 470 + if ($ord <= 0x7F || ($ord >= 0xC2 && $ord <= 0xF4)) { 471 + break; 472 + } 473 + 474 + $slice_length--; 475 + } 476 + 477 + $slice = substr($buffer, $pos, $slice_length); 478 + $pos += $slice_length; 479 + 480 + $map_bytes += $slice_length; 481 + $line_count += count(preg_split("/\r\n|\r|\n/", $slice)) - 1; 482 + 483 + if ($map_bytes >= ($marker_distance - $max_utf8_width)) { 484 + $map[] = array( 485 + $last_marker + $map_bytes, 486 + $last_count + $line_count, 487 + ); 488 + 489 + $last_count = $last_count + $line_count; 490 + $line_count = 0; 491 + 492 + $last_marker = $last_marker + $map_bytes; 493 + $map_bytes = 0; 494 + 495 + $next_marker = $last_marker + $marker_distance; 496 + } 497 + } 498 + 499 + $this->lineMap = array( 500 + $map, 501 + $map_bytes, 502 + $line_count, 503 + base64_encode($prefix), 504 + ); 505 + 506 + return $this; 378 507 } 379 508 380 509
+7 -6
src/applications/harbormaster/worker/HarbormasterLogWorker.php
··· 57 57 $data = $this->getTaskData(); 58 58 $is_force = idx($data, 'force'); 59 59 60 - if (!$log->getByteLength() || $is_force) { 60 + if (!$log->getByteLength() || !$log->getLineMap() || $is_force) { 61 61 $iterator = $log->newDataIterator(); 62 62 63 - $byte_length = 0; 63 + $log 64 + ->setByteLength(0) 65 + ->setLineMap(array()); 66 + 64 67 foreach ($iterator as $block) { 65 - $byte_length += strlen($block); 68 + $log->updateLineMap($block); 66 69 } 67 70 68 - $log 69 - ->setByteLength($byte_length) 70 - ->save(); 71 + $log->save(); 71 72 } 72 73 73 74 $format_text = HarbormasterBuildLogChunk::CHUNK_ENCODING_TEXT;