@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.) hq.recaptime.dev/wiki/Phorge
phorge phabricator
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Digest changeset anchors into purely alphanumeric strings

Summary:
Ref T13045. See that task for discussion.

This replaces `digestForIndex()` with a "clever" algorithm in `digestForAnchor()`. The new digest is the same as the `digestForIndex()` digest except in positions where the original output character was "." or "_". In those positions, a replacement alphanumeric character is selected based on entropy accumulated by the digest function as it iterates through the bytes of the hash.

Test Plan: Added unit tests.

Reviewers: amckinley

Reviewed By: amckinley

Maniphest Tasks: T13045

Differential Revision: https://secure.phabricator.com/D18909

+103 -3
+1 -1
src/applications/differential/storage/DifferentialChangeset.php
··· 173 173 } 174 174 175 175 public function getAnchorName() { 176 - return 'change-'.PhabricatorHash::digestForIndex($this->getFilename()); 176 + return 'change-'.PhabricatorHash::digestForAnchor($this->getFilename()); 177 177 } 178 178 179 179 public function getAbsoluteRepositoryPath(
+59 -2
src/infrastructure/util/PhabricatorHash.php
··· 3 3 final class PhabricatorHash extends Phobject { 4 4 5 5 const INDEX_DIGEST_LENGTH = 12; 6 + const ANCHOR_DIGEST_LENGTH = 12; 6 7 7 8 /** 8 9 * Digest a string using HMAC+SHA1. ··· 38 39 * related hashing (for general purpose hashing, see @{method:digest}). 39 40 * 40 41 * @param string Input string. 41 - * @return string 12-byte, case-sensitive alphanumeric hash of the string 42 - * which 42 + * @return string 12-byte, case-sensitive, mostly-alphanumeric hash of 43 + * the string. 43 44 */ 44 45 public static function digestForIndex($string) { 45 46 $hash = sha1($string, $raw_output = true); ··· 62 63 63 64 return $result; 64 65 } 66 + 67 + /** 68 + * Digest a string for use in HTML page anchors. This is similar to 69 + * @{method:digestForIndex} but produces purely alphanumeric output. 70 + * 71 + * This tries to be mostly compatible with the index digest to limit how 72 + * much stuff we're breaking by switching to it. For additional discussion, 73 + * see T13045. 74 + * 75 + * @param string Input string. 76 + * @return string 12-byte, case-sensitive, purely-alphanumeric hash of 77 + * the string. 78 + */ 79 + public static function digestForAnchor($string) { 80 + $hash = sha1($string, $raw_output = true); 81 + 82 + static $map; 83 + if ($map === null) { 84 + $map = '0123456789'. 85 + 'abcdefghij'. 86 + 'klmnopqrst'. 87 + 'uvwxyzABCD'. 88 + 'EFGHIJKLMN'. 89 + 'OPQRSTUVWX'. 90 + 'YZ'; 91 + } 92 + 93 + $result = ''; 94 + $accum = 0; 95 + $map_size = strlen($map); 96 + for ($ii = 0; $ii < self::ANCHOR_DIGEST_LENGTH; $ii++) { 97 + $byte = ord($hash[$ii]); 98 + $low_bits = ($byte & 0x3F); 99 + $accum = ($accum + $byte) % $map_size; 100 + 101 + if ($low_bits < $map_size) { 102 + // If an index digest would produce any alphanumeric character, just 103 + // use that character. This means that these digests are the same as 104 + // digests created with "digestForIndex()" in all positions where the 105 + // output character is some character other than "." or "_". 
106 + $result .= $map[$low_bits]; 107 + } else { 108 + // If an index digest would produce a non-alphanumeric character ("." or 109 + // "_"), pick an alphanumeric character instead. We accumulate an 110 + // index into the alphanumeric character list to try to preserve 111 + // entropy here. We could use this strategy for all bytes instead, 112 + // but then these digests would differ from digests created with 113 + // "digestForIndex()" in all positions, instead of just a small number 114 + // of positions. 115 + $result .= $map[$accum]; 116 + } 117 + } 118 + 119 + return $result; 120 + } 121 + 65 122 66 123 public static function digestToRange($string, $min, $max) { 67 124 if ($min > $max) {
+43
src/infrastructure/util/__tests__/PhabricatorHashTestCase.php
··· 52 52 pht('Distinct characters in hash of: %s', $input)); 53 53 } 54 54 55 + public function testHashForAnchor() { 56 + $map = array( 57 + // For inputs with no "." or "_" in the output, digesting for an index 58 + // or an anchor should be the same. 59 + 'dog' => array( 60 + 'Aliif7Qjd5ct', 61 + 'Aliif7Qjd5ct', 62 + ), 63 + // When an output would contain "." or "_", it should be replaced with 64 + // an alphanumeric character in those positions instead. 65 + 'fig' => array( 66 + 'OpB9tY4i.MOX', 67 + 'OpB9tY4imMOX', 68 + ), 69 + 'cot' => array( 70 + '_iF26XU_PsVY', 71 + '3iF26XUkPsVY', 72 + ), 73 + // The replacement characters are well-distributed and generally keep 74 + // the entropy of the output high: here, "_" characters in the initial 75 + // positions of the digests of "cot" (above) and "dug" (this test) have 76 + // different outputs. 77 + 'dug' => array( 78 + '_XuQnp0LUlUW', 79 + '7XuQnp0LUlUW', 80 + ), 81 + ); 82 + 83 + foreach ($map as $input => $expect) { 84 + list($expect_index, $expect_anchor) = $expect; 85 + 86 + $this->assertEqual( 87 + $expect_index, 88 + PhabricatorHash::digestForIndex($input), 89 + pht('Index digest of "%s".', $input)); 90 + 91 + $this->assertEqual( 92 + $expect_anchor, 93 + PhabricatorHash::digestForAnchor($input), 94 + pht('Anchor digest of "%s".', $input)); 95 + } 96 + } 97 + 55 98 }