@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.) hq.recaptime.dev/wiki/Phorge
phorge phabricator
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Write search bolding in a way which is certainly HTML-safe

Summary:
This algorithm is tricky, and uses `phutil_safe_html()` directly, which makes it potentially unsafe.

In particular, D8859 fixes a bug with it which caused it to produce non-utf8 output. This doesn't guarantee it's a security problem, but does make it suspicious.

I don't actually see a way to break it, but this change rewrites it so that it's absolutely bulletproof and no longer needs to call `phutil_safe_html()`.

Test Plan:
{F147487}

@rugabarbo, if you have a chance, can you check if this still works for you?

Reviewers: btrahan

Reviewed By: btrahan

Subscribers: epriestley, rugabarbo

Differential Revision: https://secure.phabricator.com/D8862

+79 -20
+79 -20
src/applications/search/view/PhabricatorSearchResultView.php
··· 76 76 $link); 77 77 } 78 78 79 + 80 + /** 81 + * Find the words which are part of the query string, and bold them in a 82 + * result string. This makes it easier for users to see why a result 83 + * matched their query. 84 + */ 79 85 private function emboldenQuery($str) { 80 - if (!$this->query) { 86 + $query = $this->query->getParameter('query'); 87 + 88 + if (!strlen($query) || !strlen($str)) { 89 + return $str; 90 + } 91 + 92 + // This algorithm is safe but not especially fast, so don't bother if 93 + // we're dealing with a lot of data. This mostly prevents silly/malicious 94 + // queries from doing anything bad. 95 + if (strlen($query) + strlen($str) > 2048) { 81 96 return $str; 82 97 } 83 98 84 - $query = $this->query->getParameter('query'); 99 + // Keep track of which characters we're going to make bold. This is 100 + // byte oriented, but we'll make sure we don't put a bold in the middle 101 + // of a character later. 102 + $bold = array_fill(0, strlen($str), false); 103 + 104 + // Split the query into words. 105 + $parts = preg_split('/ +/', $query); 106 + 107 + // Find all occurrences of each word, and mark them to be emboldened. 108 + foreach ($parts as $part) { 109 + $part = trim($part); 110 + $part = trim($part, '"+'); 111 + if (!strlen($part)) { 112 + continue; 113 + } 114 + 115 + $matches = null; 116 + $has_matches = preg_match_all( 117 + '/(?:^|\b)('.preg_quote($part, '/').')/i', 118 + $str, 119 + $matches, 120 + PREG_OFFSET_CAPTURE); 121 + 122 + if (!$has_matches) { 123 + continue; 124 + } 125 + 126 + // Flag the matching part of the range for boldening. 
127 + foreach ($matches[1] as $match) { 128 + $offset = $match[1]; 129 + for ($ii = 0; $ii < strlen($match[0]); $ii++) { 130 + $bold[$offset + $ii] = true; 131 + } 132 + } 133 + } 85 134 86 - $quoted_regexp = '/"([^"]*)"/'; 87 - $matches = array(1 => array()); 88 - preg_match_all($quoted_regexp, $query, $matches); 89 - $quoted_queries = $matches[1]; 90 - $query = preg_replace($quoted_regexp, '', $query); 135 + // Split the string into ranges, applying bold styling as required. 136 + $out = array(); 137 + $buf = ''; 138 + $pos = 0; 139 + $is_bold = false; 140 + foreach (phutil_utf8v($str) as $chr) { 141 + if ($bold[$pos] != $is_bold) { 142 + if (strlen($buf)) { 143 + if ($is_bold) { 144 + $out[] = phutil_tag('strong', array(), $buf); 145 + } else { 146 + $out[] = $buf; 147 + } 148 + $buf = ''; 149 + } 150 + $is_bold = !$is_bold; 151 + } 152 + $buf .= $chr; 153 + $pos += strlen($chr); 154 + } 91 155 92 - $query = preg_split('/\s+[+|]?/u', $query); 93 - $query = array_filter($query); 94 - $query = array_merge($query, $quoted_queries); 95 - $str = phutil_escape_html($str); 96 - foreach ($query as $word) { 97 - $word = phutil_escape_html($word); 98 - $word = preg_quote($word, '/'); 99 - $word = preg_replace('/\\\\\*$/', '\w*', $word); 100 - $str = preg_replace( 101 - '/(?:^|\b)('.$word.')(?:\b|$)/i', 102 - '<strong>\1</strong>', 103 - $str); 156 + if (strlen($buf)) { 157 + if ($is_bold) { 158 + $out[] = phutil_tag('strong', array(), $buf); 159 + } else { 160 + $out[] = $buf; 161 + } 104 162 } 105 - return phutil_safe_html($str); 163 + 164 + return $out; 106 165 } 107 166 108 167 }