@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.) hq.recaptime.dev/wiki/Phorge
phorge phabricator
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at recaptime-dev/main 273 lines 8.0 kB view raw
<?php

/**
 * Remarkup rule which finds hyperlinks in text and replaces them with stored
 * tokens, then renders those tokens once the whole text has been marked up.
 *
 * Three syntaxes are recognized:
 *
 *   - `<proto://uri>`: linked exactly as written, without the brackets.
 *   - `{proto://uri}`: matched, but not linked by default.
 *   - bare `proto://uri`: linked after trimming trailing punctuation and an
 *     unbalanced closing parenthesis.
 */
final class PhutilRemarkupHyperlinkRule extends PhutilRemarkupRule {

  /** Text metadata key under which the list of matched links is stored. */
  const KEY_HYPERLINKS = 'hyperlinks';

  /**
   * @return float Priority of this rule.
   */
  public function getPriority() {
    return 400.0;
  }

  /**
   * Replace every recognized hyperlink in a block of text with a token.
   *
   * @param string $text Text to mark up.
   * @return string Text with hyperlinks replaced by stored-text tokens.
   */
  public function apply($text) {
    // The patterns never change, so they are built once and kept in static
    // locals.
    static $angle_pattern;
    static $curly_pattern;
    static $bare_pattern;

    if ($angle_pattern === null) {
      // See T13608. A previous version of this code matched bare URIs
      // starting with "\w{3,}", which can take a very long time to match
      // against long inputs.
      //
      // Use a protocol length limit in all patterns for general sanity,
      // and a negative lookbehind in the bare pattern to avoid explosive
      // complexity during expression evaluation.

      $protocol_fragment = '\w{3,32}';
      $uri_fragment = '[^\s'.PhutilRemarkupBlockStorage::MAGIC_BYTE.']+';

      $angle_pattern = sprintf(
        '(<(%s://%s?)>)',
        $protocol_fragment,
        $uri_fragment);

      $curly_pattern = sprintf(
        '({(%s://%s?)})',
        $protocol_fragment,
        $uri_fragment);

      $bare_pattern = sprintf(
        '((?<!\w)%s://%s)',
        $protocol_fragment,
        $uri_fragment);
    }

    // Hyperlinks with explicit "<>" around them get linked exactly, without
    // the "<>". Angle brackets are basically special and mean "this is a URL
    // with weird characters". This is assumed to be reasonable because they
    // don't appear in most normal text or most normal URLs.
    $text = preg_replace_callback(
      $angle_pattern,
      array($this, 'markupHyperlinkAngle'),
      $text);

    // We match "{uri}", but do not link it by default.
    $text = preg_replace_callback(
      $curly_pattern,
      array($this, 'markupHyperlinkCurly'),
      $text);

    // Anything else we match "ungreedily", which means we'll look for
    // stuff that's probably punctuation or otherwise not part of the URL and
    // not link it. This lets someone write "Quick! Go to
    // https://www.example.com/!". We also apply some paren balancing rules.

    // NOTE: We're explicitly avoiding capturing stored blocks, so text like
    // `https://www.example.com/[[x | y]]` doesn't get aggressively captured.

    $text = preg_replace_callback(
      $bare_pattern,
      array($this, 'markupHyperlinkUngreedy'),
      $text);

    return $text;
  }

  /**
   * Callback for `<uri>` matches.
   *
   * @param array $matches Match groups; index 1 is the URI.
   * @return string Replacement text, see @{method:markupHyperlink}.
   */
  public function markupHyperlinkAngle(array $matches) {
    return $this->markupHyperlink('<', $matches);
  }

  /**
   * Callback for `{uri}` matches.
   *
   * @param array $matches Match groups; index 1 is the URI.
   * @return string Replacement text, see @{method:markupHyperlink}.
   */
  public function markupHyperlinkCurly(array $matches) {
    return $this->markupHyperlink('{', $matches);
  }

  /**
   * Store a matched URI in the engine and record it for later rendering.
   *
   * @param string|null $mode How the URI was written: `'<'`, `'{'`, or
   *   `null` for a bare URI.
   * @param array $matches Match groups; index 0 is the full match and
   *   index 1 is the URI.
   * @return string Token in the format <0x01>1234Z.
   *   See @{class:PhutilRemarkupBlockStorage} for details. If the URI can
   *   not be parsed, the full match (`$matches[0]`) is returned unchanged.
   */
  protected function markupHyperlink($mode, array $matches) {
    $raw_uri = $matches[1];

    // The URI object is constructed only to validate the text: if parsing
    // throws, leave the original text alone.
    try {
      new PhutilURI($raw_uri);
    } catch (Exception $ex) {
      return $matches[0];
    }

    $engine = $this->getEngine();

    $token = $engine->storeText($raw_uri);

    $list_key = self::KEY_HYPERLINKS;
    $link_list = $engine->getTextMetadata($list_key, array());

    $link_list[] = array(
      'token' => $token,
      'uri' => $raw_uri,
      'mode' => $mode,
    );

    $engine->setTextMetadata($list_key, $link_list);

    return $token;
  }

  /**
   * Render a link as an `<a>` tag, or as raw text for embeds.
   *
   * @param string $link URI to render.
   * @param bool $is_embed True if the URI was written as `{uri}`.
   * @return mixed Rendered link.
   */
  protected function renderHyperlink($link, $is_embed) {
    // If the URI is "{uri}" and no handler picked it up, we just render it
    // as plain text.
    if ($is_embed) {
      return $this->renderRawLink($link, $is_embed);
    }

    $engine = $this->getEngine();

    $uri = new PhutilURIHelper($link);
    $is_self = $uri->isSelf();

    // "uri.same-window" is read with $is_self as the second argument.
    // NOTE(review): presumably that is the fallback when the option is
    // unset; confirm against the engine.
    $same_window = $engine->getConfig('uri.same-window', $is_self);
    if ($same_window) {
      $target = null;
    } else {
      $target = '_blank';
    }

    return phutil_tag(
      'a',
      array(
        'href' => $link,
        'class' => $this->getRemarkupLinkClass($is_self),
        'target' => $target,
        'rel' => 'noreferrer',
      ),
      $link);
  }

  /**
   * Render a link as plain text, restoring the braces around embeds.
   *
   * @param string $link URI to render.
   * @param bool $is_embed True if the URI was written as `{uri}`.
   * @return string Raw link text.
   */
  private function renderRawLink($link, $is_embed) {
    if ($is_embed) {
      return '{'.$link.'}';
    } else {
      return $link;
    }
  }

  /**
   * Move a run of trailing sentence punctuation from a candidate URI onto
   * the front of the unlinked tail.
   *
   * @param string $match Candidate URI text.
   * @param string|null $tail Text already split off the end of the URI.
   * @return array Pair `array($match, $tail)` after splitting.
   */
  private function splitTrailingPunctuation($match, $tail) {
    $trailing = null;
    if (preg_match('/[;,.:!?]+$/', $match, $trailing)) {
      $tail = $trailing[0].$tail;
      $match = substr($match, 0, -strlen($trailing[0]));
    }

    return array($match, $tail);
  }

  /**
   * Callback for bare URI matches.
   *
   * Trailing punctuation and an unbalanced closing parenthesis are kept out
   * of the link and emitted as plain text after it.
   *
   * @param array $matches Match groups; index 0 is the matched text.
   * @return mixed The link token followed by any unlinked tail, or the
   *   original matched text if the trimmed URI can not be parsed.
   */
  protected function markupHyperlinkUngreedy($matches) {
    $match = $matches[0];
    $tail = null;

    list($match, $tail) = $this->splitTrailingPunctuation($match, $tail);

    // If there's a closing paren at the end but no balancing open paren in
    // the URL, don't link the close paren. This is an attempt to gracefully
    // handle the two common paren cases, Wikipedia links and English language
    // parentheticals, e.g.:
    //
    //  https://en.wikipedia.org/wiki/Noun_(disambiguation)
    //  (see also https://www.example.com)
    //
    // We could apply a craftier heuristic here which tries to actually balance
    // the parens, but this is probably sufficient.
    if (preg_match('/\\)$/', $match) && !preg_match('/\\(/', $match)) {
      $tail = ')'.$tail;
      $match = substr($match, 0, -1);

      // A parenthetical may end its sentence inside the paren, like
      // "(see also https://www.example.com.)". Removing the paren exposes
      // that punctuation, so strip it as well.
      list($match, $tail) = $this->splitTrailingPunctuation($match, $tail);
    }

    // Validate here as well: markupHyperlink() below is handed a null full
    // match, so on failure it could not give the original text back.
    try {
      new PhutilURI($match);
    } catch (Exception $ex) {
      return $matches[0];
    }

    $link = $this->markupHyperlink(null, array(null, $match));

    return hsprintf('%s%s', $link, $tail);
  }

  /**
   * Replace the stored token for every recorded link with its final
   * rendering.
   *
   * Links are rendered as raw text in table-of-contents or text mode, or
   * when their protocol is not listed in "uri.allowed-protocols". Links
   * written as `<uri>`, and all links in HTML mail mode, are rendered
   * directly. Everything else is offered to the hyperlink engine extensions
   * first and rendered normally if no extension produces a result.
   *
   * @return void
   */
  public function didMarkupText() {
    $engine = $this->getEngine();

    $protocols = $engine->getConfig('uri.allowed-protocols', array());
    $is_toc = $engine->getState('toc');
    $is_text = $engine->isTextMode();
    $is_mail = $engine->isHTMLMailMode();

    $list_key = self::KEY_HYPERLINKS;
    $raw_list = $engine->getTextMetadata($list_key, array());

    $links = array();
    foreach ($raw_list as $key => $link) {
      $token = $link['token'];
      $raw_uri = $link['uri'];
      $mode = $link['mode'];

      $is_embed = ($mode === '{');
      $is_literal = ($mode === '<');

      // If we're rendering in a "Table of Contents" or a plain text mode,
      // we're going to render the raw URI without modifications.
      if ($is_toc || $is_text) {
        $result = $this->renderRawLink($raw_uri, $is_embed);
        $engine->overwriteStoredText($token, $result);
        continue;
      }

      // If this URI doesn't use a whitelisted protocol, don't link it. This
      // is primarily intended to prevent "javascript://" silliness.
      $uri = new PhutilURI($raw_uri);
      $protocol = $uri->getProtocol();
      $valid_protocol = idx($protocols, $protocol);
      if (!$valid_protocol) {
        $result = $this->renderRawLink($raw_uri, $is_embed);
        $engine->overwriteStoredText($token, $result);
        continue;
      }

      // If the URI is written as "<uri>", we'll render it literally even if
      // some handler would otherwise deal with it.
      // If we're rendering for HTML mail, we also render literally.
      if ($is_literal || $is_mail) {
        $result = $this->renderHyperlink($raw_uri, $is_embed);
        $engine->overwriteStoredText($token, $result);
        continue;
      }

      // Otherwise, this link is a valid resource which extensions are allowed
      // to handle.
      $links[$key] = $link;
    }

    if (!$links) {
      return;
    }

    foreach ($links as $key => $link) {
      $links[$key] = new PhutilRemarkupHyperlinkRef($link);
    }

    $extensions = PhutilRemarkupHyperlinkEngineExtension::getAllLinkEngines();
    foreach ($extensions as $extension) {
      // Work on a clone so the shared extension object is not bound to this
      // engine.
      id(clone $extension)
        ->setEngine($engine)
        ->processHyperlinks($links);

      // Any link which now has a result has been handled; write it out and
      // stop offering it to later extensions.
      foreach ($links as $key => $link) {
        $result = $link->getResult();
        if ($result !== null) {
          $engine->overwriteStoredText($link->getToken(), $result);
          unset($links[$key]);
        }
      }

      if (!$links) {
        break;
      }
    }

    // Render any remaining links in a normal way.
    foreach ($links as $link) {
      $result = $this->renderHyperlink($link->getURI(), $link->isEmbed());
      $engine->overwriteStoredText($link->getToken(), $result);
    }
  }

}