@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.)
hq.recaptime.dev/wiki/Phorge
phorge
phabricator
1<?php
2
/**
 * Remarkup block rule which recognizes and renders lists.
 *
 * Items may be unordered (`-`, `*`), ordered (`#`, `1.`, `1)`), or checkbox
 * items (`[ ]`, `[x]`). Nesting is expressed either by indentation or by
 * repeating the marker character (`--`, `###`).
 */
final class PhutilRemarkupListBlockRule extends PhutilRemarkupBlockRule {

  /**
   * The maximum sub-list depth you can nest to. Avoids silliness and blowing
   * the stack.
   */
  const MAXIMUM_LIST_NESTING_DEPTH = 12;

  /**
   * Matches the first line of a list: optional leading whitespace, then a run
   * of `-`, `*` or `#`, or a number without a leading zero followed by `.` or
   * `)` (captured as group 1), or a bracket pair holding at most one non-digit
   * character; followed by at least one whitespace character.
   */
  const START_BLOCK_PATTERN = '@^\s*(?:[-*#]+|([1-9][0-9]*)[.)]|\[\D?\])\s+@';

  /**
   * Matches the first line of any later item. Same as the start pattern, but
   * numbers may begin with zero and nothing is captured.
   */
  const CONT_BLOCK_PATTERN = '@^\s*(?:[-*#]+|[0-9]+[.)]|\[\D?\])\s+@';

  /**
   * Matches the bullet or number marker (not checkbox brackets) plus any
   * surrounding whitespace, so it can be stripped from the item text.
   */
  const STRIP_BLOCK_PATTERN = '@^\s*(?:[-*#]+|[0-9]+[.)])\s*@';

  /**
   * This rule must apply before the Code block rule because it needs to
   * win blocks which begin ` - Lorem ipsum`.
   */
  public function getPriority() {
    return 400;
  }

  /**
   * Count how many lines, starting at `$cursor`, belong to a list block.
   *
   * @param array $lines  All input lines.
   * @param int   $cursor Index of the first line to examine.
   * @return int  Number of lines claimed by this rule, or 0 if the text at
   *              `$cursor` is not a list (or should be parsed as a header).
   */
  public function getMatchingLineCount(array $lines, $cursor) {
    $num_lines = 0;

    $first_line = $cursor;
    $is_one_line = false;
    while (isset($lines[$cursor])) {
      if (!$num_lines) {
        // The very first line must look like the start of a list.
        if (preg_match(self::START_BLOCK_PATTERN, $lines[$cursor])) {
          $num_lines++;
          $cursor++;
          $is_one_line = true;
          continue;
        }
      } else {
        // Any later line which looks like a list item starts a new item.
        if (preg_match(self::CONT_BLOCK_PATTERN, $lines[$cursor])) {
          $num_lines++;
          $cursor++;
          $is_one_line = false;
          continue;
        }

        // Allow lists to continue across multiple paragraphs, as long as lines
        // are indented or a single empty line separates indented lines.

        $this_empty = !strlen(trim($lines[$cursor]));
        $this_indented = preg_match('/^ /', $lines[$cursor]);

        $next_empty = true;
        $next_indented = false;
        if (isset($lines[$cursor + 1])) {
          $next_empty = !strlen(trim($lines[$cursor + 1]));
          $next_indented = preg_match('/^ /', $lines[$cursor + 1]);
        }

        if ($this_empty || $this_indented) {
          if (($this_indented && !$this_empty) ||
              ($next_indented && !$next_empty)) {
            $num_lines++;
            $cursor++;
            continue;
          }
        }

        // A trailing empty line is still consumed by the block, but ends it.
        if ($this_empty) {
          $num_lines++;
        }
      }

      break;
    }

    // If this list only has one item in it, and the list marker is "#", and
    // it's not the last line in the input, parse it as a header instead of a
    // list. This produces better behavior for alternate Markdown headers.

    if ($is_one_line) {
      if (($first_line + $num_lines) < count($lines)) {
        if (strncmp($lines[$first_line], '#', 1) === 0) {
          return 0;
        }
      }
    }

    return $num_lines;
  }

  /**
   * Render the raw text of a list block.
   *
   * The text is split into items, each item is normalized into a
   * (text, depth, style, mark) record, the records are arranged into a tree
   * of nested lists, and the tree is rendered.
   *
   * @param string $text     Raw text of the block.
   * @param wild   $children Unused by this rule.
   * @return wild  A string when the engine is in text mode, otherwise the
   *               result of phutil_implode_html().
   */
  public function markupText($text, $children) {
    $items = array();
    $lines = explode("\n", $text);

    // We allow users to delimit lists using either differing indentation
    // levels:
    //
    //   - a
    //     - b
    //
    // ...or differing numbers of item-delimiter characters:
    //
    //   - a
    //   -- b
    //
    // If they use the second style but block-indent the whole list, we'll
    // get the depth counts wrong for the first item. To prevent this,
    // un-indent every item by the minimum indentation level for the whole
    // block before we begin parsing.

    $regex = self::START_BLOCK_PATTERN;
    $min_space = PHP_INT_MAX;
    foreach ($lines as $line) {
      if (preg_match($regex, $line)) {
        $regex = self::CONT_BLOCK_PATTERN;
        $matches = null;
        if (preg_match('/^(\s+)/', $line, $matches)) {
          $space = strlen($matches[1]);
        } else {
          $space = 0;
        }
        $min_space = min($min_space, $space);
      }
    }

    // Only lines which begin an item are un-indented; wrapped continuation
    // lines have their surrounding whitespace trimmed later.
    $regex = self::START_BLOCK_PATTERN;
    if ($min_space) {
      foreach ($lines as $key => $line) {
        if (preg_match($regex, $line)) {
          $regex = self::CONT_BLOCK_PATTERN;
          $lines[$key] = substr($line, $min_space);
        }
      }
    }


    // The input text may have linewraps in it, like this:
    //
    //   - derp derp derp derp
    //     derp derp derp derp
    //   - blarp blarp blarp blarp
    //
    // Group text lines together into list items, stored in $items. So the
    // result in the above case will be:
    //
    //   array(
    //     array(
    //       "- derp derp derp derp",
    //       "  derp derp derp derp",
    //     ),
    //     array(
    //       "- blarp blarp blarp blarp",
    //     ),
    //   );

    $item = array();
    $starts_at = null;
    $regex = self::START_BLOCK_PATTERN;
    foreach ($lines as $line) {
      $match = null;
      if (preg_match($regex, $line, $match)) {
        // Only the start pattern captures a number, so only the first item
        // can choose the starting number of an ordered list.
        if (!$starts_at && !empty($match[1])) {
          $starts_at = $match[1];
        }
        $regex = self::CONT_BLOCK_PATTERN;
        if ($item) {
          $items[] = $item;
          $item = array();
        }
      }
      $item[] = $line;
    }
    if ($item) {
      $items[] = $item;
    }
    if (!$starts_at) {
      $starts_at = 1;
    }


    // Process each item to normalize the text, remove line wrapping, and
    // determine its depth (indentation level) and style (ordered vs unordered).
    //
    // We preserve consecutive linebreaks and interpret them as paragraph
    // breaks.
    //
    // Given the above example, the processed array will look like:
    //
    //   array(
    //     array(
    //       'text' => 'derp derp derp derp derp derp derp derp',
    //       'depth' => 0,
    //       'style' => '-',
    //     ),
    //     array(
    //       'text' => 'blarp blarp blarp blarp',
    //       'depth' => 0,
    //       'style' => '-',
    //     ),
    //   );

    $has_marks = false;
    foreach ($items as $key => $item) {
      // Trim space around newlines, to strip trailing whitespace and formatting
      // indentation.
      $item = preg_replace('/ *(\n+) */', '\1', implode("\n", $item));

      // Replace single newlines with a space. Preserve multiple newlines as
      // paragraph breaks.
      $item = preg_replace('/(?<!\n)\n(?!\n)/', ' ', $item);

      $item = rtrim($item);

      if (!strlen($item)) {
        unset($items[$key]);
        continue;
      }

      $matches = null;
      if (preg_match('/^\s*([-*#]{2,})/', $item, $matches)) {
        // Alternate-style indents; use number of list item symbols.
        $depth = strlen($matches[1]) - 1;
      } else if (preg_match('/^(\s+)/', $item, $matches)) {
        // Markdown-style indents; use indent depth.
        $depth = strlen($matches[1]);
      } else {
        $depth = 0;
      }

      if (preg_match('/^\s*(?:#|[0-9])/', $item)) {
        $style = '#';
      } else {
        $style = '-';
      }

      // Strip leading indicators off the item.
      $text = preg_replace(self::STRIP_BLOCK_PATTERN, '', $item);

      // Look for "[]", "[ ]", "[*]", "[x]", etc., which we render as a
      // checkbox. We don't render [1], [2], etc., as checkboxes, as these
      // are often used as footnotes.
      $mark = null;
      $matches = null;
      if (preg_match('/^\s*\[(\D?)\]\s*/', $text, $matches)) {
        if (strlen(trim($matches[1]))) {
          $mark = true;
        } else {
          $mark = false;
        }
        $has_marks = true;
        $text = substr($text, strlen($matches[0]));
      }

      $items[$key] = array(
        'text' => $text,
        'depth' => $depth,
        'style' => $style,
        'mark' => $mark,
      );
    }
    $items = array_values($items);


    // Users can create a sub-list by indenting any deeper amount than the
    // previous list, so these are both valid:
    //
    //   - a
    //     - b
    //
    //   - a
    //       - b
    //
    // In the former case, we'll have depths (0, 2). In the latter case, depths
    // (0, 4). We don't actually care about how many spaces there are, only
    // how many list indentation levels (that is, we want to map both of
    // those cases to (0, 1), indicating "outermost list" and "first sublist").
    //
    // This is made more complicated because lists at two different indentation
    // levels might be at the same list level:
    //
    //   - a
    //     - b
    //   - c
    //       - d
    //
    // Here, 'b' and 'd' are at the same list level (2) but different indent
    // levels (2, 4).
    //
    // Users can also create "staircases" like this:
    //
    //       - a
    //     - b
    //   # c
    //
    // While this is silly, we'd like to render it as faithfully as possible.
    //
    // In order to do this, we convert the list of nodes into a tree,
    // normalizing indentation levels and inserting dummy nodes as necessary to
    // make the tree well-formed. See additional notes at buildTree().
    //
    // In the case above, the result is a tree like this:
    //
    //   - <null>
    //     - <null>
    //       - a
    //     - b
    //   # c

    $tree = $this->buildTree($items, 0, count($items), 0);


    // We may need to open a list on a <null> node, but they do not have
    // list style information yet. We need to propagate list style information
    // backward through the tree. In the above example, the tree now looks
    // like this:
    //
    //   - <null (style=#)>
    //     - <null (style=-)>
    //       - a
    //     - b
    //   # c

    $this->adjustTreeStyleInformation($tree);

    // Finally, we have enough information to render the tree.

    $out = $this->renderTree($tree, 0, $has_marks, $starts_at);

    if ($this->getEngine()->isTextMode()) {
      $out = implode('', $out);
      $out = rtrim($out, "\n");
      $out = preg_replace('/ +$/m', '', $out);
      return $out;
    }

    return phutil_implode_html('', $out);
  }

  /**
   * Convert the flat item list in the half-open range [`$l`, `$r`) into a
   * tree of nodes for list level `$cur_level`.
   *
   * Each returned node is an item record plus 'level' and 'items' (its
   * sub-list). Items which appear before the shallowest item of the range
   * are hung under a placeholder node whose 'text' and 'style' are null.
   *
   * See additional notes in @{method:markupText}.
   *
   * @param array $items     Normalized item records.
   * @param int   $l         First index of the range (inclusive).
   * @param int   $r         End index of the range (exclusive).
   * @param int   $cur_level List level being built.
   * @return array List of nodes at this level.
   */
  private function buildTree(array $items, $l, $r, $cur_level) {
    if ($l == $r) {
      return array();
    }

    if ($cur_level > self::MAXIMUM_LIST_NESTING_DEPTH) {
      // This algorithm is recursive and we don't need you blowing the stack
      // with your oh-so-clever 50,000-item-deep list. Cap indentation levels
      // at a reasonable number and just shove everything deeper up to this
      // level.
      $nodes = array();
      for ($ii = $l; $ii < $r; $ii++) {
        $nodes[] = array(
          'level' => $cur_level,
          'items' => array(),
        ) + $items[$ii];
      }
      return $nodes;
    }

    // Find the first item in the range with the smallest depth. Scanning
    // backward with "<=" means the earliest such item wins ties.
    $min = $l;
    for ($ii = $r - 1; $ii >= $l; $ii--) {
      if ($items[$ii]['depth'] <= $items[$min]['depth']) {
        $min = $ii;
      }
    }

    $min_depth = $items[$min]['depth'];

    $nodes = array();
    if ($min != $l) {
      // Everything before the shallowest item is deeper than it, so it gets
      // nested under a placeholder node.
      $nodes[] = array(
        'text' => null,
        'level' => $cur_level,
        'style' => null,
        'mark' => null,
        'items' => $this->buildTree($items, $l, $min, $cur_level + 1),
      );
    }

    // Each item at the minimum depth becomes a node at this level; the items
    // between it and the next such item become its sub-list.
    $last = $min;
    for ($ii = $last + 1; $ii < $r; $ii++) {
      if ($items[$ii]['depth'] == $min_depth) {
        $nodes[] = array(
          'level' => $cur_level,
          'items' => $this->buildTree($items, $last + 1, $ii, $cur_level + 1),
        ) + $items[$last];
        $last = $ii;
      }
    }
    $nodes[] = array(
      'level' => $cur_level,
      'items' => $this->buildTree($items, $last + 1, $r, $cur_level + 1),
    ) + $items[$last];

    return $nodes;
  }


  /**
   * Fill in the 'style' of placeholder nodes, in place.
   *
   * See additional notes in @{method:markupText}.
   *
   * @param array &$tree List of nodes, as produced by buildTree().
   * @return void
   */
  private function adjustTreeStyleInformation(array &$tree) {
    // The effect here is just to walk backward through the nodes at this level
    // and apply the first style in the list to any empty nodes we inserted
    // before it. As we go, also recurse down the tree.

    $style = '-';
    for ($ii = count($tree) - 1; $ii >= 0; $ii--) {
      if ($tree[$ii]['style'] !== null) {
        // This is the earliest node we've seen with style, so set the
        // style to its style.
        $style = $tree[$ii]['style'];
      } else {
        // This node has no style, so apply the current style.
        $tree[$ii]['style'] = $style;
      }
      if ($tree[$ii]['items']) {
        $this->adjustTreeStyleInformation($tree[$ii]['items']);
      }
    }
  }


  /**
   * Render one list level, recursing into sub-lists.
   *
   * See additional notes in @{method:markupText}.
   *
   * @param array $tree      Nodes at this level.
   * @param int   $level     Nesting level, used for text-mode indentation.
   * @param bool  $has_marks True if any item in the block is a checkbox item.
   * @param wild  $starts_at Number of the first item of an ordered list.
   * @return array List of output fragments.
   */
  private function renderTree(
    array $tree,
    $level,
    $has_marks,
    $starts_at = 1) {

    // An empty tree has no first node to take a list style from, and there is
    // nothing to render.
    if (!$tree) {
      return array();
    }

    $is_text = $this->getEngine()->isTextMode();

    // The whole level is rendered in the style of its first node.
    $style = idx(head($tree), 'style');

    // Always resolve to a real tag so the opening and closing tags agree.
    $tag = ($style === '#') ? 'ol' : 'ul';

    $out = array();

    if (!$is_text) {
      // The "start" attribute is only meaningful on ordered lists.
      $start_attr = null;
      if ($tag === 'ol' &&
          ctype_digit(phutil_string_cast($starts_at)) &&
          $starts_at > 1) {
        $start_attr = hsprintf(' start="%d"', $starts_at);
      }

      $classes = 'remarkup-list';
      if ($has_marks) {
        $classes .= ' remarkup-list-with-checkmarks';
      }

      $out[] = hsprintf(
        '<%s class="%s"%s>',
        $tag,
        $classes,
        $start_attr);

      $out[] = "\n";
    }

    $number = $starts_at;
    foreach ($tree as $item) {
      if ($is_text) {
        // Placeholder nodes render nothing of their own in text mode.
        if ($item['text'] !== null) {
          $indent = str_repeat(' ', 2 * $level);
          $out[] = $indent;
          if ($item['mark'] !== null) {
            $out[] = $item['mark'] ? '[X] ' : '[ ] ';
          } else if ($style === '#') {
            $out[] = $number.'. ';
            $number++;
          } else if ($style === '-') {
            $out[] = '- ';
          }

          $parts = preg_split('/\n{2,}/', $item['text']);
          foreach ($parts as $key => $part) {
            if ($key != 0) {
              // NOTE(review): later paragraphs get one extra space of
              // indentation here; confirm this matches the intended
              // alignment with the item marker.
              $out[] = "\n\n ".$indent;
            }
            $out[] = $this->applyRules($part);
          }
          $out[] = "\n";
        }
      } else {
        if ($item['text'] === null) {
          $out[] = hsprintf('<li class="remarkup-list-item phantom-item">');
        } else {
          if ($item['mark'] !== null) {
            if ($item['mark']) {
              $out[] = hsprintf(
                '<li class="remarkup-list-item remarkup-checked-item">');
            } else {
              $out[] = hsprintf(
                '<li class="remarkup-list-item remarkup-unchecked-item">');
            }
            $out[] = phutil_tag(
              'input',
              array(
                'type' => 'checkbox',
                'checked' => ($item['mark'] ? 'checked' : null),
                'disabled' => 'disabled',
              ));
            $out[] = ' ';
          } else {
            $out[] = hsprintf('<li class="remarkup-list-item">');
          }

          // Blank lines inside an item separate paragraphs.
          $parts = preg_split('/\n{2,}/', $item['text']);
          foreach ($parts as $key => $part) {
            if ($key != 0) {
              $out[] = array(
                "\n",
                phutil_tag('br'),
                phutil_tag('br'),
                "\n",
              );
            }
            $out[] = $this->applyRules($part);
          }
        }
      }

      if ($item['items']) {
        // Sub-lists always start numbering from the default.
        $subitems = $this->renderTree($item['items'], $level + 1, $has_marks);
        foreach ($subitems as $i) {
          $out[] = $i;
        }
      }
      if (!$is_text) {
        $out[] = hsprintf("</li>\n");
      }
    }

    if (!$is_text) {
      $out[] = hsprintf('</%s>', $tag);
    }

    return $out;
  }

}