@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.)
hq.recaptime.dev/wiki/Phorge
phorge
phabricator
1<?php
2
3final class PhabricatorJupyterDocumentEngine
4 extends PhabricatorDocumentEngine {
5
6 const ENGINEKEY = 'jupyter';
7
8 public function getViewAsLabel(PhabricatorDocumentRef $ref) {
9 return pht('View as Jupyter Notebook');
10 }
11
12 protected function getDocumentIconIcon(PhabricatorDocumentRef $ref) {
13 return 'fa-sun-o';
14 }
15
16 protected function getDocumentRenderingText(PhabricatorDocumentRef $ref) {
17 return pht('Rendering Jupyter Notebook...');
18 }
19
20 public function shouldRenderAsync(PhabricatorDocumentRef $ref) {
21 return true;
22 }
23
24 protected function getContentScore(PhabricatorDocumentRef $ref) {
25 $name = $ref->getName();
26
27 if (preg_match('/\\.ipynb\z/i', $name)) {
28 return 2000;
29 }
30
31 return 500;
32 }
33
34 protected function canRenderDocumentType(PhabricatorDocumentRef $ref) {
35 return $ref->isProbablyJSON();
36 }
37
38 public function canDiffDocuments(
39 ?PhabricatorDocumentRef $uref = null,
40 ?PhabricatorDocumentRef $vref = null) {
41 return true;
42 }
43
44 public function newEngineBlocks(
45 ?PhabricatorDocumentRef $uref = null,
46 ?PhabricatorDocumentRef $vref = null) {
47
48 $blocks = new PhabricatorDocumentEngineBlocks();
49
50 try {
51 if ($uref) {
52 $u_blocks = $this->newDiffBlocks($uref);
53 } else {
54 $u_blocks = array();
55 }
56
57 if ($vref) {
58 $v_blocks = $this->newDiffBlocks($vref);
59 } else {
60 $v_blocks = array();
61 }
62
63 $blocks->addBlockList($uref, $u_blocks);
64 $blocks->addBlockList($vref, $v_blocks);
65 } catch (Exception $ex) {
66 phlog($ex);
67 $blocks->addMessage($ex->getMessage());
68 }
69
70 return $blocks;
71 }
72
73 public function newBlockDiffViews(
74 PhabricatorDocumentRef $uref,
75 PhabricatorDocumentEngineBlock $ublock,
76 PhabricatorDocumentRef $vref,
77 PhabricatorDocumentEngineBlock $vblock) {
78
79 $ucell = $ublock->getContent();
80 $vcell = $vblock->getContent();
81
82 $utype = idx($ucell, 'cell_type');
83 $vtype = idx($vcell, 'cell_type');
84
85 if ($utype === $vtype) {
86 switch ($utype) {
87 case 'markdown':
88 $usource = $this->readString($ucell, 'source');
89 $vsource = $this->readString($vcell, 'source');
90
91 $diff = id(new PhutilProseDifferenceEngine())
92 ->getDiff($usource, $vsource);
93
94 $u_content = $this->newProseDiffCell($diff, array('=', '-'));
95 $v_content = $this->newProseDiffCell($diff, array('=', '+'));
96
97 $u_content = $this->newJupyterCell(null, $u_content, null);
98 $v_content = $this->newJupyterCell(null, $v_content, null);
99
100 $u_content = $this->newCellContainer($u_content);
101 $v_content = $this->newCellContainer($v_content);
102
103 return id(new PhabricatorDocumentEngineBlockDiff())
104 ->setOldContent($u_content)
105 ->addOldClass('old')
106 ->setNewContent($v_content)
107 ->addNewClass('new');
108 case 'code/line':
109 $usource = idx($ucell, 'raw');
110 $vsource = idx($vcell, 'raw');
111 $udisplay = idx($ucell, 'display');
112 $vdisplay = idx($vcell, 'display');
113
114 $intraline_segments = ArcanistDiffUtils::generateIntralineDiff(
115 $usource,
116 $vsource);
117
118 $u_segments = array();
119 foreach ($intraline_segments[0] as $u_segment) {
120 $u_segments[] = $u_segment;
121 }
122
123 $v_segments = array();
124 foreach ($intraline_segments[1] as $v_segment) {
125 $v_segments[] = $v_segment;
126 }
127
128 $usource = PhabricatorDifferenceEngine::applyIntralineDiff(
129 $udisplay,
130 $u_segments);
131
132 $vsource = PhabricatorDifferenceEngine::applyIntralineDiff(
133 $vdisplay,
134 $v_segments);
135
136 list($u_label, $u_content) = $this->newCodeLineCell($ucell, $usource);
137 list($v_label, $v_content) = $this->newCodeLineCell($vcell, $vsource);
138
139 $classes = array(
140 'jupyter-cell-flush',
141 );
142
143 $u_content = $this->newJupyterCell($u_label, $u_content, $classes);
144 $v_content = $this->newJupyterCell($v_label, $v_content, $classes);
145
146 $u_content = $this->newCellContainer($u_content);
147 $v_content = $this->newCellContainer($v_content);
148
149 return id(new PhabricatorDocumentEngineBlockDiff())
150 ->setOldContent($u_content)
151 ->addOldClass('old')
152 ->setNewContent($v_content)
153 ->addNewClass('new');
154 }
155 }
156
157 return parent::newBlockDiffViews($uref, $ublock, $vref, $vblock);
158 }
159
160 public function newBlockContentView(
161 PhabricatorDocumentRef $ref,
162 PhabricatorDocumentEngineBlock $block) {
163
164 $viewer = $this->getViewer();
165 $cell = $block->getContent();
166
167 $cell_content = $this->renderJupyterCell($viewer, $cell);
168
169 return $this->newCellContainer($cell_content);
170 }
171
172 private function newCellContainer($cell_content) {
173 $notebook_table = phutil_tag(
174 'table',
175 array(
176 'class' => 'jupyter-notebook',
177 ),
178 $cell_content);
179
180 $container = phutil_tag(
181 'div',
182 array(
183 'class' => 'document-engine-jupyter document-engine-diff',
184 ),
185 $notebook_table);
186
187 return $container;
188 }
189
190 private function newProseDiffCell(PhutilProseDiff $diff, array $mask) {
191 $mask = array_fuse($mask);
192
193 $result = array();
194 foreach ($diff->getParts() as $part) {
195 $type = $part['type'];
196 $text = $part['text'];
197
198 if (!isset($mask[$type])) {
199 continue;
200 }
201
202 switch ($type) {
203 case '-':
204 case '+':
205 $result[] = phutil_tag(
206 'span',
207 array(
208 'class' => 'bright',
209 ),
210 $text);
211 break;
212 case '=':
213 $result[] = $text;
214 break;
215 }
216 }
217
218 return array(
219 null,
220 phutil_tag(
221 'div',
222 array(
223 'class' => 'jupyter-cell-markdown',
224 ),
225 $result),
226 );
227 }
228
229 private function newDiffBlocks(PhabricatorDocumentRef $ref) {
230 $viewer = $this->getViewer();
231 $content = $ref->loadData();
232
233 $cells = $this->newCells($content, true);
234
235 $idx = 1;
236 $blocks = array();
237 foreach ($cells as $cell) {
238 // When the cell is a source code line, we can hash just the raw
239 // input rather than all the cell metadata.
240
241 switch (idx($cell, 'cell_type')) {
242 case 'code/line':
243 $hash_input = $cell['raw'];
244 break;
245 case 'markdown':
246 $hash_input = $this->readString($cell, 'source');
247 break;
248 default:
249 $hash_input = serialize($cell);
250 break;
251 }
252
253 $hash = PhabricatorHash::digestWithNamedKey(
254 $hash_input,
255 'document-engine.content-digest');
256
257 $blocks[] = id(new PhabricatorDocumentEngineBlock())
258 ->setBlockKey($idx)
259 ->setDifferenceHash($hash)
260 ->setContent($cell);
261
262 $idx++;
263 }
264
265 return $blocks;
266 }
267
268 protected function newDocumentContent(PhabricatorDocumentRef $ref) {
269 $viewer = $this->getViewer();
270 $content = $ref->loadData();
271
272 try {
273 $cells = $this->newCells($content, false);
274 } catch (Exception $ex) {
275 return $this->newMessage($ex->getMessage());
276 }
277
278 $rows = array();
279 foreach ($cells as $cell) {
280 $rows[] = $this->renderJupyterCell($viewer, $cell);
281 }
282
283 $notebook_table = phutil_tag(
284 'table',
285 array(
286 'class' => 'jupyter-notebook',
287 ),
288 $rows);
289
290 $container = phutil_tag(
291 'div',
292 array(
293 'class' => 'document-engine-jupyter',
294 ),
295 $notebook_table);
296
297 return $container;
298 }
299
300 private function newCells($content, $for_diff) {
301 try {
302 $data = phutil_json_decode($content);
303 } catch (PhutilJSONParserException $ex) {
304 throw new Exception(
305 pht(
306 'This is not a valid JSON document and can not be rendered as '.
307 'a Jupyter notebook: %s.',
308 $ex->getMessage()));
309 }
310
311 if (!is_array($data)) {
312 throw new Exception(
313 pht(
314 'This document does not encode a valid JSON object and can not '.
315 'be rendered as a Jupyter notebook.'));
316 }
317
318 $nbformat = idx($data, 'nbformat');
319 if (!is_int($nbformat)) {
320 throw new Exception(
321 pht(
322 'This document lacks a valid "nbformat" field. Jupyter notebooks '.
323 'must have this field and it must have an integer value.'));
324 }
325
326 if ($nbformat !== 4) {
327 throw new Exception(
328 pht(
329 'This Jupyter notebook uses an unsupported version of the file '.
330 'format (found version %s, expected version 4).',
331 $nbformat));
332 }
333
334 $cells = idx($data, 'cells');
335 if (!is_array($cells)) {
336 throw new Exception(
337 pht(
338 'This Jupyter notebook does not specify a list of "cells".'));
339 }
340
341 if (!$cells) {
342 throw new Exception(
343 pht(
344 'This Jupyter notebook does not specify any notebook cells.'));
345 }
346
347 if (!$for_diff) {
348 return $cells;
349 }
350
351 // If we're extracting cells to build a diff view, split code cells into
352 // individual lines and individual outputs. We want users to be able to
353 // add inline comments to each line and each output block.
354
355 $results = array();
356 foreach ($cells as $cell) {
357 $cell_type = idx($cell, 'cell_type');
358 if ($cell_type === 'markdown') {
359 $source = $this->readString($cell, 'source');
360
361 // Attempt to split contiguous blocks of markdown into smaller
362 // pieces.
363
364 $chunks = preg_split(
365 '/\n\n+/',
366 $source);
367
368 foreach ($chunks as $chunk) {
369 $result = $cell;
370 $result['source'] = array($chunk);
371 $results[] = $result;
372 }
373
374 continue;
375 }
376
377 if ($cell_type !== 'code') {
378 $results[] = $cell;
379 continue;
380 }
381
382 $label = $this->newCellLabel($cell);
383
384 $lines = $this->readStringList($cell, 'source');
385 $content = $this->highlightLines($lines);
386
387 $count = count($lines);
388 for ($ii = 0; $ii < $count; $ii++) {
389 $is_head = ($ii === 0);
390 $is_last = ($ii === ($count - 1));
391
392 if ($is_head) {
393 $line_label = $label;
394 } else {
395 $line_label = null;
396 }
397
398 $results[] = array(
399 'cell_type' => 'code/line',
400 'label' => $line_label,
401 'raw' => $lines[$ii],
402 'display' => idx($content, $ii),
403 'head' => $is_head,
404 'last' => $is_last,
405 );
406 }
407
408 $outputs = array();
409 $output_list = idx($cell, 'outputs');
410 if (is_array($output_list)) {
411 foreach ($output_list as $output) {
412 $results[] = array(
413 'cell_type' => 'code/output',
414 'output' => $output,
415 );
416 }
417 }
418 }
419
420 return $results;
421 }
422
423
424 private function renderJupyterCell(
425 PhabricatorUser $viewer,
426 array $cell) {
427
428 list($label, $content) = $this->renderJupyterCellContent($viewer, $cell);
429
430 $classes = null;
431 switch (idx($cell, 'cell_type')) {
432 case 'code/line':
433 $classes = 'jupyter-cell-flush';
434 break;
435 }
436
437 return $this->newJupyterCell(
438 $label,
439 $content,
440 $classes);
441 }
442
443 private function newJupyterCell($label, $content, $classes) {
444 $label_cell = phutil_tag(
445 'td',
446 array(
447 'class' => 'jupyter-label',
448 ),
449 $label);
450
451 $content_cell = phutil_tag(
452 'td',
453 array(
454 'class' => $classes,
455 ),
456 $content);
457
458 return phutil_tag(
459 'tr',
460 array(),
461 array(
462 $label_cell,
463 $content_cell,
464 ));
465 }
466
467 private function renderJupyterCellContent(
468 PhabricatorUser $viewer,
469 array $cell) {
470
471 $cell_type = idx($cell, 'cell_type');
472 switch ($cell_type) {
473 case 'markdown':
474 return $this->newMarkdownCell($cell);
475 case 'code':
476 return $this->newCodeCell($cell);
477 case 'code/line':
478 return $this->newCodeLineCell($cell);
479 case 'code/output':
480 return $this->newCodeOutputCell($cell);
481 }
482
483 $json_content = id(new PhutilJSON())
484 ->encodeFormatted($cell);
485
486 return $this->newRawCell($json_content);
487 }
488
489 private function newRawCell($content) {
490 return array(
491 null,
492 phutil_tag(
493 'div',
494 array(
495 'class' => 'jupyter-cell-raw PhabricatorMonospaced',
496 ),
497 $content),
498 );
499 }
500
501 private function newMarkdownCell(array $cell) {
502 $content = $this->readStringList($cell, 'source');
503
504 // TODO: This should ideally highlight as Markdown, but the "md"
505 // highlighter in Pygments is painfully slow and not terribly useful.
506 $content = $this->highlightLines($content, 'txt');
507
508 return array(
509 null,
510 phutil_tag(
511 'div',
512 array(
513 'class' => 'jupyter-cell-markdown',
514 ),
515 $content),
516 );
517 }
518
519 private function newCodeCell(array $cell) {
520 $label = $this->newCellLabel($cell);
521
522 $content = $this->readStringList($cell, 'source');
523 $content = $this->highlightLines($content);
524
525 $outputs = array();
526 $output_list = idx($cell, 'outputs');
527 if (is_array($output_list)) {
528 foreach ($output_list as $output) {
529 $outputs[] = $this->newOutput($output);
530 }
531 }
532
533 return array(
534 $label,
535 array(
536 phutil_tag(
537 'div',
538 array(
539 'class' =>
540 'jupyter-cell-code jupyter-cell-code-block '.
541 'PhabricatorMonospaced remarkup-code',
542 ),
543 array(
544 $content,
545 )),
546 $outputs,
547 ),
548 );
549 }
550
551 private function newCodeLineCell(array $cell, $content = null) {
552 $classes = array();
553 $classes[] = 'PhabricatorMonospaced';
554 $classes[] = 'remarkup-code';
555 $classes[] = 'jupyter-cell-code';
556 $classes[] = 'jupyter-cell-code-line';
557
558 if ($cell['head']) {
559 $classes[] = 'jupyter-cell-code-head';
560 }
561
562 if ($cell['last']) {
563 $classes[] = 'jupyter-cell-code-last';
564 }
565
566 $classes = implode(' ', $classes);
567
568 if ($content === null) {
569 $content = $cell['display'];
570 }
571
572 return array(
573 $cell['label'],
574 array(
575 phutil_tag(
576 'div',
577 array(
578 'class' => $classes,
579 ),
580 array(
581 $content,
582 )),
583 ),
584 );
585 }
586
587 private function newCodeOutputCell(array $cell) {
588 return array(
589 null,
590 $this->newOutput($cell['output']),
591 );
592 }
593
594 private function newOutput(array $output) {
595 if (!is_array($output)) {
596 return pht('<Invalid Output>');
597 }
598
599 $classes = array(
600 'jupyter-output',
601 'PhabricatorMonospaced',
602 );
603
604 $output_name = idx($output, 'name');
605 switch ($output_name) {
606 case 'stderr':
607 $classes[] = 'jupyter-output-stderr';
608 break;
609 }
610
611 $output_type = idx($output, 'output_type');
612 switch ($output_type) {
613 case 'execute_result':
614 case 'display_data':
615 $data = idx($output, 'data');
616
617 $image_formats = array(
618 'image/png',
619 'image/jpeg',
620 'image/jpg',
621 'image/gif',
622 'image/webp',
623 );
624
625 foreach ($image_formats as $image_format) {
626 if (!isset($data[$image_format])) {
627 continue;
628 }
629
630 $raw_data = $this->readString($data, $image_format);
631
632 $content = phutil_tag(
633 'img',
634 array(
635 'src' => 'data:'.$image_format.';base64,'.$raw_data,
636 ));
637
638 break 2;
639 }
640
641 if (isset($data['text/html'])) {
642 $content = $data['text/html'];
643 $classes[] = 'jupyter-output-html';
644 break;
645 }
646
647 if (isset($data['application/javascript'])) {
648 $content = $data['application/javascript'];
649 $classes[] = 'jupyter-output-html';
650 break;
651 }
652
653 if (isset($data['text/plain'])) {
654 $content = $data['text/plain'];
655 break;
656 }
657
658 break;
659 case 'stream':
660 default:
661 $content = $this->readString($output, 'text');
662 break;
663 }
664
665 return phutil_tag(
666 'div',
667 array(
668 'class' => implode(' ', $classes),
669 ),
670 $content);
671 }
672
673 private function newCellLabel(array $cell) {
674 $execution_count = idx($cell, 'execution_count');
675 if ($execution_count) {
676 $label = 'In ['.$execution_count.']:';
677 } else {
678 $label = null;
679 }
680
681 return $label;
682 }
683
684 private function highlightLines(array $lines, $force_language = null) {
685 if ($force_language === null) {
686 $head = head($lines);
687 $matches = null;
688 if (preg_match('/^%%(.*)$/', $head, $matches)) {
689 $restore = array_shift($lines);
690 $lang = $matches[1];
691 } else {
692 $restore = null;
693 $lang = 'py';
694 }
695 } else {
696 $restore = null;
697 $lang = $force_language;
698 }
699
700 $content = PhabricatorSyntaxHighlighter::highlightWithLanguage(
701 $lang,
702 implode('', $lines));
703 $content = phutil_split_lines($content);
704
705 if ($restore !== null) {
706 $language_tag = phutil_tag(
707 'span',
708 array(
709 'class' => 'language-tag',
710 ),
711 $restore);
712
713 array_unshift($content, $language_tag);
714 }
715
716 return $content;
717 }
718
719 public function shouldSuggestEngine(PhabricatorDocumentRef $ref) {
720 return true;
721 }
722
723 private function readString(array $src, $key) {
724 $list = $this->readStringList($src, $key);
725 return implode('', $list);
726 }
727
728 private function readStringList(array $src, $key) {
729 $list = idx($src, $key);
730
731 if (is_string($list)) {
732 $list = array($list);
733 } else if (!is_array($list)) {
734 $list = array();
735 }
736
737 return $list;
738 }
739
740}