Google Docs API client for OCaml
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

s3, bottler: stream bottle uploads, don't hold the body in memory

New S3.Http.put_object_file takes a file path + env, computes the
SigV4 payload hash by streaming the file once through Digestif
(64 KB buffer, no heap allocation beyond the buffer), then hands
Requests.Body.of_file the same path so the HTTP layer streams the
second pass straight to the socket. A 200 MB bottle no longer has
to fit in the OCaml heap.

Sigv4.sign grows an optional ?payload_hash so callers that hash
the body out-of-band (streaming signers, aws-chunked, unsigned
payload) can override the string-based default without rewriting
the signing pipeline.

Bottler.Upload.put_both swaps Bos.OS.File.read + put_object for
put_object_file, removing the last heap-holds-entire-body
behaviour in the upload path.

+907 -116
+1 -1
bin/fetch.ml
··· 1 - (** [gdocs get <id>] — fetch a document and print its JSON. *) 1 + (** [gdocs get <id>] -- fetch a document and print its JSON. *) 2 2 3 3 let run doc_id = 4 4 Eio_main.run @@ fun env ->
+7 -7
bin/install.ml
··· 1 - (** [gdocs install] — one-time OAuth client registration. *) 1 + (** [gdocs install] -- one-time OAuth client registration. *) 2 2 3 3 let instructions = 4 4 {| ··· 6 6 7 7 Step 1. Open the Google Cloud Console in your browser (the next prompt 8 8 opens it for you). If you don't already have a Cloud project, 9 - create one (free — just pick any name). 9 + create one (free -- just pick any name). 10 10 11 - Step 2. On the "APIs & Services → Credentials" page, click 12 - "Create Credentials → OAuth client ID". 11 + Step 2. On the "APIs & Services -> Credentials" page, click 12 + "Create Credentials -> OAuth client ID". 13 13 - Application type: Desktop app 14 14 - Name: anything, e.g. "gdocs CLI" 15 15 Click Create, then copy the Client ID and Client secret. 16 16 17 17 Step 3. Enable the Google Docs API for the project: 18 - APIs & Services → Library → search "Google Docs API" → Enable. 18 + APIs & Services -> Library -> search "Google Docs API" -> Enable. 19 19 20 20 Step 4. If the OAuth consent screen isn't configured yet, do that too: 21 - APIs & Services → OAuth consent screen. User type: External. 21 + APIs & Services -> OAuth consent screen. User type: External. 22 22 Under Test users, add your own Google email. 23 23 24 24 Step 5. Come back here and paste the two strings. ··· 35 35 if 36 36 not 37 37 (Ui.confirm 38 - (Fmt.str "Existing client credentials at %a — overwrite them?" 38 + (Fmt.str "Existing client credentials at %a -- overwrite them?" 39 39 Eio.Path.pp 40 40 (Gdocs.Store.client_path fs))) 41 41 then begin
+1 -1
bin/login.ml
··· 1 - (** [gdocs login] — per-user OAuth flow. *) 1 + (** [gdocs login] -- per-user OAuth flow. *) 2 2 3 3 let require_client fs = 4 4 match Gdocs.Store.load_client fs with
+1 -1
bin/main.ml
··· 1 - (** gdocs — CLI dispatcher. *) 1 + (** gdocs -- CLI dispatcher. *) 2 2 3 3 let () = 4 4 Crypto_rng_unix.use_default ();
+2 -2
bin/md.ml
··· 1 - (** [gdocs md <id>] — fetch a document and print it as Markdown. *) 1 + (** [gdocs md <id>] -- fetch a document and print it as Markdown. *) 2 2 3 3 let run ~comments doc_id = 4 4 Eio_main.run @@ fun env -> ··· 36 36 let comments_flag = 37 37 let doc = 38 38 "Append a Comments section listing document comments (requires the \ 39 - drive.readonly scope — re-run `gdocs login` if you haven't granted it)." 39 + drive.readonly scope -- re-run `gdocs login` if you haven't granted it)." 40 40 in 41 41 Arg.(value & flag & info [ "comments"; "c" ] ~doc) 42 42
+1 -1
bin/md.mli
··· 1 - (** [gdocs md] subcommand — render a document as Markdown. *) 1 + (** [gdocs md] subcommand -- render a document as Markdown. *) 2 2 3 3 val cmd : unit Cmdliner.Cmd.t 4 4 (** [cmd] is the Cmdliner command value for the [md] subcommand. *)
+1 -1
bin/text.ml
··· 1 - (** [gdocs text <id>] — fetch a document and print its plain text. *) 1 + (** [gdocs text <id>] -- fetch a document and print its plain text. *) 2 2 3 3 let run doc_id = 4 4 Eio_main.run @@ fun env ->
+1 -1
bin/ui.mli
··· 2 2 3 3 val open_in_browser : string -> unit 4 4 (** [open_in_browser url] best-effort opens [url] in the user's default browser. 5 - Silent on failure — callers should also print the URL. *) 5 + Silent on failure -- callers should also print the URL. *) 6 6 7 7 val prompt : string -> string 8 8 (** [prompt label] writes [label: ] to stdout and returns the trimmed line read
+2 -2
lib/comments.ml
··· 22 22 let author = if c.author = "" then "(unknown)" else c.author in 23 23 Fmt.pf ppf "%s: %s" author c.content 24 24 25 - (* ── JSON parsing ────────────────────────────────────────────── *) 25 + (* -- JSON parsing ---------------------------------------------- *) 26 26 27 27 (* Drive API v3 comment shape: 28 28 { ··· 84 84 | Error e -> err_json_decode e 85 85 | Ok r -> Ok r.comments 86 86 87 - (* ── HTTP ────────────────────────────────────────────────────── *) 87 + (* -- HTTP ------------------------------------------------------ *) 88 88 89 89 let api_root = "https://www.googleapis.com/drive/v3/files/" 90 90
+4 -4
lib/comments.mli
··· 23 23 (** A comment on a document. *) 24 24 25 25 val pp : Format.formatter -> t -> unit 26 - (** [pp ppf c] renders a compact one-line representation of [c] — 27 - [author: content] — suitable for logging and [Fmt.str] use. *) 26 + (** [pp ppf c] renders a compact one-line representation of [c] -- 27 + [author: content] -- suitable for logging and [Fmt.str] use. *) 28 28 29 29 val list : 30 30 Requests.t -> ··· 34 34 (** [list http ~token document_id] fetches comments for the document via the 35 35 Drive API. 36 36 37 - Only top-level comments are returned — replies are currently flattened away. 38 - Uses 37 + Only top-level comments are returned -- replies are currently flattened 38 + away. Uses 39 39 [fields=comments(id,author/displayName,content,quotedFileContent,anchor,resolved)] 40 40 to minimize response size. *) 41 41
+1 -1
lib/document.ml
··· 24 24 25 25 We follow [paragraph.elements[*].textRun.content]. The Docs API 26 26 guarantees the last [textRun] of every paragraph ends with ["\n"], 27 - so we just concatenate — no manual newline insertion (which would 27 + so we just concatenate -- no manual newline insertion (which would 28 28 double-space the output). *) 29 29 let extract_text value = 30 30 let open Jsont in
+2 -2
lib/gdocs.ml
··· 9 9 10 10 module Log = (val Logs.src_log src : Logs.LOG) 11 11 12 - (* Error helpers — keep all [`Msg] shapes in one place. *) 12 + (* Error helpers -- keep all [`Msg] shapes in one place. *) 13 13 let err_msg fmt = Fmt.kstr (fun m -> Error (`Msg m)) fmt 14 14 let err_http status body = err_msg "HTTP %d: %s" status body 15 15 let scope_readonly = "https://www.googleapis.com/auth/documents.readonly" 16 16 let scope_readwrite = "https://www.googleapis.com/auth/documents" 17 17 let api_root = "https://docs.googleapis.com/v1/documents/" 18 18 19 - (* ── HTTP ────────────────────────────────────────────────────────── *) 19 + (* -- HTTP ---------------------------------------------------------- *) 20 20 21 21 let auth_header token = 22 22 match Gauth.try_access token with
+378 -45
lib/markdown.ml
··· 2 2 3 3 (* We emit CommonMark strings directly rather than building a 4 4 Cmarkit AST. The AST constructors are verbose (open variants with 5 - Meta values per node) and we don't need source-location fidelity — 5 + Meta values per node) and we don't need source-location fidelity -- 6 6 we only generate. Tests round-trip the output through 7 7 Cmarkit.Doc.of_string to confirm it parses cleanly. *) 8 8 ··· 22 22 let as_array = function Jsont.Array (l, _) -> Some l | _ -> None 23 23 let string_or = function Some (Jsont.String (s, _)) -> s | _ -> "" 24 24 25 - (* ── Inline escaping ────────────────────────────────────────── *) 25 + (* -- Inline escaping ------------------------------------------ *) 26 26 27 27 (* Escape only the CommonMark characters that are always dangerous 28 28 inside an inline text span: backslash, backtick, emphasis markers, 29 29 link brackets, autolink markers. Position-sensitive characters (like 30 - [.], [-], [#], [!]) aren't escaped globally — they're only special 30 + [.], [-], [#], [!]) aren't escaped globally -- they're only special 31 31 at the start of a line and would give visually cluttered output if 32 32 escaped everywhere. *) 33 33 let escape_inline s = ··· 58 58 s; 59 59 Buffer.contents buf 60 60 61 - (* ── Paragraph-level classification ─────────────────────────── *) 61 + (* -- Paragraph-level classification --------------------------- *) 62 62 63 63 (* The heading level for a [namedStyleType] string. Returns [0] for 64 64 body text, -1 for TITLE/SUBTITLE which we treat as H1/H2 respectively. *) ··· 78 78 | None -> "NORMAL_TEXT" 79 79 | Some style -> string_or (member style "namedStyleType") 80 80 81 - let is_bullet paragraph = Option.is_some (member paragraph "bullet") 81 + let bullet_info paragraph = 82 + match member paragraph "bullet" with 83 + | None -> None 84 + | Some b -> 85 + let list_id = 86 + Option.bind (member b "listId") as_string |> Option.value ~default:"" 87 + in 88 + let nesting = 89 + match 90 + Option.bind (member b "nestingLevel") (function 91 + | Jsont.Number (n, _) -> Some n 92 + | _ -> None) 93 + with 94 + | Some n -> int_of_float n 95 + | None -> 0 96 + in 97 + Some (list_id, nesting) 82 98 83 - (* ── textRun rendering ──────────────────────────────────────── *) 99 + (* A glyph type is "numbered" if it produces a digit/letter/roman 100 + sequence. "DECIMAL", "UPPER_ALPHA", "UPPER_ROMAN", etc. *) 101 + let is_numbered_glyph = function 102 + | "DECIMAL" | "UPPER_ALPHA" | "LOWER_ALPHA" | "UPPER_ROMAN" | "LOWER_ROMAN" 103 + | "ZERO_DECIMAL" -> 104 + true 105 + | _ -> false 106 + 107 + (* Resolve a [bullet] to the glyph type at its nesting level by looking 108 + up [lists.<listId>.listProperties.nestingLevels[nesting].glyphType]. 109 + Returns [None] if the document has no [lists] map or the lookup 110 + fails; the caller then defaults to a bullet. *) 111 + let glyph_type ~lists list_id nesting = 112 + match lists with 113 + | None -> None 114 + | Some l -> ( 115 + match member l list_id with 116 + | None -> None 117 + | Some entry -> 118 + let levels = 119 + Option.bind (member entry "listProperties") (fun lp -> 120 + Option.bind (member lp "nestingLevels") as_array) 121 + |> Option.value ~default:[] 122 + in 123 + if nesting < List.length levels then 124 + let level = List.nth levels nesting in 125 + Option.bind (member level "glyphType") as_string 126 + else None) 127 + 128 + (* -- textRun rendering ---------------------------------------- *) 84 129 85 130 type style = { bold : bool; italic : bool; code : bool; link : string option } 86 131 ··· 128 173 | None -> () 129 174 | Some content -> 130 175 let style = parse_text_style (member tr "textStyle") in 131 - (* Strip the trailing newline if present — we add the 176 + (* Strip the trailing newline if present -- we add the 132 177 paragraph terminator ourselves. *) 133 178 let content, trailing_nl = 134 179 let n = String.length content in ··· 149 194 Buffer.add_string buf inner; 150 195 if trailing_nl then Buffer.add_char buf '\n') 151 196 152 - (* ── Paragraph rendering ────────────────────────────────────── *) 197 + (* -- Paragraph rendering -------------------------------------- *) 153 198 154 - let render_paragraph buf paragraph = 155 - let style = named_style paragraph in 156 - let level = heading_level style in 157 - let bullet = is_bullet paragraph in 158 - let prefix = 159 - if level > 0 then String.make level '#' ^ " " 160 - else if bullet then "- " 161 - else "" 162 - in 163 - Buffer.add_string buf prefix; 199 + (* Render the text-run elements of a paragraph into a single line of 200 + Markdown (no terminating newline -- callers decide how to frame the 201 + paragraph within its context). *) 202 + let render_paragraph_inline paragraph = 164 203 let elements = 165 204 Option.bind (member paragraph "elements") as_array 166 205 |> Option.value ~default:[] 167 206 in 168 - (* Capture the paragraph text into a side buffer so we can trim the 169 - trailing newline it may already contain and replace it with our 170 - own paragraph break. *) 171 - let inner = Buffer.create 256 in 207 + let inner = Buffer.create 128 in 172 208 List.iter (render_text_run inner) elements; 173 209 let s = Buffer.contents inner in 174 - let trimmed = 175 - let n = String.length s in 176 - if n > 0 && s.[n - 1] = '\n' then String.sub s 0 (n - 1) else s 210 + let n = String.length s in 211 + if n > 0 && s.[n - 1] = '\n' then String.sub s 0 (n - 1) else s 212 + 213 + let list_marker ~lists list_id nesting = 214 + match glyph_type ~lists list_id nesting with 215 + | Some g when is_numbered_glyph g -> "1. " 216 + | _ -> "- " 217 + 218 + (* CommonMark nests a list item only when its marker is indented at 219 + least as far as the parent's content starts. The parent's content 220 + starts at [String.length parent_marker] columns in, so a child at 221 + level N must be indented by the sum of marker widths at levels 222 + 0..N-1. "- " is 2, "1. " is 3 -- mixing them means we can't use a 223 + fixed stride per level. *) 224 + let list_indent ~lists list_id level = 225 + let total = ref 0 in 226 + for i = 0 to level - 1 do 227 + total := !total + String.length (list_marker ~lists list_id i) 228 + done; 229 + String.make !total ' ' 230 + 231 + (* Returns [(is_list_item, rendered_line_without_trailing_newlines)]. 232 + The caller uses [is_list_item] to decide how much vertical spacing 233 + to emit around this paragraph. *) 234 + let render_paragraph ~lists paragraph = 235 + let style = named_style paragraph in 236 + let level = heading_level style in 237 + let bullet = bullet_info paragraph in 238 + let text = render_paragraph_inline paragraph in 239 + match (level, bullet) with 240 + | l, _ when l > 0 -> 241 + let line = Fmt.str "%s %s" (String.make l '#') text in 242 + (false, line) 243 + | _, Some (list_id, nesting) -> 244 + let line = 245 + Fmt.str "%s%s%s" 246 + (list_indent ~lists list_id nesting) 247 + (list_marker ~lists list_id nesting) 248 + text 249 + in 250 + (true, line) 251 + | _, None -> (false, text) 252 + 253 + (* -- Table rendering ----------------------------------------- *) 254 + 255 + (* Pull a cell's cell.content paragraphs and flatten them into a single 256 + line by replacing newlines with spaces. Markdown table cells can't 257 + span multiple lines without switching to an HTML fallback, so we 258 + serialize into one line. *) 259 + let render_table_cell ~lists cell = 260 + let paragraphs = 261 + Option.bind (member cell "content") as_array |> Option.value ~default:[] 177 262 in 178 - Buffer.add_string buf trimmed; 179 - Buffer.add_string buf "\n\n" 263 + let b = Buffer.create 64 in 264 + let first = ref true in 265 + List.iter 266 + (fun el -> 267 + match member el "paragraph" with 268 + | None -> () 269 + | Some p -> 270 + if not !first then Buffer.add_string b "<br>"; 271 + first := false; 272 + ignore lists; 273 + Buffer.add_string b (render_paragraph_inline p)) 274 + paragraphs; 275 + Buffer.contents b 180 276 181 - let render_body buf body = 277 + (* Escape pipe characters in cell content -- otherwise they break the 278 + row structure. Use backslash-escape per GFM. *) 279 + let escape_table_cell s = 280 + let buf = Buffer.create (String.length s) in 281 + String.iter 282 + (fun c -> 283 + if c = '|' then Buffer.add_string buf "\\|" else Buffer.add_char buf c) 284 + s; 285 + Buffer.contents buf 286 + 287 + let render_table ~lists buf table = 288 + let rows = 289 + Option.bind (member table "tableRows") as_array |> Option.value ~default:[] 290 + in 291 + let rows = 292 + List.map 293 + (fun row -> 294 + let cells = 295 + Option.bind (member row "tableCells") as_array 296 + |> Option.value ~default:[] 297 + in 298 + List.map (fun c -> escape_table_cell (render_table_cell ~lists c)) cells) 299 + rows 300 + in 301 + let ncols = match rows with r :: _ -> List.length r | [] -> 0 in 302 + if ncols = 0 then () 303 + else 304 + let emit_row r = 305 + Buffer.add_string buf "| "; 306 + Buffer.add_string buf (String.concat " | " r); 307 + Buffer.add_string buf " |" 308 + in 309 + (* Google Docs tables do not carry a header-row distinction, so we 310 + refuse to invent one. GFM requires a separator row, so we emit 311 + an empty header followed by the separator. The result renders 312 + as a semantically neutral data grid: no cell is promoted to a 313 + header, and inline markdown in cells is still processed. *) 314 + let blank = List.init ncols (fun _ -> "") in 315 + let dashes = List.init ncols (fun _ -> "---") in 316 + emit_row blank; 317 + Buffer.add_char buf '\n'; 318 + Buffer.add_string buf "| "; 319 + Buffer.add_string buf (String.concat " | " dashes); 320 + Buffer.add_string buf " |"; 321 + List.iter 322 + (fun r -> 323 + Buffer.add_char buf '\n'; 324 + emit_row r) 325 + rows 326 + 327 + (* -- Body / tabs --------------------------------------------- 328 + 329 + Blocks are self-terminating: each one (paragraph, heading, list 330 + group, table) ends with [\n\n] so that concatenations between 331 + bodies or sibling tabs keep their CommonMark paragraph boundaries. 332 + Consecutive list items in the same group are tightly packed 333 + (separated by single [\n]) and the group as a whole closes with 334 + [\n\n] to end the list before the next block. *) 335 + let render_body ~lists buf body = 182 336 let content = 183 337 Option.bind (member body "content") as_array |> Option.value ~default:[] 184 338 in 339 + let in_list = ref false in 340 + let close_list () = 341 + if !in_list then begin 342 + Buffer.add_char buf '\n'; 343 + in_list := false 344 + end 345 + in 185 346 List.iter 186 347 (fun el -> 187 - match member el "paragraph" with 188 - | Some p -> render_paragraph buf p 189 - | None -> ()) 190 - content 348 + match (member el "paragraph", member el "table") with 349 + | Some p, _ -> 350 + let is_list, line = render_paragraph ~lists p in 351 + if is_list then begin 352 + Buffer.add_string buf line; 353 + Buffer.add_char buf '\n'; 354 + in_list := true 355 + end 356 + else begin 357 + close_list (); 358 + Buffer.add_string buf line; 359 + Buffer.add_string buf "\n\n" 360 + end 361 + | None, Some t -> 362 + close_list (); 363 + render_table ~lists buf t; 364 + Buffer.add_string buf "\n\n" 365 + | None, None -> ()) 366 + content; 367 + close_list () 191 368 192 - let render_tabs buf tabs = 369 + let render_tabs ~lists buf tabs = 193 370 let rec walk_tab tab = 194 371 (match member tab "documentTab" with 195 372 | Some dtab -> ( 196 373 match member dtab "body" with 197 - | Some body -> render_body buf body 374 + | Some body -> render_body ~lists buf body 198 375 | None -> ()) 199 376 | None -> ()); 200 377 match Option.bind (member tab "childTabs") as_array with ··· 203 380 in 204 381 List.iter walk_tab tabs 205 382 206 - (* ── Top-level entry point ──────────────────────────────────── *) 383 + (* -- Top-level entry point ------------------------------------ *) 207 384 208 385 let render_document buf doc_json = 209 - (* A leading level-1 heading is added from the document's [title] 210 - field UNLESS the body already starts with a TITLE paragraph. *) 211 386 let title = 212 387 match member doc_json "title" with 213 388 | Some (Jsont.String (s, _)) -> s ··· 229 404 Buffer.add_string buf (escape_inline title); 230 405 Buffer.add_string buf "\n\n" 231 406 end; 407 + let lists = member doc_json "lists" in 232 408 (match member doc_json "body" with 233 - | Some body -> render_body buf body 409 + | Some body -> render_body ~lists buf body 234 410 | None -> ()); 235 411 match Option.bind (member doc_json "tabs") as_array with 236 - | Some tabs -> render_tabs buf tabs 412 + | Some tabs -> render_tabs ~lists buf tabs 237 413 | None -> () 238 414 239 415 let of_document doc = ··· 244 420 render_document buf json; 245 421 Buffer.contents buf 246 422 247 - (* ── Comments footer ────────────────────────────────────────── *) 423 + (* -- Inline comment splicing -------------------------------- 424 + 425 + Google Docs comments carry a [quotedFileContent.value] field 426 + holding the exact text range they anchor to. We parse the rendered 427 + markdown with Cmarkit to identify plain-text spans where it is 428 + safe to insert a footnote marker, then match each comment's 429 + [quoted_text] against those spans. This avoids the failure modes 430 + of naive substring search: 431 + 432 + * It never splices inside link syntax, code spans, emphasis, or 433 + other markup, because those AST nodes are skipped in the span 434 + collection. 435 + * It uses AST-level byte offsets, not a single forward cursor, so 436 + out-of-order or repeated-text comments don't cascade wrongly. 437 + 438 + Comments whose [quoted_text] can't be matched against any safe 439 + span (empty, inside markup, or spanning multiple runs) fall back 440 + to a [## Comments] section. *) 441 + 442 + type safe_span = { 443 + first_byte : int; 444 + content : string; 445 + (* Offset within [content] from which to start the next search. 446 + Advancing this after each match lets two comments anchor to 447 + different occurrences within the same run, and enforces 448 + left-to-right ordering. *) 449 + mutable next_search : int; 450 + } 451 + 452 + let collect_safe_spans md = 453 + let doc = Cmarkit.Doc.of_string ~locs:true md in 454 + let spans = ref [] in 455 + let add meta content = 456 + let loc = Cmarkit.Meta.textloc meta in 457 + if 458 + (not (Cmarkit.Textloc.is_none loc)) && Cmarkit.Textloc.first_byte loc >= 0 459 + then 460 + spans := 461 + { 462 + first_byte = Cmarkit.Textloc.first_byte loc; 463 + content; 464 + next_search = 0; 465 + } 466 + :: !spans 467 + in 468 + let rec walk_inline ~safe inline = 469 + match inline with 470 + | Cmarkit.Inline.Text (s, meta) -> if safe then add meta s 471 + | Cmarkit.Inline.Inlines (is, _) -> List.iter (walk_inline ~safe) is 472 + | Cmarkit.Inline.Emphasis (e, _) | Cmarkit.Inline.Strong_emphasis (e, _) -> 473 + walk_inline ~safe:false (Cmarkit.Inline.Emphasis.inline e) 474 + | Cmarkit.Inline.Link _ | Cmarkit.Inline.Image _ 475 + | Cmarkit.Inline.Code_span _ -> 476 + () 477 + | _ -> () 478 + in 479 + let rec walk_block block = 480 + match block with 481 + | Cmarkit.Block.Paragraph (p, _) -> 482 + walk_inline ~safe:true (Cmarkit.Block.Paragraph.inline p) 483 + | Cmarkit.Block.Heading (h, _) -> 484 + walk_inline ~safe:true (Cmarkit.Block.Heading.inline h) 485 + | Cmarkit.Block.List (lst, _) -> 486 + List.iter 487 + (fun item -> 488 + let it, _ = (item : Cmarkit.Block.List_item.t Cmarkit.node) in 489 + walk_block (Cmarkit.Block.List_item.block it)) 490 + (Cmarkit.Block.List'.items lst) 491 + | Cmarkit.Block.Block_quote (bq, _) -> 492 + walk_block (Cmarkit.Block.Block_quote.block bq) 493 + | Cmarkit.Block.Blocks (bs, _) -> List.iter walk_block bs 494 + | _ -> () 495 + in 496 + walk_block (Cmarkit.Doc.block doc); 497 + (* Sort by first_byte so placement is stable document-order. *) 498 + List.sort (fun a b -> compare a.first_byte b.first_byte) !spans 499 + 500 + let footnote_label i = Fmt.str "^gdc%d" (i + 1) 501 + 502 + (* Find the first span whose content contains [needle] at or after the 503 + span's [next_search] position; return [(span, offset_in_content)]. *) 504 + let find_anchor_span spans ~needle = 505 + List.find_map 506 + (fun span -> 507 + let n = String.length needle and c = String.length span.content in 508 + if n = 0 || span.next_search + n > c then None 509 + else 510 + let rec scan i = 511 + if i + n > c then None 512 + else if String.sub span.content i n = needle then Some (span, i) 513 + else scan (i + 1) 514 + in 515 + scan span.next_search) 516 + spans 248 517 249 - let render_comment buf (c : Comments.t) = 518 + let render_comment_footer_entry buf i (c : Comments.t) = 519 + let author = if c.author = "" then "(unknown)" else c.author in 520 + Buffer.add_string buf "["; 521 + Buffer.add_string buf (footnote_label i); 522 + Buffer.add_string buf "]: **"; 523 + Buffer.add_string buf (escape_inline author); 524 + Buffer.add_string buf "**"; 525 + if c.resolved then Buffer.add_string buf " (resolved)"; 526 + Buffer.add_string buf ": "; 527 + Buffer.add_string buf (escape_inline c.content); 528 + Buffer.add_char buf '\n' 529 + 530 + (* Render a comment that couldn't be anchored inline -- full section 531 + with the quoted text as a blockquote. *) 532 + let render_unanchored buf (c : Comments.t) = 250 533 Buffer.add_string buf "### "; 251 534 if c.author <> "" then Buffer.add_string buf (escape_inline c.author) 252 535 else Buffer.add_string buf "(unknown)"; ··· 264 547 Buffer.add_string buf (escape_inline c.content); 265 548 Buffer.add_string buf "\n\n" 266 549 550 + (* Compute markers to insert. Returns [(markers, anchored, rest)] where: 551 + - [markers] = list of (byte_pos, string) to splice into md 552 + - [anchored] = (comment_idx, comment) for each successfully anchored 553 + - [rest] = (comment_idx, comment) for fallbacks *) 554 + let plan_splice md (comments : Comments.t list) = 555 + let spans = collect_safe_spans md in 556 + let markers = ref [] in 557 + let anchored = ref [] in 558 + let rest = ref [] in 559 + List.iteri 560 + (fun i (c : Comments.t) -> 561 + if c.quoted_text = "" then rest := (i, c) :: !rest 562 + else 563 + let needle = escape_inline c.quoted_text in 564 + match find_anchor_span spans ~needle with 565 + | None -> rest := (i, c) :: !rest 566 + | Some (span, offset) -> 567 + span.next_search <- offset + String.length needle; 568 + let pos = span.first_byte + offset + String.length needle in 569 + let marker = Fmt.str "[%s]" (footnote_label i) in 570 + markers := (pos, marker) :: !markers; 571 + anchored := (i, c) :: !anchored) 572 + comments; 573 + (!markers, List.rev !anchored, List.rev !rest) 574 + 575 + (* Splice markers into [md] at their byte positions. Single left-to- 576 + right pass: for each marker (in ascending position order) copy the 577 + untouched slice up to that position, then the marker. *) 578 + let apply_splice md markers = 579 + let sorted = List.sort (fun (a, _) (b, _) -> compare a b) markers in 580 + let buf = Buffer.create (String.length md + (64 * List.length sorted)) in 581 + let cursor = ref 0 in 582 + List.iter 583 + (fun (pos, marker) -> 584 + if pos > !cursor then Buffer.add_substring buf md !cursor (pos - !cursor); 585 + Buffer.add_string buf marker; 586 + cursor := pos) 587 + sorted; 588 + if !cursor < String.length md then 589 + Buffer.add_substring buf md !cursor (String.length md - !cursor); 590 + Buffer.contents buf 591 + 267 592 let of_document_with_comments ~comments doc = 268 593 let md = of_document doc in 269 594 if comments = [] then md 270 595 else 271 - let buf = Buffer.create (String.length md + 512) in 272 - Buffer.add_string buf md; 273 - Buffer.add_string buf "## Comments\n\n"; 274 - List.iter (render_comment buf) comments; 596 + let markers, anchored, rest = plan_splice md comments in 597 + let md_spliced = apply_splice md markers in 598 + let buf = Buffer.create (String.length md_spliced + 512) in 599 + Buffer.add_string buf md_spliced; 600 + if anchored <> [] then begin 601 + Buffer.add_string buf "\n"; 602 + List.iter (fun (i, c) -> render_comment_footer_entry buf i c) anchored 603 + end; 604 + if rest <> [] then begin 605 + Buffer.add_string buf "\n## Comments\n\n"; 606 + List.iter (fun (_, c) -> render_unanchored buf c) rest 607 + end; 275 608 Buffer.contents buf
+17 -12
lib/markdown.mli
··· 1 1 (** Render a Google Docs document as CommonMark-compatible Markdown. 2 2 3 - The rendering covers the common cases found in typical business documents: 3 + Supported structural elements: 4 4 5 - - Headings (HEADING_1 … HEADING_6, TITLE, SUBTITLE) 5 + - Headings (HEADING_1 ... HEADING_6, TITLE, SUBTITLE) 6 6 - Paragraphs 7 - - Bullet and numbered lists (one nesting level) 7 + - Bullet lists 8 8 - Bold, italic, inline code via [textStyle] 9 9 - Links via [textStyle.link.url] 10 10 11 - Tables, images, drawings, embedded objects, and footnotes are currently 12 - rendered as plain-text placeholders or skipped. 13 - 14 - The output passes {!Cmarkit.Doc.of_string} round-tripping — tests verify 15 - that. *) 11 + The output passes {!Cmarkit.Doc.of_string} round-tripping. *) 16 12 17 13 val of_document : Document.t -> string 18 14 (** [of_document doc] renders [doc] as a Markdown string. The document title ··· 20 16 itself opens with a TITLE paragraph. *) 21 17 22 18 val of_document_with_comments : comments:Comments.t list -> Document.t -> string 23 - (** [of_document_with_comments ~comments doc] renders [doc] as Markdown and 24 - appends a {b Comments} section listing each comment. Inline anchor splicing 25 - is not yet implemented — comments are appended as a footer section for now. 26 - *) 19 + (** [of_document_with_comments ~comments doc] renders [doc] as Markdown with 20 + each comment spliced inline where it anchors. 21 + 22 + For every comment, the [quoted_text] (from the Drive API's 23 + [quotedFileContent.value]) is located in the rendered Markdown and a 24 + GFM-style footnote reference [[^gdcN]] is inserted right after the match. 25 + Footnote definitions -- [[^gdcN]: **author**: content] -- are appended after 26 + the body. 27 + 28 + Comments with empty [quoted_text] or whose text can't be found (e.g. the 29 + anchor crossed a formatting boundary that changed the escaped form) fall 30 + back to a trailing [## Comments] section with the quoted text as a 31 + blockquote. *)
+1 -1
lib/store.ml
··· 21 21 22 22 let save_file path data = 23 23 Eio.Path.save ~create:(`Or_truncate 0o600) path data; 24 - (* Tighten permissions on a pre-existing file — [Or_truncate] only 24 + (* Tighten permissions on a pre-existing file -- [Or_truncate] only 25 25 applies [0o600] on create, not on reuse. *) 26 26 try Unix.chmod (snd path) 0o600 with Unix.Unix_error _ -> () 27 27
+3 -3
lib/store.mli
··· 1 1 (** On-disk storage of gdocs client credentials and user token. 2 2 3 3 Files live under the XDG config directory, typically [$HOME/.config/gdocs/]: 4 - - [client.json] — OAuth client ID/secret (shared across users of this 4 + - [client.json] -- OAuth client ID/secret (shared across users of this 5 5 install). 6 - - [token.json] — per-user OAuth access/refresh token. *) 6 + - [token.json] -- per-user OAuth access/refresh token. *) 7 7 8 8 val config_dir : _ Eio.Path.t -> Eio.Fs.dir_ty Eio.Path.t 9 9 (** [config_dir fs] is the gdocs config directory as an Eio path. *) ··· 50 50 val persist : _ Eio.Path.t -> Gauth.token -> unit 51 51 (** [persist fs token] serializes [token]'s current state to disk via 52 52 {!save_token}. Call this at the end of a command so that any refresh 53 - performed in-memory during the run survives to the next invocation — 53 + performed in-memory during the run survives to the next invocation -- 54 54 otherwise the next run reloads the stale pre-refresh token from disk and 55 55 refreshes unnecessarily (or fails once the stale access token expires past 56 56 the refresh_token's own lifetime). *)
+484 -31
test/test_markdown.ml
··· 228 228 Alcotest.(check bool) "tab content rendered" true (contains "Tab1"); 229 229 assert_parses md 230 230 231 - (* ── Comments footer ─────────────────────────────────────── *) 231 + (* Regression: body content must end with a CommonMark block 232 + boundary so that subsequent blocks (like tabs, or consecutive tabs) 233 + are not merged into one paragraph. *) 234 + let body_then_tab_keeps_boundary () = 235 + let body = 236 + {|{"documentId":"x","title":"", 237 + "body":{"content":[ 238 + { "paragraph": { "elements": [ { "textRun": { "content": "Body.\n" } } ] } } 239 + ]}, 240 + "tabs":[ 241 + { "documentTab": { "body": { "content": [ 242 + { "paragraph": { "elements": [ { "textRun": { "content": "Tab.\n" } } ] } } 243 + ] } } } 244 + ]}|} 245 + in 246 + let md = render body in 247 + (* Must see a blank line between "Body." and "Tab." so they are 248 + separate CommonMark paragraphs. *) 249 + let has_sub sub = 250 + let n = String.length md and l = String.length sub in 251 + let rec loop i = 252 + if i + l > n then false 253 + else if String.sub md i l = sub then true 254 + else loop (i + 1) 255 + in 256 + loop 0 257 + in 258 + Alcotest.(check bool) 259 + "blank line between body and tab" true (has_sub "Body.\n\nTab."); 260 + (* And crucially: not merged. *) 261 + Alcotest.(check bool) "not merged as one line" false (has_sub "Body.\nTab."); 262 + assert_parses md 263 + 264 + let sibling_tabs_keep_boundary () = 265 + let body = 266 + {|{"documentId":"x","title":"", 267 + "tabs":[ 268 + { "documentTab": { "body": { "content": [ 269 + { "paragraph": { "elements": [ { "textRun": { "content": "One.\n" } } ] } } 270 + ] } } }, 271 + { "documentTab": { "body": { "content": [ 272 + { "paragraph": { "elements": [ { "textRun": { "content": "Two.\n" } } ] } } 273 + ] } } } 274 + ]}|} 275 + in 276 + let md = render body in 277 + let has_sub sub = 278 + let n = String.length md and l = String.length sub in 279 + let rec loop i = 280 + if i + l > n then false 281 + else if String.sub md i l = sub then true 282 + else loop (i + 1) 283 + in 284 + loop 0 285 + in 286 + Alcotest.(check bool) 287 + "blank line between sibling tabs" true (has_sub "One.\n\nTwo."); 288 + assert_parses md 289 + 290 + (* ── Numbered lists ──────────────────────────────────────── *) 291 + 292 + let contains md sub = 293 + let n = String.length md and l = String.length sub in 294 + let rec loop i = 295 + if i + l > n then false 296 + else if String.sub md i l = sub then true 297 + else loop (i + 1) 298 + in 299 + loop 0 300 + 301 + let numbered_list () = 302 + let body = 303 + {|{"documentId":"x","title":"", 304 + "lists":{ 305 + "L1":{"listProperties":{"nestingLevels":[ 306 + {"glyphType":"DECIMAL"} 307 + ]}} 308 + }, 309 + "body":{"content":[ 310 + { "paragraph": { 311 + "bullet": { "listId": "L1", "nestingLevel": 0 }, 312 + "elements": [ { "textRun": { "content": "first\n" } } ] 313 + } }, 314 + { "paragraph": { 315 + "bullet": { "listId": "L1", "nestingLevel": 0 }, 316 + "elements": [ { "textRun": { "content": "second\n" } } ] 317 + } } 318 + ]}}|} 319 + in 320 + let md = render body in 321 + Alcotest.(check bool) "ordered marker on first" true (contains md "1. first"); 322 + Alcotest.(check bool) 323 + "ordered marker on second" true (contains md "1. second"); 324 + assert_parses md 325 + 326 + let nested_bullet_list () = 327 + let body = 328 + {|{"documentId":"x","title":"", 329 + "lists":{ 330 + "L1":{"listProperties":{"nestingLevels":[ 331 + {"glyphType":"GLYPH_TYPE_UNSPECIFIED","glyphSymbol":"\u25cf"}, 332 + {"glyphType":"GLYPH_TYPE_UNSPECIFIED","glyphSymbol":"\u25cb"} 333 + ]}} 334 + }, 335 + "body":{"content":[ 336 + { "paragraph": { 337 + "bullet": { "listId": "L1", "nestingLevel": 0 }, 338 + "elements": [ { "textRun": { "content": "outer\n" } } ] 339 + } }, 340 + { "paragraph": { 341 + "bullet": { "listId": "L1", "nestingLevel": 1 }, 342 + "elements": [ { "textRun": { "content": "inner\n" } } ] 343 + } } 344 + ]}}|} 345 + in 346 + let md = render body in 347 + Alcotest.(check bool) "outer at col 0" true (contains md "- outer"); 348 + Alcotest.(check bool) "inner indented" true (contains md " - inner"); 349 + assert_parses md 350 + 351 + (* The real test isn't "does the text appear" -- it's "does Cmarkit 352 + parse this as a nested list?" [2 * level] spaces is not enough for 353 + ordered lists; their marker is 3 wide ("1. "). *) 354 + let count_top_level_list_items md = 355 + let doc = Cmarkit.Doc.of_string md in 356 + let block = Cmarkit.Doc.block doc in 357 + let count = ref 0 in 358 + (* Walk the top-level block tree and count immediate list items. *) 359 + let rec walk b = 360 + match b with 361 + | Cmarkit.Block.Blocks (bs, _) -> List.iter walk bs 362 + | Cmarkit.Block.List (lst, _) -> 363 + count := !count + List.length (Cmarkit.Block.List'.items lst) 364 + | _ -> () 365 + in 366 + walk block; 367 + !count 368 + 369 + let mixed_nesting_numbered () = 370 + let body = 371 + {|{"documentId":"x","title":"", 372 + "lists":{ 373 + "L1":{"listProperties":{"nestingLevels":[ 374 + {"glyphType":"DECIMAL"}, 375 + {"glyphType":"DECIMAL"} 376 + ]}} 377 + }, 378 + "body":{"content":[ 379 + { "paragraph": { 380 + "bullet": { "listId": "L1", "nestingLevel": 0 }, 381 + "elements": [ { "textRun": { "content": "one\n" } } ] 382 + } }, 383 + { "paragraph": { 384 + "bullet": { "listId": "L1", "nestingLevel": 1 }, 385 + "elements": [ { "textRun": { "content": "sub\n" } } ] 386 + } } 387 + ]}}|} 388 + in 389 + let md = render body in 390 + Alcotest.(check bool) "outer ordered" true (contains md "1. one"); 391 + Alcotest.(check bool) 392 + "inner indented by parent marker width (3)" true (contains md " 1. sub"); 393 + (* Structural check: a correctly nested list has exactly 1 top-level 394 + item, not 2. *) 395 + Alcotest.(check int) 396 + "only one top-level item" 1 397 + (count_top_level_list_items md); 398 + assert_parses md 399 + 400 + (* ── Tables ──────────────────────────────────────────────── *) 401 + 402 + let table_basic () = 403 + let body = 404 + {|{"documentId":"x","title":"", 405 + "body":{"content":[ 406 + { "table": { 407 + "rows": 2, "columns": 2, 408 + "tableRows": [ 409 + { "tableCells": [ 410 + { "content": [ { "paragraph": { "elements": [ { "textRun": { "content": "H1\n" } } ] } } ] }, 411 + { "content": [ { "paragraph": { "elements": [ { "textRun": { "content": "H2\n" } } ] } } ] } 412 + ] }, 413 + { "tableCells": [ 414 + { "content": [ { "paragraph": { "elements": [ { "textRun": { "content": "a\n" } } ] } } ] }, 415 + { "content": [ { "paragraph": { "elements": [ { "textRun": { "content": "b\n" } } ] } } ] } 416 + ] } 417 + ] 418 + } } 419 + ]}}|} 420 + in 421 + let md = render body in 422 + (* GFM requires a header separator, but Google Docs tables have no 423 + header-row semantic, so we emit an empty header and surface the 424 + first row as data. *) 425 + Alcotest.(check bool) "empty header row" true (contains md "| | |"); 426 + Alcotest.(check bool) "separator" true (contains md "| --- | --- |"); 427 + Alcotest.(check bool) 428 + "first row is data, not header" true 429 + (contains md "| H1 | H2 |"); 430 + Alcotest.(check bool) "second row is data" true (contains md "| a | b |"); 431 + assert_parses md 432 + 433 + let table_escapes_pipes () = 434 + let body = 435 + {|{"documentId":"x","title":"", 436 + "body":{"content":[ 437 + { "table": { 438 + "rows": 1, "columns": 1, 439 + "tableRows": [ 440 + { "tableCells": [ 441 + { "content": [ { "paragraph": { "elements": [ { "textRun": { "content": "a|b\n" } } ] } } ] } 442 + ] } 443 + ] 444 + } } 445 + ]}}|} 446 + in 447 + let md = render body in 448 + Alcotest.(check bool) "pipe escaped" true (contains md "a\\|b") 449 + 450 + let paragraph_after_table () = 451 + let body = 452 + {|{"documentId":"x","title":"", 453 + "body":{"content":[ 454 + { "paragraph": { "elements": [ { "textRun": { "content": "before\n" } } ] } }, 455 + { "table": { 456 + "rows": 1, "columns": 1, 457 + "tableRows": [ 458 + { "tableCells": [ 459 + { "content": [ { "paragraph": { "elements": [ { "textRun": { "content": "cell\n" } } ] } } ] } 460 + ] } 461 + ] 462 + } }, 463 + { "paragraph": { "elements": [ { "textRun": { "content": "after\n" } } ] } } 464 + ]}}|} 465 + in 466 + let md = render body in 467 + Alcotest.(check bool) "paragraph before" true (contains md "before"); 468 + Alcotest.(check bool) "cell content" true (contains md "| cell |"); 469 + Alcotest.(check bool) "paragraph after" true (contains md "after"); 470 + assert_parses md 471 + 472 + (* ── Inline comment splicing ─────────────────────────────── *) 473 + 474 + let sample_doc_for_comments = 475 + {|{"documentId":"x","title":"D","body":{"content":[ 476 + { "paragraph": { "elements": [ { "textRun": { "content": "Hello world\n" } } ] } } 477 + ]}}|} 478 + 479 + let comment_spliced_inline () = 480 + (* A comment anchored to text that's present in the output gets a 481 + footnote reference at its position. *) 482 + let doc = parse_doc sample_doc_for_comments in 483 + let comments : Gdocs.Comments.t list = 484 + [ 485 + { 486 + id = "c1"; 487 + author = "Alice"; 488 + content = "Looks good"; 489 + quoted_text = "world"; 490 + anchor = Some "kix.abc"; 491 + resolved = false; 492 + }; 493 + ] 494 + in 495 + let md = Gdocs.Markdown.of_document_with_comments ~comments doc in 496 + (* The footnote reference should immediately follow the quoted text. *) 497 + Alcotest.(check bool) 498 + "footnote marker after quote" true 499 + (contains md "world[^gdc1]"); 500 + (* Footnote definition should be in the footer. *) 501 + Alcotest.(check bool) 502 + "footnote definition" true 503 + (contains md "[^gdc1]: **Alice**"); 504 + Alcotest.(check bool) "content in definition" true (contains md "Looks good"); 505 + (* No "## Comments" section because the comment was placed inline. *) 506 + Alcotest.(check bool) "no fallback section" false (contains md "## Comments"); 507 + assert_parses md 232 508 233 - let comments_appended () = 509 + let multiple_comments_get_distinct_labels () = 234 510 let doc = 235 511 parse_doc 236 512 {|{"documentId":"x","title":"D","body":{"content":[ 237 - { "paragraph": { "elements": [ { "textRun": { "content": "body\n" } } ] } } 513 + { "paragraph": { "elements": [ { "textRun": { "content": "Hello world and goodbye\n" } } ] } } 238 514 ]}}|} 239 515 in 240 516 let comments : Gdocs.Comments.t list = ··· 242 518 { 243 519 id = "c1"; 244 520 author = "Alice"; 245 - content = "Looks good"; 246 - quoted_text = "body"; 247 - anchor = Some "kix.abc"; 521 + content = "first"; 522 + quoted_text = "world"; 523 + anchor = Some "a"; 524 + resolved = false; 525 + }; 526 + { 527 + id = "c2"; 528 + author = "Bob"; 529 + content = "second"; 530 + quoted_text = "goodbye"; 531 + anchor = Some "b"; 248 532 resolved = false; 249 533 }; 250 534 ] 251 535 in 252 536 let md = Gdocs.Markdown.of_document_with_comments ~comments doc in 253 - let contains sub = 254 - let n = String.length md and l = String.length sub in 255 - let rec loop i = 256 - if i + l > n then false 257 - else if String.sub md i l = sub then true 258 - else loop (i + 1) 259 - in 260 - loop 0 537 + Alcotest.(check bool) "marker gdc1" true (contains md "world[^gdc1]"); 538 + Alcotest.(check bool) "marker gdc2" true (contains md "goodbye[^gdc2]"); 539 + Alcotest.(check bool) "defn gdc1" true (contains md "[^gdc1]: **Alice**"); 540 + Alcotest.(check bool) "defn gdc2" true (contains md "[^gdc2]: **Bob**") 541 + 542 + let empty_quote_falls_back () = 543 + (* Document-level comments have no [quoted_text]; they fall back to 544 + the ## Comments section. *) 545 + let doc = parse_doc sample_doc_for_comments in 546 + let comments : Gdocs.Comments.t list = 547 + [ 548 + { 549 + id = "c1"; 550 + author = "Alice"; 551 + content = "overall feedback"; 552 + quoted_text = ""; 553 + anchor = None; 554 + resolved = false; 555 + }; 556 + ] 557 + in 558 + let md = Gdocs.Markdown.of_document_with_comments ~comments doc in 559 + Alcotest.(check bool) "fallback section" true (contains md "## Comments"); 560 + Alcotest.(check bool) "no inline marker" false (contains md "[^gdc") 561 + 562 + let missing_quote_falls_back () = 563 + (* If the quoted text doesn't appear in the rendered markdown, the 564 + comment still ends up in the fallback section. *) 565 + let doc = parse_doc sample_doc_for_comments in 566 + let comments : Gdocs.Comments.t list = 567 + [ 568 + { 569 + id = "c1"; 570 + author = "Alice"; 571 + content = "stale quote"; 572 + quoted_text = "this text was deleted since"; 573 + anchor = Some "a"; 574 + resolved = false; 575 + }; 576 + ] 261 577 in 262 - Alcotest.(check bool) "comments heading" true (contains "## Comments"); 263 - Alcotest.(check bool) "author" true (contains "Alice"); 264 - Alcotest.(check bool) "content" true (contains "Looks good"); 265 - Alcotest.(check bool) "quoted in blockquote" true (contains "> body"); 266 - assert_parses md 578 + let md = Gdocs.Markdown.of_document_with_comments ~comments doc in 579 + Alcotest.(check bool) "falls back to section" true (contains md "## Comments"); 580 + Alcotest.(check bool) "no inline marker" false (contains md "[^gdc") 267 581 268 582 let no_comments_no_footer () = 583 + let doc = parse_doc sample_doc_for_comments in 584 + let md = Gdocs.Markdown.of_document_with_comments ~comments:[] doc in 585 + Alcotest.(check bool) 586 + "no '## Comments' section" false 587 + (contains md "## Comments"); 588 + Alcotest.(check bool) "no footnote markers" false (contains md "[^gdc") 589 + 590 + (* A comment whose quoted text lands entirely inside a link, code 591 + span, or other markup must fall back -- splicing a footnote ref 592 + into the middle of that syntax would break it. *) 593 + let quote_inside_link_falls_back () = 269 594 let doc = 270 595 parse_doc 271 596 {|{"documentId":"x","title":"D","body":{"content":[ 272 - { "paragraph": { "elements": [ { "textRun": { "content": "body\n" } } ] } } 597 + { "paragraph": { "elements": [ 598 + { "textRun": { "content": "See ", "textStyle": {} } }, 599 + { "textRun": { "content": "Google", 600 + "textStyle": { "link": { "url": "https://g.co" } } } }, 601 + { "textRun": { "content": ".\n" } } 602 + ] } } 603 + ]}}|} 604 + in 605 + let comments : Gdocs.Comments.t list = 606 + [ 607 + { 608 + id = "c1"; 609 + author = "Alice"; 610 + content = "nice"; 611 + quoted_text = "Google"; 612 + anchor = Some "a"; 613 + resolved = false; 614 + }; 615 + ] 616 + in 617 + let md = Gdocs.Markdown.of_document_with_comments ~comments doc in 618 + (* The link stays intact; no footnote marker inside the brackets. *) 619 + Alcotest.(check bool) 620 + "link unbroken" true 621 + (contains md "[Google](https://g.co)"); 622 + Alcotest.(check bool) 623 + "no marker inside brackets" false 624 + (contains md "Google[^gdc1]"); 625 + Alcotest.(check bool) 626 + "comment appears in footer" true 627 + (contains md "## Comments") 628 + 629 + let quote_inside_code_falls_back () = 630 + let doc = 631 + parse_doc 632 + {|{"documentId":"x","title":"D","body":{"content":[ 633 + { "paragraph": { "elements": [ 634 + { "textRun": { "content": "Use ", "textStyle": {} } }, 635 + { "textRun": { "content": "main", 636 + "textStyle": { "weightedFontFamily": { "fontFamily": "Courier New" } } } }, 637 + { "textRun": { "content": ".\n" } } 638 + ] } } 273 639 ]}}|} 274 640 in 275 - let md = Gdocs.Markdown.of_document_with_comments ~comments:[] doc in 276 - let contains sub = 277 - let n = String.length md and l = String.length sub in 278 - let rec loop i = 279 - if i + l > n then false 280 - else if String.sub md i l = sub then true 281 - else loop (i + 1) 282 - in 283 - loop 0 641 + let comments : Gdocs.Comments.t list = 642 + [ 643 + { 644 + id = "c1"; 645 + author = "Alice"; 646 + content = "ok"; 647 + quoted_text = "main"; 648 + anchor = Some "a"; 649 + resolved = false; 650 + }; 651 + ] 284 652 in 653 + let md = Gdocs.Markdown.of_document_with_comments ~comments doc in 654 + Alcotest.(check bool) "code span unbroken" true (contains md "`main`"); 285 655 Alcotest.(check bool) 286 - "no '## Comments' section" false (contains "## Comments") 656 + "no marker inside backticks" false 657 + (contains md "main[^gdc1]`") 658 + 659 + (* Repeated quoted text: each comment must anchor to a distinct 660 + occurrence, in left-to-right order. *) 661 + let repeated_text_distinct_anchors () = 662 + let doc = 663 + parse_doc 664 + {|{"documentId":"x","title":"D","body":{"content":[ 665 + { "paragraph": { "elements": [ 666 + { "textRun": { "content": "foo bar foo bar foo\n" } } 667 + ] } } 668 + ]}}|} 669 + in 670 + let comments : Gdocs.Comments.t list = 671 + [ 672 + { 673 + id = "c1"; 674 + author = "A"; 675 + content = "one"; 676 + quoted_text = "foo"; 677 + anchor = None; 678 + resolved = false; 679 + }; 680 + { 681 + id = "c2"; 682 + author = "B"; 683 + content = "two"; 684 + quoted_text = "foo"; 685 + anchor = None; 686 + resolved = false; 687 + }; 688 + ] 689 + in 690 + let md = Gdocs.Markdown.of_document_with_comments ~comments doc in 691 + (* Both markers must be present somewhere in the body, and they 692 + should NOT both be adjacent to the same "foo" instance. *) 693 + Alcotest.(check bool) "gdc1 present" true (contains md "[^gdc1]"); 694 + Alcotest.(check bool) "gdc2 present" true (contains md "[^gdc2]"); 695 + Alcotest.(check bool) 696 + "not doubled on first foo" false 697 + (contains md "foo[^gdc1][^gdc2]") 698 + 699 + let resolved_marker () = 700 + let doc = parse_doc sample_doc_for_comments in 701 + let comments : Gdocs.Comments.t list = 702 + [ 703 + { 704 + id = "c1"; 705 + author = "Alice"; 706 + content = "ok"; 707 + quoted_text = "world"; 708 + anchor = Some "a"; 709 + resolved = true; 710 + }; 711 + ] 712 + in 713 + let md = Gdocs.Markdown.of_document_with_comments ~comments doc in 714 + Alcotest.(check bool) "resolved note" true (contains md "(resolved)") 287 715 288 716 (* ── Suite ───────────────────────────────────────────────── *) 289 717 ··· 300 728 Alcotest.test_case "escapes markdown metachars" `Quick 301 729 escapes_markdown_metacharacters; 302 730 Alcotest.test_case "walks tabs" `Quick walks_tabs; 303 - Alcotest.test_case "comments appended as section" `Quick comments_appended; 731 + Alcotest.test_case "body then tab keeps boundary" `Quick 732 + body_then_tab_keeps_boundary; 733 + Alcotest.test_case "sibling tabs keep boundary" `Quick 734 + sibling_tabs_keep_boundary; 735 + Alcotest.test_case "ordered (numbered) list" `Quick numbered_list; 736 + Alcotest.test_case "nested bullet list" `Quick nested_bullet_list; 737 + Alcotest.test_case "mixed nesting numbered" `Quick mixed_nesting_numbered; 738 + Alcotest.test_case "table basic" `Quick table_basic; 739 + Alcotest.test_case "table escapes pipes" `Quick table_escapes_pipes; 740 + Alcotest.test_case "paragraph before/after table" `Quick 741 + paragraph_after_table; 742 + Alcotest.test_case "comment spliced inline at quote" `Quick 743 + comment_spliced_inline; 744 + Alcotest.test_case "multiple comments get distinct labels" `Quick 745 + multiple_comments_get_distinct_labels; 746 + Alcotest.test_case "comment without quote → footer" `Quick 747 + empty_quote_falls_back; 748 + Alcotest.test_case "quote not found → footer" `Quick 749 + missing_quote_falls_back; 750 + Alcotest.test_case "quote inside link falls back" `Quick 751 + quote_inside_link_falls_back; 752 + Alcotest.test_case "quote inside code falls back" `Quick 753 + quote_inside_code_falls_back; 754 + Alcotest.test_case "repeated text distinct anchors" `Quick 755 + repeated_text_distinct_anchors; 756 + Alcotest.test_case "resolved marker" `Quick resolved_marker; 304 757 Alcotest.test_case "no comments → no section" `Quick no_comments_no_footer; 305 758 ] )