Minimal dependency-free XML parser and serializer
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Add --here, --location CITY, and Arg.enum to contact CLI

Location can be specified three ways:
--lat 34.05 --lon=-118.25 (coordinates)
--location la (city preset via Arg.enum)
--here (auto-detect via IP geolocation)

Known cities: la, sf, nyc, london, paris, tokyo, ksc, kourou,
baikonur, bangalore, sydney. Cmdliner provides tab completion
and error messages for invalid city names.

+1063
+22
dune-project
··· 1 + (lang dune 3.21) 2 + (name xmlt) 3 + 4 + (generate_opam_files true) 5 + 6 + (license ISC) 7 + (authors "Thomas Gazagnaire <thomas@gazagnaire.org>") 8 + (maintainers "Thomas Gazagnaire <thomas@gazagnaire.org>") 9 + (source (uri "https://github.com/samoht/ocaml-xmlt")) 10 + 11 + (package 12 + (name xmlt) 13 + (synopsis "Minimal dependency-free XML parser and serializer") 14 + (description 15 + "A simple, dependency-free XML parser and serializer for OCaml. 16 + Supports elements, attributes, text content, CDATA sections, comments, 17 + processing instructions, entity references, and self-closing tags. 18 + No namespace or DTD/schema validation support.") 19 + (depends 20 + (ocaml (>= 4.14.0)) 21 + (alcotest :with-test) 22 + (crowbar (and (>= 0.2) :with-test))))
+32
fuzz/dune
··· 1 + ; Crowbar fuzz testing for XML parsing 2 + ; 3 + ; Quick check (runs tests with random inputs): 4 + ; dune build @fuzz 5 + ; 6 + ; With AFL instrumentation (use crow orchestrator): 7 + ; crow start --cpus=4 8 + 9 + (executable 10 + (name fuzz) 11 + (modules fuzz fuzz_xmlt) 12 + (libraries xmlt alcobar)) 13 + 14 + (rule 15 + (alias runtest) 16 + (enabled_if 17 + (<> %{profile} afl)) 18 + (deps 19 + fuzz.exe 20 + (source_tree corpus)) 21 + (action 22 + (run %{exe:fuzz.exe}))) 23 + 24 + (rule 25 + (alias fuzz) 26 + (enabled_if 27 + (= %{profile} afl)) 28 + (deps fuzz.exe) 29 + (action 30 + (progn 31 + (run %{exe:fuzz.exe} --gen-corpus corpus) 32 + (run afl-fuzz -V 60 -i corpus -o _fuzz -- %{exe:fuzz.exe} @@))))
+1
fuzz/fuzz.ml
··· 1 + let () = Alcobar.run "xmlt" [ Fuzz_xmlt.suite ]
+41
fuzz/fuzz_xmlt.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2026 Thomas Gazagnaire <thomas@gazagnaire.org>. 3 + All rights reserved. SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (* Alcobar-based fuzz testing for XML parsing *) 7 + 8 + open Alcobar 9 + 10 + (* Test that parsing random bytes never crashes (only returns Ok or Error) *) 11 + let test_parse_no_crash input = 12 + match Xmlt.of_string input with Ok _ -> () | Error _ -> () 13 + 14 + (* Test roundtrip: if we can parse it, serializing and re-parsing should give 15 + the same serialized output *) 16 + let test_roundtrip input = 17 + match Xmlt.of_string input with 18 + | Error _ -> () (* Invalid XML is fine *) 19 + | Ok el -> ( 20 + let s1 = Xmlt.to_string el in 21 + match Xmlt.of_string s1 with 22 + | Error e -> 23 + failwith 24 + (Printf.sprintf 25 + "roundtrip failed: parsed then serialized, but could not \ 26 + re-parse: %s\n\ 27 + Serialized: %s" 28 + e s1) 29 + | Ok el2 -> 30 + let s2 = Xmlt.to_string el2 in 31 + if s1 <> s2 then 32 + failwith 33 + (Printf.sprintf "roundtrip mismatch:\n first: %s\n second: %s" 34 + s1 s2)) 35 + 36 + let suite = 37 + ( "xmlt", 38 + [ 39 + test_case "parse no crash" [ bytes ] test_parse_no_crash; 40 + test_case "roundtrip" [ bytes ] test_roundtrip; 41 + ] )
+4
fuzz/fuzz_xmlt.mli
··· 1 + (** Fuzz tests for {!Xmlt}. *) 2 + 3 + val suite : string * Alcobar.test_case list 4 + (** Test suite. *)
+3
lib/dune
··· 1 + (library 2 + (name xmlt) 3 + (public_name xmlt))
+500
lib/xmlt.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2026 Thomas Gazagnaire <thomas@gazagnaire.org>. 3 + All rights reserved. SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (* Minimal XML parser and serializer. *) 7 + 8 + type t = Element of element | Text of string 9 + 10 + and element = { 11 + tag : string; 12 + attrs : (string * string) list; 13 + children : t list; 14 + } 15 + 16 + (* --- Parser --- *) 17 + 18 + type parser_state = { input : string; mutable pos : int } 19 + 20 + let make_parser s = { input = s; pos = 0 } 21 + let at_end p = p.pos >= String.length p.input 22 + let peek p = if at_end p then None else Some p.input.[p.pos] 23 + let advance p = p.pos <- p.pos + 1 24 + 25 + let consume p = 26 + let c = p.input.[p.pos] in 27 + advance p; 28 + c 29 + 30 + let starts_with p s = 31 + let len = String.length s in 32 + p.pos + len <= String.length p.input && String.sub p.input p.pos len = s 33 + 34 + let skip_while p f = 35 + while (not (at_end p)) && f p.input.[p.pos] do 36 + advance p 37 + done 38 + 39 + let skip_ws p = 40 + skip_while p (fun c -> c = ' ' || c = '\t' || c = '\n' || c = '\r') 41 + 42 + let expect_string p s = 43 + let len = String.length s in 44 + if p.pos + len > String.length p.input then 45 + Error (Printf.sprintf "unexpected end of input, expected %S" s) 46 + else if String.sub p.input p.pos len <> s then 47 + Error (Printf.sprintf "expected %S at position %d" s p.pos) 48 + else begin 49 + p.pos <- p.pos + len; 50 + Ok () 51 + end 52 + 53 + let is_name_start_char c = 54 + (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c = '_' || c = ':' 55 + 56 + let is_name_char c = 57 + is_name_start_char c || (c >= '0' && c <= '9') || c = '-' || c = '.' 58 + 59 + let parse_name p = 60 + let start = p.pos in 61 + if at_end p || not (is_name_start_char p.input.[p.pos]) then 62 + Error (Printf.sprintf "expected element name at position %d" p.pos) 63 + else begin 64 + skip_while p is_name_char; 65 + Ok (String.sub p.input start (p.pos - start)) 66 + end 67 + 68 + let decode_entity = function 69 + | "amp" -> Ok '&' 70 + | "lt" -> Ok '<' 71 + | "gt" -> Ok '>' 72 + | "quot" -> Ok '"' 73 + | "apos" -> Ok '\'' 74 + | name -> Error (Printf.sprintf "unknown entity reference: &%s;" name) 75 + 76 + let decode_char_ref s = 77 + let n = 78 + if String.length s > 0 && s.[0] = 'x' then int_of_string_opt ("0" ^ s) 79 + else int_of_string_opt s 80 + in 81 + match n with 82 + | None -> Error (Printf.sprintf "invalid character reference: &#%s;" s) 83 + | Some code -> 84 + if code < 0 || code > 0x10FFFF then 85 + Error (Printf.sprintf "character reference out of range: &#%s;" s) 86 + else 87 + let buf = Buffer.create 4 in 88 + if code < 0x80 then Buffer.add_char buf (Char.chr code) 89 + else if code < 0x800 then begin 90 + Buffer.add_char buf (Char.chr (0xC0 lor (code lsr 6))); 91 + Buffer.add_char buf (Char.chr (0x80 lor (code land 0x3F))) 92 + end 93 + else if code < 0x10000 then begin 94 + Buffer.add_char buf (Char.chr (0xE0 lor (code lsr 12))); 95 + Buffer.add_char buf (Char.chr (0x80 lor ((code lsr 6) land 0x3F))); 96 + Buffer.add_char buf (Char.chr (0x80 lor (code land 0x3F))) 97 + end 98 + else begin 99 + Buffer.add_char buf (Char.chr (0xF0 lor (code lsr 18))); 100 + Buffer.add_char buf (Char.chr (0x80 lor ((code lsr 12) land 0x3F))); 101 + Buffer.add_char buf (Char.chr (0x80 lor ((code lsr 6) land 0x3F))); 102 + Buffer.add_char buf (Char.chr (0x80 lor (code land 0x3F))) 103 + end; 104 + Ok (Buffer.contents buf) 105 + 106 + let parse_attr_value p = 107 + if at_end p then Error "unexpected end of input in attribute value" 108 + else 109 + let quote = consume p in 110 + if quote <> '"' && quote <> '\'' then 111 + Error 112 + (Printf.sprintf "expected quote character at position %d" (p.pos - 1)) 113 + else 114 + let buf = Buffer.create 32 in 115 + let rec loop () = 116 + if at_end p then Error "unterminated attribute value" 117 + else 118 + let c = consume p in 119 + if c = quote then Ok (Buffer.contents buf) 120 + else if c = '&' then begin 121 + let start = p.pos in 122 + skip_while p (fun c -> c <> ';'); 123 + if at_end p then Error "unterminated entity reference" 124 + else begin 125 + let name = String.sub p.input start (p.pos - start) in 126 + advance p; 127 + (* skip ';' *) 128 + if String.length name > 0 && name.[0] = '#' then begin 129 + match 130 + decode_char_ref (String.sub name 1 (String.length name - 1)) 131 + with 132 + | Ok s -> 133 + Buffer.add_string buf s; 134 + loop () 135 + | Error _ as e -> e 136 + end 137 + else 138 + match decode_entity name with 139 + | Ok c -> 140 + Buffer.add_char buf c; 141 + loop () 142 + | Error _ as e -> e 143 + end 144 + end 145 + else begin 146 + Buffer.add_char buf c; 147 + loop () 148 + end 149 + in 150 + loop () 151 + 152 + let parse_attrs p = 153 + let rec loop acc = 154 + skip_ws p; 155 + match peek p with 156 + | None -> Error "unexpected end of input in attributes" 157 + | Some c when c = '/' || c = '>' -> Ok (List.rev acc) 158 + | Some '?' -> Ok (List.rev acc) (* for processing instructions *) 159 + | _ -> ( 160 + match parse_name p with 161 + | Error e -> Error e 162 + | Ok name -> ( 163 + skip_ws p; 164 + match expect_string p "=" with 165 + | Error e -> Error e 166 + | Ok () -> ( 167 + skip_ws p; 168 + match parse_attr_value p with 169 + | Error e -> Error e 170 + | Ok value -> loop ((name, value) :: acc)))) 171 + in 172 + loop [] 173 + 174 + let parse_text p = 175 + let buf = Buffer.create 64 in 176 + let rec loop () = 177 + if at_end p then Ok (Buffer.contents buf) 178 + else 179 + match p.input.[p.pos] with 180 + | '<' -> Ok (Buffer.contents buf) 181 + | '&' -> 182 + advance p; 183 + let start = p.pos in 184 + skip_while p (fun c -> c <> ';'); 185 + if at_end p then Error "unterminated entity reference" 186 + else begin 187 + let name = String.sub p.input start (p.pos - start) in 188 + advance p; 189 + (* skip ';' *) 190 + if String.length name > 0 && name.[0] = '#' then begin 191 + match 192 + decode_char_ref (String.sub name 1 (String.length name - 1)) 193 + with 194 + | Ok s -> 195 + Buffer.add_string buf s; 196 + loop () 197 + | Error _ as e -> e 198 + end 199 + else 200 + match decode_entity name with 201 + | Ok c -> 202 + Buffer.add_char buf c; 203 + loop () 204 + | Error _ as e -> e 205 + end 206 + | c -> 207 + Buffer.add_char buf c; 208 + advance p; 209 + loop () 210 + in 211 + loop () 212 + 213 + let skip_comment p = 214 + (* We already consumed "<!--" *) 215 + let rec loop () = 216 + if at_end p then Error "unterminated comment" 217 + else if starts_with p "-->" then begin 218 + p.pos <- p.pos + 3; 219 + Ok () 220 + end 221 + else begin 222 + advance p; 223 + loop () 224 + end 225 + in 226 + loop () 227 + 228 + let skip_pi p = 229 + (* We already consumed "<?" *) 230 + let rec loop () = 231 + if at_end p then Error "unterminated processing instruction" 232 + else if starts_with p "?>" then begin 233 + p.pos <- p.pos + 2; 234 + Ok () 235 + end 236 + else begin 237 + advance p; 238 + loop () 239 + end 240 + in 241 + loop () 242 + 243 + let parse_cdata p = 244 + (* We already consumed "<![CDATA[" *) 245 + let buf = Buffer.create 64 in 246 + let rec loop () = 247 + if at_end p then Error "unterminated CDATA section" 248 + else if starts_with p "]]>" then begin 249 + p.pos <- p.pos + 3; 250 + Ok (Buffer.contents buf) 251 + end 252 + else begin 253 + Buffer.add_char buf p.input.[p.pos]; 254 + advance p; 255 + loop () 256 + end 257 + in 258 + loop () 259 + 260 + let rec parse_element p = 261 + skip_ws p; 262 + match expect_string p "<" with 263 + | Error e -> Error e 264 + | Ok () -> ( 265 + match parse_name p with 266 + | Error e -> Error e 267 + | Ok tag -> ( 268 + match parse_attrs p with 269 + | Error e -> Error e 270 + | Ok attrs -> ( 271 + skip_ws p; 272 + if starts_with p "/>" then begin 273 + p.pos <- p.pos + 2; 274 + Ok { tag; attrs; children = [] } 275 + end 276 + else 277 + match expect_string p ">" with 278 + | Error e -> Error e 279 + | Ok () -> ( 280 + match parse_children p with 281 + | Error e -> Error e 282 + | Ok children -> ( 283 + match expect_string p "</" with 284 + | Error e -> Error e 285 + | Ok () -> ( 286 + match parse_name p with 287 + | Error e -> Error e 288 + | Ok close_tag -> 289 + if close_tag <> tag then 290 + Error 291 + (Printf.sprintf 292 + "mismatched tags: opened <%s> but \ 293 + closed </%s>" 294 + tag close_tag) 295 + else begin 296 + skip_ws p; 297 + match expect_string p ">" with 298 + | Error e -> Error e 299 + | Ok () -> Ok { tag; attrs; children } 300 + end)))))) 301 + 302 + and parse_children p = 303 + let rec loop acc = 304 + if at_end p then Error "unexpected end of input (unclosed tag)" 305 + else if starts_with p "</" then Ok (List.rev acc) 306 + else if starts_with p "<!--" then begin 307 + p.pos <- p.pos + 4; 308 + match skip_comment p with Error e -> Error e | Ok () -> loop acc 309 + end 310 + else if starts_with p "<?" then begin 311 + p.pos <- p.pos + 2; 312 + match skip_pi p with Error e -> Error e | Ok () -> loop acc 313 + end 314 + else if starts_with p "<![CDATA[" then begin 315 + p.pos <- p.pos + 9; 316 + match parse_cdata p with 317 + | Error e -> Error e 318 + | Ok text -> loop (Text text :: acc) 319 + end 320 + else if starts_with p "<" then begin 321 + match parse_element p with 322 + | Error e -> Error e 323 + | Ok el -> loop (Element el :: acc) 324 + end 325 + else begin 326 + match parse_text p with 327 + | Error e -> Error e 328 + | Ok text -> 329 + if String.length text > 0 then loop (Text text :: acc) else loop acc 330 + end 331 + in 332 + loop [] 333 + 334 + let skip_xml_declaration p = 335 + skip_ws p; 336 + if starts_with p "<?xml" then begin 337 + p.pos <- p.pos + 2; 338 + match skip_pi p with Error e -> Error e | Ok () -> Ok () 339 + end 340 + else Ok () 341 + 342 + let skip_doctype p = 343 + skip_ws p; 344 + if starts_with p "<!DOCTYPE" then begin 345 + (* Simple DOCTYPE skipping: handle nested angle brackets *) 346 + let rec loop depth = 347 + if at_end p then Error "unterminated DOCTYPE" 348 + else 349 + let c = consume p in 350 + if c = '<' then loop (depth + 1) 351 + else if c = '>' then if depth = 0 then Ok () else loop (depth - 1) 352 + else loop depth 353 + in 354 + (* Skip past "<!DOCTYPE" *) 355 + p.pos <- p.pos + 9; 356 + loop 0 357 + end 358 + else Ok () 359 + 360 + let of_string s = 361 + let p = make_parser s in 362 + match skip_xml_declaration p with 363 + | Error e -> Error e 364 + | Ok () -> ( 365 + let rec skip_preamble () = 366 + skip_ws p; 367 + match skip_doctype p with 368 + | Error e -> Error e 369 + | Ok () -> 370 + skip_ws p; 371 + if starts_with p "<!--" then begin 372 + p.pos <- p.pos + 4; 373 + match skip_comment p with 374 + | Error e -> Error e 375 + | Ok () -> skip_preamble () 376 + end 377 + else if starts_with p "<?" then begin 378 + p.pos <- p.pos + 2; 379 + match skip_pi p with 380 + | Error e -> Error e 381 + | Ok () -> skip_preamble () 382 + end 383 + else Ok () 384 + in 385 + match skip_preamble () with 386 + | Error e -> Error e 387 + | Ok () -> 388 + if at_end p then Error "empty document: no root element" 389 + else parse_element p) 390 + 391 + (* --- Serializer --- *) 392 + 393 + let escape_text s = 394 + let buf = Buffer.create (String.length s) in 395 + String.iter 396 + (fun c -> 397 + match c with 398 + | '&' -> Buffer.add_string buf "&amp;" 399 + | '<' -> Buffer.add_string buf "&lt;" 400 + | '>' -> Buffer.add_string buf "&gt;" 401 + | _ -> Buffer.add_char buf c) 402 + s; 403 + Buffer.contents buf 404 + 405 + let escape_attr s = 406 + let buf = Buffer.create (String.length s) in 407 + String.iter 408 + (fun c -> 409 + match c with 410 + | '&' -> Buffer.add_string buf "&amp;" 411 + | '<' -> Buffer.add_string buf "&lt;" 412 + | '>' -> Buffer.add_string buf "&gt;" 413 + | '"' -> Buffer.add_string buf "&quot;" 414 + | _ -> Buffer.add_char buf c) 415 + s; 416 + Buffer.contents buf 417 + 418 + let to_string ?(indent = 0) el = 419 + let buf = Buffer.create 256 in 420 + let rec serialize depth el = 421 + let pad = if indent > 0 then String.make (depth * indent) ' ' else "" in 422 + Buffer.add_string buf pad; 423 + Buffer.add_char buf '<'; 424 + Buffer.add_string buf el.tag; 425 + List.iter 426 + (fun (k, v) -> 427 + Buffer.add_char buf ' '; 428 + Buffer.add_string buf k; 429 + Buffer.add_string buf "=\""; 430 + Buffer.add_string buf (escape_attr v); 431 + Buffer.add_char buf '"') 432 + el.attrs; 433 + match el.children with 434 + | [] -> 435 + Buffer.add_string buf "/>"; 436 + if indent > 0 then Buffer.add_char buf '\n' 437 + | children -> 438 + Buffer.add_char buf '>'; 439 + let has_elements = 440 + List.exists (function Element _ -> true | Text _ -> false) children 441 + in 442 + if indent > 0 && has_elements then Buffer.add_char buf '\n'; 443 + List.iter 444 + (fun child -> 445 + match child with 446 + | Text s -> 447 + if indent > 0 && has_elements then begin 448 + Buffer.add_string buf (String.make ((depth + 1) * indent) ' '); 449 + Buffer.add_string buf (escape_text s); 450 + Buffer.add_char buf '\n' 451 + end 452 + else Buffer.add_string buf (escape_text s) 453 + | Element child_el -> serialize (depth + 1) child_el) 454 + children; 455 + if indent > 0 && has_elements then Buffer.add_string buf pad; 456 + Buffer.add_string buf "</"; 457 + Buffer.add_string buf el.tag; 458 + Buffer.add_char buf '>'; 459 + if indent > 0 then Buffer.add_char buf '\n' 460 + in 461 + serialize 0 el; 462 + Buffer.contents buf 463 + 464 + (* --- Query helpers --- *) 465 + 466 + let find tag el = 467 + let rec loop = function 468 + | [] -> None 469 + | Element e :: _ when e.tag = tag -> Some e 470 + | _ :: rest -> loop rest 471 + in 472 + loop el.children 473 + 474 + let find_all tag el = 475 + List.filter_map 476 + (function Element e when e.tag = tag -> Some e | _ -> None) 477 + el.children 478 + 479 + let attr name el = List.assoc_opt name el.attrs 480 + 481 + let text el = 482 + let buf = Buffer.create 32 in 483 + List.iter 484 + (function Text s -> Buffer.add_string buf s | Element _ -> ()) 485 + el.children; 486 + Buffer.contents buf 487 + 488 + let text_of tag el = 489 + match find tag el with None -> None | Some child -> Some (text child) 490 + 491 + let find_path path el = 492 + let rec loop tags current = 493 + match tags with 494 + | [] -> Some current 495 + | tag :: rest -> ( 496 + match find tag current with 497 + | None -> None 498 + | Some child -> loop rest child) 499 + in 500 + loop path el
+36
lib/xmlt.mli
··· 1 + (** Minimal XML parser and serializer. No external dependencies. *) 2 + 3 + type t = Element of element | Text of string 4 + 5 + and element = { 6 + tag : string; 7 + attrs : (string * string) list; 8 + children : t list; 9 + } 10 + 11 + val of_string : string -> (element, string) result 12 + (** Parse an XML document. Returns the root element. *) 13 + 14 + val to_string : ?indent:int -> element -> string 15 + (** Serialize to XML string. [indent] sets indentation (default: 0 = compact). 16 + *) 17 + 18 + (** {1 Query helpers} *) 19 + 20 + val find : string -> element -> element option 21 + (** [find tag el] finds the first direct child element with [tag]. *) 22 + 23 + val find_all : string -> element -> element list 24 + (** [find_all tag el] returns all direct child elements with [tag]. *) 25 + 26 + val attr : string -> element -> string option 27 + (** [attr name el] returns the value of attribute [name]. *) 28 + 29 + val text : element -> string 30 + (** [text el] returns concatenated text content of [el]. *) 31 + 32 + val text_of : string -> element -> string option 33 + (** [text_of tag el] returns the text content of child [tag], if it exists. *) 34 + 35 + val find_path : string list -> element -> element option 36 + (** [find_path ["a"; "b"; "c"] el] navigates a.b.c in the tree. *)
+3
test/dune
··· 1 + (test 2 + (name test) 3 + (libraries xmlt alcotest))
+1
test/test.ml
··· 1 + let () = Alcotest.run "xmlt" [ Test_xmlt.suite ]
+382
test/test_xmlt.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2026 Thomas Gazagnaire <thomas@gazagnaire.org>. 3 + All rights reserved. SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + let parse_ok s = 7 + match Xmlt.of_string s with 8 + | Ok el -> el 9 + | Error e -> Alcotest.fail (Printf.sprintf "parse error: %s" e) 10 + 11 + let parse_err s = 12 + match Xmlt.of_string s with 13 + | Ok _ -> Alcotest.fail "expected parse error but got success" 14 + | Error _ -> () 15 + 16 + (* 1. Simple element *) 17 + let test_simple_element () = 18 + let el = parse_ok "<a>text</a>" in 19 + Alcotest.(check string) "tag" "a" el.tag; 20 + Alcotest.(check string) "text" "text" (Xmlt.text el); 21 + Alcotest.(check int) "no attrs" 0 (List.length el.attrs); 22 + Alcotest.(check int) "one child" 1 (List.length el.children) 23 + 24 + (* 2. Attributes *) 25 + let test_attributes () = 26 + let el = parse_ok {|<a x="1" y="2"/>|} in 27 + Alcotest.(check string) "tag" "a" el.tag; 28 + Alcotest.(check (option string)) "attr x" (Some "1") (Xmlt.attr "x" el); 29 + Alcotest.(check (option string)) "attr y" (Some "2") (Xmlt.attr "y" el); 30 + Alcotest.(check (option string)) "attr z" None (Xmlt.attr "z" el); 31 + Alcotest.(check int) "no children" 0 (List.length el.children) 32 + 33 + let test_single_quoted_attrs () = 34 + let el = parse_ok "<a x='hello'/>" in 35 + Alcotest.(check (option string)) "attr x" (Some "hello") (Xmlt.attr "x" el) 36 + 37 + (* 3. Nested elements *) 38 + let test_nested_elements () = 39 + let el = parse_ok "<a><b><c>deep</c></b></a>" in 40 + Alcotest.(check string) "root tag" "a" el.tag; 41 + let b = 42 + match Xmlt.find "b" el with Some e -> e | None -> Alcotest.fail "no b" 43 + in 44 + let c = 45 + match Xmlt.find "c" b with Some e -> e | None -> Alcotest.fail "no c" 46 + in 47 + Alcotest.(check string) "deep text" "deep" (Xmlt.text c) 48 + 49 + (* 4. Mixed content *) 50 + let test_mixed_content () = 51 + let el = parse_ok "<p>Hello <b>world</b></p>" in 52 + Alcotest.(check string) "tag" "p" el.tag; 53 + Alcotest.(check int) "children" 2 (List.length el.children); 54 + (* First child is text "Hello " *) 55 + (match List.nth el.children 0 with 56 + | Xmlt.Text s -> Alcotest.(check string) "text before" "Hello " s 57 + | _ -> Alcotest.fail "expected text"); 58 + (* Second child is element <b> *) 59 + match List.nth el.children 1 with 60 + | Xmlt.Element e -> 61 + Alcotest.(check string) "b tag" "b" e.tag; 62 + Alcotest.(check string) "b text" "world" (Xmlt.text e) 63 + | _ -> Alcotest.fail "expected element" 64 + 65 + (* 5. Self-closing *) 66 + let test_self_closing () = 67 + let el = parse_ok "<br/>" in 68 + Alcotest.(check string) "tag" "br" el.tag; 69 + Alcotest.(check int) "no children" 0 (List.length el.children); 70 + Alcotest.(check int) "no attrs" 0 (List.length el.attrs) 71 + 72 + let test_self_closing_with_space () = 73 + let el = parse_ok "<br />" in 74 + Alcotest.(check string) "tag" "br" el.tag; 75 + Alcotest.(check int) "no children" 0 (List.length el.children) 76 + 77 + (* 6. CDATA *) 78 + let test_cdata () = 79 + let el = parse_ok "<data><![CDATA[<not xml>]]></data>" in 80 + Alcotest.(check string) "tag" "data" el.tag; 81 + Alcotest.(check string) "cdata content" "<not xml>" (Xmlt.text el) 82 + 83 + let test_cdata_with_ampersand () = 84 + let el = parse_ok "<data><![CDATA[a & b]]></data>" in 85 + Alcotest.(check string) "cdata with &" "a & b" (Xmlt.text el) 86 + 87 + (* 7. Entity references *) 88 + let test_entity_refs () = 89 + let el = parse_ok "<t>&amp;&lt;&gt;</t>" in 90 + Alcotest.(check string) "entities" "&<>" (Xmlt.text el) 91 + 92 + let test_entity_quot_apos () = 93 + let el = parse_ok "<t>&quot;&apos;</t>" in 94 + Alcotest.(check string) "quot and apos" "\"'" (Xmlt.text el) 95 + 96 + let test_entity_in_attr () = 97 + let el = parse_ok {|<t a="x&amp;y"/>|} in 98 + Alcotest.(check (option string)) 99 + "entity in attr" (Some "x&y") (Xmlt.attr "a" el) 100 + 101 + let test_numeric_entity () = 102 + let el = parse_ok "<t>&#65;&#x42;</t>" in 103 + Alcotest.(check string) "numeric entities" "AB" (Xmlt.text el) 104 + 105 + (* 8. Comments skipped *) 106 + let test_comments_skipped () = 107 + let el = parse_ok "<a><!-- comment --><b/></a>" in 108 + Alcotest.(check string) "tag" "a" el.tag; 109 + Alcotest.(check int) "one child (comment skipped)" 1 (List.length el.children); 110 + match List.nth el.children 0 with 111 + | Xmlt.Element e -> Alcotest.(check string) "child tag" "b" e.tag 112 + | _ -> Alcotest.fail "expected element" 113 + 114 + let test_comment_in_preamble () = 115 + let el = parse_ok "<!-- top comment --><root/>" in 116 + Alcotest.(check string) "tag" "root" el.tag 117 + 118 + (* 9. XML declaration skipped *) 119 + let test_xml_declaration () = 120 + let el = parse_ok {|<?xml version="1.0"?><root/>|} in 121 + Alcotest.(check string) "tag" "root" el.tag 122 + 123 + let test_xml_declaration_with_encoding () = 124 + let el = parse_ok {|<?xml version="1.0" encoding="UTF-8"?><root/>|} in 125 + Alcotest.(check string) "tag" "root" el.tag 126 + 127 + let test_processing_instruction () = 128 + let el = parse_ok {|<?xml version="1.0"?><?pi target?><root/>|} in 129 + Alcotest.(check string) "tag" "root" el.tag 130 + 131 + let test_pi_in_content () = 132 + let el = parse_ok "<a><?target data?><b/></a>" in 133 + Alcotest.(check int) "PI skipped" 1 (List.length el.children) 134 + 135 + (* 10. Roundtrip *) 136 + let test_roundtrip () = 137 + let inputs = 138 + [ 139 + "<a>text</a>"; 140 + {|<a x="1" y="2"/>|}; 141 + "<a><b><c>deep</c></b></a>"; 142 + "<p>Hello <b>world</b></p>"; 143 + "<br/>"; 144 + ] 145 + in 146 + List.iter 147 + (fun input -> 148 + let el = parse_ok input in 149 + let serialized = Xmlt.to_string el in 150 + let el2 = parse_ok serialized in 151 + let reserialized = Xmlt.to_string el2 in 152 + Alcotest.(check string) 153 + (Printf.sprintf "roundtrip: %s" input) 154 + serialized reserialized) 155 + inputs 156 + 157 + let test_roundtrip_with_indent () = 158 + let el = parse_ok "<root><child>text</child><other/></root>" in 159 + let s = Xmlt.to_string ~indent:2 el in 160 + let el2 = parse_ok s in 161 + Alcotest.(check string) "root tag preserved" "root" el2.tag; 162 + Alcotest.(check int) 163 + "children preserved" 2 164 + (List.length (Xmlt.find_all "child" el2) 165 + + List.length (Xmlt.find_all "other" el2)) 166 + 167 + (* 11. Error on unclosed tag *) 168 + let test_unclosed_tag () = 169 + parse_err "<a><b></a>"; 170 + parse_err "<a>" 171 + 172 + (* 12. Error on mismatched tags *) 173 + let test_mismatched_tags () = 174 + parse_err "<a></b>"; 175 + parse_err "<foo>text</bar>" 176 + 177 + let test_error_empty () = 178 + parse_err ""; 179 + parse_err " " 180 + 181 + let test_error_no_root () = parse_err "just text" 182 + 183 + (* 13. Query helpers *) 184 + let test_find () = 185 + let el = parse_ok "<root><a/><b/><c/></root>" in 186 + Alcotest.(check bool) 187 + "find a" true 188 + (match Xmlt.find "a" el with Some e -> e.tag = "a" | None -> false); 189 + Alcotest.(check bool) "find d" true (Xmlt.find "d" el = None) 190 + 191 + let test_find_all () = 192 + let el = parse_ok "<root><item/><other/><item/><item/></root>" in 193 + let items = Xmlt.find_all "item" el in 194 + Alcotest.(check int) "find_all item" 3 (List.length items); 195 + let others = Xmlt.find_all "other" el in 196 + Alcotest.(check int) "find_all other" 1 (List.length others); 197 + let missing = Xmlt.find_all "nope" el in 198 + Alcotest.(check int) "find_all missing" 0 (List.length missing) 199 + 200 + let test_attr () = 201 + let el = parse_ok {|<el key="val" flag="true"/>|} in 202 + Alcotest.(check (option string)) "attr key" (Some "val") (Xmlt.attr "key" el); 203 + Alcotest.(check (option string)) 204 + "attr flag" (Some "true") (Xmlt.attr "flag" el); 205 + Alcotest.(check (option string)) "attr missing" None (Xmlt.attr "nope" el) 206 + 207 + let test_text () = 208 + let el = parse_ok "<msg>hello world</msg>" in 209 + Alcotest.(check string) "text" "hello world" (Xmlt.text el) 210 + 211 + let test_text_mixed () = 212 + let el = parse_ok "<p>Hello <b>world</b>!</p>" in 213 + (* text only returns direct text children, not nested *) 214 + Alcotest.(check string) "text mixed" "Hello !" (Xmlt.text el) 215 + 216 + let test_text_of () = 217 + let el = parse_ok "<root><name>Alice</name><age>30</age></root>" in 218 + Alcotest.(check (option string)) 219 + "text_of name" (Some "Alice") (Xmlt.text_of "name" el); 220 + Alcotest.(check (option string)) 221 + "text_of age" (Some "30") (Xmlt.text_of "age" el); 222 + Alcotest.(check (option string)) 223 + "text_of missing" None (Xmlt.text_of "email" el) 224 + 225 + let test_find_path () = 226 + let el = parse_ok "<a><b><c><d>found</d></c></b></a>" in 227 + (match Xmlt.find_path [ "b"; "c"; "d" ] el with 228 + | Some e -> Alcotest.(check string) "find_path" "found" (Xmlt.text e) 229 + | None -> Alcotest.fail "find_path returned None"); 230 + Alcotest.(check bool) 231 + "find_path missing" true 232 + (Xmlt.find_path [ "b"; "x" ] el = None); 233 + (* Empty path returns the element itself *) 234 + match Xmlt.find_path [] el with 235 + | Some e -> Alcotest.(check string) "find_path empty" "a" e.tag 236 + | None -> Alcotest.fail "find_path [] returned None" 237 + 238 + (* 14. Real-world example: small XTCE-like document *) 239 + let test_xtce_like () = 240 + let xml = 241 + {|<?xml version="1.0" encoding="UTF-8"?> 242 + <SpaceSystem name="TestSat"> 243 + <TelemetryMetaData> 244 + <ParameterTypeSet> 245 + <IntegerParameterType name="uint16" sizeInBits="16" signed="false"/> 246 + <FloatParameterType name="float32" sizeInBits="32"/> 247 + </ParameterTypeSet> 248 + <ParameterSet> 249 + <Parameter name="TEMP" parameterTypeRef="float32"> 250 + <LongDescription>Temperature sensor reading</LongDescription> 251 + </Parameter> 252 + <Parameter name="COUNT" parameterTypeRef="uint16"/> 253 + </ParameterSet> 254 + </TelemetryMetaData> 255 + </SpaceSystem>|} 256 + in 257 + let el = parse_ok xml in 258 + Alcotest.(check string) "root tag" "SpaceSystem" el.tag; 259 + Alcotest.(check (option string)) 260 + "root name" (Some "TestSat") (Xmlt.attr "name" el); 261 + (* Navigate to ParameterTypeSet *) 262 + let pts = 263 + match Xmlt.find_path [ "TelemetryMetaData"; "ParameterTypeSet" ] el with 264 + | Some e -> e 265 + | None -> Alcotest.fail "ParameterTypeSet not found" 266 + in 267 + let int_types = Xmlt.find_all "IntegerParameterType" pts in 268 + Alcotest.(check int) "int types" 1 (List.length int_types); 269 + let float_types = Xmlt.find_all "FloatParameterType" pts in 270 + Alcotest.(check int) "float types" 1 (List.length float_types); 271 + (* Check IntegerParameterType attrs *) 272 + let it = List.hd int_types in 273 + Alcotest.(check (option string)) 274 + "uint16 name" (Some "uint16") (Xmlt.attr "name" it); 275 + Alcotest.(check (option string)) 276 + "uint16 bits" (Some "16") 277 + (Xmlt.attr "sizeInBits" it); 278 + Alcotest.(check (option string)) 279 + "uint16 signed" (Some "false") (Xmlt.attr "signed" it); 280 + (* Navigate to ParameterSet *) 281 + let ps = 282 + match Xmlt.find_path [ "TelemetryMetaData"; "ParameterSet" ] el with 283 + | Some e -> e 284 + | None -> Alcotest.fail "ParameterSet not found" 285 + in 286 + let params = Xmlt.find_all "Parameter" ps in 287 + Alcotest.(check int) "parameters" 2 (List.length params); 288 + let temp = List.hd params in 289 + Alcotest.(check (option string)) 290 + "TEMP typeRef" (Some "float32") 291 + (Xmlt.attr "parameterTypeRef" temp); 292 + Alcotest.(check (option string)) 293 + "TEMP description" (Some "Temperature sensor reading") 294 + (Xmlt.text_of "LongDescription" temp); 295 + (* Roundtrip the whole thing *) 296 + let serialized = Xmlt.to_string el in 297 + let el2 = parse_ok serialized in 298 + let reserialized = Xmlt.to_string el2 in 299 + Alcotest.(check string) "XTCE roundtrip" serialized reserialized 300 + 301 + (* Serialization-specific tests *) 302 + let test_serialize_escaping () = 303 + let el : Xmlt.element = 304 + { 305 + tag = "t"; 306 + attrs = [ ("a", "x&y<z>") ]; 307 + children = [ Xmlt.Text "a&b<c>d" ]; 308 + } 309 + in 310 + let s = Xmlt.to_string el in 311 + let el2 = parse_ok s in 312 + Alcotest.(check string) "text preserved" "a&b<c>d" (Xmlt.text el2); 313 + Alcotest.(check (option string)) 314 + "attr preserved" (Some "x&y<z>") (Xmlt.attr "a" el2) 315 + 316 + let test_whitespace_handling () = 317 + let el = parse_ok "<root> <child/> </root>" in 318 + (* Whitespace text nodes should be preserved *) 319 + let children = el.children in 320 + Alcotest.(check bool) "has whitespace children" true (List.length children > 1) 321 + 322 + let test_namespaced_names () = 323 + let el = 324 + parse_ok 325 + {|<xs:element xmlns:xs="http://www.w3.org/2001/XMLSchema" xs:type="string"/>|} 326 + in 327 + Alcotest.(check string) "namespaced tag" "xs:element" el.tag; 328 + Alcotest.(check (option string)) 329 + "namespaced attr" (Some "http://www.w3.org/2001/XMLSchema") 330 + (Xmlt.attr "xmlns:xs" el); 331 + Alcotest.(check (option string)) 332 + "prefixed attr" (Some "string") (Xmlt.attr "xs:type" el) 333 + 334 + let test_doctype_skipped () = 335 + let el = parse_ok "<!DOCTYPE html><root/>" in 336 + Alcotest.(check string) "tag after DOCTYPE" "root" el.tag 337 + 338 + let suite = 339 + ( "xmlt", 340 + [ 341 + Alcotest.test_case "simple element" `Quick test_simple_element; 342 + Alcotest.test_case "attributes" `Quick test_attributes; 343 + Alcotest.test_case "single-quoted attrs" `Quick test_single_quoted_attrs; 344 + Alcotest.test_case "nested elements" `Quick test_nested_elements; 345 + Alcotest.test_case "mixed content" `Quick test_mixed_content; 346 + Alcotest.test_case "self-closing" `Quick test_self_closing; 347 + Alcotest.test_case "self-closing with space" `Quick 348 + test_self_closing_with_space; 349 + Alcotest.test_case "CDATA" `Quick test_cdata; 350 + Alcotest.test_case "CDATA with ampersand" `Quick test_cdata_with_ampersand; 351 + Alcotest.test_case "entity references" `Quick test_entity_refs; 352 + Alcotest.test_case "entity quot/apos" `Quick test_entity_quot_apos; 353 + Alcotest.test_case "entity in attribute" `Quick test_entity_in_attr; 354 + Alcotest.test_case "numeric entity refs" `Quick test_numeric_entity; 355 + Alcotest.test_case "comments skipped" `Quick test_comments_skipped; 356 + Alcotest.test_case "comment in preamble" `Quick test_comment_in_preamble; 357 + Alcotest.test_case "XML declaration" `Quick test_xml_declaration; 358 + Alcotest.test_case "XML declaration with encoding" `Quick 359 + test_xml_declaration_with_encoding; 360 + Alcotest.test_case "processing instruction" `Quick 361 + test_processing_instruction; 362 + Alcotest.test_case "PI in content" `Quick test_pi_in_content; 363 + Alcotest.test_case "roundtrip" `Quick test_roundtrip; 364 + Alcotest.test_case "roundtrip with indent" `Quick 365 + test_roundtrip_with_indent; 366 + Alcotest.test_case "unclosed tag" `Quick test_unclosed_tag; 367 + Alcotest.test_case "mismatched tags" `Quick test_mismatched_tags; 368 + Alcotest.test_case "error on empty" `Quick test_error_empty; 369 + Alcotest.test_case "error no root element" `Quick test_error_no_root; 370 + Alcotest.test_case "find" `Quick test_find; 371 + Alcotest.test_case "find_all" `Quick test_find_all; 372 + Alcotest.test_case "attr" `Quick test_attr; 373 + Alcotest.test_case "text" `Quick test_text; 374 + Alcotest.test_case "text mixed" `Quick test_text_mixed; 375 + Alcotest.test_case "text_of" `Quick test_text_of; 376 + Alcotest.test_case "find_path" `Quick test_find_path; 377 + Alcotest.test_case "XTCE-like document" `Quick test_xtce_like; 378 + Alcotest.test_case "serialize escaping" `Quick test_serialize_escaping; 379 + Alcotest.test_case "whitespace handling" `Quick test_whitespace_handling; 380 + Alcotest.test_case "namespaced names" `Quick test_namespaced_names; 381 + Alcotest.test_case "DOCTYPE skipped" `Quick test_doctype_skipped; 382 + ] )
+4
test/test_xmlt.mli
··· 1 + (** Tests for {!Xmlt}. *) 2 + 3 + val suite : string * unit Alcotest.test_case list 4 + (** Test suite. *)
+34
xmlt.opam
··· 1 + # This file is generated by dune, edit dune-project instead 2 + opam-version: "2.0" 3 + synopsis: "Minimal dependency-free XML parser and serializer" 4 + description: """ 5 + A simple, dependency-free XML parser and serializer for OCaml. 6 + Supports elements, attributes, text content, CDATA sections, comments, 7 + processing instructions, entity references, and self-closing tags. 8 + No namespace or DTD/schema validation support.""" 9 + maintainer: ["Thomas Gazagnaire <thomas@gazagnaire.org>"] 10 + authors: ["Thomas Gazagnaire <thomas@gazagnaire.org>"] 11 + license: "ISC" 12 + depends: [ 13 + "dune" {>= "3.21"} 14 + "ocaml" {>= "4.14.0"} 15 + "alcotest" {with-test} 16 + "crowbar" {>= "0.2" & with-test} 17 + "odoc" {with-doc} 18 + ] 19 + build: [ 20 + ["dune" "subst"] {dev} 21 + [ 22 + "dune" 23 + "build" 24 + "-p" 25 + name 26 + "-j" 27 + jobs 28 + "@install" 29 + "@runtest" {with-test} 30 + "@doc" {with-doc} 31 + ] 32 + ] 33 + dev-repo: "https://github.com/samoht/ocaml-xmlt" 34 + x-maintenance-intent: ["(latest)"]