···11+ISC License
22+33+Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>
44+55+Permission to use, copy, modify, and distribute this software for any
66+purpose with or without fee is hereby granted, provided that the above
77+copyright notice and this permission notice appear in all copies.
88+99+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
1010+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1111+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
1212+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
1313+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
1414+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
1515+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+67
README.md
···11+# frontmatter - Parse YAML Frontmatter from Markdown
22+33+An OCaml library for parsing YAML frontmatter (Jekyll-format) from Markdown files. Supports extracting structured metadata and body content from files with YAML headers delimited by `---` markers.
44+55+## Key Features
66+77+- Parse Jekyll-style YAML frontmatter from Markdown documents
88+- Type-safe extraction of metadata fields using jsont codecs
99+- Support for common frontmatter fields (title, date, tags, etc.)
1010+- Eio-based file I/O support via `frontmatter-eio` package
1111+1212+## Usage
1313+1414+```ocaml
1515+(* Parse frontmatter from a string *)
1616+let content = {|---
1717+title: My Post
1818+date: 2025-01-15
1919+tags:
2020+ - ocaml
2121+ - tutorial
2222+---
2323+2424+# Hello World
2525+2626+This is the body content.
2727+|}
2828+2929+let () =
3030+ match Frontmatter.of_string content with
3131+ | Ok doc ->
3232+ let title = Frontmatter.find_string "title" doc in
3333+ let body = Frontmatter.body doc in
3434+ Printf.printf "Title: %s\nBody: %s\n"
3535+ (Option.value ~default:"untitled" title)
3636+ body
3737+ | Error e ->
3838+ Printf.eprintf "Parse error: %s\n" e
3939+```
4040+4141+With Eio file I/O:
4242+4343+```ocaml
4444+let () =
4545+ Eio_main.run @@ fun env ->
4646+ let doc = Frontmatter_eio.read_file env#fs "posts/my-post.md" in
4747+ (* ... process document ... *)
4848+```
4949+5050+## Installation
5151+5252+```
5353+opam install frontmatter frontmatter-eio
5454+```
5555+5656+## Documentation
5757+5858+API documentation is available via:
5959+6060+```
6161+opam install frontmatter
6262+odig doc frontmatter
6363+```
6464+6565+## License
6666+6767+ISC
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
(** YAML value representation; an alias for yamlrw's [Yamlrw.value]. *)
type yaml = Yamlrw.value
(** A parsed document: the frontmatter value, the text that follows it, and
    the filename passed to the parser (if any). *)
type t = {
  yaml : yaml;
  body : string;
  fname : string option;
}
(* Plain field projections over a parsed document. *)
let yaml doc = doc.yaml
let body doc = doc.body
let fname doc = doc.fname
(** [error_with_fname fname msg] is [Error msg], with [msg] prefixed by
    ["<fname>: "] when a filename is supplied. *)
let error_with_fname fname msg =
  match fname with
  | None -> Error msg
  | Some f -> Error (f ^ ": " ^ msg)
(** Parse a Jekyll-style [YYYY-MM-DD-] date prefix.

    [parse_date_prefix s] is [Some (date, rest)] when [s] starts with four
    decimal digits, [-], two digits, [-], two digits and [-], and the three
    numbers are accepted by [Ptime.of_date]; [rest] is everything after the
    11-character prefix. It is [None] otherwise (e.g. for a bare slug). *)
let parse_date_prefix s =
  let len = String.length s in
  let is_digit c = c >= '0' && c <= '9' in
  (* [int_of_string] also accepts signs, [0x]/[0o]/[0b] prefixes and [_]
     separators, so every field must consist of plain decimal digits before
     it is converted. *)
  let digits pos n =
    let rec all i = i >= pos + n || (is_digit s.[i] && all (i + 1)) in
    if all pos then int_of_string_opt (String.sub s pos n) else None
  in
  if len < 11 || s.[4] <> '-' || s.[7] <> '-' || s.[10] <> '-' then None
  else
    match digits 0 4, digits 5 2, digits 8 2 with
    | Some year, Some month, Some day ->
      Ptime.of_date (year, month, day)
      |> Option.map (fun date -> (date, String.sub s 11 (len - 11)))
    | _ -> None
(** [slug_of_fname fname] strips the directory part and the extension (if
    any) from [fname], then splits off a leading date recognised by
    [parse_date_prefix].

    Returns [Ok (slug, Some date)] when a date prefix is present and
    [Ok (name, None)] otherwise. *)
let slug_of_fname fname =
  (* [Filename.remove_extension] leaves extension-less names untouched,
     whereas [Filename.chop_extension] raises [Invalid_argument] on them. *)
  let stem = fname |> Filename.basename |> Filename.remove_extension in
  match parse_date_prefix stem with
  | Some (date, slug) -> Ok (slug, Some date)
  | None -> Ok (stem, None)
(** Parse frontmatter using yamlrw's streaming parser.

    The parser's event stream is wrapped so that the stop index of each
    [Document_end] event is recorded; the body is the part of [content]
    starting at the last recorded index, minus one line break ([\n] or
    [\r\n]) that immediately follows it.

    @param fname optional filename, prefixed to error messages and stored in
    the result.
    @return [Error _] when the trimmed input does not start with [---] or
    when yamlrw raises [Yamlrw.Yamlrw_error]. *)
let of_string ?fname content =
  (* Check for the opening delimiter, ignoring surrounding whitespace. *)
  let trimmed = String.trim content in
  let has_delimiter =
    String.length trimmed >= 3 && String.sub trimmed 0 3 = "---"
  in
  if not has_delimiter then
    error_with_fname fname
      "Content does not start with '---' frontmatter delimiter"
  else
    let parser = Yamlrw.Parser.of_string content in
    let end_pos = ref 0 in
    (* Forward parser events unchanged, noting where documents end. *)
    let next_with_tracking () =
      match Yamlrw.Parser.next parser with
      | None -> None
      | Some ev as result ->
        (match ev.event with
         | Yamlrw.Event.Document_end _ ->
           end_pos := ev.span.stop.Yamlrw.Position.index
         | _ -> ());
        result
    in
    try
      let yaml = Yamlrw.Loader.value_of_parser next_with_tracking in
      let len = String.length content in
      let at i c = i < len && content.[i] = c in
      (* Drop the line break that terminates the end-of-document line so the
         body starts on its own first line, for both LF and CRLF input. *)
      let body_start =
        let p = !end_pos in
        if at p '\n' then p + 1
        else if at p '\r' && at (p + 1) '\n' then p + 2
        else p
      in
      let body = String.sub content body_start (len - body_start) in
      Ok { yaml; body; fname }
    with Yamlrw.Yamlrw_error e ->
      error_with_fname fname ("YAML parse error: " ^ Yamlrw.Error.to_string e)
(** Same as [of_string], but a parse error is raised as [Failure] carrying
    the error message. *)
let of_string_exn ?fname content =
  Result.fold ~ok:Fun.id ~error:failwith (of_string ?fname content)
(** [find key doc] is the value bound to [key] when the frontmatter is a
    mapping containing it, and [None] in every other case. *)
let find key = function
  | { yaml = `O fields; _ } -> List.assoc_opt key fields
  | _ -> None
(** [find_string key t] is [Some s] when [key] is bound to a string [s],
    otherwise [None]. *)
let find_string key t =
  match find key t with
  | Some (`String s) -> Some s
  | Some _ | None -> None
(** [find_strings key t] collects the string items of the sequence bound to
    [key]; non-string items are dropped. The result is [[]] when [key] is
    missing or is not bound to a sequence. *)
let find_strings key t =
  match find key t with
  | Some (`A items) ->
    List.filter_map (function `String s -> Some s | _ -> None) items
  | Some _ | None -> []
(** [find_bool key t] is [Some b] when [key] is bound to a boolean [b],
    otherwise [None]. *)
let find_bool key t =
  match find key t with
  | Some (`Bool b) -> Some b
  | Some _ | None -> None
(** [find_int key t] is [Some n] when [key] is bound to a number with an
    integral value that fits in an OCaml [int], otherwise [None]. *)
let find_int key t =
  (* [min_int] is a power of two, so it converts to a float exactly and the
     representable range is [min_f <= f < -. min_f]. Outside that range the
     result of [int_of_float] is unspecified. *)
  let min_f = Float.of_int min_int in
  match find key t with
  | Some (`Float f)
    when Float.is_integer f && f >= min_f && f < -.min_f ->
    Some (int_of_float f)
  | Some _ | None -> None
(** [find_float key t] is [Some f] when [key] is bound to a number [f],
    otherwise [None]. *)
let find_float key t =
  match find key t with
  | Some (`Float f) -> Some f
  | Some _ | None -> None
(** [decode jsont doc] runs [Yamlt.decode_value] with codec [jsont] on the
    frontmatter value of [doc]. *)
let decode jsont doc = Yamlt.decode_value jsont doc.yaml
(** Same as [decode], but a decode error is raised as [Failure] carrying the
    error message. *)
let decode_exn jsont t =
  decode jsont t |> function
  | Ok value -> value
  | Error msg -> failwith msg
+134
lib/frontmatter.mli
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
55+66+(** Parse YAML frontmatter from Markdown files (Jekyll-format).
77+88+ This library parses files with YAML frontmatter headers delimited by
99+ '---' markers:
1010+1111+ {v
1212+ ---
1313+ title: My Post
1414+ date: 2025-01-15
1515+ tags:
1616+ - ocaml
1717+ - programming
1818+ ---
1919+2020+ The body content starts here.
2121+ v}
2222+2323+ {1 Basic Usage}
2424+2525+ {[
2626+ match Frontmatter.of_string content with
2727+ | Ok fm ->
2828+ let title = Frontmatter.find_string "title" fm in
2929+ let body = Frontmatter.body fm in
3030+ ...
3131+ | Error msg -> Printf.eprintf "Parse error: %s\n" msg
3232+ ]}
3333+3434+ {1 Typed Decoding}
3535+3636+ For structured access, use Jsont codecs:
3737+3838+ {[
3939+ type post = { title: string; date: Ptime.t; tags: string list }
4040+4141+ let post_jsont =
4242+ Jsont.Object.map ~kind:"post"
4343+ (fun title date tags -> { title; date; tags })
4444+ |> Jsont.Object.mem "title" Jsont.string ~enc:(fun p -> p.title)
4545+ |> Jsont.Object.mem "date" ptime_jsont ~enc:(fun p -> p.date)
4646+ |> Jsont.Object.mem "tags" Jsont.(list string) ~dec_absent:[]
4747+ ~enc:(fun p -> p.tags)
4848+ |> Jsont.Object.finish
4949+5050+ let post = Frontmatter.decode post_jsont fm
5151+ ]}
5252+*)
(** {1 Types} *)

type t
(** A parsed frontmatter document: the decoded YAML header, the body text
    that follows it, and the optional filename given at parse time. *)

type yaml = Yamlrw.value
(** YAML value type from yamlrw, as returned by {!yaml} and {!find}. *)
(** {1 Parsing} *)

val of_string : ?fname:string -> string -> (t, string) result
(** Parse a string containing YAML frontmatter.

    The input should have YAML delimited by '---' markers at the start.
    Everything after the closing '---' is the body; a line feed directly
    following the end of the frontmatter is not part of the body.

    Input that does not begin with '---' (once surrounding whitespace is
    trimmed) is rejected with an error, as is YAML that fails to parse.

    @param fname Optional filename. It is prefixed to error messages and can
    be read back with {!fname}.
    @return Parsed frontmatter or an error message. *)

val of_string_exn : ?fname:string -> string -> t
(** Like {!of_string} but raises [Failure] (carrying the error message) on
    parse error. *)
(** {1 Accessors} *)

val yaml : t -> yaml
(** [yaml fm] is the raw YAML value parsed from the frontmatter. *)

val body : t -> string
(** [body fm] is the body content that follows the frontmatter. *)

val fname : t -> string option
(** [fname fm] is the filename passed as [?fname] during parsing, if any. *)
(** {1 Field Access}

    Convenience functions for accessing common field types. *)

val find : string -> t -> yaml option
(** [find key fm] looks up [key] in the frontmatter YAML. Returns [None]
    when the frontmatter is not a mapping or has no such key. *)

val find_string : string -> t -> string option
(** [find_string key fm] gets a string field from frontmatter. Returns
    [None] if the field is absent or is not a string. *)

val find_strings : string -> t -> string list
(** [find_strings key fm] gets a string list field, returning empty list
    if not found or not a list. List items that are not strings are
    skipped. *)

val find_bool : string -> t -> bool option
(** [find_bool key fm] gets a boolean field. Returns [None] if the field is
    absent or is not a boolean. *)

val find_int : string -> t -> int option
(** [find_int key fm] gets an integer field. Returns [None] if the field is
    absent or is not a number with an integral value. *)

val find_float : string -> t -> float option
(** [find_float key fm] gets a float field. Returns [None] if the field is
    absent or is not a number. *)
(** {1 Typed Decoding}

    Decode frontmatter using Jsont codecs for structured access. *)

val decode : 'a Jsont.t -> t -> ('a, string) result
(** [decode jsont fm] decodes the frontmatter YAML using the given codec.

    Uses {!Yamlt.decode_value} to interpret the YAML value directly through
    the Jsont codec. The body text is not involved. *)

val decode_exn : 'a Jsont.t -> t -> 'a
(** Like {!decode} but raises [Failure] (carrying the error message) on
    decode error. *)
(** {1 Slug Extraction}

    Jekyll-style filename slug extraction. *)

val slug_of_fname : string -> (string * Ptime.t option, string) result
(** Extract slug and optional date from a Jekyll-style filename.

    Only the base name is considered (any directory part is dropped) and the
    file extension is removed before looking for a [YYYY-MM-DD-] prefix.

    Handles formats like:
    - [2025-01-15-my-post.md] -> [("my-post", Some date)]
    - [my-post.md] -> [("my-post", None)]

    @return Tuple of (slug, optional date) or error message. *)
···11+(*---------------------------------------------------------------------------
22+ Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
33+ SPDX-License-Identifier: ISC
44+ ---------------------------------------------------------------------------*)
(* Parses a document with a title and a tag list, prints what was extracted,
   and succeeds when both fields match and the body is non-empty. *)
let test_basic () =
  let content = {|---
title: Hello World
tags:
 - ocaml
 - yaml
---
# Markdown Body

This is the body content.
|} in
  match Frontmatter.of_string content with
  | Ok doc ->
    let title =
      match Frontmatter.find_string "title" doc with
      | Some s -> s
      | None -> "(none)"
    in
    let tags = Frontmatter.find_strings "tags" doc |> String.concat ", " in
    let body = Frontmatter.body doc in
    Printf.printf "Title: %s\n" title;
    Printf.printf "Tags: %s\n" tags;
    Printf.printf "Body:\n%s\n" body;
    String.equal title "Hello World"
    && String.equal tags "ocaml, yaml"
    && body <> ""
  | Error e ->
    Printf.printf "ERROR: %s\n" e;
    false
(* Input without a leading '---' must be rejected by the parser. *)
let test_no_frontmatter () =
  let rejected =
    Result.is_error (Frontmatter.of_string "No frontmatter here")
  in
  print_string
    (if rejected then "Correctly rejected content without frontmatter\n"
     else "ERROR: Should have rejected content without frontmatter\n");
  rejected
(* A '---' appearing inside a body line must not be mistaken for a delimiter:
   the body is expected to start with "Body". *)
let test_with_dash_in_body () =
  let content = {|---
title: Test
---
Body with --- in it
And more content
|} in
  match Frontmatter.of_string content with
  | Error e ->
    Printf.printf "ERROR: %s\n" e;
    false
  | Ok t ->
    let body = Frontmatter.body t in
    Printf.printf "Body with dashes: %s\n" body;
    (* Check the length first: [String.sub] raises on a body shorter than
       four characters, which would abort the run instead of failing it. *)
    String.length body >= 4 && String.sub body 0 4 = "Body"
(* Frontmatter closed by the explicit YAML document-end marker '...': the
   body is expected to start with "Body". *)
let test_explicit_doc_end () =
  let content = {|---
title: With explicit end
...
Body after explicit document end marker
|} in
  match Frontmatter.of_string content with
  | Error e ->
    Printf.printf "ERROR: %s\n" e;
    false
  | Ok t ->
    let body = Frontmatter.body t in
    Printf.printf "Body after ...: %s\n" body;
    (* Check the length first: [String.sub] raises on a body shorter than
       four characters, which would abort the run instead of failing it. *)
    String.length body >= 4 && String.sub body 0 4 = "Body"
(* Test driver: runs every test in order (none is skipped after a failure),
   printing a banner before each, then exits 0 if all passed and 1
   otherwise. *)
let () =
  let cases =
    [
      ("=== Testing basic frontmatter ===\n", test_basic);
      ("\n=== Testing no frontmatter ===\n", test_no_frontmatter);
      ("\n=== Testing dash in body ===\n", test_with_dash_in_body);
      ("\n=== Testing explicit doc end ===\n", test_explicit_doc_end);
    ]
  in
  let run all_ok (banner, test) =
    print_string banner;
    (* Evaluate the test before combining so it always runs. *)
    let ok = test () in
    all_ok && ok
  in
  if List.fold_left run true cases then begin
    Printf.printf "\nAll tests passed!\n";
    exit 0
  end
  else begin
    Printf.printf "\nSome tests failed!\n";
    exit 1
  end