···11+# houdini
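
[![Package Version](https://img.shields.io/hexpm/v/houdini)](https://hex.pm/packages/houdini)
[![Hex Docs](https://img.shields.io/badge/hex-docs-ffaff3)](https://hexdocs.pm/houdini/)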
55+66+```sh
77+gleam add houdini@1
88+```
99+```gleam
import gleam/io
import houdini

pub fn main() -> Nil {
  // Prints: wibble &amp; wobble
  io.println(houdini.escape("wibble & wobble"))
}
1515+```
1616+1717+Further documentation can be found at <https://hexdocs.pm/houdini>.
1818+1919+## Development
2020+2121+```sh
2222+gleam run # Run the project
2323+gleam test # Run the tests
2424+```
+14
gleam.toml
···11+name = "houdini"
22+version = "1.0.0"
33+description = "Fast HTML escaping 🪄"
44+licences = ["Apache-2.0"]
55+repository = { type = "github", user = "giacomocavalieri", repo = "houdini" }
66+77+[dependencies]
88+gleam_stdlib = ">= 0.44.0 and < 2.0.0"
99+1010+[dev-dependencies]
1111+gleeunit = ">= 1.0.0 and < 2.0.0"
1212+gleamy_bench = ">= 0.6.0 and < 1.0.0"
1313+simplifile = ">= 2.2.1 and < 3.0.0"
1414+qcheck = ">= 1.0.0 and < 2.0.0"
+23
manifest.toml
···11+# This file was generated by Gleam
22+# You typically do not need to edit this file
33+44+packages = [
55+ { name = "exception", version = "2.0.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "exception", source = "hex", outer_checksum = "F5580D584F16A20B7FCDCABF9E9BE9A2C1F6AC4F9176FA6DD0B63E3B20D450AA" },
66+ { name = "filepath", version = "1.1.1", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "filepath", source = "hex", outer_checksum = "65F51013BCF78A603AFFD7992EF1CC6ECA96C74038EB48887F656DE44DBC1902" },
77+ { name = "gleam_bitwise", version = "1.3.1", build_tools = ["gleam"], requirements = [], otp_app = "gleam_bitwise", source = "hex", outer_checksum = "B36E1D3188D7F594C7FD4F43D0D2CE17561DE896202017548578B16FE1FE9EFC" },
88+ { name = "gleam_regexp", version = "1.1.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_regexp", source = "hex", outer_checksum = "7F5E0C0BBEB3C58E57C9CB05FA9002F970C85AD4A63BA1E55CBCB35C15809179" },
99+ { name = "gleam_stdlib", version = "0.58.0", build_tools = ["gleam"], requirements = [], otp_app = "gleam_stdlib", source = "hex", outer_checksum = "091F2D2C4A3A4E2047986C47E2C2C9D728A4E068ABB31FDA17B0D347E6248467" },
1010+ { name = "gleam_yielder", version = "1.1.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_yielder", source = "hex", outer_checksum = "8E4E4ECFA7982859F430C57F549200C7749823C106759F4A19A78AEA6687717A" },
1111+ { name = "gleamy_bench", version = "0.6.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleamy_bench", source = "hex", outer_checksum = "DEF68E4B097A56781282F0F9D48371A0ABBCDDCF89CAD05B28C3BEDD6B2E8DF3" },
1212+ { name = "gleeunit", version = "1.3.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleeunit", source = "hex", outer_checksum = "0E6C83834BA65EDCAAF4FE4FB94AC697D9262D83E6F58A750D63C9F6C8A9D9FF" },
1313+ { name = "prng", version = "4.0.1", build_tools = ["gleam"], requirements = ["gleam_bitwise", "gleam_stdlib", "gleam_yielder"], otp_app = "prng", source = "hex", outer_checksum = "695AB70E4BE713042062E901975FC08D1EC725B85B808D4786A14C406ADFBCF1" },
1414+ { name = "qcheck", version = "1.0.0", build_tools = ["gleam"], requirements = ["exception", "gleam_regexp", "gleam_stdlib", "gleam_yielder", "prng"], otp_app = "qcheck", source = "hex", outer_checksum = "6DAE7925E350480CE813F80D07AC4B9BAB25360F0D63EC98C5742D8456C9A9A1" },
1515+ { name = "simplifile", version = "2.2.1", build_tools = ["gleam"], requirements = ["filepath", "gleam_stdlib"], otp_app = "simplifile", source = "hex", outer_checksum = "C88E0EE2D509F6D86EB55161D631657675AA7684DAB83822F7E59EB93D9A60E3" },
1616+]
1717+1818+[requirements]
1919+gleam_stdlib = { version = ">= 0.44.0 and < 2.0.0" }
2020+gleamy_bench = { version = ">= 0.6.0 and < 1.0.0" }
2121+gleeunit = { version = ">= 1.0.0 and < 2.0.0" }
2222+qcheck = { version = ">= 1.0.0 and < 2.0.0" }
2323+simplifile = { version = ">= 2.2.1 and < 3.0.0" }
+11
src/houdini.ffi.mjs
/**
 * Returns the part of `string` that starts at index `from` and is `len`
 * UTF-16 code units long.
 */
export function slice(string, from, len) {
  const end = from + len;
  return string.slice(from, end);
}

/**
 * Returns the first UTF-16 code unit of `string` as a one-character string,
 * or the empty string when `string` is empty.
 */
export function first(string) {
  return string.charAt(0);
}

/**
 * Returns `string` without its first UTF-16 code unit; an empty string stays
 * empty.
 */
export function drop_first(string) {
  return string.substring(1);
}
+19
src/houdini.gleam
···11+@target(javascript)
22+import internals/escape_generic as escape
33+44+@target(erlang)
55+import internals/escape_erl as escape

/// Escapes a string in a format suitable to be used inside HTML by escaping
/// the following characters: `<`, `>`, `&`, `"`, `'`.
///
/// The work is delegated to the target-specific implementation imported as
/// `escape` at the top of this module.
///
/// ## Examples
///
/// ```gleam
/// assert escape("wibble & wobble") == "wibble &amp; wobble"
/// assert escape("wibble > wobble") == "wibble &gt; wobble"
/// ```
///
pub fn escape(string: String) -> String {
  escape.escape(string)
}
···11+@target(erlang)
22+import gleam/bit_array
33+@target(erlang)
44+import gleam/list

/// Erlang-target implementation of `escape`.
///
/// The string is reinterpreted as a `BitArray` with `coerce` so that
/// `do_escape` can scan it and return slices of it. `do_escape` builds its
/// list of chunks back to front, so the list is reversed before the chunks
/// are concatenated and reinterpreted as a `String`.
@target(erlang)
pub fn escape(text: String) -> String {
  let input = coerce(text)
  let reversed_chunks = do_escape(input, 0, input, [])
  let escaped = bit_array.concat(list.reverse(reversed_chunks))

  coerce(escaped)
}

// Binding to `houdini_ffi:coerce/1`. In this module it is used in both
// directions: to view a `String` as a `BitArray` and to turn the escaped
// `BitArray` back into a `String`.
// NOTE(review): presumably the Erlang side is an identity function; confirm
// against `houdini_ffi.erl`, which is not visible here.
@target(erlang)
@external(erlang, "houdini_ffi", "coerce")
fn coerce(value: a) -> b

// A possible way to escape chars would be to split the string into graphemes,
// traverse those one by one and accumulate them back into a string, escaping
// ">", "<", etc. as we see them.
// However, we can be a lot more performant by working directly on the
// `BitArray` used to represent a Gleam UTF-8 String: instead of popping a
// grapheme at a time, we take slices of the original BitArray. This has the
// big advantage of making sure we share as much as possible with the original
// string without having to build a new one from scratch.
//
// Arguments:
// - `bin`: the part of the input that still has to be scanned.
// - `skip`: how many bytes of `original` have already been handled, i.e. the
//   offset at which the next slice of unescaped bytes would start.
// - `original`: the whole input, only ever used to take slices from.
// - `acc`: the output chunks produced so far, most recent first (the caller
//   reverses the list).
@target(erlang)
fn do_escape(
  bin: BitArray,
  skip: Int,
  original: BitArray,
  acc: List(BitArray),
) -> List(BitArray) {
  case bin {
    // If we find a char to escape we just advance the `skip` counter so that
    // it will be ignored in the following slice, then we prepend the escaped
    // version to the accumulator.
    <<"<", rest:bits>> -> {
      let acc = [<<"&lt;">>, ..acc]
      do_escape(rest, skip + 1, original, acc)
    }

    <<">", rest:bits>> -> {
      let acc = [<<"&gt;">>, ..acc]
      do_escape(rest, skip + 1, original, acc)
    }

    <<"&", rest:bits>> -> {
      let acc = [<<"&amp;">>, ..acc]
      do_escape(rest, skip + 1, original, acc)
    }

    <<"\"", rest:bits>> -> {
      let acc = [<<"&quot;">>, ..acc]
      do_escape(rest, skip + 1, original, acc)
    }

    <<"'", rest:bits>> -> {
      let acc = [<<"&#39;">>, ..acc]
      do_escape(rest, skip + 1, original, acc)
    }

    // For any other byte that doesn't need to be escaped we go into an inner
    // loop, consuming as many "non-escapable" bytes as possible. The slice
    // being measured starts at `skip` and is 1 byte long so far.
    <<_char, rest:bits>> -> do_escape_normal(rest, skip, original, acc, 1)

    // Nothing left to scan: everything has already been pushed onto `acc`.
    <<>> -> acc

    _ -> panic as "non byte aligned string, all strings should be byte aligned"
  }
}

// Inner loop of `do_escape`: measures a run of bytes that need no escaping.
//
// `bin`, `skip`, `original` and `acc` have the same meaning as in `do_escape`;
// `len` is the number of bytes, starting at offset `skip` of `original`, that
// have been scanned so far without finding anything to escape.
@target(erlang)
fn do_escape_normal(
  bin: BitArray,
  skip: Int,
  original: BitArray,
  acc: List(BitArray),
  len: Int,
) -> List(BitArray) {
  // Remember, if we're here it means we've found a char that doesn't need to be
  // escaped, so what we want to do is advance the `len` counter until we reach
  // a char that _does_ need to be escaped and take the slice going from
  // `skip`, with size `len`.
  //
  // Imagine we're escaping this string: "abc<def&ghi" and we've reached 'd':
  // ```
  //   abc<def&ghi
  //       ^ `skip` points here
  // ```
  // We're going to be increasing `len` until we reach the '&':
  // ```
  //   abc<def&ghi
  //       ^^^ len will be 3 when we reach the '&' that needs escaping
  // ```
  // So we take the slice corresponding to "def".
  //
  case bin {
    // If we reach a char that has to be escaped we push the slice starting
    // from `skip` with size `len`, followed by the escaped char.
    // This is what allows us to share as much of the original string as
    // possible: we only allocate a new BitArray for the escaped chars,
    // everything else is just a slice of the original String.
    // The next unescaped run can only start after the slice (`len` bytes) and
    // the escaped char (1 byte), hence `skip + len + 1`.
    <<"<", rest:bits>> -> {
      let acc = [<<"&lt;">>, slice(original, skip, len), ..acc]
      do_escape(rest, skip + len + 1, original, acc)
    }

    <<">", rest:bits>> -> {
      let acc = [<<"&gt;">>, slice(original, skip, len), ..acc]
      do_escape(rest, skip + len + 1, original, acc)
    }

    <<"&", rest:bits>> -> {
      let acc = [<<"&amp;">>, slice(original, skip, len), ..acc]
      do_escape(rest, skip + len + 1, original, acc)
    }

    <<"\"", rest:bits>> -> {
      let acc = [<<"&quot;">>, slice(original, skip, len), ..acc]
      do_escape(rest, skip + len + 1, original, acc)
    }

    <<"'", rest:bits>> -> {
      let acc = [<<"&#39;">>, slice(original, skip, len), ..acc]
      do_escape(rest, skip + len + 1, original, acc)
    }

    // If a byte doesn't need escaping we keep increasing the length of the
    // slice we're going to take.
    <<_, rest:bits>> -> do_escape_normal(rest, skip, original, acc, len + 1)

    <<>> ->
      case skip {
        // We started from the very beginning of the bit array and consumed
        // everything without finding a char that needs escaping. This means
        // the entire string can be returned as is!
        0 -> [original]
        // Otherwise the input ends with an unescaped run: push it as the last
        // chunk.
        _ -> [slice(original, skip, len), ..acc]
      }

    _ -> panic as "non byte aligned string, all strings should be byte aligned"
  }
}

// Binding to Erlang's `binary:part/3`: the part of `bits` that begins at byte
// offset `start` and is `len` bytes long.
@target(erlang)
@external(erlang, "binary", "part")
fn slice(bits: BitArray, start: Int, len: Int) -> BitArray
+132
src/internals/escape_generic.gleam
···11+import gleam/list
22+import gleam/string

/// Target-independent implementation of `escape`.
///
/// It works on every target, but the version specifically optimised for
/// Erlang is meant to be _way faster_ when running on the BEAM, which is why
/// this fallback is only ever used when running on the JS backend.
///
pub fn escape(text: String) -> String {
  // `do_escape` returns its chunks most recent first.
  let reversed_chunks = do_escape(text, 0, text, [], 0, False)

  string.join(list.reverse(reversed_chunks), with: "")
}

// The logic behind this function is exactly the same as the erlang one: we
// walk the string one unit at a time (`first` / `drop_first`; with the
// JavaScript FFI a unit is a single UTF-16 code unit) and only ever take
// slices of the original string, to share as much of it as possible. However,
// this implementation is a little more convoluted since we cannot define it as
// two mutually recursive functions as we did with the Erlang one (or it won't
// be tail call optimised on the JS target).
//
// Arguments:
// - `string`: the part of the input that still has to be scanned.
// - `skip`: offset into `original` at which the current run of unescaped
//   units starts.
// - `original`: the whole input, only ever used to take slices from.
// - `acc`: the output chunks produced so far, most recent first.
// - `len`: length of the current run of unescaped units.
// - `found_normal`: `True` while we are inside a run of units that need no
//   escaping; it plays the role of the Erlang `do_escape_normal` function.
fn do_escape(
  string: String,
  skip: Int,
  original: String,
  acc: List(String),
  len: Int,
  found_normal: Bool,
) -> List(String) {
  case found_normal, first(string) {
    // Not inside a run: a char that needs escaping is replaced straight away
    // and `skip` moves past it.
    False, "<" -> {
      let rest = drop_first(string)
      let acc = ["&lt;", ..acc]
      do_escape(rest, skip + 1, original, acc, 0, False)
    }

    False, ">" -> {
      let rest = drop_first(string)
      let acc = ["&gt;", ..acc]
      do_escape(rest, skip + 1, original, acc, 0, False)
    }

    False, "&" -> {
      let rest = drop_first(string)
      let acc = ["&amp;", ..acc]
      do_escape(rest, skip + 1, original, acc, 0, False)
    }

    False, "\"" -> {
      let rest = drop_first(string)
      let acc = ["&quot;", ..acc]
      do_escape(rest, skip + 1, original, acc, 0, False)
    }

    False, "'" -> {
      let rest = drop_first(string)
      let acc = ["&#39;", ..acc]
      do_escape(rest, skip + 1, original, acc, 0, False)
    }

    // Nothing left to scan and no pending run.
    False, "" -> acc

    // For anything else that doesn't need to be escaped we go into the inner
    // loop, consuming as many "non-escapable" chars as possible.
    False, _ -> {
      let rest = drop_first(string)
      do_escape(rest, skip, original, acc, 1, True)
    }

    // Inside a run: a char that needs escaping ends the run, so we push the
    // slice starting at `skip` with length `len`, followed by the escaped
    // char, and continue after both (`skip + len + 1`).
    True, "<" -> {
      let rest = drop_first(string)
      let slice = slice(original, skip, len)
      let acc = ["&lt;", slice, ..acc]
      do_escape(rest, skip + len + 1, original, acc, 0, False)
    }

    True, ">" -> {
      let rest = drop_first(string)
      let slice = slice(original, skip, len)
      let acc = ["&gt;", slice, ..acc]
      do_escape(rest, skip + len + 1, original, acc, 0, False)
    }

    True, "&" -> {
      let rest = drop_first(string)
      let slice = slice(original, skip, len)
      let acc = ["&amp;", slice, ..acc]
      do_escape(rest, skip + len + 1, original, acc, 0, False)
    }

    True, "\"" -> {
      let rest = drop_first(string)
      let slice = slice(original, skip, len)
      let acc = ["&quot;", slice, ..acc]
      do_escape(rest, skip + len + 1, original, acc, 0, False)
    }

    True, "'" -> {
      let rest = drop_first(string)
      let slice = slice(original, skip, len)
      let acc = ["&#39;", slice, ..acc]
      do_escape(rest, skip + len + 1, original, acc, 0, False)
    }

    True, "" ->
      case skip {
        // The run started at the very beginning and reached the end: nothing
        // needed escaping, so the original string is returned untouched.
        0 -> [original]
        // Otherwise the input ends with an unescaped run: push it as the last
        // chunk.
        _ -> {
          let slice = slice(original, skip, len)
          [slice, ..acc]
        }
      }

    // If a char doesn't need escaping we keep increasing the length of the
    // slice we're going to take.
    True, _ -> {
      let rest = drop_first(string)
      do_escape(rest, skip, original, acc, len + 1, True)
    }
  }
}

// First unit of `string`, or "" when `string` is empty. The JavaScript
// implementation is `string.slice(0, 1)`, i.e. one UTF-16 code unit.
@external(erlang, "houdini_ffi", "first")
@external(javascript, "../houdini.ffi.mjs", "first")
fn first(string: String) -> String

// `string` without its first unit. The JavaScript implementation is
// `string.slice(1)`.
@external(erlang, "houdini_ffi", "drop_first")
@external(javascript, "../houdini.ffi.mjs", "drop_first")
fn drop_first(string: String) -> String

// The part of `string` that starts at offset `from` and is `len` units long.
// The third argument is a length, not an end index: the JavaScript
// implementation is `string.slice(from, from + len)`.
@external(erlang, "houdini_ffi", "slice")
@external(javascript, "../houdini.ffi.mjs", "slice")
fn slice(string: String, from: Int, len: Int) -> String