this repo has no description
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

:tada: Hello, Joe!

+524
+23
.github/workflows/test.yml
··· 1 + name: test 2 + 3 + on: 4 + push: 5 + branches: 6 + - master 7 + - main 8 + pull_request: 9 + 10 + jobs: 11 + test: 12 + runs-on: ubuntu-latest 13 + steps: 14 + - uses: actions/checkout@v4 15 + - uses: erlef/setup-beam@v1 16 + with: 17 + otp-version: "27.1.2" 18 + gleam-version: "1.9.1" 19 + rebar3-version: "3" 20 + # elixir-version: "1" 21 + - run: gleam deps download 22 + - run: gleam test 23 + - run: gleam format --check src test
+4
.gitignore
··· 1 + *.beam 2 + *.ez 3 + /build 4 + erl_crash.dump
+24
README.md
··· 1 + # houdini 2 + 3 + [![Package Version](https://img.shields.io/hexpm/v/houdini)](https://hex.pm/packages/houdini) 4 + [![Hex Docs](https://img.shields.io/badge/hex-docs-ffaff3)](https://hexdocs.pm/houdini/) 5 + 6 + ```sh 7 + gleam add houdini@1 8 + ``` 9 + ```gleam 10 + import houdini 11 + 12 + pub fn main() -> Nil { 13 + // TODO: An example of the project in use 14 + } 15 + ``` 16 + 17 + Further documentation can be found at <https://hexdocs.pm/houdini>. 18 + 19 + ## Development 20 + 21 + ```sh 22 + gleam run # Run the project 23 + gleam test # Run the tests 24 + ```
+14
gleam.toml
··· 1 + name = "houdini" 2 + version = "1.0.0" 3 + description = "Fast HTML escaping 🪄" 4 + licences = ["Apache-2.0"] 5 + repository = { type = "github", user = "giacomocavalieri", repo = "houdini" } 6 + 7 + [dependencies] 8 + gleam_stdlib = ">= 0.44.0 and < 2.0.0" 9 + 10 + [dev-dependencies] 11 + gleeunit = ">= 1.0.0 and < 2.0.0" 12 + gleamy_bench = ">= 0.6.0 and < 1.0.0" 13 + simplifile = ">= 2.2.1 and < 3.0.0" 14 + qcheck = ">= 1.0.0 and < 2.0.0"
+23
manifest.toml
··· 1 + # This file was generated by Gleam 2 + # You typically do not need to edit this file 3 + 4 + packages = [ 5 + { name = "exception", version = "2.0.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "exception", source = "hex", outer_checksum = "F5580D584F16A20B7FCDCABF9E9BE9A2C1F6AC4F9176FA6DD0B63E3B20D450AA" }, 6 + { name = "filepath", version = "1.1.1", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "filepath", source = "hex", outer_checksum = "65F51013BCF78A603AFFD7992EF1CC6ECA96C74038EB48887F656DE44DBC1902" }, 7 + { name = "gleam_bitwise", version = "1.3.1", build_tools = ["gleam"], requirements = [], otp_app = "gleam_bitwise", source = "hex", outer_checksum = "B36E1D3188D7F594C7FD4F43D0D2CE17561DE896202017548578B16FE1FE9EFC" }, 8 + { name = "gleam_regexp", version = "1.1.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_regexp", source = "hex", outer_checksum = "7F5E0C0BBEB3C58E57C9CB05FA9002F970C85AD4A63BA1E55CBCB35C15809179" }, 9 + { name = "gleam_stdlib", version = "0.58.0", build_tools = ["gleam"], requirements = [], otp_app = "gleam_stdlib", source = "hex", outer_checksum = "091F2D2C4A3A4E2047986C47E2C2C9D728A4E068ABB31FDA17B0D347E6248467" }, 10 + { name = "gleam_yielder", version = "1.1.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_yielder", source = "hex", outer_checksum = "8E4E4ECFA7982859F430C57F549200C7749823C106759F4A19A78AEA6687717A" }, 11 + { name = "gleamy_bench", version = "0.6.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleamy_bench", source = "hex", outer_checksum = "DEF68E4B097A56781282F0F9D48371A0ABBCDDCF89CAD05B28C3BEDD6B2E8DF3" }, 12 + { name = "gleeunit", version = "1.3.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleeunit", source = "hex", outer_checksum = "0E6C83834BA65EDCAAF4FE4FB94AC697D9262D83E6F58A750D63C9F6C8A9D9FF" }, 13 + { name = "prng", version = "4.0.1", 
build_tools = ["gleam"], requirements = ["gleam_bitwise", "gleam_stdlib", "gleam_yielder"], otp_app = "prng", source = "hex", outer_checksum = "695AB70E4BE713042062E901975FC08D1EC725B85B808D4786A14C406ADFBCF1" }, 14 + { name = "qcheck", version = "1.0.0", build_tools = ["gleam"], requirements = ["exception", "gleam_regexp", "gleam_stdlib", "gleam_yielder", "prng"], otp_app = "qcheck", source = "hex", outer_checksum = "6DAE7925E350480CE813F80D07AC4B9BAB25360F0D63EC98C5742D8456C9A9A1" }, 15 + { name = "simplifile", version = "2.2.1", build_tools = ["gleam"], requirements = ["filepath", "gleam_stdlib"], otp_app = "simplifile", source = "hex", outer_checksum = "C88E0EE2D509F6D86EB55161D631657675AA7684DAB83822F7E59EB93D9A60E3" }, 16 + ] 17 + 18 + [requirements] 19 + gleam_stdlib = { version = ">= 0.44.0 and < 2.0.0" } 20 + gleamy_bench = { version = ">= 0.6.0 and < 1.0.0" } 21 + gleeunit = { version = ">= 1.0.0 and < 2.0.0" } 22 + qcheck = { version = ">= 1.0.0 and < 2.0.0" } 23 + simplifile = { version = ">= 2.2.1 and < 3.0.0" }
+11
src/houdini.ffi.mjs
/**
 * Returns the part of `string` that starts at index `from` and spans `len`
 * UTF-16 code units.
 */
export function slice(string, from, len) {
  const end = from + len;
  return string.slice(from, end);
}

/**
 * Returns the first UTF-16 code unit of `string`, or the empty string when
 * `string` is empty.
 */
export function first(string) {
  return string.charAt(0);
}

/**
 * Returns `string` without its first UTF-16 code unit; an empty string stays
 * empty.
 */
export function drop_first(string) {
  return string.substring(1);
}
+19
src/houdini.gleam
@target(javascript)
import internals/escape_generic as escape

@target(erlang)
import internals/escape_erl as escape

/// Escapes a string so that it is suitable to be used inside HTML, replacing
/// each of the following characters with its HTML entity:
///
/// - `<` becomes `&lt;`
/// - `>` becomes `&gt;`
/// - `&` becomes `&amp;`
/// - `"` becomes `&quot;`
/// - `'` becomes `&#39;`
///
/// Every other character is left untouched.
///
/// ## Examples
///
/// ```gleam
/// houdini.escape("wibble & wobble")
/// // -> "wibble &amp; wobble"
///
/// houdini.escape("wibble > wobble")
/// // -> "wibble &gt; wobble"
/// ```
///
pub fn escape(string: String) -> String {
  // `escape` here is the module alias imported above: it points to the
  // Erlang-specific implementation on the Erlang target and to the generic
  // implementation on the JavaScript target.
  escape.escape(string)
}
+20
src/houdini_ffi.erl
-module(houdini_ffi).
-export([coerce/1, slice/3, first/1, drop_first/1]).

%% Identity function: returns its argument unchanged.
coerce(Value) -> Value.

%% Returns the `Len` bytes of `String` that start at byte offset `From`.
slice(String, From, Len) ->
    binary:part(String, {From, Len}).

%% Returns a binary holding only the first byte of `String`, or the empty
%% binary when `String` is empty.
first(String) ->
    case String of
        <<Head, _/bitstring>> -> <<Head>>;
        <<>> -> <<>>
    end.

%% Returns `String` without its first byte; the empty binary is returned
%% as is.
drop_first(String) ->
    case String of
        <<_, Tail/bitstring>> -> Tail;
        <<>> -> <<>>
    end.
+152
src/internals/escape_erl.gleam
@target(erlang)
import gleam/bit_array
@target(erlang)
import gleam/list

/// Escapes `<`, `>`, `&`, `"` and `'` in `text`, replacing each one with its
/// HTML entity.
///
/// This version is highly optimised for the Erlang target: the string is
/// treated as a `BitArray` and sliced so that as much as possible is shared
/// with the original. More details can be found in `do_escape`.
@target(erlang)
pub fn escape(text: String) -> String {
  let bits = coerce(text)

  // The chunks are accumulated in reverse order, so they have to be put back
  // in order before being joined together.
  let reversed_chunks = do_escape(bits, 0, bits, [])
  let chunks = list.reverse(reversed_chunks)
  coerce(bit_array.concat(chunks))
}

// Identity function on the Erlang side, used to move between `String` and
// `BitArray` without any conversion.
@target(erlang)
@external(erlang, "houdini_ffi", "coerce")
fn coerce(bit_array: a) -> b

// One way to escape a string would be to split it into graphemes, go over them
// one at a time, and build a new string escaping ">", "<", etc. along the way.
// Working directly on the `BitArray` that represents a Gleam UTF-8 String is a
// lot more performant: instead of popping one grapheme at a time we only ever
// take slices of the original bit array, which means we share as much as
// possible with the original string instead of building a new one from
// scratch.
//
// - `rest` is the part of the input still to be examined.
// - `start` is the byte offset in `source` where the next slice would begin.
// - `source` is the whole, untouched input.
// - `out` holds the chunks produced so far, in reverse order.
@target(erlang)
fn do_escape(
  rest: BitArray,
  start: Int,
  source: BitArray,
  out: List(BitArray),
) -> List(BitArray) {
  case rest {
    // A char that needs escaping: its escaped version is pushed on the
    // accumulator and `start` moves one byte forward, so that the char is left
    // out of the next slice.
    <<"<", tail:bits>> -> do_escape(tail, start + 1, source, [<<"&lt;">>, ..out])

    <<">", tail:bits>> -> do_escape(tail, start + 1, source, [<<"&gt;">>, ..out])

    <<"&", tail:bits>> ->
      do_escape(tail, start + 1, source, [<<"&amp;">>, ..out])

    <<"\"", tail:bits>> ->
      do_escape(tail, start + 1, source, [<<"&quot;">>, ..out])

    <<"'", tail:bits>> ->
      do_escape(tail, start + 1, source, [<<"&#39;">>, ..out])

    // Any other byte starts a run of bytes that need no escaping: the inner
    // loop takes over and consumes as many of them as possible.
    <<_byte, tail:bits>> -> do_escape_normal(tail, start, source, out, 1)

    <<>> -> out

    _ -> panic as "non byte aligned string, all strings should be byte aligned"
  }
}

// Inner loop of `do_escape`: we get here after finding a byte that doesn't need
// escaping. `len` counts how many such bytes we've seen in a row starting from
// `start`; it keeps growing until we reach a char that _does_ need escaping, at
// which point we take the slice of `source` that starts at `start` and has
// size `len`.
//
// Imagine we're escaping "abc<def&ghi" and we've reached the 'd':
// ```
// abc<def&ghi
//     ^ `start` points here
// ```
// `len` is increased until the '&' is reached:
// ```
// abc<def&ghi
//     ^^^ `len` is 3 when we get to the '&' that needs escaping
// ```
// So the slice we take is the one corresponding to "def".
@target(erlang)
fn do_escape_normal(
  rest: BitArray,
  start: Int,
  source: BitArray,
  out: List(BitArray),
  len: Int,
) -> List(BitArray) {
  case rest {
    // A char that needs escaping ends the run: both the slice covering the run
    // and the escaped char are pushed on the accumulator.
    // Only the escaped chars are new bit arrays, everything else is a slice of
    // the original string: that's how we get to share as much as possible.
    <<"<", tail:bits>> -> {
      let plain = slice(source, start, len)
      do_escape(tail, start + len + 1, source, [<<"&lt;">>, plain, ..out])
    }

    <<">", tail:bits>> -> {
      let plain = slice(source, start, len)
      do_escape(tail, start + len + 1, source, [<<"&gt;">>, plain, ..out])
    }

    <<"&", tail:bits>> -> {
      let plain = slice(source, start, len)
      do_escape(tail, start + len + 1, source, [<<"&amp;">>, plain, ..out])
    }

    <<"\"", tail:bits>> -> {
      let plain = slice(source, start, len)
      do_escape(tail, start + len + 1, source, [<<"&quot;">>, plain, ..out])
    }

    <<"'", tail:bits>> -> {
      let plain = slice(source, start, len)
      do_escape(tail, start + len + 1, source, [<<"&#39;">>, plain, ..out])
    }

    // One more byte that needs no escaping: the run gets one byte longer.
    <<_, tail:bits>> -> do_escape_normal(tail, start, source, out, len + 1)

    <<>> ->
      case start {
        // `start` only moves forward once a char has been escaped, so if it is
        // still 0 the whole input was consumed without finding anything to
        // escape: the original string can be returned as is!
        0 -> [source]
        _ -> [slice(source, start, len), ..out]
      }

    _ -> panic as "non byte aligned string, all strings should be byte aligned"
  }
}

// Takes the `size` bytes of `bit_array` starting at byte offset `from`.
@target(erlang)
@external(erlang, "binary", "part")
fn slice(bit_array: BitArray, from: Int, size: Int) -> BitArray
+132
src/internals/escape_generic.gleam
import gleam/list
import gleam/string

/// This `escape` function will work on all targets, beware that the version
/// specifically optimised for Erlang will be _way faster_ than this one when
/// running on the BEAM. That's why this fallback implementation is only ever
/// used when running on the JS backend.
///
pub fn escape(text: String) -> String {
  // Chunks are accumulated in reverse order, so they are reversed before
  // being joined back into a single string.
  do_escape(text, 0, text, [], 0, False)
  |> list.reverse
  |> string.join(with: "")
}

// The logic behind this function is exactly the same as the erlang one: we
// iterate the string one unit at a time (whatever `first` and `drop_first`
// yield on the current target) and only ever take slices of it, so that as
// much as possible is shared with the original string. However, this
// implementation is a little more convoluted since we cannot define it as two
// mutually recursive functions as we did with the Erlang one (or it won't be
// tail call optimised on the JS target).
//
// - `string` is the part of the input still to be examined.
// - `skip` is the offset in `original` where the next slice would begin.
// - `original` is the whole, untouched input.
// - `acc` holds the chunks produced so far, in reverse order.
// - `len` is the length of the current run of chars that need no escaping.
// - `found_normal` is `True` while we are inside such a run; it plays the role
//   of the inner loop of the Erlang implementation.
fn do_escape(
  string: String,
  skip: Int,
  original: String,
  acc: List(String),
  len: Int,
  found_normal: Bool,
) -> List(String) {
  case found_normal, first(string) {
    // Not inside a run: a char that needs escaping is replaced by its escaped
    // version and `skip` moves past it.
    False, "<" -> {
      let rest = drop_first(string)
      let acc = ["&lt;", ..acc]
      do_escape(rest, skip + 1, original, acc, 0, False)
    }

    False, ">" -> {
      let rest = drop_first(string)
      let acc = ["&gt;", ..acc]
      do_escape(rest, skip + 1, original, acc, 0, False)
    }

    False, "&" -> {
      let rest = drop_first(string)
      let acc = ["&amp;", ..acc]
      do_escape(rest, skip + 1, original, acc, 0, False)
    }

    False, "\"" -> {
      let rest = drop_first(string)
      let acc = ["&quot;", ..acc]
      do_escape(rest, skip + 1, original, acc, 0, False)
    }

    False, "'" -> {
      let rest = drop_first(string)
      let acc = ["&#39;", ..acc]
      do_escape(rest, skip + 1, original, acc, 0, False)
    }

    // `first` returns an empty string once the input is exhausted.
    False, "" -> acc

    // For any other char that doesn't need to be escaped we go into an inner
    // loop, consuming as many "non-escapable" chars as possible.
    False, _ -> {
      let rest = drop_first(string)
      do_escape(rest, skip, original, acc, 1, True)
    }

    // Inside a run: a char that needs escaping ends it, so we push both the
    // slice covering the run and the escaped char, then move `skip` past both.
    True, "<" -> {
      let rest = drop_first(string)
      let chunk = slice(original, skip, len)
      let acc = ["&lt;", chunk, ..acc]
      do_escape(rest, skip + len + 1, original, acc, 0, False)
    }

    True, ">" -> {
      let rest = drop_first(string)
      let chunk = slice(original, skip, len)
      let acc = ["&gt;", chunk, ..acc]
      do_escape(rest, skip + len + 1, original, acc, 0, False)
    }

    True, "&" -> {
      let rest = drop_first(string)
      let chunk = slice(original, skip, len)
      let acc = ["&amp;", chunk, ..acc]
      do_escape(rest, skip + len + 1, original, acc, 0, False)
    }

    True, "\"" -> {
      let rest = drop_first(string)
      let chunk = slice(original, skip, len)
      let acc = ["&quot;", chunk, ..acc]
      do_escape(rest, skip + len + 1, original, acc, 0, False)
    }

    True, "'" -> {
      let rest = drop_first(string)
      let chunk = slice(original, skip, len)
      let acc = ["&#39;", chunk, ..acc]
      do_escape(rest, skip + len + 1, original, acc, 0, False)
    }

    True, "" ->
      case skip {
        // `skip` only moves forward once a char has been escaped: if it is
        // still 0 nothing needed escaping and the original string can be
        // returned as is.
        0 -> [original]
        _ -> {
          let chunk = slice(original, skip, len)
          [chunk, ..acc]
        }
      }

    // If a char doesn't need escaping we keep increasing the length of the
    // slice we're going to take.
    True, _ -> {
      let rest = drop_first(string)
      do_escape(rest, skip, original, acc, len + 1, True)
    }
  }
}

// Returns the first unit of the string, or an empty string if there is none.
@external(erlang, "houdini_ffi", "first")
@external(javascript, "../houdini.ffi.mjs", "first")
fn first(string: String) -> String

// Returns the string without its first unit.
@external(erlang, "houdini_ffi", "drop_first")
@external(javascript, "../houdini.ffi.mjs", "drop_first")
fn drop_first(string: String) -> String

// Returns the slice of `string` that starts at offset `from` and spans `len`
// units. The third argument is a length, not an end index.
@external(erlang, "houdini_ffi", "slice")
@external(javascript, "../houdini.ffi.mjs", "slice")
fn slice(string: String, from: Int, len: Int) -> String
+32
test/benchmark.gleam
import gleam/int
import gleam/io
import gleam/list
import gleam/string
import gleamy/bench
import houdini
import simplifile

/// Benchmarks `houdini.escape` on the concatenated contents of all the `.html`
/// files found under the current directory and prints a table with the
/// results.
pub fn main() {
  let html = read_project_files()

  // The input is labelled with its size in KB.
  let size_in_kb = string.byte_size(html) / 1024
  let label = int.to_string(size_in_kb) <> "KB"

  let inputs = [bench.Input(label, html)]
  let functions = [bench.Function("houdini.escape", houdini.escape)]
  let options = [bench.Duration(5000), bench.Warmup(2000)]

  let results = bench.run(inputs, functions, options)
  io.println(bench.table(results, [bench.IPS, bench.Min, bench.P(99)]))
}

// Reads every file under "." whose name ends with ".html" and joins their
// contents into a single string, in the order the files are listed.
// Panics if the files cannot be listed or if one of them cannot be read.
fn read_project_files() -> String {
  let assert Ok(paths) = simplifile.get_files(".")

  list.fold(over: paths, from: "", with: fn(contents, path) {
    case string.ends_with(path, ".html") {
      True -> {
        let assert Ok(html) = simplifile.read(path)
        contents <> html
      }
      False -> contents
    }
  })
}
+70
test/houdini_test.gleam
import gleam/list
import gleam/string
import gleeunit
import gleeunit/should
import houdini
import qcheck.{type Generator}

pub fn main() -> Nil {
  gleeunit.main()
}

// Every character that has to be escaped, paired with its escaped version.
// "&" comes first so that the reference implementation in `escaped` doesn't
// escape the ampersands introduced by the other replacements.
const conversions = [
  #("&", "&amp;"),
  #("<", "&lt;"),
  #(">", "&gt;"),
  #("\"", "&quot;"),
  #("'", "&#39;"),
]

// Each special character, on its own, is turned into its escaped version.
pub fn simple_conversions_test() {
  list.each(conversions, fn(conversion) {
    let #(value, expected) = conversion
    should.equal(houdini.escape(value), expected)
  })
}

// A fixed input mixing special characters with a multi-byte codepoint.
pub fn strange_unicode_string_test() {
  let input = ">a>'ࣉa>aa<a'>><\"aa&aࣉ>aࣉaaaa>ࣉa\"a'&a<<<&\"aaa\"&a>aa\">><'ࣉ\""
  should.equal(houdini.escape(input), escaped(input))
}

// Strings made only of ASCII alphanumeric and whitespace characters must come
// out of `houdini.escape` unchanged.
pub fn regular_string_is_left_unchanged_test() {
  let codepoints =
    qcheck.from_generators(qcheck.alphanumeric_ascii_codepoint(), [
      qcheck.ascii_whitespace_codepoint(),
    ])

  given(qcheck.string_from(codepoints), fn(regular_string) {
    should.equal(houdini.escape(regular_string), regular_string)
  })
}

// Strings mixing special and alphanumeric characters must be escaped exactly
// like the reference implementation does.
pub fn string_with_special_characters_is_escaped_test() {
  let codepoints =
    qcheck.from_generators(escaped_codepoints(), [
      qcheck.alphanumeric_ascii_codepoint(),
    ])

  given(qcheck.string_from(codepoints), fn(input) {
    should.equal(houdini.escape(input), escaped(input))
  })
}

// --- PROPERTY HELPERS --------------------------------------------------------

// Generates one of the codepoints that need escaping, as listed in
// `conversions`.
fn escaped_codepoints() -> Generator(UtfCodepoint) {
  let generators =
    list.map(conversions, fn(conversion) {
      let assert [codepoint] = string.to_utf_codepoints(conversion.0)
      qcheck.constant(codepoint)
    })

  let assert [first, ..rest] = generators
  qcheck.from_generators(first, rest)
}

// Runs `assertion` against 5000 values produced by `generator`.
fn given(generator: Generator(a), assertion: fn(a) -> Nil) -> Nil {
  qcheck.default_config()
  |> qcheck.with_test_count(5000)
  |> qcheck.run(generator, assertion)
}

// Reference implementation of escaping: applies every replacement listed in
// `conversions`, one after the other, to the whole string.
fn escaped(input: String) -> String {
  list.fold(over: conversions, from: input, with: fn(text, conversion) {
    let #(value, replacement) = conversion
    string.replace(in: text, each: value, with: replacement)
  })
}