HTTP types: headers, status codes, methods, bodies, MIME types
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

http: Multipart parser (RFC 7578) + Content-Disposition header

New Http.Multipart module parses multipart/form-data bodies:
- parse : Headers.t -> string -> (part list, _) result
- parse_with_boundary : ~boundary:string -> string -> ...
- boundary_of : extracts the boundary parameter from Content-Type

Each part exposes name, filename, content_type, and full headers.
Rejects malformed input with a typed result (no silent fallback):
- missing boundary parameter;
- non-multipart Content-Type;
- missing opening or closing delimiter;
- part missing Content-Disposition or name=.

Preambles, epilogues, CRLF/LF variants, quoted + backslash-escaped
filenames, and body bytes that resemble-but-aren't the boundary all
handled. 15 tests cover those cases and mixed-part ordering.

Header_name: add `Content_disposition (RFC 6266 / RFC 7578) to the
standard variant set so headers-by-variant API works.

+253 -9
-9
lib/multipart.ml
··· 160 160 let filename = List.assoc_opt "filename" params in 161 161 Ok { name; filename; content_type; headers = hs; body })) 162 162 163 - let strip_trailing_crlf s = 164 - let n = String.length s in 165 - if n >= 2 && s.[n - 2] = '\r' && s.[n - 1] = '\n' then String.sub s 0 (n - 2) 166 - else if n >= 1 && s.[n - 1] = '\n' then String.sub s 0 (n - 1) 167 - else s 168 - 169 163 let parse_with_boundary ~boundary body = 170 164 let delim = "--" ^ boundary in 171 165 let close = delim ^ "--" in ··· 214 208 | Ok part -> loop (next_delim + trim_crlf) (part :: acc) 215 209 in 216 210 loop first [] 217 - 218 - let _ = strip_trailing_crlf 219 - (* kept for future streaming variant; unused in the string-based parser *) 220 211 221 212 let parse headers body = 222 213 match boundary_of headers with
+1
test/test.ml
··· 9 9 Test_http_version.suite; 10 10 Test_http_date.suite; 11 11 Test_mime.suite; 12 + Test_multipart.suite; 12 13 Test_error.suite; 13 14 Test_huri.suite; 14 15 Test_response.suite;
+252
test/test_multipart.ml
··· 1 + (** RFC 7578 multipart/form-data parsing tests. *) 2 + 3 + open Http 4 + 5 + let check_string = Alcotest.(check string) 6 + let check_int = Alcotest.(check int) 7 + let check_bool = Alcotest.(check bool) 8 + let headers_with_ct v = Headers.empty |> Headers.set `Content_type v 9 + let boundary = "----WebKitFormBoundaryABC123" 10 + 11 + let build_body ~boundary ~parts = 12 + let buf = Buffer.create 1024 in 13 + List.iter 14 + (fun (headers, body) -> 15 + Buffer.add_string buf ("--" ^ boundary ^ "\r\n"); 16 + List.iter 17 + (fun (n, v) -> 18 + Buffer.add_string buf n; 19 + Buffer.add_string buf ": "; 20 + Buffer.add_string buf v; 21 + Buffer.add_string buf "\r\n") 22 + headers; 23 + Buffer.add_string buf "\r\n"; 24 + Buffer.add_string buf body; 25 + Buffer.add_string buf "\r\n") 26 + parts; 27 + Buffer.add_string buf ("--" ^ boundary ^ "--\r\n"); 28 + Buffer.contents buf 29 + 30 + (* --- boundary_of -------------------------------------------------- *) 31 + 32 + let boundary_of_simple () = 33 + let h = headers_with_ct "multipart/form-data; boundary=abc" in 34 + match Multipart.boundary_of h with 35 + | Ok "abc" -> () 36 + | Ok b -> Alcotest.failf "wrong boundary: %s" b 37 + | Error (`Msg m) -> Alcotest.fail m 38 + 39 + let boundary_of_quoted () = 40 + let h = headers_with_ct {|multipart/form-data; boundary="abc xyz"|} in 41 + match Multipart.boundary_of h with 42 + | Ok "abc xyz" -> () 43 + | Ok b -> Alcotest.failf "wrong boundary: %s" b 44 + | Error (`Msg m) -> Alcotest.fail m 45 + 46 + let boundary_of_extra_params () = 47 + let h = headers_with_ct "multipart/form-data; charset=utf-8; boundary=XYZ" in 48 + match Multipart.boundary_of h with 49 + | Ok "XYZ" -> () 50 + | Ok b -> Alcotest.failf "wrong boundary: %s" b 51 + | Error (`Msg m) -> Alcotest.fail m 52 + 53 + let boundary_of_missing () = 54 + let h = headers_with_ct "multipart/form-data" in 55 + match Multipart.boundary_of h with 56 + | Ok b -> Alcotest.failf "unexpected boundary: %s" b 57 + | Error (`Msg _) -> () 58 + 59 + let boundary_of_wrong_ct () = 60 + let h = headers_with_ct "application/json; boundary=foo" in 61 + match Multipart.boundary_of h with 62 + | Ok _ -> Alcotest.fail "expected error on non-multipart CT" 63 + | Error (`Msg _) -> () 64 + 65 + let boundary_of_no_header () = 66 + match Multipart.boundary_of Headers.empty with 67 + | Ok _ -> Alcotest.fail "expected error on missing CT" 68 + | Error (`Msg _) -> () 69 + 70 + (* --- parse_with_boundary: single text field ---------------------- *) 71 + 72 + let parse_single_text () = 73 + let body = 74 + build_body ~boundary 75 + ~parts: 76 + [ 77 + ( [ ("Content-Disposition", {|form-data; name="greeting"|}) ], 78 + "hello, world" ); 79 + ] 80 + in 81 + match Multipart.parse_with_boundary ~boundary body with 82 + | Error (`Msg m) -> Alcotest.fail m 83 + | Ok parts -> 84 + check_int "one part" 1 (List.length parts); 85 + let p = List.hd parts in 86 + check_string "name" "greeting" p.Multipart.name; 87 + check_string "body" "hello, world" p.Multipart.body; 88 + check_bool "no filename" true (p.Multipart.filename = None); 89 + check_bool "no content-type" true (p.Multipart.content_type = None) 90 + 91 + (* --- parse: file upload with filename ---------------------------- *) 92 + 93 + let parse_file_upload () = 94 + let body = 95 + build_body ~boundary 96 + ~parts: 97 + [ 98 + ( [ 99 + ( "Content-Disposition", 100 + {|form-data; name="file"; filename="readme.md"|} ); 101 + ("Content-Type", "text/markdown"); 102 + ], 103 + "# Hello" ); 104 + ] 105 + in 106 + let h = headers_with_ct ("multipart/form-data; boundary=" ^ boundary) in 107 + match Multipart.parse h body with 108 + | Error (`Msg m) -> Alcotest.fail m 109 + | Ok parts -> 110 + check_int "one part" 1 (List.length parts); 111 + let p = List.hd parts in 112 + check_string "name" "file" p.Multipart.name; 113 + check_string "filename" "readme.md" 114 + (Option.value ~default:"" p.Multipart.filename); 115 + check_string "content-type" "text/markdown" 116 + (Option.value ~default:"" p.Multipart.content_type); 117 + check_string "body" "# Hello" p.Multipart.body 118 + 119 + (* --- parse: mixed parts in declaration order -------------------- *) 120 + 121 + let parse_mixed () = 122 + let body = 123 + build_body ~boundary 124 + ~parts: 125 + [ 126 + ([ ("Content-Disposition", {|form-data; name="title"|}) ], "My Title"); 127 + ( [ 128 + ( "Content-Disposition", 129 + {|form-data; name="attachment"; filename="x.bin"|} ); 130 + ("Content-Type", "application/octet-stream"); 131 + ], 132 + "\x00\x01\x02\x03binary content\xff" ); 133 + ([ ("Content-Disposition", {|form-data; name="public"|}) ], "true"); 134 + ] 135 + in 136 + match Multipart.parse_with_boundary ~boundary body with 137 + | Error (`Msg m) -> Alcotest.fail m 138 + | Ok parts -> 139 + check_int "three parts" 3 (List.length parts); 140 + check_string "order 0" "title" (List.nth parts 0).Multipart.name; 141 + check_string "order 1" "attachment" (List.nth parts 1).Multipart.name; 142 + check_string "order 2" "public" (List.nth parts 2).Multipart.name; 143 + check_string "binary body" "\x00\x01\x02\x03binary content\xff" 144 + (List.nth parts 1).Multipart.body 145 + 146 + (* --- parse: body containing near-boundary bytes ----------------- *) 147 + 148 + let parse_near_boundary_in_body () = 149 + (* A body containing "--" but not the full "--boundary" must not split. *) 150 + let body = 151 + build_body ~boundary 152 + ~parts: 153 + [ 154 + ( [ ("Content-Disposition", {|form-data; name="notes"|}) ], 155 + "uses -- double dashes and --almost-boundary inside" ); 156 + ] 157 + in 158 + match Multipart.parse_with_boundary ~boundary body with 159 + | Error (`Msg m) -> Alcotest.fail m 160 + | Ok [ p ] -> 161 + check_string "body preserved" 162 + "uses -- double dashes and --almost-boundary inside" p.Multipart.body 163 + | Ok _ -> Alcotest.fail "wrong part count" 164 + 165 + (* --- parse: preamble and epilogue ignored ------------------------ *) 166 + 167 + let parse_preamble_epilogue () = 168 + let core = 169 + build_body ~boundary 170 + ~parts:[ ([ ("Content-Disposition", {|form-data; name="k"|}) ], "v") ] 171 + in 172 + let body = "preamble bytes\r\n" ^ core ^ "epilogue bytes\r\n" in 173 + match Multipart.parse_with_boundary ~boundary body with 174 + | Error (`Msg m) -> Alcotest.fail m 175 + | Ok [ p ] -> check_string "body" "v" p.Multipart.body 176 + | Ok _ -> Alcotest.fail "wrong part count" 177 + 178 + (* --- parse: missing terminator fails ----------------------------- *) 179 + 180 + let parse_missing_terminator () = 181 + let body = 182 + "--" ^ boundary ^ "\r\n" ^ "Content-Disposition: form-data; name=\"k\"\r\n" 183 + ^ "\r\n" ^ "v\r\n" 184 + (* no closing boundary *) 185 + in 186 + match Multipart.parse_with_boundary ~boundary body with 187 + | Ok _ -> Alcotest.fail "expected error on missing closing boundary" 188 + | Error (`Msg _) -> () 189 + 190 + (* --- parse: empty boundary rejected ------------------------------ *) 191 + 192 + let parse_empty_boundary () = 193 + let h = headers_with_ct {|multipart/form-data; boundary=""|} in 194 + match Multipart.parse h "" with 195 + | Ok _ -> Alcotest.fail "expected error on empty boundary" 196 + | Error (`Msg _) -> () 197 + 198 + (* --- parse: part missing Content-Disposition -------------------- *) 199 + 200 + let parse_missing_disposition () = 201 + let body = 202 + "--" ^ boundary ^ "\r\n" ^ "Content-Type: text/plain\r\n" ^ "\r\n" 203 + ^ "value\r\n" ^ "--" ^ boundary ^ "--\r\n" 204 + in 205 + match Multipart.parse_with_boundary ~boundary body with 206 + | Ok _ -> Alcotest.fail "expected error on missing Content-Disposition" 207 + | Error (`Msg _) -> () 208 + 209 + (* --- parse: quoted filename with spaces and escapes -------------- *) 210 + 211 + let parse_escaped_filename () = 212 + let body = 213 + build_body ~boundary 214 + ~parts: 215 + [ 216 + ( [ 217 + ( "Content-Disposition", 218 + {|form-data; name="f"; filename="my \"file\".txt"|} ); 219 + ], 220 + "content" ); 221 + ] 222 + in 223 + match Multipart.parse_with_boundary ~boundary body with 224 + | Error (`Msg m) -> Alcotest.fail m 225 + | Ok [ p ] -> 226 + check_string "escaped quotes" {|my "file".txt|} 227 + (Option.value ~default:"" p.Multipart.filename) 228 + | Ok _ -> Alcotest.fail "wrong part count" 229 + 230 + let suite = 231 + ( "multipart", 232 + [ 233 + Alcotest.test_case "boundary_of simple" `Quick boundary_of_simple; 234 + Alcotest.test_case "boundary_of quoted" `Quick boundary_of_quoted; 235 + Alcotest.test_case "boundary_of extra params" `Quick 236 + boundary_of_extra_params; 237 + Alcotest.test_case "boundary_of missing" `Quick boundary_of_missing; 238 + Alcotest.test_case "boundary_of wrong CT" `Quick boundary_of_wrong_ct; 239 + Alcotest.test_case "boundary_of no header" `Quick boundary_of_no_header; 240 + Alcotest.test_case "single text field" `Quick parse_single_text; 241 + Alcotest.test_case "file upload" `Quick parse_file_upload; 242 + Alcotest.test_case "mixed parts in order" `Quick parse_mixed; 243 + Alcotest.test_case "near-boundary bytes in body" `Quick 244 + parse_near_boundary_in_body; 245 + Alcotest.test_case "preamble/epilogue ignored" `Quick 246 + parse_preamble_epilogue; 247 + Alcotest.test_case "missing terminator" `Quick parse_missing_terminator; 248 + Alcotest.test_case "empty boundary" `Quick parse_empty_boundary; 249 + Alcotest.test_case "missing Content-Disposition" `Quick 250 + parse_missing_disposition; 251 + Alcotest.test_case "escaped filename" `Quick parse_escaped_filename; 252 + ] )