An experimental and small s3 client for ocaml
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

initial commit

+2748
+52
.gitignore
··· 1 + # OCaml compiled artifacts 2 + *.annot 3 + *.cmo 4 + *.cma 5 + *.cmi 6 + *.a 7 + *.o 8 + *.cmx 9 + *.cmxs 10 + *.cmxa 11 + *.cmt 12 + *.cmti 13 + *.byte 14 + *.native 15 + *.install 16 + 17 + # Build directories 18 + _build/ 19 + _opam/ 20 + 21 + # Editor/IDE 22 + .vscode/ 23 + .idea/ 24 + .merlin 25 + 26 + # Node.js 27 + node_modules/ 28 + 29 + # OpenCode 30 + .opencode/ 31 + 32 + # Beads 33 + .beads/*.log 34 + .beads/*.lock 35 + 36 + # Benchmark results and artifacts 37 + results/ 38 + **/results/*.txt 39 + bench/certs/ 40 + bench/fasthttp/ 41 + bench/hcs/alloc_*.ml 42 + bench/nethttp/bench-nethttp 43 + perf.data 44 + perf*.data 45 + 46 + # Misc 47 + setup.data 48 + setup.log 49 + *.sh 50 + 51 + # LAS tutorial local DB 52 + las.db
+2
.ocamlformat
··· 1 + version = 0.29.0 2 + profile = default
+264
README.md
··· 1 + # s3 2 + 3 + `s3` is an OCaml library for AWS S3 and S3-compatible APIs. 4 + 5 + It includes: 6 + 7 + - streaming upload and download 8 + - list, head, get, put, copy, delete, and multipart operations 9 + - AWS SigV4 signing 10 + - support for custom endpoints and bucket addressing styles 11 + - a small `s3-cli` binary for quick testing 12 + 13 + ## Requirements 14 + 15 + - OCaml 5.1+ 16 + - Eio runtime 17 + 18 + Main dependencies used by the library: 19 + 20 + - `eio` 21 + - `tls` 22 + - `xmlm` 23 + - `simdjsont` 24 + 25 + ## Quick Start 26 + 27 + ### Environment 28 + 29 + The CLI uses these environment variables: 30 + 31 + - `S3_BUCKET` 32 + - `S3_REGION` 33 + - `S3_ACCESS_KEY_ID` 34 + - `S3_SECRET_ACCESS_KEY` 35 + - `S3_ENDPOINT` 36 + 37 + Optional: 38 + 39 + - `S3_SESSION_TOKEN` 40 + - `S3_ADDRESSING_STYLE` with `path` or `virtual` 41 + - `S3_MULTIPART_THRESHOLD` in bytes 42 + - `S3_MULTIPART_PART_SIZE` in bytes 43 + 44 + Example for Backblaze B2: 45 + 46 + ```bash 47 + export S3_BUCKET=my-bucket 48 + export S3_REGION=eu-central-003 49 + export S3_ACCESS_KEY_ID=... 50 + export S3_SECRET_ACCESS_KEY=... 51 + export S3_ENDPOINT=s3.......backblazeb2.com 52 + export S3_ADDRESSING_STYLE=virtual 53 + ``` 54 + 55 + Example for AWS S3: 56 + 57 + ```bash 58 + export S3_BUCKET=my-bucket 59 + export S3_REGION=eu-west-1 60 + export S3_ACCESS_KEY_ID=... 61 + export S3_SECRET_ACCESS_KEY=... 62 + export S3_ENDPOINT=s3........amazonaws.com 63 + export S3_ADDRESSING_STYLE=virtual 64 + ``` 65 + 66 + ### CLI 67 + 68 + List objects: 69 + 70 + ```bash 71 + dune exec -- s3-cli list 72 + dune exec -- s3-cli list hcs 73 + ``` 74 + 75 + Upload a file: 76 + 77 + ```bash 78 + dune exec -- s3-cli put ./local-file.txt folder/remote-file.txt 79 + ``` 80 + 81 + `put` automatically switches to multipart upload for large files. 82 + 83 + To force multipart for testing, set a low threshold: 84 + 85 + ```bash 86 + export S3_MULTIPART_THRESHOLD=5242880 87 + export S3_MULTIPART_PART_SIZE=5242880 88 + dune exec -- s3-cli put ./large-file.bin test/large-file.bin 89 + ``` 90 + 91 + Download a file: 92 + 93 + ```bash 94 + dune exec -- s3-cli get folder/remote-file.txt ./local-file.txt 95 + ``` 96 + 97 + Delete an object: 98 + 99 + ```bash 100 + dune exec -- s3-cli delete folder/remote-file.txt 101 + ``` 102 + 103 + Show request/signing debug output: 104 + 105 + ```bash 106 + dune exec -- s3-cli list --debug hcs 107 + ``` 108 + 109 + You can also use `--list`, `--put`, `--get`, and `--delete` as shorthand entry forms. 110 + 111 + ## Library Usage 112 + 113 + Create a client: 114 + 115 + ```ocaml 116 + let region = Result.get_ok (S3.Region.of_string "eu-west-1") 117 + 118 + let endpoint = 119 + S3.Endpoint.custom 120 + ~scheme:"https" 121 + ~host:"s3.eu-west-1.amazonaws.com" 122 + ~addressing_style:S3.Endpoint.Virtual_hosted 123 + ~signing_region:region 124 + () 125 + 126 + let credentials = 127 + S3.Credentials.Source.static 128 + { 129 + S3.Credentials.access_key_id = "..."; 130 + secret_access_key = "..."; 131 + session_token = None; 132 + expiration = None; 133 + } 134 + 135 + let client = S3.create ~endpoint ~credentials () 136 + ``` 137 + 138 + List objects: 139 + 140 + ```ocaml 141 + let list env client bucket prefix = 142 + S3.list_objects_v2 ~env client 143 + { 144 + S3.List_objects_v2.bucket = bucket; 145 + prefix; 146 + delimiter = None; 147 + continuation_token = None; 148 + start_after = None; 149 + max_keys = None; 150 + } 151 + ``` 152 + 153 + Upload a string: 154 + 155 + ```ocaml 156 + let put env client bucket key contents = 157 + S3.put_object ~env client 158 + { 159 + S3.Put_object.bucket = bucket; 160 + key; 161 + body = S3.Stream.of_string contents; 162 + content_type = Some "text/plain"; 163 + metadata = []; 164 + checksum = None; 165 + } 166 + ``` 167 + 168 + Multipart upload: 169 + 170 + ```ocaml 171 + let multipart_put env client bucket key part1 part2 = 172 + match 173 + S3.create_multipart_upload ~env client 174 + { 175 + S3.Multipart.bucket = bucket; 176 + key; 177 + content_type = Some "application/octet-stream"; 178 + metadata = []; 179 + } 180 + with 181 + | Error err -> Error err 182 + | Ok upload -> ( 183 + match 184 + S3.upload_part ~env client 185 + { 186 + S3.Multipart.upload; 187 + part_number = 1; 188 + body = S3.Stream.of_string part1; 189 + checksum = None; 190 + } 191 + with 192 + | Error err -> 193 + ignore (S3.abort_multipart_upload ~env client upload); 194 + Error err 195 + | Ok part1_response -> ( 196 + match 197 + S3.upload_part ~env client 198 + { 199 + S3.Multipart.upload; 200 + part_number = 2; 201 + body = S3.Stream.of_string part2; 202 + checksum = None; 203 + } 204 + with 205 + | Error err -> 206 + ignore (S3.abort_multipart_upload ~env client upload); 207 + Error err 208 + | Ok part2_response -> 209 + let open S3.Multipart in 210 + let parts = 211 + [ { part_number = 1; etag = Option.get part1_response.etag; checksum = None }; 212 + { part_number = 2; etag = Option.get part2_response.etag; checksum = None } ] 213 + in 214 + S3.complete_multipart_upload ~env client { upload; parts })) 215 + ``` 216 + 217 + Download to a buffer: 218 + 219 + ```ocaml 220 + let get_to_string env client bucket key = 221 + let buffer = Buffer.create 1024 in 222 + let sink = S3.Stream.to_buffer buffer in 223 + match 224 + S3.get_object ~env client 225 + { 226 + S3.Get_object.bucket = bucket; 227 + key; 228 + range = None; 229 + if_match = None; 230 + if_none_match = None; 231 + } 232 + ~sink 233 + with 234 + | Ok _ -> Ok (Buffer.contents buffer) 235 + | Error err -> Error err 236 + ``` 237 + 238 + Delete an object: 239 + 240 + ```ocaml 241 + let delete env client bucket key = 242 + S3.delete_object ~env client 243 + { S3.Delete_object.bucket = bucket; key; version_id = None } 244 + ``` 245 + 246 + ## Addressing Style 247 + 248 + Use `path` when the bucket is part of the URL path: 249 + 250 + - `https://s3.example.com/my-bucket/object.txt` 251 + 252 + Use `virtual` when the bucket is part of the host: 253 + 254 + - `https://my-bucket.s3.example.com/object.txt` 255 + 256 + Some S3-compatible providers require `virtual` for correct signing. 257 + 258 + ## Notes 259 + 260 + - Runtime capabilities are passed with `~env` on operations. 261 + - `S3_ENDPOINT` may be a bare host or a full URI. 262 + - For streaming uploads from files, use `S3.Stream.of_flow` or a custom `S3.Stream.source`. 263 + - For streaming downloads, pass a `S3.Stream.sink` to `S3.get_object`. 264 + - Multipart upload is supported through `create_multipart_upload`, `upload_part`, `complete_multipart_upload`, and `abort_multipart_upload`.
+4
bin/dune
··· 1 + (executable 2 + (name main) 3 + (public_name s3-cli) 4 + (libraries climate eio_main mirage-crypto-rng.unix s3 uri))
+399
bin/main.ml
··· 1 + open Climate 2 + 3 + type action = 4 + | Put of { src : string; dst : string; debug : bool } 5 + | Get of { src : string; dst : string; debug : bool } 6 + | List of { prefix : string option; debug : bool } 7 + | Delete of { key : string; debug : bool } 8 + 9 + let getenv name = 10 + match Sys.getenv_opt name with 11 + | Some value when String.trim value <> "" -> Ok value 12 + | _ -> Error (Printf.sprintf "missing environment variable %s" name) 13 + 14 + let read_required_env () = 15 + match 16 + ( getenv "S3_BUCKET", 17 + getenv "S3_REGION", 18 + getenv "S3_ACCESS_KEY_ID", 19 + getenv "S3_SECRET_ACCESS_KEY", 20 + getenv "S3_ENDPOINT" ) 21 + with 22 + | Ok bucket, Ok region, Ok access_key_id, Ok secret_access_key, Ok endpoint -> 23 + Ok (bucket, region, access_key_id, secret_access_key, endpoint) 24 + | Error err, _, _, _, _ 25 + | _, Error err, _, _, _ 26 + | _, _, Error err, _, _ 27 + | _, _, _, Error err, _ 28 + | _, _, _, _, Error err -> 29 + Error err 30 + 31 + let region_of_string value = 32 + match S3.Region.of_string value with 33 + | Ok region -> Ok region 34 + | Error err -> Error (S3.Error.to_string err) 35 + 36 + let addressing_style_of_env () = 37 + match Sys.getenv_opt "S3_ADDRESSING_STYLE" with 38 + | None | Some "" -> Ok S3.Endpoint.Path 39 + | Some value -> ( 40 + match String.lowercase_ascii (String.trim value) with 41 + | "path" -> Ok S3.Endpoint.Path 42 + | "virtual" | "virtual-hosted" | "virtual_hosted" -> 43 + Ok S3.Endpoint.Virtual_hosted 44 + | _ -> 45 + Error 46 + "S3_ADDRESSING_STYLE must be one of: path, virtual, virtual-hosted") 47 + 48 + let uri_of_endpoint value = 49 + if String.contains value ':' then Uri.of_string value 50 + else Uri.of_string ("https://" ^ value) 51 + 52 + let endpoint_of_env ~region value = 53 + let uri = uri_of_endpoint value in 54 + match Uri.host uri with 55 + | None -> Error "S3_ENDPOINT must include a host" 56 + | Some host -> ( 57 + match addressing_style_of_env () with 58 + | Error _ as err -> err 59 + | Ok addressing_style -> 60 + let scheme = Option.value (Uri.scheme uri) ~default:"https" in 61 + let base_path = 62 + match Uri.path uri with "" | "/" -> None | path -> Some path 63 + in 64 + Ok 65 + (S3.Endpoint.custom ?port:(Uri.port uri) ?base_path ~scheme ~host 66 + ~addressing_style ~signing_region:region ())) 67 + 68 + let client_from_env ~debug () = 69 + match read_required_env () with 70 + | Error msg -> Error msg 71 + | Ok (bucket, region, access_key_id, secret_access_key, endpoint) -> ( 72 + match region_of_string region with 73 + | Error msg -> Error msg 74 + | Ok region -> ( 75 + match endpoint_of_env ~region endpoint with 76 + | Error msg -> Error msg 77 + | Ok endpoint -> 78 + let credentials = 79 + S3.Credentials.Source.static 80 + { 81 + S3.Credentials.access_key_id; 82 + secret_access_key; 83 + session_token = Sys.getenv_opt "S3_SESSION_TOKEN"; 84 + expiration = None; 85 + } 86 + in 87 + let debug_fn = 88 + if debug then Some (fun msg -> prerr_endline ("[debug] " ^ msg)) 89 + else None 90 + in 91 + Ok (bucket, S3.create ~endpoint ~credentials ?debug:debug_fn ()))) 92 + 93 + let with_open_in path fn = 94 + let ic = open_in_bin path in 95 + Fun.protect ~finally:(fun () -> close_in_noerr ic) (fun () -> fn ic) 96 + 97 + let with_open_out path fn = 98 + let oc = open_out_bin path in 99 + Fun.protect ~finally:(fun () -> close_out_noerr oc) (fun () -> fn oc) 100 + 101 + let source_of_in_channel ~length ic = 102 + let buffer = Bytes.create 0x10000 in 103 + { 104 + S3.Stream.length = Some length; 105 + emit = 106 + (fun push -> 107 + let rec loop () = 108 + let n = input ic buffer 0 (Bytes.length buffer) in 109 + if n > 0 then ( 110 + push buffer ~off:0 ~len:n; 111 + loop ()) 112 + in 113 + loop ()); 114 + } 115 + 116 + let content_type_of_path path = 117 + match String.lowercase_ascii (Filename.extension path) with 118 + | ".txt" -> Some "text/plain" 119 + | ".html" -> Some "text/html" 120 + | ".json" -> Some "application/json" 121 + | ".xml" -> Some "application/xml" 122 + | ".csv" -> Some "text/csv" 123 + | ".jpg" | ".jpeg" -> Some "image/jpeg" 124 + | ".png" -> Some "image/png" 125 + | ".gif" -> Some "image/gif" 126 + | ".pdf" -> Some "application/pdf" 127 + | ".tar" -> Some "application/x-tar" 128 + | ".gz" -> Some "application/gzip" 129 + | ".zip" -> Some "application/zip" 130 + | _ -> None 131 + 132 + let getenv_int name ~default = 133 + match Sys.getenv_opt name with 134 + | None | Some "" -> Ok default 135 + | Some value -> ( 136 + match int_of_string_opt (String.trim value) with 137 + | Some n when n > 0 -> Ok n 138 + | _ -> Error (Printf.sprintf "%s must be a positive integer" name)) 139 + 140 + let multipart_settings () = 141 + match 142 + ( getenv_int "S3_MULTIPART_THRESHOLD" ~default:(64 * 1024 * 1024), 143 + getenv_int "S3_MULTIPART_PART_SIZE" ~default:(8 * 1024 * 1024) ) 144 + with 145 + | Ok threshold, Ok part_size -> 146 + if part_size < 5 * 1024 * 1024 then 147 + Error "S3_MULTIPART_PART_SIZE must be at least 5242880" 148 + else Ok (threshold, part_size) 149 + | Error err, _ | _, Error err -> Error err 150 + 151 + let upload_part_from_channel ~part_size ic = 152 + let buffer = Bytes.create part_size in 153 + let rec read_loop off remaining = 154 + if remaining = 0 then off 155 + else 156 + let n = input ic buffer off remaining in 157 + if n = 0 then off else read_loop (off + n) (remaining - n) 158 + in 159 + let len = read_loop 0 part_size in 160 + if len = 0 then None 161 + else 162 + Some 163 + { 164 + S3.Stream.length = Some (Int64.of_int len); 165 + emit = (fun push -> push buffer ~off:0 ~len); 166 + } 167 + 168 + let multipart_put ~env client ~bucket ~src ~dst ~part_size = 169 + let content_type = content_type_of_path src in 170 + match 171 + S3.create_multipart_upload ~env client 172 + { S3.Multipart.bucket; key = dst; content_type; metadata = [] } 173 + with 174 + | Error err -> Error err 175 + | Ok upload -> 176 + with_open_in src (fun ic -> 177 + let rec upload_parts part_number acc = 178 + match upload_part_from_channel ~part_size ic with 179 + | None -> Ok (List.rev acc) 180 + | Some body -> ( 181 + match 182 + S3.upload_part ~env client 183 + { S3.Multipart.upload; part_number; body; checksum = None } 184 + with 185 + | Error err -> Error err 186 + | Ok response -> ( 187 + match response.etag with 188 + | None -> 189 + Error 190 + (S3.Error.Protocol 191 + "multipart upload part missing ETag") 192 + | Some etag -> 193 + upload_parts (part_number + 1) 194 + ({ S3.Multipart.part_number; etag; checksum = None } 195 + :: acc))) 196 + in 197 + match upload_parts 1 [] with 198 + | Error err -> 199 + ignore (S3.abort_multipart_upload ~env client upload); 200 + Error err 201 + | Ok parts -> ( 202 + match 203 + S3.complete_multipart_upload ~env client 204 + { S3.Multipart.upload; parts } 205 + with 206 + | Ok response -> Ok response 207 + | Error err -> 208 + ignore (S3.abort_multipart_upload ~env client upload); 209 + Error err)) 210 + 211 + let single_put ~env client ~bucket ~src ~dst = 212 + let size = Int64.of_int Unix.(stat src).st_size in 213 + with_open_in src (fun ic -> 214 + let body = source_of_in_channel ~length:size ic in 215 + S3.put_object ~env client 216 + { 217 + S3.Put_object.bucket; 218 + key = dst; 219 + body; 220 + content_type = content_type_of_path src; 221 + metadata = []; 222 + checksum = None; 223 + }) 224 + 225 + let put ~env client ~bucket ~src ~dst = 226 + let size = Unix.(stat src).st_size in 227 + match multipart_settings () with 228 + | Error msg -> Error (S3.Error.Invalid_argument msg) 229 + | Ok (threshold, part_size) -> 230 + if size >= threshold then 231 + match multipart_put ~env client ~bucket ~src ~dst ~part_size with 232 + | Ok response -> 233 + Ok 234 + { 235 + S3.Put_object.etag = response.S3.Multipart.etag; 236 + version_id = response.version_id; 237 + } 238 + | Error err -> Error err 239 + else single_put ~env client ~bucket ~src ~dst 240 + 241 + let sink_of_out_channel oc = 242 + { 243 + S3.Stream.consume = (fun bytes ~off ~len -> output oc bytes off len); 244 + close = (fun () -> flush oc); 245 + } 246 + 247 + let get ~env client ~bucket ~src ~dst = 248 + with_open_out dst (fun oc -> 249 + let sink = sink_of_out_channel oc in 250 + S3.get_object ~env client 251 + { 252 + S3.Get_object.bucket; 253 + key = src; 254 + range = None; 255 + if_match = None; 256 + if_none_match = None; 257 + } 258 + ~sink) 259 + 260 + let list ~env client ~bucket ~prefix = 261 + S3.list_objects_v2 ~env client 262 + { 263 + S3.List_objects_v2.bucket; 264 + prefix; 265 + delimiter = None; 266 + continuation_token = None; 267 + start_after = None; 268 + max_keys = None; 269 + } 270 + 271 + let delete ~env client ~bucket ~key = 272 + S3.delete_object ~env client 273 + { S3.Delete_object.bucket; key; version_id = None } 274 + 275 + let run_action env = function 276 + | Put { src; dst; debug } -> ( 277 + match client_from_env ~debug () with 278 + | Error msg -> 279 + prerr_endline msg; 280 + 2 281 + | Ok (bucket, client) -> ( 282 + match put ~env client ~bucket ~src ~dst with 283 + | Ok response -> 284 + Option.iter print_endline response.S3.Put_object.etag; 285 + 0 286 + | Error err -> 287 + prerr_endline (S3.Error.to_string err); 288 + 1)) 289 + | Get { src; dst; debug } -> ( 290 + match client_from_env ~debug () with 291 + | Error msg -> 292 + prerr_endline msg; 293 + 2 294 + | Ok (bucket, client) -> ( 295 + match get ~env client ~bucket ~src ~dst with 296 + | Ok _ -> 297 + print_endline dst; 298 + 0 299 + | Error err -> 300 + prerr_endline (S3.Error.to_string err); 301 + 1)) 302 + | List { prefix; debug } -> ( 303 + match client_from_env ~debug () with 304 + | Error msg -> 305 + prerr_endline msg; 306 + 2 307 + | Ok (bucket, client) -> ( 308 + match list ~env client ~bucket ~prefix with 309 + | Ok response -> 310 + List.iter 311 + (fun obj -> 312 + Printf.printf "%Ld\t%s\n" obj.S3.List_objects_v2.size obj.key) 313 + response.objects; 314 + 0 315 + | Error err -> 316 + prerr_endline (S3.Error.to_string err); 317 + 1)) 318 + | Delete { key; debug } -> ( 319 + match client_from_env ~debug () with 320 + | Error msg -> 321 + prerr_endline msg; 322 + 2 323 + | Ok (bucket, client) -> ( 324 + match delete ~env client ~bucket ~key with 325 + | Ok () -> 326 + print_endline key; 327 + 0 328 + | Error err -> 329 + prerr_endline (S3.Error.to_string err); 330 + 1)) 331 + 332 + let put_parser = 333 + let open Arg_parser in 334 + let+ src = pos_req 0 file ~doc:"Local path to upload" 335 + and+ dst = pos_req 1 string ~doc:"Destination object key" 336 + and+ debug = 337 + flag [ "debug" ] ~doc:"Print request/signing debug information" 338 + in 339 + Put { src; dst; debug } 340 + 341 + let get_parser = 342 + let open Arg_parser in 343 + let+ src = pos_req 0 string ~doc:"Source object key" 344 + and+ dst = pos_req 1 file ~doc:"Destination local path" 345 + and+ debug = 346 + flag [ "debug" ] ~doc:"Print request/signing debug information" 347 + in 348 + Get { src; dst; debug } 349 + 350 + let list_parser = 351 + let open Arg_parser in 352 + let+ prefix = pos_opt 0 string ~doc:"Optional key prefix to list" 353 + and+ debug = 354 + flag [ "debug" ] ~doc:"Print request/signing debug information" 355 + in 356 + List { prefix; debug } 357 + 358 + let delete_parser = 359 + let open Arg_parser in 360 + let+ key = pos_req 0 string ~doc:"Object key to delete" 361 + and+ debug = 362 + flag [ "debug" ] ~doc:"Print request/signing debug information" 363 + in 364 + Delete { key; debug } 365 + 366 + let normalize_args argv = 367 + match argv with 368 + | [] -> [] 369 + | "--list" :: rest -> "list" :: rest 370 + | "--put" :: rest -> "put" :: rest 371 + | "--get" :: rest -> "get" :: rest 372 + | "--delete" :: rest -> "delete" :: rest 373 + | args -> args 374 + 375 + let command = 376 + Command.group ~doc:"S3 smoke-test CLI" 377 + [ 378 + Command.subcommand "put" 379 + (Command.singleton ~doc:"Upload a file to S3" put_parser); 380 + Command.subcommand "get" 381 + (Command.singleton ~doc:"Download a file from S3" get_parser); 382 + Command.subcommand "list" 383 + (Command.singleton ~doc:"List objects in the bucket" list_parser); 384 + Command.subcommand "delete" 385 + (Command.singleton ~doc:"Delete an object from the bucket" delete_parser); 386 + Command.subcommand "help" Command.help; 387 + ] 388 + 389 + let () = 390 + Mirage_crypto_rng_unix.use_default (); 391 + let args = 392 + normalize_args 393 + (Array.to_list (Array.sub Sys.argv 1 (Array.length Sys.argv - 1))) 394 + in 395 + let action = 396 + Command.eval ~program_name:(Program_name.Literal "s3-cli") ~version:"0.1.0" 397 + command args 398 + in 399 + Eio_main.run @@ fun env -> exit (run_action env action)
+48
dune-project
··· 1 + (lang dune 3.20) 2 + 3 + (name s3) 4 + 5 + (version 0.1.0) 6 + 7 + (generate_opam_files true) 8 + 9 + (source 10 + ; (tangled @gdiazlo.tngl.sh/s3) 11 + (uri git+https://tangled.org/gdiazlo.tngl.sh/s3)) 12 + 13 + (authors "Gabriel Díaz") 14 + 15 + (maintainers "Gabriel Díaz") 16 + 17 + (license ISC) 18 + (homepage https://tangled.org/gdiazlo.tngl.sh/s3) 19 + (bug_reports https://tangled.org/gdiazlo.tngl.sh/s3/issues) 20 + (documentation https://tangled.org/gdiazlo.tngl.sh/s3) 21 + 22 + 23 + (package 24 + (name s3) 25 + (synopsis "Small AWS S3 client for OCaml") 26 + (description 27 + "Small and experimental AWS S3 client.") 28 + (depends 29 + (ocaml (>= 5.1)) 30 + dune 31 + eio 32 + eio_main 33 + base64 34 + climate 35 + cstruct 36 + digestif 37 + domain-name 38 + hcs 39 + mirage-crypto-rng 40 + simdjsont 41 + tls 42 + tls-eio 43 + mirage-crypto 44 + unix 45 + xmlm 46 + uri) 47 + (tags 48 + ("aws" "s3" "eio" "streaming" "cloud")))
+17
lib/dune
··· 1 + (library 2 + (name s3) 3 + (public_name s3) 4 + (modules s3) 5 + (libraries 6 + base64 7 + cstruct 8 + digestif 9 + domain-name 10 + eio 11 + hcs 12 + simdjsont 13 + tls 14 + tls-eio 15 + unix 16 + uri 17 + xmlm))
+1539
lib/s3.ml
··· 1 + module Error = struct 2 + type t = 3 + | Invalid_argument of string 4 + | Missing_credentials of string 5 + | Protocol of string 6 + | Http of { 7 + status : int; 8 + code : string option; 9 + message : string option; 10 + request_id : string option; 11 + host_id : string option; 12 + body_preview : string option; 13 + } 14 + 15 + exception E of t 16 + 17 + let fail err = raise (E err) 18 + 19 + let to_string = function 20 + | Invalid_argument msg -> "invalid argument: " ^ msg 21 + | Missing_credentials msg -> "missing credentials: " ^ msg 22 + | Protocol msg -> "protocol error: " ^ msg 23 + | Http { status; code; message; request_id; host_id; body_preview } -> 24 + let details = 25 + [ 26 + Option.map (fun v -> "code=" ^ v) code; 27 + Option.map (fun v -> "message=" ^ v) message; 28 + Option.map (fun v -> "request_id=" ^ v) request_id; 29 + Option.map (fun v -> "host_id=" ^ v) host_id; 30 + Option.map (fun v -> "body=" ^ v) body_preview; 31 + ] 32 + |> List.filter_map Fun.id |> String.concat ", " 33 + in 34 + if details = "" then Printf.sprintf "http error: status=%d" status 35 + else Printf.sprintf "http error: status=%d, %s" status details 36 + end 37 + 38 + module Region = struct 39 + type t = string 40 + 41 + let of_string region = 42 + let region = String.trim region in 43 + if region = "" then 44 + Error (Error.Invalid_argument "region must not be empty") 45 + else Ok region 46 + 47 + let us_east_1 = "us-east-1" 48 + let to_string region = region 49 + end 50 + 51 + module Endpoint = struct 52 + type addressing_style = Virtual_hosted | Path 53 + 54 + type t = { 55 + scheme : string; 56 + host : string; 57 + port : int option; 58 + base_path : string; 59 + addressing_style : addressing_style; 60 + signing_region : Region.t; 61 + signing_service : string; 62 + } 63 + 64 + let normalize_base_path = function 65 + | None | Some "" -> "" 66 + | Some value when String.get value 0 = '/' -> value 67 + | Some value -> "/" ^ value 68 + 69 + let custom ?(scheme = "https") ?port ?base_path 70 + ?(addressing_style = Virtual_hosted) ?(signing_region = Region.us_east_1) 71 + ?(signing_service = "s3") ~host () = 72 + { 73 + scheme; 74 + host; 75 + port; 76 + base_path = normalize_base_path base_path; 77 + addressing_style; 78 + signing_region; 79 + signing_service; 80 + } 81 + 82 + let aws ?(scheme = "https") ?port ?base_path 83 + ?(addressing_style = Virtual_hosted) ?(dual_stack = false) 84 + ?(accelerate = false) ~region () = 85 + let signing_region = region in 86 + let region = Region.to_string region in 87 + let host = 88 + match (accelerate, dual_stack) with 89 + | true, true -> "s3-accelerate.dualstack.amazonaws.com" 90 + | true, false -> "s3-accelerate.amazonaws.com" 91 + | false, true -> Printf.sprintf "s3.dualstack.%s.amazonaws.com" region 92 + | false, false -> Printf.sprintf "s3.%s.amazonaws.com" region 93 + in 94 + { 95 + scheme; 96 + host; 97 + port; 98 + base_path = normalize_base_path base_path; 99 + addressing_style; 100 + signing_region; 101 + signing_service = "s3"; 102 + } 103 + 104 + let authority t = 105 + match t.port with 106 + | None -> t.host 107 + | Some port -> Printf.sprintf "%s:%d" t.host port 108 + 109 + let addressing_style t = t.addressing_style 110 + let signing_region t = t.signing_region 111 + let signing_service t = t.signing_service 112 + 113 + let is_unreserved ~slash = function 114 + | 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '-' | '_' | '.' | '~' -> true 115 + | '/' when slash -> true 116 + | _ -> false 117 + 118 + let pct_encode ?(slash = false) value = 119 + let buffer = Buffer.create (String.length value) in 120 + String.iter 121 + (fun ch -> 122 + if is_unreserved ~slash ch then Buffer.add_char buffer ch 123 + else Buffer.add_string buffer (Printf.sprintf "%%%02X" (Char.code ch))) 124 + value; 125 + Buffer.contents buffer 126 + 127 + let scheme t = t.scheme 128 + 129 + let default_port t = 130 + match t.scheme with "https" -> 443 | "http" -> 80 | _ -> 443 131 + 132 + let authority_without_port t = t.host 133 + 134 + let host_header ?bucket t = 135 + let host = 136 + match (bucket, t.addressing_style) with 137 + | Some bucket, Virtual_hosted -> bucket ^ "." ^ t.host 138 + | _ -> t.host 139 + in 140 + match t.port with 141 + | None -> host 142 + | Some port when port = default_port t -> host 143 + | Some port -> Printf.sprintf "%s:%d" host port 144 + 145 + let connect_host ?bucket t = 146 + match (bucket, t.addressing_style) with 147 + | Some bucket, Virtual_hosted -> bucket ^ "." ^ authority_without_port t 148 + | _ -> authority_without_port t 149 + 150 + let path_prefix t = if t.base_path = "" then "" else t.base_path 151 + 152 + let path_for_bucket t ~bucket = 153 + match t.addressing_style with 154 + | Virtual_hosted -> 155 + let prefix = path_prefix t in 156 + if prefix = "" then "/" else prefix ^ "/" 157 + | Path -> 158 + let prefix = path_prefix t in 159 + if prefix = "" then "/" ^ pct_encode bucket 160 + else prefix ^ "/" ^ pct_encode bucket 161 + 162 + let path_for_object t ~bucket ~key = 163 + let key = pct_encode ~slash:true key in 164 + match t.addressing_style with 165 + | Virtual_hosted -> 166 + let prefix = path_prefix t in 167 + if prefix = "" then "/" ^ key else prefix ^ "/" ^ key 168 + | Path -> 169 + let prefix = path_prefix t in 170 + let bucket = pct_encode bucket in 171 + if prefix = "" then Printf.sprintf "/%s/%s" bucket key 172 + else Printf.sprintf "%s/%s/%s" prefix bucket key 173 + 174 + let uri_for_bucket t ~bucket = 175 + let host = host_header ~bucket t in 176 + let path = path_for_bucket t ~bucket in 177 + Printf.sprintf "%s://%s%s" t.scheme host path 178 + 179 + let uri_for_object t ~bucket ~key = 180 + let host = host_header ~bucket t in 181 + let path = path_for_object t ~bucket ~key in 182 + Printf.sprintf "%s://%s%s" t.scheme host path 183 + end 184 + 185 + module Stream = struct 186 + type source = { 187 + length : int64 option; 188 + emit : (bytes -> off:int -> len:int -> unit) -> unit; 189 + } 190 + 191 + type sink = { 192 + consume : bytes -> off:int -> len:int -> unit; 193 + close : unit -> unit; 194 + } 195 + 196 + let empty = { length = Some 0L; emit = (fun _ -> ()) } 197 + 198 + let of_string value = 199 + let data = Bytes.of_string value in 200 + { 201 + length = Some (Int64.of_int (Bytes.length data)); 202 + emit = (fun push -> push data ~off:0 ~len:(Bytes.length data)); 203 + } 204 + 205 + let of_flow ?length flow = 206 + let buffer = Bytes.create 0x10000 in 207 + { 208 + length; 209 + emit = 210 + (fun push -> 211 + let rec loop () = 212 + let chunk = Cstruct.of_bytes buffer in 213 + try 214 + let n = Eio.Flow.single_read flow chunk in 215 + if n > 0 then begin 216 + push buffer ~off:0 ~len:n; 217 + loop () 218 + end 219 + with End_of_file -> () 220 + in 221 + loop ()); 222 + } 223 + 224 + let to_buffer buffer = 225 + { 226 + consume = 227 + (fun bytes ~off ~len -> Buffer.add_subbytes buffer bytes off len); 228 + close = (fun () -> ()); 229 + } 230 + 231 + let to_flow flow = 232 + { 233 + consume = 234 + (fun bytes ~off ~len -> 235 + Eio.Flow.write flow [ Cstruct.of_bytes ~off ~len bytes ]); 236 + close = (fun () -> ()); 237 + } 238 + 239 + let to_string source = 240 + let buffer = Buffer.create 1024 in 241 + source.emit (fun bytes ~off ~len -> 242 + Buffer.add_subbytes buffer bytes off len); 243 + Buffer.contents buffer 244 + end 245 + 246 + module Credentials = struct 247 + type t = { 248 + access_key_id : string; 249 + secret_access_key : string; 250 + session_token : string option; 251 + expiration : float option; 252 + } 253 + 254 + module Source = struct 255 + type credentials = t 256 + 257 + type error = 258 + [ `Missing of string | `Invalid of string | `Unavailable of string ] 259 + 260 + type provider = 261 + | Static of credentials 262 + | Env 263 + | Imds of Uri.t option 264 + | Chain of t list 265 + 266 + and t = provider 267 + 268 + let static credentials = Static credentials 269 + let env () = Env 270 + let imds ?endpoint () = Imds endpoint 271 + let chain providers = Chain providers 272 + 273 + let load_env () = 274 + let get name = 275 + match Sys.getenv_opt name with 276 + | Some value when String.trim value <> "" -> Ok value 277 + | _ -> Error (`Missing name) 278 + in 279 + match (get "AWS_ACCESS_KEY_ID", get "AWS_SECRET_ACCESS_KEY") with 280 + | Ok access_key_id, Ok secret_access_key -> 281 + Ok 282 + { 283 + access_key_id; 284 + secret_access_key; 285 + session_token = Sys.getenv_opt "AWS_SESSION_TOKEN"; 286 + expiration = None; 287 + } 288 + | Error err, _ -> Error err 289 + | _, Error err -> Error err 290 + 291 + let rec load = function 292 + | Static credentials -> Ok credentials 293 + | Env -> load_env () 294 + | Imds _ -> Error (`Unavailable "IMDS provider requires a live client") 295 + | Chain providers -> 296 + let rec go last_error = function 297 + | [] -> 298 + Error 299 + (Option.value last_error 300 + ~default:(`Unavailable "no credential sources configured")) 301 + | provider :: rest -> ( 302 + match load provider with 303 + | Ok credentials -> Ok credentials 304 + | Error err -> go (Some err) rest) 305 + in 306 + go None providers 307 + end 308 + end 309 + 310 + type retry_policy = { 311 + max_attempts : int; 312 + base_delay_s : float; 313 + max_delay_s : float; 314 + } 315 + 316 + type checksum = [ `Md5 of string | `Sha256 of string | `Crc32c of string ] 317 + 318 + module List_objects_v2 = struct 319 + type request = { 320 + bucket : string; 321 + prefix : string option; 322 + delimiter : string option; 323 + continuation_token : string option; 324 + start_after : string option; 325 + max_keys : int option; 326 + } 327 + 328 + type object_entry = { 329 + key : string; 330 + etag : string option; 331 + size : int64; 332 + last_modified : string option; 333 + storage_class : string option; 334 + } 335 + 336 + type response = { 337 + objects : object_entry list; 338 + common_prefixes : string list; 339 + is_truncated : bool; 340 + next_continuation_token : string option; 341 + } 342 + end 343 + 344 + module Head_object = struct 345 + type request = { bucket : string; key : string } 346 + 347 + type response = { 348 + content_length : int64; 349 + etag : string option; 350 + last_modified : string option; 351 + version_id : string option; 352 + metadata : (string * string) list; 353 + } 354 + end 355 + 356 + module Get_object = struct 357 + type request = { 358 + bucket : string; 359 + key : string; 360 + range : string option; 361 + if_match : string option; 362 + if_none_match : string option; 363 + } 364 + 365 + type response = { 366 + content_length : int64 option; 367 + etag : string option; 368 + version_id : string option; 369 + } 370 + end 371 + 372 + module Put_object = struct 373 + type request = { 374 + bucket : string; 375 + key : string; 376 + body : Stream.source; 377 + content_type : string option; 378 + metadata : (string * string) list; 379 + checksum : checksum option; 380 + } 381 + 382 + type response = { etag : string option; version_id : string option } 383 + end 384 + 385 + module Copy_object = struct 386 + type request = { 387 + src_bucket : string; 388 + src_key : string; 389 + dst_bucket : string; 390 + dst_key : string; 391 + metadata : (string * string) list; 392 + } 393 + 394 + type response = { etag : string option; last_modified : string option } 395 + end 396 + 397 + module Delete_object = struct 398 + type request = { bucket : string; key : string; version_id : string option } 399 + end 400 + 401 + module Delete_objects = struct 402 + type object_id = { key : string; version_id : string option } 403 + type request = { bucket : string; quiet : bool; objects : object_id list } 404 + type deleted = { key : string; version_id : string option } 405 + 406 + type error = { 407 + key : string; 408 + version_id : string option; 409 + code : string option; 410 + message : string option; 411 + } 412 + 413 + type response = { deleted : deleted list; errors : error list } 414 + end 415 + 416 + module Multipart = struct 417 + type upload = { bucket : string; key : string; upload_id : string } 418 + 419 + type completed_part = { 420 + part_number : int; 421 + etag : string; 422 + checksum : checksum option; 423 + } 424 + 425 + type create_request = { 426 + bucket : string; 427 + key : string; 428 + content_type : string option; 429 + metadata : (string * string) list; 430 + } 431 + 432 + type create_response = upload 433 + 434 + type upload_part_request = { 435 + upload : upload; 436 + part_number : int; 437 + body : Stream.source; 438 + checksum : checksum option; 439 + } 440 + 441 + type upload_part_response = { 442 + etag : string option; 443 + checksum : checksum option; 444 + } 445 + 446 + type complete_request = { upload : upload; parts : completed_part list } 447 + type complete_response = { etag : string option; version_id : string option } 448 + type abort_request = upload 449 + end 450 + 451 + type credentials_cache = Credentials.t option 452 + 453 + type t = { 454 + endpoint : Endpoint.t; 455 + credentials : Credentials.Source.t; 456 + debug : (string -> unit) option; 457 + user_agent : string; 458 + retry_policy : retry_policy; 459 + mutable cached_credentials : credentials_cache; 460 + } 461 + 462 + let default_retry_policy = 463 + { max_attempts = 3; base_delay_s = 0.05; max_delay_s = 2.0 } 464 + 465 + let create ~endpoint ~credentials ?debug ?(part_size = 8 * 1024 * 1024) 466 + ?(user_agent = "ocaml-s3/0.1.0") ?(retry_policy = default_retry_policy) () = 467 + if part_size < 5 * 1024 * 1024 then 468 + Error.fail (Error.Invalid_argument "part_size must be at least 5 MiB"); 469 + { 470 + endpoint; 471 + credentials; 472 + debug; 473 + user_agent; 474 + retry_policy; 475 + cached_credentials = None; 476 + } 477 + 478 + let starts_with ~prefix value = 479 + let prefix_len = String.length prefix in 480 + String.length value >= prefix_len && String.sub value 0 prefix_len = prefix 481 + 482 + let trim = String.trim 483 + let lowercase = String.lowercase_ascii 484 + let option_map_default default f = function Some x -> f x | None -> default 485 + let first_some a b = match a with Some _ -> a | None -> b 486 + 487 + let non_empty_string value = 488 + let value = String.trim value in 489 + if value = "" then None else Some value 490 + 491 + let option_value_map option ~default ~f = 492 + match option with Some x -> f x | None -> default 493 + 494 + let sanitize_body_preview body = 495 + let body = String.trim body in 496 + if body = "" then None 497 + else 498 + let max_len = min 200 (String.length body) in 499 + let buffer = Buffer.create max_len in 500 + for i = 0 to max_len - 1 do 501 + let ch = body.[i] in 502 + let code = Char.code ch in 503 + Buffer.add_char buffer 504 + (if (code >= 32 && code < 127) || ch = '\n' || ch = '\r' || ch = '\t' 505 + then ch 506 + else '.') 507 + done; 508 + let preview = Buffer.contents buffer |> String.trim in 509 + if preview = "" then None 510 + else if String.length body > max_len then Some (preview ^ "...") 511 + else Some preview 512 + 513 + let debug_log client msg = Option.iter (fun f -> f msg) client.debug 514 + 515 + let format_headers headers = 516 + headers |> List.map (fun (k, v) -> k ^ ": " ^ v) |> String.concat "\n" 517 + 518 + let header_get headers name = 519 + let name = lowercase name in 520 + headers 521 + |> List.find_map (fun (k, v) -> if lowercase k = name then Some v else None) 522 + 523 + let metadata_headers headers = 524 + headers 525 + |> List.filter_map (fun (k, v) -> 526 + let key = lowercase k in 527 + if starts_with ~prefix:"x-amz-meta-" key then 528 + Some (String.sub key 11 (String.length key - 11), v) 529 + else None) 530 + 531 + let int64_of_header headers name = 532 + match header_get headers name with 533 + | Some v -> Int64.of_string_opt v 534 + | None -> None 535 + 536 + let aws_pct_encode ?(slash = false) = Endpoint.pct_encode ~slash 537 + let empty_sha256 = Digestif.SHA256.digest_string "" |> Digestif.SHA256.to_hex 538 + 539 + let checksum_header = function 540 + | `Md5 value -> [ ("content-md5", value) ] 541 + | `Sha256 value -> [ ("x-amz-checksum-sha256", value) ] 542 + | `Crc32c value -> [ ("x-amz-checksum-crc32c", value) ] 543 + 544 + let rfc3339_expiration _value = None 545 + 546 + type xml = 547 + | Element of string * (string * string) list * xml list 548 + | Data of string 549 + 550 + let xml_of_string body = 551 + let input = Xmlm.make_input ~strip:true (`String (0, body)) in 552 + let rec node () = 553 + match Xmlm.input input with 554 + | `El_start ((_, name), attrs) -> 555 + let attrs = List.map (fun ((_, k), v) -> (k, v)) attrs in 556 + let rec children acc = 557 + match Xmlm.peek input with 558 + | `El_end -> 559 + ignore (Xmlm.input input); 560 + List.rev acc 561 + | _ -> children (node () :: acc) 562 + in 563 + Element (name, attrs, children []) 564 + | `Data value -> Data value 565 + | `Dtd _ -> node () 566 + | `El_end -> Error.fail (Error.Protocol "unexpected XML end tag") 567 + in 568 + match node () with 569 + | Element _ as root -> root 570 + | Data _ -> Error.fail (Error.Protocol "expected XML element") 571 + 572 + let xml_children = function 573 + | Element (_, _, children) -> children 574 + | Data _ -> [] 575 + 576 + let xml_name = function Element (name, _, _) -> Some name | Data _ -> None 577 + 578 + let xml_find_child name node = 579 + xml_children node |> List.find_opt (fun child -> xml_name child = Some name) 580 + 581 + let xml_find_children name node = 582 + xml_children node |> List.filter (fun child -> xml_name child = Some name) 583 + 584 + let rec xml_text = function 585 + | Data value -> value 586 + | Element (_, _, children) -> 587 + children |> List.map xml_text |> String.concat "" 588 + 589 + let xml_text_opt name node = xml_find_child name node |> Option.map xml_text 590 + 591 + let bool_of_string_default value = 592 + match lowercase (trim value) with "true" -> true | _ -> false 593 + 594 + let base64_md5 value = 595 + Digestif.MD5.digest_string value 596 + |> Digestif.MD5.to_raw_string |> Base64.encode_string 597 + 598 + let format_amz_date seconds = 599 + let tm = Unix.gmtime seconds in 600 + Printf.sprintf "%04d%02d%02dT%02d%02d%02dZ" (tm.tm_year + 1900) 601 + (tm.tm_mon + 1) tm.tm_mday tm.tm_hour tm.tm_min tm.tm_sec 602 + 603 + let format_date_stamp seconds = 604 + let tm = Unix.gmtime seconds in 605 + Printf.sprintf "%04d%02d%02d" (tm.tm_year + 1900) (tm.tm_mon + 1) tm.tm_mday 606 + 607 + let hmac_sha256_raw ~key data = 608 + Digestif.SHA256.hmac_string ~key data |> Digestif.SHA256.to_raw_string 609 + 610 + let hmac_sha256_hex ~key data = 611 + Digestif.SHA256.hmac_string ~key data |> Digestif.SHA256.to_hex 612 + 613 + let sha256_hex data = 614 + Digestif.SHA256.digest_string data |> Digestif.SHA256.to_hex 615 + 616 + let canonical_query params = 617 + params 618 + |> List.map (fun (k, v) -> (aws_pct_encode k, aws_pct_encode v)) 619 + |> List.sort compare 620 + |> List.map (fun (k, v) -> k ^ "=" ^ v) 621 + |> String.concat "&" 622 + 623 + let normalize_header_value value = 624 + value |> String.split_on_char ' ' 625 + |> List.filter (fun s -> s <> "") 626 + |> String.concat " " 627 + 628 + let is_signable_header name = 629 + let name = lowercase (trim name) in 630 + name = "host" || name = "content-md5" || starts_with ~prefix:"x-amz-" name 631 + 632 + let canonical_headers headers = 633 + headers 634 + |> List.map (fun (k, v) -> 635 + (lowercase (trim k), normalize_header_value (trim v))) 636 + |> List.filter (fun (k, _) -> is_signable_header k) 637 + |> List.sort compare 638 + 639 + let signed_headers headers = 640 + canonical_headers headers |> List.map fst |> String.concat ";" 641 + 642 + let canonical_headers_string headers = 643 + canonical_headers headers 644 + |> List.map (fun (k, v) -> k ^ ":" ^ v ^ "\n") 645 + |> String.concat "" 646 + 647 + let authorization_header ~credentials ~endpoint ~seconds ~meth ~path ~query 648 + ~headers ~payload_hash = 649 + let amz_date = format_amz_date seconds in 650 + let date_stamp = format_date_stamp seconds in 651 + let credential_scope = 652 + String.concat "/" 653 + [ 654 + date_stamp; 655 + Region.to_string (Endpoint.signing_region endpoint); 656 + Endpoint.signing_service endpoint; 657 + "aws4_request"; 658 + ] 659 + in 660 + let canonical_request = 661 + String.concat "\n" 662 + [ 663 + meth; 664 + path; 665 + canonical_query query; 666 + canonical_headers_string headers; 667 + signed_headers headers; 668 + payload_hash; 669 + ] 670 + in 671 + let string_to_sign = 672 + String.concat "\n" 673 + [ 674 + "AWS4-HMAC-SHA256"; 675 + amz_date; 676 + credential_scope; 677 + sha256_hex canonical_request; 678 + ] 679 + in 680 + let k_date = 681 + hmac_sha256_raw 682 + ~key:("AWS4" ^ credentials.Credentials.secret_access_key) 683 + date_stamp 684 + in 685 + let k_region = 686 + hmac_sha256_raw ~key:k_date 687 + (Region.to_string (Endpoint.signing_region endpoint)) 688 + in 689 + let k_service = 690 + hmac_sha256_raw ~key:k_region (Endpoint.signing_service endpoint) 691 + in 692 + let k_signing = hmac_sha256_raw ~key:k_service "aws4_request" in 693 + let signature = hmac_sha256_hex ~key:k_signing string_to_sign in 694 + let authorization = 695 + Printf.sprintf 696 + "AWS4-HMAC-SHA256 Credential=%s/%s, SignedHeaders=%s, Signature=%s" 697 + credentials.access_key_id credential_scope (signed_headers headers) 698 + signature 699 + in 700 + (amz_date, authorization, canonical_request, string_to_sign, credential_scope) 701 + 702 + type http_response = { 703 + status : int; 704 + headers : (string * string) list; 705 + body : string; 706 + } 707 + 708 + let parse_status_line line = 709 + match String.split_on_char ' ' line with 710 + | _http :: code :: _ -> int_of_string code 711 + | _ -> Error.fail (Error.Protocol ("invalid status line: " ^ line)) 712 + 713 + let read_headers reader = 714 + let status_line = Eio.Buf_read.line reader in 715 + let rec loop acc = 716 + let line = Eio.Buf_read.line reader in 717 + if line = "" then List.rev acc 718 + else 719 + match String.index_opt line ':' with 720 + | None -> Error.fail (Error.Protocol ("invalid header line: " ^ line)) 721 + | Some idx -> 722 + let name = String.sub line 0 idx in 723 + let value = 724 + String.sub line (idx + 1) (String.length line - idx - 1) |> trim 725 + in 726 + loop ((name, value) :: acc) 727 + in 728 + (parse_status_line status_line, loop []) 729 + 730 + let consume_string sink data = 731 + let bytes = Bytes.of_string data in 732 + sink.Stream.consume bytes ~off:0 ~len:(Bytes.length bytes) 733 + 734 + let copy_from_reader_to_sink reader remaining sink = 735 + let rec loop remaining = 736 + if remaining > 0L then begin 737 + let to_read = Int64.to_int (Int64.min remaining 0x10000L) in 738 + let chunk = Eio.Buf_read.take to_read reader in 739 + consume_string sink chunk; 740 + loop Int64.(sub remaining (of_int to_read)) 741 + end 742 + in 743 + loop remaining 744 + 745 + let copy_chunked_body reader sink = 746 + let rec read_chunk () = 747 + let line = Eio.Buf_read.line reader in 748 + let size_hex = 749 + match String.index_opt line ';' with 750 + | Some idx -> String.sub line 0 idx 751 + | None -> line 752 + in 753 + let size = int_of_string ("0x" ^ trim size_hex) in 754 + if size = 0 then begin 755 + let rec trailers () = 756 + let line = Eio.Buf_read.line reader in 757 + if line <> "" then trailers () 758 + in 759 + trailers () 760 + end 761 + else begin 762 + let remaining = ref size in 763 + while !remaining > 0 do 764 + let n = min !remaining 0x10000 in 765 + let chunk = Eio.Buf_read.take n reader in 766 + consume_string sink chunk; 767 + remaining := !remaining - n 768 + done; 769 + ignore (Eio.Buf_read.line reader); 770 + read_chunk () 771 + end 772 + in 773 + read_chunk () 774 + 775 + let buffer_sink () = 776 + let buffer = Buffer.create 1024 in 777 + let sink = Stream.to_buffer buffer in 778 + (buffer, sink) 779 + 780 + let wrap_tls host flow = 781 + match 782 + Hcs.Tls_config.Client.to_tls_config Hcs.Tls_config.Client.default ~host 783 + with 784 + | Error msg -> Error.fail (Error.Protocol msg) 785 + | Ok tls_config -> 786 + let host_domain = 787 + match Domain_name.of_string host with 788 + | Ok dn -> ( 789 + match Domain_name.host dn with Ok h -> Some h | Error _ -> None) 790 + | Error _ -> None 791 + in 792 + (Tls_eio.client_of_flow tls_config ?host:host_domain flow 793 + :> Eio.Flow.two_way_ty Eio.Std.r) 794 + 795 + type request_body = Empty | String of string | Stream of Stream.source 796 + 797 + type request = { 798 + meth : string; 799 + bucket : string option; 800 + path : string; 801 + query : (string * string) list; 802 + headers : (string * string) list; 803 + body : request_body; 804 + payload_hash : string; 805 + } 806 + 807 + let signed_request ~env client request = 808 + let now = Eio.Time.now (Eio.Stdenv.clock env) in 809 + let credentials = 810 + let rec load_with_client source = 811 + match source with 812 + | Credentials.Source.Static credentials -> Ok credentials 813 + | Env -> Credentials.Source.load source 814 + | Imds endpoint -> ( 815 + let endpoint = 816 + Option.value endpoint 817 + ~default:(Uri.of_string "http://169.254.169.254") 818 + in 819 + let endpoint_host = 820 + Uri.host endpoint |> Option.value ~default:"169.254.169.254" 821 + in 822 + let endpoint_port = Uri.port endpoint |> Option.value ~default:80 in 823 + let make_request ~meth ~path ~headers = 824 + let req = 825 + { 826 + meth; 827 + bucket = None; 828 + path; 829 + query = []; 830 + headers; 831 + body = Empty; 832 + payload_hash = empty_sha256; 833 + } 834 + in 835 + let response = 836 + let endpoint = 837 + Endpoint.custom ~scheme:"http" ~host:endpoint_host 838 + ~port:endpoint_port ~addressing_style:Endpoint.Path 839 + ~signing_region:Region.us_east_1 ~signing_service:"s3" () 840 + in 841 + let host = Endpoint.connect_host endpoint in 842 + let host_header = Endpoint.host_header endpoint in 843 + Eio.Net.with_tcp_connect ~host 844 + ~service:(string_of_int endpoint_port) (Eio.Stdenv.net env) 845 + (fun flow -> 846 + let flow = (flow :> Eio.Flow.two_way_ty Eio.Std.r) in 847 + let headers = 848 + [ ("Host", host_header); ("Connection", "close") ] @ headers 849 + in 850 + let query = "" in 851 + let request_text = 852 + Printf.sprintf "%s %s%s HTTP/1.1\r\n%s\r\n" req.meth 853 + req.path query 854 + (headers 855 + |> List.map (fun (k, v) -> k ^ ": " ^ v ^ "\r\n") 856 + |> String.concat "") 857 + in 858 + Eio.Flow.copy_string request_text flow; 859 + let reader = Eio.Buf_read.of_flow flow ~max_size:max_int in 860 + let status, response_headers = read_headers reader in 861 + let buffer, sink = buffer_sink () in 862 + let transfer_encoding = 863 + header_get response_headers "transfer-encoding" 864 + in 865 + (match transfer_encoding with 866 + | Some v when lowercase v = "chunked" -> 867 + copy_chunked_body reader sink 868 + | _ -> 869 + Option.iter 870 + (fun len -> copy_from_reader_to_sink reader len sink) 871 + (int64_of_header response_headers "content-length")); 872 + sink.close (); 873 + { 874 + status; 875 + headers = response_headers; 876 + body = Buffer.contents buffer; 877 + }) 878 + in 879 + if response.status >= 200 && response.status < 300 then 880 + Ok response.body 881 + else 882 + Error 883 + (`Unavailable ("IMDS HTTP " ^ string_of_int response.status)) 884 + in 885 + match 886 + make_request ~meth:"PUT" ~path:"/latest/api/token" 887 + ~headers:[ ("X-aws-ec2-metadata-token-ttl-seconds", "21600") ] 888 + with 889 + | Error err -> Error err 890 + | Ok token -> ( 891 + match 892 + make_request ~meth:"GET" 893 + ~path:"/latest/meta-data/iam/security-credentials/" 894 + ~headers:[ ("X-aws-ec2-metadata-token", token) ] 895 + with 896 + | Error err -> Error err 897 + | Ok roles -> ( 898 + let role = 899 + roles |> String.split_on_char '\n' |> List.map trim 900 + |> List.find_opt (fun v -> v <> "") 901 + in 902 + match role with 903 + | None -> Error (`Unavailable "IMDS returned no IAM role") 904 + | Some role -> ( 905 + match 906 + make_request ~meth:"GET" 907 + ~path: 908 + ("/latest/meta-data/iam/security-credentials/" 909 + ^ role) 910 + ~headers:[ ("X-aws-ec2-metadata-token", token) ] 911 + with 912 + | Error err -> Error err 913 + | Ok json -> ( 914 + let get field = 915 + Simdjsont.Extract.string json ~pointer:("/" ^ field) 916 + in 917 + match (get "AccessKeyId", get "SecretAccessKey") with 918 + | Ok access_key_id, Ok secret_access_key -> 919 + let session_token = 920 + match get "Token" with 921 + | Ok value -> Some value 922 + | Error _ -> None 923 + in 924 + let expiration = 925 + match get "Expiration" with 926 + | Ok value -> rfc3339_expiration value 927 + | Error _ -> None 928 + in 929 + Ok 930 + { 931 + Credentials.access_key_id; 932 + secret_access_key; 933 + session_token; 934 + expiration; 935 + } 936 + | Error msg, _ | _, Error msg -> Error (`Invalid msg)) 937 + )))) 938 + | Chain providers -> 939 + let rec go last_error = function 940 + | [] -> 941 + Error 942 + (Option.value last_error 943 + ~default:(`Unavailable "no credential sources configured")) 944 + | provider :: rest -> ( 945 + match load_with_client provider with 946 + | Ok credentials -> Ok credentials 947 + | Error err -> go (Some err) rest) 948 + in 949 + go None providers 950 + in 951 + match client.cached_credentials with 952 + | Some credentials -> Ok credentials 953 + | None -> ( 954 + match load_with_client client.credentials with 955 + | Ok credentials -> 956 + client.cached_credentials <- Some credentials; 957 + Ok credentials 958 + | Error (`Missing msg) -> Error (Error.Missing_credentials msg) 959 + | Error (`Invalid msg) -> Error (Error.Protocol msg) 960 + | Error (`Unavailable msg) -> Error (Error.Missing_credentials msg)) 961 + in 962 + match credentials with 963 + | Error err -> Error err 964 + | Ok credentials -> 965 + let host = 966 + match request.bucket with 967 + | Some bucket -> Endpoint.host_header ~bucket client.endpoint 968 + | None -> Endpoint.host_header client.endpoint 969 + in 970 + let base_headers = 971 + [ 972 + ("host", host); 973 + ("x-amz-content-sha256", request.payload_hash); 974 + ("x-amz-date", format_amz_date now); 975 + ("user-agent", client.user_agent); 976 + ("connection", "close"); 977 + ] 978 + in 979 + let base_headers = 980 + match credentials.session_token with 981 + | None -> base_headers 982 + | Some token -> ("x-amz-security-token", token) :: base_headers 983 + in 984 + let headers = request.headers @ base_headers in 985 + let ( _amz_date, 986 + authorization, 987 + canonical_request, 988 + string_to_sign, 989 + credential_scope ) = 990 + authorization_header ~credentials ~endpoint:client.endpoint ~seconds:now 991 + ~meth:request.meth ~path:request.path ~query:request.query ~headers 992 + ~payload_hash:request.payload_hash 993 + in 994 + debug_log client 995 + (Printf.sprintf 996 + "S3 request\n\ 997 + method: %s\n\ 998 + host: %s\n\ 999 + path: %s\n\ 1000 + query: %s\n\ 1001 + addressing_style: %s\n\ 1002 + signing_region: %s\n\ 1003 + signing_service: %s\n\ 1004 + credential_scope: %s\n\ 1005 + headers:\n\ 1006 + %s\n\n\ 1007 + canonical_request:\n\ 1008 + %s\n\n\ 1009 + string_to_sign:\n\ 1010 + %s\n" 1011 + request.meth host request.path 1012 + (canonical_query request.query) 1013 + (match Endpoint.addressing_style client.endpoint with 1014 + | Endpoint.Virtual_hosted -> "virtual-hosted" 1015 + | Endpoint.Path -> "path") 1016 + (Region.to_string (Endpoint.signing_region client.endpoint)) 1017 + (Endpoint.signing_service client.endpoint) 1018 + credential_scope (format_headers headers) canonical_request 1019 + string_to_sign); 1020 + Ok (("Authorization", authorization) :: headers) 1021 + 1022 + let query_string params = 1023 + match canonical_query params with "" -> "" | q -> "?" ^ q 1024 + 1025 + let write_body flow = function 1026 + | Empty -> () 1027 + | String body -> Eio.Flow.copy_string body flow 1028 + | Stream source -> 1029 + source.emit (fun bytes ~off ~len -> 1030 + Eio.Flow.write flow [ Cstruct.of_bytes ~off ~len bytes ]) 1031 + 1032 + let write_chunked_body flow source = 1033 + source.Stream.emit (fun bytes ~off ~len -> 1034 + Eio.Flow.copy_string (Printf.sprintf "%x\r\n" len) flow; 1035 + Eio.Flow.write flow [ Cstruct.of_bytes ~off ~len bytes ]; 1036 + Eio.Flow.copy_string "\r\n" flow); 1037 + Eio.Flow.copy_string "0\r\n\r\n" flow 1038 + 1039 + let maybe_sleep ~env client attempt = 1040 + if attempt > 1 then 1041 + let mult = Float.pow 2.0 (float_of_int (attempt - 2)) in 1042 + let delay = 1043 + min client.retry_policy.max_delay_s 1044 + (client.retry_policy.base_delay_s *. mult) 1045 + in 1046 + Eio.Time.sleep (Eio.Stdenv.clock env) delay 1047 + 1048 + let perform ?sink ~env client request = 1049 + let rec loop attempt = 1050 + maybe_sleep ~env client attempt; 1051 + match signed_request ~env client request with 1052 + | Error err -> Error err 1053 + | Ok signed_headers -> ( 1054 + let host = 1055 + match request.bucket with 1056 + | Some bucket -> Endpoint.connect_host ~bucket client.endpoint 1057 + | None -> Endpoint.connect_host client.endpoint 1058 + in 1059 + let port = 1060 + Option.value client.endpoint.Endpoint.port 1061 + ~default:(Endpoint.default_port client.endpoint) 1062 + in 1063 + let service = string_of_int port in 1064 + try 1065 + let response = 1066 + Eio.Net.with_tcp_connect ~host ~service (Eio.Stdenv.net env) 1067 + (fun tcp_flow -> 1068 + let flow : Eio.Flow.two_way_ty Eio.Std.r = 1069 + match Endpoint.scheme client.endpoint with 1070 + | "https" -> wrap_tls host (Obj.magic tcp_flow) 1071 + | _ -> (tcp_flow :> Eio.Flow.two_way_ty Eio.Std.r) 1072 + in 1073 + let body_headers, body_writer = 1074 + match request.body with 1075 + | Empty -> ([ ("Content-Length", "0") ], fun () -> ()) 1076 + | String body -> 1077 + ( [ 1078 + ("Content-Length", string_of_int (String.length body)); 1079 + ], 1080 + fun () -> write_body flow request.body ) 1081 + | Stream source -> ( 1082 + match source.length with 1083 + | Some len -> 1084 + ( [ ("Content-Length", Int64.to_string len) ], 1085 + fun () -> write_body flow request.body ) 1086 + | None -> 1087 + ( [ ("Transfer-Encoding", "chunked") ], 1088 + fun () -> write_chunked_body flow source )) 1089 + in 1090 + let all_headers = signed_headers @ body_headers in 1091 + let request_text = 1092 + Printf.sprintf "%s %s%s HTTP/1.1\r\n%s\r\n" request.meth 1093 + request.path 1094 + (query_string request.query) 1095 + (all_headers 1096 + |> List.map (fun (k, v) -> k ^ ": " ^ v ^ "\r\n") 1097 + |> String.concat "") 1098 + in 1099 + debug_log client 1100 + (Printf.sprintf "wire_request:\n%s" request_text); 1101 + Eio.Flow.copy_string request_text flow; 1102 + body_writer (); 1103 + let reader = Eio.Buf_read.of_flow flow ~max_size:max_int in 1104 + let status, headers = read_headers reader in 1105 + debug_log client 1106 + (Printf.sprintf "wire_response:\nstatus: %d\nheaders:\n%s" 1107 + status (format_headers headers)); 1108 + let body_sink = 1109 + Option.value sink ~default:(snd (buffer_sink ())) 1110 + in 1111 + (if request.meth <> "HEAD" then 1112 + match header_get headers "transfer-encoding" with 1113 + | Some value when lowercase value = "chunked" -> 1114 + copy_chunked_body reader body_sink 1115 + | _ -> ( 1116 + match int64_of_header headers "content-length" with 1117 + | Some len -> 1118 + copy_from_reader_to_sink reader len body_sink 1119 + | None -> 1120 + let source = Eio.Buf_read.as_flow reader in 1121 + let buffer = Bytes.create 0x10000 in 1122 + let rec drain () = 1123 + let chunk = Cstruct.of_bytes buffer in 1124 + try 1125 + let n = Eio.Flow.single_read source chunk in 1126 + if n > 0 then ( 1127 + body_sink.consume buffer ~off:0 ~len:n; 1128 + drain ()) 1129 + with End_of_file -> () 1130 + in 1131 + drain ())); 1132 + body_sink.close (); 1133 + { status; headers; body = "" }) 1134 + in 1135 + Ok response 1136 + with 1137 + | _ when attempt < client.retry_policy.max_attempts -> loop (attempt + 1) 1138 + | exn -> Error (Error.Protocol (Printexc.to_string exn))) 1139 + in 1140 + loop 1 1141 + 1142 + let buffered_request ~env client request = 1143 + let buffer, sink = buffer_sink () in 1144 + match perform ~env ~sink client request with 1145 + | Ok response -> Ok { response with body = Buffer.contents buffer } 1146 + | Error err -> Error err 1147 + 1148 + let xml_error (response : http_response) = 1149 + try 1150 + let root = xml_of_string response.body in 1151 + let code = xml_text_opt "Code" root in 1152 + let message = 1153 + match xml_text_opt "Message" root with 1154 + | Some value -> non_empty_string value 1155 + | None -> None 1156 + in 1157 + let request_id = 1158 + first_some 1159 + (xml_text_opt "RequestId" root) 1160 + (header_get response.headers "x-amz-request-id") 1161 + in 1162 + let host_id = 1163 + first_some 1164 + (xml_text_opt "HostId" root) 1165 + (header_get response.headers "x-amz-id-2") 1166 + in 1167 + let body_preview = 1168 + if code = None && message = None then sanitize_body_preview response.body 1169 + else None 1170 + in 1171 + Error.Http 1172 + { 1173 + status = response.status; 1174 + code; 1175 + message; 1176 + request_id; 1177 + host_id; 1178 + body_preview; 1179 + } 1180 + with _ -> 1181 + Error.Http 1182 + { 1183 + status = response.status; 1184 + code = None; 1185 + message = None; 1186 + request_id = header_get response.headers "x-amz-request-id"; 1187 + host_id = header_get response.headers "x-amz-id-2"; 1188 + body_preview = sanitize_body_preview response.body; 1189 + } 1190 + 1191 + let expect_success (response : http_response) = 1192 + if response.status >= 200 && response.status < 300 then Ok response 1193 + else Error (xml_error response) 1194 + 1195 + let request ?sink ~env client request = 1196 + match sink with 1197 + | Some sink -> Result.bind (perform ~env ~sink client request) expect_success 1198 + | None -> Result.bind (buffered_request ~env client request) expect_success 1199 + 1200 + let payload_hash_for_body body = 1201 + match body with 1202 + | Empty -> empty_sha256 1203 + | String body -> sha256_hex body 1204 + | Stream _ -> "UNSIGNED-PAYLOAD" 1205 + 1206 + let request_for_object ?(query = []) ?(headers = []) ?(body = Empty) 1207 + ?payload_hash ~meth ~bucket ~key client = 1208 + let payload_hash = 1209 + Option.value payload_hash ~default:(payload_hash_for_body body) 1210 + in 1211 + { 1212 + meth; 1213 + bucket = Some bucket; 1214 + path = Endpoint.path_for_object client.endpoint ~bucket ~key; 1215 + query; 1216 + headers; 1217 + body; 1218 + payload_hash; 1219 + } 1220 + 1221 + let request_for_bucket ?(query = []) ?(headers = []) ?(body = Empty) 1222 + ?payload_hash ~meth ~bucket client = 1223 + let payload_hash = 1224 + Option.value payload_hash ~default:(payload_hash_for_body body) 1225 + in 1226 + { 1227 + meth; 1228 + bucket = Some bucket; 1229 + path = Endpoint.path_for_bucket client.endpoint ~bucket; 1230 + query; 1231 + headers; 1232 + body; 1233 + payload_hash; 1234 + } 1235 + 1236 + let list_objects_v2 ~env client (req : List_objects_v2.request) = 1237 + let query = 1238 + [ 1239 + Some ("list-type", "2"); 1240 + Option.map (fun v -> ("prefix", v)) req.prefix; 1241 + Option.map (fun v -> ("delimiter", v)) req.delimiter; 1242 + Option.map (fun v -> ("continuation-token", v)) req.continuation_token; 1243 + Option.map (fun v -> ("start-after", v)) req.start_after; 1244 + Option.map (fun v -> ("max-keys", string_of_int v)) req.max_keys; 1245 + ] 1246 + |> List.filter_map Fun.id 1247 + in 1248 + match 1249 + request ~env client 1250 + (request_for_bucket ~meth:"GET" ~bucket:req.bucket ~query client) 1251 + with 1252 + | Error err -> Error err 1253 + | Ok response -> 1254 + let root = xml_of_string response.body in 1255 + let objects = 1256 + xml_find_children "Contents" root 1257 + |> List.map (fun node -> 1258 + { 1259 + List_objects_v2.key = 1260 + option_map_default "" xml_text (xml_find_child "Key" node); 1261 + etag = xml_text_opt "ETag" node; 1262 + size = 1263 + (match xml_text_opt "Size" node with 1264 + | Some value -> 1265 + Option.value (Int64.of_string_opt value) ~default:0L 1266 + | None -> 0L); 1267 + last_modified = xml_text_opt "LastModified" node; 1268 + storage_class = xml_text_opt "StorageClass" node; 1269 + }) 1270 + in 1271 + let common_prefixes = 1272 + xml_find_children "CommonPrefixes" root 1273 + |> List.filter_map (xml_text_opt "Prefix") 1274 + in 1275 + Ok 1276 + { 1277 + List_objects_v2.objects; 1278 + common_prefixes; 1279 + is_truncated = 1280 + option_map_default false bool_of_string_default 1281 + (xml_text_opt "IsTruncated" root); 1282 + next_continuation_token = xml_text_opt "NextContinuationToken" root; 1283 + } 1284 + 1285 + let head_object ~env client (req : Head_object.request) = 1286 + match 1287 + request ~env client 1288 + (request_for_object ~meth:"HEAD" ~bucket:req.bucket ~key:req.key client) 1289 + with 1290 + | Error err -> Error err 1291 + | Ok response -> ( 1292 + match int64_of_header response.headers "content-length" with 1293 + | None -> Error (Error.Protocol "missing Content-Length header") 1294 + | Some content_length -> 1295 + Ok 1296 + { 1297 + Head_object.content_length; 1298 + etag = header_get response.headers "etag"; 1299 + last_modified = header_get response.headers "last-modified"; 1300 + version_id = header_get response.headers "x-amz-version-id"; 1301 + metadata = metadata_headers response.headers; 1302 + }) 1303 + 1304 + let get_object ~env client (req : Get_object.request) ~sink = 1305 + let headers = 1306 + [ 1307 + Option.map (fun v -> ("Range", v)) req.range; 1308 + Option.map (fun v -> ("If-Match", v)) req.if_match; 1309 + Option.map (fun v -> ("If-None-Match", v)) req.if_none_match; 1310 + ] 1311 + |> List.filter_map Fun.id 1312 + in 1313 + match 1314 + request ~env ~sink client 1315 + (request_for_object ~meth:"GET" ~bucket:req.bucket ~key:req.key ~headers 1316 + client) 1317 + with 1318 + | Error err -> Error err 1319 + | Ok response -> 1320 + Ok 1321 + { 1322 + Get_object.content_length = 1323 + int64_of_header response.headers "content-length"; 1324 + etag = header_get response.headers "etag"; 1325 + version_id = header_get response.headers "x-amz-version-id"; 1326 + } 1327 + 1328 + let put_object ~env client (req : Put_object.request) = 1329 + let headers = 1330 + (match req.content_type with 1331 + | Some v -> [ ("Content-Type", v) ] 1332 + | None -> []) 1333 + @ List.map (fun (k, v) -> ("x-amz-meta-" ^ k, v)) req.metadata 1334 + @ option_value_map req.checksum ~default:[] ~f:checksum_header 1335 + in 1336 + match 1337 + request ~env client 1338 + (request_for_object ~meth:"PUT" ~bucket:req.bucket ~key:req.key ~headers 1339 + ~body:(Stream req.body) client) 1340 + with 1341 + | Error err -> Error err 1342 + | Ok response -> 1343 + Ok 1344 + { 1345 + Put_object.etag = header_get response.headers "etag"; 1346 + version_id = header_get response.headers "x-amz-version-id"; 1347 + } 1348 + 1349 + let copy_object ~env client (req : Copy_object.request) = 1350 + let copy_source = 1351 + "/" 1352 + ^ aws_pct_encode req.src_bucket 1353 + ^ "/" 1354 + ^ aws_pct_encode ~slash:true req.src_key 1355 + in 1356 + let metadata_headers = 1357 + List.map (fun (k, v) -> ("x-amz-meta-" ^ k, v)) req.metadata 1358 + in 1359 + let headers = ("x-amz-copy-source", copy_source) :: metadata_headers in 1360 + match 1361 + request ~env client 1362 + (request_for_object ~meth:"PUT" ~bucket:req.dst_bucket ~key:req.dst_key 1363 + ~headers client) 1364 + with 1365 + | Error err -> Error err 1366 + | Ok response -> 1367 + let root = xml_of_string response.body in 1368 + Ok 1369 + { 1370 + Copy_object.etag = xml_text_opt "ETag" root; 1371 + last_modified = xml_text_opt "LastModified" root; 1372 + } 1373 + 1374 + let delete_object ~env client (req : Delete_object.request) = 1375 + let query = 1376 + match req.version_id with Some v -> [ ("versionId", v) ] | None -> [] 1377 + in 1378 + match 1379 + request ~env client 1380 + (request_for_object ~meth:"DELETE" ~bucket:req.bucket ~key:req.key ~query 1381 + client) 1382 + with 1383 + | Ok _ -> Ok () 1384 + | Error err -> Error err 1385 + 1386 + let delete_objects_body (req : Delete_objects.request) = 1387 + let buffer = Buffer.create 256 in 1388 + let output = Xmlm.make_output ~decl:false (`Buffer buffer) in 1389 + Xmlm.output output (`El_start (("", "Delete"), [])); 1390 + Xmlm.output output (`El_start (("", "Quiet"), [])); 1391 + Xmlm.output output 1392 + (`Data (if req.Delete_objects.quiet then "true" else "false")); 1393 + Xmlm.output output `El_end; 1394 + List.iter 1395 + (fun (obj : Delete_objects.object_id) -> 1396 + Xmlm.output output (`El_start (("", "Object"), [])); 1397 + Xmlm.output output (`El_start (("", "Key"), [])); 1398 + Xmlm.output output (`Data obj.Delete_objects.key); 1399 + Xmlm.output output `El_end; 1400 + Option.iter 1401 + (fun version_id -> 1402 + Xmlm.output output (`El_start (("", "VersionId"), [])); 1403 + Xmlm.output output (`Data version_id); 1404 + Xmlm.output output `El_end) 1405 + obj.version_id; 1406 + Xmlm.output output `El_end) 1407 + req.objects; 1408 + Xmlm.output output `El_end; 1409 + Buffer.contents buffer 1410 + 1411 + let delete_objects ~env client (req : Delete_objects.request) = 1412 + let body = delete_objects_body req in 1413 + let headers = 1414 + [ ("Content-Type", "application/xml"); ("Content-MD5", base64_md5 body) ] 1415 + in 1416 + match 1417 + request ~env client 1418 + (request_for_bucket ~meth:"POST" ~bucket:req.bucket 1419 + ~query:[ ("delete", "") ] 1420 + ~headers ~body:(String body) ~payload_hash:(sha256_hex body) client) 1421 + with 1422 + | Error err -> Error err 1423 + | Ok response -> 1424 + let root = xml_of_string response.body in 1425 + let deleted = 1426 + xml_find_children "Deleted" root 1427 + |> List.map (fun node -> 1428 + { 1429 + Delete_objects.key = 1430 + option_map_default "" xml_text (xml_find_child "Key" node); 1431 + version_id = xml_text_opt "VersionId" node; 1432 + }) 1433 + in 1434 + let errors = 1435 + xml_find_children "Error" root 1436 + |> List.map (fun node -> 1437 + { 1438 + Delete_objects.key = 1439 + option_map_default "" xml_text (xml_find_child "Key" node); 1440 + version_id = xml_text_opt "VersionId" node; 1441 + code = xml_text_opt "Code" node; 1442 + message = xml_text_opt "Message" node; 1443 + }) 1444 + in 1445 + Ok { Delete_objects.deleted; errors } 1446 + 1447 + let create_multipart_upload ~env client (req : Multipart.create_request) = 1448 + let headers = 1449 + (match req.Multipart.content_type with 1450 + | Some v -> [ ("Content-Type", v) ] 1451 + | None -> []) 1452 + @ List.map (fun (k, v) -> ("x-amz-meta-" ^ k, v)) req.metadata 1453 + in 1454 + match 1455 + request ~env client 1456 + (request_for_object ~meth:"POST" ~bucket:req.bucket ~key:req.key 1457 + ~query:[ ("uploads", "") ] 1458 + ~headers client) 1459 + with 1460 + | Error err -> Error err 1461 + | Ok response -> ( 1462 + let root = xml_of_string response.body in 1463 + let upload_id = xml_text_opt "UploadId" root in 1464 + match upload_id with 1465 + | None -> Error (Error.Protocol "missing UploadId in multipart response") 1466 + | Some upload_id -> 1467 + Ok { Multipart.bucket = req.bucket; key = req.key; upload_id }) 1468 + 1469 + let upload_part ~env client (req : Multipart.upload_part_request) = 1470 + let query = 1471 + [ 1472 + ("partNumber", string_of_int req.part_number); 1473 + ("uploadId", req.upload.upload_id); 1474 + ] 1475 + in 1476 + let headers = option_value_map req.checksum ~default:[] ~f:checksum_header in 1477 + match 1478 + request ~env client 1479 + (request_for_object ~meth:"PUT" ~bucket:req.upload.bucket 1480 + ~key:req.upload.key ~query ~headers ~body:(Stream req.body) client) 1481 + with 1482 + | Error err -> Error err 1483 + | Ok response -> 1484 + Ok 1485 + { 1486 + Multipart.etag = header_get response.headers "etag"; 1487 + checksum = 1488 + (match header_get response.headers "x-amz-checksum-sha256" with 1489 + | Some value -> Some (`Sha256 value) 1490 + | None -> None); 1491 + } 1492 + 1493 + let complete_multipart_body (req : Multipart.complete_request) = 1494 + let buffer = Buffer.create 256 in 1495 + let output = Xmlm.make_output ~decl:false (`Buffer buffer) in 1496 + Xmlm.output output (`El_start (("", "CompleteMultipartUpload"), [])); 1497 + List.iter 1498 + (fun (part : Multipart.completed_part) -> 1499 + Xmlm.output output (`El_start (("", "Part"), [])); 1500 + Xmlm.output output (`El_start (("", "PartNumber"), [])); 1501 + Xmlm.output output (`Data (string_of_int part.Multipart.part_number)); 1502 + Xmlm.output output `El_end; 1503 + Xmlm.output output (`El_start (("", "ETag"), [])); 1504 + Xmlm.output output (`Data part.etag); 1505 + Xmlm.output output `El_end; 1506 + Xmlm.output output `El_end) 1507 + req.Multipart.parts; 1508 + Xmlm.output output `El_end; 1509 + Buffer.contents buffer 1510 + 1511 + let complete_multipart_upload ~env client (req : Multipart.complete_request) = 1512 + let body = complete_multipart_body req in 1513 + let query = [ ("uploadId", req.upload.upload_id) ] in 1514 + match 1515 + request ~env client 1516 + (request_for_object ~meth:"POST" ~bucket:req.upload.bucket 1517 + ~key:req.upload.key ~query 1518 + ~headers:[ ("Content-Type", "application/xml") ] 1519 + ~body:(String body) ~payload_hash:(sha256_hex body) client) 1520 + with 1521 + | Error err -> Error err 1522 + | Ok response -> 1523 + let root = xml_of_string response.body in 1524 + Ok 1525 + { 1526 + Multipart.etag = xml_text_opt "ETag" root; 1527 + version_id = header_get response.headers "x-amz-version-id"; 1528 + } 1529 + 1530 + let abort_multipart_upload ~env client (req : Multipart.abort_request) = 1531 + match 1532 + request ~env client 1533 + (request_for_object ~meth:"DELETE" ~bucket:req.Multipart.bucket 1534 + ~key:req.key 1535 + ~query:[ ("uploadId", req.upload_id) ] 1536 + client) 1537 + with 1538 + | Ok _ -> Ok () 1539 + | Error err -> Error err
+325
lib/s3.mli
··· 1 + module Error : sig 2 + type t = 3 + | Invalid_argument of string 4 + | Missing_credentials of string 5 + | Protocol of string 6 + | Http of { 7 + status : int; 8 + code : string option; 9 + message : string option; 10 + request_id : string option; 11 + host_id : string option; 12 + body_preview : string option; 13 + } 14 + 15 + exception E of t 16 + 17 + val fail : t -> 'a 18 + val to_string : t -> string 19 + end 20 + 21 + module Region : sig 22 + type t 23 + 24 + val of_string : string -> (t, Error.t) result 25 + val us_east_1 : t 26 + val to_string : t -> string 27 + end 28 + 29 + module Endpoint : sig 30 + type addressing_style = Virtual_hosted | Path 31 + type t 32 + 33 + val custom : 34 + ?scheme:string -> 35 + ?port:int -> 36 + ?base_path:string -> 37 + ?addressing_style:addressing_style -> 38 + ?signing_region:Region.t -> 39 + ?signing_service:string -> 40 + host:string -> 41 + unit -> 42 + t 43 + 44 + val aws : 45 + ?scheme:string -> 46 + ?port:int -> 47 + ?base_path:string -> 48 + ?addressing_style:addressing_style -> 49 + ?dual_stack:bool -> 50 + ?accelerate:bool -> 51 + region:Region.t -> 52 + unit -> 53 + t 54 + 55 + val authority : t -> string 56 + val addressing_style : t -> addressing_style 57 + val signing_region : t -> Region.t 58 + val signing_service : t -> string 59 + val uri_for_bucket : t -> bucket:string -> string 60 + val uri_for_object : t -> bucket:string -> key:string -> string 61 + end 62 + 63 + module Stream : sig 64 + type source = { 65 + length : int64 option; 66 + emit : (bytes -> off:int -> len:int -> unit) -> unit; 67 + } 68 + 69 + type sink = { 70 + consume : bytes -> off:int -> len:int -> unit; 71 + close : unit -> unit; 72 + } 73 + 74 + val empty : source 75 + val of_string : string -> source 76 + val of_flow : ?length:int64 -> _ Eio.Flow.source -> source 77 + val to_buffer : Buffer.t -> sink 78 + val to_flow : _ Eio.Flow.sink -> sink 79 + val to_string : source -> string 80 + end 81 + 82 + module Credentials : sig 83 + type t = { 84 + access_key_id : string; 85 + secret_access_key : string; 86 + session_token : string option; 87 + expiration : float option; 88 + } 89 + 90 + module Source : sig 91 + type credentials = t 92 + 93 + type error = 94 + [ `Missing of string | `Invalid of string | `Unavailable of string ] 95 + 96 + type t 97 + 98 + val static : credentials -> t 99 + val env : unit -> t 100 + val imds : ?endpoint:Uri.t -> unit -> t 101 + val chain : t list -> t 102 + val load : t -> (credentials, error) result 103 + end 104 + end 105 + 106 + type retry_policy = { 107 + max_attempts : int; 108 + base_delay_s : float; 109 + max_delay_s : float; 110 + } 111 + 112 + type checksum = [ `Md5 of string | `Sha256 of string | `Crc32c of string ] 113 + type t 114 + 115 + val create : 116 + endpoint:Endpoint.t -> 117 + credentials:Credentials.Source.t -> 118 + ?debug:(string -> unit) -> 119 + ?part_size:int -> 120 + ?user_agent:string -> 121 + ?retry_policy:retry_policy -> 122 + unit -> 123 + t 124 + 125 + val default_retry_policy : retry_policy 126 + 127 + module List_objects_v2 : sig 128 + type request = { 129 + bucket : string; 130 + prefix : string option; 131 + delimiter : string option; 132 + continuation_token : string option; 133 + start_after : string option; 134 + max_keys : int option; 135 + } 136 + 137 + type object_entry = { 138 + key : string; 139 + etag : string option; 140 + size : int64; 141 + last_modified : string option; 142 + storage_class : string option; 143 + } 144 + 145 + type response = { 146 + objects : object_entry list; 147 + common_prefixes : string list; 148 + is_truncated : bool; 149 + next_continuation_token : string option; 150 + } 151 + end 152 + 153 + module Head_object : sig 154 + type request = { bucket : string; key : string } 155 + 156 + type response = { 157 + content_length : int64; 158 + etag : string option; 159 + last_modified : string option; 160 + version_id : string option; 161 + metadata : (string * string) list; 162 + } 163 + end 164 + 165 + module Get_object : sig 166 + type request = { 167 + bucket : string; 168 + key : string; 169 + range : string option; 170 + if_match : string option; 171 + if_none_match : string option; 172 + } 173 + 174 + type response = { 175 + content_length : int64 option; 176 + etag : string option; 177 + version_id : string option; 178 + } 179 + end 180 + 181 + module Put_object : sig 182 + type request = { 183 + bucket : string; 184 + key : string; 185 + body : Stream.source; 186 + content_type : string option; 187 + metadata : (string * string) list; 188 + checksum : checksum option; 189 + } 190 + 191 + type response = { etag : string option; version_id : string option } 192 + end 193 + 194 + module Copy_object : sig 195 + type request = { 196 + src_bucket : string; 197 + src_key : string; 198 + dst_bucket : string; 199 + dst_key : string; 200 + metadata : (string * string) list; 201 + } 202 + 203 + type response = { etag : string option; last_modified : string option } 204 + end 205 + 206 + module Delete_object : sig 207 + type request = { bucket : string; key : string; version_id : string option } 208 + end 209 + 210 + module Delete_objects : sig 211 + type object_id = { key : string; version_id : string option } 212 + type request = { bucket : string; quiet : bool; objects : object_id list } 213 + type deleted = { key : string; version_id : string option } 214 + 215 + type error = { 216 + key : string; 217 + version_id : string option; 218 + code : string option; 219 + message : string option; 220 + } 221 + 222 + type response = { deleted : deleted list; errors : error list } 223 + end 224 + 225 + module Multipart : sig 226 + type upload = { bucket : string; key : string; upload_id : string } 227 + 228 + type completed_part = { 229 + part_number : int; 230 + etag : string; 231 + checksum : checksum option; 232 + } 233 + 234 + type create_request = { 235 + bucket : string; 236 + key : string; 237 + content_type : string option; 238 + metadata : (string * string) list; 239 + } 240 + 241 + type create_response = upload 242 + 243 + type upload_part_request = { 244 + upload : upload; 245 + part_number : int; 246 + body : Stream.source; 247 + checksum : checksum option; 248 + } 249 + 250 + type upload_part_response = { 251 + etag : string option; 252 + checksum : checksum option; 253 + } 254 + 255 + type complete_request = { upload : upload; parts : completed_part list } 256 + type complete_response = { etag : string option; version_id : string option } 257 + type abort_request = upload 258 + end 259 + 260 + val list_objects_v2 : 261 + env:< net : _ Eio.Net.t ; clock : _ Eio.Time.clock ; .. > -> 262 + t -> 263 + List_objects_v2.request -> 264 + (List_objects_v2.response, Error.t) result 265 + 266 + val head_object : 267 + env:< net : _ Eio.Net.t ; clock : _ Eio.Time.clock ; .. > -> 268 + t -> 269 + Head_object.request -> 270 + (Head_object.response, Error.t) result 271 + 272 + val get_object : 273 + env:< net : _ Eio.Net.t ; clock : _ Eio.Time.clock ; .. > -> 274 + t -> 275 + Get_object.request -> 276 + sink:Stream.sink -> 277 + (Get_object.response, Error.t) result 278 + 279 + val put_object : 280 + env:< net : _ Eio.Net.t ; clock : _ Eio.Time.clock ; .. > -> 281 + t -> 282 + Put_object.request -> 283 + (Put_object.response, Error.t) result 284 + 285 + val copy_object : 286 + env:< net : _ Eio.Net.t ; clock : _ Eio.Time.clock ; .. > -> 287 + t -> 288 + Copy_object.request -> 289 + (Copy_object.response, Error.t) result 290 + 291 + val delete_object : 292 + env:< net : _ Eio.Net.t ; clock : _ Eio.Time.clock ; .. > -> 293 + t -> 294 + Delete_object.request -> 295 + (unit, Error.t) result 296 + 297 + val delete_objects : 298 + env:< net : _ Eio.Net.t ; clock : _ Eio.Time.clock ; .. > -> 299 + t -> 300 + Delete_objects.request -> 301 + (Delete_objects.response, Error.t) result 302 + 303 + val create_multipart_upload : 304 + env:< net : _ Eio.Net.t ; clock : _ Eio.Time.clock ; .. > -> 305 + t -> 306 + Multipart.create_request -> 307 + (Multipart.create_response, Error.t) result 308 + 309 + val upload_part : 310 + env:< net : _ Eio.Net.t ; clock : _ Eio.Time.clock ; .. > -> 311 + t -> 312 + Multipart.upload_part_request -> 313 + (Multipart.upload_part_response, Error.t) result 314 + 315 + val complete_multipart_upload : 316 + env:< net : _ Eio.Net.t ; clock : _ Eio.Time.clock ; .. > -> 317 + t -> 318 + Multipart.complete_request -> 319 + (Multipart.complete_response, Error.t) result 320 + 321 + val abort_multipart_upload : 322 + env:< net : _ Eio.Net.t ; clock : _ Eio.Time.clock ; .. > -> 323 + t -> 324 + Multipart.abort_request -> 325 + (unit, Error.t) result
+49
s3.opam
··· 1 + # This file is generated by dune, edit dune-project instead 2 + opam-version: "2.0" 3 + version: "0.1.0" 4 + synopsis: "Small AWS S3 client for OCaml" 5 + description: "Small and experimental AWS S3 client." 6 + maintainer: ["Gabriel Díaz"] 7 + authors: ["Gabriel Díaz"] 8 + license: "ISC" 9 + tags: ["aws" "s3" "eio" "streaming" "cloud"] 10 + homepage: "https://tangled.org/gdiazlo.tngl.sh/s3" 11 + doc: "https://tangled.org/gdiazlo.tngl.sh/s3" 12 + bug-reports: "https://tangled.org/gdiazlo.tngl.sh/s3/issues" 13 + depends: [ 14 + "ocaml" {>= "5.1"} 15 + "dune" {>= "3.20"} 16 + "eio" 17 + "eio_main" 18 + "base64" 19 + "climate" 20 + "cstruct" 21 + "digestif" 22 + "domain-name" 23 + "hcs" 24 + "mirage-crypto-rng" 25 + "simdjsont" 26 + "tls" 27 + "tls-eio" 28 + "mirage-crypto" 29 + "unix" 30 + "xmlm" 31 + "uri" 32 + "odoc" {with-doc} 33 + ] 34 + build: [ 35 + ["dune" "subst"] {dev} 36 + [ 37 + "dune" 38 + "build" 39 + "-p" 40 + name 41 + "-j" 42 + jobs 43 + "@install" 44 + "@runtest" {with-test} 45 + "@doc" {with-doc} 46 + ] 47 + ] 48 + dev-repo: "git+https://tangled.org/gdiazlo.tngl.sh/s3" 49 + x-maintenance-intent: ["(latest)"]
+3
test/dune
··· 1 + (test 2 + (name test_s3) 3 + (libraries s3))
+46
test/test_s3.ml
··· 1 + let test_region_validation () = 2 + match S3.Region.of_string "" with 3 + | Error (S3.Error.Invalid_argument _) -> () 4 + | _ -> failwith "expected invalid empty region" 5 + 6 + let test_virtual_hosted_uri () = 7 + let region = Result.get_ok (S3.Region.of_string "eu-west-1") in 8 + let endpoint = S3.Endpoint.aws ~region () in 9 + let uri = 10 + S3.Endpoint.uri_for_object endpoint ~bucket:"my-bucket" 11 + ~key:"folder/file.txt" 12 + in 13 + if uri <> "https://my-bucket.s3.eu-west-1.amazonaws.com/folder/file.txt" then 14 + failwith ("unexpected uri: " ^ uri) 15 + 16 + let test_path_style_uri () = 17 + let region = Result.get_ok (S3.Region.of_string "us-east-1") in 18 + let endpoint = 19 + S3.Endpoint.aws ~region ~addressing_style:S3.Endpoint.Path () 20 + in 21 + let uri = 22 + S3.Endpoint.uri_for_object endpoint ~bucket:"bucket" ~key:"a b.txt" 23 + in 24 + if uri <> "https://s3.us-east-1.amazonaws.com/bucket/a%20b.txt" then 25 + failwith ("unexpected uri: " ^ uri) 26 + 27 + let test_custom_endpoint_uri () = 28 + let region = Result.get_ok (S3.Region.of_string "auto") in 29 + let endpoint = 30 + S3.Endpoint.custom ~host:"objects.example.internal" ~port:9000 31 + ~base_path:"storage" ~addressing_style:S3.Endpoint.Path 32 + ~signing_region:region () 33 + in 34 + let uri = 35 + S3.Endpoint.uri_for_object endpoint ~bucket:"bucket" ~key:"folder/file.txt" 36 + in 37 + if 38 + uri 39 + <> "https://objects.example.internal:9000/storage/bucket/folder/file.txt" 40 + then failwith ("unexpected uri: " ^ uri) 41 + 42 + let () = 43 + test_region_validation (); 44 + test_virtual_hosted_uri (); 45 + test_path_style_uri (); 46 + test_custom_endpoint_uri ()