···33let handler =
44 Xrpc.handler ~auth:Authorization (fun ctx ->
55 let did = Auth.get_authed_did_exn ctx.auth in
66+ let%lwt data = Dream.body ctx.req |> Lwt.map Bytes.of_string in
67 let mime_type =
77- Option.value ~default:"application/octet-stream"
88- (Dream.header ctx.req "Content-Type")
88+ match Dream.header ctx.req "Content-Type" with
99+ | Some ct ->
1010+ ct
1111+ | None ->
1212+ Util.Mime_sniff.sniff data
913 in
1014 Auth.assert_blob_scope ctx.auth ~mime:mime_type ;
1111- let%lwt data = Dream.body ctx.req |> Lwt.map Bytes.of_string in
1215 let size = Int64.of_int @@ Bytes.length data in
1316 let cid = Cid.create Raw data in
1417 let%lwt user_db = User_store.connect did in
+158
pegasus/lib/util/mime_sniff.ml
(* [matches_at data offset pattern] is [true] when the bytes of [data]
   starting at [offset] are exactly the characters of [pattern]. It is [false]
   when [pattern] would run past the end of [data]. *)
let matches_at data offset pattern =
  let plen = String.length pattern in
  (* Short-circuit walk: stops at the first differing byte. *)
  let rec agrees_from i =
    i >= plen
    || Char.equal (Bytes.get data (offset + i)) pattern.[i]
       && agrees_from (i + 1)
  in
  offset + plen <= Bytes.length data && agrees_from 0
(* [matches_at_bytes data offset pattern mask] compares [pattern] against
   [data] at [offset], looking only at the bits selected by [mask]: byte [i]
   matches when [data.(offset + i) land mask.(i)] equals
   [pattern.(i) land mask.(i)]. Returns [false] when [pattern] would run past
   the end of [data]. [mask] is indexed up to the length of [pattern], so it
   must be at least that long. *)
let matches_at_bytes data offset pattern mask =
  let plen = Bytes.length pattern in
  let masked_equal i =
    let m = Bytes.get_uint8 mask i in
    Bytes.get_uint8 data (offset + i) land m = Bytes.get_uint8 pattern i land m
  in
  let rec all_from i = i >= plen || (masked_equal i && all_from (i + 1)) in
  offset + plen <= Bytes.length data && all_from 0
(* Reads the four bytes of [data] starting at [offset] as an unsigned
   big-endian integer (most significant byte first). *)
let get_be_u32 data offset =
  let byte k = Bytes.get_uint8 data (offset + k) in
  List.fold_left (fun acc k -> (acc lsl 8) lor byte k) 0 [0; 1; 2; 3]
(* Fixed magic-number prefixes, tried in order. Each entry is
   [(minimum length, magic bytes at offset 0, MIME type)]; the minimum length
   is simply the length of the magic string, so it is derived here rather
   than written out by hand. *)
let simple_signatures =
  let images =
    [ ("\x89PNG\r\n\x1a\n", "image/png")
    ; ("\xff\xd8\xff", "image/jpeg")
    ; ("GIF87a", "image/gif")
    ; ("GIF89a", "image/gif")
    ; ("BM", "image/bmp")
    ; ("\x00\x00\x01\x00", "image/x-icon")
    ; ("\x00\x00\x02\x00", "image/x-icon") ]
  in
  let audio =
    [ ("ID3", "audio/mpeg")
    ; ("OggS\x00", "audio/ogg")
    ; ("MThd\x00\x00\x00\x06", "audio/midi") ]
  in
  let other =
    [ ("%PDF-", "application/pdf")
    ; ("PK\x03\x04", "application/zip")
    ; ("\x1f\x8b\x08", "application/gzip") ]
  in
  List.concat [images; audio; other]
  |> List.map (fun (magic, mime) -> (String.length magic, magic, mime))
(* Returns the MIME type of the first entry of [simple_signatures] whose magic
   bytes appear at the start of [data], or [None] when no entry matches. *)
let check_simple data =
  let len = Bytes.length data in
  let rec first_match = function
    | [] ->
        None
    | (needed, magic, mime) :: rest ->
        if len >= needed && matches_at data 0 magic then Some mime
        else first_match rest
  in
  first_match simple_signatures
(* RIFF container layout: bytes 0-3 are "RIFF", bytes 4-7 hold the chunk size
   (not inspected here), and the sub-format tag starts at byte 8. Recognises
   WAVE, AVI and WebP; any other RIFF payload yields [None]. *)
let check_riff data =
  let len = Bytes.length data in
  let tagged tag = matches_at data 8 tag in
  if len >= 12 && matches_at data 0 "RIFF" then
    if tagged "WAVE" then Some "audio/wave"
    else if tagged "AVI " then Some "video/avi"
    else if len >= 14 && tagged "WEBPVP" then Some "image/webp"
    else None
  else None
(* MP4: the data must open with an "ftyp" box (4-byte big-endian box size,
   then the tag "ftyp" at offset 4). The major brand sits at offset 8 and the
   compatible brands follow every 4 bytes from offset 16; the data is reported
   as video/mp4 when any of them starts with "mp4". *)
let check_mp4 data =
  let len = Bytes.length data in
  if len < 12 || not (matches_at data 4 "ftyp") then None
  else
    let box_size = get_be_u32 data 0 in
    (* A usable ftyp box is at least 8 bytes and a multiple of 4 bytes long. *)
    let well_formed = box_size >= 8 && box_size land 3 = 0 in
    if not well_formed then None
    else
      (* Never look past the box, nor past the bytes actually available. *)
      let limit = min box_size len in
      let is_mp4_brand off = matches_at data off "mp4" in
      let rec any_compatible off =
        off + 3 <= limit && (is_mp4_brand off || any_compatible (off + 4))
      in
      let major_matches = limit >= 11 && is_mp4_brand 8 in
      if major_matches || any_compatible 16 then Some "video/mp4" else None
(* WebM: the data must start with the EBML magic 1A 45 DF A3. The header is
   then scanned (starting offsets 4 to 38) for a DocType element, ID 42 82,
   whose payload begins with "webm".

   The element size that follows the ID is an EBML variable-length integer:
   the number of leading zero bits in its first byte gives the number of extra
   bytes, and the marker bit is not part of the value. The size is decoded
   with its real width, so the payload offset is correct for multi-byte sizes
   (e.g. 40 04) as well as the usual single byte 84. A first byte such as 04
   is a 6-byte integer, not a raw length.

   A 42 82 pair that does not introduce a "webm" payload does not end the
   scan; later offsets are still tried.

   Returns [Some "video/webm"] on a match, [None] otherwise. *)
let check_webm data =
  let len = Bytes.length data in
  let byte i = Bytes.get_uint8 data i in
  let has_magic =
    len >= 4
    && byte 0 = 0x1a
    && byte 1 = 0x45
    && byte 2 = 0xdf
    && byte 3 = 0xa3
  in
  (* Decodes the variable-length integer at [pos]; [Some (width, value)], or
     [None] when it is malformed (first byte 0) or truncated. *)
  let read_vint pos =
    if pos >= len then None
    else
      let first = byte pos in
      let rec find_marker width marker =
        if marker = 0 then None
        else if first land marker <> 0 then Some (width, marker)
        else find_marker (width + 1) (marker lsr 1)
      in
      match find_marker 1 0x80 with
      | None ->
          None
      | Some (width, marker) ->
          if pos + width > len then None
          else
            let rec value acc k =
              if k >= width then acc
              else value ((acc lsl 8) lor byte (pos + k)) (k + 1)
            in
            (* [marker - 1] keeps only the value bits below the marker. *)
            Some (width, value (first land (marker - 1)) 1)
  in
  (* [size_pos] is the offset of the size field right after the 42 82 ID. *)
  let doctype_is_webm size_pos =
    match read_vint size_pos with
    | None ->
        false
    | Some (width, size) ->
        let start = size_pos + width in
        size >= 4
        && size <= len - start
        && String.equal (Bytes.sub_string data start 4) "webm"
  in
  if not has_magic then None
  else
    (* ID (2 bytes) + size (at least 1 byte) + "webm" (4 bytes) must fit. *)
    let last = min 38 (len - 7) in
    let rec scan i =
      if i > last then None
      else if byte i = 0x42 && byte (i + 1) = 0x82 && doctype_is_webm (i + 2)
      then Some "video/webm"
      else scan (i + 1)
    in
    scan 4
(* MP3 without an ID3 tag: the first two bytes must carry the frame sync
   (byte 0 = FF, top three bits of byte 1 set). The header is then rejected
   when the layer bits are 0, the bitrate index is 0 or 15, or the sample-rate
   index is 3; otherwise the data is reported as audio/mpeg. *)
let check_mp3_frame data =
  let has_sync () =
    Bytes.get_uint8 data 0 = 0xff && Bytes.get_uint8 data 1 land 0xe0 = 0xe0
  in
  if Bytes.length data >= 4 && has_sync () then
    (* Extracts a bit field from one header byte. *)
    let field index shift mask =
      (Bytes.get_uint8 data index lsr shift) land mask
    in
    let layer = field 1 1 0x03 in
    let bitrate_idx = field 2 4 0x0f in
    let sample_idx = field 2 2 0x03 in
    let plausible =
      layer <> 0 && bitrate_idx <> 0 && bitrate_idx <> 15 && sample_idx <> 3
    in
    if plausible then Some "audio/mpeg" else None
  else None
(* Guesses a MIME type from the leading bytes of [data]. The detectors run in
   a fixed order (simple magic numbers, RIFF, MP4, WebM, bare MP3 frame) and
   the first one that answers wins; when none does, the result is
   "application/octet-stream". *)
let sniff data =
  [check_simple; check_riff; check_mp4; check_webm; check_mp3_frame]
  |> List.find_map (fun detect -> detect data)
  |> Option.value ~default:"application/octet-stream"
+1
pegasus/lib/util/util.ml
···66module Time = Time
77module Http = Http_
88module Html = Html
99+module Mime_sniff = Mime_sniff
9101011(* returns all blob refs in a record *)
1112let rec find_blob_refs (record : Mist.Lex.repo_record) : Mist.Blob_ref.t list =