objective categorical abstract machine language personal data server
65
fork

Configure Feed

Select the types of activity you want to include in your feed.

Sniff mimetypes from uploaded blobs without Content-Type

fixes #2

futurGH f5dd05eb bb592a5e

+165 -3
+6 -3
pegasus/lib/api/repo/uploadBlob.ml
··· 3 3 let handler = 4 4 Xrpc.handler ~auth:Authorization (fun ctx -> 5 5 let did = Auth.get_authed_did_exn ctx.auth in 6 + let%lwt data = Dream.body ctx.req |> Lwt.map Bytes.of_string in 6 7 let mime_type = 7 - Option.value ~default:"application/octet-stream" 8 - (Dream.header ctx.req "Content-Type") 8 + match Dream.header ctx.req "Content-Type" with 9 + | Some ct -> 10 + ct 11 + | None -> 12 + Util.Mime_sniff.sniff data 9 13 in 10 14 Auth.assert_blob_scope ctx.auth ~mime:mime_type ; 11 - let%lwt data = Dream.body ctx.req |> Lwt.map Bytes.of_string in 12 15 let size = Int64.of_int @@ Bytes.length data in 13 16 let cid = Cid.create Raw data in 14 17 let%lwt user_db = User_store.connect did in
+158
pegasus/lib/util/mime_sniff.ml
(* Content-based MIME type detection.

   Each checker inspects the leading bytes of a blob and returns [Some mime]
   when a known signature is recognised, or [None] otherwise. [sniff] runs the
   checkers in a fixed order and falls back to "application/octet-stream". *)

(* [matches_at data offset pattern] is [true] iff the bytes of [pattern]
   appear in [data] starting at [offset]. Returns [false] (never raises) when
   [offset] is negative or the pattern would run past the end of [data]. *)
let matches_at data offset pattern =
  let plen = String.length pattern in
  (* Subtract instead of adding so a huge offset cannot overflow. *)
  if offset < 0 || Bytes.length data - offset < plen then false
  else
    let rec loop i =
      i >= plen
      || Bytes.get_uint8 data (offset + i) = Char.code pattern.[i]
         && loop (i + 1)
    in
    loop 0

(* [matches_at_bytes data offset pattern mask] is like [matches_at], but each
   byte of [data] and [pattern] is ANDed with the corresponding byte of [mask]
   before being compared. Returns [false] when [offset] is negative, when the
   pattern does not fit in [data], or when [mask] is shorter than [pattern]. *)
let matches_at_bytes data offset pattern mask =
  let plen = Bytes.length pattern in
  if
    offset < 0
    || Bytes.length data - offset < plen
    || Bytes.length mask < plen
  then false
  else
    let rec loop i =
      i >= plen
      ||
      let m = Bytes.get_uint8 mask i in
      Bytes.get_uint8 data (offset + i) land m = Bytes.get_uint8 pattern i land m
      && loop (i + 1)
    in
    loop 0

(* [get_be_u32 data offset] reads four bytes starting at [offset] as a
   big-endian unsigned integer. The caller must ensure that
   [offset .. offset + 3] is in bounds; [Bytes.get_uint8] raises
   [Invalid_argument] otherwise. *)
let get_be_u32 data offset =
  (Bytes.get_uint8 data offset lsl 24)
  lor (Bytes.get_uint8 data (offset + 1) lsl 16)
  lor (Bytes.get_uint8 data (offset + 2) lsl 8)
  lor Bytes.get_uint8 data (offset + 3)

(* Fixed signatures anchored at offset 0, as
   [(minimum length, magic bytes, MIME type)]. The list is searched in order
   and the first match wins. *)
let simple_signatures =
  [ (* images *)
    (8, "\x89PNG\r\n\x1a\n", "image/png")
  ; (3, "\xff\xd8\xff", "image/jpeg")
  ; (6, "GIF87a", "image/gif")
  ; (6, "GIF89a", "image/gif")
  ; (2, "BM", "image/bmp")
  ; (4, "\x00\x00\x01\x00", "image/x-icon")
  ; (4, "\x00\x00\x02\x00", "image/x-icon")
  ; (* audio *)
    (3, "ID3", "audio/mpeg")
  ; (5, "OggS\x00", "audio/ogg")
  ; (8, "MThd\x00\x00\x00\x06", "audio/midi")
  ; (* other *)
    (5, "%PDF-", "application/pdf")
  ; (4, "PK\x03\x04", "application/zip")
  ; (3, "\x1f\x8b\x08", "application/gzip") ]

(* Returns the MIME type of the first entry of [simple_signatures] whose magic
   bytes are found at the start of [data], if any. *)
let check_simple data =
  List.find_map
    (fun (min_len, pattern, mime) ->
      if Bytes.length data >= min_len && matches_at data 0 pattern then
        Some mime
      else None )
    simple_signatures

(* RIFF container: bytes 0-3 = "RIFF", 4-7 = size (ignored), 8-11 = sub-format *)
let check_riff data =
  if Bytes.length data < 12 || not (matches_at data 0 "RIFF") then None
  else if matches_at data 8 "WAVE" then Some "audio/wave"
  else if matches_at data 8 "AVI " then Some "video/avi"
  else if matches_at data 8 "WEBPVP" then
    (* [matches_at] bounds-checks, so no separate length test is needed *)
    Some "image/webp"
  else None

(* MP4: look for ftyp box and scan compatible brands for "mp4".

   Layout: bytes 0-3 = big-endian box size, 4-7 = "ftyp", 8-11 = major brand,
   12-15 = minor version, then compatible brands every 4 bytes from 16.
   The scan never reads past the declared box size or the end of [data]. *)
let check_mp4 data =
  if Bytes.length data < 12 then None
  else
    let box_size = get_be_u32 data 0 in
    if not (matches_at data 4 "ftyp") then None
    else if box_size < 8 || box_size land 3 <> 0 then None
    else
      let limit = min box_size (Bytes.length data) in
      let rec scan off =
        if off + 3 > limit then None
        else if matches_at data off "mp4" then Some "video/mp4"
        else scan (off + 4)
      in
      (* check major brand (offset 8) first, then compatible brands from 16 *)
      if limit >= 11 && matches_at data 8 "mp4" then Some "video/mp4"
      else scan 16

(* [read_vint data off] decodes an EBML variable-length integer starting at
   [off]. The number of leading zero bits in the first byte gives the encoded
   width (1 to 8 bytes); the marker bit is stripped from the value.
   Returns [Some (value, width)], or [None] if [off] is out of range, the
   first byte is zero (no marker bit), or the integer is truncated. *)
let read_vint data off =
  let len = Bytes.length data in
  if off < 0 || off >= len then None
  else
    let first = Bytes.get_uint8 data off in
    (* Walk the marker bit down from 0x80; [mask] reaches 0 after 8 steps. *)
    let rec find_width mask width =
      if mask = 0 then None
      else if first land mask <> 0 then Some (mask, width)
      else find_width (mask lsr 1) (width + 1)
    in
    match find_width 0x80 1 with
    | None ->
        None
    | Some (mask, width) ->
        if len - off < width then None
        else
          let rec fold acc i =
            if i >= width then acc
            else fold ((acc lsl 8) lor Bytes.get_uint8 data (off + i)) (i + 1)
          in
          Some (fold (first land (mask - 1)) 1, width)

(* WebM: EBML header 1A 45 DF A3, then scan for DocType element 42 82
   containing "webm".

   The DocType ID is searched for at offsets 4..38. Its size field is decoded
   as a proper EBML variable-length integer, so sizes written with more than
   one byte (for example 40 04) are handled. The first DocType element found
   is decisive: if it does not start with "webm" the result is [None]. *)
let check_webm data =
  let len = Bytes.length data in
  if not (matches_at data 0 "\x1a\x45\xdf\xa3") then None
  else
    (* A match needs the 2-byte ID, at least 1 size byte and 4 payload bytes,
       so offsets beyond [len - 6] can never succeed. *)
    let limit = min 38 (len - 6) in
    let rec scan i =
      if i > limit then None
      else if matches_at data i "\x42\x82" then (
        match read_vint data (i + 2) with
        | None ->
            None
        | Some (doc_len, width) ->
            let doc_off = i + 2 + width in
            if
              doc_len >= 4
              && doc_len <= len - doc_off
              && matches_at data doc_off "webm"
            then Some "video/webm"
            else None )
      else scan (i + 1)
    in
    scan 4

(* MP3 without ID3: frame sync 0xFF 0xE0 mask.

   After the 11-bit sync word the header fields are validated: the reserved
   version ID (bits 01), the reserved layer (00), the free and invalid bitrate
   indices (0 and 15) and the reserved sample-rate index (3) are all
   rejected. Only this single header is inspected. *)
let check_mp3_frame data =
  if Bytes.length data < 4 then None
  else if
    Bytes.get_uint8 data 0 <> 0xff || Bytes.get_uint8 data 1 land 0xe0 <> 0xe0
  then None
  else
    let b1 = Bytes.get_uint8 data 1 in
    let b2 = Bytes.get_uint8 data 2 in
    let version = (b1 lsr 3) land 0x03 in
    let layer = (b1 lsr 1) land 0x03 in
    let bitrate_idx = (b2 lsr 4) land 0x0f in
    let sample_idx = (b2 lsr 2) land 0x03 in
    if
      version = 1 || layer = 0 || bitrate_idx = 0 || bitrate_idx = 15
      || sample_idx = 3
    then None
    else Some "audio/mpeg"

(* [sniff data] returns the MIME type detected from the contents of [data].
   Checkers run in order (fixed signatures, RIFF, MP4, WebM, bare MP3 frame)
   and the first hit wins; "application/octet-stream" is returned when none
   of them recognises the data. *)
let sniff data =
  let checkers =
    [check_simple; check_riff; check_mp4; check_webm; check_mp3_frame]
  in
  match List.find_map (fun check -> check data) checkers with
  | Some mime ->
      mime
  | None ->
      "application/octet-stream"
+1
pegasus/lib/util/util.ml
··· 6 6 module Time = Time 7 7 module Http = Http_ 8 8 module Html = Html 9 + module Mime_sniff = Mime_sniff 9 10 10 11 (* returns all blob refs in a record *) 11 12 let rec find_blob_refs (record : Mist.Lex.repo_record) : Mist.Blob_ref.t list =