this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Initial import from monorepo

+1085
+13
dune-project
··· 1 + (lang dune 3.17) 2 + (name zarr-v3) 3 + (generate_opam_files true) 4 + (license ISC) 5 + (package 6 + (name zarr-v3) 7 + (synopsis "Pure OCaml Zarr v3 reader with pluggable codecs") 8 + (description "Async Zarr v3 store reader supporting sharding, pluggable compression codecs, and HTTP range requests. Platform-independent.") 9 + (depends 10 + (ocaml (>= 5.2)) 11 + (lwt (>= 5.0)) 12 + (yojson (>= 2.0)) 13 + (alcotest (and :with-test (>= 0.8)))))
+105
lib/blosc.ml
··· 1 + type header = { 2 + nbytes : int; 3 + cbytes : int; 4 + typesize : int; 5 + blocksize : int; 6 + flags : int; 7 + } 8 + 9 + type shuffle_mode = No_shuffle | Byte_shuffle | Bit_shuffle 10 + 11 + let get_u32_le s off = 12 + Char.code s.[off] 13 + lor (Char.code s.[off+1] lsl 8) 14 + lor (Char.code s.[off+2] lsl 16) 15 + lor (Char.code s.[off+3] lsl 24) 16 + 17 + let parse_header s = 18 + if String.length s < 16 then failwith "Blosc: frame too short"; 19 + let flags = Char.code s.[2] in 20 + let typesize = Char.code s.[3] in 21 + let nbytes = get_u32_le s 4 in 22 + let blocksize = get_u32_le s 8 in 23 + let cbytes = get_u32_le s 12 in 24 + { nbytes; cbytes; typesize; blocksize; flags } 25 + 26 + let shuffle_mode h = 27 + if h.flags land 0x04 <> 0 then Bit_shuffle 28 + else if h.flags land 0x01 <> 0 then Byte_shuffle 29 + else No_shuffle 30 + 31 + (* Undo byte shuffle: elements were rearranged so that all first bytes 32 + of each element are contiguous, then all second bytes, etc. 33 + typesize = element size in bytes, n = number of elements *) 34 + let unshuffle ~typesize data = 35 + let len = String.length data in 36 + if typesize <= 1 then data (* no-op for single-byte elements *) 37 + else begin 38 + let n = len / typesize in 39 + let out = Bytes.create len in 40 + for i = 0 to n - 1 do 41 + for j = 0 to typesize - 1 do 42 + (* In shuffled layout: byte j of element i is at position j*n + i *) 43 + Bytes.set out (i * typesize + j) data.[j * n + i] 44 + done 45 + done; 46 + Bytes.to_string out 47 + end 48 + 49 + (* Undo bit shuffle: bits were rearranged across elements. 50 + For typesize=1 (int8), each bit position is grouped together: 51 + all bit-0s of every byte, then all bit-1s, etc. 
*) 52 + let unbitshuffle ~typesize data = 53 + let len = String.length data in 54 + let n_elems = len / typesize in 55 + let out = Bytes.make len '\x00' in 56 + (* Process element by element *) 57 + for elem = 0 to n_elems - 1 do 58 + for byte_in_elem = 0 to typesize - 1 do 59 + let out_byte_idx = elem * typesize + byte_in_elem in 60 + let v = ref 0 in 61 + for bit = 0 to 7 do 62 + (* In bitshuffled layout, bit 'bit' of byte 'byte_in_elem' of element 'elem' 63 + is at: bit_offset = (byte_in_elem * 8 + bit) * n_elems + elem 64 + stored as: byte_offset = bit_offset / 8, bit_within_byte = bit_offset mod 8 *) 65 + let bit_offset = (byte_in_elem * 8 + bit) * n_elems + elem in 66 + let src_byte = bit_offset / 8 in 67 + let src_bit = bit_offset mod 8 in 68 + if src_byte < len then begin 69 + let src_val = Char.code data.[src_byte] in 70 + if src_val land (1 lsl src_bit) <> 0 then 71 + v := !v lor (1 lsl bit) 72 + end 73 + done; 74 + Bytes.set out out_byte_idx (Char.chr !v) 75 + done 76 + done; 77 + Bytes.to_string out 78 + 79 + let decode ?decompress s = 80 + let h = parse_header s in 81 + if h.cbytes = h.nbytes + 16 then 82 + (* Memcpy mode: data is stored uncompressed, no shuffle applied *) 83 + String.sub s 16 h.nbytes 84 + else begin 85 + (* Compressed: parse block structure, decompress, then unshuffle. 
86 + After the 16-byte header: 87 + - uint32 LE: offset to first block within the frame 88 + - At that offset: uint32 LE compressed block size 89 + - Then: the actual compressed data *) 90 + let block_offset = get_u32_le s 16 in 91 + let block_csize = get_u32_le s block_offset in 92 + let compressed = String.sub s (block_offset + 4) block_csize in 93 + let decompressed = match decompress with 94 + | Some f -> f compressed h.nbytes 95 + | None -> 96 + failwith (Printf.sprintf 97 + "Blosc: compressed frame (cbytes=%d, nbytes=%d) but no decompressor provided" 98 + h.cbytes h.nbytes) 99 + in 100 + (* Apply unshuffle if needed *) 101 + match shuffle_mode h with 102 + | No_shuffle -> decompressed 103 + | Byte_shuffle -> unshuffle ~typesize:h.typesize decompressed 104 + | Bit_shuffle -> unbitshuffle ~typesize:h.typesize decompressed 105 + end
+37
lib/blosc.mli
(** Blosc frame decoder.

    Parses the 16-byte Blosc header, decompresses the payload via a
    pluggable decompressor, and applies unshuffle (byte or bit) if needed.

    For uncompressed (memcpy) frames, the raw data is returned directly. *)

type header = {
  nbytes : int; (** Uncompressed data size in bytes *)
  cbytes : int; (** Compressed size including the 16-byte header *)
  typesize : int; (** Element size in bytes (for shuffle) *)
  blocksize : int; (** Block size in bytes *)
  flags : int; (** Raw flags byte *)
}
(** Parsed Blosc frame header. *)

type shuffle_mode = No_shuffle | Byte_shuffle | Bit_shuffle
(** Shuffle filter recorded in the header flags: no shuffle, byte-level
    shuffle, or bit-level shuffle of element bytes. *)

val parse_header : string -> header
(** Parse a Blosc header from the first 16 bytes of a frame.
    @raise Failure if input is shorter than 16 bytes. *)

val shuffle_mode : header -> shuffle_mode
(** Extract the shuffle mode from the header flags. *)

val decode : ?decompress:(string -> int -> string) -> string -> string
(** Decode a Blosc frame, returning the raw uncompressed data.

    If the frame uses memcpy mode ([cbytes = nbytes + 16]), the payload
    is returned directly without decompression or unshuffle.

    If compressed, [decompress compressed_payload expected_size] is called
    to decompress the inner payload, then unshuffle is applied based on
    the header flags.

    @raise Failure if the frame is too short, or if compressed
    without a [decompress] callback. *)
+4
lib/dune
; Core platform-independent library. Depends only on lwt (async) and
; yojson (metadata); HTTP fetch and decompressors are injected by callers.
(library
 (name zarr_v3)
 (public_name zarr-v3)
 (libraries lwt yojson))
+384
lib/store.ml
(* Zarr v3 store reader: consolidated-metadata parsing, array handles,
   and chunk/shard reading over a pluggable async fetch function. *)

type fetch = string -> ?off:int -> ?len:int -> unit -> string Lwt.t
type codec = string -> string
type codec_registry = string -> codec option

type data_type = Int8 | Uint8 | Int32 | Float32 | Float64

type array_meta = {
  shape : int array;
  data_type : data_type;
  chunk_shape : int array;
  chunk_separator : string;
  is_sharded : bool;
  inner_chunk_shape : int array option;
  inner_codecs : string list;
  index_location : [ `Start | `End ];
}

type store = {
  base_url : string;
  fetch : fetch;
  codecs : codec_registry;
  consolidated : (string * Yojson.Safe.t) list;
}

type arr = {
  store : store;
  path : string;
  meta : array_meta;
}

let data_type_size = function
  | Int8 | Uint8 -> 1
  | Int32 | Float32 -> 4
  | Float64 -> 8

let data_type_of_string = function
  | "int8" -> Int8
  | "uint8" -> Uint8
  | "int32" -> Int32
  | "float32" -> Float32
  | "float64" -> Float64
  | s -> failwith (Printf.sprintf "Unsupported data type: %s" s)

(* --- JSON helpers --- *)

(* Total lookup: missing keys and non-object values yield `Null. *)
let json_member key = function
  | `Assoc l -> (try List.assoc key l with Not_found -> `Null)
  | _ -> `Null

let json_to_int = function `Int i -> i | _ -> failwith "expected int"
let json_to_string = function `String s -> s | _ -> failwith "expected string"
let json_to_list f = function `List l -> List.map f l | _ -> failwith "expected list"
let json_to_int_list j = json_to_list json_to_int j

(* --- Metadata parsing --- *)

(* Parse a Zarr v3 array metadata document (the array's zarr.json).
   Detects the sharding_indexed codec and, when present, records the inner
   chunk shape, inner codec chain, and index location.
   @raise Failure on missing/ill-typed fields or unsupported data types. *)
let parse_array_meta json_str =
  let j = Yojson.Safe.from_string json_str in
  let shape = Array.of_list (json_to_int_list (json_member "shape" j)) in
  let data_type = data_type_of_string (json_to_string (json_member "data_type" j)) in
  let chunk_grid = json_member "chunk_grid" j in
  let chunk_shape = Array.of_list (json_to_int_list
    (json_member "chunk_shape" (json_member "configuration" chunk_grid))) in
  let chunk_key = json_member "chunk_key_encoding" j in
  let chunk_separator = match json_member "separator"
      (json_member "configuration" chunk_key) with
    | `String s -> s | _ -> "/" in
  let codecs_json = match json_member "codecs" j with
    | `List l -> l | _ -> [] in
  (* Check for sharding *)
  let sharding = List.find_opt (fun c ->
    json_to_string (json_member "name" c) = "sharding_indexed") codecs_json in
  match sharding with
  | Some shard_codec ->
    let config = json_member "configuration" shard_codec in
    let inner_chunk_shape = Array.of_list
      (json_to_int_list (json_member "chunk_shape" config)) in
    let inner_codecs_json = match json_member "codecs" config with
      | `List l -> l | _ -> [] in
    let inner_codecs = List.map (fun c ->
      json_to_string (json_member "name" c)) inner_codecs_json in
    let index_location = match json_member "index_location" config with
      | `String "start" -> `Start | _ -> `End in
    { shape; data_type; chunk_shape; chunk_separator;
      is_sharded = true; inner_chunk_shape = Some inner_chunk_shape;
      inner_codecs; index_location }
  | None ->
    let codecs = List.map (fun c ->
      json_to_string (json_member "name" c)) codecs_json in
    { shape; data_type; chunk_shape; chunk_separator;
      is_sharded = false; inner_chunk_shape = None;
      inner_codecs = codecs; index_location = `End }

(* Extract (path, metadata) pairs from a root zarr.json's
   consolidated_metadata section; empty list when absent. *)
let parse_consolidated json_str =
  let j = Yojson.Safe.from_string json_str in
  let cm = json_member "consolidated_metadata" j in
  match json_member "metadata" cm with
  | `Assoc entries -> entries
  | _ -> []

(* --- Store and array access --- *)

(* Fetch and parse the root zarr.json, keeping its consolidated metadata. *)
let open_store ~(fetch : fetch) ~(codecs : codec_registry) base_url =
  let open Lwt.Syntax in
  let+ root_json = fetch (base_url ^ "/zarr.json") () in
  let consolidated = parse_consolidated root_json in
  { base_url; fetch; codecs; consolidated }

let store_meta store = store.consolidated

(* Look an array up by path in the consolidated metadata.
   @raise Failure if the path is not present. *)
let open_array store path =
  let meta_json = try
      let (_, j) = List.find (fun (k, _) -> k = path) store.consolidated in
      Yojson.Safe.to_string j
    with Not_found ->
      failwith (Printf.sprintf "Array %s not found in consolidated metadata" path)
  in
  let meta = parse_array_meta meta_json in
  Lwt.return { store; path; meta }

let array_meta arr = arr.meta

(* Attributes of a group node from consolidated metadata.
   @raise Failure if the group is not present. *)
let group_attrs store path =
  let (_, j) = try
      List.find (fun (k, _) -> k = path) store.consolidated
    with Not_found ->
      failwith (Printf.sprintf "Group %s not found in consolidated metadata" path)
  in
  match json_member "attributes" j with
  | `Assoc l -> l
  | _ -> []

(* --- Shard reading --- *)

(* Read a little-endian uint64 at [off], converted to an OCaml int.
   The all-ones "missing chunk" sentinel (2^64 - 1) therefore becomes -1. *)
let get_u64_le s off =
  let b i = Int64.of_int (Char.code s.[off + i]) in
  let ( lor ) = Int64.logor in
  let ( lsl ) = Int64.shift_left in
  Int64.to_int (
    (b 0) lor ((b 1) lsl 8) lor ((b 2) lsl 16) lor ((b 3) lsl 24)
    lor ((b 4) lsl 32) lor ((b 5) lsl 40) lor ((b 6) lsl 48) lor ((b 7) lsl 56))

(* Encode a synthetic single-entry shard index — offset = 0, nbytes = [n] —
   as 2 × uint64 LE plus a 4-byte placeholder checksum. Used to route
   non-sharded chunks through the generic shard-reading machinery. *)
let synth_single_index n =
  let b = Bytes.make 20 '\x00' in
  for i = 0 to 7 do
    Bytes.set b (8 + i) (Char.chr ((n lsr (8 * i)) land 0xff))
  done;
  Bytes.to_string b

(* Apply the inner codec chain to a raw chunk.
   Blosc.decode now handles the full pipeline: decompress + unshuffle.
   The "zstd" codec from the registry provides the raw decompressor.
   fold_right applies the LAST listed codec first, i.e. decode order is
   the reverse of the metadata's encode order. *)
let apply_inner_codecs codecs codec_names data =
  List.fold_right (fun name acc ->
    match name with
    | "bytes" -> acc
    | "blosc" ->
      let decompress = match codecs "zstd" with
        | Some f -> Some (fun s _nbytes -> f s)
        | None -> None
      in
      Blosc.decode ?decompress acc
    | "crc32c" -> acc
    | other ->
      match codecs other with
      | Some f -> f acc
      | None -> failwith (Printf.sprintf "Zarr: unknown codec %s" other)
  ) codec_names data

(* Compute the linear index of an inner chunk within a shard.
   inner_idx is the chunk's position within the shard (per dimension).
   inner_per_shard is the number of inner chunks per dimension. *)
let linearize_inner_idx inner_idx inner_per_shard ndim =
  let idx = ref 0 in
  let stride = ref 1 in
  for d = ndim - 1 downto 0 do
    idx := !idx + inner_idx.(d) * !stride;
    stride := !stride * inner_per_shard.(d)
  done;
  !idx

(* Decompress an inner chunk and copy overlapping pixels to the output buffer *)
let decode_inner codecs codec_names
    data local_off nbytes chunk_pixel_start chunk_pixel_stop
    ndim start shape elem_size inner_chunk_shape out_buf =
  let compressed = String.sub data local_off nbytes in
  let raw = apply_inner_codecs codecs codec_names compressed in
  let stop = Array.init ndim (fun d -> start.(d) + shape.(d)) in
  let copy_lo = Array.init ndim (fun d ->
    max chunk_pixel_start.(d) start.(d)) in
  let copy_hi = Array.init ndim (fun d ->
    min chunk_pixel_stop.(d) stop.(d)) in
  let idx = Array.make ndim 0 in
  (* Recursive element-wise copy over the intersection box. *)
  let rec copy dim =
    if dim = ndim then begin
      let src_off = ref 0 in
      let src_stride = ref elem_size in
      for d = ndim - 1 downto 0 do
        src_off := !src_off +
          (idx.(d) - chunk_pixel_start.(d)) * !src_stride;
        src_stride := !src_stride * inner_chunk_shape.(d)
      done;
      let dst_off = ref 0 in
      let dst_stride = ref elem_size in
      for d = ndim - 1 downto 0 do
        dst_off := !dst_off +
          (idx.(d) - start.(d)) * !dst_stride;
        dst_stride := !dst_stride * shape.(d)
      done;
      Bytes.blit_string raw !src_off out_buf !dst_off elem_size
    end else begin
      for i = copy_lo.(dim) to copy_hi.(dim) - 1 do
        idx.(dim) <- i;
        copy (dim + 1)
      done
    end
  in
  copy 0

(* Read the rectangular region [start, start+shape) of [arr], returning raw
   bytes in C order. Only overlapping shards/chunks are fetched; shard
   fetches run in parallel under Lwt. [on_shard i n] reports progress.

   Fixes vs. the previous version:
   - Non-sharded arrays: the whole chunk is the data. Previously the last
     20 bytes of the chunk were misread as a shard index (garbage offsets);
     now a synthetic one-entry index covering the chunk is used.
   - index_location = `Start is honoured when fetching the shard index
     (prefix range instead of always a suffix range). *)
let read ?on_shard arr ~start ~shape =
  let open Lwt.Syntax in
  let meta = arr.meta in
  let ndim = Array.length meta.shape in
  let stop = Array.init ndim (fun d -> start.(d) + shape.(d)) in
  let elem_size = data_type_size meta.data_type in
  let chunk_shape = meta.chunk_shape in

  (* For sharded arrays *)
  let inner_chunk_shape = match meta.inner_chunk_shape with
    | Some s -> s
    | None -> chunk_shape (* non-sharded: treat the chunk as both shard and inner *)
  in

  (* Output buffer, zero-filled so missing chunks read back as zeros *)
  let out_elems = Array.fold_left ( * ) 1 shape in
  let out_buf = Bytes.make (out_elems * elem_size) '\x00' in

  (* Inner chunks per shard, per dimension *)
  let inner_per_shard = Array.init ndim (fun d ->
    chunk_shape.(d) / inner_chunk_shape.(d)) in
  let n_inner_chunks = Array.fold_left ( * ) 1 inner_per_shard in
  let index_entry_size = 16 in (* 2 × uint64 *)
  let index_size = n_inner_chunks * index_entry_size + 4 (* CRC32C *) in

  (* Which shards do we need? *)
  let shard_start = Array.init ndim (fun d -> start.(d) / chunk_shape.(d)) in
  let shard_stop = Array.init ndim (fun d -> (stop.(d) - 1) / chunk_shape.(d) + 1) in

  (* Count total shards *)
  let n_shards = Array.init ndim (fun d -> shard_stop.(d) - shard_start.(d))
    |> Array.fold_left ( * ) 1 in
  let shards_done = ref 0 in

  (* Iterate over all needed shards *)
  let shard_tasks = ref [] in

  let rec iter_shards shard_idx dim =
    if dim = ndim then begin
      (* Build shard URL *)
      let shard_key = String.concat meta.chunk_separator
        ("c" :: Array.to_list (Array.map string_of_int shard_idx)) in
      let shard_url = Printf.sprintf "%s/%s/%s"
        arr.store.base_url arr.path shard_key in

      let task =
        (* Phase 1: fetch the shard index. For index_location = `End this
           is a suffix range request (bytes=-N); for `Start a prefix range.
           For non-sharded arrays, fetch the whole chunk. *)
        let* index_data =
          if meta.is_sharded then
            match meta.index_location with
            | `End -> arr.store.fetch shard_url ~len:index_size ()
            | `Start -> arr.store.fetch shard_url ~off:0 ~len:index_size ()
          else
            arr.store.fetch shard_url ()
        in

        (* Normalize to (optional full shard body, index bytes).
           Non-sharded chunks get a synthetic one-entry index covering the
           whole body so the generic path below handles them uniformly.
           A server that ignores range requests may also return the whole
           shard; in that case slice the real index out of it. *)
        let shard_data_opt, index_data =
          if not meta.is_sharded then
            (Some index_data, synth_single_index (String.length index_data))
          else if String.length index_data > index_size then
            let full = index_data in
            let idx_off = match meta.index_location with
              | `Start -> 0
              | `End -> String.length full - index_size
            in
            (Some full, String.sub full idx_off index_size)
          else
            (None, index_data)
        in

        incr shards_done;
        (match on_shard with
         | Some f -> f !shards_done n_shards
         | None -> ());

        (* Phase 2: Collect all overlapping inner chunks, then fetch
           the byte range spanning all of them in a single request. *)
        let needed_chunks = ref [] in
        let rec collect_inner inner_idx dim =
          if dim = ndim then begin
            let chunk_pixel_start = Array.init ndim (fun d ->
              shard_idx.(d) * chunk_shape.(d) +
              inner_idx.(d) * inner_chunk_shape.(d)) in
            let chunk_pixel_stop = Array.init ndim (fun d ->
              min (chunk_pixel_start.(d) + inner_chunk_shape.(d))
                meta.shape.(d)) in
            let overlaps = ref true in
            for d = 0 to ndim - 1 do
              if chunk_pixel_start.(d) >= stop.(d) ||
                 chunk_pixel_stop.(d) <= start.(d) then
                overlaps := false
            done;
            if !overlaps then begin
              let lin = linearize_inner_idx inner_idx inner_per_shard ndim in
              let offset = get_u64_le index_data (lin * index_entry_size) in
              let nbytes = get_u64_le index_data (lin * index_entry_size + 8) in
              (* Missing chunks are all-ones entries; get_u64_le maps them
                 to -1, which the nbytes > 0 guard rejects. *)
              if offset < max_int && nbytes > 0 then
                needed_chunks := (offset, nbytes,
                  chunk_pixel_start, chunk_pixel_stop) :: !needed_chunks
            end
          end else
            for i = 0 to inner_per_shard.(dim) - 1 do
              inner_idx.(dim) <- i;
              collect_inner (Array.copy inner_idx) (dim + 1)
            done
        in
        collect_inner (Array.make ndim 0) 0;

        let chunks = !needed_chunks in
        if chunks = [] then Lwt.return_unit
        else match shard_data_opt with
        | Some full ->
          (* Already have the full shard — just decompress in place *)
          List.iter (fun (offset, nbytes, cps, cpe) ->
            decode_inner arr.store.codecs meta.inner_codecs
              full offset nbytes cps cpe
              ndim start shape elem_size inner_chunk_shape out_buf
          ) chunks;
          Lwt.return_unit
        | None ->
          (* Group nearby sub-chunks into merged byte ranges.
             Sort by offset, then merge when gap < 64KB. *)
          let sorted = List.sort (fun (a,_,_,_) (b,_,_,_) -> compare a b) chunks in
          let max_gap = 65536 in
          (* Build groups: each group is (range_start, range_end, chunk list) *)
          let groups = ref [] in
          let cur_start = ref 0 in
          let cur_end = ref 0 in
          let cur_chunks = ref [] in
          List.iter (fun ((off, nb, _, _) as chunk) ->
            if !cur_chunks = [] then begin
              cur_start := off;
              cur_end := off + nb;
              cur_chunks := [chunk]
            end else if off - !cur_end <= max_gap then begin
              cur_end := max !cur_end (off + nb);
              cur_chunks := chunk :: !cur_chunks
            end else begin
              groups := (!cur_start, !cur_end, !cur_chunks) :: !groups;
              cur_start := off;
              cur_end := off + nb;
              cur_chunks := [chunk]
            end
          ) sorted;
          if !cur_chunks <> [] then
            groups := (!cur_start, !cur_end, !cur_chunks) :: !groups;

          (* Fetch each group in parallel *)
          let group_tasks = List.map (fun (g_start, g_end, g_chunks) ->
            let+ data = arr.store.fetch shard_url
              ~off:g_start ~len:(g_end - g_start) () in
            List.iter (fun (offset, nbytes, cps, cpe) ->
              decode_inner arr.store.codecs meta.inner_codecs
                data (offset - g_start) nbytes cps cpe
                ndim start shape elem_size inner_chunk_shape out_buf
            ) g_chunks
          ) !groups in
          Lwt.join group_tasks
      in
      shard_tasks := task :: !shard_tasks
    end else begin
      for i = shard_start.(dim) to shard_stop.(dim) - 1 do
        shard_idx.(dim) <- i;
        iter_shards (Array.copy shard_idx) (dim + 1)
      done
    end
  in
  iter_shards (Array.make ndim 0) 0;
  let+ () = Lwt.join !shard_tasks in
  Bytes.to_string out_buf
+135
lib/store.mli
(** Pure OCaml Zarr v3 store reader.

    {b Warning:} This library was vibe-coded with AI assistance and has not
    been thoroughly reviewed or tested. Use at your own risk and expect
    breaking changes.

    Reads sharded Zarr v3 arrays over HTTP with pluggable codecs and
    fetch functions. Platform-independent — bring your own HTTP client
    and decompressors.

    {2 Example}

    All opening and reading operations return [Lwt] promises:

    {[
      let* store = Zarr_v3.Store.open_store ~fetch ~codecs url in
      let* arr = Zarr_v3.Store.open_array store "utm31/embeddings" in
      let* data = Zarr_v3.Store.read arr ~start:[|100; 200; 0|] ~shape:[|4; 4; 128|] in
      (* data is a string of raw bytes in C-order *)
    ]}

    {2 Pluggable I/O}

    The [fetch] parameter provides HTTP access. The [codecs] parameter
    provides decompression. Both are passed in by platform backends
    (e.g., zarr-v3-unix for testing, tessera-zarr-jsoo for the browser). *)

(** {1 Pluggable interfaces} *)

type fetch = string -> ?off:int -> ?len:int -> unit -> string Lwt.t
(** [fetch url ?off ?len ()] fetches bytes from [url].
    If [off] and [len] are provided, fetches byte range [off..off+len-1].
    If only [len] is provided (no [off]), fetches the last [len] bytes
    (suffix range, i.e. HTTP [bytes=-len]).
    Returns the response body as a string. *)

type codec = string -> string
(** A decompression codec. Takes compressed bytes, returns decompressed bytes. *)

type codec_registry = string -> codec option
(** Maps codec names (e.g., ["zstd"]) to decompression functions.
    Return [None] for unknown codecs. The built-in [bytes] and [blosc]
    (memcpy mode) codecs are handled internally; a ["zstd"] entry is
    consulted to decompress non-memcpy Blosc frames. *)

(** {1 Metadata types} *)

type data_type =
  | Int8
  | Uint8
  | Int32
  | Float32
  | Float64
(** Supported Zarr data types. *)

type array_meta = {
  shape : int array;
  data_type : data_type;
  chunk_shape : int array;
  chunk_separator : string;
  is_sharded : bool;
  inner_chunk_shape : int array option;
  inner_codecs : string list;
  index_location : [ `Start | `End ];
}
(** Parsed metadata for a Zarr v3 array. *)

(** {1 Store and array handles} *)

type store
(** An open Zarr v3 store backed by HTTP. Holds the base URL,
    fetch function, codec registry, and consolidated metadata. *)

type arr
(** An open Zarr v3 array with parsed metadata and shard access methods. *)

(** {1 Metadata parsing} *)

val parse_array_meta : string -> array_meta
(** Parse array metadata from a JSON string.
    @raise Failure on invalid or unsupported metadata. *)

val parse_consolidated : string -> (string * Yojson.Safe.t) list
(** Parse consolidated metadata from a root [zarr.json] string.
    Returns a list of [(path, metadata_json)] pairs. *)

(** {1 Opening stores and arrays} *)

val open_store : fetch:fetch -> codecs:codec_registry -> string -> store Lwt.t
(** [open_store ~fetch ~codecs base_url] opens a Zarr v3 store.
    Fetches and parses the root [zarr.json], including any
    consolidated metadata. *)

val open_array : store -> string -> arr Lwt.t
(** [open_array store path] opens an array by path (e.g., ["utm31/scales"]).
    Uses consolidated metadata if available.
    @raise Failure if the array is not found. *)

(** {1 Metadata access} *)

val array_meta : arr -> array_meta
(** Get the parsed metadata for an open array. *)

val store_meta : store -> (string * Yojson.Safe.t) list
(** Access the consolidated metadata entries.
    Returns all [(path, json)] pairs from the root [zarr.json]. *)

val group_attrs : store -> string -> (string * Yojson.Safe.t) list
(** [group_attrs store path] returns the attributes of a group
    (e.g., ["utm31"] for spatial transform and CRS info).
    @raise Failure if the group is not found. *)

(** {1 Reading data} *)

val read : ?on_shard:(int -> int -> unit) ->
  arr -> start:int array -> shape:int array -> string Lwt.t
(** [read ?on_shard arr ~start ~shape] reads a rectangular region of an array.

    [start] is the origin (inclusive) in pixel coordinates.
    [shape] is the size of the region in each dimension.
    Returns raw bytes in C-order. The caller must interpret the bytes
    according to {!array_meta.data_type}.

    [on_shard i n] is called when shard [i] of [n] total has been fetched.

    For sharded arrays, fetches only the shards that overlap the
    requested region. Shard fetches run in parallel via [Lwt.join].

    NOTE(review): the implementation does not explicitly validate that
    the region lies within the array bounds; out-of-range requests may
    instead surface as fetch-layer errors — confirm before relying on
    bounds checking. *)

(** {1 Utility} *)

val data_type_size : data_type -> int
(** Size in bytes of a single element of the given data type. *)

val data_type_of_string : string -> data_type
(** Parse a Zarr data type string (e.g., ["int8"], ["float32"]).
    @raise Failure for unsupported types. *)
+7
test/dune
; Offline unit tests (alcotest).
(executable
 (name test_zarr_v3)
 (libraries zarr-v3 alcotest))

; Live integration test; needs network access and the unix/tessera backends.
(executable
 (name test_live)
 (libraries zarr-v3 zarr-v3-unix tessera-zarr tessera-linalg lwt lwt.unix))
+132
test/test_live.ml
(* Integration test against the live GeoTessera Zarr store.
   Requires network access and curl on PATH.

   Usage: dune exec zarr-v3/test/test_live.exe *)

let () = Lwt_main.run begin
  let open Lwt.Syntax in
  let base = "https://dl2.geotessera.org/zarr/v1/2024.zarr" in

  Printf.printf "=== Opening store ===\n%!";
  let* store = Zarr_v3.Store.open_store ~fetch:Zarr_v3_unix.fetch
    ~codecs:Zarr_v3_unix.codecs base in

  (* Check consolidated metadata *)
  let entries = Zarr_v3.Store.store_meta store in
  Printf.printf "Consolidated metadata: %d entries\n%!" (List.length entries);
  List.iter (fun (k, _) -> Printf.printf " %s\n" k) entries;

  (* Check group attributes for utm31 *)
  Printf.printf "\n=== UTM31 group attributes ===\n%!";
  let attrs = Zarr_v3.Store.group_attrs store "utm31" in
  (match List.assoc_opt "spatial:transform" (List.map (fun (k,v) -> (k, v)) attrs) with
   | Some j -> Printf.printf " spatial:transform = %s\n" (Yojson.Safe.to_string j)
   | None -> Printf.printf " (no spatial:transform)\n");
  (match List.assoc_opt "proj:code" attrs with
   | Some j -> Printf.printf " proj:code = %s\n" (Yojson.Safe.to_string j)
   | None -> Printf.printf " (no proj:code)\n");

  (* Open scales array and check metadata *)
  Printf.printf "\n=== Scales array ===\n%!";
  let* scales = Zarr_v3.Store.open_array store "utm31/scales" in
  let sm = Zarr_v3.Store.array_meta scales in
  Printf.printf " shape = %s\n"
    (String.concat " x " (Array.to_list (Array.map string_of_int sm.shape)));
  Printf.printf " dtype = %s, chunk_shape = %s\n"
    (match sm.data_type with Zarr_v3.Store.Float32 -> "float32" | _ -> "other")
    (String.concat " x " (Array.to_list (Array.map string_of_int sm.chunk_shape)));

  (* Read a 4x4 region of scales near Cambridge *)
  (* Cambridge UTM31N: easting~305000, northing~5782000 *)
  (* pixel_col = (305000 - 167600) / 10 = 13740 *)
  (* pixel_row = (6933480 - 5782000) / 10 = 115148 *)
  (* NOTE(review): origin/pixel-size constants above are hard-coded to this
     dataset's grid — confirm against the store's spatial:transform. *)
  Printf.printf "\n=== Reading 4x4 scales near Cambridge ===\n%!";
  let* data = Zarr_v3.Store.read scales ~start:[|115148; 13740|] ~shape:[|4; 4|] in
  Printf.printf " Got %d bytes (expected %d for 4x4 float32)\n%!"
    (String.length data) (4 * 4 * 4);
  for i = 0 to 15 do
    let off = i * 4 in
    (* Reassemble a little-endian float32 from 4 raw bytes *)
    let bits = Int32.logor (Int32.of_int (Char.code data.[off]))
      (Int32.logor (Int32.shift_left (Int32.of_int (Char.code data.[off+1])) 8)
        (Int32.logor (Int32.shift_left (Int32.of_int (Char.code data.[off+2])) 16)
          (Int32.shift_left (Int32.of_int (Char.code data.[off+3])) 24))) in
    let v = Int32.float_of_bits bits in
    Printf.printf " scale[%d,%d] = %.6f\n" (i / 4) (i mod 4) v
  done;

  (* Also test tessera-zarr layer *)
  Printf.printf "\n=== Testing tessera-zarr zone_info ===\n%!";
  let zi = Tessera_zarr.zone_info store "utm31" in
  Printf.printf " zone=%d, origin_e=%.1f, origin_n=%.1f, pixel_size=%.1f\n"
    zi.zone zi.origin_easting zi.origin_northing zi.pixel_size;

  (* Test full fetch_region pipeline *)
  Printf.printf "\n=== Testing v1 fetch_region (small bbox near Cambridge) ===\n%!";
  let bbox = Geotessera.{ min_lon = 0.11; min_lat = 52.19;
    max_lon = 0.13; max_lat = 52.21 } in
  let progress msg = Printf.printf " [v1 progress] %s\n%!" msg in
  let* (mat, h, w, bounds) = Tessera_zarr.fetch_region ~progress ~store bbox in
  Printf.printf " mosaic: %d x %d (%d rows, %d cols in mat)\n%!" h w mat.Linalg.rows mat.Linalg.cols;
  Printf.printf " bounds: S=%.4f N=%.4f W=%.4f E=%.4f\n%!"
    bounds.min_lat bounds.max_lat bounds.min_lon bounds.max_lon;
  (* Spot-check: first pixel should have non-zero embedding values *)
  let v0 = Linalg.mat_get mat 0 0 in
  let v1 = Linalg.mat_get mat 0 1 in
  Printf.printf " first pixel: feat[0]=%.4f, feat[1]=%.4f\n%!" v0 v1;
  (* Row index of the mosaic's central pixel in the flattened matrix *)
  let mid = (h / 2) * w + (w / 2) in
  let vm0 = Linalg.mat_get mat mid 0 in
  let vm1 = Linalg.mat_get mat mid 1 in
  Printf.printf " center pixel: feat[0]=%.4f, feat[1]=%.4f\n%!" vm0 vm1;
  (* Count non-zero pixels *)
  let nz = ref 0 in
  for i = 0 to h * w - 1 do
    if Float.abs (Linalg.mat_get mat i 0) > 0.001 then incr nz
  done;
  Printf.printf " non-zero pixels: %d / %d\n%!" !nz (h * w);

  (* === V2 store test === *)
  Printf.printf "\n=== Opening v2 store ===\n%!";
  let v2_base = "https://dl2.geotessera.org/zarr/v2/store.zarr" in
  let* store2 = Zarr_v3.Store.open_store ~fetch:Zarr_v3_unix.fetch
    ~codecs:Zarr_v3_unix.codecs v2_base in

  let entries2 = Zarr_v3.Store.store_meta store2 in
  Printf.printf "V2 consolidated metadata: %d entries\n%!" (List.length entries2);

  (* Open v2 embeddings to check shape *)
  Printf.printf "\n=== V2 utm31/embeddings ===\n%!";
  let* emb2 = Zarr_v3.Store.open_array store2 "utm31/embeddings" in
  let em2 = Zarr_v3.Store.array_meta emb2 in
  Printf.printf " shape = %s\n"
    (String.concat " x " (Array.to_list (Array.map string_of_int em2.shape)));
  Printf.printf " sharded = %b, chunk_shape = %s\n" em2.is_sharded
    (String.concat " x " (Array.to_list (Array.map string_of_int em2.chunk_shape)));
  (match em2.inner_chunk_shape with
   | Some s -> Printf.printf " inner_chunk_shape = %s\n"
       (String.concat " x " (Array.to_list (Array.map string_of_int s)))
   | None -> Printf.printf " (no inner chunks)\n");

  (* Test fetch_region with v2 store *)
  Printf.printf "\n=== V2 fetch_region (small bbox near Cambridge, year=2024) ===\n%!";
  let progress msg = Printf.printf " [progress] %s\n%!" msg in
  let* (mat2, h2, w2, bounds2) =
    Tessera_zarr.fetch_region ~progress ~year:2024 ~store:store2 bbox in
  Printf.printf " mosaic: %d x %d (%d rows, %d cols in mat)\n%!"
    h2 w2 mat2.Linalg.rows mat2.Linalg.cols;
  Printf.printf " bounds: S=%.4f N=%.4f W=%.4f E=%.4f\n%!"
    bounds2.min_lat bounds2.max_lat bounds2.min_lon bounds2.max_lon;
  let v0_2 = Linalg.mat_get mat2 0 0 in
  let v1_2 = Linalg.mat_get mat2 0 1 in
  Printf.printf " first pixel: feat[0]=%.4f, feat[1]=%.4f\n%!" v0_2 v1_2;
  let mid2 = (h2 / 2) * w2 + (w2 / 2) in
  let vm0_2 = Linalg.mat_get mat2 mid2 0 in
  let vm1_2 = Linalg.mat_get mat2 mid2 1 in
  Printf.printf " center pixel: feat[0]=%.4f, feat[1]=%.4f\n%!" vm0_2 vm1_2;
  let nz2 = ref 0 in
  for i = 0 to h2 * w2 - 1 do
    if Float.abs (Linalg.mat_get mat2 i 0) > 0.001 then incr nz2
  done;
  Printf.printf " non-zero pixels: %d / %d\n%!" !nz2 (h2 * w2);

  Printf.printf "\n=== Done ===\n%!";
  Lwt.return_unit
end
+240
test/test_zarr_v3.ml
(* Unit tests for zarr-v3: Blosc header parsing and metadata parsing *)

(* ---- Blosc tests ---- *)

(* Write [v] into [buf] at [off .. off+3] as a little-endian uint32. *)
let set_u32_le buf off v =
  Bytes.set buf off (Char.chr (v land 0xFF));
  Bytes.set buf (off + 1) (Char.chr ((v lsr 8) land 0xFF));
  Bytes.set buf (off + 2) (Char.chr ((v lsr 16) land 0xFF));
  Bytes.set buf (off + 3) (Char.chr ((v lsr 24) land 0xFF))

(* Build a minimal Blosc frame: 16-byte header followed by [data] verbatim.
   The flags byte 0x93 includes the memcpy bit, so the decoder under test
   should return the payload unchanged (see [test_blosc_decode_memcpy]). *)
let make_blosc_frame ~nbytes ~data =
  let cbytes = 16 + String.length data in
  (* Zero-fill so every header byte is deterministic. *)
  let buf = Bytes.make cbytes '\x00' in
  Bytes.set buf 0 '\x02'; (* version *)
  Bytes.set buf 1 '\x01'; (* versionlz *)
  Bytes.set buf 2 '\x93'; (* flags *)
  Bytes.set buf 3 '\x04'; (* typesize *)
  set_u32_le buf 4 nbytes;  (* nbytes *)
  set_u32_le buf 8 nbytes;  (* blocksize = nbytes *)
  set_u32_le buf 12 cbytes; (* cbytes *)
  Bytes.blit_string data 0 buf 16 (String.length data);
  Bytes.to_string buf

(* Build a compressed Blosc frame with a block offset table:
   16-byte header + 4-byte block offset + 4-byte block csize + 4 bytes of
   fake compressed payload. nbytes = 64, typesize = 1, cbytes = 28.
   NOTE: the buffer must be zero-filled — [Bytes.create] returns
   uninitialized bytes, which previously left the flags byte (index 2)
   and other unset header bytes holding garbage, making these tests
   nondeterministic. With flags = 0 the frame is unambiguously "compressed". *)
let make_compressed_frame () =
  let compressed_data = "\xDE\xAD\xBE\xEF" in (* 4 bytes fake compressed *)
  let cbytes = 16 + 4 + 4 + 4 in (* = 28 *)
  let buf = Bytes.make cbytes '\x00' in
  Bytes.set buf 0 '\x02';             (* version *)
  Bytes.set buf 3 '\x01';             (* typesize = 1 *)
  Bytes.set buf 4 '\x40';             (* nbytes = 64 *)
  Bytes.set buf 8 '\x40';             (* blocksize = 64 *)
  Bytes.set buf 12 (Char.chr cbytes); (* cbytes = 28 *)
  Bytes.set buf 16 '\x14';            (* block offset: 20 = 16 header + 4 offset table *)
  Bytes.set buf 20 '\x04';            (* block csize: 4 *)
  Bytes.blit_string compressed_data 0 buf 24 4;
  Bytes.to_string buf

let test_blosc_parse_header () =
  let frame = make_blosc_frame ~nbytes:64 ~data:(String.make 64 '\x42') in
  let h = Zarr_v3.Blosc.parse_header frame in
  Alcotest.(check int) "nbytes" 64 h.nbytes;
  Alcotest.(check int) "cbytes" 80 h.cbytes;
  Alcotest.(check int) "typesize" 4 h.typesize;
  Alcotest.(check int) "blocksize" 64 h.blocksize

(* With the memcpy flag set, [decode] must return the payload verbatim. *)
let test_blosc_decode_memcpy () =
  let payload = String.init 16 (fun i -> Char.chr (i + 1)) in
  let frame = make_blosc_frame ~nbytes:16 ~data:payload in
  let result = Zarr_v3.Blosc.decode frame in
  Alcotest.(check int) "length" 16 (String.length result);
  Alcotest.(check char) "first" '\x01' result.[0];
  Alcotest.(check char) "last" '\x10' result.[15]

let test_blosc_decode_too_short () =
  Alcotest.check_raises "too short" (Failure "Blosc: frame too short")
    (fun () -> ignore (Zarr_v3.Blosc.decode "short"))

(* A genuinely compressed frame must be rejected when no decompressor
   callback is supplied. *)
let test_blosc_decode_compressed_no_decompressor () =
  let frame = make_compressed_frame () in
  Alcotest.check_raises "no decompressor"
    (Failure "Blosc: compressed frame (cbytes=28, nbytes=64) but no decompressor provided")
    (fun () -> ignore (Zarr_v3.Blosc.decode frame))

(* The same frame decodes once a decompressor is provided; the stub
   expands any input to [nbytes] copies of '\x42'. *)
let test_blosc_decode_with_decompressor () =
  let frame = make_compressed_frame () in
  let decompress _data nbytes = String.make nbytes '\x42' in
  let result = Zarr_v3.Blosc.decode ~decompress frame in
  Alcotest.(check int) "decompressed length" 64 (String.length result);
  Alcotest.(check char) "decompressed content" '\x42' result.[0]

(* ---- Metadata parsing tests ---- *)

(* Sharded int8 array with nested codecs, as produced for embeddings. *)
let embeddings_meta_json = {|{
  "shape": [1297408, 66560, 128],
  "data_type": "int8",
  "chunk_grid": {
    "name": "regular",
    "configuration": {"chunk_shape": [256, 256, 128]}
  },
  "chunk_key_encoding": {
    "name": "default",
    "configuration": {"separator": "/"}
  },
  "fill_value": 0,
  "codecs": [{
    "name": "sharding_indexed",
    "configuration": {
      "chunk_shape": [4, 4, 128],
      "codecs": [
        {"name": "bytes"},
        {"name": "blosc", "configuration": {"cname": "zstd"}}
      ],
      "index_codecs": [
        {"name": "bytes", "configuration": {"endian": "little"}},
        {"name": "crc32c"}
      ],
      "index_location": "end"
    }
  }],
  "zarr_format": 3,
  "node_type": "array"
}|}

(* Sharded float32 array with a string fill_value ("NaN"). *)
let scales_meta_json = {|{
  "shape": [1297408, 66560],
  "data_type": "float32",
  "chunk_grid": {
    "name": "regular",
    "configuration": {"chunk_shape": [256, 256]}
  },
  "chunk_key_encoding": {
    "name": "default",
    "configuration": {"separator": "/"}
  },
  "fill_value": "NaN",
  "codecs": [{
    "name": "sharding_indexed",
    "configuration": {
      "chunk_shape": [4, 4],
      "codecs": [
        {"name": "bytes", "configuration": {"endian": "little"}},
        {"name": "blosc", "configuration": {"cname": "zstd"}}
      ],
      "index_codecs": [
        {"name": "bytes", "configuration": {"endian": "little"}},
        {"name": "crc32c"}
      ],
      "index_location": "end"
    }
  }],
  "zarr_format": 3,
  "node_type": "array"
}|}

(* Plain (non-sharded) int32 array. *)
let simple_meta_json = {|{
  "shape": [128],
  "data_type": "int32",
  "chunk_grid": {
    "name": "regular",
    "configuration": {"chunk_shape": [128]}
  },
  "chunk_key_encoding": {
    "name": "default",
    "configuration": {"separator": "/"}
  },
  "fill_value": 0,
  "codecs": [{"name": "bytes", "configuration": {"endian": "little"}}],
  "zarr_format": 3,
  "node_type": "array"
}|}

(* Alcotest testable for int arrays, compared structurally. *)
let int_array = Alcotest.testable
  (fun fmt a -> Format.fprintf fmt "[%s]"
    (String.concat "; " (Array.to_list (Array.map string_of_int a))))
  ( = )

let test_parse_embeddings_meta () =
  let open Zarr_v3.Store in
  let m = Zarr_v3.Store.parse_array_meta embeddings_meta_json in
  Alcotest.(check int_array) "shape" [|1297408; 66560; 128|] m.shape;
  Alcotest.(check bool) "dtype is Int8" true (m.data_type = Int8);
  Alcotest.(check int_array) "chunk_shape" [|256; 256; 128|] m.chunk_shape;
  Alcotest.(check bool) "is sharded" true m.is_sharded;
  Alcotest.(check (option int_array)) "inner_chunk_shape"
    (Some [|4; 4; 128|]) m.inner_chunk_shape;
  Alcotest.(check (list string)) "inner codecs" ["bytes"; "blosc"] m.inner_codecs;
  Alcotest.(check bool) "index at end" true (m.index_location = `End)

let test_parse_scales_meta () =
  let open Zarr_v3.Store in
  let m = Zarr_v3.Store.parse_array_meta scales_meta_json in
  Alcotest.(check int_array) "shape" [|1297408; 66560|] m.shape;
  Alcotest.(check bool) "dtype is Float32" true (m.data_type = Float32);
  Alcotest.(check int_array) "chunk_shape" [|256; 256|] m.chunk_shape;
  Alcotest.(check bool) "is sharded" true m.is_sharded;
  Alcotest.(check (option int_array)) "inner_chunk_shape"
    (Some [|4; 4|]) m.inner_chunk_shape

let test_parse_simple_meta () =
  let open Zarr_v3.Store in
  let m = Zarr_v3.Store.parse_array_meta simple_meta_json in
  Alcotest.(check int_array) "shape" [|128|] m.shape;
  Alcotest.(check bool) "dtype is Int32" true (m.data_type = Int32);
  Alcotest.(check bool) "not sharded" false m.is_sharded;
  Alcotest.(check (option int_array)) "no inner chunks" None m.inner_chunk_shape

let test_data_type_size () =
  Alcotest.(check int) "int8" 1 (Zarr_v3.Store.data_type_size Zarr_v3.Store.Int8);
  Alcotest.(check int) "float32" 4 (Zarr_v3.Store.data_type_size Zarr_v3.Store.Float32);
  Alcotest.(check int) "float64" 8 (Zarr_v3.Store.data_type_size Zarr_v3.Store.Float64)

let test_data_type_of_string () =
  Alcotest.(check bool) "int8" true (Zarr_v3.Store.data_type_of_string "int8" = Zarr_v3.Store.Int8);
  Alcotest.(check bool) "float32" true (Zarr_v3.Store.data_type_of_string "float32" = Zarr_v3.Store.Float32);
  Alcotest.check_raises "bad type" (Failure "Unsupported data type: complex128")
    (fun () -> ignore (Zarr_v3.Store.data_type_of_string "complex128"))

(* ---- Run ---- *)

let () =
  Alcotest.run "zarr-v3" [
    ("blosc", [
      Alcotest.test_case "parse header" `Quick test_blosc_parse_header;
      Alcotest.test_case "decode memcpy" `Quick test_blosc_decode_memcpy;
      Alcotest.test_case "too short" `Quick test_blosc_decode_too_short;
      Alcotest.test_case "compressed no decompressor" `Quick test_blosc_decode_compressed_no_decompressor;
      Alcotest.test_case "compressed with decompressor" `Quick test_blosc_decode_with_decompressor;
    ]);
    ("metadata", [
      Alcotest.test_case "embeddings" `Quick test_parse_embeddings_meta;
      Alcotest.test_case "scales" `Quick test_parse_scales_meta;
      Alcotest.test_case "simple (non-sharded)" `Quick test_parse_simple_meta;
      Alcotest.test_case "data_type_size" `Quick test_data_type_size;
      Alcotest.test_case "data_type_of_string" `Quick test_data_type_of_string;
    ]);
  ]
+28
zarr-v3.opam
··· 1 + # This file is generated by dune, edit dune-project instead 2 + opam-version: "2.0" 3 + synopsis: "Pure OCaml Zarr v3 reader with pluggable codecs" 4 + description: 5 + "Async Zarr v3 store reader supporting sharding, pluggable compression codecs, and HTTP range requests. Platform-independent." 6 + license: "ISC" 7 + depends: [ 8 + "dune" {>= "3.17"} 9 + "ocaml" {>= "5.2"} 10 + "lwt" {>= "5.0"} 11 + "yojson" {>= "2.0"} 12 + "alcotest" {with-test & >= "0.8"} 13 + "odoc" {with-doc} 14 + ] 15 + build: [ 16 + ["dune" "subst"] {dev} 17 + [ 18 + "dune" 19 + "build" 20 + "-p" 21 + name 22 + "-j" 23 + jobs 24 + "@install" 25 + "@runtest" {with-test} 26 + "@doc" {with-doc} 27 + ] 28 + ]