this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Initial import from monorepo

+321
+14
dune-project
; Project-level dune configuration for the tessera-zarr package.
(lang dune 3.17)

(name tessera-zarr)

; The .opam file is generated from the (package ...) stanza below.
(generate_opam_files true)

(license ISC)

(package
 (name tessera-zarr)
 (synopsis "GeoTessera Zarr v3 client")
 (description "Fetches GeoTessera embeddings from sharded Zarr v3 stores. Maps WGS84 bounding boxes to UTM pixel ranges, dequantizes, and reprojects.")
 (depends
  (ocaml
   (>= 5.2))
  (zarr-v3
   (>= 0.1))
  (tessera-geotessera
   (>= 0.1))
  (tessera-linalg
   (>= 0.1))
  (lwt
   (>= 5.0))))
+4
lib/dune
; Build rule for the tessera_zarr library (published as tessera-zarr).
(library
 (name tessera_zarr)
 (public_name tessera-zarr)
 (libraries
  zarr-v3
  tessera-geotessera
  tessera-linalg
  lwt))
+189
lib/tessera_zarr.ml
(* GeoTessera Zarr v3 client.

   Maps a WGS84 bounding box onto the UTM pixel grid of a zone group,
   fetches the int8 embeddings and float32 scales for that window,
   dequantizes them, and resamples the result onto a regular WGS84 grid. *)

(* Spatial metadata of one UTM zone group. *)
type zone_info = {
  zone : int;
  origin_easting : float;
  origin_northing : float;
  pixel_size : float;
}

(* [zone_info store zone_name] reads the group attributes of [zone_name]
   and extracts the affine origin and pixel size from [spatial:transform]
   (elements 2, 5 and 0 respectively). The zone number comes from the
   [tessera:utm_zone] attribute when it is an integer, otherwise from the
   numeric suffix of [zone_name] after its first three characters.

   Raises [Failure] when the transform is missing, has fewer than six
   elements, contains a non-numeric entry, yields a non-positive pixel
   size, or when the zone number cannot be determined. *)
let zone_info store zone_name =
  let attrs = Zarr_v3.Store.group_attrs store zone_name in
  let transform =
    match List.assoc_opt "spatial:transform" attrs with
    | Some (`List l) ->
      Array.of_list
        (List.map
           (function
             | `Float f -> f
             | `Int i -> Float.of_int i
             | _ ->
               (* A silently substituted 0.0 would shift the whole grid. *)
               failwith
                 (Printf.sprintf "Non-numeric spatial:transform entry in %s"
                    zone_name))
           l)
    | _ -> failwith (Printf.sprintf "Missing spatial:transform in %s" zone_name)
  in
  if Array.length transform < 6 then
    failwith
      (Printf.sprintf "spatial:transform in %s has %d elements, expected 6"
         zone_name (Array.length transform));
  let pixel_size = Float.abs transform.(0) in
  (* [not (x > 0.0)] also rejects NaN; a zero size would divide by zero
     when converting UTM coordinates to pixel indices. *)
  if not (pixel_size > 0.0) then
    failwith
      (Printf.sprintf "Invalid pixel size in spatial:transform of %s" zone_name);
  let zone =
    match List.assoc_opt "tessera:utm_zone" attrs with
    | Some (`Int z) -> z
    | _ ->
      (* Fall back to the digits following the three-character prefix. *)
      let len = String.length zone_name in
      let suffix = if len > 3 then String.sub zone_name 3 (len - 3) else "" in
      (match int_of_string_opt suffix with
       | Some z -> z
       | None ->
         failwith
           (Printf.sprintf "Cannot determine UTM zone from %s" zone_name))
  in
  {
    zone;
    origin_easting = transform.(2);
    origin_northing = transform.(5);
    pixel_size;
  }

(* [read_f32_le s off] decodes the little-endian IEEE-754 float32 stored in
   the four bytes of [s] starting at [off]. *)
let read_f32_le s off = Int32.float_of_bits (String.get_int32_le s off)

(* [fetch_region ?progress ?year ~store bbox] fetches, dequantizes and
   reprojects the embeddings covering [bbox].

   - [progress] receives human-readable status messages.
   - [year] selects the time slice of a 4D (MegaZarr) embeddings array as
     index [year - 2017]; it is ignored for 3D (legacy) arrays.

   Returns a promise of [(mat, height, width, wgs84_bbox)] where [mat] has
   [height * width] rows (pixel index [row * width + col]) and 128 columns.

   Raises [Failure] (synchronously from [zone_info], otherwise from within
   the Lwt bind) when the zone metadata is invalid, the embeddings array
   has an unexpected rank, [year] is out of range, the bounding box does
   not intersect the zone grid, or the store returns too little data. *)
let fetch_region ?(progress = fun (_ : string) -> ()) ?(year = 2024) ~store bbox =
  let open Lwt.Syntax in
  let open Geotessera in
  (* 1. Determine UTM zone from bbox centre *)
  let center_lon = (bbox.min_lon +. bbox.max_lon) /. 2.0 in
  let zone = Geotessera.Utm.zone_of_lon center_lon in
  let zone_name = Printf.sprintf "utm%d" zone in

  (* 2. Get spatial metadata *)
  let zi = zone_info store zone_name in

  (* 3. Convert WGS84 bbox corners to UTM to find pixel range *)
  let corners = [
    (bbox.min_lon, bbox.min_lat); (bbox.max_lon, bbox.min_lat);
    (bbox.min_lon, bbox.max_lat); (bbox.max_lon, bbox.max_lat);
  ] in
  let utm_corners = List.map (fun (lon, lat) ->
      Geotessera.Utm.wgs84_to_utm ~zone lon lat) corners in
  let min_e = List.fold_left (fun acc (e, _) -> Float.min acc e)
      Float.infinity utm_corners in
  let max_e = List.fold_left (fun acc (e, _) -> Float.max acc e)
      Float.neg_infinity utm_corners in
  let min_n = List.fold_left (fun acc (_, n) -> Float.min acc n)
      Float.infinity utm_corners in
  let max_n = List.fold_left (fun acc (_, n) -> Float.max acc n)
      Float.neg_infinity utm_corners in

  (* Pixel coordinates (row 0 = max northing, row increases southward).
     The stop indices are clamped once the array extent is known. *)
  let pixel_size = zi.pixel_size in
  let col_start = max 0
      (Float.to_int (Float.floor ((min_e -. zi.origin_easting) /. pixel_size))) in
  let col_stop_unclamped =
    Float.to_int (Float.ceil ((max_e -. zi.origin_easting) /. pixel_size)) in
  let row_start = max 0
      (Float.to_int (Float.floor ((zi.origin_northing -. max_n) /. pixel_size))) in
  let row_stop_unclamped =
    Float.to_int (Float.ceil ((zi.origin_northing -. min_n) /. pixel_size)) in

  let n_features = 128 in

  (* 4. Fetch embeddings and scales in parallel.
     Detect layout by the number of dimensions in the embeddings array:
       MegaZarr: [time, band, y, x] — 4D, single store with year as dimension
       Legacy:   [y, x, band]       — 3D, one store per year *)
  let* emb_arr = Zarr_v3.Store.open_array store (zone_name ^ "/embeddings") in
  let* scales_arr = Zarr_v3.Store.open_array store (zone_name ^ "/scales") in

  let emb_meta = Zarr_v3.Store.array_meta emb_arr in
  let shape = emb_meta.shape in
  let is_megazarr, grid_h, grid_w =
    match Array.length shape with
    | 4 -> (true, shape.(2), shape.(3))
    | 3 -> (false, shape.(0), shape.(1))
    | n ->
      failwith
        (Printf.sprintf "Unexpected embeddings rank %d in %s (expected 3 or 4)"
           n zone_name)
  in

  (* Clamp the window to the array extent so the read never goes past the
     stored grid, and reject windows that miss the grid entirely. *)
  let col_stop = min grid_w col_stop_unclamped in
  let row_stop = min grid_h row_stop_unclamped in
  let tile_h = row_stop - row_start in
  let tile_w = col_stop - col_start in
  if tile_h <= 0 || tile_w <= 0 then
    failwith
      (Printf.sprintf "Bounding box does not intersect the %s grid" zone_name);
  let n_pixels = tile_h * tile_w in

  let on_emb_shard i n =
    progress (Printf.sprintf "Fetching embeddings: shard %d/%d" i n) in
  let on_scales_shard i n =
    progress (Printf.sprintf "Fetching scales: shard %d/%d" i n) in

  (* Both reads are started here, before either is awaited, so they run
     concurrently. *)
  let emb_fetch, scales_fetch =
    if is_megazarr then begin
      (* MegaZarr: embeddings[time, band, y, x], scales[time, y, x] *)
      let time_idx = year - 2017 in
      let n_times = shape.(0) in
      if time_idx < 0 || time_idx >= n_times then
        failwith (Printf.sprintf "Year %d out of range (2017-%d)" year
                    (2016 + n_times));
      ( Zarr_v3.Store.read ~on_shard:on_emb_shard emb_arr
          ~start:[| time_idx; 0; row_start; col_start |]
          ~shape:[| 1; n_features; tile_h; tile_w |],
        Zarr_v3.Store.read ~on_shard:on_scales_shard scales_arr
          ~start:[| time_idx; row_start; col_start |]
          ~shape:[| 1; tile_h; tile_w |] )
    end else
      (* Legacy per-year layout: embeddings[y, x, band], scales[y, x] *)
      ( Zarr_v3.Store.read ~on_shard:on_emb_shard emb_arr
          ~start:[| row_start; col_start; 0 |]
          ~shape:[| tile_h; tile_w; n_features |],
        Zarr_v3.Store.read ~on_shard:on_scales_shard scales_arr
          ~start:[| row_start; col_start |]
          ~shape:[| tile_h; tile_w |] )
  in

  let* emb_data = emb_fetch
  and* scales_data = scales_fetch in

  (* Fail with a clear message instead of an index error mid-loop. *)
  if String.length emb_data < n_pixels * n_features then
    failwith
      (Printf.sprintf "Embeddings read returned %d bytes, expected %d"
         (String.length emb_data) (n_pixels * n_features));
  if String.length scales_data < n_pixels * 4 then
    failwith
      (Printf.sprintf "Scales read returned %d bytes, expected %d"
         (String.length scales_data) (n_pixels * 4));

  progress "Dequantizing...";
  (* 5. Dequantize: float32 = int8 × scale.
     Legacy data is C-order [y, x, band]      → offset = pixel*128 + f
     MegaZarr data is C-order [1, band, y, x] → offset = f*H*W + pixel
     Scales are one float32 per pixel in both layouts. *)
  let mat = Linalg.create_mat ~rows:n_pixels ~cols:n_features in
  for pixel = 0 to n_pixels - 1 do
    let scale = read_f32_le scales_data (pixel * 4) in
    for f = 0 to n_features - 1 do
      let e_off =
        if is_megazarr then f * n_pixels + pixel
        else pixel * n_features + f
      in
      let emb_signed = String.get_int8 emb_data e_off in
      Linalg.mat_set mat pixel f (Float.of_int emb_signed *. scale)
    done
  done;

  (* 6. Compute actual UTM bounds of fetched pixels *)
  let utm_w = zi.origin_easting +. Float.of_int col_start *. pixel_size in
  let utm_e = zi.origin_easting +. Float.of_int col_stop *. pixel_size in
  let utm_n = zi.origin_northing -. Float.of_int row_start *. pixel_size in
  let utm_s = zi.origin_northing -. Float.of_int row_stop *. pixel_size in

  (* 7. Compute WGS84 bounds from the UTM corners.
     NOTE(review): only the SW and NE corners are converted, so the bounds
     are an approximation of the tile's footprint — confirm this is the
     intended envelope. *)
  let (w_lon, s_lat) = Geotessera.Utm.utm_to_wgs84 ~zone utm_w utm_s in
  let (e_lon, n_lat) = Geotessera.Utm.utm_to_wgs84 ~zone utm_e utm_n in
  let wgs_bbox = { min_lon = w_lon; min_lat = s_lat;
                   max_lon = e_lon; max_lat = n_lat } in

  (* 8. Reproject from UTM to WGS84 using the actual bounds.
     For each pixel centre in the output WGS84 grid, convert to UTM and
     sample the nearest input pixel. Output pixels that map outside the
     fetched tile are left as created by [Linalg.create_mat]. *)
  progress "Reprojecting...";
  let out_h = tile_h in
  let out_w = tile_w in
  let reprojected = Linalg.create_mat ~rows:(out_h * out_w) ~cols:n_features in
  for oi = 0 to out_h - 1 do
    let lat = wgs_bbox.max_lat -.
              (Float.of_int oi +. 0.5) *. (wgs_bbox.max_lat -. wgs_bbox.min_lat)
              /. Float.of_int out_h in
    for oj = 0 to out_w - 1 do
      let lon = wgs_bbox.min_lon +.
                (Float.of_int oj +. 0.5) *. (wgs_bbox.max_lon -. wgs_bbox.min_lon)
                /. Float.of_int out_w in
      let (e, n) = Geotessera.Utm.wgs84_to_utm ~zone lon lat in
      (* Map UTM coord to local pixel in mat.
         Row 0 = utm_n (north edge), row increases southward.
         Col 0 = utm_w (west edge), col increases eastward. *)
      let pi = Float.to_int (Float.floor ((utm_n -. n) /. pixel_size)) in
      let pj = Float.to_int (Float.floor ((e -. utm_w) /. pixel_size)) in
      if pi >= 0 && pi < tile_h && pj >= 0 && pj < tile_w then begin
        let in_idx = pi * tile_w + pj in
        let out_idx = oi * out_w + oj in
        for f = 0 to n_features - 1 do
          Linalg.mat_set reprojected out_idx f
            (Linalg.mat_get mat in_idx f)
        done
      end
    done
  done;

  Lwt.return (reprojected, out_h, out_w, wgs_bbox)
+85
lib/tessera_zarr.mli
(** GeoTessera Zarr v3 client.

    {b Warning:} This library was vibe-coded with AI assistance and has not
    been thoroughly reviewed or tested. Use at your own risk and expect
    breaking changes.

    Fetches GeoTessera embeddings from sharded Zarr v3 stores,
    mapping WGS84 bounding boxes to UTM pixel ranges. Dequantizes
    int8 embeddings using float32 scales and reprojects from the
    native UTM grid to a regular WGS84 grid.

    Supports the MegaZarr store layout (single store with year as
    dimension) as well as legacy per-year stores. The layout is
    detected automatically from the array dimensionality.

    {2 Example}

    {!fetch_region} returns an Lwt promise, so its result must be bound
    (or run with [Lwt_main.run]) before it can be destructured:

    {[
      let open Lwt.Syntax in
      let store = Zarr_v3.Store.open_store ~fetch ~codecs
        "https://dl2.geotessera.org/zarr/v2/store.zarr" in
      let* (mat, h, w, bounds) =
        Tessera_zarr.fetch_region ~year:2024 ~store bbox in
      ...
    ]}

    {2 MegaZarr store layout}

    {[
      store.zarr/
      ├── zarr.json           (consolidated metadata for all zones)
      ├── utm30/
      │   ├── embeddings      (int8, T×128×H×W, sharded 1×128×4096×4096)
      │   ├── scales          (float32, T×H×W, sharded 1×4096×4096)
      │   └── ...
      └── utm31/
          └── ...
    ]}

    Each zone group carries [spatial:transform] (6-element affine)
    and [proj:code] (e.g., ["EPSG:32631"]) attributes. This library
    reads [spatial:transform] and, when present, the integer
    [tessera:utm_zone] attribute. *)

(** {1 Spatial metadata} *)

type zone_info = {
  zone : int;               (** UTM zone number *)
  origin_easting : float;   (** Easting of pixel (0, 0) *)
  origin_northing : float;  (** Northing of pixel (0, 0) *)
  pixel_size : float;       (** Pixel size in metres (typically 10.0) *)
}
(** Spatial metadata extracted from a zone group's attributes. *)

val zone_info : Zarr_v3.Store.store -> string -> zone_info
(** [zone_info store zone_name] extracts spatial metadata from a zone
    group (e.g., ["utm31"]).

    The origin and pixel size are taken from elements 2 (easting),
    5 (northing) and 0 (pixel size, absolute value) of
    [spatial:transform]. The zone number is the integer
    [tessera:utm_zone] attribute when present; otherwise it is parsed
    from the characters of [zone_name] after the first three.
    @raise Failure if the group or required attributes are missing. *)

(** {1 Fetching embeddings} *)

val fetch_region :
  ?progress:(string -> unit) ->
  ?year:int ->
  store:Zarr_v3.Store.store ->
  Geotessera.bbox ->
  (Linalg.mat * int * int * Geotessera.bbox) Lwt.t
(** [fetch_region ?progress ?year ~store bbox] fetches dequantized
    embeddings for a WGS84 bounding box.

    [progress] (default: ignore) is called with short status messages
    while shards are fetched and during dequantization and reprojection.

    Automatically detects the store layout from array dimensionality:
    - {b MegaZarr} stores have 4D embeddings [[time, band, y, x]] — single
      store with year as dimension. [year] selects the time slice
      (default 2024); slice 0 is 2017 and the last valid year is
      determined by the length of the store's time dimension.
    - Legacy per-year stores have 3D embeddings [[y, x, band]] —
      [year] is ignored.

    Steps:
    + Determines the UTM zone from the bbox centre
    + Converts bbox corners to UTM pixel coordinates
    + Fetches [embeddings] and [scales] shards (in parallel)
    + Dequantizes: [float32 = int8 × scale]
    + Reprojects from the UTM pixel grid to a regular WGS84 grid

    Returns [(mosaic_mat, height, width, wgs84_bounds)] — the same
    tuple shape as {!Geotessera.fetch_mosaic_sync} for compatibility
    with existing notebook code. [mosaic_mat] has [height * width] rows
    (row index [row * width + col], row 0 at the northern edge) and 128
    columns. The output grid has the same height and width as the
    fetched UTM pixel window, and each output pixel takes the value of
    the nearest source pixel; output pixels that fall outside the
    fetched window are not written.

    @raise Failure if the UTM zone is not in the store or [year] is
    out of range for MegaZarr stores. *)
+29
tessera-zarr.opam
# This file is generated by dune, edit dune-project instead
opam-version: "2.0"
synopsis: "GeoTessera Zarr v3 client"
description:
  "Fetches GeoTessera embeddings from sharded Zarr v3 stores. Maps WGS84 bounding boxes to UTM pixel ranges, dequantizes, and reprojects."
license: "ISC"
# Runtime and documentation dependencies.
depends: [
  "dune" {>= "3.17"}
  "ocaml" {>= "5.2"}
  "zarr-v3" {>= "0.1"}
  "tessera-geotessera" {>= "0.1"}
  "tessera-linalg" {>= "0.1"}
  "lwt" {>= "5.0"}
  "odoc" {with-doc}
]
# Standard dune build invocation.
build: [
  ["dune" "subst"] {dev}
  ["dune" "build" "-p" name "-j" jobs "@install" "@runtest" {with-test} "@doc" {with-doc}]
]