My aggregated monorepo of OCaml code, automaintained
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Add Zarr-based notebook with browser backend

- tessera-zarr-jsoo: synchronous XHR fetch with range headers,
fetch_region_sync for notebook cells, Lwt.poll for sync execution
- New notebook: interactive_map_zarr.mld — same workflow as the
original but fetches from the Zarr v3 store instead of .npy tiles
- Add tessera-zarr-jsoo to jtw universe in build-site.sh

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

+346 -19
+1
build-site.sh
··· 71 71 opam-format bos odoc.model tyxml yojson uri jsonm \ 72 72 js_top_worker-widget-leaflet \ 73 73 tessera-geotessera-jsoo tessera-viz-jsoo tessera-tfjs \ 74 + tessera-zarr-jsoo \ 74 75 onnxrt -o "$SITE/_opam" 75 76 echo " universe built → $SITE/_opam/" 76 77 fi
+288
site/notebooks/interactive_map_zarr.mld
··· 1 + {0 TESSERA Interactive Map (Zarr)} 2 + 3 + Explore geospatial embeddings from the 4 + {{:https://geotessera.org}GeoTessera} foundation model 5 + directly in the browser. This version fetches embeddings from 6 + the GeoTessera {{:https://anil.recoil.org/notes/tessera-zarr-v3-layout}Zarr v3 store}, 7 + loading only the shards needed for your region of interest. 8 + 9 + {ol 10 + {- Draw a bounding box to select your region of interest} 11 + {- Fetch and visualise GeoTessera embeddings with PCA} 12 + {- Click on the map to place labelled training points} 13 + {- Run k-nearest-neighbours classification} 14 + {- View the classification overlay}} 15 + 16 + {@ocaml kind=setup[ 17 + #require "tessera-zarr-jsoo";; 18 + #require "tessera-viz-jsoo";; 19 + #require "tessera-tfjs";; 20 + #require "js_top_worker-widget-leaflet";; 21 + Widget_leaflet.register ();; 22 + (* Load TensorFlow.js in the worker *) 23 + let () = 24 + let open Js_of_ocaml in 25 + Js.Unsafe.fun_call 26 + (Js.Unsafe.get Js.Unsafe.global (Js.string "importScripts")) 27 + [| Js.Unsafe.inject (Js.string "https://cdn.jsdelivr.net/npm/@tensorflow/tfjs@4/dist/tf.min.js") |] 28 + ]} 29 + 30 + {1 Select region of interest} 31 + 32 + Draw a rectangle on the map to select the area you want to classify. 33 + The map is centred on Cambridge, UK — navigate to any area of interest 34 + before drawing. 35 + 36 + {@ocaml x[ 37 + (* Shared state *) 38 + let bbox : Geotessera.bbox option ref = ref None 39 + let mosaic : (Linalg.mat * int * int) option ref = ref None 40 + let mosaic_bounds : Geotessera.bbox option ref = ref None 41 + let projected : Linalg.mat option ref = ref None 42 + let training_points : (float * float * int) list ref = ref [] 43 + let current_class = ref 0 44 + let class_names = ref [| "water"; "land" |] 45 + let class_colors = [| "#2196F3"; "#4CAF50"; "#FF9800"; "#9C27B0"; "#F44336"; 46 + "#00BCD4"; "#795548"; "#607D8B" |] 47 + 48 + (* Downsample a mosaic to at most max_pixels, preserving aspect ratio *) 49 + let downsample_mosaic mat ~h ~w ~max_pixels = 50 + let n_pixels = h * w in 51 + if n_pixels <= max_pixels then (mat, h, w) 52 + else 53 + let stride = int_of_float (ceil (sqrt (float_of_int n_pixels /. float_of_int max_pixels))) in 54 + let h' = (h + stride - 1) / stride in 55 + let w' = (w + stride - 1) / stride in 56 + let out = Linalg.create_mat ~rows:(h' * w') ~cols:mat.Linalg.cols in 57 + for i = 0 to h' - 1 do 58 + for j = 0 to w' - 1 do 59 + let si = min (i * stride) (h - 1) in 60 + let sj = min (j * stride) (w - 1) in 61 + let src = si * w + sj in 62 + let dst = i * w' + j in 63 + for f = 0 to mat.Linalg.cols - 1 do 64 + Linalg.mat_set out dst f (Linalg.mat_get mat src f) 65 + done 66 + done 67 + done; 68 + (out, h', w') 69 + 70 + let map_id = "tessera-map" 71 + 72 + let status_view text = 73 + let open Widget.View in 74 + Element { tag = "div"; attrs = [Style ("padding", "8px"); Style ("font-family", "monospace")]; 75 + children = [Text text] } 76 + 77 + let () = Widget.display ~id:"status" ~handlers:[] (status_view "Draw a rectangle on the map.") 78 + 79 + let () = 80 + Widget.display_managed ~id:map_id 81 + ~kind:"leaflet-map" 82 + ~config:{| {"center": [52.2, 0.12], "zoom": 13, "height": "500px"} |} 83 + ~handlers:[ 84 + "bbox_drawn", (fun v -> 85 + match v with 86 + | Some json -> 87 + let s = Scanf.sscanf json 88 + {| {"south":%f,"west":%f,"north":%f,"east":%f}|} 89 + (fun s w n e -> Geotessera.{ min_lat = s; min_lon = w; max_lat = n; max_lon = e }) 90 + in 91 + bbox := Some s; 92 + Widget.update ~id:"status" 93 + (status_view (Printf.sprintf "Selected: %.4f,%.4f to %.4f,%.4f — run next cell to fetch embeddings." 94 + s.min_lat s.min_lon s.max_lat s.max_lon)) 95 + | None -> ()); 96 + "click", (fun v -> 97 + match v with 98 + | Some json -> 99 + let lat, lng = Scanf.sscanf json {| {"lat":%f,"lng":%f}|} (fun a b -> (a, b)) in 100 + (match !mosaic with 101 + | None -> () 102 + | Some _ -> 103 + let cls = !current_class in 104 + training_points := (lat, lng, cls) :: !training_points; 105 + let color = class_colors.(cls mod Array.length class_colors) in 106 + let label = (!class_names).(cls) in 107 + Widget.command ~id:map_id "addMarker" 108 + (Printf.sprintf {|{"lat":%f,"lng":%f,"color":"%s","label":"%s"}|} 109 + lat lng color label); 110 + Widget.update ~id:"status" 111 + (status_view (Printf.sprintf "Added %s point at %.4f, %.4f (%d points total)" 112 + label lat lng (List.length !training_points)))) 113 + | None -> ()); 114 + ] 115 + 116 + let () = Widget.command ~id:map_id "enableBboxDraw" "" 117 + ]} 118 + 119 + {1 Fetch embeddings and visualise} 120 + 121 + After drawing your bounding box, run this cell to fetch GeoTessera 122 + embeddings from the Zarr v3 store and display a PCA false-colour 123 + visualisation. Only the shards covering your region are fetched — 124 + much faster than downloading full tiles. 125 + 126 + {@ocaml x[ 127 + let () = 128 + match !bbox with 129 + | None -> Widget.update ~id:"status" (status_view "Error: draw a bounding box first!") 130 + | Some b -> 131 + Widget.update ~id:"status" (status_view "Fetching embeddings from Zarr store..."); 132 + let mat_full, h_full, w_full, mosaic_bbox = 133 + Tessera_zarr_jsoo.fetch_region_sync b in 134 + Widget.update ~id:"status" 135 + (status_view (Printf.sprintf "Fetched %d×%d mosaic. Downsampling..." h_full w_full)); 136 + let mat, h, w = downsample_mosaic mat_full ~h:h_full ~w:w_full ~max_pixels:50_000 in 137 + mosaic := Some (mat, h, w); 138 + mosaic_bounds := Some mosaic_bbox; 139 + let mosaic_south = mosaic_bbox.Geotessera.min_lat in 140 + let mosaic_north = mosaic_bbox.Geotessera.max_lat in 141 + let mosaic_west = mosaic_bbox.Geotessera.min_lon in 142 + let mosaic_east = mosaic_bbox.Geotessera.max_lon in 143 + Widget.update ~id:"status" 144 + (status_view (Printf.sprintf "Working at %d×%d (%d pixels). Computing PCA..." h w (h * w))); 145 + let proj = Tfjs.pca mat ~n_components:3 in 146 + projected := Some proj; 147 + let pca_img = Viz.pca_to_rgba ~width:w ~height:h proj in 148 + let url = Viz_jsoo.to_data_url pca_img in 149 + Widget.command ~id:map_id "addImageOverlay" 150 + (Printf.sprintf {|{"url":"%s","bounds":[[%f,%f],[%f,%f]],"opacity":0.7}|} 151 + url mosaic_south mosaic_west mosaic_north mosaic_east); 152 + Widget.update ~id:"status" 153 + (status_view "PCA overlay added. Click on the map to place training points, then run the classification cell.") 154 + ]} 155 + 156 + {1 Label training points} 157 + 158 + Click on the map to add training points. Use the buttons below to 159 + switch between classes before clicking. 160 + 161 + {@ocaml x[ 162 + let make_class_buttons () = 163 + let open Widget.View in 164 + let buttons = Array.to_list (Array.mapi (fun i name -> 165 + let color = class_colors.(i mod Array.length class_colors) in 166 + let border = if i = !current_class then "3px solid black" else "1px solid #ccc" in 167 + Element { tag = "button"; 168 + attrs = [ 169 + Handler ("click", "class_" ^ string_of_int i); 170 + Style ("margin", "4px"); 171 + Style ("padding", "8px 16px"); 172 + Style ("background", color); 173 + Style ("color", "white"); 174 + Style ("border", border); 175 + Style ("border-radius", "4px"); 176 + Style ("cursor", "pointer"); 177 + ]; 178 + children = [Text name] } 179 + ) !class_names) in 180 + Element { tag = "div"; 181 + attrs = [Style ("padding", "8px")]; 182 + children = 183 + Element { tag = "b"; attrs = []; children = [Text "Active class: "] } 184 + :: buttons 185 + @ [Element { tag = "button"; 186 + attrs = [ 187 + Handler ("click", "add_class"); 188 + Style ("margin", "4px"); 189 + Style ("padding", "8px 16px"); 190 + Style ("border", "1px dashed #999"); 191 + Style ("border-radius", "4px"); 192 + Style ("cursor", "pointer"); 193 + ]; 194 + children = [Text "+ Add class"] }; 195 + Element { tag = "button"; 196 + attrs = [ 197 + Handler ("click", "clear_points"); 198 + Style ("margin", "4px"); 199 + Style ("padding", "8px 16px"); 200 + Style ("border", "1px solid #ccc"); 201 + Style ("border-radius", "4px"); 202 + Style ("cursor", "pointer"); 203 + ]; 204 + children = [Text "Clear all"] }] 205 + } 206 + 207 + let class_handler_list () = 208 + let base = Array.to_list (Array.mapi (fun i _name -> 209 + "class_" ^ string_of_int i, (fun (_v : string option) -> 210 + current_class := i; 211 + Widget.update ~id:"class-buttons" (make_class_buttons ())) 212 + ) !class_names) in 213 + base @ [ 214 + "add_class", (fun _v -> 215 + let n = Array.length !class_names in 216 + let name = "class_" ^ string_of_int n in 217 + class_names := Array.append !class_names [| name |]; 218 + current_class := n; 219 + Widget.update ~id:"class-buttons" (make_class_buttons ())); 220 + "clear_points", (fun _v -> 221 + training_points := []; 222 + Widget.command ~id:map_id "clearMarkers" ""; 223 + Widget.update ~id:"status" (status_view "Cleared all training points.")); 224 + ] 225 + 226 + let () = 227 + Widget.display ~id:"class-buttons" 228 + ~handlers:(class_handler_list ()) 229 + (make_class_buttons ()) 230 + ]} 231 + 232 + {1 Classify and display} 233 + 234 + Run this cell after placing training points to classify the entire 235 + region using k-nearest neighbours. 236 + 237 + {@ocaml x[ 238 + let () = 239 + match !mosaic, !projected, !mosaic_bounds with 240 + | Some (mat, h, w), Some _proj, Some mb -> 241 + let points = !training_points in 242 + if points = [] then 243 + Widget.update ~id:"status" (status_view "Error: place some training points first!") 244 + else begin 245 + Widget.update ~id:"status" 246 + (status_view (Printf.sprintf "Classifying with %d training points..." (List.length points))); 247 + let mosaic_south = mb.Geotessera.min_lat in 248 + let mosaic_north = mb.Geotessera.max_lat in 249 + let mosaic_west = mb.Geotessera.min_lon in 250 + let mosaic_east = mb.Geotessera.max_lon in 251 + let n_train = List.length points in 252 + let train_mat = Linalg.create_mat ~rows:n_train ~cols:mat.Linalg.cols in 253 + let train_labels = Array.make n_train 0 in 254 + List.iteri (fun i (lat, lng, cls) -> 255 + let row = int_of_float ((mosaic_north -. lat) /. (mosaic_north -. mosaic_south) *. float_of_int h) in 256 + let col = int_of_float ((lng -. mosaic_west) /. (mosaic_east -. mosaic_west) *. float_of_int w) in 257 + let row = max 0 (min (h - 1) row) in 258 + let col = max 0 (min (w - 1) col) in 259 + let src_offset = (row * w + col) * mat.Linalg.cols in 260 + let dst_offset = i * mat.Linalg.cols in 261 + for j = 0 to mat.Linalg.cols - 1 do 262 + Bigarray.Array1.set train_mat.Linalg.data (dst_offset + j) 263 + (Bigarray.Array1.get mat.Linalg.data (src_offset + j)) 264 + done; 265 + train_labels.(i) <- cls 266 + ) points; 267 + let model = Linalg.knn_fit ~embeddings:train_mat ~labels:train_labels in 268 + let k = min 5 n_train in 269 + let result = Linalg.knn_predict model ~k mat in 270 + let n_classes = Array.fold_left max 0 train_labels + 1 in 271 + let colors = List.init n_classes (fun i -> 272 + let hex = class_colors.(i mod Array.length class_colors) in 273 + (i, Viz.color_of_hex hex) 274 + ) in 275 + let class_img = Viz.classification_to_rgba 276 + ~predictions:result.Linalg.predictions 277 + ~colors ~width:w ~height:h () in 278 + let class_url = Viz_jsoo.to_data_url class_img in 279 + Widget.command ~id:map_id "addImageOverlay" 280 + (Printf.sprintf {|{"url":"%s","bounds":[[%f,%f],[%f,%f]],"opacity":0.7}|} 281 + class_url mosaic_south mosaic_west mosaic_north mosaic_east); 282 + Widget.update ~id:"status" 283 + (status_view (Printf.sprintf "Classification complete! %d classes, %d training points." 284 + n_classes n_train)) 285 + end 286 + | _ -> 287 + Widget.update ~id:"status" (status_view "Error: fetch embeddings first!") 288 + ]}
+47 -3
tessera-zarr-jsoo/lib/tessera_zarr_jsoo.ml
··· 1 - let fetch _url ?off:_ ?len:_ () = failwith "TODO" 1 + open Js_of_ocaml 2 + 3 + (* Synchronous XHR fetch — matches the existing Geotessera_jsoo pattern. 4 + All Lwt promises resolve immediately, so Lwt.join degenerates to 5 + sequential execution. This is fine for a web worker. *) 6 + let fetch url ?off ?len () = 7 + let xhr = XmlHttpRequest.create () in 8 + xhr##.responseType := Js.string "arraybuffer"; 9 + xhr##_open (Js.string "GET") (Js.string url) Js._false; (* sync! *) 10 + (match off, len with 11 + | Some o, Some l -> 12 + xhr##setRequestHeader (Js.string "Range") 13 + (Js.string (Printf.sprintf "bytes=%d-%d" o (o + l - 1))) 14 + | _ -> ()); 15 + xhr##send Js.null; 16 + let data = match xhr##.status with 17 + | 200 | 206 -> 18 + Js.Opt.case 19 + (File.CoerceTo.arrayBuffer xhr##.response) 20 + (fun () -> failwith ("Failed to read response from " ^ url)) 21 + (fun b -> Typed_array.String.of_arrayBuffer b) 22 + | code -> 23 + failwith (Printf.sprintf "HTTP %d fetching %s" code url) 24 + in 25 + Lwt.return data 26 + 2 27 let codecs _name = None 3 - let open_store ?base_url:_ ?year:_ () = failwith "TODO" 4 - let fetch_region ?base_url:_ ?year:_ _bbox = failwith "TODO" 28 + 29 + let open_store ?(base_url = "https://dl2.geotessera.org/zarr/v1") 30 + ?(year = 2024) () = 31 + let url = Printf.sprintf "%s/%d.zarr" base_url year in 32 + Zarr_v3.Store.open_store ~fetch ~codecs url 33 + 34 + (* Run a synchronous Lwt computation. Works because all our fetches 35 + are synchronous XHR, so every Lwt.t is already resolved. *) 36 + let run_sync t = 37 + match Lwt.poll t with 38 + | Some v -> v 39 + | None -> failwith "Tessera_zarr_jsoo: unexpected async operation" 40 + 41 + let fetch_region ?(base_url = "https://dl2.geotessera.org/zarr/v1") 42 + ?(year = 2024) bbox = 43 + let open Lwt.Syntax in 44 + let* store = open_store ~base_url ~year () in 45 + Tessera_zarr.fetch_region ~store bbox 46 + 47 + let fetch_region_sync ?base_url ?year bbox = 48 + run_sync (fetch_region ?base_url ?year bbox)
+10 -16
tessera-zarr-jsoo/lib/tessera_zarr_jsoo.mli
··· 1 1 (** Browser backend for tessera-zarr. 2 2 3 - Provides async XHR-based HTTP fetch with byte-range support 3 + Provides synchronous XHR-based HTTP fetch with byte-range support 4 4 and convenience wrappers for opening the GeoTessera Zarr store 5 - and fetching embeddings from the browser. 6 - 7 - {2 Example} 8 - 9 - {[ 10 - let%lwt (mat, h, w, bounds) = 11 - Tessera_zarr_jsoo.fetch_region 12 - Geotessera.{ min_lon = 0.1; min_lat = 52.1; 13 - max_lon = 0.2; max_lat = 52.2 } in 14 - (* mat is a Linalg.mat with h*w rows, 128 cols *) 15 - ]} *) 5 + and fetching embeddings from the browser. *) 16 6 17 7 val fetch : Zarr_v3.Store.fetch 18 - (** Async HTTP fetch via [XMLHttpRequest] with [Range] header support. 19 - Uses [responseType = "arraybuffer"] for binary data. *) 8 + (** Synchronous HTTP fetch via [XMLHttpRequest] with [Range] header support. 9 + Returns an immediately-resolved [Lwt.t]. *) 20 10 21 11 val codecs : Zarr_v3.Store.codec_registry 22 12 (** Codec registry. Currently returns [None] for all codecs — ··· 30 20 31 21 val fetch_region : ?base_url:string -> ?year:int -> 32 22 Geotessera.bbox -> (Linalg.mat * int * int * Geotessera.bbox) Lwt.t 33 - (** Convenience wrapper: opens the store and fetches embeddings 34 - for a WGS84 bounding box in one call. *) 23 + (** Async wrapper: opens the store and fetches embeddings for a bbox. *) 24 + 25 + val fetch_region_sync : ?base_url:string -> ?year:int -> 26 + Geotessera.bbox -> Linalg.mat * int * int * Geotessera.bbox 27 + (** Synchronous convenience for notebook cells. Internally uses 28 + synchronous XHR so all Lwt promises resolve immediately. *)