this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

more

+275
+275
toru/test/test_tessera_integration.ml
open Toru
open Printf

(* Integration tests that load the tessera-manifests registries over HTTP and
   exercise the Registry and Hash modules of Toru against them.  The tests run
   sequentially from [run_tests]; the process exits with status 1 on the first
   failure. *)

(* Test URLs for tessera-manifests *)
let embeddings_url = "https://raw.githubusercontent.com/ucam-eo/tessera-manifests/main/registry/embeddings/embeddings_2024_lon-10_lat50.txt"
let landmasks_url = "https://raw.githubusercontent.com/ucam-eo/tessera-manifests/main/registry/landmasks/landmasks_lon-10_lat50.txt"

(* Helper function to create a test cache directory.

   Creates a directory named toru_tessera_test_<n> (n random in 0..9999) under
   the system temporary directory and returns its path.  Because the suffix
   space is small, a leftover directory from an earlier run can collide; the
   creation is therefore retried a bounded number of times on EEXIST.  Any
   other Unix error, or EEXIST on the last attempt, propagates to the caller. *)
let create_test_cache () =
  let tmp_root = Filename.get_temp_dir_name () in
  let rec attempt remaining =
    let cache_dir =
      Filename.concat tmp_root
        ("toru_tessera_test_" ^ string_of_int (Random.int 10000))
    in
    match Unix.mkdir cache_dir 0o755 with
    | () -> cache_dir
    | exception Unix.Unix_error (Unix.EEXIST, _, _) when remaining > 1 ->
      attempt (remaining - 1)
  in
  attempt 20

(* Returns the first entry of [registry], or fails with a message naming the
   registry ([what]) instead of the opaque failure raised by List.hd. *)
let first_entry ~what registry =
  match Registry.entries registry with
  | entry :: _ -> entry
  | [] -> failwith (sprintf "%s registry is empty" what)

(* Test 1: Registry loading from tessera-manifests URLs.
   Returns the pair (embeddings_registry, landmasks_registry) so the later
   tests can reuse the loaded data instead of fetching it again. *)
let test_registry_loading () =
  printf "=== Test 1: Registry loading from tessera-manifests URLs ===\n";

  printf "Loading embeddings registry from URL...\n";
  let embeddings_registry = Registry.load_from_url embeddings_url in
  printf "Embeddings registry size: %d entries\n" (Registry.size embeddings_registry);

  printf "Loading landmasks registry from URL...\n";
  let landmasks_registry = Registry.load_from_url landmasks_url in
  printf "Landmasks registry size: %d entries\n" (Registry.size landmasks_registry);

  (* Verify both registries are non-empty *)
  assert (Registry.size embeddings_registry > 0);
  assert (Registry.size landmasks_registry > 0);

  printf "✓ Registry loading test passed\n\n";
  (embeddings_registry, landmasks_registry)

(* Prints and validates the first 3 entries of [entries] (only a few are
   checked, for brevity).  [check_filename] carries the registry-specific
   filename assertions and runs before the shared hash checks. *)
let check_leading_entries ~check_filename entries =
  List.iteri (fun i entry ->
    if i < 3 then begin
      let filename = Registry.filename entry in
      let hash = Registry.hash entry in
      printf " Entry %d: %s -> %s\n" (i+1) filename (Hash.to_string hash);

      check_filename filename;

      (* Verify hash format (should be SHA-256: 64 chars) *)
      assert (Hash.algorithm hash = SHA256);
      assert (String.length (Hash.value hash) = 64)
    end
  ) entries

(* Test 2: Registry structure and content validation *)
let test_registry_structure embeddings_registry landmasks_registry =
  printf "=== Test 2: Registry structure and content validation ===\n";

  (* Test embeddings registry structure *)
  printf "Testing embeddings registry structure...\n";
  check_leading_entries
    ~check_filename:(fun filename ->
      (* Verify filename format for embeddings *)
      assert (String.contains filename '/');
      assert (Filename.extension filename = ".npy"))
    (Registry.entries embeddings_registry);

  (* Test landmasks registry structure *)
  printf "Testing landmasks registry structure...\n";
  check_leading_entries
    ~check_filename:(fun filename ->
      (* Verify filename format for landmasks *)
      assert (Filename.extension filename = ".tiff");
      assert (String.contains filename '_'))
    (Registry.entries landmasks_registry);

  printf "✓ Registry structure validation test passed\n\n"

(* Test 3: Hash format validation with real data *)
let test_hash_validation embeddings_registry _landmasks_registry =
  printf "=== Test 3: Hash format validation ===\n";

  (* Test hash parsing and formatting *)
  let test_entry = first_entry ~what:"Embeddings" embeddings_registry in
  let hash = Registry.hash test_entry in

  printf "Testing hash operations on real data...\n";
  printf " Original hash: %s\n" (Hash.to_string hash);
  printf " Algorithm: %s\n" (Hash.algorithm_to_string (Hash.algorithm hash));

  (* Test round-trip parsing *)
  let hash_string = Hash.to_string hash in
  let parsed_hash = Hash.of_string hash_string in
  assert (Hash.equal hash parsed_hash);

  (* Test prefixed format *)
  let prefixed = Hash.format_prefixed hash in
  printf " Prefixed format: %s\n" prefixed;

  (match Hash.parse_prefixed prefixed with
   | Some (alg, value) ->
     assert (alg = Hash.algorithm hash);
     assert (value = Hash.value hash);
     printf " Prefixed parsing: ✓\n"
   | None -> failwith "Failed to parse prefixed hash");

  printf "✓ Hash validation test passed\n\n"

(* Test 4: Registry query operations *)
let test_registry_queries embeddings_registry _landmasks_registry =
  printf "=== Test 4: Registry query operations ===\n";

  (* Test finding specific entries *)
  let test_filename =
    Registry.filename (first_entry ~what:"Embeddings" embeddings_registry)
  in

  printf "Testing query operations...\n";
  printf " Looking for: %s\n" test_filename;

  (match Registry.find test_filename embeddings_registry with
   | Some found_entry ->
     printf " Found entry: %s\n" (Registry.filename found_entry);
     assert (Registry.filename found_entry = test_filename);
     assert (Registry.exists test_filename embeddings_registry);
     printf " Exists check: ✓\n"
   | None -> failwith "Failed to find known entry");

  (* Test non-existent file *)
  let fake_filename = "non/existent/file.npy" in
  assert (not (Registry.exists fake_filename embeddings_registry));
  assert (Registry.find fake_filename embeddings_registry = None);
  printf " Non-existent file handling: ✓\n";

  printf "✓ Registry query operations test passed\n\n"

(* Test 5: Full Toru workflow without downloads *)
let test_toru_workflow embeddings_registry =
  printf "=== Test 5: Toru workflow (without actual downloads) ===\n";

  (* Since we can't run Eio tests in this context, we'll simulate the workflow *)
  let cache_path = create_test_cache () in

  (* Best-effort removal of the test cache.  Only Unix errors are tolerated,
     and they are reported on stderr rather than silently dropped. *)
  let remove_cache () =
    try Unix.rmdir cache_path with
    | Unix.Unix_error (err, _, _) ->
      eprintf "Warning: could not remove test cache %s: %s\n"
        cache_path (Unix.error_message err)
  in

  (* [Fun.protect] guarantees the directory is removed even when one of the
     assertions below fails. *)
  Fun.protect ~finally:remove_cache (fun () ->
    printf "Created test cache at: %s\n" cache_path;

    (* Test default cache path generation *)
    let default_path = Toru.default_cache_path ~app_name:"tessera_test" () in
    printf "Default cache path: %s\n" default_path;
    assert (String.contains default_path '/');

    (* Test registry serialization/deserialization *)
    printf "Testing registry serialization...\n";
    let serialized = Registry.to_string embeddings_registry in
    let deserialized = Registry.of_string serialized in

    assert (Registry.size embeddings_registry = Registry.size deserialized);
    printf " Serialization round-trip: ✓\n");

  printf "✓ Toru workflow test passed\n\n"

(* Test 6: Geographic coordinate parsing.

   Takes the filenames of the first 5 landmask entries, splits each on
   underscores and, for names of the form grid_<lon>_<lat>..., parses the two
   numbers and asserts they are valid longitude/latitude values.  Names that
   do not match, or whose numbers do not parse, are reported but do not fail
   the test. *)
let test_geographic_parsing landmasks_registry =
  printf "=== Test 6: Geographic coordinate parsing ===\n";

  let test_filenames =
    Registry.entries landmasks_registry
    |> List.filteri (fun i _ -> i < 5)
    |> List.map Registry.filename
  in

  printf "Testing coordinate extraction from filenames...\n";
  List.iter (fun filename ->
    printf " File: %s\n" filename;

    if String.contains filename '_' then begin
      match String.split_on_char '_' filename with
      | "grid" :: lon_str :: lat_str :: _ ->
        (* Strip only the trailing file extension so that a fractional
           latitude keeps its decimals; cutting at the first dot would
           truncate it to its integer part. *)
        let lat_str = Filename.remove_extension lat_str in
        (* Parsing is kept separate from validation: the range assertions
           below must be able to fail the test instead of being reported as a
           parse problem. *)
        (match Float.of_string_opt lon_str, Float.of_string_opt lat_str with
         | Some lon, Some lat ->
           printf " Coordinates: %.2f, %.2f\n" lon lat;
           (* Basic coordinate validation *)
           assert (lon >= -180.0 && lon <= 180.0);
           assert (lat >= -90.0 && lat <= 90.0)
         | None, _ | _, None -> printf " Could not parse coordinates\n")
      | _ -> printf " Unexpected filename format\n"
    end
  ) test_filenames;

  printf "✓ Geographic coordinate parsing test passed\n\n"

(* Test 7: Registry comparison and merging *)
let test_registry_operations embeddings_registry _landmasks_registry =
  printf "=== Test 7: Registry operations ===\n";

  (* Create a small test registry *)
  let test_entry = Registry.create_entry
      ~filename:"test/file.txt"
      ~hash:(Hash.create SHA256 "abc123def456")
      () in

  let small_registry = Registry.add test_entry Registry.empty in
  printf "Created test registry with %d entries\n" (Registry.size small_registry);

  (* Test adding to existing registry *)
  let expanded_registry = Registry.add test_entry embeddings_registry in
  assert (Registry.size expanded_registry = Registry.size embeddings_registry + 1);
  printf "Successfully added entry to registry\n";

  (* Test removal *)
  let reduced_registry = Registry.remove "test/file.txt" expanded_registry in
  assert (Registry.size reduced_registry = Registry.size embeddings_registry);
  printf "Successfully removed entry from registry\n";

  printf "✓ Registry operations test passed\n\n"

(* Test 8: Error handling *)
let test_error_handling () =
  printf "=== Test 8: Error handling ===\n";

  (* Test invalid hash parsing.  Either outcome (a parsed value or an
     exception) is accepted here.
     NOTE(review): the catch-all is kept because the exception raised by
     Hash.of_string is not visible from this file; narrow it once confirmed. *)
  (try
     ignore (Hash.of_string "invalid_hash_format:xyz");
     printf "Hash parsing with invalid format (should work or fail gracefully)\n"
   with _ -> printf "✓ Invalid hash format properly handled\n");

  (* Test malformed registry parsing *)
  let malformed_registry = "invalid registry content without proper format" in
  let parsed = Registry.of_string malformed_registry in
  (* Should create empty registry or handle gracefully *)
  printf "Malformed registry size: %d (expected 0)\n" (Registry.size parsed);

  (* Test empty registry operations *)
  let empty_reg = Registry.empty in
  assert (Registry.size empty_reg = 0);
  assert (Registry.find "any_file.txt" empty_reg = None);
  printf "✓ Empty registry operations work correctly\n";

  printf "✓ Error handling test passed\n\n"

(* Main test runner.  Seeds the random generator (used for the cache directory
   name), runs every test in order and exits with status 1 on the first
   failure. *)
let run_tests () =
  printf "Starting Tessera-Manifests Integration Tests\n";
  printf "============================================\n\n";

  Random.self_init ();

  try
    (* Run tests sequentially *)
    let (embeddings_registry, landmasks_registry) = test_registry_loading () in
    test_registry_structure embeddings_registry landmasks_registry;
    test_hash_validation embeddings_registry landmasks_registry;
    test_registry_queries embeddings_registry landmasks_registry;
    test_toru_workflow embeddings_registry;
    test_geographic_parsing landmasks_registry;
    test_registry_operations embeddings_registry landmasks_registry;
    test_error_handling ();

    printf "============================================\n";
    printf "All Tessera Integration Tests PASSED! ✅\n";
    printf "============================================\n\n"

  with
  | Failure msg ->
    printf "❌ TEST FAILED: %s\n" msg;
    exit 1
  | Assert_failure _ as e ->
    (* A failed assertion is a test failure, not an unexpected error. *)
    printf "❌ TEST FAILED: %s\n" (Printexc.to_string e);
    exit 1
  | e ->
    printf "❌ UNEXPECTED ERROR: %s\n" (Printexc.to_string e);
    exit 1

let () = run_tests ()