···133133 Alcotest.(check int) "k=1 pred" 0 res.predictions.(0);
134134 Alcotest.(check (float 1e-6)) "k=1 confidence" 1.0 res.confidences.(0)

(* ---- Integration test ---- *)
(** End-to-end check of the PCA -> k-NN pipeline.

    Builds 100 synthetic 128-dimensional rows forming two deterministic
    clusters (rows 0..49 near 0.0 with label 0, rows 50..99 near 10.0 with
    label 1), projects them to 3 components with [pca_fit] /
    [pca_transform], fits a k-NN model on the training rows and requires
    at least 90% accuracy on the held-out rows with [k = 5].

    Every fifth row is held out, so the test set contains 10 rows of each
    class. Holding out the last 20 rows instead would only ever exercise
    class 1, because the labels are laid out contiguously. *)
let test_integration_pca_knn () =
  (* 100 pixels with 128-dim embeddings, 2 clusters *)
  let n = 100 in
  let dim = 128 in
  let n_components = 3 in
  let data = create_mat ~rows:n ~cols:dim in
  let labels = Array.init n (fun i -> if i < 50 then 0 else 1) in
  (* Cluster 0: features near 0.0, Cluster 1: features near 10.0 *)
  for i = 0 to n - 1 do
    let base = if labels.(i) = 0 then 0.0 else 10.0 in
    for j = 0 to dim - 1 do
      (* Deterministic pseudo-noise in [0.0, 0.099] keeps the test
         reproducible without a random generator. *)
      let noise = Float.of_int ((i * 7 + j * 13) mod 100) *. 0.001 in
      mat_set data i j (base +. noise)
    done
  done;
  let pca_model = pca_fit data ~n_components in
  let reduced = pca_transform pca_model data in
  Alcotest.(check int) "reduced cols" n_components reduced.cols;
  (* Stratified hold-out: rows with [i mod 5 = 4] are test rows (20 in
     total, 10 per class); the remaining 80 rows are training rows. *)
  let is_test i = i mod 5 = 4 in
  let all_rows = List.init n (fun i -> i) in
  let test_idx = Array.of_list (List.filter is_test all_rows) in
  let train_idx =
    Array.of_list (List.filter (fun i -> not (is_test i)) all_rows)
  in
  (* Copy the selected rows of [reduced] into a fresh matrix. *)
  let select_rows idx =
    let m = create_mat ~rows:(Array.length idx) ~cols:n_components in
    Array.iteri
      (fun dst src ->
        for j = 0 to n_components - 1 do
          mat_set m dst j (mat_get reduced src j)
        done)
      idx;
    m
  in
  let train_mat = select_rows train_idx in
  let test_mat = select_rows test_idx in
  let train_labels = Array.map (fun i -> labels.(i)) train_idx in
  let expected = Array.map (fun i -> labels.(i)) test_idx in
  (* Guard against a regression to a single-class test set. *)
  Alcotest.(check bool) "test set covers both classes" true
    (Array.mem 0 expected && Array.mem 1 expected);
  let knn_model = knn_fit ~embeddings:train_mat ~labels:train_labels in
  let res = knn_predict knn_model ~k:5 test_mat in
  let correct = ref 0 in
  Array.iteri
    (fun i want -> if res.predictions.(i) = want then incr correct)
    expected;
  let test_rows = Array.length expected in
  let accuracy = Float.of_int !correct /. Float.of_int test_rows in
  Alcotest.(check bool) "accuracy >= 0.9" true (accuracy >= 0.9)

(* ---- Test runner ---- *)
137182138183let () =
···151196 Alcotest.test_case "two clusters" `Quick test_knn_two_clusters;
152197 Alcotest.test_case "distance weighting" `Quick test_knn_distance_weighting;
153198 Alcotest.test_case "k=1 confidence" `Quick test_knn_k1_confidence;
199199+ ];
200200+ "integration", [
201201+ Alcotest.test_case "pca then knn" `Quick test_integration_pca_knn;
154202 ];
155203 ]