(** [mat_set m i j v] stores [v] at row [i], column [j] of [m].  The flat
    [m.data] buffer is addressed as [i * m.cols + j]. *)
let mat_set m i j v =
  let offset = (i * m.cols) + j in
  Bigarray.Array1.set m.data offset v
19192020-(* ---- PCA (stub) ---- *)
2020+(* ---- PCA ---- *)
(** A fitted PCA model, as produced by [pca_fit] and consumed by
    [pca_transform]. *)
type pca_model = {
  mean : float array;
      (** Per-feature mean subtracted from every sample before projection. *)
  components : float array array;
      (** Projection axes, laid out as n_components x n_features. *)
}
26262727-let pca_fit ?(max_samples = 100_000) _mat ~n_components =
2828- ignore max_samples;
2929- { mean = [||]; components = Array.make n_components [||] }
(** [pca_fit ?max_samples mat ~n_components] fits a PCA model to the rows of
    [mat], treating each row as one sample and each column as one feature.

    At most [max_samples] rows are used (default [100_000]); when [mat] has
    more rows, an evenly strided subset is taken, so the result is
    deterministic.  The covariance matrix (normalised by the sample count) is
    built from the mean-centred samples, and its leading eigenvectors are
    extracted one at a time by 200 rounds of power iteration followed by
    deflation.

    Returns a model whose [mean] has one entry per feature and whose
    [components] holds [n_components] rows of length [mat.cols].  When the
    remaining covariance is numerically zero (fewer informative directions
    than [n_components], or no samples at all) the corresponding component is
    not normalised and is approximately zero.

    @raise Invalid_argument if [n_components] or [max_samples] is negative. *)
let pca_fit ?(max_samples = 100_000) mat ~n_components =
  if n_components < 0 then invalid_arg "pca_fit: n_components must be >= 0";
  if max_samples < 0 then invalid_arg "pca_fit: max_samples must be >= 0";
  let n_features = mat.cols in
  (* Use every row when possible, otherwise an evenly strided subset. *)
  let sample_indices =
    if mat.rows <= max_samples then Array.init mat.rows Fun.id
    else Array.init max_samples (fun i -> i * mat.rows / max_samples)
  in
  let n_samples = Array.length sample_indices in
  (* With no samples 1/n would be infinite and turn the mean and covariance
     into NaN; scaling by 0 keeps them at zero instead. *)
  let inv_n = if n_samples = 0 then 0.0 else 1.0 /. Float.of_int n_samples in
  (* Per-feature mean. *)
  let mean = Array.make n_features 0.0 in
  Array.iter
    (fun row ->
      for j = 0 to n_features - 1 do
        mean.(j) <- mean.(j) +. mat_get mat row j
      done)
    sample_indices;
  Array.iteri (fun j sum -> mean.(j) <- sum *. inv_n) mean;
  (* Accumulate the upper triangle of the covariance matrix.  Each sample is
     centred once into [centered] instead of once per (i, j) pair. *)
  let cov = Array.make_matrix n_features n_features 0.0 in
  let centered = Array.make n_features 0.0 in
  Array.iter
    (fun row ->
      for j = 0 to n_features - 1 do
        centered.(j) <- mat_get mat row j -. mean.(j)
      done;
      for i = 0 to n_features - 1 do
        let xi = centered.(i) in
        let cov_i = cov.(i) in
        for j = i to n_features - 1 do
          cov_i.(j) <- cov_i.(j) +. (xi *. centered.(j))
        done
      done)
    sample_indices;
  (* Normalise by the sample count and mirror into the lower triangle. *)
  for i = 0 to n_features - 1 do
    for j = i to n_features - 1 do
      let c = cov.(i).(j) *. inv_n in
      cov.(i).(j) <- c;
      cov.(j).(i) <- c
    done
  done;
  (* [cov_times v dst] stores (cov * v) into [dst]; [v] and [dst] must be
     distinct arrays. *)
  let cov_times v dst =
    for i = 0 to n_features - 1 do
      let cov_i = cov.(i) in
      let acc = ref 0.0 in
      for j = 0 to n_features - 1 do
        acc := !acc +. (cov_i.(j) *. v.(j))
      done;
      dst.(i) <- !acc
    done
  in
  (* Scratch vector shared by every iteration. *)
  let w = Array.make n_features 0.0 in
  let components = Array.make n_components [||] in
  (* Components must be extracted in order: each one is removed from [cov]
     before the next is searched for. *)
  for comp = 0 to n_components - 1 do
    (* Deterministic starting vector. *)
    let v =
      Array.init n_features (fun i -> Float.sin (Float.of_int i +. 1.0))
    in
    for _ = 1 to 200 do
      cov_times v w;
      let norm =
        Float.sqrt (Array.fold_left (fun acc x -> acc +. (x *. x)) 0.0 w)
      in
      (* Skip the rescaling when the product has effectively vanished, to
         avoid dividing by (almost) zero. *)
      let inv_norm = if norm > 1e-15 then 1.0 /. norm else 1.0 in
      Array.iteri (fun i x -> v.(i) <- x *. inv_norm) w
    done;
    (* Rayleigh quotient v^T * cov * v gives the matching eigenvalue. *)
    cov_times v w;
    let eigenvalue = ref 0.0 in
    for i = 0 to n_features - 1 do
      eigenvalue := !eigenvalue +. (v.(i) *. w.(i))
    done;
    let eigenvalue = !eigenvalue in
    (* Deflate: cov <- cov - eigenvalue * v * v^T. *)
    for i = 0 to n_features - 1 do
      let cov_i = cov.(i) in
      for j = 0 to n_features - 1 do
        cov_i.(j) <- cov_i.(j) -. (eigenvalue *. v.(i) *. v.(j))
      done
    done;
    (* [v] is freshly allocated for every component, so it can be stored
       without copying. *)
    components.(comp) <- v
  done;
  { mean; components }
301113131-let pca_transform _model mat =
3232- create_mat ~rows:mat.rows ~cols:0
(** [pca_transform model mat] projects every row of [mat] onto the axes in
    [model.components] after subtracting [model.mean].

    Returns a new matrix with [mat.rows] rows and one column per component;
    entry (i, k) is the dot product of the centred row [i] with component [k].

    @raise Invalid_argument if [mat.cols] differs from the length of
    [model.mean]. *)
let pca_transform model mat =
  let n_components = Array.length model.components in
  let n_features = mat.cols in
  (* A narrower matrix would otherwise be projected silently with part of the
     model ignored; a wider one would fail with a bare index error. *)
  if n_features <> Array.length model.mean then
    invalid_arg "pca_transform: matrix width does not match the fitted model";
  let result = create_mat ~rows:mat.rows ~cols:n_components in
  (* Each row is centred once and reused for every component. *)
  let centered = Array.make n_features 0.0 in
  for i = 0 to mat.rows - 1 do
    for j = 0 to n_features - 1 do
      centered.(j) <- mat_get mat i j -. model.mean.(j)
    done;
    for k = 0 to n_components - 1 do
      let axis = model.components.(k) in
      let dot = ref 0.0 in
      for j = 0 to n_features - 1 do
        dot := !dot +. (centered.(j) *. axis.(j))
      done;
      mat_set result i k !dot
    done
  done;
  result
3312534126(* ---- kNN (stub) ---- *)
35127
+64-1
tessera-linalg/test/test_linalg.ml
···1212 done;
1313 m
14141515-let _ = mat_of_arrays
(* Tolerance passed to Alcotest's [float] testable in the PCA tests. *)
let eps : float = 1e-1
16161717(* ---- Matrix tests ---- *)
1818···3535 let m = create_mat ~rows:2 ~cols:2 in
3636 Alcotest.(check (float 1e-6)) "zero init" 0.0 (mat_get m 0 0)
37373838+(* ---- PCA tests ---- *)
(* Four points on the x-axis in 3D: (1,0,0), (2,0,0), (3,0,0), (4,0,0).
   Reducing to one dimension must keep them evenly spaced, one unit apart
   (the sign of the axis is not constrained). *)
let test_pca_x_axis_points () =
  let data = mat_of_arrays [|
    [| 1.0; 0.0; 0.0 |];
    [| 2.0; 0.0; 0.0 |];
    [| 3.0; 0.0; 0.0 |];
    [| 4.0; 0.0; 0.0 |];
  |] in
  let model = pca_fit data ~n_components:1 in
  let result = pca_transform model data in
  Alcotest.(check int) "result rows" 4 result.rows;
  Alcotest.(check int) "result cols" 1 result.cols;
  (* Absolute gap between consecutive projected points. *)
  let gap i =
    Float.abs (mat_get result (i + 1) 0 -. mat_get result i 0)
  in
  let d01 = gap 0 in
  let d12 = gap 1 in
  let d23 = gap 2 in
  Alcotest.(check (float eps)) "spacing d01~d12" d01 d12;
  Alcotest.(check (float eps)) "spacing d12~d23" d12 d23;
  (* Equal gaps alone would also hold for an all-zero projection, so pin the
     actual distance: a unit-length axis along x preserves it. *)
  Alcotest.(check (float eps)) "unit spacing" 1.0 d01
(* 100 points on the line y = x in 2D: their 1D projection must be strictly
   monotonic, in either direction. *)
let test_pca_diagonal_monotonic () =
  let n_points = 100 in
  let data =
    mat_of_arrays
      (Array.init n_points (fun i ->
           let coord = Float.of_int i in
           [| coord; coord |]))
  in
  let model = pca_fit data ~n_components:1 in
  let projected = pca_transform model data in
  Alcotest.(check int) "result rows" 100 projected.rows;
  (* [never violated] holds when no consecutive pair (prev, curr) satisfies
     [violated prev curr]. *)
  let never violated =
    let ok = ref true in
    for i = 1 to n_points - 1 do
      let prev = mat_get projected (i - 1) 0 in
      let curr = mat_get projected i 0 in
      if violated prev curr then ok := false
    done;
    !ok
  in
  let increasing = never (fun (prev : float) curr -> curr <= prev) in
  let decreasing = never (fun (prev : float) curr -> curr >= prev) in
  Alcotest.(check bool) "monotonic" true (increasing || decreasing)
(* A 3 x 5 input reduced to two components must come back as 3 x 2. *)
let test_pca_output_dims () =
  let n_rows = 3 and n_cols = 5 in
  (* Row r holds r+1, r+2, ..., r+5. *)
  let data =
    mat_of_arrays
      (Array.init n_rows (fun r ->
           Array.init n_cols (fun c -> Float.of_int (r + c + 1))))
  in
  let projected = pca_transform (pca_fit data ~n_components:2) data in
  Alcotest.(check int) "rows" 3 projected.rows;
  Alcotest.(check int) "cols" 2 projected.cols
9595+3896(* ---- Test runner ---- *)
39974098let () =
···43101 Alcotest.test_case "create_mat dimensions" `Quick test_create_mat_dims;
44102 Alcotest.test_case "get/set roundtrip" `Quick test_mat_get_set_roundtrip;
45103 Alcotest.test_case "zero initialization" `Quick test_mat_zero_init;
104104+ ];
105105+ "pca", [
106106+ Alcotest.test_case "x-axis points evenly spaced" `Quick test_pca_x_axis_points;
107107+ Alcotest.test_case "diagonal monotonic" `Quick test_pca_diagonal_monotonic;
108108+ Alcotest.test_case "output dimensions" `Quick test_pca_output_dims;
46109 ];
47110 ]