this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

tessera-linalg: implement PCA with power iteration

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

+163 -8
+99 -7
tessera-linalg/lib/linalg.ml
··· 17 17 let mat_set m i j v = 18 18 Bigarray.Array1.set m.data (i * m.cols + j) v 19 19 20 - (* ---- PCA (stub) ---- *) 20 + (* ---- PCA ---- *) 21 21 22 22 type pca_model = { 23 23 mean : float array; 24 - components : float array array; 24 + components : float array array; (* n_components x n_features *) 25 25 } 26 26 27 - let pca_fit ?(max_samples = 100_000) _mat ~n_components = 28 - ignore max_samples; 29 - { mean = [||]; components = Array.make n_components [||] } 27 + let pca_fit ?(max_samples = 100_000) mat ~n_components = 28 + let n_features = mat.cols in 29 + (* Subsample if needed *) 30 + let sample_indices = 31 + if mat.rows <= max_samples then 32 + Array.init mat.rows Fun.id 33 + else 34 + Array.init max_samples (fun i -> 35 + i * mat.rows / max_samples 36 + ) 37 + in 38 + let n_samples = Array.length sample_indices in 39 + (* Compute mean *) 40 + let mean = Array.make n_features 0.0 in 41 + Array.iter (fun row -> 42 + for j = 0 to n_features - 1 do 43 + mean.(j) <- mean.(j) +. mat_get mat row j 44 + done 45 + ) sample_indices; 46 + let inv_n = 1.0 /. Float.of_int n_samples in 47 + Array.iteri (fun j v -> mean.(j) <- v *. inv_n) mean; 48 + (* Compute covariance matrix *) 49 + let cov = Array.init n_features (fun _ -> Array.make n_features 0.0) in 50 + Array.iter (fun row -> 51 + for i = 0 to n_features - 1 do 52 + let xi = mat_get mat row i -. mean.(i) in 53 + for j = i to n_features - 1 do 54 + let xj = mat_get mat row j -. mean.(j) in 55 + cov.(i).(j) <- cov.(i).(j) +. xi *. xj 56 + done 57 + done 58 + ) sample_indices; 59 + (* Symmetrize and normalize *) 60 + for i = 0 to n_features - 1 do 61 + for j = i to n_features - 1 do 62 + let v = cov.(i).(j) *. inv_n in 63 + cov.(i).(j) <- v; 64 + cov.(j).(i) <- v 65 + done 66 + done; 67 + (* Power iteration for top n_components eigenvectors *) 68 + let components = Array.make n_components [||] in 69 + for comp = 0 to n_components - 1 do 70 + (* Deterministic initial vector *) 71 + let v = Array.init n_features (fun i -> 72 + Float.sin (Float.of_int i +. 1.0) 73 + ) in 74 + (* 200 iterations of power iteration *) 75 + for _ = 1 to 200 do 76 + (* w = C * v *) 77 + let w = Array.make n_features 0.0 in 78 + for i = 0 to n_features - 1 do 79 + let s = ref 0.0 in 80 + for j = 0 to n_features - 1 do 81 + s := !s +. cov.(i).(j) *. v.(j) 82 + done; 83 + w.(i) <- !s 84 + done; 85 + (* Normalize *) 86 + let norm = ref 0.0 in 87 + Array.iter (fun x -> norm := !norm +. x *. x) w; 88 + let norm = Float.sqrt !norm in 89 + let inv_norm = if norm > 1e-15 then 1.0 /. norm else 1.0 in 90 + Array.iteri (fun i x -> v.(i) <- x *. inv_norm) w 91 + done; 92 + (* Compute eigenvalue = v^T * C * v *) 93 + let eigenvalue = ref 0.0 in 94 + for i = 0 to n_features - 1 do 95 + let s = ref 0.0 in 96 + for j = 0 to n_features - 1 do 97 + s := !s +. cov.(i).(j) *. v.(j) 98 + done; 99 + eigenvalue := !eigenvalue +. v.(i) *. !s 100 + done; 101 + let eigenvalue = !eigenvalue in 102 + (* Deflate: C = C - eigenvalue * v * v^T *) 103 + for i = 0 to n_features - 1 do 104 + for j = 0 to n_features - 1 do 105 + cov.(i).(j) <- cov.(i).(j) -. eigenvalue *. v.(i) *. v.(j) 106 + done 107 + done; 108 + components.(comp) <- Array.copy v 109 + done; 110 + { mean; components } 30 111 31 - let pca_transform _model mat = 32 - create_mat ~rows:mat.rows ~cols:0 112 + let pca_transform model mat = 113 + let n_components = Array.length model.components in 114 + let result = create_mat ~rows:mat.rows ~cols:n_components in 115 + for i = 0 to mat.rows - 1 do 116 + for k = 0 to n_components - 1 do 117 + let dot = ref 0.0 in 118 + for j = 0 to mat.cols - 1 do 119 + dot := !dot +. (mat_get mat i j -. model.mean.(j)) *. model.components.(k).(j) 120 + done; 121 + mat_set result i k !dot 122 + done 123 + done; 124 + result 33 125 34 126 (* ---- kNN (stub) ---- *) 35 127
+64 -1
tessera-linalg/test/test_linalg.ml
··· 12 12 done; 13 13 m 14 14 15 - let _ = mat_of_arrays 15 + let eps = 0.1 16 16 17 17 (* ---- Matrix tests ---- *) 18 18 ··· 35 35 let m = create_mat ~rows:2 ~cols:2 in 36 36 Alcotest.(check (float 1e-6)) "zero init" 0.0 (mat_get m 0 0) 37 37 38 + (* ---- PCA tests ---- *) 39 + 40 + let test_pca_x_axis_points () = 41 + (* 4 points along x-axis in 3D: (1,0,0), (2,0,0), (3,0,0), (4,0,0) 42 + PCA to 1D should produce evenly spaced output *) 43 + let data = mat_of_arrays [| 44 + [| 1.0; 0.0; 0.0 |]; 45 + [| 2.0; 0.0; 0.0 |]; 46 + [| 3.0; 0.0; 0.0 |]; 47 + [| 4.0; 0.0; 0.0 |]; 48 + |] in 49 + let model = pca_fit data ~n_components:1 in 50 + let result = pca_transform model data in 51 + Alcotest.(check int) "result rows" 4 result.rows; 52 + Alcotest.(check int) "result cols" 1 result.cols; 53 + (* Should be evenly spaced: differences between consecutive should be equal *) 54 + let v0 = mat_get result 0 0 in 55 + let v1 = mat_get result 1 0 in 56 + let v2 = mat_get result 2 0 in 57 + let v3 = mat_get result 3 0 in 58 + let d01 = Float.abs (v1 -. v0) in 59 + let d12 = Float.abs (v2 -. v1) in 60 + let d23 = Float.abs (v3 -. v2) in 61 + Alcotest.(check (float eps)) "spacing d01~d12" d01 d12; 62 + Alcotest.(check (float eps)) "spacing d12~d23" d12 d23 63 + 64 + let test_pca_diagonal_monotonic () = 65 + (* 100 points along y=x in 2D -> PCA to 1D -> monotonic output *) 66 + let rows_data = Array.init 100 (fun i -> 67 + let v = Float.of_int i in 68 + [| v; v |] 69 + ) in 70 + let data = mat_of_arrays rows_data in 71 + let model = pca_fit data ~n_components:1 in 72 + let result = pca_transform model data in 73 + Alcotest.(check int) "result rows" 100 result.rows; 74 + (* Check monotonic (either all increasing or all decreasing) *) 75 + let increasing = ref true in 76 + let decreasing = ref true in 77 + for i = 1 to 99 do 78 + let prev = mat_get result (i - 1) 0 in 79 + let curr = mat_get result i 0 in 80 + if curr <= prev then increasing := false; 81 + if curr >= prev then decreasing := false; 82 + done; 83 + Alcotest.(check bool) "monotonic" true (!increasing || !decreasing) 84 + 85 + let test_pca_output_dims () = 86 + let data = mat_of_arrays [| 87 + [| 1.0; 2.0; 3.0; 4.0; 5.0 |]; 88 + [| 2.0; 3.0; 4.0; 5.0; 6.0 |]; 89 + [| 3.0; 4.0; 5.0; 6.0; 7.0 |]; 90 + |] in 91 + let model = pca_fit data ~n_components:2 in 92 + let result = pca_transform model data in 93 + Alcotest.(check int) "rows" 3 result.rows; 94 + Alcotest.(check int) "cols" 2 result.cols 95 + 38 96 (* ---- Test runner ---- *) 39 97 40 98 let () = ··· 43 101 Alcotest.test_case "create_mat dimensions" `Quick test_create_mat_dims; 44 102 Alcotest.test_case "get/set roundtrip" `Quick test_mat_get_set_roundtrip; 45 103 Alcotest.test_case "zero initialization" `Quick test_mat_zero_init; 104 + ]; 105 + "pca", [ 106 + Alcotest.test_case "x-axis points evenly spaced" `Quick test_pca_x_axis_points; 107 + Alcotest.test_case "diagonal monotonic" `Quick test_pca_diagonal_monotonic; 108 + Alcotest.test_case "output dimensions" `Quick test_pca_output_dims; 46 109 ]; 47 110 ]