My aggregated monorepo of OCaml code, automaintained
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Replace SVD with QR algorithm for PCA eigendecomposition

TF.js 4.x doesn't have tf.linalg.svd. Use the QR algorithm instead:
iterative QR decomposition on the 128x128 covariance matrix converges
to eigenvalues/eigenvectors. 50 iterations is plenty for this size.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

+52 -16
+52 -16
tessera-tfjs/lib/tfjs.ml
··· 43 43 (* Dispose a tensor to free GPU/CPU memory *) 44 44 let dispose t = ignore (call t "dispose" []) 45 45 46 + (* QR algorithm for eigendecomposition of a symmetric matrix. 47 + Returns the eigenvector matrix (columns = eigenvectors, sorted by 48 + descending eigenvalue). Uses tf.linalg.qr which is available in TF.js 4.x 49 + (unlike SVD which is not). *) 50 + let eigen_qr ~n_iters (a : Js.Unsafe.any) : Js.Unsafe.any * Js.Unsafe.any = 51 + let tf = tf () in 52 + let shape = Js.to_array (get a "shape") in 53 + let n = shape.(0) in 54 + (* Accumulate eigenvectors: start with identity *) 55 + let vecs = ref (Js.Unsafe.meth_call tf "eye" [| Js.Unsafe.inject n |]) in 56 + let mat = ref a in 57 + let to_dispose = ref [] in 58 + for _ = 1 to n_iters do 59 + let qr_result = Js.Unsafe.meth_call (get tf "linalg") "qr" 60 + [| Js.Unsafe.inject !mat |] in 61 + let q = Js.array_get qr_result 0 |> Js.Optdef.to_option |> Option.get in 62 + let r = Js.array_get qr_result 1 |> Js.Optdef.to_option |> Option.get in 63 + (* New A = R * Q (similarity transform) *) 64 + let new_mat = Js.Unsafe.meth_call tf "matMul" 65 + [| Js.Unsafe.inject r; Js.Unsafe.inject q |] in 66 + (* Accumulate eigenvectors: V = V * Q *) 67 + let new_vecs = Js.Unsafe.meth_call tf "matMul" 68 + [| Js.Unsafe.inject !vecs; Js.Unsafe.inject q |] in 69 + to_dispose := !mat :: !vecs :: q :: r :: !to_dispose; 70 + mat := new_mat; 71 + vecs := new_vecs 72 + done; 73 + (* Eigenvalues are on the diagonal of the converged matrix *) 74 + let eigenvalues = Js.Unsafe.meth_call (Js.Unsafe.meth_call !mat "flatten" [||]) "abs" [||] in 75 + (* Dispose intermediates but NOT the final mat, vecs, eigenvalues *) 76 + List.iter dispose !to_dispose; 77 + dispose !mat; 78 + (eigenvalues, !vecs) 79 + 46 80 let pca (mat : Linalg.mat) ~n_components : Linalg.mat = 47 81 let tf = tf () in 48 82 (* 1. Convert input to tensor *) ··· 53 87 let x_centered = call x "sub" [Js.Unsafe.inject mean] in 54 88 (* 4. Covariance matrix: X^T X / (n-1), shape (n_features, n_features). 55 89 For n_samples >> n_features (typical: 40k x 128), this is much faster 56 - than SVD on the full data matrix. *) 90 + than decomposing the full data matrix. *) 57 91 let n_samples = Float.of_int mat.rows in 58 92 let xt = call x_centered "transpose" [] in 59 93 let xtx = Js.Unsafe.meth_call tf "matMul" ··· 61 95 let scale = Js.Unsafe.meth_call tf "scalar" 62 96 [| Js.Unsafe.inject (n_samples -. 1.0) |] in 63 97 let cov = call xtx "div" [Js.Unsafe.inject scale] in 64 - (* 5. SVD of symmetric covariance matrix. 65 - For symmetric PSD matrices, SVD gives eigendecomposition: 66 - V columns are eigenvectors, S values are eigenvalues. 67 - tf.linalg.svd returns [s, u, v] as a JS array (not an object). *) 68 - let svd_arr = Js.Unsafe.meth_call (get tf "linalg") "svd" 69 - [| Js.Unsafe.inject cov; Js.Unsafe.inject Js._true |] in 70 - let svd_s = Js.array_get svd_arr 0 |> Js.Optdef.to_option |> Option.get in 71 - let svd_u = Js.array_get svd_arr 1 |> Js.Optdef.to_option |> Option.get in 72 - let v_full = Js.array_get svd_arr 2 |> Js.Optdef.to_option |> Option.get in 73 - (* 6. Take top n_components columns of V *) 74 - let v_top = Js.Unsafe.meth_call v_full "slice" 75 - [| Js.Unsafe.inject (Js.array [| 0; 0 |]); 76 - Js.Unsafe.inject (Js.array [| mat.cols; n_components |]) |] in 98 + (* 5. Eigendecomposition via QR algorithm. 99 + 50 iterations is plenty for a 128x128 symmetric PSD matrix. *) 100 + let (eigenvalues, eigenvectors) = eigen_qr ~n_iters:50 cov in 101 + (* 6. Sort eigenvectors by descending eigenvalue and take top n_components. 102 + tf.topk gives the indices of the largest values. *) 103 + let topk = Js.Unsafe.meth_call tf "topk" 104 + [| Js.Unsafe.inject eigenvalues; Js.Unsafe.inject n_components |] in 105 + let indices = get topk "indices" in 106 + (* Gather the top eigenvector columns *) 107 + let v_top = Js.Unsafe.meth_call tf "gather" 108 + [| Js.Unsafe.inject (call eigenvectors "transpose" []); 109 + Js.Unsafe.inject indices |] in 110 + let v_top_t = call v_top "transpose" [] in 77 111 (* 7. Project: result = x_centered @ v_top, shape (n_samples, n_components) *) 78 112 let projected = Js.Unsafe.meth_call tf "matMul" 79 - [| Js.Unsafe.inject x_centered; Js.Unsafe.inject v_top |] in 113 + [| Js.Unsafe.inject x_centered; Js.Unsafe.inject v_top_t |] in 80 114 (* 8. Convert back to Linalg.mat *) 81 115 let result = tensor_to_mat projected in 82 116 (* 9. Clean up all tensors *) 83 117 List.iter dispose [x; mean; x_centered; xt; xtx; scale; cov; 84 - svd_s; svd_u; v_full; v_top; projected]; 118 + eigenvalues; eigenvectors; 119 + get topk "values"; indices; 120 + v_top; v_top_t; projected]; 85 121 result