rust-ml · YuhanLiin · May 25, 2022 · May 25, 2022 · May 25, 2022 · May 25, 2022
diff --git a/src/lobpcg/eig.rs b/src/lobpcg/eig.rs
@@ -2,6 +2,7 @@
 //!
 use super::random;
 use crate::{
+    eigh::{EigSort, Eigh},
     lobpcg::{lobpcg, Lobpcg, LobpcgResult},
     Order, Result,
 };
@@ -148,6 +149,26 @@ impl<A: NdFloat + Sum, R: Rng> TruncatedEig<A, R> {
         let x: Array2<f64> = random((self.problem.len_of(Axis(0)), num), &mut self.rng);
         let x = x.mapv(|x| NumCast::from(x).unwrap());
 
+        // use dense eigenproblem solver if more than 1/5 eigenvalues requested
+        if num * 5 > self.problem.nrows() {
+            let (eigvals, eigvecs) = self
+                .problem
+                .eigh()
+                .map_err(|e| (e, None))?
+                .sort_eig(self.order);
+
+            let (eigvals, eigvecs) = (
+                eigvals.slice_move(s![..num]),
+                eigvecs.slice_move(s![.., ..num]),
+            );
+
+            return Ok(Lobpcg {
+                eigvals,
+                eigvecs,
+                rnorm: Vec::new(),
+            });
+        }
+
         if let Some(ref preconditioner) = self.preconditioner {
             lobpcg(
                 |y| self.problem.dot(&y),

diff --git a/src/lobpcg/mod.rs b/src/lobpcg/mod.rs
@@ -45,6 +45,8 @@ where
 /// as it could be of value. If there is no result at all, then the second field is `None`.
 /// This happens if the algorithm fails in an early stage, for example if the matrix `A` is not SPD
 pub type LobpcgResult<A> = std::result::Result<Lobpcg<A>, (LinalgError, Option<Lobpcg<A>>)>;
+
+#[derive(Debug, Clone, PartialEq)]
 pub struct Lobpcg<A> {
     pub eigvals: Array1<A>,
     pub eigvecs: Array2<A>,

diff --git a/src/lobpcg/svd.rs b/src/lobpcg/svd.rs
@@ -2,6 +2,7 @@
 ///!
 ///! This module computes the k largest/smallest singular values/vectors for a dense matrix.
 use crate::{
+    eigh::{EigSort, Eigh},
     lobpcg::{lobpcg, random, Lobpcg},
     Order, Result,
 };
@@ -188,6 +189,31 @@ impl<A: NdFloat + Sum, R: Rng> TruncatedSvd<A, R> {
         }
 
         let (n, m) = (self.problem.nrows(), self.problem.ncols());
+        let ngm = n > m;
+
+        // use dense eigenproblem solver if more than 1/5 eigenvalues requested
+        if num * 5 > n.min(m) {
+            let problem = if ngm {
+                self.problem.t().dot(&self.problem)
+            } else {
+                self.problem.dot(&self.problem.t())
+            };
+
+            let (eigvals, eigvecs) = problem.eigh()?.sort_eig(self.order);
+
+            let (eigvals, eigvecs) = (
+                eigvals.slice_move(s![..num]),
+                eigvecs.slice_move(s![..num, ..]),
+            );
+
+            return Ok(TruncatedSvdResult {
+                eigvals,
+                eigvecs,
+                problem: self.problem,
+                order: self.order,
+                ngm,
+            });
+        }
 
         // generate initial matrix
         let x: Array2<f32> = random((usize::min(n, m), num), &mut self.rng);
@@ -234,7 +260,7 @@ impl<A: NdFloat + Sum, R: Rng> TruncatedSvd<A, R> {
                 eigvals,
                 eigvecs,
                 order: self.order,
-                ngm: n > m,
+                ngm,
             }),
             Err((err, None)) => Err(err),
         }
@@ -268,8 +294,7 @@ mod tests {
     use super::TruncatedSvd;
 
     use approx::assert_abs_diff_eq;
-    use ndarray::{arr1, arr2, s, Array1, Array2, NdFloat};
-    use ndarray_rand::{rand_distr::StandardNormal, RandomExt};
+    use ndarray::{arr1, arr2, Array2, NdFloat};
     use rand::distributions::{Distribution, Standard};
     use rand::SeedableRng;
     use rand_xoshiro::Xoshiro256Plus;
@@ -318,60 +343,4 @@ mod tests {
 
         assert_abs_diff_eq!(&a, &reconstructed, epsilon = 1e-5);
     }
-
-    /// Eigenvalue structure in high dimensions
-    ///
-    /// This test checks that the eigenvalues are following the Marchensko-Pastur law. The data is
-    /// standard uniformly distributed (i.e. E(x) = 0, E^2(x) = 1) and we have twice the amount of
-    /// data when compared to features. The probability density of the eigenvalues should then follow
-    /// a special densitiy function, described by the Marchenko-Pastur law.
-    ///
-    /// See also https://en.wikipedia.org/wiki/Marchenko%E2%80%93Pastur_distribution
-    #[test]
-    fn test_marchenko_pastur() {
-        // create random number generator
-        let mut rng = Xoshiro256Plus::seed_from_u64(3);
-
-        // generate normal distribution random data with N >> p
-        let data = Array2::random_using((1000, 500), StandardNormal, &mut rng) / 1000f64.sqrt();
-
-        let res =
-            TruncatedSvd::new_with_rng(data, Order::Largest, Xoshiro256Plus::seed_from_u64(42))
-                .precision(1e-3)
-                .decompose(500)
-                .unwrap();
-
-        let sv = res.values().mapv(|x: f64| x * x);
-
-        // we have created a random spectrum and can apply the Marchenko-Pastur law
-        // with variance 1 and p/n = 0.5
-        let (a, b) = (
-            1. * (1. - 0.5f64.sqrt()).powf(2.0),
-            1. * (1. + 0.5f64.sqrt()).powf(2.0),
-        );
-
-        // check that the spectrum has correct boundaries
-        assert_abs_diff_eq!(b, sv[0], epsilon = 0.1);
-        assert_abs_diff_eq!(a, sv[sv.len() - 1], epsilon = 0.1);
-
-        // estimate density empirical and compare with Marchenko-Pastur law
-        let mut i = 0;
-        'outer: for th in Array1::linspace(0.1f64, 2.8, 28).slice(s![..;-1]) {
-            let mut count = 0;
-            while sv[i] >= *th {
-                count += 1;
-                i += 1;
-
-                if i == sv.len() {
-                    break 'outer;
-                }
-            }
-
-            let x = th + 0.05;
-            let mp_law = ((b - x) * (x - a)).sqrt() / std::f64::consts::PI / x;
-            let empirical = count as f64 / 500. / ((2.8 - 0.1) / 28.);
-
-            assert_abs_diff_eq!(mp_law, empirical, epsilon = 0.05);
-        }
-    }
 }
diff --git a/tests/cholesky.rs b/tests/cholesky.rs
@@ -6,18 +6,6 @@ use linfa_linalg::{cholesky::*, triangular::*};
 
 mod common;
 
-prop_compose! {
-    fn hpd_arr()
-        (arr in common::square_arr()) -> Array2<f64> {
-        let dim = arr.nrows();
-        let mut mul = arr.t().dot(&arr);
-        for i in 0..dim {
-            mul[(i, i)] += 1.0;
-        }
-        mul
-    }
-}
-
 fn run_cholesky_test(orig: Array2<f64>) {
     let chol = orig.cholesky().unwrap();
     assert_abs_diff_eq!(chol.dot(&chol.t()), orig, epsilon = 1e-7);
@@ -69,17 +57,17 @@ fn run_invc_test(a: Array2<f64>) {
 proptest! {
     #![proptest_config(ProptestConfig::with_cases(1000))]
     #[test]
-    fn cholesky_test(arr in hpd_arr()) {
+    fn cholesky_test(arr in common::hpd_arr()) {
         run_cholesky_test(arr)
     }
 
     #[test]
-    fn solvec_test((a, x) in common::system_of_arr(hpd_arr())) {
+    fn solvec_test((a, x) in common::system_of_arr(common::hpd_arr())) {
         run_solvec_test(a, x)
     }
 
     #[test]
-    fn invc_test(arr in hpd_arr()) {
+    fn invc_test(arr in common::hpd_arr()) {
         run_invc_test(arr)
     }
 }

diff --git a/tests/common.rs b/tests/common.rs
@@ -2,6 +2,7 @@
 
 use std::ops::RangeInclusive;
 
+use approx::assert_abs_diff_eq;
 use ndarray::prelude::*;
 use proptest::prelude::*;
 use proptest_derive::Arbitrary;
@@ -49,6 +50,18 @@ prop_compose! {
     }
 }
 
+prop_compose! {
+    pub fn hpd_arr()
+        (arr in square_arr()) -> Array2<f64> {
+        let dim = arr.nrows();
+        let mut mul = arr.t().dot(&arr);
+        for i in 0..dim {
+            mul[(i, i)] += 1.0;
+        }
+        mul
+    }
+}
+
 prop_compose! {
     pub fn rect_arr()(rows in DIM_RANGE, cols in DIM_RANGE)
         (arr in matrix(rows, cols)) -> Array2<f64> {
@@ -93,3 +106,12 @@ pub fn system_of_arr(
         )
     })
 }
+
+pub fn check_eigh(arr: &Array2<f64>, vals: &Array1<f64>, vecs: &Array2<f64>, eps: f64) {
+    // Original array multiplied with eigenvec should equal eigenval times eigenvec
+    for (i, v) in vecs.axis_iter(Axis(1)).enumerate() {
+        let av = arr.dot(&v);
+        let ev = v.mapv(|x| vals[i] * x);
+        assert_abs_diff_eq!(av, ev, epsilon = eps);
+    }
+}
diff --git a/tests/eigh.rs b/tests/eigh.rs
@@ -6,15 +6,6 @@ use linfa_linalg::eigh::*;
 
 mod common;
 
-fn check_eigh(arr: &Array2<f64>, vals: &Array1<f64>, vecs: &Array2<f64>) {
-    // Original array multiplied with eigenvec should equal eigenval times eigenvec
-    for (i, v) in vecs.axis_iter(Axis(1)).enumerate() {
-        let av = arr.dot(&v);
-        let ev = v.mapv(|x| vals[i] * x);
-        assert_abs_diff_eq!(av, ev, epsilon = 1e-5);
-    }
-}
-
 fn run_eigh_test(arr: Array2<f64>) {
     let n = arr.nrows();
     let d = arr.eigh().unwrap();
@@ -23,7 +14,7 @@ fn run_eigh_test(arr: Array2<f64>) {
     // Eigenvecs should be orthogonal
     let s = vecs.t().dot(&vecs);
     assert_abs_diff_eq!(s, Array2::eye(n), epsilon = 1e-5);
-    check_eigh(&arr, &vals, &vecs);
+    common::check_eigh(&arr, &vals, &vecs, 1e-5);
 
     let (evals, evecs) = arr.clone().eigh_into().unwrap();
     assert_abs_diff_eq!(evals, vals);
@@ -33,12 +24,12 @@ fn run_eigh_test(arr: Array2<f64>) {
 
     // Check if ascending eigen is actually sorted and valid
     let (vals, vecs) = d.clone().sort_eig_asc();
-    check_eigh(&arr, &vals, &vecs);
+    common::check_eigh(&arr, &vals, &vecs, 1e-5);
     assert!(vals.windows(2).into_iter().all(|w| w[0] <= w[1]));
 
     // Check if descending eigen is actually sorted and valid
     let (vals, vecs) = d.sort_eig_desc();
-    check_eigh(&arr, &vals, &vecs);
+    common::check_eigh(&arr, &vals, &vecs, 1e-5);
     assert!(vals.windows(2).into_iter().all(|w| w[0] >= w[1]));
 }