From 3de8b0433b083c0a398a9831155a9fc4c3a86182 Mon Sep 17 00:00:00 2001
From: Matthew Hagan
Date: Mon, 22 Aug 2022 21:09:54 -0400
Subject: [PATCH 01/16] structure for test

---
 ndarray-linalg/src/expm.rs | 17 +++++++++++++++++
 ndarray-linalg/src/lib.rs  |  2 ++
 2 files changed, 19 insertions(+)
 create mode 100644 ndarray-linalg/src/expm.rs

diff --git a/ndarray-linalg/src/expm.rs b/ndarray-linalg/src/expm.rs
new file mode 100644
index 00000000..503684a8
--- /dev/null
+++ b/ndarray-linalg/src/expm.rs
@@ -0,0 +1,17 @@
+mod tests {
+    use crate::diagonal;
+
+    /// Compares expm acting on a matrix with random eigenvalues (drawn from
+    /// Gaussians) and with random eigenvectors (drawn from the Haar distribution)
+    /// to the exact answer. The exact answer is obtained by exponentiating each
+    /// diagonal entry in the eigenvalue matrix before conjugating with the
+    /// eigenvector matrices. In other words, let A = U D U^\dagger; then
+    /// e^A = e^(U D U^\dagger) = U (e^D) U^\dagger. We use expm to compute e^A
+    /// and normal floating point exponentiation to compute e^D.
+    fn expm_test_gaussian_random_input(dim: usize) -> f32 {
+        let D = diagonal::Diagonal::from([1, 2, 3, 4, 5]);
+        let U = haar_random(dim);
+        let F = floating_exp(D);
+        let diff = expm(U.dot(&D).dot(&U.conj().t())) - U.dot(&F).dot(&U.conj().t());
+    }
+}
\ No newline at end of file
diff --git a/ndarray-linalg/src/lib.rs b/ndarray-linalg/src/lib.rs
index 8a8088c8..00cdead7 100644
--- a/ndarray-linalg/src/lib.rs
+++ b/ndarray-linalg/src/lib.rs
@@ -73,6 +73,7 @@ pub mod trace;
 pub mod triangular;
 pub mod tridiagonal;
 pub mod types;
+pub mod expm;
 
 pub use crate::assert::*;
 pub use crate::cholesky::*;
@@ -97,3 +98,4 @@ pub use crate::trace::*;
 pub use crate::triangular::*;
 pub use crate::tridiagonal::*;
 pub use crate::types::*;
+pub use crate::expm::*;

From aae2485bf41d4363ff2781095718a4e2391307af Mon Sep 17 00:00:00 2001
From: Matthew Hagan
Date: Tue, 11 Oct 2022 15:27:35 -0400
Subject: [PATCH 02/16] structure

---
 ndarray-linalg/src/expm.rs     | 55 ++++++++++++++++++---
 ndarray-linalg/src/lib.rs      |  3 +-
 ndarray-linalg/src/normest1.rs | 88 ++++++++++++++++++++++++++++++
 3 files changed, 139 insertions(+), 7 deletions(-)
 create mode 100644 ndarray-linalg/src/normest1.rs

diff --git a/ndarray-linalg/src/expm.rs b/ndarray-linalg/src/expm.rs
index 503684a8..d56ffffb 100644
--- a/ndarray-linalg/src/expm.rs
+++ b/ndarray-linalg/src/expm.rs
@@ -1,5 +1,44 @@
+use crate::types;
+use ndarray::prelude::*;
+use condest::Normest1;
+use statrs::function::factorial::{binomial, factorial};
+
+// These constants are hard-coded from Al-Mohy & Higham
+const THETA_3: f64 = 1.495585217958292e-2;
+const THETA_5: f64 = 2.539398330063230e-1;
+const THETA_7: f64 = 9.504178996162932e-1;
+const THETA_9: f64 = 2.097847961257068e0;
+const THETA_13: f64 = 4.25; // Alg 5.1
+
+// Corresponds to even powers of x. Note that the denominator coefficients are the same magnitude but opposite sign. The zeroth-order coefficient is 1.
+const PADE_COEFFS: [f64; 7] = [0.5, 11. / 600., 3. / 18400., 1. / 1932000., 1. / 1585785600., 1. / 3953892096000., 1. / 64764752532480000.];
+
+
+
+
+// helper function used in Al-M & H. Name is unchanged for future reference.
+fn ell(A: Array2<f64>, m: u64, normestimator: Normest1) -> i32 {
+    1
+}
+
+/// Calculates the leading term of the error series for the [m/m] Pade approximation to exp(x).
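+/// In closed form this is c_m = 1 / (binom(2m, m) * (2m + 1)!); for example,
+/// c_1 = 1 / (2 * 3!) = 1/12. A quick sanity check (hypothetical doctest):
+/// ```ignore
+/// assert!((pade_error_coefficient(1) - 1.0 / 12.0).abs() < 1e-15);
+/// ```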
+fn pade_error_coefficient(m: u64) -> f64 {
+    1.0 / (binomial(2 * m, m) * factorial(2 * m + 1))
+}
+
+fn paterson_stockmeyer() -> () {
+
+}
+
+
+/// This function is based on the scale-and-squaring algorithm by Al-Mohy and Higham.
+pub fn expm(A: Array2<i32>) -> Array2<i32> {
+    arr2(&[[2]])
+}
+
+
 mod tests {
-    use crate::diagonal;
+    use ndarray::{*, linalg::Dot};
 
     /// Compares expm acting on a matrix with random eigenvalues (drawn from
     /// Gaussians) and with random eigenvectors (drawn from the Haar distribution)
@@ -8,10 +47,14 @@ mod tests {
     /// eigenvector matrices. In other words, let A = U D U^\dagger; then
     /// e^A = e^(U D U^\dagger) = U (e^D) U^\dagger. We use expm to compute e^A
     /// and normal floating point exponentiation to compute e^D.
-    fn expm_test_gaussian_random_input(dim: usize) -> f32 {
-        let D = diagonal::Diagonal::from([1, 2, 3, 4, 5]);
-        let U = haar_random(dim);
-        let F = floating_exp(D);
-        let diff = expm(U.dot(&D).dot(&U.conj().t())) - U.dot(&F).dot(&U.conj().t());
+    #[test]
+    fn expm_test_gaussian_random_input() {
+        let a: Array2<i32> = arr2(&[[1, 2], [3, 4]]);
+        let b: Array2<i32> = arr2(&[[0, 1], [1, 0]]);
+        println!("matrix multiplication? {:?}", a.dot(&b));
+        // let eigenvals = diagonal::Diagonal::from([1, 2, 3, 4, 5]);
+        // let eigenvecs = haar_random(dim);
+        // let exact = floating_exp(D);
+        // let diff = expm(U.dot(&D).dot(&U.conj().t())) - U.dot(&F).dot(&U.conj().t());
     }
 }
\ No newline at end of file
diff --git a/ndarray-linalg/src/lib.rs b/ndarray-linalg/src/lib.rs
index 00cdead7..541e504e 100644
--- a/ndarray-linalg/src/lib.rs
+++ b/ndarray-linalg/src/lib.rs
@@ -74,6 +74,7 @@ pub mod triangular;
 pub mod tridiagonal;
 pub mod types;
 pub mod expm;
+pub mod normest1;
 
 pub use crate::assert::*;
 pub use crate::cholesky::*;
@@ -98,4 +99,4 @@ pub use crate::trace::*;
 pub use crate::triangular::*;
 pub use crate::tridiagonal::*;
 pub use crate::types::*;
-pub use crate::expm::*;
+// pub use crate::expm::*;
diff --git a/ndarray-linalg/src/normest1.rs b/ndarray-linalg/src/normest1.rs
new file mode 100644
index 00000000..d8598dbd
--- /dev/null
+++ b/ndarray-linalg/src/normest1.rs
@@ -0,0 +1,88 @@
+use ndarray::prelude::*;
+use num_complex::ComplexFloat;
+use rand::Rng;
+// use rand::prelude::*;
+
+const MAX_COLUMN_RESAMPLES: u32 = 10;
+
+fn prepare_x_matrix(num_rows: usize, num_columns: usize) -> Array2<f64> {
+    let mut rng = rand::thread_rng();
+    let mut output: Array2<f64> = Array::<f64, _>::zeros((num_rows, num_columns).f());
+    output.mapv_inplace(|_| if rng.gen_bool(0.5) { 1.0 } else { -1.0 });
+    // todo - check sizes?
+    output.column_mut(0).fill(1.);
+    resample_parallel_columns(&mut output);
+    output
+}
+
+fn is_column_parallel(index: usize, matrix: &Array2<f64>) -> bool {
+    let dot_prods = matrix.t().dot(&matrix.column(index));
+    for ix in 0..index {
+        if f64::abs(dot_prods[ix]) == (matrix.nrows() as f64) {
+            return true;
+        }
+    }
+    false
+}
+
+fn resample_parallel_columns(mat: &mut Array2<f64>) {
+    let mut rng = rand::thread_rng();
+    for col_ix in 0..mat.ncols() {
+        // TODO- What if we hit the resample limit? should we warn or just ignore?
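+        // A fresh +/-1 column is parallel to an earlier one exactly when their dot
+        // product has magnitude nrows (all entries agree up to a global sign), which
+        // is what is_column_parallel checks; we retry at most MAX_COLUMN_RESAMPLES times.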
+ for _ in 0..MAX_COLUMN_RESAMPLES { + if is_column_parallel(col_ix, mat) { + mat.column_mut(col_ix).mapv_inplace(|_| if rng.gen_bool(0.5) {1.0} else {-1.0}); + } + } + } +} + +fn normest(A: Array2, t: u32, itmax: u32) -> f64 { + let mut est = 0.; + // Need to ensure that A is square + let n = A.nrows(); + let mut x_matrix = prepare_x_matrix(n, t as usize); + let mut index_history: Vec = Vec::new(); + let mut est_old = 0.; + let mut ind: Array2 = Array::::zeros((n, 1).f()); + let mut S_matrix: Array2 = Array::<_,_>::zeros((n, t as usize).f()); + // Main loop of algorithm 2.4 in higham and tisseur + for iteration in 0..=itmax { + let Y = A.dot(&x_matrix); + est = Y.columns().into_iter().map(|col| { + col.iter().map(|elem| f64::abs(*elem)).sum() + }).reduce(f64::max).unwrap() + } + est +} + +mod tests { + use ndarray::Array2; + use crate::{ndarray::ShapeBuilder, normest1::{prepare_x_matrix, is_column_parallel, resample_parallel_columns}}; + + #[test] + fn test_prep() { + let n = 4; + let t = 5; + let mut x_mat: Array2 = ndarray::Array::::zeros((n,t).f()); + println!("x_mat before"); + println!("{:?}", x_mat); + prepare_x_matrix(&mut x_mat); + println!("x_mat after"); + println!("{:?}", x_mat); + println!("any parallel columns? {:}", is_column_parallel(x_mat.ncols() - 1, &x_mat)); + println!("resampling columns"); + resample_parallel_columns(&mut x_mat); + println!("after resampling"); + println!("{:?}", x_mat); + } + + #[test] + fn test_one_norm() { + let Y: Array2 = array![[1.,2.,3.,4.],[1.,2.,3.,4.],[1.,2.,3.,4.]]; + let est = Y.columns().into_iter().map(|col| { + col.iter().map(|elem| f64::abs(*elem)).sum() + }).reduce(f64::max).unwrap(); + println!("est: {:}", est); + } +} \ No newline at end of file From 7f75dab1798de3aeb9971225ac5134db5ce3532b Mon Sep 17 00:00:00 2001 From: Matthew Hagan Date: Wed, 12 Oct 2022 16:11:59 -0400 Subject: [PATCH 03/16] normest working --- ndarray-linalg/src/expm.rs | 33 ++--- ndarray-linalg/src/lib.rs | 4 +- ndarray-linalg/src/normest1.rs | 227 ++++++++++++++++++++++++++++----- 3 files changed, 217 insertions(+), 47 deletions(-) diff --git a/ndarray-linalg/src/expm.rs b/ndarray-linalg/src/expm.rs index d56ffffb..842a99c9 100644 --- a/ndarray-linalg/src/expm.rs +++ b/ndarray-linalg/src/expm.rs @@ -1,6 +1,6 @@ use crate::types; -use ndarray::prelude::*; use condest::Normest1; +use ndarray::prelude::*; use statrs::function::factorial::{binomial, factorial}; // These constants are hard-coded from Al-Mohy & Higham @@ -11,10 +11,15 @@ const THETA_9: f64 = 2.097847961257068e0; const THETA_13: f64 = 4.25; // Alg 5.1 // Corresponds to even powers of x. Note that the denominator coefficients are same magnitude but opposite sign. Zeroth order coefficient is 1. -const PADE_COEFFS: [f64; 7] = [0.5, 11. / 600., 3. / 18400., 1. / 1932000., 1. / 1585785600., 1. / 3953892096000. , 1. / 64764752532480000. ]; - - - +const PADE_COEFFS: [f64; 7] = [ + 0.5, + 11. / 600., + 3. / 18400., + 1. / 1932000., + 1. / 1585785600., + 1. / 3953892096000., + 1. / 64764752532480000., +]; // helper function used in Al-M & H. Name is unchanged for future reference. 
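// (For reference: ell(A, m) ultimately returns max(ceil(log2(alpha / u) / (2m)), 0),
// where alpha = ||A^(2m+1)||_abs * c_m / ||A||_1, c_m is the Pade error coefficient
// computed below, and u is the unit roundoff. Here it is still a stub returning 1.)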
fn ell(A: Array2, m: u64, normestimator: Normest1) -> i32 { @@ -26,35 +31,31 @@ fn pade_error_coefficient(m: u64) -> f64 { 1.0 / (binomial(2 * m, m) * factorial(2 * m + 1)) } -fn paterson_stockmeyer() -> () { - -} - +fn paterson_stockmeyer() -> () {} -/// This function is based on the scale-and-squaring algorithm by +/// This function is based on the scale-and-squaring algorithm by pub fn expm(A: Array2) -> Array2 { arr2(&[[2]]) } - mod tests { - use ndarray::{*, linalg::Dot}; + use ndarray::{linalg::Dot, *}; /// Compares expm acting on a matrix with random eigenvalues (drawn from /// Gaussians) and with random eigenvectors (drawn from Haar distribution) /// to the exact answer. The exact answer is done by exponentiating each /// diagonal entry in the eigenvalue matrix before conjugating with the - /// eigenvector matrices. In other words, let A = U D U^\dagger, then + /// eigenvector matrices. In other words, let A = U D U^\dagger, then /// because e^A = e^(U D U^\dagger) = U (e^D) U^dagger. We use expm /// to compute e^A and normal floating point exponentiation to compute e^D #[test] fn expm_test_gaussian_random_input() { - let a: Array2 = arr2(&[[1,2],[3,4]]); - let b: Array2 = arr2(&[[0,1],[1,0]]); + let a: Array2 = arr2(&[[1, 2], [3, 4]]); + let b: Array2 = arr2(&[[0, 1], [1, 0]]); println!("matrix multiplication? {:?}", a.dot(&b)); // let eigenvals = diagonal::Diagonal::from([1,2,3,4,5]); // let eigenvecs = haar_random(dim); // let exact = floating_exp(D); // let diff = expm(U D U.conj().T) - U F U.conj().T; } -} \ No newline at end of file +} diff --git a/ndarray-linalg/src/lib.rs b/ndarray-linalg/src/lib.rs index 541e504e..2545a412 100644 --- a/ndarray-linalg/src/lib.rs +++ b/ndarray-linalg/src/lib.rs @@ -55,6 +55,7 @@ pub mod diagonal; pub mod eig; pub mod eigh; pub mod error; +pub mod expm; pub mod generate; pub mod inner; pub mod krylov; @@ -62,6 +63,7 @@ pub mod layout; pub mod least_squares; pub mod lobpcg; pub mod norm; +pub mod normest1; pub mod operator; pub mod opnorm; pub mod qr; @@ -73,8 +75,6 @@ pub mod trace; pub mod triangular; pub mod tridiagonal; pub mod types; -pub mod expm; -pub mod normest1; pub use crate::assert::*; pub use crate::cholesky::*; diff --git a/ndarray-linalg/src/normest1.rs b/ndarray-linalg/src/normest1.rs index d8598dbd..ea3939d3 100644 --- a/ndarray-linalg/src/normest1.rs +++ b/ndarray-linalg/src/normest1.rs @@ -1,4 +1,6 @@ -use ndarray::prelude::*; +use std::iter::{Zip, zip}; + +use ndarray::{concatenate, prelude::*}; use num_complex::ComplexFloat; use rand::Rng; // use rand::prelude::*; @@ -7,82 +9,249 @@ const MAX_COLUMN_RESAMPLES: u32 = 10; fn prepare_x_matrix(num_rows: usize, num_columns: usize) -> Array2 { let mut rng = rand::thread_rng(); - let mut output: Array2 = Array::::zeros((num_rows, num_columns).f()); - output.mapv_inplace(|_| if rng.gen_bool(0.5) {1.0} else {-1.0}); + let mut output: Array2 = Array::::zeros((num_rows, num_columns).f()); + output.mapv_inplace(|_| if rng.gen_bool(0.5) { 1.0 } else { -1.0 }); // todo - check sizes? 
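    // The first column is fixed to the all-ones vector, so y = A.dot(&x) always
    // contains the deterministic probe A*1; the remaining columns stay random
    // +/-1 vectors, as in Alg. 2.4 of Higham & Tisseur.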
output.column_mut(0).fill(1.); - resample_parallel_columns(&mut output); + ensure_no_parallel_columns(&mut output); output } fn is_column_parallel(index: usize, matrix: &Array2) -> bool { + // TODO - no need to dot product entire matrix, just the preceeding columns let dot_prods = matrix.t().dot(&matrix.column(index)); for ix in 0..index { - if f64::abs(dot_prods[ix]) == (matrix.nrows() as f64) { + if f64::abs(dot_prods[ix]) == matrix.nrows() as f64 { return true; } } false } -fn resample_parallel_columns(mat: &mut Array2) { +fn ensure_column_not_parallel(matrix: &mut Array2, column_index: usize) { let mut rng = rand::thread_rng(); + for _ in 0..MAX_COLUMN_RESAMPLES { + if is_column_parallel(column_index, matrix) { + matrix.column_mut(column_index).mapv_inplace(|_| { + if rng.gen_bool(0.5) { + 1.0 + } else { + -1.0 + } + }); + } else { + break; + } + } +} + +fn ensure_no_parallel_columns(mat: &mut Array2) { for col_ix in 0..mat.ncols() { - // TODO- What if we hit the resample limit? should we warn or just ignore? - for _ in 0..MAX_COLUMN_RESAMPLES { - if is_column_parallel(col_ix, mat) { - mat.column_mut(col_ix).mapv_inplace(|_| if rng.gen_bool(0.5) {1.0} else {-1.0}); + ensure_column_not_parallel(mat, col_ix); + } +} + +/// Outputs true if every column of s_new is parallel to some column in s_old, false otherwise. +fn special_delivery(s_new: &Array2, s_old: &Array2) -> bool { + let mut ret = true; + let dots = s_old.t().dot(s_new); + for col in dots.columns() { + let mut is_col_good = false; + for elem in col.iter() { + if f64::abs(*elem) == col.len() as f64 { + is_col_good = true; } } + if is_col_good == false { + ret = false; + break; + } + } + ret +} + +fn ensure_new_s_matrix(s: &mut Array2, s_old: &Array2) { + let mut big_matrix: Array2 = concatenate(Axis(1), &[s_old.view(), s.view()]).unwrap(); + for col_ix in 0..s.ncols() { + ensure_column_not_parallel(&mut big_matrix, s_old.ncols() + col_ix); + for row_ix in 0..s.nrows() { + s.column_mut(col_ix)[row_ix] = big_matrix.column(s_old.ncols() + col_ix)[row_ix]; + } + } +} + +fn check_index_history(indices: &Vec, index_history: &Vec, t: usize) -> bool { + let mut ret = false; + for chunk in index_history.chunks(t) { + if *chunk == indices[0..t] { + ret = true; + break; + } + } + ret +} + +fn update_indices(indices: &mut Vec, index_history: &Vec, t: usize) { + let mut unique_indices: Vec = Vec::with_capacity(t); + for ix in indices.iter() { + if index_history.contains(&ix) == false { + unique_indices.push(*ix); + if unique_indices.len() == t { + break; + } + } + } + for ix in 0..unique_indices.len() { + indices[ix] = unique_indices[ix]; } } -fn normest(A: Array2, t: u32, itmax: u32) -> f64 { - let mut est = 0.; +fn normest(A: &Array2, t: u32, itmax: u32) -> f64 { + let mut est = 0.0; + let mut best_index = 0; // Need to ensure that A is square let n = A.nrows(); let mut x_matrix = prepare_x_matrix(n, t as usize); let mut index_history: Vec = Vec::new(); - let mut est_old = 0.; - let mut ind: Array2 = Array::::zeros((n, 1).f()); - let mut S_matrix: Array2 = Array::<_,_>::zeros((n, t as usize).f()); + let mut est_old = 0.0; + let mut indices: Vec = (0..n).collect(); + let mut S_matrix: Array2 = Array::<_, _>::zeros((n, t as usize).f()); + let mut S_old = S_matrix.clone(); // Main loop of algorithm 2.4 in higham and tisseur - for iteration in 0..=itmax { + for iteration in 0..itmax { let Y = A.dot(&x_matrix); - est = Y.columns().into_iter().map(|col| { - col.iter().map(|elem| f64::abs(*elem)).sum() - }).reduce(f64::max).unwrap() + let 
index_norm_pairs: Vec<(usize, f64)> = (0..Y.ncols()) + .into_iter() + .map(|ix| (ix, Y.column(ix).map(|elem| f64::abs(*elem)).sum())) + .collect(); + let mut ix_best = 0; + let mut max_est = -1.0; + for ix in 0..index_norm_pairs.len() { + if index_norm_pairs[ix].1 > max_est { + ix_best = ix; + max_est = index_norm_pairs[ix].1; + } + } + est = max_est; + if est > est_old || iteration == 1 { + best_index = ix_best; + } + if iteration > 1 && est < est_old { + est = est_old; + break; + } + S_old = S_matrix.clone(); + S_matrix = Y.mapv(|x| x.signum()); + if special_delivery(&S_matrix, &S_old) { + break; + } + if t > 1 { + ensure_new_s_matrix(&mut S_matrix, &S_old); + } + let z_matrix: Array2 = A.t().dot(&S_matrix); + let mut h: Array1 = ndarray::Array::::zeros((z_matrix.nrows()).f()); + let mut max_h = f64::MIN; + for row_ix in 0..z_matrix.nrows() { + let mut max = f64::MIN; + for col_ix in 0..z_matrix.ncols() { + if z_matrix[[row_ix, col_ix]] > max { + max = z_matrix[[row_ix, col_ix]]; + } + } + h[[row_ix]] = max; + if max > max_h { + max_h = max; + } + } + if iteration >= 2 && max_h == h[[best_index]] { + break; + } + let mut zipped_pairs: Vec<(f64, usize)> = zip(h.to_vec(), indices.clone()).collect(); + zipped_pairs.sort_unstable_by(|a,b| b.0.partial_cmp(&a.0).unwrap()); + for ix in 0..zipped_pairs.len() { + h[[ix]] = zipped_pairs[ix].0; + indices[ix] = zipped_pairs[ix].1; + } + if t > 1 { + if check_index_history(&indices, &index_history, t as usize) { + break; + } + update_indices(&mut indices, &index_history, t as usize); + } + for ix in 0..(t as usize) { + x_matrix.column_mut(ix).fill(0.0); + x_matrix[[indices[ix], ix]] = 1.0; + index_history.push(indices[ix]); + } } est } mod tests { + use crate::{ + ndarray::ShapeBuilder, + normest1::{ensure_no_parallel_columns, is_column_parallel, prepare_x_matrix}, OperationNorm, + }; use ndarray::Array2; - use crate::{ndarray::ShapeBuilder, normest1::{prepare_x_matrix, is_column_parallel, resample_parallel_columns}}; + use rand::{thread_rng, Rng}; + + use super::{special_delivery, normest}; #[test] fn test_prep() { let n = 4; let t = 5; - let mut x_mat: Array2 = ndarray::Array::::zeros((n,t).f()); + let mut x_mat: Array2 = ndarray::Array::::zeros((n, t).f()); println!("x_mat before"); println!("{:?}", x_mat); - prepare_x_matrix(&mut x_mat); + // prepare_x_matrix(&mut x_mat); println!("x_mat after"); println!("{:?}", x_mat); - println!("any parallel columns? {:}", is_column_parallel(x_mat.ncols() - 1, &x_mat)); + println!( + "any parallel columns? 
{:}", + is_column_parallel(x_mat.ncols() - 1, &x_mat) + ); println!("resampling columns"); - resample_parallel_columns(&mut x_mat); + ensure_no_parallel_columns(&mut x_mat); println!("after resampling"); println!("{:?}", x_mat); } #[test] fn test_one_norm() { - let Y: Array2 = array![[1.,2.,3.,4.],[1.,2.,3.,4.],[1.,2.,3.,4.]]; - let est = Y.columns().into_iter().map(|col| { - col.iter().map(|elem| f64::abs(*elem)).sum() - }).reduce(f64::max).unwrap(); + let Y: Array2 = array![[1., 2., 3., 4.], [1., 2., 3., 0.], [1., 2., 3., 0.]]; + let est = Y + .columns() + .into_iter() + .map(|col| col.map(|x| f64::abs(*x)).sum()) + .reduce(f64::max) + .unwrap(); println!("est: {:}", est); } -} \ No newline at end of file + + #[test] + fn test_special_delivery() { + let n = 100; + let t = 4; + let s_1: Array2 = array![[-1., 1., 1.], [1., 1., 1.], [-1., -1., 1.]]; + let s_2: Array2 = array![[-1., -1., 1.], [-1., -1., 1.], [-1., 1., -1.]]; + println!("special delivery: {:}", special_delivery(&s_2, &s_1)); + } + + #[test] + fn test_normest() { + let n = 100; + let mut results = Vec::new(); + let t = 10; + let itmax = 10; + let mut mat: Array2 = ndarray::Array::::zeros((n, n).f()); + let mut rng = rand::thread_rng(); + for _ in 0..1000 { + mat.mapv_inplace(|_| rng.gen()); + results.push({ + normest(&mat, t as u32, itmax) / mat.opnorm_one().unwrap() + }); + } + println!("results average: {:}", results.iter().sum::() / results.len() as f64); + } +} From 917c69142af47adb4588c0c1935d1887793e7176 Mon Sep 17 00:00:00 2001 From: Matthew Hagan Date: Thu, 13 Oct 2022 22:43:20 -0400 Subject: [PATCH 04/16] minor progress --- ndarray-linalg/src/expm.rs | 223 +++++++++++++++++++++++++++++++-- ndarray-linalg/src/normest1.rs | 20 +-- 2 files changed, 223 insertions(+), 20 deletions(-) diff --git a/ndarray-linalg/src/expm.rs b/ndarray-linalg/src/expm.rs index 842a99c9..1134de4a 100644 --- a/ndarray-linalg/src/expm.rs +++ b/ndarray-linalg/src/expm.rs @@ -1,5 +1,4 @@ -use crate::types; -use condest::Normest1; +use crate::{types, Inverse}; use ndarray::prelude::*; use statrs::function::factorial::{binomial, factorial}; @@ -10,19 +9,62 @@ const THETA_7: f64 = 9.504178996162932e-1; const THETA_9: f64 = 2.097847961257068e0; const THETA_13: f64 = 4.25; // Alg 5.1 -// Corresponds to even powers of x. Note that the denominator coefficients are same magnitude but opposite sign. Zeroth order coefficient is 1. -const PADE_COEFFS: [f64; 7] = [ +// this is pure laziness aka "ergonomics" +const THETA_MAP: [f64; 14] = [ + 0., 0., 0., THETA_3, 0., THETA_5, 0., THETA_7, 0., THETA_9, 0., 0., 0., THETA_13, +]; + +// The Pade Coefficients for the numerator of the diagonal approximation to Exp[x]. Computed via Mathematica/WolframAlpha. +// Note that the denominator has the same coefficients but odd powers have an opposite sign. +// Coefficients are also stored via the power of x, so for example the numerator would look like +// x^0 * PADE_COEFF_M[0] + x^1 * PADE_COEFF_M[1] + ... + x^m * PADE_COEFF_M[M] +const PADE_COEFFS_3: [f64; 4] = [1., 0.5, 0.1, 1. / 120.]; + +const PADE_COEFFS_5: [f64; 6] = [1., 0.5, 1. / 9., 1. / 72., 1. / 1_008., 1. / 30_240.]; + +const PADE_COEFFS_7: [f64; 8] = [ + 1., + 0.5, + 3. / 26., + 5. / 312., + 5. / 3_432., + 1. / 11_440., + 1. / 308_880., + 1. / 17_297_280., +]; + +const PADE_COEFFS_9: [f64; 10] = [ + 1., + 0.5, + 2. / 17., + 7. / 408., + 7. / 4_080., + 1. / 8_160., + 1. / 159_120., + 1. / 4_455_360., + 1. / 196_035_840., + 1. / 17_643_225_600., +]; + +const PADE_COEFFS_13: [f64; 14] = [ + 1., 0.5, + 3. 
/ 25., 11. / 600., - 3. / 18400., - 1. / 1932000., - 1. / 1585785600., - 1. / 3953892096000., - 1. / 64764752532480000., + 11. / 5_520., + 3. / 18_400., + 1. / 96_600., + 1. / 1_932_000., + 1. / 48_944_000., + 1. / 1_585_785_600., + 1. / 67_395_888_000., + 1. / 3_953_892_096_000., + 1. / 355_850_288_640_000., + 1. / 64_764_752_532_480_000., ]; // helper function used in Al-M & H. Name is unchanged for future reference. -fn ell(A: Array2, m: u64, normestimator: Normest1) -> i32 { +fn ell(A: Array2, m: u64) -> i32 { 1 } @@ -31,16 +73,175 @@ fn pade_error_coefficient(m: u64) -> f64 { 1.0 / (binomial(2 * m, m) * factorial(2 * m + 1)) } -fn paterson_stockmeyer() -> () {} +fn pade_approximation_3(input: &Array2, output: &mut Array2) { + let input_2 = input.dot(input); + let evens = PADE_COEFFS_3[0] * Array::eye(input.nrows()) + PADE_COEFFS_3[2] * input_2.clone(); + let odds = + (PADE_COEFFS_3[1] * Array::eye(input.nrows()) + PADE_COEFFS_3[3] * input_2).dot(input); + let inverted = (-1. * odds.clone() + evens.clone()).inv().unwrap(); + output.assign(&inverted.dot(&(odds + evens))); +} + +fn pade_approximation_5(input: &Array2, output: &mut Array2) { + let input_2 = input.dot(input); + let input_4 = input_2.dot(&input_2); + let evens = PADE_COEFFS_5[0] * Array::eye(input.nrows()) + + PADE_COEFFS_5[2] * input_2.clone() + + PADE_COEFFS_5[4] * input_4.clone(); + let odds = (PADE_COEFFS_5[1] * Array::eye(input.nrows()) + + PADE_COEFFS_5[3] * input_2 + + PADE_COEFFS_5[5] * input_4) + .dot(input); + let inverted = (-1. * odds.clone() + evens.clone()).inv().unwrap(); + output.assign(&inverted.dot(&(odds + evens))); +} + +fn pade_approximation_7(input: &Array2, output: &mut Array2) { + let input_2 = input.dot(input); + let input_4 = input_2.dot(&input_2); + let input_6 = input_2.dot(&input_4); + let evens = PADE_COEFFS_7[0] * Array::eye(input.nrows()) + + PADE_COEFFS_7[2] * input_2.clone() + + PADE_COEFFS_7[4] * input_4.clone() + + PADE_COEFFS_7[6] * input_6.clone(); + let odds = (PADE_COEFFS_7[1] * Array::eye(input.nrows()) + + PADE_COEFFS_7[3] * input_2 + + PADE_COEFFS_7[5] * input_4 + + PADE_COEFFS_7[7] * input_6) + .dot(input); + let inverted = (-1. * odds.clone() + evens.clone()).inv().unwrap(); + output.assign(&inverted.dot(&(odds + evens))); +} +fn pade_approximation_9(input: &Array2, output: &mut Array2) { + let input_2 = input.dot(input); + let input_4 = input_2.dot(&input_2); + let input_6 = input_2.dot(&input_4); + let input_8 = input_4.dot(&input_4); + let evens = PADE_COEFFS_9[0] * Array::eye(input.nrows()) + + PADE_COEFFS_9[2] * input_2.clone() + + PADE_COEFFS_9[4] * input_4.clone() + + PADE_COEFFS_9[6] * input_6.clone() + + PADE_COEFFS_9[8] * input_8.clone(); + let odds = (PADE_COEFFS_9[1] * Array::eye(input.nrows()) + + PADE_COEFFS_9[3] * input_2 + + PADE_COEFFS_9[5] * input_4 + + PADE_COEFFS_9[7] * input_6 + + PADE_COEFFS_9[9] * input_8) + .dot(input); + let inverted = (-1. 
* odds.clone() + evens.clone()).inv().unwrap(); + output.assign(&inverted.dot(&(odds + evens))); +} +fn pade_approximation_13(input: &Array2, output: &mut Array2) { + let input_2 = input.dot(input); + let input_4 = input_2.dot(&input_2); + let input_6 = input_2.dot(&input_4); + let input_8 = input_4.dot(&input_4); + let evens_1 = PADE_COEFFS_13[0] * Array::eye(input.nrows()) + + PADE_COEFFS_13[2] * input_2.clone() + + PADE_COEFFS_13[4] * input_4.clone() + + PADE_COEFFS_13[6] * input_6.clone(); + let evens_2 = (PADE_COEFFS_13[8] * input_2.clone() + + PADE_COEFFS_13[10] * input_4.clone() + + PADE_COEFFS_13[12] * input_6.clone()) + .dot(&input_6); + let evens = evens_1 + evens_2; + let odds_1 = PADE_COEFFS_13[1] * Array::eye(input.nrows()) + + PADE_COEFFS_13[3] * input_2.clone() + + PADE_COEFFS_13[5] * input_4.clone() + + PADE_COEFFS_13[7] * input_6.clone(); + let odds_2 = (PADE_COEFFS_13[9] * input_2 + + PADE_COEFFS_13[11] * input_4 + + PADE_COEFFS_13[13] * input_6.clone()) + .dot(&input_6); + let odds = (odds_1 + odds_2).dot(input); + let inverted = (-1. * odds.clone() + evens.clone()).inv().unwrap(); + output.assign(&inverted.dot(&(odds + evens))); +} + +fn pade_approximation(input: &Array2, output: &mut Array2, degree: &usize) { + match *degree { + 3 => pade_approximation_3(input, output), + 5 => pade_approximation_5(input, output), + 7 => pade_approximation_7(input, output), + 9 => pade_approximation_9(input, output), + 13 => pade_approximation_13(input, output), + _ => { + println!("Undefined pade approximant order.") + } + } +} +fn old_expm(matrix: &Array2) -> Array2 { + let mut ret: Array2 = ndarray::Array2::::zeros((matrix.nrows(), matrix.ncols())); + for m in vec![3, 5, 7, 9].iter() { + if super::normest1::normest(matrix, 4, 4) <= THETA_MAP[*m] { + pade_approximation(matrix, &mut ret, m); + return ret; + } + } + ret +} /// This function is based on the scale-and-squaring algorithm by pub fn expm(A: Array2) -> Array2 { arr2(&[[2]]) } mod tests { + use crate::expm::{ + pade_approximation_13, pade_approximation_3, pade_approximation_5, pade_approximation_7, + pade_approximation_9, + }; use ndarray::{linalg::Dot, *}; + #[test] + fn test_pade_approximants() { + let mat = 10. 
* array![[0.1, 0.2, 0.3], [0.2, 0.1, 0.5], [0.11, 0.22, 0.32]]; + let mut output_3: Array2 = Array::zeros((3, 3).f()); + let mut output_5: Array2 = Array::zeros((3, 3).f()); + let mut output_7: Array2 = Array::zeros((3, 3).f()); + let mut output_9: Array2 = Array::zeros((3, 3).f()); + let mut output_13: Array2 = Array::zeros((3, 3).f()); + fn compute_pade_diff_error(output: &Array2, expected: &Array2) -> f64 { + let mut tot = 0.0; + for row_ix in 0..output.nrows() { + for col_ix in 0..output.ncols() { + tot += (output[[row_ix, col_ix]] - expected[[row_ix, col_ix]]).abs(); + } + } + tot + } + pade_approximation_3(&mat, &mut output_3); + pade_approximation_5(&mat, &mut output_5); + pade_approximation_7(&mat, &mut output_7); + pade_approximation_9(&mat, &mut output_9); + pade_approximation_13(&mat, &mut output_13); + let expected = array![ + [157.766, 217.949, 432.144], + [200.674, 278.691, 552.725], + [169.969, 236.289, 469.437] + ]; + println!("output?"); + println!( + "3 norm error: {:}", + compute_pade_diff_error(&output_3, &expected) + ); + println!( + "5 norm error: {:}", + compute_pade_diff_error(&output_5, &expected) + ); + println!( + "7 norm error: {:}", + compute_pade_diff_error(&output_7, &expected) + ); + println!( + "9 norm error: {:}", + compute_pade_diff_error(&output_9, &expected) + ); + println!( + "13 norm error: {:}", + compute_pade_diff_error(&output_13, &expected) + ); + } /// Compares expm acting on a matrix with random eigenvalues (drawn from /// Gaussians) and with random eigenvectors (drawn from Haar distribution) /// to the exact answer. The exact answer is done by exponentiating each diff --git a/ndarray-linalg/src/normest1.rs b/ndarray-linalg/src/normest1.rs index ea3939d3..587ba9bb 100644 --- a/ndarray-linalg/src/normest1.rs +++ b/ndarray-linalg/src/normest1.rs @@ -1,4 +1,4 @@ -use std::iter::{Zip, zip}; +use std::iter::{zip, Zip}; use ndarray::{concatenate, prelude::*}; use num_complex::ComplexFloat; @@ -106,7 +106,7 @@ fn update_indices(indices: &mut Vec, index_history: &Vec, t: usize } } -fn normest(A: &Array2, t: u32, itmax: u32) -> f64 { +pub fn normest(A: &Array2, t: u32, itmax: u32) -> f64 { let mut est = 0.0; let mut best_index = 0; // Need to ensure that A is square @@ -167,7 +167,7 @@ fn normest(A: &Array2, t: u32, itmax: u32) -> f64 { break; } let mut zipped_pairs: Vec<(f64, usize)> = zip(h.to_vec(), indices.clone()).collect(); - zipped_pairs.sort_unstable_by(|a,b| b.0.partial_cmp(&a.0).unwrap()); + zipped_pairs.sort_unstable_by(|a, b| b.0.partial_cmp(&a.0).unwrap()); for ix in 0..zipped_pairs.len() { h[[ix]] = zipped_pairs[ix].0; indices[ix] = zipped_pairs[ix].1; @@ -190,12 +190,13 @@ fn normest(A: &Array2, t: u32, itmax: u32) -> f64 { mod tests { use crate::{ ndarray::ShapeBuilder, - normest1::{ensure_no_parallel_columns, is_column_parallel, prepare_x_matrix}, OperationNorm, + normest1::{ensure_no_parallel_columns, is_column_parallel, prepare_x_matrix}, + OperationNorm, }; use ndarray::Array2; use rand::{thread_rng, Rng}; - use super::{special_delivery, normest}; + use super::{normest, special_delivery}; #[test] fn test_prep() { @@ -248,10 +249,11 @@ mod tests { let mut rng = rand::thread_rng(); for _ in 0..1000 { mat.mapv_inplace(|_| rng.gen()); - results.push({ - normest(&mat, t as u32, itmax) / mat.opnorm_one().unwrap() - }); + results.push({ normest(&mat, t as u32, itmax) / mat.opnorm_one().unwrap() }); } - println!("results average: {:}", results.iter().sum::() / results.len() as f64); + println!( + "results average: {:}", + 
results.iter().sum::() / results.len() as f64 + ); } } From f5b74db54665f142e741211fc0493b1c265243d6 Mon Sep 17 00:00:00 2001 From: Matthew Hagan Date: Fri, 14 Oct 2022 15:59:48 -0400 Subject: [PATCH 05/16] normest passes tests --- ndarray-linalg/src/normest1.rs | 170 +++++++++++++++++++++------------ 1 file changed, 109 insertions(+), 61 deletions(-) diff --git a/ndarray-linalg/src/normest1.rs b/ndarray-linalg/src/normest1.rs index 587ba9bb..d72b4a34 100644 --- a/ndarray-linalg/src/normest1.rs +++ b/ndarray-linalg/src/normest1.rs @@ -3,6 +3,8 @@ use std::iter::{zip, Zip}; use ndarray::{concatenate, prelude::*}; use num_complex::ComplexFloat; use rand::Rng; + +use crate::OperationNorm; // use rand::prelude::*; const MAX_COLUMN_RESAMPLES: u32 = 10; @@ -14,7 +16,7 @@ fn prepare_x_matrix(num_rows: usize, num_columns: usize) -> Array2 { // todo - check sizes? output.column_mut(0).fill(1.); ensure_no_parallel_columns(&mut output); - output + output / (num_rows as f64) } fn is_column_parallel(index: usize, matrix: &Array2) -> bool { @@ -52,7 +54,7 @@ fn ensure_no_parallel_columns(mat: &mut Array2) { } /// Outputs true if every column of s_new is parallel to some column in s_old, false otherwise. -fn special_delivery(s_new: &Array2, s_old: &Array2) -> bool { +fn check_if_s_parallel_to_s_old(s_new: &Array2, s_old: &Array2) -> bool { let mut ret = true; let dots = s_old.t().dot(s_new); for col in dots.columns() { @@ -70,6 +72,8 @@ fn special_delivery(s_new: &Array2, s_old: &Array2) -> bool { ret } +/// Resamples columns of s that are parallel to to any prior columns of s itself or to any column +/// of s_old. fn ensure_new_s_matrix(s: &mut Array2, s_old: &Array2) { let mut big_matrix: Array2 = concatenate(Axis(1), &[s_old.view(), s.view()]).unwrap(); for col_ix in 0..s.ncols() { @@ -106,84 +110,116 @@ fn update_indices(indices: &mut Vec, index_history: &Vec, t: usize } } -pub fn normest(A: &Array2, t: u32, itmax: u32) -> f64 { +/// Compute a lower bound of the 1-norm of a square matrix. The 1-norm is defined as +/// || A ||_1 = max_{1 <= j <= n} \sum_i |a_{i,j} | +/// In other words find the column with the largest sum over all rows (and take absolute value). +/// Note this is not equivalent to the induced 1-norm or the Schatten 1-norm. +/// We do not give the option of returning the v, w that maximize the resulting norm to keep the +/// function signature clean. This could be implemented in the future. +/// Panics if input matrix is non-square to avoid returning a Result<_,_>. +/// This is computed following Algorithm 2.4 of the paper "A Block Algorithm for Matrix 1-Norm +/// Estimation with an Application to 1-Norm Pseudospectra" by Nicholas J. Higham and +/// Francoise Tisseur (2000) SIAM J. Matrix Anal. Appl. Vol. 21, No. 4, pp. 1185-1201. +pub fn normest(input_matrix: &Array2, t: usize, itmax: u32) -> f64 { + if input_matrix.is_square() == false { + panic!("One Norm Estimation encountered non-square input matrix."); + } + if t < 1 { + panic!("One Norm Estimation requires at least one column for estimation."); + } + if itmax < 2 { + panic!("One Norm Estimation requires at least two iterations."); + } + let n = input_matrix.nrows(); + + // This is worse than just computing the norm exactly, so just do that. Will panic + // if opnorm_one() fails. 
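+    // For reference, the exact norm is ||A||_1 = max_j sum_i |a_ij|, a single
+    // O(n^2) column scan via opnorm_one, so the estimator is only worthwhile
+    // when t <= n.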
+ if t > n { + return input_matrix.opnorm_one().unwrap(); + } let mut est = 0.0; let mut best_index = 0; - // Need to ensure that A is square - let n = A.nrows(); - let mut x_matrix = prepare_x_matrix(n, t as usize); + + let mut x_matrix = prepare_x_matrix(n, t); let mut index_history: Vec = Vec::new(); let mut est_old = 0.0; let mut indices: Vec = (0..n).collect(); - let mut S_matrix: Array2 = Array::<_, _>::zeros((n, t as usize).f()); - let mut S_old = S_matrix.clone(); + let mut s_matrix: Array2 = Array::<_, _>::zeros((n, t).f()); + let mut s_old: Array2 = Array::<_, _>::zeros((n, t).f()); // Main loop of algorithm 2.4 in higham and tisseur for iteration in 0..itmax { - let Y = A.dot(&x_matrix); - let index_norm_pairs: Vec<(usize, f64)> = (0..Y.ncols()) + // Y = AX + let y = input_matrix.dot(&x_matrix); + + // est = max { ||Y(:, j)||_1 : j = 1:t} + let index_norm_pairs: Vec<(usize, f64)> = (0..y.ncols()) .into_iter() - .map(|ix| (ix, Y.column(ix).map(|elem| f64::abs(*elem)).sum())) + .map(|ix| (ix, y.column(ix).map(|elem| f64::abs(*elem)).sum())) .collect(); - let mut ix_best = 0; - let mut max_est = -1.0; - for ix in 0..index_norm_pairs.len() { - if index_norm_pairs[ix].1 > max_est { - ix_best = ix; - max_est = index_norm_pairs[ix].1; - } - } + let (maximizing_ix, max_est) = *index_norm_pairs + .iter() + .reduce(|x, y| if x.1 > y.1 { x } else { y }) + .unwrap(); est = max_est; + if est > est_old || iteration == 1 { - best_index = ix_best; + best_index = indices[maximizing_ix]; } - if iteration > 1 && est < est_old { + + // Section (1) of Alg. 2.4 + if iteration > 1 && est <= est_old { est = est_old; break; } - S_old = S_matrix.clone(); - S_matrix = Y.mapv(|x| x.signum()); - if special_delivery(&S_matrix, &S_old) { + + est_old = est; + s_old.assign(&s_matrix); + // Is there a faster way to do this? I wanted to avoid creating a new matrix of signs + // and then copying it into s_matrix, this was the first way I could think of doing so. + for row_ix in 0..s_matrix.nrows() { + for col_ix in 0..s_matrix.ncols() { + s_matrix[[row_ix, col_ix]] = y[[row_ix, col_ix]].signum() + } + } + + // Section (2) of Alg. 2.4 + if check_if_s_parallel_to_s_old(&s_matrix, &s_old) { break; } if t > 1 { - ensure_new_s_matrix(&mut S_matrix, &S_old); - } - let z_matrix: Array2 = A.t().dot(&S_matrix); - let mut h: Array1 = ndarray::Array::::zeros((z_matrix.nrows()).f()); - let mut max_h = f64::MIN; - for row_ix in 0..z_matrix.nrows() { - let mut max = f64::MIN; - for col_ix in 0..z_matrix.ncols() { - if z_matrix[[row_ix, col_ix]] > max { - max = z_matrix[[row_ix, col_ix]]; - } - } - h[[row_ix]] = max; - if max > max_h { - max_h = max; - } + ensure_new_s_matrix(&mut s_matrix, &s_old); } - if iteration >= 2 && max_h == h[[best_index]] { + + // Section (3) of Alg. 2.4 + let z_matrix: Array2 = input_matrix.t().dot(&s_matrix); + let mut h: Vec = z_matrix + .rows() + .into_iter() + .map(|row| *row.iter().reduce(|x, y| if x > y { x } else { y }).unwrap()) + .collect(); + let max_h = *h.iter().reduce(|x, y| if x > y { x } else { y }).unwrap(); + + // Section (4) of Alg. 
2.4 + if iteration >= 2 && max_h == h[best_index] { break; } - let mut zipped_pairs: Vec<(f64, usize)> = zip(h.to_vec(), indices.clone()).collect(); + let mut zipped_pairs: Vec<(f64, usize)> = zip(h.clone(), 0..n).collect(); zipped_pairs.sort_unstable_by(|a, b| b.0.partial_cmp(&a.0).unwrap()); - for ix in 0..zipped_pairs.len() { - h[[ix]] = zipped_pairs[ix].0; - indices[ix] = zipped_pairs[ix].1; - } + (h, indices) = zipped_pairs.into_iter().unzip(); if t > 1 { - if check_index_history(&indices, &index_history, t as usize) { + // Section (5) of Alg. 2.4 + if check_index_history(&indices, &index_history, t) { break; } - update_indices(&mut indices, &index_history, t as usize); + update_indices(&mut indices, &index_history, t); } - for ix in 0..(t as usize) { + for ix in 0..t { x_matrix.column_mut(ix).fill(0.0); x_matrix[[indices[ix], ix]] = 1.0; index_history.push(indices[ix]); } } + // Would be Section (6) of Alg 2.4 if we returned the best index guess. est } @@ -191,12 +227,12 @@ mod tests { use crate::{ ndarray::ShapeBuilder, normest1::{ensure_no_parallel_columns, is_column_parallel, prepare_x_matrix}, - OperationNorm, + OperationNorm, Inverse, }; use ndarray::Array2; use rand::{thread_rng, Rng}; - use super::{normest, special_delivery}; + use super::{check_if_s_parallel_to_s_old, normest}; #[test] fn test_prep() { @@ -231,29 +267,41 @@ mod tests { } #[test] - fn test_special_delivery() { + fn test_check_if_s_parallel_to_s_old() { let n = 100; let t = 4; let s_1: Array2 = array![[-1., 1., 1.], [1., 1., 1.], [-1., -1., 1.]]; let s_2: Array2 = array![[-1., -1., 1.], [-1., -1., 1.], [-1., 1., -1.]]; - println!("special delivery: {:}", special_delivery(&s_2, &s_1)); + println!( + "check_if_s_parallel_to_s_old: {:}", + check_if_s_parallel_to_s_old(&s_2, &s_1) + ); } #[test] - fn test_normest() { + fn test_average_normest() { let n = 100; - let mut results = Vec::new(); - let t = 10; - let itmax = 10; + let mut differenial_error = Vec::new(); + let mut ratios = Vec::new(); + let t = 2; + let itmax = 5; let mut mat: Array2 = ndarray::Array::::zeros((n, n).f()); let mut rng = rand::thread_rng(); - for _ in 0..1000 { + for _i in 0..5000 { mat.mapv_inplace(|_| rng.gen()); - results.push({ normest(&mat, t as u32, itmax) / mat.opnorm_one().unwrap() }); + mat.assign(&mat.inv().unwrap()); + let est = normest(&mat, t, itmax); + let exp = mat.opnorm_one().unwrap(); + differenial_error.push((est - exp).abs() / exp); + ratios.push(est / exp); } println!( - "results average: {:}", - results.iter().sum::() / results.len() as f64 + "differential error average: {:}", + differenial_error.iter().sum::() / differenial_error.len() as f64 + ); + println!( + "ratio average: {:?}", + ratios.iter().sum::() / ratios.len() as f64 ); } } From 1e4d5c232047e8653a5844cb61458ce823e2a3af Mon Sep 17 00:00:00 2001 From: Matthew Hagan Date: Sun, 16 Oct 2022 00:11:37 -0400 Subject: [PATCH 06/16] working on pade helper --- ndarray-linalg/src/expm.rs | 470 ++++++++++++++++++++++++++------- ndarray-linalg/src/normest1.rs | 3 +- 2 files changed, 379 insertions(+), 94 deletions(-) diff --git a/ndarray-linalg/src/expm.rs b/ndarray-linalg/src/expm.rs index 1134de4a..1686aeaa 100644 --- a/ndarray-linalg/src/expm.rs +++ b/ndarray-linalg/src/expm.rs @@ -1,4 +1,6 @@ -use crate::{types, Inverse}; +use std::ops::MulAssign; + +use crate::{normest1::normest, types, Inverse, OperationNorm}; use ndarray::prelude::*; use statrs::function::factorial::{binomial, factorial}; @@ -63,9 +65,66 @@ const PADE_COEFFS_13: [f64; 14] = [ 1. 
/ 64_764_752_532_480_000., ]; +struct OneNormOneCalc {} + +/// Computes operator one norms either exactly or estimates them using Higham & Tisseur (ref) +pub enum OpNormOne<'a> { + Exact { + a_matrix: &'a Array2, + }, + Estimate { + a_matrix: &'a Array2, + t: usize, + itmax: usize, + }, + EstimateSequence { + matrices: Vec<&'a Array2>, + }, + EstimatePower { + a_matrix: &'a Array2, + p: usize, + }, +} + +impl<'a> OpNormOne<'a> { + fn compute(&self) -> f64 { + match self { + Self::Exact { a_matrix } => a_matrix.opnorm_one().unwrap(), + Self::Estimate { a_matrix, t, itmax } => normest(a_matrix, *t, *itmax as u32), + Self::EstimateSequence { matrices } => 1., + _ => 1., + } + } +} + +fn power_abs_norm(matrix: &Array2, p: usize) -> f64 { + let mut v = Array1::::ones((matrix.ncols()).f()); + let abs_matrix = matrix.t().map(|x| x.abs()); + for _ in 0..p { + v.assign(&abs_matrix.dot(&v)); + } + v.into_iter() + .reduce(|x, y| if x > y { x } else { y }) + .unwrap() +} + // helper function used in Al-M & H. Name is unchanged for future reference. -fn ell(A: Array2, m: u64) -> i32 { - 1 +fn ell(a_matrix: &Array2, m: u64) -> i32 { + if a_matrix.is_square() == false { + panic!("subroutine ell expected a square matrix."); + } + let p = 2 * m + 1; + let a_one_norm = a_matrix.opnorm_one().unwrap(); + + if a_one_norm < f64::EPSILON * 2.0 { + panic!("Subroutine ell encountered zero norm matrix."); + } + let powered_abs_norm = power_abs_norm(a_matrix, p as usize); + let alpha = powered_abs_norm * pade_error_coefficient(m) / a_one_norm; + let u = f64::EPSILON / 2.; + let log2_alpha_div_u = f64::log2(alpha / u); + let val = f64::ceil(log2_alpha_div_u / ((2 * m) as f64)).round() as i32; + i32::max(val, 0) } /// Calculates the leading term of the error series for the [m/m] Pade approximation to exp(x). @@ -73,98 +132,276 @@ fn pade_error_coefficient(m: u64) -> f64 { 1.0 / (binomial(2 * m, m) * factorial(2 * m + 1)) } -fn pade_approximation_3(input: &Array2, output: &mut Array2) { - let input_2 = input.dot(input); - let evens = PADE_COEFFS_3[0] * Array::eye(input.nrows()) + PADE_COEFFS_3[2] * input_2.clone(); - let odds = - (PADE_COEFFS_3[1] * Array::eye(input.nrows()) + PADE_COEFFS_3[3] * input_2).dot(input); - let inverted = (-1. * odds.clone() + evens.clone()).inv().unwrap(); - output.assign(&inverted.dot(&(odds + evens))); +fn pade_approximation_3(mp: &mut MatrixPowers) -> Array2 { + let mut evens = PADE_COEFFS_3[0] * Array::eye(mp.get(1).nrows()); + evens.scaled_add(PADE_COEFFS_3[2], mp.get(2)); + let mut odds = PADE_COEFFS_3[1] * Array::eye(mp.get(1).nrows()); + odds.scaled_add(PADE_COEFFS_3[3], mp.get(2)); + odds = odds.dot(mp.get(1)); + let inverted = (-1. * odds + evens).inv().unwrap(); + inverted.dot(&(odds + evens)) } -fn pade_approximation_5(input: &Array2, output: &mut Array2) { - let input_2 = input.dot(input); - let input_4 = input_2.dot(&input_2); - let evens = PADE_COEFFS_5[0] * Array::eye(input.nrows()) - + PADE_COEFFS_5[2] * input_2.clone() - + PADE_COEFFS_5[4] * input_4.clone(); - let odds = (PADE_COEFFS_5[1] * Array::eye(input.nrows()) - + PADE_COEFFS_5[3] * input_2 - + PADE_COEFFS_5[5] * input_4) - .dot(input); - let inverted = (-1. 
* odds.clone() + evens.clone()).inv().unwrap(); - output.assign(&inverted.dot(&(odds + evens))); +fn pade_approximation_5(mp: &mut MatrixPowers) -> Array2 { + let mut evens: Array2 = PADE_COEFFS_9[0] * Array::eye(mp.get(1).nrows()); + for m in 1..=2 { + evens.scaled_add(PADE_COEFFS_9[2 * m], mp.get(2 * m)); + } + let mut odds: Array2 = PADE_COEFFS_9[1] * Array::eye(mp.get(1).nrows()); + for m in 1..=2 { + odds.scaled_add(PADE_COEFFS_9[2 * m + 1], mp.get(2*m)); + } + odds = odds.dot(mp.get(1)); + let inverted = (-1. * odds + evens).inv().unwrap(); + inverted.dot(&(odds + evens)) } -fn pade_approximation_7(input: &Array2, output: &mut Array2) { - let input_2 = input.dot(input); - let input_4 = input_2.dot(&input_2); - let input_6 = input_2.dot(&input_4); - let evens = PADE_COEFFS_7[0] * Array::eye(input.nrows()) - + PADE_COEFFS_7[2] * input_2.clone() - + PADE_COEFFS_7[4] * input_4.clone() - + PADE_COEFFS_7[6] * input_6.clone(); - let odds = (PADE_COEFFS_7[1] * Array::eye(input.nrows()) - + PADE_COEFFS_7[3] * input_2 - + PADE_COEFFS_7[5] * input_4 - + PADE_COEFFS_7[7] * input_6) - .dot(input); - let inverted = (-1. * odds.clone() + evens.clone()).inv().unwrap(); - output.assign(&inverted.dot(&(odds + evens))); +fn pade_approximation_7(mp: &mut MatrixPowers) -> Array2 { + let mut evens: Array2 = PADE_COEFFS_9[0] * Array::eye(mp.get(1).nrows()); + for m in 1..=3 { + evens.scaled_add(PADE_COEFFS_9[2 * m], mp.get(2 * m)); + } + let mut odds: Array2 = PADE_COEFFS_9[1] * Array::eye(mp.get(1).nrows()); + for m in 1..=3 { + odds.scaled_add(PADE_COEFFS_9[2 * m + 1], mp.get(2*m)); + } + odds = odds.dot(mp.get(1)); + let inverted = (-1. * odds + evens).inv().unwrap(); + inverted.dot(&(odds + evens)) } -fn pade_approximation_9(input: &Array2, output: &mut Array2) { - let input_2 = input.dot(input); - let input_4 = input_2.dot(&input_2); - let input_6 = input_2.dot(&input_4); - let input_8 = input_4.dot(&input_4); - let evens = PADE_COEFFS_9[0] * Array::eye(input.nrows()) - + PADE_COEFFS_9[2] * input_2.clone() - + PADE_COEFFS_9[4] * input_4.clone() - + PADE_COEFFS_9[6] * input_6.clone() - + PADE_COEFFS_9[8] * input_8.clone(); - let odds = (PADE_COEFFS_9[1] * Array::eye(input.nrows()) - + PADE_COEFFS_9[3] * input_2 - + PADE_COEFFS_9[5] * input_4 - + PADE_COEFFS_9[7] * input_6 - + PADE_COEFFS_9[9] * input_8) - .dot(input); - let inverted = (-1. * odds.clone() + evens.clone()).inv().unwrap(); - output.assign(&inverted.dot(&(odds + evens))); +fn pade_approximation_9(mp: &mut MatrixPowers) -> Array2 { + let mut evens: Array2 = PADE_COEFFS_9[0] * Array::eye(mp.get(1).nrows()); + for m in 1..=4 { + evens.scaled_add(PADE_COEFFS_9[2 * m], mp.get(2 * m)); + } + let mut odds: Array2 = PADE_COEFFS_9[1] * Array::eye(mp.get(1).nrows()); + for m in 1..=4 { + odds.scaled_add(PADE_COEFFS_9[2 * m + 1], mp.get(2*m)); + } + odds = odds.dot(mp.get(1)); + let inverted = (-1. 
* odds + evens).inv().unwrap(); + inverted.dot(&(odds + evens)) } -fn pade_approximation_13(input: &Array2, output: &mut Array2) { - let input_2 = input.dot(input); - let input_4 = input_2.dot(&input_2); - let input_6 = input_2.dot(&input_4); - let input_8 = input_4.dot(&input_4); - let evens_1 = PADE_COEFFS_13[0] * Array::eye(input.nrows()) - + PADE_COEFFS_13[2] * input_2.clone() - + PADE_COEFFS_13[4] * input_4.clone() - + PADE_COEFFS_13[6] * input_6.clone(); - let evens_2 = (PADE_COEFFS_13[8] * input_2.clone() - + PADE_COEFFS_13[10] * input_4.clone() - + PADE_COEFFS_13[12] * input_6.clone()) - .dot(&input_6); - let evens = evens_1 + evens_2; - let odds_1 = PADE_COEFFS_13[1] * Array::eye(input.nrows()) - + PADE_COEFFS_13[3] * input_2.clone() - + PADE_COEFFS_13[5] * input_4.clone() - + PADE_COEFFS_13[7] * input_6.clone(); - let odds_2 = (PADE_COEFFS_13[9] * input_2 - + PADE_COEFFS_13[11] * input_4 - + PADE_COEFFS_13[13] * input_6.clone()) - .dot(&input_6); - let odds = (odds_1 + odds_2).dot(input); + +fn pade_approximation_13(mp: &mut MatrixPowers) -> Array2 { + let mut evens_1 = PADE_COEFFS_13[0] * Array::eye(mp.get(1).nrows()); + for m in 1..=3 { + evens_1.scaled_add(PADE_COEFFS_13[2 * m], mp.get(2 * m)); + } + let mut evens_2 = PADE_COEFFS_13[8] * mp.get(2).clone(); + evens_2.scaled_add(PADE_COEFFS_13[10], mp.get(4)); + evens_2.scaled_add(PADE_COEFFS_13[12], mp.get(6)); + let evens = evens_2.dot(mp.get(6)) + &evens_1; + let mut odds_1 = PADE_COEFFS_13[1] * Array::eye(mp.get(1).nrows()); + for m in 1..=3 { + odds_1.scaled_add(PADE_COEFFS_13[2 * m + 1], mp.get(2 * m)); + } + let mut odds_2 = PADE_COEFFS_13[9] * mp.get(2).clone(); + odds_2.scaled_add(PADE_COEFFS_13[11], mp.get(4)); + odds_2.scaled_add(PADE_COEFFS_13[13], mp.get(6)); + odds_2 = odds_2.dot(mp.get(6)); + let odds = (odds_1 + odds_2).dot(mp.get(1)); let inverted = (-1. * odds.clone() + evens.clone()).inv().unwrap(); - output.assign(&inverted.dot(&(odds + evens))); + inverted.dot(&(odds + evens)) +} + +/// Helper struct to ensure that the power of the input matrix is only computed once. +#[derive(Debug)] +struct MatrixPowers<'a> { + pub input_1 : Option<&'a Array2>, + pub input_2 : Option>, + pub input_3 : Option>, + pub input_4 : Option>, + pub input_5 : Option>, + pub input_6 : Option>, + pub input_7 : Option>, + pub input_8 : Option>, + pub input_9 : Option>, + pub input_10 : Option>, + pub input_11 : Option>, + pub input_12 : Option>, + pub input_13 : Option>, + pub num_matprods: usize, +} +impl<'a> MatrixPowers<'a> { + pub fn new() -> Self { + MatrixPowers { input_1: None, input_2: None, input_3:None, input_4: None, input_5: None, input_6: None, input_7: None, input_8: None, input_9: None, input_10: None, input_11: None, input_12: None, input_13: None, num_matprods: 0 } + } + + fn compute2(&mut self) { + if let Some(input_1) = self.input_1.clone() { + self.input_2 = Some(input_1.dot(input_1)); + self.num_matprods += 1; + } + } + fn compute3(&mut self) { + match &self.input_2 { + Some(i2) => { + self.input_3 = Some(self.input_1.as_ref().unwrap().dot(i2)); + self.num_matprods += 1; + }, + None => { + // after calling self.compute2() then self.input_2 will match to Some(_) + // so just recurse. 
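+                // The recursion terminates immediately: compute2() fills input_2,
+                // so the second compute3() call takes the Some(_) arm, for two
+                // matrix products in total.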
+ self.compute2(); + self.compute3(); + }, + } + } + fn compute4(&mut self) { + match &self.input_2 { + Some(i2) => { + self.input_4 = Some(self.input_2.as_ref().unwrap().dot(i2)); + self.num_matprods += 1; + }, + None => { + self.compute2(); + self.compute4(); + } + } + } + fn compute5(&mut self) { + match &self.input_3 { + Some(i3) => { + match &self.input_2 { + Some(i2) => { + self.input_5 = Some(i2.dot(i3)); + self.num_matprods += 1; + }, + None => { + self.compute2(); + self.compute5(); + } + } + }, + None => { + self.compute3(); + self.compute5(); + } + }; + } + fn compute6(&mut self) { + match &self.input_4 { + Some(i4) => { + // If input_4 is computed then input_2 must be computed. + self.input_6 = Some(i4.dot(self.input_2.as_ref().unwrap())); + self.num_matprods += 1; + }, + None => { + match &self.input_3 { + Some(i3) => { + self.input_6 = Some(i3.dot(i3)); + self.num_matprods += 1; + }, + None => { + // We do not have 4 or 3 computed yet, so we will either have 2 or have to compute it. + // in that case easiest way is to first compute 4 and then revisit. + self.compute4(); + self.compute6(); + }, + } + } + }; + } + + pub fn get(&mut self, m: usize) -> &Array2 { + match m { + 1 => self.get1(), + 2 => self.get2(), + 3 => self.get3(), + 4 => self.get4(), + 5 => self.get5(), + 6 => self.get6(), + // 7 => self.get7(), + // 8 => self.get8(), + // 9 => self.get9(), + // 10 => self.get10(), + // 11 => self.get11(), + // 12 => self.get12(), + // 13 => self.get13(), + _ => { + println!("This power of input matrix is not implemented. Returning input matrix."); + self.input_1.as_ref().unwrap() + } + } + } + fn get1(&mut self) -> &Array2 { + self.input_1.as_ref().unwrap() + } + fn get2(&mut self) -> &Array2 { + match &self.input_2 { + Some(mat) => self.input_2.as_ref().unwrap(), + None => { + self.compute2(); + self.input_2.as_ref().unwrap() + } + } + } + fn get3(&mut self) -> &Array2 { + match &self.input_3 { + Some(mat) => self.input_3.as_ref().unwrap(), + None => { + self.compute3(); + &self.input_3.as_ref().unwrap() + } + } + } + fn get4(&mut self) -> &Array2 { + match &self.input_4 { + Some(mat) => &self.input_4.as_ref().unwrap(), + None => { + self.compute4(); + &self.input_4.as_ref().unwrap() + } + } + } + fn get5(&mut self) -> &Array2 { + match &self.input_5 { + Some(mat) => &self.input_5.as_ref().unwrap(), + None => { + self.compute5(); + &self.input_5.as_ref().unwrap() + } + } + } + fn get6(&mut self) -> &Array2 { + match &self.input_6 { + Some(mat) => &self.input_6.as_ref().unwrap(), + None => { + self.compute6(); + &self.input_6.as_ref().unwrap() + } + } + } } -fn pade_approximation(input: &Array2, output: &mut Array2, degree: &usize) { +fn pade_approximation(mp: &MatrixPowers, output: &mut Array2, degree: &usize) { match *degree { - 3 => pade_approximation_3(input, output), - 5 => pade_approximation_5(input, output), - 7 => pade_approximation_7(input, output), - 9 => pade_approximation_9(input, output), - 13 => pade_approximation_13(input, output), + 3 => { + pade_approximation_3(mp); + } + 5 => { + pade_approximation_5(mp.get(1), mp.get(2), mp.get(4), output); + } + 7 => { + let input_2 = mp.dot(mp); + let input_4 = input_2.dot(&input_2); + let input_6 = input_4.dot(&input_2); + pade_approximation_7(mp, &input_2, &input_4, &input_6, output); + } + 9 => { + let input_2 = mp.dot(mp); + let input_4 = input_2.dot(&input_2); + let input_6 = input_4.dot(&input_2); + let input_8 = input_4.dot(&input_4); + pade_approximation_9(mp, &input_2, &input_4, &input_6, &input_8, output); + } + 
13 => { + let input_2 = mp.dot(mp); + let input_4 = input_2.dot(&input_2); + let input_6 = input_4.dot(&input_2); + pade_approximation_13(mp, &input_2, &input_4, &input_6, output); + } _ => { println!("Undefined pade approximant order.") } @@ -182,8 +419,17 @@ fn old_expm(matrix: &Array2) -> Array2 { ret } /// This function is based on the scale-and-squaring algorithm by -pub fn expm(A: Array2) -> Array2 { - arr2(&[[2]]) +pub fn expm(A: Array2) -> Array2 { + let mut output = Array2::::zeros((A.nrows(), A.ncols()).f()); + let d6 = f64::powf(power_abs_norm(&A, 6), 1. / 6.); + let d4 = f64::powf(power_abs_norm(&A, 4), 1. / 4.); + let eta_1 = f64::max(d4, d6); + if eta_1 < THETA_3 && ell(&A, 3) == 0 { + pade_approximation(&A, &mut output, &3); + return output; + } + + arr2(&[[2.]]) } mod tests { @@ -193,9 +439,47 @@ mod tests { }; use ndarray::{linalg::Dot, *}; + use super::{expm, MatrixPowers}; + + #[test] + fn test_matrix_powers() { + let mut mp = MatrixPowers::new(); + mp.input_1 = Some(array![[0., 1.], [1., 0.]]); + println!("get3():"); + println!("{:?}",mp.get3()); + println!("get3():"); + println!("{:?}",mp.get3()); + println!("mp?"); + println!("{:?}", mp); + } + + #[test] + fn test_expm() { + fn compute_pade_diff_error(output: &Array2, expected: &Array2) -> f64 { + let mut tot = 0.0; + for row_ix in 0..output.nrows() { + for col_ix in 0..output.ncols() { + tot += (output[[row_ix, col_ix]] - expected[[row_ix, col_ix]]).abs(); + } + } + tot + } + let mat: Array2 = + 0.01 as f64 * array![[0.1, 0.2, 0.3], [0.2, 0.1, 0.5], [0.11, 0.22, 0.32]]; + let expected: Array2 = array![ + [1.001, 0.00200531, 0.00301133], + [0.00200476, 1.00101, 0.00501353], + [0.00110452, 0.00220573, 1.00321] + ]; + println!("expm output:"); + println!("{:?}", expm(mat.clone())); + println!("diff: {:}", compute_pade_diff_error(&expm(mat), &expected)); + } + #[test] fn test_pade_approximants() { - let mat = 10. * array![[0.1, 0.2, 0.3], [0.2, 0.1, 0.5], [0.11, 0.22, 0.32]]; + let mat: Array2 = + 10. as f64 * array![[0.1, 0.2, 0.3], [0.2, 0.1, 0.5], [0.11, 0.22, 0.32]]; let mut output_3: Array2 = Array::zeros((3, 3).f()); let mut output_5: Array2 = Array::zeros((3, 3).f()); let mut output_7: Array2 = Array::zeros((3, 3).f()); @@ -210,11 +494,11 @@ mod tests { } tot } - pade_approximation_3(&mat, &mut output_3); - pade_approximation_5(&mat, &mut output_5); - pade_approximation_7(&mat, &mut output_7); - pade_approximation_9(&mat, &mut output_9); - pade_approximation_13(&mat, &mut output_13); + // pade_approximation_3(&mat, &mut output_3); + // pade_approximation_5(&mat, &mut output_5); + // pade_approximation_7(&mat, &mut output_7); + // pade_approximation_9(&mat, &mut output_9); + // pade_approximation_13(&mat, &mut output_13); let expected = array![ [157.766, 217.949, 432.144], [200.674, 278.691, 552.725], diff --git a/ndarray-linalg/src/normest1.rs b/ndarray-linalg/src/normest1.rs index d72b4a34..016bb814 100644 --- a/ndarray-linalg/src/normest1.rs +++ b/ndarray-linalg/src/normest1.rs @@ -122,6 +122,7 @@ fn update_indices(indices: &mut Vec, index_history: &Vec, t: usize /// Francoise Tisseur (2000) SIAM J. Matrix Anal. Appl. Vol. 21, No. 4, pp. 1185-1201. 
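/// A minimal usage sketch (hypothetical values; per the definition above the
/// estimate is a lower bound on the exact norm from `opnorm_one`):
///
/// ```ignore
/// let a = ndarray::array![[1., -2.], [3., 4.]];
/// let est = normest(&a, 2, 5);
/// assert!(est <= a.opnorm_one().unwrap() + 1e-12);
/// ```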
pub fn normest(input_matrix: &Array2, t: usize, itmax: u32) -> f64 { if input_matrix.is_square() == false { + panic!("One Norm Estimation encountered non-square input matrix."); } if t < 1 { @@ -227,7 +228,7 @@ mod tests { use crate::{ ndarray::ShapeBuilder, normest1::{ensure_no_parallel_columns, is_column_parallel, prepare_x_matrix}, - OperationNorm, Inverse, + Inverse, OperationNorm, }; use ndarray::Array2; use rand::{thread_rng, Rng}; From 26d64689ba761eac98938e4b62cda441eecc4e44 Mon Sep 17 00:00:00 2001 From: Matthew Hagan Date: Sun, 16 Oct 2022 14:37:19 -0400 Subject: [PATCH 07/16] passing basic tests --- ndarray-linalg/src/expm.rs | 550 +++++++++++++++++++++++---------- ndarray-linalg/src/normest1.rs | 1 - 2 files changed, 392 insertions(+), 159 deletions(-) diff --git a/ndarray-linalg/src/expm.rs b/ndarray-linalg/src/expm.rs index 1686aeaa..179b0e81 100644 --- a/ndarray-linalg/src/expm.rs +++ b/ndarray-linalg/src/expm.rs @@ -1,8 +1,15 @@ use std::ops::MulAssign; use crate::{normest1::normest, types, Inverse, OperationNorm}; -use ndarray::prelude::*; -use statrs::function::factorial::{binomial, factorial}; +use cauchy::{c64, Scalar}; +use lax::{Lapack, NormType}; +use ndarray::{linalg::Dot, prelude::*}; +use num_complex::Complex; +use num_traits::{real::Real, Pow}; +use statrs::{ + function::factorial::{binomial, factorial}, + statistics::Statistics, +}; // These constants are hard-coded from Al-Mohy & Higham const THETA_3: f64 = 1.495585217958292e-2; @@ -97,24 +104,28 @@ impl<'a> OpNormOne<'a> { } } -fn power_abs_norm(matrix: &Array2, p: usize) -> f64 { - let mut v = Array1::::ones((matrix.ncols()).f()); - let abs_matrix = matrix.t().map(|x| x.abs()); +fn power_abs_norm(input_matrix: &Array2, p: usize) -> f64 +where + S: Scalar, +{ + let mut v = Array1::::ones((input_matrix.ncols()).f()); + let abs_matrix = input_matrix.t().map(|x| x.abs()); for _ in 0..p { v.assign(&abs_matrix.dot(&v)); } + // return max col sum v.into_iter() .reduce(|x, y| if x > y { x } else { y }) .unwrap() } // helper function used in Al-M & H. Name is unchanged for future reference. -fn ell(a_matrix: &Array2, m: u64) -> i32 { +fn ell>(a_matrix: &Array2, m: u64) -> i32 { if a_matrix.is_square() == false { panic!("subroutine ell expected a square matrix."); } let p = 2 * m + 1; - let a_one_norm = a_matrix.opnorm_one().unwrap(); + let a_one_norm = a_matrix.map(|x| x.abs()).opnorm_one().unwrap(); if a_one_norm < f64::EPSILON * 2.0 { panic!("Subroutine ell encountered zero norm matrix."); @@ -132,105 +143,197 @@ fn pade_error_coefficient(m: u64) -> f64 { 1.0 / (binomial(2 * m, m) * factorial(2 * m + 1)) } -fn pade_approximation_3(mp: &mut MatrixPowers) -> Array2 { - let mut evens = PADE_COEFFS_3[0] * Array::eye(mp.get(1).nrows()); - evens.scaled_add(PADE_COEFFS_3[2], mp.get(2)); - let mut odds = PADE_COEFFS_3[1] * Array::eye(mp.get(1).nrows()); - odds.scaled_add(PADE_COEFFS_3[3], mp.get(2)); +trait Number { + fn abs(&self) -> f64; +} +impl Number for f64 { + fn abs(&self) -> f64 { + self.abs() + } +} + +fn pade_approximation_3 + Lapack>( + mp: &mut MatrixPowers, + output: &mut Array2, +) { + let mut evens: Array2 = Array2::::eye(mp.get(1).nrows()); + evens.mapv_inplace(|x| x * S::from_real(PADE_COEFFS_3[0])); + evens.scaled_add(S::from_real(PADE_COEFFS_3[2]), mp.get(2)); + + let mut odds: Array2 = Array2::::eye(mp.get(1).nrows()); + odds.mapv_inplace(|x| x * S::from_real(PADE_COEFFS_3[1])); + odds.scaled_add(S::from_real(PADE_COEFFS_3[3]), mp.get(2)); odds = odds.dot(mp.get(1)); - let inverted = (-1. 
* odds + evens).inv().unwrap(); - inverted.dot(&(odds + evens)) + + odds.mapv_inplace(|x| -x); + let inverted = (&odds + &evens).inv().unwrap(); + odds.mapv_inplace(|x| -x); + output.assign(&inverted.dot(&(odds + evens))); } -fn pade_approximation_5(mp: &mut MatrixPowers) -> Array2 { - let mut evens: Array2 = PADE_COEFFS_9[0] * Array::eye(mp.get(1).nrows()); +fn pade_approximation_5 + Lapack>( + mp: &mut MatrixPowers, + output: &mut Array2, +) { + let mut evens: Array2 = Array2::::eye(mp.get(1).nrows()); + evens.mapv_inplace(|x| S::from_real(PADE_COEFFS_9[0]) * x); for m in 1..=2 { - evens.scaled_add(PADE_COEFFS_9[2 * m], mp.get(2 * m)); + evens.scaled_add(S::from_real(PADE_COEFFS_9[2 * m]), mp.get(2 * m)); } - let mut odds: Array2 = PADE_COEFFS_9[1] * Array::eye(mp.get(1).nrows()); + let mut odds: Array2 = Array::eye(mp.get(1).nrows()); + odds.mapv_inplace(|x| S::from_real(PADE_COEFFS_9[1]) * x); for m in 1..=2 { - odds.scaled_add(PADE_COEFFS_9[2 * m + 1], mp.get(2*m)); + odds.scaled_add(S::from_real(PADE_COEFFS_9[2 * m + 1]), mp.get(2 * m)); } odds = odds.dot(mp.get(1)); - let inverted = (-1. * odds + evens).inv().unwrap(); - inverted.dot(&(odds + evens)) + odds.mapv_inplace(|x| -x); + let inverted = (&odds + &evens).inv().unwrap(); + odds.mapv_inplace(|x| -x); + output.assign(&inverted.dot(&(odds + evens))); } -fn pade_approximation_7(mp: &mut MatrixPowers) -> Array2 { - let mut evens: Array2 = PADE_COEFFS_9[0] * Array::eye(mp.get(1).nrows()); +fn pade_approximation_7 + Lapack>( + mp: &mut MatrixPowers, + output: &mut Array2, +) { + let mut evens: Array2 = Array::eye(mp.get(1).nrows()); + evens.mapv_inplace(|x| S::from_real(PADE_COEFFS_9[0]) * x); for m in 1..=3 { - evens.scaled_add(PADE_COEFFS_9[2 * m], mp.get(2 * m)); + evens.scaled_add(S::from_real(PADE_COEFFS_9[2 * m]), mp.get(2 * m)); } - let mut odds: Array2 = PADE_COEFFS_9[1] * Array::eye(mp.get(1).nrows()); + let mut odds: Array2 = Array::eye(mp.get(1).nrows()); + odds.mapv_inplace(|x| S::from_real(PADE_COEFFS_9[1]) * x); for m in 1..=3 { - odds.scaled_add(PADE_COEFFS_9[2 * m + 1], mp.get(2*m)); + odds.scaled_add(S::from_real(PADE_COEFFS_9[2 * m + 1]), mp.get(2 * m)); } odds = odds.dot(mp.get(1)); - let inverted = (-1. * odds + evens).inv().unwrap(); - inverted.dot(&(odds + evens)) + odds.mapv_inplace(|x| -x); + let inverted = (&odds + &evens).inv().unwrap(); + odds.mapv_inplace(|x| -x); + output.assign(&inverted.dot(&(odds + evens))); } -fn pade_approximation_9(mp: &mut MatrixPowers) -> Array2 { - let mut evens: Array2 = PADE_COEFFS_9[0] * Array::eye(mp.get(1).nrows()); +fn pade_approximation_9 + Lapack>( + mp: &mut MatrixPowers, + output: &mut Array2, +) { + let mut evens: Array2 = Array::eye(mp.get(1).nrows()); + evens.mapv_inplace(|x| S::from_real(PADE_COEFFS_9[0]) * x); for m in 1..=4 { - evens.scaled_add(PADE_COEFFS_9[2 * m], mp.get(2 * m)); + evens.scaled_add(S::from_real(PADE_COEFFS_9[2 * m]), mp.get(2 * m)); } - let mut odds: Array2 = PADE_COEFFS_9[1] * Array::eye(mp.get(1).nrows()); + let mut odds: Array2 = Array::eye(mp.get(1).nrows()); + odds.mapv_inplace(|x| S::from_real(PADE_COEFFS_9[1]) * x); for m in 1..=4 { - odds.scaled_add(PADE_COEFFS_9[2 * m + 1], mp.get(2*m)); + odds.scaled_add(S::from_real(PADE_COEFFS_9[2 * m + 1]), mp.get(2 * m)); } odds = odds.dot(mp.get(1)); - let inverted = (-1. 
* odds + evens).inv().unwrap(); - inverted.dot(&(odds + evens)) + odds.mapv_inplace(|x| -x); + let inverted: Array2 = (&odds + &evens).inv().unwrap(); + odds.mapv_inplace(|x| -x); + output.assign(&inverted.dot(&(odds + evens))); } -fn pade_approximation_13(mp: &mut MatrixPowers) -> Array2 { - let mut evens_1 = PADE_COEFFS_13[0] * Array::eye(mp.get(1).nrows()); +// TODO: scale powers by appropriate value of s. +fn pade_approximation_13 + Lapack>( + mp: &mut MatrixPowers, + scale_factor: i32, + output: &mut Array2, +) { + // note this may have unnecessary allocations. + let mut evens_1: Array2 = Array::eye(mp.get(1).nrows()); + evens_1.mapv_inplace(|x| S::from_real(PADE_COEFFS_13[0]) * x); for m in 1..=3 { - evens_1.scaled_add(PADE_COEFFS_13[2 * m], mp.get(2 * m)); + evens_1.scaled_add(S::from_real(PADE_COEFFS_13[2 * m]), mp.get(2 * m)); } - let mut evens_2 = PADE_COEFFS_13[8] * mp.get(2).clone(); - evens_2.scaled_add(PADE_COEFFS_13[10], mp.get(4)); - evens_2.scaled_add(PADE_COEFFS_13[12], mp.get(6)); + let mut evens_2 = mp.get(2).clone(); + evens_2.mapv_inplace(|x| S::from_real(PADE_COEFFS_13[8]) * x); + evens_2.scaled_add(S::from_real(PADE_COEFFS_13[10]), mp.get(4)); + evens_2.scaled_add(S::from_real(PADE_COEFFS_13[12]), mp.get(6)); let evens = evens_2.dot(mp.get(6)) + &evens_1; - let mut odds_1 = PADE_COEFFS_13[1] * Array::eye(mp.get(1).nrows()); + let mut odds_1: Array2 = Array::eye(mp.get(1).nrows()); + odds_1.mapv_inplace(|x| S::from_real(PADE_COEFFS_13[1]) * x); for m in 1..=3 { - odds_1.scaled_add(PADE_COEFFS_13[2 * m + 1], mp.get(2 * m)); + odds_1.scaled_add(S::from_real(PADE_COEFFS_13[2 * m + 1]), mp.get(2 * m)); } - let mut odds_2 = PADE_COEFFS_13[9] * mp.get(2).clone(); - odds_2.scaled_add(PADE_COEFFS_13[11], mp.get(4)); - odds_2.scaled_add(PADE_COEFFS_13[13], mp.get(6)); + let mut odds_2 = mp.get(2).clone(); + odds_2.mapv_inplace(|x| S::from_real(PADE_COEFFS_13[9]) * x); + odds_2.scaled_add(S::from_real(PADE_COEFFS_13[11]), mp.get(4)); + odds_2.scaled_add(S::from_real(PADE_COEFFS_13[13]), mp.get(6)); odds_2 = odds_2.dot(mp.get(6)); - let odds = (odds_1 + odds_2).dot(mp.get(1)); - let inverted = (-1. * odds.clone() + evens.clone()).inv().unwrap(); - inverted.dot(&(odds + evens)) + let mut odds = (odds_1 + odds_2).dot(mp.get(1)); + odds.mapv_inplace(|x| -x); + let inverted: Array2 = (&odds + &evens).inv().unwrap(); + odds.mapv_inplace(|x| -x); + output.assign(&inverted.dot(&(odds + evens))); } /// Helper struct to ensure that the power of the input matrix is only computed once. 
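/// Each power is memoized the first time it is requested and reused afterwards:
/// A^6, for example, costs three products (A^2, then A^4 = A^2 * A^2, then
/// A^6 = A^4 * A^2), and repeating the request costs nothing. A minimal sketch
/// of the intended call pattern, assuming the `new`/`get` API added in this
/// patch and the addition-chain order just described:
///
/// ```ignore
/// let a = array![[0., 1.], [1., 0.]];
/// let mut mp = MatrixPowers::new(a);
/// let a_6 = mp.get(6).clone(); // internally runs compute2, compute4, compute6
/// assert_eq!(mp.num_matprods, 3); // a second get(6) performs no new products
/// ```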
#[derive(Debug)] -struct MatrixPowers<'a> { - pub input_1 : Option<&'a Array2>, - pub input_2 : Option>, - pub input_3 : Option>, - pub input_4 : Option>, - pub input_5 : Option>, - pub input_6 : Option>, - pub input_7 : Option>, - pub input_8 : Option>, - pub input_9 : Option>, - pub input_10 : Option>, - pub input_11 : Option>, - pub input_12 : Option>, - pub input_13 : Option>, +struct MatrixPowers { + pub input_1: Option>, + pub input_2: Option>, + pub input_3: Option>, + pub input_4: Option>, + pub input_5: Option>, + pub input_6: Option>, + pub input_7: Option>, + pub input_8: Option>, + pub input_9: Option>, + pub input_10: Option>, + pub input_11: Option>, + pub input_12: Option>, + pub input_13: Option>, pub num_matprods: usize, } -impl<'a> MatrixPowers<'a> { - pub fn new() -> Self { - MatrixPowers { input_1: None, input_2: None, input_3:None, input_4: None, input_5: None, input_6: None, input_7: None, input_8: None, input_9: None, input_10: None, input_11: None, input_12: None, input_13: None, num_matprods: 0 } +impl MatrixPowers { + pub fn new(input: Array2) -> Self { + MatrixPowers { + input_1: Some(input), + input_2: None, + input_3: None, + input_4: None, + input_5: None, + input_6: None, + input_7: None, + input_8: None, + input_9: None, + input_10: None, + input_11: None, + input_12: None, + input_13: None, + num_matprods: 0, + } + } + pub fn scale(&mut self, s: i32) { + let scale_factor = S::from_i32(2).unwrap().pow(-S::from_i32(s).unwrap()); + let mut scaler = scale_factor.clone(); + if let Some(i1) = &mut self.input_1 { + i1.mapv_inplace(|x| x * scaler); + } + scaler *= scale_factor; + if let Some(i2) = &mut self.input_2 { + i2.mapv_inplace(|x| x * scaler); + } + scaler *= scale_factor; + if let Some(i3) = &mut self.input_3 { + i3.mapv_inplace(|x| x * scaler); + } + scaler *= scale_factor; + if let Some(i4) = &mut self.input_4 { + i4.mapv_inplace(|x| x * scaler); + } + scaler *= scale_factor; + if let Some(i5) = &mut self.input_5 { + i5.mapv_inplace(|x| x * scaler); + } + scaler *= scale_factor; + if let Some(i6) = &mut self.input_6 { + i6.mapv_inplace(|x| x * scaler); + } } - fn compute2(&mut self) { if let Some(input_1) = self.input_1.clone() { - self.input_2 = Some(input_1.dot(input_1)); + self.input_2 = Some(input_1.dot(&input_1)); self.num_matprods += 1; } } @@ -239,13 +342,13 @@ impl<'a> MatrixPowers<'a> { Some(i2) => { self.input_3 = Some(self.input_1.as_ref().unwrap().dot(i2)); self.num_matprods += 1; - }, + } None => { // after calling self.compute2() then self.input_2 will match to Some(_) // so just recurse. self.compute2(); self.compute3(); - }, + } } } fn compute4(&mut self) { @@ -253,7 +356,7 @@ impl<'a> MatrixPowers<'a> { Some(i2) => { self.input_4 = Some(self.input_2.as_ref().unwrap().dot(i2)); self.num_matprods += 1; - }, + } None => { self.compute2(); self.compute4(); @@ -262,16 +365,14 @@ impl<'a> MatrixPowers<'a> { } fn compute5(&mut self) { match &self.input_3 { - Some(i3) => { - match &self.input_2 { - Some(i2) => { - self.input_5 = Some(i2.dot(i3)); - self.num_matprods += 1; - }, - None => { - self.compute2(); - self.compute5(); - } + Some(i3) => match &self.input_2 { + Some(i2) => { + self.input_5 = Some(i2.dot(i3)); + self.num_matprods += 1; + } + None => { + self.compute2(); + self.compute5(); } }, None => { @@ -286,25 +387,94 @@ impl<'a> MatrixPowers<'a> { // If input_4 is computed then input_2 must be computed. 
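                    // (compute4 only ever builds A^4 from an existing A^2, so
                    // the unwrap on input_2 below cannot panic.)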
self.input_6 = Some(i4.dot(self.input_2.as_ref().unwrap())); self.num_matprods += 1; - }, + } None => { match &self.input_3 { Some(i3) => { self.input_6 = Some(i3.dot(i3)); self.num_matprods += 1; - }, + } None => { // We do not have 4 or 3 computed yet, so we will either have 2 or have to compute it. // in that case easiest way is to first compute 4 and then revisit. self.compute4(); self.compute6(); - }, + } } } }; } + fn compute7(&mut self) { + match &self.input_6 { + Some(i6) => { + self.input_7 = Some(self.input_1.as_ref().unwrap().dot(i6)); + self.num_matprods += 1; + }, + None => { + match &self.input_5 { + Some(i5) => { + self.input_7 = Some(self.input_2.as_ref().unwrap().dot(i5)); + self.num_matprods += 1; + }, + None => { + match &self.input_4 { + Some(i4) => { + if let Some(i3) = &self.input_3 { + self.input_7 = Some(i3.dot(i4)); + self.num_matprods += 1; + } else { + self.compute3(); + self.compute7(); + } + }, + None => { + self.compute4(); + self.compute7(); + } + } + }, + } + } + } + } + fn compute8(&mut self) { + match &self.input_4 { + Some(i4) => { + self.input_8 = Some(i4.dot(i4)); + self.num_matprods += 1; + }, + None => { + self.compute4(); + self.compute8(); + } + } + } + fn compute10(&mut self) { + match &self.input_5 { + Some(i5) => { + self.input_10 = Some(i5.dot(i5)); + self.num_matprods += 1; + }, + None => { + match &self.input_6 { + Some(i6) => { + self.input_10 = Some(i6.dot(self.input_4.as_ref().unwrap())); + self.num_matprods += 1; + }, + None => { + if self.input_3.is_some() { + self.compute5(); + } else { + self.compute6() + } + self.compute10(); + } + } + } + } + } - pub fn get(&mut self, m: usize) -> &Array2 { + pub fn get(&mut self, m: usize) -> &Array2 { match m { 1 => self.get1(), 2 => self.get2(), @@ -312,23 +482,24 @@ impl<'a> MatrixPowers<'a> { 4 => self.get4(), 5 => self.get5(), 6 => self.get6(), - // 7 => self.get7(), - // 8 => self.get8(), + 7 => self.get7(), + 8 => self.get8(), // 9 => self.get9(), - // 10 => self.get10(), + 10 => self.get10(), // 11 => self.get11(), // 12 => self.get12(), // 13 => self.get13(), _ => { + println!("I need:{:}", m); println!("This power of input matrix is not implemented. 
Returning input matrix."); self.input_1.as_ref().unwrap() } } } - fn get1(&mut self) -> &Array2 { + fn get1(&mut self) -> &Array2 { self.input_1.as_ref().unwrap() } - fn get2(&mut self) -> &Array2 { + fn get2(&mut self) -> &Array2 { match &self.input_2 { Some(mat) => self.input_2.as_ref().unwrap(), None => { @@ -337,7 +508,7 @@ impl<'a> MatrixPowers<'a> { } } } - fn get3(&mut self) -> &Array2 { + fn get3(&mut self) -> &Array2 { match &self.input_3 { Some(mat) => self.input_3.as_ref().unwrap(), None => { @@ -346,7 +517,7 @@ impl<'a> MatrixPowers<'a> { } } } - fn get4(&mut self) -> &Array2 { + fn get4(&mut self) -> &Array2 { match &self.input_4 { Some(mat) => &self.input_4.as_ref().unwrap(), None => { @@ -355,7 +526,7 @@ impl<'a> MatrixPowers<'a> { } } } - fn get5(&mut self) -> &Array2 { + fn get5(&mut self) -> &Array2 { match &self.input_5 { Some(mat) => &self.input_5.as_ref().unwrap(), None => { @@ -364,7 +535,7 @@ impl<'a> MatrixPowers<'a> { } } } - fn get6(&mut self) -> &Array2 { + fn get6(&mut self) -> &Array2 { match &self.input_6 { Some(mat) => &self.input_6.as_ref().unwrap(), None => { @@ -373,107 +544,169 @@ impl<'a> MatrixPowers<'a> { } } } + fn get7(&mut self) -> &Array2 { + match &self.input_7 { + Some(mat) => &self.input_6.as_ref().unwrap(), + None => { + self.compute7(); + &self.input_6.as_ref().unwrap() + } + } + } + fn get8(&mut self) -> &Array2 { + match &self.input_8 { + Some(mat) => &self.input_8.as_ref().unwrap(), + None => { + self.compute8(); + &self.input_8.as_ref().unwrap() + } + } + } + fn get10(&mut self) -> &Array2 { + match &self.input_10 { + Some(mat) => &self.input_10.as_ref().unwrap(), + None => { + self.compute10(); + &self.input_10.as_ref().unwrap() + } + } + } } -fn pade_approximation(mp: &MatrixPowers, output: &mut Array2, degree: &usize) { - match *degree { +fn pade_approximation<'a, S: Scalar + Lapack>( + mp: &'a mut MatrixPowers, + scale_factor: i32, + output: &mut Array2, + degree: usize, +) { + match degree { 3 => { - pade_approximation_3(mp); + pade_approximation_3(mp, output); } 5 => { - pade_approximation_5(mp.get(1), mp.get(2), mp.get(4), output); + pade_approximation_5(mp, output); } 7 => { - let input_2 = mp.dot(mp); - let input_4 = input_2.dot(&input_2); - let input_6 = input_4.dot(&input_2); - pade_approximation_7(mp, &input_2, &input_4, &input_6, output); + pade_approximation_7(mp, output); } 9 => { - let input_2 = mp.dot(mp); - let input_4 = input_2.dot(&input_2); - let input_6 = input_4.dot(&input_2); - let input_8 = input_4.dot(&input_4); - pade_approximation_9(mp, &input_2, &input_4, &input_6, &input_8, output); + pade_approximation_9(mp, output); } 13 => { - let input_2 = mp.dot(mp); - let input_4 = input_2.dot(&input_2); - let input_6 = input_4.dot(&input_2); - pade_approximation_13(mp, &input_2, &input_4, &input_6, output); + pade_approximation_13(mp, scale_factor, output); } _ => { - println!("Undefined pade approximant order.") + println!( + "Undefined pade approximant order. 
Returning first order approximation to expm" + ); + output.assign(&(Array2::::eye(mp.get(1).nrows()) + mp.get(1))); } } } -fn old_expm(matrix: &Array2) -> Array2 { - let mut ret: Array2 = ndarray::Array2::::zeros((matrix.nrows(), matrix.ncols())); - for m in vec![3, 5, 7, 9].iter() { - if super::normest1::normest(matrix, 4, 4) <= THETA_MAP[*m] { - pade_approximation(matrix, &mut ret, m); - return ret; - } - } - ret -} -/// This function is based on the scale-and-squaring algorithm by -pub fn expm(A: Array2) -> Array2 { - let mut output = Array2::::zeros((A.nrows(), A.ncols()).f()); - let d6 = f64::powf(power_abs_norm(&A, 6), 1. / 6.); - let d4 = f64::powf(power_abs_norm(&A, 4), 1. / 4.); +// fn old_expm(matrix: &Array2) -> Array2 { +// let mut ret: Array2 = ndarray::Array2::::zeros((matrix.nrows(), matrix.ncols())); +// for m in vec![3, 5, 7, 9].iter() { +// if super::normest1::normest(matrix, 4, 4) <= THETA_MAP[*m] { +// pade_approximation(matrix, &mut ret, m); +// return ret; +// } +// } +// ret +// } +/// Computes matrix exponential based on the scale-and-squaring algorithm by +pub fn expm + Lapack>(a_matrix: &Array2) -> Array2 { + let mut output = Array2::::zeros((a_matrix.nrows(), a_matrix.ncols()).f()); + let mut mp = MatrixPowers::new(a_matrix.clone()); + let d4 = f64::powf(mp.get(4).opnorm_one().unwrap(), 1. / 4.); + let d6 = f64::powf((*mp.get(6)).opnorm_one().unwrap(), 1. / 6.); + // Note d6 should be an estimate and d4 an estimate let eta_1 = f64::max(d4, d6); - if eta_1 < THETA_3 && ell(&A, 3) == 0 { - pade_approximation(&A, &mut output, &3); + + if eta_1 < THETA_3 && ell(&a_matrix, 3) == 0 { + pade_approximation(&mut mp, 1, &mut output, 3); + println!("returning pade 3"); + println!("matrix mupltiplications of input mat: {:}", mp.num_matprods); return output; } - - arr2(&[[2.]]) + // d4 should be exact here, d6 an estimate + let eta_2 = f64::max(d4, d6); + if eta_2 < THETA_5 && ell(&a_matrix, 5) == 0 { + pade_approximation(&mut mp, 1, &mut output, 5); + println!("returning pade 5"); + println!("matrix mupltiplications of input mat: {:}", mp.num_matprods); + return output; + } + let d8 = f64::powf(mp.get(8).opnorm_one().unwrap(), 1. 
/ 8.); + let eta_3 = f64::max(d6, d8); + if eta_3 < THETA_7 && ell(&a_matrix, 7) == 0 { + pade_approximation(&mut mp, 1, &mut output, 7); + println!("returning pade 7"); + println!("matrix mupltiplications of input mat: {:}", mp.num_matprods); + return output; + } + if eta_3 < THETA_9 && ell(&a_matrix, 9) == 0 { + pade_approximation(&mut mp, 1, &mut output, 9); + println!("returning pade 9"); + println!("matrix mupltiplications of input mat: {:}", mp.num_matprods); + return output; + } + let eta_4 = f64::max(d8, mp.get(10).opnorm_one().unwrap()); + let eta_5 = f64::min(eta_3, eta_4); + let mut s = f64::max(0., (eta_5 / THETA_13).log2().ceil()) as i32; + let mut a_scaled = a_matrix.clone(); + println!("a norm before: {:}", a_matrix.opnorm_one().unwrap()); + a_scaled.mapv_inplace(|x| x * S::from_real(cauchy::Scalar::pow(2., -s as f64))); + println!("scaled norm: {:}", a_scaled.opnorm_one().unwrap()); + s += ell(&a_scaled, 13); + mp.scale(s); + pade_approximation(&mut mp, s, &mut output, 13); + for _ in 0..s { + output = output.dot(&output); + } + println!("returning pade 13"); + println!("matrix mupltiplications of input mat: {:}", mp.num_matprods); + output + // arr2(&[[2.]]) } mod tests { - use crate::expm::{ + use crate::{expm::{ pade_approximation_13, pade_approximation_3, pade_approximation_5, pade_approximation_7, pade_approximation_9, - }; + }, OperationNorm}; use ndarray::{linalg::Dot, *}; use super::{expm, MatrixPowers}; #[test] fn test_matrix_powers() { - let mut mp = MatrixPowers::new(); - mp.input_1 = Some(array![[0., 1.], [1., 0.]]); + let a: Array2 = array![[0., 1.], [1., 0.]]; + let mut mp = MatrixPowers::new(a); println!("get3():"); - println!("{:?}",mp.get3()); + println!("{:?}", mp.get3()); println!("get3():"); - println!("{:?}",mp.get3()); + println!("{:?}", mp.get3()); println!("mp?"); println!("{:?}", mp); } - + #[test] fn test_expm() { fn compute_pade_diff_error(output: &Array2, expected: &Array2) -> f64 { - let mut tot = 0.0; - for row_ix in 0..output.nrows() { - for col_ix in 0..output.ncols() { - tot += (output[[row_ix, col_ix]] - expected[[row_ix, col_ix]]).abs(); - } - } - tot + let mut diff = (output - expected); + diff.mapv_inplace(|x| x.abs()); + diff.opnorm_one().unwrap() } let mat: Array2 = - 0.01 as f64 * array![[0.1, 0.2, 0.3], [0.2, 0.1, 0.5], [0.11, 0.22, 0.32]]; - let expected: Array2 = array![ - [1.001, 0.00200531, 0.00301133], - [0.00200476, 1.00101, 0.00501353], - [0.00110452, 0.00220573, 1.00321] - ]; + 10. 
as f64 * array![[0.1, 0.2, 0.3], [0.2, 0.1, 0.5], [0.11, 0.22, 0.32]]; + let expected: Array2 = array![[157.7662816 , 217.94900112, 432.14352751], + [200.6740715 , 278.69078891, 552.72474352], + [169.9692465 , 236.2889153 , 469.43670795]]; println!("expm output:"); - println!("{:?}", expm(mat.clone())); - println!("diff: {:}", compute_pade_diff_error(&expm(mat), &expected)); + let out =expm(&mat); + println!("{:?}", out); + println!("diff: {:}", compute_pade_diff_error(&out, &expected)); } #[test] @@ -494,11 +727,12 @@ mod tests { } tot } - // pade_approximation_3(&mat, &mut output_3); - // pade_approximation_5(&mat, &mut output_5); - // pade_approximation_7(&mat, &mut output_7); - // pade_approximation_9(&mat, &mut output_9); - // pade_approximation_13(&mat, &mut output_13); + let mut mp = MatrixPowers::new(mat); + pade_approximation_3(&mut mp, &mut output_3); + pade_approximation_5(&mut mp, &mut output_5); + pade_approximation_7(&mut mp, &mut output_7); + pade_approximation_9(&mut mp, &mut output_9); + pade_approximation_13(&mut mp, 1, &mut output_13); let expected = array![ [157.766, 217.949, 432.144], [200.674, 278.691, 552.725], diff --git a/ndarray-linalg/src/normest1.rs b/ndarray-linalg/src/normest1.rs index 016bb814..30d1a027 100644 --- a/ndarray-linalg/src/normest1.rs +++ b/ndarray-linalg/src/normest1.rs @@ -122,7 +122,6 @@ fn update_indices(indices: &mut Vec, index_history: &Vec, t: usize /// Francoise Tisseur (2000) SIAM J. Matrix Anal. Appl. Vol. 21, No. 4, pp. 1185-1201. pub fn normest(input_matrix: &Array2, t: usize, itmax: u32) -> f64 { if input_matrix.is_square() == false { - panic!("One Norm Estimation encountered non-square input matrix."); } if t < 1 { From 409dfad65eb3470f7103ff03d4d8d73efff373d9 Mon Sep 17 00:00:00 2001 From: Matthew Hagan Date: Mon, 17 Oct 2022 23:33:35 -0400 Subject: [PATCH 08/16] better testing --- ndarray-linalg/src/expm.rs | 246 +++++++++++++++++++++++-------------- 1 file changed, 152 insertions(+), 94 deletions(-) diff --git a/ndarray-linalg/src/expm.rs b/ndarray-linalg/src/expm.rs index 179b0e81..7c36feae 100644 --- a/ndarray-linalg/src/expm.rs +++ b/ndarray-linalg/src/expm.rs @@ -1,10 +1,10 @@ use std::ops::MulAssign; use crate::{normest1::normest, types, Inverse, OperationNorm}; -use cauchy::{c64, Scalar}; +use cauchy::{Scalar}; use lax::{Lapack, NormType}; use ndarray::{linalg::Dot, prelude::*}; -use num_complex::Complex; +use num_complex::{Complex, Complex32 as c32, Complex64 as c64}; use num_traits::{real::Real, Pow}; use statrs::{ function::factorial::{binomial, factorial}, @@ -28,6 +28,7 @@ const THETA_MAP: [f64; 14] = [ // Coefficients are also stored via the power of x, so for example the numerator would look like // x^0 * PADE_COEFF_M[0] + x^1 * PADE_COEFF_M[1] + ... + x^m * PADE_COEFF_M[M] const PADE_COEFFS_3: [f64; 4] = [1., 0.5, 0.1, 1. / 120.]; +// const PADE_COEFFS_3:[f64; 4] = [1., 12., 60., 120.]; const PADE_COEFFS_5: [f64; 6] = [1., 0.5, 1. / 9., 1. / 72., 1. / 1_008., 1. / 30_240.]; @@ -72,37 +73,6 @@ const PADE_COEFFS_13: [f64; 14] = [ 1. 
/ 64_764_752_532_480_000., ]; -struct OneNormOneCalc {} - -/// Computes operator one norms either exactly or estimates them using Higham & Tisseur (ref) -pub enum OpNormOne<'a> { - Exact { - a_matrix: &'a Array2, - }, - Estimate { - a_matrix: &'a Array2, - t: usize, - itmax: usize, - }, - EstimateSequence { - matrices: Vec<&'a Array2>, - }, - EstimatePower { - a_matrix: &'a Array2, - p: usize, - }, -} - -impl<'a> OpNormOne<'a> { - fn compute(&self) -> f64 { - match self { - Self::Exact { a_matrix } => a_matrix.opnorm_one().unwrap(), - Self::Estimate { a_matrix, t, itmax } => normest(a_matrix, *t, *itmax as u32), - Self::EstimateSequence { matrices } => 1., - _ => 1., - } - } -} fn power_abs_norm(input_matrix: &Array2, p: usize) -> f64 where @@ -143,21 +113,12 @@ fn pade_error_coefficient(m: u64) -> f64 { 1.0 / (binomial(2 * m, m) * factorial(2 * m + 1)) } -trait Number { - fn abs(&self) -> f64; -} -impl Number for f64 { - fn abs(&self) -> f64 { - self.abs() - } -} - fn pade_approximation_3 + Lapack>( mp: &mut MatrixPowers, output: &mut Array2, ) { let mut evens: Array2 = Array2::::eye(mp.get(1).nrows()); - evens.mapv_inplace(|x| x * S::from_real(PADE_COEFFS_3[0])); + // evens.mapv_inplace(|x| x * S::from_real(PADE_COEFFS_3[0])); evens.scaled_add(S::from_real(PADE_COEFFS_3[2]), mp.get(2)); let mut odds: Array2 = Array2::::eye(mp.get(1).nrows()); @@ -176,14 +137,14 @@ fn pade_approximation_5 + Lapack>( output: &mut Array2, ) { let mut evens: Array2 = Array2::::eye(mp.get(1).nrows()); - evens.mapv_inplace(|x| S::from_real(PADE_COEFFS_9[0]) * x); + // evens.mapv_inplace(|x| S::from_real(PADE_COEFFS_5[0]) * x); for m in 1..=2 { - evens.scaled_add(S::from_real(PADE_COEFFS_9[2 * m]), mp.get(2 * m)); + evens.scaled_add(S::from_real(PADE_COEFFS_5[2 * m]), mp.get(2 * m)); } let mut odds: Array2 = Array::eye(mp.get(1).nrows()); - odds.mapv_inplace(|x| S::from_real(PADE_COEFFS_9[1]) * x); + odds.mapv_inplace(|x| S::from_real(PADE_COEFFS_5[1]) * x); for m in 1..=2 { - odds.scaled_add(S::from_real(PADE_COEFFS_9[2 * m + 1]), mp.get(2 * m)); + odds.scaled_add(S::from_real(PADE_COEFFS_5[2 * m + 1]), mp.get(2 * m)); } odds = odds.dot(mp.get(1)); odds.mapv_inplace(|x| -x); @@ -197,14 +158,14 @@ fn pade_approximation_7 + Lapack>( output: &mut Array2, ) { let mut evens: Array2 = Array::eye(mp.get(1).nrows()); - evens.mapv_inplace(|x| S::from_real(PADE_COEFFS_9[0]) * x); + // evens.mapv_inplace(|x| S::from_real(PADE_COEFFS_7[0]) * x); for m in 1..=3 { - evens.scaled_add(S::from_real(PADE_COEFFS_9[2 * m]), mp.get(2 * m)); + evens.scaled_add(S::from_real(PADE_COEFFS_7[2 * m]), mp.get(2 * m)); } let mut odds: Array2 = Array::eye(mp.get(1).nrows()); - odds.mapv_inplace(|x| S::from_real(PADE_COEFFS_9[1]) * x); + odds.mapv_inplace(|x| S::from_real(PADE_COEFFS_7[1]) * x); for m in 1..=3 { - odds.scaled_add(S::from_real(PADE_COEFFS_9[2 * m + 1]), mp.get(2 * m)); + odds.scaled_add(S::from_real(PADE_COEFFS_7[2 * m + 1]), mp.get(2 * m)); } odds = odds.dot(mp.get(1)); odds.mapv_inplace(|x| -x); @@ -217,9 +178,10 @@ fn pade_approximation_9 + Lapack>( output: &mut Array2, ) { let mut evens: Array2 = Array::eye(mp.get(1).nrows()); - evens.mapv_inplace(|x| S::from_real(PADE_COEFFS_9[0]) * x); + // evens.mapv_inplace(|x| S::from_real(PADE_COEFFS_9[0]) * x); for m in 1..=4 { evens.scaled_add(S::from_real(PADE_COEFFS_9[2 * m]), mp.get(2 * m)); + let delta = mp.get(2 * m).map(|x| S::from_real(PADE_COEFFS_9[2 * m]) * *x); } let mut odds: Array2 = Array::eye(mp.get(1).nrows()); odds.mapv_inplace(|x| S::from_real(PADE_COEFFS_9[1]) * 
x); @@ -236,7 +198,6 @@ fn pade_approximation_9 + Lapack>( // TODO: scale powers by appropriate value of s. fn pade_approximation_13 + Lapack>( mp: &mut MatrixPowers, - scale_factor: i32, output: &mut Array2, ) { // note this may have unnecessary allocations. @@ -575,7 +536,6 @@ impl MatrixPowers { fn pade_approximation<'a, S: Scalar + Lapack>( mp: &'a mut MatrixPowers, - scale_factor: i32, output: &mut Array2, degree: usize, ) { @@ -593,7 +553,7 @@ fn pade_approximation<'a, S: Scalar + Lapack>( pade_approximation_9(mp, output); } 13 => { - pade_approximation_13(mp, scale_factor, output); + pade_approximation_13(mp, output); } _ => { println!( @@ -604,78 +564,60 @@ fn pade_approximation<'a, S: Scalar + Lapack>( } } -// fn old_expm(matrix: &Array2) -> Array2 { -// let mut ret: Array2 = ndarray::Array2::::zeros((matrix.nrows(), matrix.ncols())); -// for m in vec![3, 5, 7, 9].iter() { -// if super::normest1::normest(matrix, 4, 4) <= THETA_MAP[*m] { -// pade_approximation(matrix, &mut ret, m); -// return ret; -// } -// } -// ret -// } /// Computes matrix exponential based on the scale-and-squaring algorithm by -pub fn expm + Lapack>(a_matrix: &Array2) -> Array2 { +pub fn expm + Lapack>(a_matrix: &Array2) -> (Array2, usize) { let mut output = Array2::::zeros((a_matrix.nrows(), a_matrix.ncols()).f()); let mut mp = MatrixPowers::new(a_matrix.clone()); let d4 = f64::powf(mp.get(4).opnorm_one().unwrap(), 1. / 4.); let d6 = f64::powf((*mp.get(6)).opnorm_one().unwrap(), 1. / 6.); // Note d6 should be an estimate and d4 an estimate let eta_1 = f64::max(d4, d6); - if eta_1 < THETA_3 && ell(&a_matrix, 3) == 0 { - pade_approximation(&mut mp, 1, &mut output, 3); - println!("returning pade 3"); - println!("matrix mupltiplications of input mat: {:}", mp.num_matprods); - return output; + pade_approximation(&mut mp, &mut output, 3); + return (output, 3); } // d4 should be exact here, d6 an estimate let eta_2 = f64::max(d4, d6); if eta_2 < THETA_5 && ell(&a_matrix, 5) == 0 { - pade_approximation(&mut mp, 1, &mut output, 5); - println!("returning pade 5"); - println!("matrix mupltiplications of input mat: {:}", mp.num_matprods); - return output; + pade_approximation(&mut mp, &mut output, 5); + return (output, 5); } let d8 = f64::powf(mp.get(8).opnorm_one().unwrap(), 1. 
/ 8.); let eta_3 = f64::max(d6, d8); if eta_3 < THETA_7 && ell(&a_matrix, 7) == 0 { - pade_approximation(&mut mp, 1, &mut output, 7); - println!("returning pade 7"); - println!("matrix mupltiplications of input mat: {:}", mp.num_matprods); - return output; + pade_approximation(&mut mp, &mut output, 7); + return (output, 7); } if eta_3 < THETA_9 && ell(&a_matrix, 9) == 0 { - pade_approximation(&mut mp, 1, &mut output, 9); - println!("returning pade 9"); - println!("matrix mupltiplications of input mat: {:}", mp.num_matprods); - return output; + pade_approximation(&mut mp, &mut output, 9); + return (output, 9); } let eta_4 = f64::max(d8, mp.get(10).opnorm_one().unwrap()); let eta_5 = f64::min(eta_3, eta_4); let mut s = f64::max(0., (eta_5 / THETA_13).log2().ceil()) as i32; let mut a_scaled = a_matrix.clone(); - println!("a norm before: {:}", a_matrix.opnorm_one().unwrap()); a_scaled.mapv_inplace(|x| x * S::from_real(cauchy::Scalar::pow(2., -s as f64))); - println!("scaled norm: {:}", a_scaled.opnorm_one().unwrap()); s += ell(&a_scaled, 13); mp.scale(s); - pade_approximation(&mut mp, s, &mut output, 13); + pade_approximation(&mut mp, &mut output, 13); for _ in 0..s { output = output.dot(&output); } - println!("returning pade 13"); - println!("matrix mupltiplications of input mat: {:}", mp.num_matprods); - output - // arr2(&[[2.]]) + (output, 13) } mod tests { + use std::{collections::HashMap, fs::File, io::Read, str::FromStr}; + use crate::{expm::{ pade_approximation_13, pade_approximation_3, pade_approximation_5, pade_approximation_7, pade_approximation_9, - }, OperationNorm}; + }, OperationNorm, SVD, Eig}; use ndarray::{linalg::Dot, *}; + use num_complex::{Complex, ComplexFloat, Complex32 as c32, Complex64 as c64}; + use rand::Rng; + use ndarray_csv::Array2Reader; + use csv::ReaderBuilder; use super::{expm, MatrixPowers}; @@ -706,9 +648,125 @@ mod tests { println!("expm output:"); let out =expm(&mat); println!("{:?}", out); - println!("diff: {:}", compute_pade_diff_error(&out, &expected)); + println!("diff: {:}", compute_pade_diff_error(&out.0, &expected)); + } + + #[test] + fn test_high_norm() { + let mut rng = rand::thread_rng(); + let n = 200; + let samps = 100; + let mut results = Vec::new(); + let mut avg_entry_error = Vec::new(); + let scale = 0.002; + for _ in 0..samps { + let mut m:Array2 = Array2::::ones((n,n).f()); + m.mapv_inplace(|_| { + c64::new(rng.gen::() * 1., rng.gen::() * 1.) 
+ }); + m = m.dot(&m.t().map(|x| x.conj())); + let (mut eigs,mut vecs) = m.eig().unwrap(); + eigs.mapv_inplace(|_| scale * c64::new(rng.gen::() , rng.gen::())); + let adjoint_vecs = vecs.t().clone().mapv(|x| x.conj()); + + let recomputed_m = vecs.dot(&Array2::from_diag(&eigs)).dot(&adjoint_vecs); + // println!("reconstruction diff: {:}", m_diff.opnorm_one().unwrap() / f64::EPSILON); + eigs.mapv_inplace(|x| x.exp() ); + let eigen_expm = vecs.dot(&Array2::from_diag(&eigs)).dot(&adjoint_vecs); + let (expm_comp, deg) = expm(&recomputed_m); + let diff = &expm_comp - &eigen_expm; + avg_entry_error.push({ + let tot = diff.map(|x| x.abs()).into_iter().sum::(); + tot / (n * n) as f64 + }); + results.push(diff.opnorm_one().unwrap()); + // println!("diff norm over epsilon: {:}", diff.opnorm_one().unwrap() / f64::EPSILON); + } + let avg: f64 = results.iter().sum::() / results.len() as f64; + let avg_entry_diff = avg_entry_error.iter().sum::() / avg_entry_error.len() as f64; + let std: f64 = f64::powf(results.iter().map(|x| f64::powi(x - avg, 2)).sum::() / (results.len() -1) as f64, 0.5); + println!("collected {:} samples.", results.len()); + println!("diff norm: {:} +- ({:})", avg, std); + println!("average entry error over epsilon:{:}", avg_entry_diff / f64::EPSILON); + println!("avg over epsilon: {:.2}", avg / f64::EPSILON); + println!("std over epsilon: {:.2}", std / f64::EPSILON); } + #[test] + fn test_random_matrix() { + // load data from csv + let mut input_file = File::open("/Users/matt/Desktop/matrix_input.csv").unwrap(); + let mut output_file = File::open("/Users/matt/Desktop/expm_output.csv").unwrap(); + let mut input_reader = ReaderBuilder::new().has_headers(false).from_reader(input_file); + let mut output_reader = ReaderBuilder::new().has_headers(false).from_reader(output_file); + let input: Vec = Vec::new(); + for res in output_reader.records() { + res.unwrap().iter().map(|x| { + let real:Vec<_> = x.split('.').collect(); + println!("real: {:?}", real); + let mut re: f64 = (real[0].clone()).parse().unwrap(); + println!("re: {:}",re); + if real[1].contains("*^") { + // re += "." + real[1].split("*^").into_iter().take(n) + } + let c = c64::from_str(x); + println!("x: {:}", x); + println!("c: {:?}", c); + }).collect::<()>(); + break; + } + let input: Array2 = input_reader.deserialize_array2((100,100)).unwrap(); + let expected: Array2 = output_reader.deserialize_array2((100,100)).unwrap(); + let (computed, deg) = expm(&input); + let diff = &expected - &computed; + println!("diff norm: {:}", diff.opnorm_one().unwrap()); + } + #[test] + fn test_pauli_rotation() { + let mut results = Vec::new(); + let mut d3 = 0; + let mut d5 = 0; + let mut d7 = 0; + let mut d9 = 0; + let mut d13 = 0; + let mut rng = rand::thread_rng(); + let num_samples = 10; + for _ in 0..num_samples { + let theta: c64 = c64::from_polar(2. * std::f64::consts::PI * rng.gen::(), 0.); + let pauli_y: Array2 = array![ + [c64::new(0., 0.), c64::new(0., -1.)], + [c64::new(0., 1.), c64::new(0., 0.)], + ]; + let x = c64::new(0., 1.) * theta * pauli_y.clone(); + let actual = c64::cos(theta /2.) * Array2::::eye(x.nrows()) - c64::sin(theta / 2.) * c64::new(0., 1.) 
* &pauli_y; + let (computed, deg) = expm(&(c64::new(0., -0.5) * theta * pauli_y)); + match deg { + 3 => d3 += 1, + 5 => d5 += 1, + 7 => d7 += 1, + 9 => d9 += 1, + 13 => d13 += 1, + _ => {}, + } + let diff = (actual - computed).map(|x| x.abs()); + let diff_norm = diff.opnorm_one().unwrap(); + results.push(diff_norm); + } + let avg: f64 = results.iter().sum::() / results.len() as f64; + let std: f64 = f64::powf(results.iter().map(|x| f64::powi(x - avg, 2)).sum::() / (results.len() -1) as f64, 0.5); + println!("collected {:} samples.", results.len()); + println!("diff norm: {:} +- ({:})", avg, std); + println!("avg over epsilon: {:.2}", avg / f64::EPSILON); + println!("std over epsilon: {:.2}", std / f64::EPSILON); + println!("degree percentages: \n3 - {:.4}, \n5 - {:.4}, \n7 - {:.4}, \n9 - {:.4}, \n13 - {:.4}", + d3 as f64 / num_samples as f64, + d5 as f64 / num_samples as f64, + d7 as f64 / num_samples as f64, + d9 as f64 / num_samples as f64, + d13 as f64 / num_samples as f64); + // println!("results: {:?}", results); + + } #[test] fn test_pade_approximants() { let mat: Array2 = @@ -732,7 +790,7 @@ mod tests { pade_approximation_5(&mut mp, &mut output_5); pade_approximation_7(&mut mp, &mut output_7); pade_approximation_9(&mut mp, &mut output_9); - pade_approximation_13(&mut mp, 1, &mut output_13); + pade_approximation_13(&mut mp, &mut output_13); let expected = array![ [157.766, 217.949, 432.144], [200.674, 278.691, 552.725], @@ -777,4 +835,4 @@ mod tests { // let exact = floating_exp(D); // let diff = expm(U D U.conj().T) - U F U.conj().T; } -} +} \ No newline at end of file From d1f1c0054205f5d11f3edc5955c02467572c9eba Mon Sep 17 00:00:00 2001 From: Matthew Hagan Date: Tue, 18 Oct 2022 16:14:58 -0400 Subject: [PATCH 09/16] cleaned up --- ndarray-linalg/src/expm.rs | 789 +++++++++++-------------------------- 1 file changed, 225 insertions(+), 564 deletions(-) diff --git a/ndarray-linalg/src/expm.rs b/ndarray-linalg/src/expm.rs index 7c36feae..af72fe74 100644 --- a/ndarray-linalg/src/expm.rs +++ b/ndarray-linalg/src/expm.rs @@ -1,7 +1,7 @@ use std::ops::MulAssign; use crate::{normest1::normest, types, Inverse, OperationNorm}; -use cauchy::{Scalar}; +use cauchy::Scalar; use lax::{Lapack, NormType}; use ndarray::{linalg::Dot, prelude::*}; use num_complex::{Complex, Complex32 as c32, Complex64 as c64}; @@ -18,17 +18,11 @@ const THETA_7: f64 = 9.504178996162932e-1; const THETA_9: f64 = 2.097847961257068e0; const THETA_13: f64 = 4.25; // Alg 5.1 -// this is pure laziness aka "ergonomics" -const THETA_MAP: [f64; 14] = [ - 0., 0., 0., THETA_3, 0., THETA_5, 0., THETA_7, 0., THETA_9, 0., 0., 0., THETA_13, -]; - // The Pade Coefficients for the numerator of the diagonal approximation to Exp[x]. Computed via Mathematica/WolframAlpha. // Note that the denominator has the same coefficients but odd powers have an opposite sign. // Coefficients are also stored via the power of x, so for example the numerator would look like // x^0 * PADE_COEFF_M[0] + x^1 * PADE_COEFF_M[1] + ... + x^m * PADE_COEFF_M[M] const PADE_COEFFS_3: [f64; 4] = [1., 0.5, 0.1, 1. / 120.]; -// const PADE_COEFFS_3:[f64; 4] = [1., 12., 60., 120.]; const PADE_COEFFS_5: [f64; 6] = [1., 0.5, 1. / 9., 1. / 72., 1. / 1_008., 1. / 30_240.]; @@ -73,533 +67,238 @@ const PADE_COEFFS_13: [f64; 14] = [ 1. 
/ 64_764_752_532_480_000., ];

// These are the ones used in scipy
// const PADE_COEFFS_13: [f64; 14] = [
//     64764752532480000.,
//     32382376266240000.,
//     7771770303897600.,
//     1187353796428800.,
//     129060195264000.,
//     10559470521600.,
//     670442572800.,
//     33522128640.,
//     1323241920.,
//     40840800.,
//     960960.,
//     16380.,
//     182.,
//     1.,
// ];

fn pade_approximation_3<S: Scalar<Real = f64> + Lapack>(
    a_1: &Array2<S>,
    a_2: &Array2<S>,
) -> Array2<S> {
    let mut evens: Array2<S> = Array2::<S>::eye(a_1.nrows());
    // evens.mapv_inplace(|x| x * S::from_real(PADE_COEFFS_3[0]));
    evens.scaled_add(S::from_real(PADE_COEFFS_3[2]), a_2);

    let mut odds: Array2<S> = Array2::<S>::eye(a_1.nrows());
    odds.mapv_inplace(|x| x * S::from_real(PADE_COEFFS_3[1]));
    odds.scaled_add(S::from_real(PADE_COEFFS_3[3]), a_2);
    odds = odds.dot(a_1);

    odds.mapv_inplace(|x| -x);
    let inverted = (&odds + &evens).inv().unwrap();
    odds.mapv_inplace(|x| -x);
    inverted.dot(&(odds + evens))
}

fn pade_approximation_5<S: Scalar<Real = f64> + Lapack>(
    a_1: &Array2<S>,
    a_2: &Array2<S>,
    a_4: &Array2<S>,
) -> Array2<S> {
    let mut evens: Array2<S> = Array2::<S>::eye(a_1.nrows());
    // evens.mapv_inplace(|x| S::from_real(PADE_COEFFS_5[0]) * x);
    evens.scaled_add(S::from_real(PADE_COEFFS_5[2]), a_2);
    evens.scaled_add(S::from_real(PADE_COEFFS_5[4]), a_4);

    let mut odds: Array2<S> = Array::eye(a_1.nrows());
    odds.mapv_inplace(|x| S::from_real(PADE_COEFFS_5[1]) * x);
    odds.scaled_add(S::from_real(PADE_COEFFS_5[3]), a_2);
    odds.scaled_add(S::from_real(PADE_COEFFS_5[5]), a_4);
    odds = 
odds.dot(a_1); + odds.mapv_inplace(|x| -x); let inverted = (&odds + &evens).inv().unwrap(); odds.mapv_inplace(|x| -x); - output.assign(&inverted.dot(&(odds + evens))); + inverted.dot(&(odds + evens)) } fn pade_approximation_7 + Lapack>( - mp: &mut MatrixPowers, - output: &mut Array2, -) { - let mut evens: Array2 = Array::eye(mp.get(1).nrows()); + a_1: &Array2, + a_2: &Array2, + a_4: &Array2, + a_6: &Array2, +) -> Array2 { + let mut evens: Array2 = Array::eye(a_1.nrows()); // evens.mapv_inplace(|x| S::from_real(PADE_COEFFS_7[0]) * x); - for m in 1..=3 { - evens.scaled_add(S::from_real(PADE_COEFFS_7[2 * m]), mp.get(2 * m)); - } - let mut odds: Array2 = Array::eye(mp.get(1).nrows()); + evens.scaled_add(S::from_real(PADE_COEFFS_7[2]), a_2); + evens.scaled_add(S::from_real(PADE_COEFFS_7[4]), a_4); + evens.scaled_add(S::from_real(PADE_COEFFS_7[6]), a_6); + + let mut odds: Array2 = Array::eye(a_1.nrows()); odds.mapv_inplace(|x| S::from_real(PADE_COEFFS_7[1]) * x); - for m in 1..=3 { - odds.scaled_add(S::from_real(PADE_COEFFS_7[2 * m + 1]), mp.get(2 * m)); - } - odds = odds.dot(mp.get(1)); + odds.scaled_add(S::from_real(PADE_COEFFS_7[3]), a_2); + odds.scaled_add(S::from_real(PADE_COEFFS_7[5]), a_4); + odds.scaled_add(S::from_real(PADE_COEFFS_7[7]), a_6); + odds = odds.dot(a_1); + odds.mapv_inplace(|x| -x); let inverted = (&odds + &evens).inv().unwrap(); odds.mapv_inplace(|x| -x); - output.assign(&inverted.dot(&(odds + evens))); + inverted.dot(&(odds + evens)) } + fn pade_approximation_9 + Lapack>( - mp: &mut MatrixPowers, - output: &mut Array2, -) { - let mut evens: Array2 = Array::eye(mp.get(1).nrows()); + a_1: &Array2, + a_2: &Array2, + a_4: &Array2, + a_6: &Array2, + a_8: &Array2, +) -> Array2 { + let mut evens: Array2 = Array::eye(a_1.nrows()); // evens.mapv_inplace(|x| S::from_real(PADE_COEFFS_9[0]) * x); - for m in 1..=4 { - evens.scaled_add(S::from_real(PADE_COEFFS_9[2 * m]), mp.get(2 * m)); - let delta = mp.get(2 * m).map(|x| S::from_real(PADE_COEFFS_9[2 * m]) * *x); - } - let mut odds: Array2 = Array::eye(mp.get(1).nrows()); + evens.scaled_add(S::from_real(PADE_COEFFS_9[2]), a_2); + evens.scaled_add(S::from_real(PADE_COEFFS_9[4]), a_4); + evens.scaled_add(S::from_real(PADE_COEFFS_9[6]), a_6); + evens.scaled_add(S::from_real(PADE_COEFFS_9[8]), a_8); + + let mut odds: Array2 = Array::eye(a_1.nrows()); odds.mapv_inplace(|x| S::from_real(PADE_COEFFS_9[1]) * x); - for m in 1..=4 { - odds.scaled_add(S::from_real(PADE_COEFFS_9[2 * m + 1]), mp.get(2 * m)); - } - odds = odds.dot(mp.get(1)); + odds.scaled_add(S::from_real(PADE_COEFFS_9[3]), a_2); + odds.scaled_add(S::from_real(PADE_COEFFS_9[5]), a_4); + odds.scaled_add(S::from_real(PADE_COEFFS_9[7]), a_6); + odds.scaled_add(S::from_real(PADE_COEFFS_9[9]), a_8); + odds = odds.dot(a_1); + odds.mapv_inplace(|x| -x); let inverted: Array2 = (&odds + &evens).inv().unwrap(); odds.mapv_inplace(|x| -x); - output.assign(&inverted.dot(&(odds + evens))); + inverted.dot(&(odds + evens)) } // TODO: scale powers by appropriate value of s. fn pade_approximation_13 + Lapack>( - mp: &mut MatrixPowers, - output: &mut Array2, -) { - // note this may have unnecessary allocations. 
- let mut evens_1: Array2 = Array::eye(mp.get(1).nrows()); + a_1: &Array2, + a_2: &Array2, + a_4: &Array2, + a_6: &Array2, +) -> Array2 { + let mut evens_1: Array2 = Array::eye(a_1.nrows()); evens_1.mapv_inplace(|x| S::from_real(PADE_COEFFS_13[0]) * x); - for m in 1..=3 { - evens_1.scaled_add(S::from_real(PADE_COEFFS_13[2 * m]), mp.get(2 * m)); - } - let mut evens_2 = mp.get(2).clone(); + evens_1.scaled_add(S::from_real(PADE_COEFFS_13[2]), a_2); + evens_1.scaled_add(S::from_real(PADE_COEFFS_13[4]), a_4); + evens_1.scaled_add(S::from_real(PADE_COEFFS_13[6]), a_6); + + let mut evens_2 = a_2.clone(); evens_2.mapv_inplace(|x| S::from_real(PADE_COEFFS_13[8]) * x); - evens_2.scaled_add(S::from_real(PADE_COEFFS_13[10]), mp.get(4)); - evens_2.scaled_add(S::from_real(PADE_COEFFS_13[12]), mp.get(6)); - let evens = evens_2.dot(mp.get(6)) + &evens_1; - let mut odds_1: Array2 = Array::eye(mp.get(1).nrows()); + evens_2.scaled_add(S::from_real(PADE_COEFFS_13[10]), a_4); + evens_2.scaled_add(S::from_real(PADE_COEFFS_13[12]), a_6); + let evens = evens_2.dot(a_6) + &evens_1; + + let mut odds_1: Array2 = Array::eye(a_1.nrows()); odds_1.mapv_inplace(|x| S::from_real(PADE_COEFFS_13[1]) * x); - for m in 1..=3 { - odds_1.scaled_add(S::from_real(PADE_COEFFS_13[2 * m + 1]), mp.get(2 * m)); - } - let mut odds_2 = mp.get(2).clone(); + odds_1.scaled_add(S::from_real(PADE_COEFFS_13[3]), a_2); + odds_1.scaled_add(S::from_real(PADE_COEFFS_13[5]), a_4); + odds_1.scaled_add(S::from_real(PADE_COEFFS_13[7]), a_6); + + let mut odds_2 = a_2.clone(); odds_2.mapv_inplace(|x| S::from_real(PADE_COEFFS_13[9]) * x); - odds_2.scaled_add(S::from_real(PADE_COEFFS_13[11]), mp.get(4)); - odds_2.scaled_add(S::from_real(PADE_COEFFS_13[13]), mp.get(6)); - odds_2 = odds_2.dot(mp.get(6)); - let mut odds = (odds_1 + odds_2).dot(mp.get(1)); - odds.mapv_inplace(|x| -x); - let inverted: Array2 = (&odds + &evens).inv().unwrap(); - odds.mapv_inplace(|x| -x); - output.assign(&inverted.dot(&(odds + evens))); + odds_2.scaled_add(S::from_real(PADE_COEFFS_13[11]), a_4); + odds_2.scaled_add(S::from_real(PADE_COEFFS_13[13]), a_6); + odds_2 = odds_2.dot(a_6); + + let mut odds = (&odds_1 + &odds_2).dot(a_1); + // odds.mapv_inplace(|x| -x); + let inverted: Array2 = (&odds - &evens).inv().unwrap(); + // odds.mapv_inplace(|x| -x); + inverted.dot(&(odds + evens)) } -/// Helper struct to ensure that the power of the input matrix is only computed once. 
-#[derive(Debug)] -struct MatrixPowers { - pub input_1: Option>, - pub input_2: Option>, - pub input_3: Option>, - pub input_4: Option>, - pub input_5: Option>, - pub input_6: Option>, - pub input_7: Option>, - pub input_8: Option>, - pub input_9: Option>, - pub input_10: Option>, - pub input_11: Option>, - pub input_12: Option>, - pub input_13: Option>, - pub num_matprods: usize, -} -impl MatrixPowers { - pub fn new(input: Array2) -> Self { - MatrixPowers { - input_1: Some(input), - input_2: None, - input_3: None, - input_4: None, - input_5: None, - input_6: None, - input_7: None, - input_8: None, - input_9: None, - input_10: None, - input_11: None, - input_12: None, - input_13: None, - num_matprods: 0, - } - } - pub fn scale(&mut self, s: i32) { - let scale_factor = S::from_i32(2).unwrap().pow(-S::from_i32(s).unwrap()); - let mut scaler = scale_factor.clone(); - if let Some(i1) = &mut self.input_1 { - i1.mapv_inplace(|x| x * scaler); - } - scaler *= scale_factor; - if let Some(i2) = &mut self.input_2 { - i2.mapv_inplace(|x| x * scaler); - } - scaler *= scale_factor; - if let Some(i3) = &mut self.input_3 { - i3.mapv_inplace(|x| x * scaler); - } - scaler *= scale_factor; - if let Some(i4) = &mut self.input_4 { - i4.mapv_inplace(|x| x * scaler); - } - scaler *= scale_factor; - if let Some(i5) = &mut self.input_5 { - i5.mapv_inplace(|x| x * scaler); - } - scaler *= scale_factor; - if let Some(i6) = &mut self.input_6 { - i6.mapv_inplace(|x| x * scaler); - } - } - fn compute2(&mut self) { - if let Some(input_1) = self.input_1.clone() { - self.input_2 = Some(input_1.dot(&input_1)); - self.num_matprods += 1; - } - } - fn compute3(&mut self) { - match &self.input_2 { - Some(i2) => { - self.input_3 = Some(self.input_1.as_ref().unwrap().dot(i2)); - self.num_matprods += 1; - } - None => { - // after calling self.compute2() then self.input_2 will match to Some(_) - // so just recurse. - self.compute2(); - self.compute3(); - } - } - } - fn compute4(&mut self) { - match &self.input_2 { - Some(i2) => { - self.input_4 = Some(self.input_2.as_ref().unwrap().dot(i2)); - self.num_matprods += 1; - } - None => { - self.compute2(); - self.compute4(); - } - } - } - fn compute5(&mut self) { - match &self.input_3 { - Some(i3) => match &self.input_2 { - Some(i2) => { - self.input_5 = Some(i2.dot(i3)); - self.num_matprods += 1; - } - None => { - self.compute2(); - self.compute5(); - } - }, - None => { - self.compute3(); - self.compute5(); - } - }; - } - fn compute6(&mut self) { - match &self.input_4 { - Some(i4) => { - // If input_4 is computed then input_2 must be computed. - self.input_6 = Some(i4.dot(self.input_2.as_ref().unwrap())); - self.num_matprods += 1; - } - None => { - match &self.input_3 { - Some(i3) => { - self.input_6 = Some(i3.dot(i3)); - self.num_matprods += 1; - } - None => { - // We do not have 4 or 3 computed yet, so we will either have 2 or have to compute it. - // in that case easiest way is to first compute 4 and then revisit. 
- self.compute4(); - self.compute6(); - } - } - } - }; - } - fn compute7(&mut self) { - match &self.input_6 { - Some(i6) => { - self.input_7 = Some(self.input_1.as_ref().unwrap().dot(i6)); - self.num_matprods += 1; - }, - None => { - match &self.input_5 { - Some(i5) => { - self.input_7 = Some(self.input_2.as_ref().unwrap().dot(i5)); - self.num_matprods += 1; - }, - None => { - match &self.input_4 { - Some(i4) => { - if let Some(i3) = &self.input_3 { - self.input_7 = Some(i3.dot(i4)); - self.num_matprods += 1; - } else { - self.compute3(); - self.compute7(); - } - }, - None => { - self.compute4(); - self.compute7(); - } - } - }, - } - } - } - } - fn compute8(&mut self) { - match &self.input_4 { - Some(i4) => { - self.input_8 = Some(i4.dot(i4)); - self.num_matprods += 1; - }, - None => { - self.compute4(); - self.compute8(); - } - } - } - fn compute10(&mut self) { - match &self.input_5 { - Some(i5) => { - self.input_10 = Some(i5.dot(i5)); - self.num_matprods += 1; - }, - None => { - match &self.input_6 { - Some(i6) => { - self.input_10 = Some(i6.dot(self.input_4.as_ref().unwrap())); - self.num_matprods += 1; - }, - None => { - if self.input_3.is_some() { - self.compute5(); - } else { - self.compute6() - } - self.compute10(); - } - } - } - } +fn power_abs_norm(input_matrix: &Array2, p: usize) -> f64 +where + S: Scalar, +{ + let mut v = Array1::::ones((input_matrix.ncols()).f()); + let abs_matrix = input_matrix.t().map(|x| x.abs()); + for _ in 0..p { + v.assign(&abs_matrix.dot(&v)); } + // return max col sum + v.into_iter() + .reduce(|x, y| if x > y { x } else { y }) + .unwrap() +} - pub fn get(&mut self, m: usize) -> &Array2 { - match m { - 1 => self.get1(), - 2 => self.get2(), - 3 => self.get3(), - 4 => self.get4(), - 5 => self.get5(), - 6 => self.get6(), - 7 => self.get7(), - 8 => self.get8(), - // 9 => self.get9(), - 10 => self.get10(), - // 11 => self.get11(), - // 12 => self.get12(), - // 13 => self.get13(), - _ => { - println!("I need:{:}", m); - println!("This power of input matrix is not implemented. 
Returning input matrix."); - self.input_1.as_ref().unwrap() - } - } - } - fn get1(&mut self) -> &Array2 { - self.input_1.as_ref().unwrap() - } - fn get2(&mut self) -> &Array2 { - match &self.input_2 { - Some(mat) => self.input_2.as_ref().unwrap(), - None => { - self.compute2(); - self.input_2.as_ref().unwrap() - } - } - } - fn get3(&mut self) -> &Array2 { - match &self.input_3 { - Some(mat) => self.input_3.as_ref().unwrap(), - None => { - self.compute3(); - &self.input_3.as_ref().unwrap() - } - } - } - fn get4(&mut self) -> &Array2 { - match &self.input_4 { - Some(mat) => &self.input_4.as_ref().unwrap(), - None => { - self.compute4(); - &self.input_4.as_ref().unwrap() - } - } - } - fn get5(&mut self) -> &Array2 { - match &self.input_5 { - Some(mat) => &self.input_5.as_ref().unwrap(), - None => { - self.compute5(); - &self.input_5.as_ref().unwrap() - } - } - } - fn get6(&mut self) -> &Array2 { - match &self.input_6 { - Some(mat) => &self.input_6.as_ref().unwrap(), - None => { - self.compute6(); - &self.input_6.as_ref().unwrap() - } - } - } - fn get7(&mut self) -> &Array2 { - match &self.input_7 { - Some(mat) => &self.input_6.as_ref().unwrap(), - None => { - self.compute7(); - &self.input_6.as_ref().unwrap() - } - } - } - fn get8(&mut self) -> &Array2 { - match &self.input_8 { - Some(mat) => &self.input_8.as_ref().unwrap(), - None => { - self.compute8(); - &self.input_8.as_ref().unwrap() - } - } - } - fn get10(&mut self) -> &Array2 { - match &self.input_10 { - Some(mat) => &self.input_10.as_ref().unwrap(), - None => { - self.compute10(); - &self.input_10.as_ref().unwrap() - } - } +// helper function used in Al-M & H. Name is unchanged for future reference. +fn ell>(a_matrix: &Array2, m: u64) -> i32 { + if a_matrix.is_square() == false { + panic!("subroutine ell expected a square matrix."); } -} + let p = 2 * m + 1; + let a_one_norm = a_matrix.map(|x| x.abs()).opnorm_one().unwrap(); -fn pade_approximation<'a, S: Scalar + Lapack>( - mp: &'a mut MatrixPowers, - output: &mut Array2, - degree: usize, -) { - match degree { - 3 => { - pade_approximation_3(mp, output); - } - 5 => { - pade_approximation_5(mp, output); - } - 7 => { - pade_approximation_7(mp, output); - } - 9 => { - pade_approximation_9(mp, output); - } - 13 => { - pade_approximation_13(mp, output); - } - _ => { - println!( - "Undefined pade approximant order. Returning first order approximation to expm" - ); - output.assign(&(Array2::::eye(mp.get(1).nrows()) + mp.get(1))); - } + if a_one_norm < f64::EPSILON * 2. { + panic!("Subroutine ell encountered zero norm matrix."); } + let powered_abs_norm = power_abs_norm(a_matrix, p as usize); + let alpha = powered_abs_norm * pade_error_coefficient(m) / a_one_norm; + let u = f64::EPSILON / 2.; + let log2_alpha_div_u = f64::log2(alpha / u); + let val = f64::ceil(log2_alpha_div_u / ((2 * m) as f64)) as i32; + i32::max(val, 0) } +/// Calculates the leading term of the error series for the [m/m] Pade approximation to exp(x). +fn pade_error_coefficient(m: u64) -> f64 { + 1.0 / (binomial(2 * m, m) * factorial(2 * m + 1)) +} /// Computes matrix exponential based on the scale-and-squaring algorithm by pub fn expm + Lapack>(a_matrix: &Array2) -> (Array2, usize) { - let mut output = Array2::::zeros((a_matrix.nrows(), a_matrix.ncols()).f()); - let mut mp = MatrixPowers::new(a_matrix.clone()); - let d4 = f64::powf(mp.get(4).opnorm_one().unwrap(), 1. / 4.); - let d6 = f64::powf((*mp.get(6)).opnorm_one().unwrap(), 1. 
/ 6.); + let mut a_2 = a_matrix.dot(a_matrix); + let mut a_4 = a_2.dot(&a_2); + let mut a_6 = a_2.dot(&a_4); + let d4 = a_4.opnorm_one().unwrap().powf(1. / 4.); + let d6 = a_6.opnorm_one().unwrap().powf(1. / 6.); // Note d6 should be an estimate and d4 an estimate let eta_1 = f64::max(d4, d6); if eta_1 < THETA_3 && ell(&a_matrix, 3) == 0 { - pade_approximation(&mut mp, &mut output, 3); - return (output, 3); + return (pade_approximation_3(a_matrix, &a_2), 3); } // d4 should be exact here, d6 an estimate let eta_2 = f64::max(d4, d6); if eta_2 < THETA_5 && ell(&a_matrix, 5) == 0 { - pade_approximation(&mut mp, &mut output, 5); - return (output, 5); + return (pade_approximation_5(a_matrix, &a_2, &a_4), 5); } - let d8 = f64::powf(mp.get(8).opnorm_one().unwrap(), 1. / 8.); + let a_8 = a_4.dot(&a_4); + let d8 = a_8.opnorm_one().unwrap().powf(1. / 8.); let eta_3 = f64::max(d6, d8); if eta_3 < THETA_7 && ell(&a_matrix, 7) == 0 { - pade_approximation(&mut mp, &mut output, 7); - return (output, 7); + return (pade_approximation_7(a_matrix, &a_2, &a_4, &a_6), 7); } if eta_3 < THETA_9 && ell(&a_matrix, 9) == 0 { - pade_approximation(&mut mp, &mut output, 9); - return (output, 9); + return (pade_approximation_9(a_matrix, &a_2, &a_4, &a_6, &a_8), 9); } - let eta_4 = f64::max(d8, mp.get(10).opnorm_one().unwrap()); + let a_10 = a_2.dot(&a_8); + let eta_4 = f64::max(d8, a_10.opnorm_one().unwrap()); let eta_5 = f64::min(eta_3, eta_4); + let mut s = f64::max(0., (eta_5 / THETA_13).log2().ceil()) as i32; let mut a_scaled = a_matrix.clone(); - a_scaled.mapv_inplace(|x| x * S::from_real(cauchy::Scalar::pow(2., -s as f64))); + let mut scaler = S::from_real(cauchy::Scalar::pow(2., -s as f64)); + a_scaled.mapv_inplace(|x| x * scaler); s += ell(&a_scaled, 13); - mp.scale(s); - pade_approximation(&mut mp, &mut output, 13); + + a_scaled.assign(a_matrix); + scaler = S::from_real(cauchy::Scalar::pow(2., -s as f64)); + a_scaled.mapv_inplace(|x| x * scaler); + a_2.mapv_inplace(|x| x * scaler.pow(S::from(2).unwrap())); + a_4.mapv_inplace(|x| x * scaler.pow(S::from(4).unwrap())); + a_6.mapv_inplace(|x| x * scaler.pow(S::from(6).unwrap())); + + let mut output = pade_approximation_13(&a_scaled, &a_2, &a_4, &a_6); for _ in 0..s { output = output.dot(&output); } @@ -607,120 +306,80 @@ pub fn expm + Lapack>(a_matrix: &Array2) -> (Array2, } mod tests { - use std::{collections::HashMap, fs::File, io::Read, str::FromStr}; - - use crate::{expm::{ - pade_approximation_13, pade_approximation_3, pade_approximation_5, pade_approximation_7, - pade_approximation_9, - }, OperationNorm, SVD, Eig}; + use crate::{ + expm::{ + pade_approximation_13, pade_approximation_3, pade_approximation_5, + pade_approximation_7, pade_approximation_9, + }, + Eig, OperationNorm, SVD, + }; use ndarray::{linalg::Dot, *}; - use num_complex::{Complex, ComplexFloat, Complex32 as c32, Complex64 as c64}; + use num_complex::{Complex, Complex32 as c32, Complex64 as c64, ComplexFloat}; use rand::Rng; - use ndarray_csv::Array2Reader; - use csv::ReaderBuilder; - - use super::{expm, MatrixPowers}; - - #[test] - fn test_matrix_powers() { - let a: Array2 = array![[0., 1.], [1., 0.]]; - let mut mp = MatrixPowers::new(a); - println!("get3():"); - println!("{:?}", mp.get3()); - println!("get3():"); - println!("{:?}", mp.get3()); - println!("mp?"); - println!("{:?}", mp); - } + use std::{collections::HashMap, fs::File, io::Read, str::FromStr}; - #[test] - fn test_expm() { - fn compute_pade_diff_error(output: &Array2, expected: &Array2) -> f64 { - let mut diff = (output - 
expected); - diff.mapv_inplace(|x| x.abs()); - diff.opnorm_one().unwrap() - } - let mat: Array2 = - 10. as f64 * array![[0.1, 0.2, 0.3], [0.2, 0.1, 0.5], [0.11, 0.22, 0.32]]; - let expected: Array2 = array![[157.7662816 , 217.94900112, 432.14352751], - [200.6740715 , 278.69078891, 552.72474352], - [169.9692465 , 236.2889153 , 469.43670795]]; - println!("expm output:"); - let out =expm(&mat); - println!("{:?}", out); - println!("diff: {:}", compute_pade_diff_error(&out.0, &expected)); - } + use super::expm; + // 50 -> 5x worse error each entry than scipy + // 100 -> 7.3x worse error each entry than scipy + // 200 -> broken #[test] - fn test_high_norm() { + fn random_matrix_ensemble() { let mut rng = rand::thread_rng(); let n = 200; - let samps = 100; + let samps = 10; let mut results = Vec::new(); let mut avg_entry_error = Vec::new(); - let scale = 0.002; + // Used to control what pade approximation is most likely to be used. + // the smaller the norm the lower the degree used. + let scale = 1.; for _ in 0..samps { - let mut m:Array2 = Array2::::ones((n,n).f()); - m.mapv_inplace(|_| { - c64::new(rng.gen::() * 1., rng.gen::() * 1.) - }); - m = m.dot(&m.t().map(|x| x.conj())); - let (mut eigs,mut vecs) = m.eig().unwrap(); - eigs.mapv_inplace(|_| scale * c64::new(rng.gen::() , rng.gen::())); + // Sample a completely random matrix. + let mut matrix: Array2 = Array2::::ones((n, n).f()); + matrix.mapv_inplace(|_| c64::new(rng.gen::() * 1., rng.gen::() * 1.)); + + // Make m positive semidefinite so it has orthonormal eigenvecs. + matrix = matrix.dot(&matrix.t().map(|x| x.conj())); + let (mut eigs, vecs) = matrix.eig().unwrap(); let adjoint_vecs = vecs.t().clone().mapv(|x| x.conj()); - let recomputed_m = vecs.dot(&Array2::from_diag(&eigs)).dot(&adjoint_vecs); - // println!("reconstruction diff: {:}", m_diff.opnorm_one().unwrap() / f64::EPSILON); - eigs.mapv_inplace(|x| x.exp() ); + // Generate new random eigenvalues (complex, previously m had real eigenvals) + // and a new matrix m + eigs.mapv_inplace(|_| scale * c64::new(rng.gen::(), rng.gen::())); + let new_matrix = vecs.dot(&Array2::from_diag(&eigs)).dot(&adjoint_vecs); + + // compute the exponentiated matrix by exponentiating the eigenvalues + // and doing V e^Lambda V^\dagger + eigs.mapv_inplace(|x| x.exp()); let eigen_expm = vecs.dot(&Array2::from_diag(&eigs)).dot(&adjoint_vecs); - let (expm_comp, deg) = expm(&recomputed_m); + + // Compute the expm routine, compute error metrics for this sample + let (expm_comp, deg) = expm(&new_matrix); let diff = &expm_comp - &eigen_expm; avg_entry_error.push({ let tot = diff.map(|x| x.abs()).into_iter().sum::(); tot / (n * n) as f64 }); results.push(diff.opnorm_one().unwrap()); - // println!("diff norm over epsilon: {:}", diff.opnorm_one().unwrap() / f64::EPSILON); } + + // compute averages let avg: f64 = results.iter().sum::() / results.len() as f64; let avg_entry_diff = avg_entry_error.iter().sum::() / avg_entry_error.len() as f64; - let std: f64 = f64::powf(results.iter().map(|x| f64::powi(x - avg, 2)).sum::() / (results.len() -1) as f64, 0.5); + let std: f64 = f64::powf( + results.iter().map(|x| f64::powi(x - avg, 2)).sum::() / (results.len() - 1) as f64, + 0.5, + ); println!("collected {:} samples.", results.len()); println!("diff norm: {:} +- ({:})", avg, std); - println!("average entry error over epsilon:{:}", avg_entry_diff / f64::EPSILON); + println!( + "average entry error over epsilon: {:}", + avg_entry_diff / f64::EPSILON + ); println!("avg over epsilon: {:.2}", avg / f64::EPSILON); 
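    // Context for these figures: dividing by f64::EPSILON (~2.2e-16) reports the error
    // in multiples of machine epsilon, which keeps the numbers comparable across
    // BLAS/LAPACK backends and across the sampled matrix scales.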
println!("std over epsilon: {:.2}", std / f64::EPSILON); } - #[test] - fn test_random_matrix() { - // load data from csv - let mut input_file = File::open("/Users/matt/Desktop/matrix_input.csv").unwrap(); - let mut output_file = File::open("/Users/matt/Desktop/expm_output.csv").unwrap(); - let mut input_reader = ReaderBuilder::new().has_headers(false).from_reader(input_file); - let mut output_reader = ReaderBuilder::new().has_headers(false).from_reader(output_file); - let input: Vec = Vec::new(); - for res in output_reader.records() { - res.unwrap().iter().map(|x| { - let real:Vec<_> = x.split('.').collect(); - println!("real: {:?}", real); - let mut re: f64 = (real[0].clone()).parse().unwrap(); - println!("re: {:}",re); - if real[1].contains("*^") { - // re += "." + real[1].split("*^").into_iter().take(n) - } - let c = c64::from_str(x); - println!("x: {:}", x); - println!("c: {:?}", c); - }).collect::<()>(); - break; - } - let input: Array2 = input_reader.deserialize_array2((100,100)).unwrap(); - let expected: Array2 = output_reader.deserialize_array2((100,100)).unwrap(); - let (computed, deg) = expm(&input); - let diff = &expected - &computed; - println!("diff norm: {:}", diff.opnorm_one().unwrap()); - } #[test] fn test_pauli_rotation() { let mut results = Vec::new(); @@ -732,13 +391,14 @@ mod tests { let mut rng = rand::thread_rng(); let num_samples = 10; for _ in 0..num_samples { - let theta: c64 = c64::from_polar(2. * std::f64::consts::PI * rng.gen::(), 0.); + let theta: c64 = c64::from_polar(2. * std::f64::consts::PI * rng.gen::(), 0.); let pauli_y: Array2 = array![ [c64::new(0., 0.), c64::new(0., -1.)], [c64::new(0., 1.), c64::new(0., 0.)], ]; let x = c64::new(0., 1.) * theta * pauli_y.clone(); - let actual = c64::cos(theta /2.) * Array2::::eye(x.nrows()) - c64::sin(theta / 2.) * c64::new(0., 1.) * &pauli_y; + let actual = c64::cos(theta / 2.) * Array2::::eye(x.nrows()) + - c64::sin(theta / 2.) * c64::new(0., 1.) 
* &pauli_y; let (computed, deg) = expm(&(c64::new(0., -0.5) * theta * pauli_y)); match deg { 3 => d3 += 1, @@ -746,14 +406,17 @@ mod tests { 7 => d7 += 1, 9 => d9 += 1, 13 => d13 += 1, - _ => {}, + _ => {} } let diff = (actual - computed).map(|x| x.abs()); let diff_norm = diff.opnorm_one().unwrap(); results.push(diff_norm); } let avg: f64 = results.iter().sum::() / results.len() as f64; - let std: f64 = f64::powf(results.iter().map(|x| f64::powi(x - avg, 2)).sum::() / (results.len() -1) as f64, 0.5); + let std: f64 = f64::powf( + results.iter().map(|x| f64::powi(x - avg, 2)).sum::() / (results.len() - 1) as f64, + 0.5, + ); println!("collected {:} samples.", results.len()); println!("diff norm: {:} +- ({:})", avg, std); println!("avg over epsilon: {:.2}", avg / f64::EPSILON); @@ -765,7 +428,6 @@ mod tests { d9 as f64 / num_samples as f64, d13 as f64 / num_samples as f64); // println!("results: {:?}", results); - } #[test] fn test_pade_approximants() { @@ -785,12 +447,11 @@ mod tests { } tot } - let mut mp = MatrixPowers::new(mat); - pade_approximation_3(&mut mp, &mut output_3); - pade_approximation_5(&mut mp, &mut output_5); - pade_approximation_7(&mut mp, &mut output_7); - pade_approximation_9(&mut mp, &mut output_9); - pade_approximation_13(&mut mp, &mut output_13); + // pade_approximation_3(&mut mp, &mut output_3); + // pade_approximation_5(&mut mp, &mut output_5); + // pade_approximation_7(&mut mp, &mut output_7); + // pade_approximation_9(&mut mp, &mut output_9);/ + // pade_approximation_13(&mut mp, &mut output_13); let expected = array![ [157.766, 217.949, 432.144], [200.674, 278.691, 552.725], @@ -835,4 +496,4 @@ mod tests { // let exact = floating_exp(D); // let diff = expm(U D U.conj().T) - U F U.conj().T; } -} \ No newline at end of file +} From caa29d26fd9801b2e8ce409696b60e6152c8e2f0 Mon Sep 17 00:00:00 2001 From: Matthew Hagan Date: Tue, 18 Oct 2022 21:12:52 -0400 Subject: [PATCH 10/16] cargo.toml update --- .gitignore | 1 + ndarray-linalg/Cargo.toml | 1 + 2 files changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index e228a4bb..4c8fb780 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,4 @@ Cargo.lock # coverage *.xml +ndarray-linalg/build.rs diff --git a/ndarray-linalg/Cargo.toml b/ndarray-linalg/Cargo.toml index 8d3aebff..912ddcce 100644 --- a/ndarray-linalg/Cargo.toml +++ b/ndarray-linalg/Cargo.toml @@ -34,6 +34,7 @@ num-complex = "0.4.0" num-traits = "0.2.14" rand = "0.8.3" thiserror = "1.0.24" +statrs = "0.16.0" [dependencies.ndarray] version = "0.15.2" From c0c1f1cff98a3959dbd580fa698dd9d40466cad6 Mon Sep 17 00:00:00 2001 From: Matthew Hagan Date: Wed, 19 Oct 2022 10:41:49 -0400 Subject: [PATCH 11/16] fixed statrs --- ndarray-linalg/src/expm.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/ndarray-linalg/src/expm.rs b/ndarray-linalg/src/expm.rs index af72fe74..1435322c 100644 --- a/ndarray-linalg/src/expm.rs +++ b/ndarray-linalg/src/expm.rs @@ -6,6 +6,7 @@ use lax::{Lapack, NormType}; use ndarray::{linalg::Dot, prelude::*}; use num_complex::{Complex, Complex32 as c32, Complex64 as c64}; use num_traits::{real::Real, Pow}; +extern crate statrs; use statrs::{ function::factorial::{binomial, factorial}, statistics::Statistics, From 42574c59799b197c44ec28fa74053bd1478cded5 Mon Sep 17 00:00:00 2001 From: Matthew Hagan Date: Fri, 2 Dec 2022 16:53:36 -0500 Subject: [PATCH 12/16] idk --- build.rs | 3 +++ ndarray-linalg/src/expm.rs | 20 +++++++++++--------- 2 files changed, 14 insertions(+), 9 deletions(-) create mode 100644 build.rs diff --git a/build.rs 
b/build.rs new file mode 100644 index 00000000..bfade51a --- /dev/null +++ b/build.rs @@ -0,0 +1,3 @@ +fn main() { + println!("cargo:rustc-link-lib=framework=Accelerate") +} diff --git a/ndarray-linalg/src/expm.rs b/ndarray-linalg/src/expm.rs index 1435322c..b181a9cb 100644 --- a/ndarray-linalg/src/expm.rs +++ b/ndarray-linalg/src/expm.rs @@ -180,7 +180,8 @@ fn pade_approximation_9 + Lapack>( inverted.dot(&(odds + evens)) } -// TODO: scale powers by appropriate value of s. +// Note: all input matrices should be scaled in the main expm +// function. fn pade_approximation_13 + Lapack>( a_1: &Array2, a_2: &Array2, @@ -212,9 +213,9 @@ fn pade_approximation_13 + Lapack>( odds_2 = odds_2.dot(a_6); let mut odds = (&odds_1 + &odds_2).dot(a_1); - // odds.mapv_inplace(|x| -x); - let inverted: Array2 = (&odds - &evens).inv().unwrap(); - // odds.mapv_inplace(|x| -x); + odds.mapv_inplace(|x| -x); + let inverted: Array2 = (&odds + &evens).inv().unwrap(); + odds.mapv_inplace(|x| -x); inverted.dot(&(odds + evens)) } @@ -288,16 +289,17 @@ pub fn expm + Lapack>(a_matrix: &Array2) -> (Array2, let mut s = f64::max(0., (eta_5 / THETA_13).log2().ceil()) as i32; let mut a_scaled = a_matrix.clone(); - let mut scaler = S::from_real(cauchy::Scalar::pow(2., -s as f64)); + let mut scaler = S::from_real(2.).powi(-s); a_scaled.mapv_inplace(|x| x * scaler); s += ell(&a_scaled, 13); a_scaled.assign(a_matrix); - scaler = S::from_real(cauchy::Scalar::pow(2., -s as f64)); + scaler = S::from_real(2.).powi(-s); a_scaled.mapv_inplace(|x| x * scaler); - a_2.mapv_inplace(|x| x * scaler.pow(S::from(2).unwrap())); - a_4.mapv_inplace(|x| x * scaler.pow(S::from(4).unwrap())); - a_6.mapv_inplace(|x| x * scaler.pow(S::from(6).unwrap())); + + a_2.mapv_inplace(|x| x * scaler.powi(2)); + a_4.mapv_inplace(|x| x * scaler.powi(4)); + a_6.mapv_inplace(|x| x * scaler.powi(6)); let mut output = pade_approximation_13(&a_scaled, &a_2, &a_4, &a_6); for _ in 0..s { From 7dd0a5b633d9bda9a1559260b5a2df2fb0ac9b2e Mon Sep 17 00:00:00 2001 From: Matthew Hagan Date: Tue, 27 Dec 2022 17:30:27 -0500 Subject: [PATCH 13/16] added test file --- ndarray-linalg/src/expm.rs | 153 +++++++++++++++++------------------ ndarray-linalg/src/lib.rs | 1 + ndarray-linalg/tests/expm.rs | 118 +++++++++++++++++++++++++++ 3 files changed, 191 insertions(+), 81 deletions(-) create mode 100644 ndarray-linalg/tests/expm.rs diff --git a/ndarray-linalg/src/expm.rs b/ndarray-linalg/src/expm.rs index b181a9cb..bc5b29c2 100644 --- a/ndarray-linalg/src/expm.rs +++ b/ndarray-linalg/src/expm.rs @@ -1,9 +1,8 @@ -use std::ops::MulAssign; - -use crate::{normest1::normest, types, Inverse, OperationNorm}; +use crate::{Inverse, OperationNorm}; use cauchy::Scalar; -use lax::{Lapack, NormType}; -use ndarray::{linalg::Dot, prelude::*}; +use lax::{Lapack}; +use ndarray::{prelude::*}; +use ndarray::linalg::Dot; use num_complex::{Complex, Complex32 as c32, Complex64 as c64}; use num_traits::{real::Real, Pow}; extern crate statrs; @@ -11,6 +10,7 @@ use statrs::{ function::factorial::{binomial, factorial}, statistics::Statistics, }; +use crate::error::Result; // These constants are hard-coded from Al-Mohy & Higham const THETA_3: f64 = 1.495585217958292e-2; @@ -68,24 +68,6 @@ const PADE_COEFFS_13: [f64; 14] = [ 1. 
/ 64_764_752_532_480_000., ]; -// These are the ones used in scipy -// const PADE_COEFFS_13: [f64; 14] = [ -// 64764752532480000., -// 32382376266240000., -// 7771770303897600., -// 1187353796428800., -// 129060195264000., -// 10559470521600., -// 670442572800., -// 33522128640., -// 1323241920., -// 40840800., -// 960960., -// 16380., -// 182., -// 1., -// ]; - fn pade_approximation_3 + Lapack>( a_1: &Array2, a_2: &Array2, @@ -234,7 +216,7 @@ where .unwrap() } -// helper function used in Al-M & H. Name is unchanged for future reference. +// helper function used in Al-M & Higham. Name is unchanged for future reference. fn ell>(a_matrix: &Array2, m: u64) -> i32 { if a_matrix.is_square() == false { panic!("subroutine ell expected a square matrix."); @@ -257,8 +239,16 @@ fn ell>(a_matrix: &Array2, m: u64) -> i32 { fn pade_error_coefficient(m: u64) -> f64 { 1.0 / (binomial(2 * m, m) * factorial(2 * m + 1)) } -/// Computes matrix exponential based on the scale-and-squaring algorithm by -pub fn expm + Lapack>(a_matrix: &Array2) -> (Array2, usize) { +/// ## Matrix Exponentiation +/// Computes matrix exponential based on the scaling-and-squaring algorithm by Al-Mohy and Higham. +/// Currently restricted to matrices with entries that are either f64 or Complex64. Utilizes Lapack +/// calls so entries must satisfy LAPACK trait bounds. +/// +/// ### Caveats +/// Currently confirmed accurate to f64 precision up to 1024x1024 sparse matrices. Dense matrices +/// have been observed with a worse average entry error, up to 100x100 matrices should be close +/// enough to f64 precision for vast majority of numeric purposes. +pub fn expm + Lapack>(a_matrix: &Array2) -> Result> { let mut a_2 = a_matrix.dot(a_matrix); let mut a_4 = a_2.dot(&a_2); let mut a_6 = a_2.dot(&a_4); @@ -267,21 +257,23 @@ pub fn expm + Lapack>(a_matrix: &Array2) -> (Array2, // Note d6 should be an estimate and d4 an estimate let eta_1 = f64::max(d4, d6); if eta_1 < THETA_3 && ell(&a_matrix, 3) == 0 { - return (pade_approximation_3(a_matrix, &a_2), 3); + return Ok(pade_approximation_3(a_matrix, &a_2)); + // return (, 3); } // d4 should be exact here, d6 an estimate let eta_2 = f64::max(d4, d6); if eta_2 < THETA_5 && ell(&a_matrix, 5) == 0 { - return (pade_approximation_5(a_matrix, &a_2, &a_4), 5); + + return Ok(pade_approximation_5(a_matrix, &a_2, &a_4)); } let a_8 = a_4.dot(&a_4); let d8 = a_8.opnorm_one().unwrap().powf(1. 
/ 8.); let eta_3 = f64::max(d6, d8); if eta_3 < THETA_7 && ell(&a_matrix, 7) == 0 { - return (pade_approximation_7(a_matrix, &a_2, &a_4, &a_6), 7); + return Ok(pade_approximation_7(a_matrix, &a_2, &a_4, &a_6)); } if eta_3 < THETA_9 && ell(&a_matrix, 9) == 0 { - return (pade_approximation_9(a_matrix, &a_2, &a_4, &a_6, &a_8), 9); + return Ok(pade_approximation_9(a_matrix, &a_2, &a_4, &a_6, &a_8)); } let a_10 = a_2.dot(&a_8); let eta_4 = f64::max(d8, a_10.opnorm_one().unwrap()); @@ -296,7 +288,6 @@ pub fn expm + Lapack>(a_matrix: &Array2) -> (Array2, a_scaled.assign(a_matrix); scaler = S::from_real(2.).powi(-s); a_scaled.mapv_inplace(|x| x * scaler); - a_2.mapv_inplace(|x| x * scaler.powi(2)); a_4.mapv_inplace(|x| x * scaler.powi(4)); a_6.mapv_inplace(|x| x * scaler.powi(6)); @@ -305,7 +296,7 @@ pub fn expm + Lapack>(a_matrix: &Array2) -> (Array2, for _ in 0..s { output = output.dot(&output); } - (output, 13) + Ok(output) } mod tests { @@ -357,7 +348,7 @@ mod tests { let eigen_expm = vecs.dot(&Array2::from_diag(&eigs)).dot(&adjoint_vecs); // Compute the expm routine, compute error metrics for this sample - let (expm_comp, deg) = expm(&new_matrix); + let expm_comp = expm(&new_matrix).expect("Got this error:"); let diff = &expm_comp - &eigen_expm; avg_entry_error.push({ let tot = diff.map(|x| x.abs()).into_iter().sum::(); @@ -384,54 +375,54 @@ mod tests { } #[test] - fn test_pauli_rotation() { - let mut results = Vec::new(); - let mut d3 = 0; - let mut d5 = 0; - let mut d7 = 0; - let mut d9 = 0; - let mut d13 = 0; - let mut rng = rand::thread_rng(); - let num_samples = 10; - for _ in 0..num_samples { - let theta: c64 = c64::from_polar(2. * std::f64::consts::PI * rng.gen::(), 0.); - let pauli_y: Array2 = array![ - [c64::new(0., 0.), c64::new(0., -1.)], - [c64::new(0., 1.), c64::new(0., 0.)], - ]; - let x = c64::new(0., 1.) * theta * pauli_y.clone(); - let actual = c64::cos(theta / 2.) * Array2::::eye(x.nrows()) - - c64::sin(theta / 2.) * c64::new(0., 1.) * &pauli_y; - let (computed, deg) = expm(&(c64::new(0., -0.5) * theta * pauli_y)); - match deg { - 3 => d3 += 1, - 5 => d5 += 1, - 7 => d7 += 1, - 9 => d9 += 1, - 13 => d13 += 1, - _ => {} - } - let diff = (actual - computed).map(|x| x.abs()); - let diff_norm = diff.opnorm_one().unwrap(); - results.push(diff_norm); - } - let avg: f64 = results.iter().sum::() / results.len() as f64; - let std: f64 = f64::powf( - results.iter().map(|x| f64::powi(x - avg, 2)).sum::() / (results.len() - 1) as f64, - 0.5, - ); - println!("collected {:} samples.", results.len()); - println!("diff norm: {:} +- ({:})", avg, std); - println!("avg over epsilon: {:.2}", avg / f64::EPSILON); - println!("std over epsilon: {:.2}", std / f64::EPSILON); - println!("degree percentages: \n3 - {:.4}, \n5 - {:.4}, \n7 - {:.4}, \n9 - {:.4}, \n13 - {:.4}", - d3 as f64 / num_samples as f64, - d5 as f64 / num_samples as f64, - d7 as f64 / num_samples as f64, - d9 as f64 / num_samples as f64, - d13 as f64 / num_samples as f64); - // println!("results: {:?}", results); - } + // fn test_pauli_rotation() { + // let mut results = Vec::new(); + // let mut d3 = 0; + // let mut d5 = 0; + // let mut d7 = 0; + // let mut d9 = 0; + // let mut d13 = 0; + // let mut rng = rand::thread_rng(); + // let num_samples = 10; + // for _ in 0..num_samples { + // let theta: c64 = c64::from_polar(2. * std::f64::consts::PI * rng.gen::(), 0.); + // let pauli_y: Array2 = array![ + // [c64::new(0., 0.), c64::new(0., -1.)], + // [c64::new(0., 1.), c64::new(0., 0.)], + // ]; + // let x = c64::new(0., 1.) 
* theta * pauli_y.clone(); + // let actual = c64::cos(theta / 2.) * Array2::::eye(x.nrows()) + // - c64::sin(theta / 2.) * c64::new(0., 1.) * &pauli_y; + // let (computed, deg) = expm(&(c64::new(0., -0.5) * theta * pauli_y)); + // match deg { + // 3 => d3 += 1, + // 5 => d5 += 1, + // 7 => d7 += 1, + // 9 => d9 += 1, + // 13 => d13 += 1, + // _ => {} + // } + // let diff = (actual - computed).map(|x| x.abs()); + // let diff_norm = diff.opnorm_one().unwrap(); + // results.push(diff_norm); + // } + // let avg: f64 = results.iter().sum::() / results.len() as f64; + // let std: f64 = f64::powf( + // results.iter().map(|x| f64::powi(x - avg, 2)).sum::() / (results.len() - 1) as f64, + // 0.5, + // ); + // println!("collected {:} samples.", results.len()); + // println!("diff norm: {:} +- ({:})", avg, std); + // println!("avg over epsilon: {:.2}", avg / f64::EPSILON); + // println!("std over epsilon: {:.2}", std / f64::EPSILON); + // println!("degree percentages: \n3 - {:.4}, \n5 - {:.4}, \n7 - {:.4}, \n9 - {:.4}, \n13 - {:.4}", + // d3 as f64 / num_samples as f64, + // d5 as f64 / num_samples as f64, + // d7 as f64 / num_samples as f64, + // d9 as f64 / num_samples as f64, + // d13 as f64 / num_samples as f64); + // // println!("results: {:?}", results); + // } #[test] fn test_pade_approximants() { let mat: Array2 = diff --git a/ndarray-linalg/src/lib.rs b/ndarray-linalg/src/lib.rs index 2bd8d335..e0b7eca9 100644 --- a/ndarray-linalg/src/lib.rs +++ b/ndarray-linalg/src/lib.rs @@ -83,6 +83,7 @@ pub use crate::convert::*; pub use crate::diagonal::*; pub use crate::eig::*; pub use crate::eigh::*; +pub use crate::expm::expm; pub use crate::generate::*; pub use crate::inner::*; pub use crate::layout::*; diff --git a/ndarray-linalg/tests/expm.rs b/ndarray-linalg/tests/expm.rs new file mode 100644 index 00000000..7408a170 --- /dev/null +++ b/ndarray-linalg/tests/expm.rs @@ -0,0 +1,118 @@ +use ndarray::*; +use ndarray::linalg::kron; +use ndarray_linalg::expm::expm; +use ndarray_linalg::{Eig, OperationNorm}; +use num_complex::{Complex64 as c64, ComplexFloat}; +use rand::*; + +/// Test matrix exponentiation expm by using an exact formula for exponentials of Pauli matrices. +/// These matrices are 1-sparse which allows for a different testing regime than the dense matrix +/// test. +#[test] +fn test_random_sparse_matrix() { + let mut rng = rand::thread_rng(); + let num_qubits: u32 = 10; + let dim = 2_usize.pow(num_qubits); + let mut matrix = Array2::::eye(2); + let _i = c64::new(0., 1.); + let zero = c64::new(0., 0.); + let pauli_x = array![[zero, c64::new(1., 0.)], [c64::new(1., 0.), zero]]; + let pauli_y = array![[zero, c64::new(0., -1.)], [c64::new(0., 1.), zero]]; + let pauli_z = array![[c64::new(1., 0.), zero], [zero, c64::new(-1., 0.)]]; + for n in 0..num_qubits { + let pauli_matrix = match rng.gen_range(0..=3) { + 0 => { + Array2::::eye(2) + }, + 1 => { + pauli_x.clone() + }, + 2 => { + pauli_y.clone() + }, + 3 => { + pauli_z.clone() + }, + _ => unreachable!(), + }; + if n == 0 { + matrix = matrix.dot(&pauli_matrix); + } else { + matrix = kron(&matrix, &pauli_matrix); + } + } + // now check that this matrix squares to the identity as otherwise the exact value will be + // incorrect. + let matrix_squared = matrix.dot(&matrix); + let diff = &matrix_squared - Array2::::eye(dim); + assert!(diff.opnorm_one().unwrap() < 10. * (dim as f64) * f64::EPSILON); + + let theta = 1. 
* std::f64::consts::PI * rng.gen::(); + let scaled_matrix = matrix.clone() * c64::new(0., theta); + let (expm_computed, deg) = expm(&scaled_matrix); + let expm_expected = Array2::::eye(dim) * theta.cos() + c64::new(0., theta.sin()) * matrix; + let comp_diff = &expm_expected - &expm_computed; + + let error = comp_diff.opnorm_one().unwrap() / f64::EPSILON; + assert!(error <= 10.); +} + +/// Test dense matrix exponentiation from random matrices. This works by constructing a random +/// unitary matrix as the eigenvectors and then random eigenvalues. We can then use the f64 +/// exponentiation routine to compute the exponential of the diagonal eigenvalue matrix and then +/// multiply by the eigenvalues to compute the exponentiated matrix. This exact value is compared +/// to the expm calculation by looking at the average error per matrix entry. This test reveals an +/// error that scales with the dimension worse than competitors on the author's laptop (Mac M1 with +/// the Accelerate BLAS/LAPACK backend as of December 10th, 2022). +#[test] +fn test_low_dimension_random_dense_matrix() { + let mut rng = rand::thread_rng(); + let dimensions = 50; + let samps = 1000; + let scale = 1.; + let mut results = Vec::new(); + let mut avg_entry_error = Vec::new(); + // Used to control what pade approximation is most likely to be used. + // the smaller the norm the lower the degree used. + for _ in 0..samps { + // Sample a completely random matrix. + let mut matrix: Array2 = Array2::::ones((dimensions, dimensions).f()); + matrix.mapv_inplace(|_| c64::new(rng.gen::() * 1., rng.gen::() * 1.)); + + // Make m positive semidefinite so it has orthonormal eigenvecs. + matrix = matrix.dot(&matrix.t().map(|x| x.conj())); + let (mut eigs, vecs) = matrix.eig().unwrap(); + let adjoint_vecs = vecs.t().clone().mapv(|x| x.conj()); + + // Generate new random eigenvalues (complex, previously m had real eigenvals) + // and a new matrix m + eigs.mapv_inplace(|_| scale * c64::new(rng.gen::(), rng.gen::())); + let new_matrix = vecs.dot(&Array2::from_diag(&eigs)).dot(&adjoint_vecs); + + // compute the exponentiated matrix by exponentiating the eigenvalues + // and doing V e^Lambda V^\dagger + eigs.mapv_inplace(|x| x.exp()); + let eigen_expm = vecs.dot(&Array2::from_diag(&eigs)).dot(&adjoint_vecs); + + // Compute the expm routine, compute error metrics for this sample + let (expm_comp, deg) = expm(&new_matrix); + // println!("deg: {:}", deg); + let diff = &expm_comp - &eigen_expm; + avg_entry_error.push({ + let tot = diff.map(|x| x.abs()).into_iter().sum::(); + tot / (dimensions * dimensions) as f64 + }); + results.push(diff.opnorm_one().unwrap()); + } + + // compute averages + let avg: f64 = results.iter().sum::() / results.len() as f64; + let avg_entry_diff = avg_entry_error.iter().sum::() / avg_entry_error.len() as f64; + let std: f64 = f64::powf( + results.iter().map(|x| f64::powi(x - avg, 2)).sum::() / (results.len() - 1) as f64, + 0.5, + ); + + println!("This may fail at higher dimensions."); + assert!(avg_entry_diff / f64::EPSILON <= 1000.) 
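+    // The 1000-epsilon budget is an empirical tolerance rather than a theoretical bound:
+    // the O(n^3) matrix products and the Pade linear solve accumulate roundoff, so the
+    // per-entry error grows with dimension, as the doc comment on this test notes.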
+} From 56aa3e28a0ede3a4561cac043018ce9199be903f Mon Sep 17 00:00:00 2001 From: Matthew Hagan Date: Thu, 29 Dec 2022 15:43:45 -0500 Subject: [PATCH 14/16] converted outputs to result<> type --- ndarray-linalg/src/expm.rs | 280 +++++------------------------------ ndarray-linalg/tests/expm.rs | 12 +- 2 files changed, 46 insertions(+), 246 deletions(-) diff --git a/ndarray-linalg/src/expm.rs b/ndarray-linalg/src/expm.rs index bc5b29c2..b0bc8a07 100644 --- a/ndarray-linalg/src/expm.rs +++ b/ndarray-linalg/src/expm.rs @@ -1,16 +1,12 @@ use crate::{Inverse, OperationNorm}; use cauchy::Scalar; -use lax::{Lapack}; -use ndarray::{prelude::*}; -use ndarray::linalg::Dot; -use num_complex::{Complex, Complex32 as c32, Complex64 as c64}; -use num_traits::{real::Real, Pow}; +use lax::Lapack; +use ndarray::prelude::*; extern crate statrs; use statrs::{ function::factorial::{binomial, factorial}, - statistics::Statistics, }; -use crate::error::Result; +use crate::error::{Result, LinalgError}; // These constants are hard-coded from Al-Mohy & Higham const THETA_3: f64 = 1.495585217958292e-2; @@ -71,7 +67,7 @@ const PADE_COEFFS_13: [f64; 14] = [ fn pade_approximation_3 + Lapack>( a_1: &Array2, a_2: &Array2, -) -> Array2 { +) -> Result> { let mut evens: Array2 = Array2::::eye(a_1.nrows()); // evens.mapv_inplace(|x| x * S::from_real(PADE_COEFFS_3[0])); evens.scaled_add(S::from_real(PADE_COEFFS_3[2]), a_2); @@ -82,16 +78,16 @@ fn pade_approximation_3 + Lapack>( odds = odds.dot(a_1); odds.mapv_inplace(|x| -x); - let inverted = (&odds + &evens).inv().unwrap(); + let inverted = (&odds + &evens).inv()?; odds.mapv_inplace(|x| -x); - inverted.dot(&(odds + evens)) + Ok(inverted.dot(&(odds + evens))) } fn pade_approximation_5 + Lapack>( a_1: &Array2, a_2: &Array2, a_4: &Array2, -) -> Array2 { +) -> Result> { let mut evens: Array2 = Array2::::eye(a_1.nrows()); // evens.mapv_inplace(|x| S::from_real(PADE_COEFFS_5[0]) * x); evens.scaled_add(S::from_real(PADE_COEFFS_5[2]), a_2); @@ -104,9 +100,9 @@ fn pade_approximation_5 + Lapack>( odds = odds.dot(a_1); odds.mapv_inplace(|x| -x); - let inverted = (&odds + &evens).inv().unwrap(); + let inverted = (&odds + &evens).inv()?; odds.mapv_inplace(|x| -x); - inverted.dot(&(odds + evens)) + Ok(inverted.dot(&(odds + evens))) } fn pade_approximation_7 + Lapack>( @@ -114,7 +110,7 @@ fn pade_approximation_7 + Lapack>( a_2: &Array2, a_4: &Array2, a_6: &Array2, -) -> Array2 { +) -> Result> { let mut evens: Array2 = Array::eye(a_1.nrows()); // evens.mapv_inplace(|x| S::from_real(PADE_COEFFS_7[0]) * x); evens.scaled_add(S::from_real(PADE_COEFFS_7[2]), a_2); @@ -129,9 +125,9 @@ fn pade_approximation_7 + Lapack>( odds = odds.dot(a_1); odds.mapv_inplace(|x| -x); - let inverted = (&odds + &evens).inv().unwrap(); + let inverted = (&odds + &evens).inv()?; odds.mapv_inplace(|x| -x); - inverted.dot(&(odds + evens)) + Ok(inverted.dot(&(odds + evens))) } fn pade_approximation_9 + Lapack>( @@ -140,7 +136,7 @@ fn pade_approximation_9 + Lapack>( a_4: &Array2, a_6: &Array2, a_8: &Array2, -) -> Array2 { +) -> Result> { let mut evens: Array2 = Array::eye(a_1.nrows()); // evens.mapv_inplace(|x| S::from_real(PADE_COEFFS_9[0]) * x); evens.scaled_add(S::from_real(PADE_COEFFS_9[2]), a_2); @@ -157,9 +153,9 @@ fn pade_approximation_9 + Lapack>( odds = odds.dot(a_1); odds.mapv_inplace(|x| -x); - let inverted: Array2 = (&odds + &evens).inv().unwrap(); + let inverted: Array2 = (&odds + &evens).inv()?; odds.mapv_inplace(|x| -x); - inverted.dot(&(odds + evens)) + Ok(inverted.dot(&(odds + evens))) } // Note: all input 
matrices should be scaled in the main expm @@ -169,7 +165,7 @@ fn pade_approximation_13 + Lapack>( a_2: &Array2, a_4: &Array2, a_6: &Array2, -) -> Array2 { +) -> Result> { let mut evens_1: Array2 = Array::eye(a_1.nrows()); evens_1.mapv_inplace(|x| S::from_real(PADE_COEFFS_13[0]) * x); evens_1.scaled_add(S::from_real(PADE_COEFFS_13[2]), a_2); @@ -196,9 +192,9 @@ fn pade_approximation_13 + Lapack>( let mut odds = (&odds_1 + &odds_2).dot(a_1); odds.mapv_inplace(|x| -x); - let inverted: Array2 = (&odds + &evens).inv().unwrap(); + let inverted: Array2 = (&odds + &evens).inv()?; odds.mapv_inplace(|x| -x); - inverted.dot(&(odds + evens)) + Ok(inverted.dot(&(odds + evens))) } fn power_abs_norm(input_matrix: &Array2, p: usize) -> f64 @@ -216,32 +212,31 @@ where .unwrap() } -// helper function used in Al-M & Higham. Name is unchanged for future reference. -fn ell>(a_matrix: &Array2, m: u64) -> i32 { +/// helper function used in Al-Mohy & Higham. Name is unchanged for future reference. +fn ell>(a_matrix: &Array2, m: u64) -> Result { if a_matrix.is_square() == false { - panic!("subroutine ell expected a square matrix."); + return Err(LinalgError::NotSquare { rows: a_matrix.nrows() as i32, cols: a_matrix.ncols() as i32 }); } let p = 2 * m + 1; - let a_one_norm = a_matrix.map(|x| x.abs()).opnorm_one().unwrap(); + let a_one_norm = a_matrix.map(|x| x.abs()).opnorm_one()?; - if a_one_norm < f64::EPSILON * 2. { - panic!("Subroutine ell encountered zero norm matrix."); - } let powered_abs_norm = power_abs_norm(a_matrix, p as usize); let alpha = powered_abs_norm * pade_error_coefficient(m) / a_one_norm; let u = f64::EPSILON / 2.; let log2_alpha_div_u = f64::log2(alpha / u); let val = f64::ceil(log2_alpha_div_u / ((2 * m) as f64)) as i32; - i32::max(val, 0) + Ok(i32::max(val, 0)) } /// Calculates the leading term of the error series for the [m/m] Pade approximation to exp(x). fn pade_error_coefficient(m: u64) -> f64 { 1.0 / (binomial(2 * m, m) * factorial(2 * m + 1)) } + /// ## Matrix Exponentiation /// Computes matrix exponential based on the scaling-and-squaring algorithm by Al-Mohy and Higham. -/// Currently restricted to matrices with entries that are either f64 or Complex64. Utilizes Lapack +/// Currently restricted to matrices with entries that are either f64 or Complex64. 64 bit precision is required +/// due to error calculations in Al-Mohy and Higham. Utilizes Lapack /// calls so entries must satisfy LAPACK trait bounds. /// /// ### Caveats @@ -252,38 +247,36 @@ pub fn expm + Lapack>(a_matrix: &Array2) -> Result + Lapack>(a_matrix: &Array2) -> Result 5x worse error each entry than scipy - // 100 -> 7.3x worse error each entry than scipy - // 200 -> broken - #[test] - fn random_matrix_ensemble() { - let mut rng = rand::thread_rng(); - let n = 200; - let samps = 10; - let mut results = Vec::new(); - let mut avg_entry_error = Vec::new(); - // Used to control what pade approximation is most likely to be used. - // the smaller the norm the lower the degree used. - let scale = 1.; - for _ in 0..samps { - // Sample a completely random matrix. - let mut matrix: Array2 = Array2::::ones((n, n).f()); - matrix.mapv_inplace(|_| c64::new(rng.gen::() * 1., rng.gen::() * 1.)); - - // Make m positive semidefinite so it has orthonormal eigenvecs. 
- matrix = matrix.dot(&matrix.t().map(|x| x.conj())); - let (mut eigs, vecs) = matrix.eig().unwrap(); - let adjoint_vecs = vecs.t().clone().mapv(|x| x.conj()); - - // Generate new random eigenvalues (complex, previously m had real eigenvals) - // and a new matrix m - eigs.mapv_inplace(|_| scale * c64::new(rng.gen::(), rng.gen::())); - let new_matrix = vecs.dot(&Array2::from_diag(&eigs)).dot(&adjoint_vecs); - - // compute the exponentiated matrix by exponentiating the eigenvalues - // and doing V e^Lambda V^\dagger - eigs.mapv_inplace(|x| x.exp()); - let eigen_expm = vecs.dot(&Array2::from_diag(&eigs)).dot(&adjoint_vecs); - - // Compute the expm routine, compute error metrics for this sample - let expm_comp = expm(&new_matrix).expect("Got this error:"); - let diff = &expm_comp - &eigen_expm; - avg_entry_error.push({ - let tot = diff.map(|x| x.abs()).into_iter().sum::(); - tot / (n * n) as f64 - }); - results.push(diff.opnorm_one().unwrap()); - } - - // compute averages - let avg: f64 = results.iter().sum::() / results.len() as f64; - let avg_entry_diff = avg_entry_error.iter().sum::() / avg_entry_error.len() as f64; - let std: f64 = f64::powf( - results.iter().map(|x| f64::powi(x - avg, 2)).sum::() / (results.len() - 1) as f64, - 0.5, - ); - println!("collected {:} samples.", results.len()); - println!("diff norm: {:} +- ({:})", avg, std); - println!( - "average entry error over epsilon: {:}", - avg_entry_diff / f64::EPSILON - ); - println!("avg over epsilon: {:.2}", avg / f64::EPSILON); - println!("std over epsilon: {:.2}", std / f64::EPSILON); - } - - #[test] - // fn test_pauli_rotation() { - // let mut results = Vec::new(); - // let mut d3 = 0; - // let mut d5 = 0; - // let mut d7 = 0; - // let mut d9 = 0; - // let mut d13 = 0; - // let mut rng = rand::thread_rng(); - // let num_samples = 10; - // for _ in 0..num_samples { - // let theta: c64 = c64::from_polar(2. * std::f64::consts::PI * rng.gen::(), 0.); - // let pauli_y: Array2 = array![ - // [c64::new(0., 0.), c64::new(0., -1.)], - // [c64::new(0., 1.), c64::new(0., 0.)], - // ]; - // let x = c64::new(0., 1.) * theta * pauli_y.clone(); - // let actual = c64::cos(theta / 2.) * Array2::::eye(x.nrows()) - // - c64::sin(theta / 2.) * c64::new(0., 1.) * &pauli_y; - // let (computed, deg) = expm(&(c64::new(0., -0.5) * theta * pauli_y)); - // match deg { - // 3 => d3 += 1, - // 5 => d5 += 1, - // 7 => d7 += 1, - // 9 => d9 += 1, - // 13 => d13 += 1, - // _ => {} - // } - // let diff = (actual - computed).map(|x| x.abs()); - // let diff_norm = diff.opnorm_one().unwrap(); - // results.push(diff_norm); - // } - // let avg: f64 = results.iter().sum::() / results.len() as f64; - // let std: f64 = f64::powf( - // results.iter().map(|x| f64::powi(x - avg, 2)).sum::() / (results.len() - 1) as f64, - // 0.5, - // ); - // println!("collected {:} samples.", results.len()); - // println!("diff norm: {:} +- ({:})", avg, std); - // println!("avg over epsilon: {:.2}", avg / f64::EPSILON); - // println!("std over epsilon: {:.2}", std / f64::EPSILON); - // println!("degree percentages: \n3 - {:.4}, \n5 - {:.4}, \n7 - {:.4}, \n9 - {:.4}, \n13 - {:.4}", - // d3 as f64 / num_samples as f64, - // d5 as f64 / num_samples as f64, - // d7 as f64 / num_samples as f64, - // d9 as f64 / num_samples as f64, - // d13 as f64 / num_samples as f64); - // // println!("results: {:?}", results); - // } - #[test] - fn test_pade_approximants() { - let mat: Array2 = - 10. 
as f64 * array![[0.1, 0.2, 0.3], [0.2, 0.1, 0.5], [0.11, 0.22, 0.32]]; - let mut output_3: Array2 = Array::zeros((3, 3).f()); - let mut output_5: Array2 = Array::zeros((3, 3).f()); - let mut output_7: Array2 = Array::zeros((3, 3).f()); - let mut output_9: Array2 = Array::zeros((3, 3).f()); - let mut output_13: Array2 = Array::zeros((3, 3).f()); - fn compute_pade_diff_error(output: &Array2, expected: &Array2) -> f64 { - let mut tot = 0.0; - for row_ix in 0..output.nrows() { - for col_ix in 0..output.ncols() { - tot += (output[[row_ix, col_ix]] - expected[[row_ix, col_ix]]).abs(); - } - } - tot - } - // pade_approximation_3(&mut mp, &mut output_3); - // pade_approximation_5(&mut mp, &mut output_5); - // pade_approximation_7(&mut mp, &mut output_7); - // pade_approximation_9(&mut mp, &mut output_9);/ - // pade_approximation_13(&mut mp, &mut output_13); - let expected = array![ - [157.766, 217.949, 432.144], - [200.674, 278.691, 552.725], - [169.969, 236.289, 469.437] - ]; - println!("output?"); - println!( - "3 norm error: {:}", - compute_pade_diff_error(&output_3, &expected) - ); - println!( - "5 norm error: {:}", - compute_pade_diff_error(&output_5, &expected) - ); - println!( - "7 norm error: {:}", - compute_pade_diff_error(&output_7, &expected) - ); - println!( - "9 norm error: {:}", - compute_pade_diff_error(&output_9, &expected) - ); - println!( - "13 norm error: {:}", - compute_pade_diff_error(&output_13, &expected) - ); - } - /// Compares expm acting on a matrix with random eigenvalues (drawn from - /// Gaussians) and with random eigenvectors (drawn from Haar distribution) - /// to the exact answer. The exact answer is done by exponentiating each - /// diagonal entry in the eigenvalue matrix before conjugating with the - /// eigenvector matrices. In other words, let A = U D U^\dagger, then - /// because e^A = e^(U D U^\dagger) = U (e^D) U^dagger. We use expm - /// to compute e^A and normal floating point exponentiation to compute e^D - #[test] - fn expm_test_gaussian_random_input() { - let a: Array2 = arr2(&[[1, 2], [3, 4]]); - let b: Array2 = arr2(&[[0, 1], [1, 0]]); - println!("matrix multiplication? {:?}", a.dot(&b)); - // let eigenvals = diagonal::Diagonal::from([1,2,3,4,5]); - // let eigenvecs = haar_random(dim); - // let exact = floating_exp(D); - // let diff = expm(U D U.conj().T) - U F U.conj().T; - } -} +} \ No newline at end of file diff --git a/ndarray-linalg/tests/expm.rs b/ndarray-linalg/tests/expm.rs index 7408a170..50305017 100644 --- a/ndarray-linalg/tests/expm.rs +++ b/ndarray-linalg/tests/expm.rs @@ -20,7 +20,7 @@ fn test_random_sparse_matrix() { let pauli_y = array![[zero, c64::new(0., -1.)], [c64::new(0., 1.), zero]]; let pauli_z = array![[c64::new(1., 0.), zero], [zero, c64::new(-1., 0.)]]; for n in 0..num_qubits { - let pauli_matrix = match rng.gen_range(0..=3) { + let pauli_matrix = match rng.gen_range::(0..=3) { 0 => { Array2::::eye(2) }, @@ -49,7 +49,7 @@ fn test_random_sparse_matrix() { let theta = 1. 
* std::f64::consts::PI * rng.gen::(); let scaled_matrix = matrix.clone() * c64::new(0., theta); - let (expm_computed, deg) = expm(&scaled_matrix); + let expm_computed = expm(&scaled_matrix).unwrap(); let expm_expected = Array2::::eye(dim) * theta.cos() + c64::new(0., theta.sin()) * matrix; let comp_diff = &expm_expected - &expm_computed; @@ -67,8 +67,8 @@ fn test_random_sparse_matrix() { #[test] fn test_low_dimension_random_dense_matrix() { let mut rng = rand::thread_rng(); - let dimensions = 50; - let samps = 1000; + let dimensions = 100; + let samps = 500; let scale = 1.; let mut results = Vec::new(); let mut avg_entry_error = Vec::new(); @@ -95,7 +95,7 @@ fn test_low_dimension_random_dense_matrix() { let eigen_expm = vecs.dot(&Array2::from_diag(&eigs)).dot(&adjoint_vecs); // Compute the expm routine, compute error metrics for this sample - let (expm_comp, deg) = expm(&new_matrix); + let expm_comp = expm(&new_matrix).unwrap(); // println!("deg: {:}", deg); let diff = &expm_comp - &eigen_expm; avg_entry_error.push({ @@ -113,6 +113,6 @@ fn test_low_dimension_random_dense_matrix() { 0.5, ); - println!("This may fail at higher dimensions."); + // This may fail at higher dimensions assert!(avg_entry_diff / f64::EPSILON <= 1000.) } From 0f2dadd638ac2e89d1872ee95e213b876d9b7656 Mon Sep 17 00:00:00 2001 From: Matthew Hagan Date: Tue, 24 Jan 2023 15:34:28 -0500 Subject: [PATCH 15/16] final tweaks --- ndarray-linalg/src/expm.rs | 48 +++--- ndarray-linalg/src/lib.rs | 2 - ndarray-linalg/src/normest1.rs | 307 --------------------------------- ndarray-linalg/tests/expm.rs | 23 +-- 4 files changed, 33 insertions(+), 347 deletions(-) delete mode 100644 ndarray-linalg/src/normest1.rs diff --git a/ndarray-linalg/src/expm.rs b/ndarray-linalg/src/expm.rs index b0bc8a07..47267e6e 100644 --- a/ndarray-linalg/src/expm.rs +++ b/ndarray-linalg/src/expm.rs @@ -3,16 +3,15 @@ use cauchy::Scalar; use lax::Lapack; use ndarray::prelude::*; extern crate statrs; -use statrs::{ - function::factorial::{binomial, factorial}, -}; -use crate::error::{Result, LinalgError}; +use crate::error::{LinalgError, Result}; +use statrs::function::factorial::{binomial, factorial}; // These constants are hard-coded from Al-Mohy & Higham -const THETA_3: f64 = 1.495585217958292e-2; -const THETA_5: f64 = 2.539398330063230e-1; -const THETA_7: f64 = 9.504178996162932e-1; -const THETA_9: f64 = 2.097847961257068e0; +const THETA_3: f64 = 1.495_585_217_958_292e-2; +const THETA_5: f64 = 2.539_398_330_063_23e-1; + +const THETA_7: f64 = 9.504_178_996_162_932e-1; +const THETA_9: f64 = 2.097_847_961_257_068e0; const THETA_13: f64 = 4.25; // Alg 5.1 // The Pade Coefficients for the numerator of the diagonal approximation to Exp[x]. Computed via Mathematica/WolframAlpha. @@ -159,7 +158,7 @@ fn pade_approximation_9 + Lapack>( } // Note: all input matrices should be scaled in the main expm -// function. +// function. fn pade_approximation_13 + Lapack>( a_1: &Array2, a_2: &Array2, @@ -212,10 +211,13 @@ where .unwrap() } -/// helper function used in Al-Mohy & Higham. Name is unchanged for future reference. +/// helper function used in Al-Mohy & Higham. Name is unchanged for both literature reference. 
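+/// Concretely, for the [m/m] approximant it sets p = 2m + 1, forms
+/// alpha = c_p * || |A|^p ||_1 / ||A||_1 with c_p the leading Pade error coefficient
+/// computed below, and returns max(ceil(log2(alpha / u) / (2m)), 0) where u is the unit
+/// roundoff. A return of 0 means degree m is accurate enough at the current scaling; a
+/// positive value is the number of extra squarings requested.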
fn ell>(a_matrix: &Array2, m: u64) -> Result { if a_matrix.is_square() == false { - return Err(LinalgError::NotSquare { rows: a_matrix.nrows() as i32, cols: a_matrix.ncols() as i32 }); + return Err(LinalgError::NotSquare { + rows: a_matrix.nrows() as i32, + cols: a_matrix.ncols() as i32, + }); } let p = 2 * m + 1; let a_one_norm = a_matrix.map(|x| x.abs()).opnorm_one()?; @@ -234,15 +236,17 @@ fn pade_error_coefficient(m: u64) -> f64 { } /// ## Matrix Exponentiation -/// Computes matrix exponential based on the scaling-and-squaring algorithm by Al-Mohy and Higham. -/// Currently restricted to matrices with entries that are either f64 or Complex64. 64 bit precision is required -/// due to error calculations in Al-Mohy and Higham. Utilizes Lapack -/// calls so entries must satisfy LAPACK trait bounds. -/// +/// Computes matrix exponential based on the scaling-and-squaring algorithm by Al-Mohy and Higham [[1]]. +/// Currently restricted to matrices with entries that are either f64 or Complex64. 64 bit precision is required +/// due to error calculations in [[1]] Utilizes Lapack calls so entries must satisfy LAPACK trait bounds. +/// /// ### Caveats /// Currently confirmed accurate to f64 precision up to 1024x1024 sparse matrices. Dense matrices -/// have been observed with a worse average entry error, up to 100x100 matrices should be close +/// have been observed with a worse average entry error, up to 100x100 matrices should be close /// enough to f64 precision for vast majority of numeric purposes. +/// +/// ### References +/// [[1]] A New Scaling and Squaring Algorithm for the Matrix Exponential. Al-Mohy, Awad H. and Higham, Nicholas J. 2009. Siam J. Matrix Anal. Appl. Vol. 31, No. 3, pp. 970-989. pub fn expm + Lapack>(a_matrix: &Array2) -> Result> { let mut a_2 = a_matrix.dot(a_matrix); let mut a_4 = a_2.dot(&a_2); @@ -251,21 +255,21 @@ pub fn expm + Lapack>(a_matrix: &Array2) -> Result + Lapack>(a_matrix: &Array2) -> Result Array2 { - let mut rng = rand::thread_rng(); - let mut output: Array2 = Array::::zeros((num_rows, num_columns).f()); - output.mapv_inplace(|_| if rng.gen_bool(0.5) { 1.0 } else { -1.0 }); - // todo - check sizes? - output.column_mut(0).fill(1.); - ensure_no_parallel_columns(&mut output); - output / (num_rows as f64) -} - -fn is_column_parallel(index: usize, matrix: &Array2) -> bool { - // TODO - no need to dot product entire matrix, just the preceeding columns - let dot_prods = matrix.t().dot(&matrix.column(index)); - for ix in 0..index { - if f64::abs(dot_prods[ix]) == matrix.nrows() as f64 { - return true; - } - } - false -} - -fn ensure_column_not_parallel(matrix: &mut Array2, column_index: usize) { - let mut rng = rand::thread_rng(); - for _ in 0..MAX_COLUMN_RESAMPLES { - if is_column_parallel(column_index, matrix) { - matrix.column_mut(column_index).mapv_inplace(|_| { - if rng.gen_bool(0.5) { - 1.0 - } else { - -1.0 - } - }); - } else { - break; - } - } -} - -fn ensure_no_parallel_columns(mat: &mut Array2) { - for col_ix in 0..mat.ncols() { - ensure_column_not_parallel(mat, col_ix); - } -} - -/// Outputs true if every column of s_new is parallel to some column in s_old, false otherwise. 
-fn check_if_s_parallel_to_s_old(s_new: &Array2, s_old: &Array2) -> bool { - let mut ret = true; - let dots = s_old.t().dot(s_new); - for col in dots.columns() { - let mut is_col_good = false; - for elem in col.iter() { - if f64::abs(*elem) == col.len() as f64 { - is_col_good = true; - } - } - if is_col_good == false { - ret = false; - break; - } - } - ret -} - -/// Resamples columns of s that are parallel to to any prior columns of s itself or to any column -/// of s_old. -fn ensure_new_s_matrix(s: &mut Array2, s_old: &Array2) { - let mut big_matrix: Array2 = concatenate(Axis(1), &[s_old.view(), s.view()]).unwrap(); - for col_ix in 0..s.ncols() { - ensure_column_not_parallel(&mut big_matrix, s_old.ncols() + col_ix); - for row_ix in 0..s.nrows() { - s.column_mut(col_ix)[row_ix] = big_matrix.column(s_old.ncols() + col_ix)[row_ix]; - } - } -} - -fn check_index_history(indices: &Vec, index_history: &Vec, t: usize) -> bool { - let mut ret = false; - for chunk in index_history.chunks(t) { - if *chunk == indices[0..t] { - ret = true; - break; - } - } - ret -} - -fn update_indices(indices: &mut Vec, index_history: &Vec, t: usize) { - let mut unique_indices: Vec = Vec::with_capacity(t); - for ix in indices.iter() { - if index_history.contains(&ix) == false { - unique_indices.push(*ix); - if unique_indices.len() == t { - break; - } - } - } - for ix in 0..unique_indices.len() { - indices[ix] = unique_indices[ix]; - } -} - -/// Compute a lower bound of the 1-norm of a square matrix. The 1-norm is defined as -/// || A ||_1 = max_{1 <= j <= n} \sum_i |a_{i,j} | -/// In other words find the column with the largest sum over all rows (and take absolute value). -/// Note this is not equivalent to the induced 1-norm or the Schatten 1-norm. -/// We do not give the option of returning the v, w that maximize the resulting norm to keep the -/// function signature clean. This could be implemented in the future. -/// Panics if input matrix is non-square to avoid returning a Result<_,_>. -/// This is computed following Algorithm 2.4 of the paper "A Block Algorithm for Matrix 1-Norm -/// Estimation with an Application to 1-Norm Pseudospectra" by Nicholas J. Higham and -/// Francoise Tisseur (2000) SIAM J. Matrix Anal. Appl. Vol. 21, No. 4, pp. 1185-1201. -pub fn normest(input_matrix: &Array2, t: usize, itmax: u32) -> f64 { - if input_matrix.is_square() == false { - panic!("One Norm Estimation encountered non-square input matrix."); - } - if t < 1 { - panic!("One Norm Estimation requires at least one column for estimation."); - } - if itmax < 2 { - panic!("One Norm Estimation requires at least two iterations."); - } - let n = input_matrix.nrows(); - - // This is worse than just computing the norm exactly, so just do that. Will panic - // if opnorm_one() fails. 
- if t > n { - return input_matrix.opnorm_one().unwrap(); - } - let mut est = 0.0; - let mut best_index = 0; - - let mut x_matrix = prepare_x_matrix(n, t); - let mut index_history: Vec = Vec::new(); - let mut est_old = 0.0; - let mut indices: Vec = (0..n).collect(); - let mut s_matrix: Array2 = Array::<_, _>::zeros((n, t).f()); - let mut s_old: Array2 = Array::<_, _>::zeros((n, t).f()); - // Main loop of algorithm 2.4 in higham and tisseur - for iteration in 0..itmax { - // Y = AX - let y = input_matrix.dot(&x_matrix); - - // est = max { ||Y(:, j)||_1 : j = 1:t} - let index_norm_pairs: Vec<(usize, f64)> = (0..y.ncols()) - .into_iter() - .map(|ix| (ix, y.column(ix).map(|elem| f64::abs(*elem)).sum())) - .collect(); - let (maximizing_ix, max_est) = *index_norm_pairs - .iter() - .reduce(|x, y| if x.1 > y.1 { x } else { y }) - .unwrap(); - est = max_est; - - if est > est_old || iteration == 1 { - best_index = indices[maximizing_ix]; - } - - // Section (1) of Alg. 2.4 - if iteration > 1 && est <= est_old { - est = est_old; - break; - } - - est_old = est; - s_old.assign(&s_matrix); - // Is there a faster way to do this? I wanted to avoid creating a new matrix of signs - // and then copying it into s_matrix, this was the first way I could think of doing so. - for row_ix in 0..s_matrix.nrows() { - for col_ix in 0..s_matrix.ncols() { - s_matrix[[row_ix, col_ix]] = y[[row_ix, col_ix]].signum() - } - } - - // Section (2) of Alg. 2.4 - if check_if_s_parallel_to_s_old(&s_matrix, &s_old) { - break; - } - if t > 1 { - ensure_new_s_matrix(&mut s_matrix, &s_old); - } - - // Section (3) of Alg. 2.4 - let z_matrix: Array2 = input_matrix.t().dot(&s_matrix); - let mut h: Vec = z_matrix - .rows() - .into_iter() - .map(|row| *row.iter().reduce(|x, y| if x > y { x } else { y }).unwrap()) - .collect(); - let max_h = *h.iter().reduce(|x, y| if x > y { x } else { y }).unwrap(); - - // Section (4) of Alg. 2.4 - if iteration >= 2 && max_h == h[best_index] { - break; - } - let mut zipped_pairs: Vec<(f64, usize)> = zip(h.clone(), 0..n).collect(); - zipped_pairs.sort_unstable_by(|a, b| b.0.partial_cmp(&a.0).unwrap()); - (h, indices) = zipped_pairs.into_iter().unzip(); - if t > 1 { - // Section (5) of Alg. 2.4 - if check_index_history(&indices, &index_history, t) { - break; - } - update_indices(&mut indices, &index_history, t); - } - for ix in 0..t { - x_matrix.column_mut(ix).fill(0.0); - x_matrix[[indices[ix], ix]] = 1.0; - index_history.push(indices[ix]); - } - } - // Would be Section (6) of Alg 2.4 if we returned the best index guess. - est -} - -mod tests { - use crate::{ - ndarray::ShapeBuilder, - normest1::{ensure_no_parallel_columns, is_column_parallel, prepare_x_matrix}, - Inverse, OperationNorm, - }; - use ndarray::Array2; - use rand::{thread_rng, Rng}; - - use super::{check_if_s_parallel_to_s_old, normest}; - - #[test] - fn test_prep() { - let n = 4; - let t = 5; - let mut x_mat: Array2 = ndarray::Array::::zeros((n, t).f()); - println!("x_mat before"); - println!("{:?}", x_mat); - // prepare_x_matrix(&mut x_mat); - println!("x_mat after"); - println!("{:?}", x_mat); - println!( - "any parallel columns? 
{:}", - is_column_parallel(x_mat.ncols() - 1, &x_mat) - ); - println!("resampling columns"); - ensure_no_parallel_columns(&mut x_mat); - println!("after resampling"); - println!("{:?}", x_mat); - } - - #[test] - fn test_one_norm() { - let Y: Array2 = array![[1., 2., 3., 4.], [1., 2., 3., 0.], [1., 2., 3., 0.]]; - let est = Y - .columns() - .into_iter() - .map(|col| col.map(|x| f64::abs(*x)).sum()) - .reduce(f64::max) - .unwrap(); - println!("est: {:}", est); - } - - #[test] - fn test_check_if_s_parallel_to_s_old() { - let n = 100; - let t = 4; - let s_1: Array2 = array![[-1., 1., 1.], [1., 1., 1.], [-1., -1., 1.]]; - let s_2: Array2 = array![[-1., -1., 1.], [-1., -1., 1.], [-1., 1., -1.]]; - println!( - "check_if_s_parallel_to_s_old: {:}", - check_if_s_parallel_to_s_old(&s_2, &s_1) - ); - } - - #[test] - fn test_average_normest() { - let n = 100; - let mut differenial_error = Vec::new(); - let mut ratios = Vec::new(); - let t = 2; - let itmax = 5; - let mut mat: Array2 = ndarray::Array::::zeros((n, n).f()); - let mut rng = rand::thread_rng(); - for _i in 0..5000 { - mat.mapv_inplace(|_| rng.gen()); - mat.assign(&mat.inv().unwrap()); - let est = normest(&mat, t, itmax); - let exp = mat.opnorm_one().unwrap(); - differenial_error.push((est - exp).abs() / exp); - ratios.push(est / exp); - } - println!( - "differential error average: {:}", - differenial_error.iter().sum::() / differenial_error.len() as f64 - ); - println!( - "ratio average: {:?}", - ratios.iter().sum::() / ratios.len() as f64 - ); - } -} diff --git a/ndarray-linalg/tests/expm.rs b/ndarray-linalg/tests/expm.rs index 50305017..37ef30c1 100644 --- a/ndarray-linalg/tests/expm.rs +++ b/ndarray-linalg/tests/expm.rs @@ -1,5 +1,5 @@ -use ndarray::*; use ndarray::linalg::kron; +use ndarray::*; use ndarray_linalg::expm::expm; use ndarray_linalg::{Eig, OperationNorm}; use num_complex::{Complex64 as c64, ComplexFloat}; @@ -20,19 +20,11 @@ fn test_random_sparse_matrix() { let pauli_y = array![[zero, c64::new(0., -1.)], [c64::new(0., 1.), zero]]; let pauli_z = array![[c64::new(1., 0.), zero], [zero, c64::new(-1., 0.)]]; for n in 0..num_qubits { - let pauli_matrix = match rng.gen_range::(0..=3) { - 0 => { - Array2::::eye(2) - }, - 1 => { - pauli_x.clone() - }, - 2 => { - pauli_y.clone() - }, - 3 => { - pauli_z.clone() - }, + let pauli_matrix = match rng.gen_range::(0..=3) { + 0 => Array2::::eye(2), + 1 => pauli_x.clone(), + 2 => pauli_y.clone(), + 3 => pauli_z.clone(), _ => unreachable!(), }; if n == 0 { @@ -96,7 +88,6 @@ fn test_low_dimension_random_dense_matrix() { // Compute the expm routine, compute error metrics for this sample let expm_comp = expm(&new_matrix).unwrap(); - // println!("deg: {:}", deg); let diff = &expm_comp - &eigen_expm; avg_entry_error.push({ let tot = diff.map(|x| x.abs()).into_iter().sum::(); @@ -108,7 +99,7 @@ fn test_low_dimension_random_dense_matrix() { // compute averages let avg: f64 = results.iter().sum::() / results.len() as f64; let avg_entry_diff = avg_entry_error.iter().sum::() / avg_entry_error.len() as f64; - let std: f64 = f64::powf( + let _std: f64 = f64::powf( results.iter().map(|x| f64::powi(x - avg, 2)).sum::() / (results.len() - 1) as f64, 0.5, ); From b4dd1c7e7762d59b47dc75964533be0bdd278a9f Mon Sep 17 00:00:00 2001 From: matthagan15 Date: Tue, 24 Jan 2023 15:47:15 -0500 Subject: [PATCH 16/16] Delete build.rs --- build.rs | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 build.rs diff --git a/build.rs b/build.rs deleted file mode 100644 index bfade51a..00000000 --- a/build.rs +++ /dev/null 
@@ -1,3 +0,0 @@
-fn main() {
-    println!("cargo:rustc-link-lib=framework=Accelerate")
-}
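As a closing illustration, here is a minimal usage sketch of the expm API as it stands at
the end of this series. This is a sketch only: it assumes the crate is built with one of
its LAPACK backend features (e.g. openblas-static), and the 2x2 nilpotent matrix is chosen
purely because exp(N) = I + N gives an exact reference value.

use ndarray::array;
use ndarray_linalg::expm::expm;
use num_complex::Complex64 as c64;

fn main() {
    // N = [[0, 1], [0, 0]] is nilpotent (N^2 = 0), so the exponential series terminates:
    // exp(N) = I + N = [[1, 1], [0, 1]].
    let n = array![
        [c64::new(0., 0.), c64::new(1., 0.)],
        [c64::new(0., 0.), c64::new(0., 0.)]
    ];
    let e = expm(&n).expect("expm should succeed on a 2x2 matrix");
    // Each entry should match [[1, 1], [0, 1]] to within a few machine epsilons.
    println!("{:?}", e);
}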