Changes from all commits
40 commits
4a1c6fa (Feb 20, 2025): ugliest LU algo
62aacee (Feb 20, 2025): ugliest LU algo
7bc000c (Feb 20, 2025): ugliest LU algo
a9f0d0a (Feb 20, 2025): less messy
baab534 (Feb 25, 2025): householder first step
b8d4492 (Feb 26, 2025): cleaning up
45b6fcf (Feb 27, 2025): i think this is householder?
e55953f (Feb 27, 2025): grossest hacks, need to rethink on how to do this, but at least this …
aa4afcf (Feb 27, 2025): working on making it inplace
1ea8ea6 (Feb 28, 2025): first iteration complete
fc104a3 (Feb 28, 2025): first clean done
ead0f03 (Feb 28, 2025): different buffer strategy
21d5d97 (Feb 28, 2025): working but iterating
c4846c6 (Feb 28, 2025): unsure why it was not working maybe i didn't save
7f3c562 (Feb 28, 2025): better indexing
0b713cc (Feb 28, 2025): finished householder!
9590a04 (Feb 28, 2025): looks good to me
f60d3c0 (Feb 28, 2025): better version for sum
c9fb4ea (Mar 2, 2025): atting a little structure
246e7a4 (Mar 2, 2025): progress on getting back out Q but it's not ortho, not yet
7125964 (Mar 2, 2025): QR Decomp complete!
ecaf2b9 (Mar 4, 2025): QR Q builds backwards with Householder
bdd1d2c (Mar 4, 2025): set-up for iterated QR
017b50c (Mar 5, 2025): need to refine methods in order to pull back product Q[i] for the U, …
938aef8 (Mar 5, 2025): need to refine methods in order to pull back product Q[i] for the U, …
0d13ae4 (Mar 5, 2025): quick note
3a729ff (Mar 5, 2025): working on doing decompose now
ddea2ef (Mar 5, 2025): working on doing decompose now
e8c7765 (Mar 5, 2025): refactor
fae6a0a (Mar 6, 2025): dynamic stopp condition and have out the orthogonal vector U
817ae14 (Mar 6, 2025): looks great time to start getting the rotations for the real-schur an…
02cf3fd (Mar 6, 2025): looks great time to start getting the rotations for the real-schur an…
1aee129 (Mar 6, 2025): okie this is looking good
81cbb2c (Mar 7, 2025): that was tiring, need to work on bounds got first computed svd, somet…
9ac0597 (Mar 8, 2025): it isn't SVD but it is diagonal decomp, should be able to extend to o…
e460058 (Mar 9, 2025): givens working for symetric
886858e (Mar 12, 2025): holy eff got the worst SVD implementation up but it's up golub kahan!…
c0b3a33 (Mar 12, 2025): bidiagonalization is up, just need bidiagonalization -> Diagonal, hop…
9c7056d (Mar 13, 2025): not the best algo
a15f4b2 (Mar 13, 2025): i think the QR iterative method is correct, i'm starting to doubt the…
2 changes: 2 additions & 0 deletions Cargo.toml
@@ -7,6 +7,8 @@ edition = "2021"
itertools = "0.14.0"
rand = "0.8.5"
rayon = "1.10.0"

[dev-dependencies]
criterion = "0.5"

[[bench]]
12 changes: 6 additions & 6 deletions benches/matrix_benchmark.rs
@@ -5,7 +5,9 @@ use rand::Rng;

fn generate_matrix(rows: usize, cols: usize) -> blas::NdArray {
let mut rng = rand::thread_rng();
let data: Vec<f32> = (0..rows * cols).map(|_| rng.gen_range(-10.0..10.0)).collect();
let data: Vec<f32> = (0..rows * cols)
.map(|_| rng.gen_range(-10.0..10.0))
.collect();
blas::NdArray::new(vec![rows, cols], data)
}

@@ -17,7 +19,7 @@ fn benchmark(c: &mut Criterion) {
let medium_size = 128;
let large_size = 256;
let blocksize = 32; // Example blocksize, modify as needed
//
//

let small_x = generate_matrix(small_size, small_size);
let small_y = generate_matrix(small_size, small_size);
@@ -60,11 +62,9 @@ fn benchmark(c: &mut Criterion) {
});
// Specify the number of iterations for the benchmarks

// c.configure_from_args();
// c.warm_up_time(std::time::Duration::from_secs(2)); // Optional: set warm-up time
// c.configure_from_args();
// c.warm_up_time(std::time::Duration::from_secs(2)); // Optional: set warm-up time
}


criterion_group!(benches, benchmark);
criterion_main!(benches);

86 changes: 86 additions & 0 deletions src/calc_utils/blas.rs
@@ -3,12 +3,22 @@ use crate::calc_utils::math;
use rayon::prelude::*;
use std::fmt;

pub fn create_identity_matrix(n: usize) -> NdArray {
    let mut data = vec![0_f32; n * n];
    let dims = vec![n; 2];
    for i in 0..n {
        data[i * n + i] = 1_f32;
    }
    NdArray { dims, data }
}

pub struct Matrix {
    pub rows: usize,
    pub cols: usize,
    pub data: Vec<f32>,
}

#[derive(Clone)]
pub struct NdArray {
    pub dims: Vec<usize>,
    pub data: Vec<f32>,
@@ -221,3 +231,79 @@ pub fn tensor_mult(blocksize: usize, x: &NdArray, y: &NdArray) -> NdArray {
    dims[1] = y.dims[1];
    NdArray::new(dims, new)
}

fn householder_params(x: &[f32]) -> (f32, Vec<f32>) {
    let length = x.len();
    assert!(length > 0, "needs to have non-zero length");
    // Scale by the entry of largest magnitude to keep the reflection numerically
    // stable; assumes the column is not identically zero.
    let max_element: f32 = x.iter().copied().map(f32::abs).fold(f32::NEG_INFINITY, f32::max);
    let mut u = x
        .par_iter()
        .copied()
        .map(|val| val / max_element)
        .collect::<Vec<f32>>();
    // Shift the first component away from zero to avoid cancellation.
    u[0] += math::magnitude(&u) * x[0].signum();
    let magnitude_squared = math::dot_product(&u, &u);
    // Return beta = 2 / u'u together with the (unnormalized) reflection vector u.
    (2_f32 / magnitude_squared, u)
}
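As a minimal sketch of what the returned pair encodes (an illustrative helper, not part of this diff; the name dense_householder is hypothetical): the reflector is H = I - beta * u * u^T, which can be materialized with the identity and outer-product helpers added in this change.

// Illustrative only: build the dense reflector H = I - beta * u u^T from the
// (beta, u) pair returned by householder_params.
fn dense_householder(beta: f32, u: &[f32]) -> NdArray {
    let n = u.len();
    let mut h = create_identity_matrix(n);
    let uut = math::outer_product(u.to_vec()); // row-major n x n buffer holding u u^T
    for i in 0..n * n {
        h.data[i] -= beta * uut[i];
    }
    h
}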

fn householder_factor(mut x: NdArray) -> NdArray {
    let rows = x.dims[0];
    let cols = x.dims[1];

    for o in 0..cols.min(rows) {
        let column_vector = (o..rows)
            .into_par_iter()
            .map(|r| x.data[r * cols + o])
            .collect::<Vec<f32>>();
        let (b, u) = householder_params(&column_vector);
        let mut queue: Vec<(usize, f32)> = vec![(0, 0_f32); (cols - o) * (rows - o)];
        for i in 0..(rows - o).min(cols - o) {
            for j in 0..cols - o {
                // Need to compute the change for everything to the right of the initial vector
                if i <= j || j > o {
                    let sum = (0..rows - o)
                        .into_par_iter()
                        .map(|k| x.data[(k + o) * cols + (j + o)] * b * u[i] * u[k])
                        .sum();
                    queue[i * (cols - o) + j].0 = (i + o) * cols + (j + o);
                    queue[i * (cols - o) + j].1 = sum;
                }
            }
        }
        queue.iter().for_each(|q| x.data[q.0] -= q.1);
        (o + 1..rows).for_each(|i| x.data[i * cols + o] = 0_f32);
    }
    x
}

fn lu_factorization(x: &NdArray) -> (NdArray, NdArray) {
    let rows = x.dims[0];
    let cols = x.dims[1];
    assert_eq!(rows, cols, "currently LU is available only for square");
    let mut lower = vec![0_f32; x.data.len()];
    let mut upper = x.data.clone();

    for j in 0..rows {
        for i in 0..rows {
            for k in 0..rows {
                if j > i && k == 0 {
                    upper[j * cols + i] = 0_f32;
                } else if i == j && k == 0 {
                    lower[i * cols + j] = 1_f32;
                } else if i > j {
                    if k == 0 {
                        // Note: this stores the *negated* elimination multiplier, so the
                        // conventional unit-lower-triangular L in A = L * U has these
                        // off-diagonal entries with the opposite sign.
                        lower[i * cols + j] = -upper[i * cols + j] / upper[j * cols + j];
                        upper[i * cols + j] = 0_f32;
                    } else {
                        upper[i * cols + k] += lower[i * cols + j] * upper[j * cols + k];
                    }
                }
            }
        }
    }
    (
        NdArray::new(x.dims.clone(), lower),
        NdArray::new(x.dims.clone(), upper),
    )
}
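For cross-checking small square inputs, a minimal textbook Doolittle-style sketch without pivoting (illustrative only; lu_doolittle is a hypothetical helper, not the PR's algorithm, and it stores the conventional positive multipliers so that A = L * U):

// Illustrative only: textbook Doolittle LU without pivoting; assumes non-zero pivots.
fn lu_doolittle(x: &NdArray) -> (NdArray, NdArray) {
    let n = x.dims[0];
    assert_eq!(n, x.dims[1], "square input only");
    let mut lower = create_identity_matrix(n);
    let mut upper = x.data.clone();
    for j in 0..n {
        for i in (j + 1)..n {
            // Elimination multiplier for row i against pivot row j.
            let m = upper[i * n + j] / upper[j * n + j];
            lower.data[i * n + j] = m;
            upper[i * n + j] = 0_f32;
            for k in (j + 1)..n {
                upper[i * n + k] -= m * upper[j * n + k];
            }
        }
    }
    (lower, NdArray::new(x.dims.clone(), upper))
}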
21 changes: 21 additions & 0 deletions src/calc_utils/math.rs
@@ -7,6 +7,14 @@ pub fn dot_product(x: &[f32], y: &[f32]) -> f32 {
    x.iter().zip(y.iter()).map(|(&x, &y)| x * y).sum()
}

pub fn magnitude(x: &[f32]) -> f32 {
    x.iter().map(|&v| v * v).sum::<f32>().sqrt()
}

pub fn sigmoid(x: f32) -> f32 {
    1.0 / (1.0 + (-x).exp())
}
@@ -90,3 +98,16 @@ fn cross_product(x: Vec<f32>, y: Vec<f32>) -> Vec<Vec<f32>> {
    }
    cross_apply(&x, &y, product)
}

pub fn outer_product(x: Vec<f32>) -> Vec<f32> {
    // Returns the symmetric length x length matrix x * x^T in row-major order.
    let length = x.len();
    assert!(length > 0, "needs to have non-zero length");
    let mut new_data = vec![0_f32; length * length];
    for i in 0..length {
        for j in 0..length {
            new_data[i * length + j] = x[i] * x[j];
        }
    }
    new_data
}
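A small illustrative test (not part of the diff) pinning down the row-major layout with example values:

#[cfg(test)]
mod outer_product_sketch {
    use super::*;

    #[test]
    fn row_major_layout() {
        // x x^T for x = [1, 2], flattened row-major.
        assert_eq!(outer_product(vec![1.0, 2.0]), vec![1.0, 2.0, 2.0, 4.0]);
    }
}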
1 change: 1 addition & 0 deletions src/calc_utils/mod.rs
@@ -1,3 +1,4 @@
pub mod blas;
pub mod math;
pub mod simd;
pub mod qr_decomposition;
129 changes: 129 additions & 0 deletions src/calc_utils/qr_decomposition.rs
@@ -0,0 +1,129 @@

#![allow(warnings)]
use crate::calc_utils::blas;
use crate::calc_utils::blas::{Matrix, NdArray};
use crate::calc_utils::math;
use crate::calc_utils::simd;
use rand::Rng;
use rayon::prelude::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

struct HouseholderReflection {
    beta: f32,        // stores 2 / u'u
    vector: Vec<f32>, // stores the reflection vector u
}

impl HouseholderReflection {
    fn new(beta: f32, vector: Vec<f32>) -> Self {
        Self { beta, vector }
    }
}
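A reflector stored this way never needs to be materialized as a matrix; a minimal sketch (illustrative helper, not part of the diff) of applying it to a vector as H y = y - beta * (u . y) * u:

// Illustrative only: apply the stored reflection implicitly.
fn apply_reflection(h: &HouseholderReflection, y: &mut [f32]) {
    let coeff = h.beta * math::dot_product(&h.vector, y);
    for (yi, ui) in y.iter_mut().zip(h.vector.iter()) {
        *yi -= coeff * ui;
    }
}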

struct QrDecomposition {
    projections: Vec<HouseholderReflection>,
    ndarray: NdArray,
}


impl QrDecomposition {
    fn new(projections: Vec<HouseholderReflection>, ndarray: NdArray) -> Self {
        Self { projections, ndarray }
    }

    fn retrieve_q(&self) -> NdArray {
        let dims = self.ndarray.dims.clone();
        let mut data = vec![0_f32; dims[0] * dims[1]];

        for i in 0..dims[1] {
            let coordinate = self.determine_basis(i);
            for k in 0..coordinate.len() {
                // Writes basis vector i as row i; use data[k * dims[0] + i] instead
                // to lay the basis vectors out as columns.
                data[i * dims[0] + k] = coordinate[k];
            }
        }
        NdArray::new(dims, data)
    }
    fn determine_basis(&self, e: usize) -> Vec<f32> {
        assert!(e < self.ndarray.dims[0]);
        let mut data = vec![0_f32; self.ndarray.dims[0]];
        data[e] = 1_f32;

        // Apply each stored reflection to the basis vector e; iterate
        // `(0..self.projections.len()).rev()` instead to apply them high to low
        // for a column-vector convention.
        for i in 0..self.projections.len() {
            let mut delta = vec![0_f32; self.ndarray.dims[0]];
            let projection = &self.projections[i];
            // Reflection i only touches the trailing sub-vector starting at index i.
            for j in 0..projection.vector.len() {
                for k in 0..projection.vector.len() {
                    delta[i + j] -=
                        projection.beta * projection.vector[k] * projection.vector[j] * data[i + k];
                }
            }
            for j in 0..delta.len() {
                data[j] += delta[j];
            }
        }
        data
    }
}
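A hedged sanity-check sketch (illustrative helper, not part of the diff): for a square Q, the pairwise dot products of its rows should form the identity (Q Q^T = I, which for square Q is equivalent to Q^T Q = I), so the check works whichever way retrieve_q lays the basis vectors out.

// Illustrative only: check that the rows of a square row-major matrix are orthonormal.
fn assert_orthonormal(q: &NdArray, tol: f32) {
    let n = q.dims[0];
    assert_eq!(n, q.dims[1], "square matrices only");
    for i in 0..n {
        for j in 0..n {
            let d = math::dot_product(&q.data[i * n..(i + 1) * n], &q.data[j * n..(j + 1) * n]);
            let expected = if i == j { 1.0 } else { 0.0 };
            assert!((d - expected).abs() < tol, "rows {} and {} give {}", i, j, d);
        }
    }
}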


fn householder_params(x: &[f32]) -> HouseholderReflection {
    let length = x.len();
    assert!(length > 0, "needs to have non-zero length");
    // Scale by the entry of largest magnitude to keep the reflection numerically
    // stable; assumes the column is not identically zero.
    let max_element: f32 = x.iter().copied().map(f32::abs).fold(f32::NEG_INFINITY, f32::max);
    let mut u = x
        .par_iter()
        .copied()
        .map(|val| val / max_element)
        .collect::<Vec<f32>>();
    // Shift the first component away from zero to avoid cancellation.
    u[0] += math::magnitude(&u) * x[0].signum();
    let magnitude_squared = math::dot_product(&u, &u);
    HouseholderReflection::new(2_f32 / magnitude_squared, u)
}

fn householder_factor(mut x: NdArray) -> QrDecomposition {
    let rows = x.dims[0];
    let cols = x.dims[1];
    let mut projections = Vec::with_capacity(cols.min(rows));

    for o in 0..cols.min(rows) {
        let column_vector = (o..rows)
            .into_par_iter()
            .map(|r| x.data[r * cols + o])
            .collect::<Vec<f32>>();
        let householder = householder_params(&column_vector);
        projections.push(householder);
        let mut queue: Vec<(usize, f32)> = vec![(0, 0_f32); (cols - o) * (rows - o)];
        for i in 0..(rows - o).min(cols - o) {
            for j in 0..cols - o {
                // Need to compute the change for everything to the right of the initial vector
                if i <= j || j > o {
                    let sum = (0..rows - o)
                        .into_par_iter()
                        .map(|k| {
                            x.data[(k + o) * cols + (j + o)]
                                * projections[o].beta
                                * projections[o].vector[i]
                                * projections[o].vector[k]
                        })
                        .sum();
                    queue[i * (cols - o) + j].0 = (i + o) * cols + (j + o);
                    queue[i * (cols - o) + j].1 = sum;
                }
            }
        }
        queue.iter().for_each(|q| x.data[q.0] -= q.1);
        (o + 1..rows).for_each(|i| x.data[i * cols + o] = 0_f32);
    }
    QrDecomposition::new(projections, x)
}
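A minimal usage sketch (illustrative test, not part of the diff): after householder_factor runs, the matrix left in `ndarray` should be upper triangular, since each step explicitly zeroes the entries below the pivot, and Q can then be recovered via retrieve_q. The test module name and matrix values are chosen only for the example.

#[cfg(test)]
mod householder_factor_sketch {
    use super::*;

    #[test]
    fn r_is_upper_triangular() {
        // Small symmetric tridiagonal example.
        let a = NdArray::new(vec![3, 3], vec![2.0, -1.0, 0.0, -1.0, 2.0, -1.0, 0.0, -1.0, 2.0]);
        let qr = householder_factor(a);
        let (rows, cols) = (qr.ndarray.dims[0], qr.ndarray.dims[1]);
        for i in 0..rows {
            for j in 0..i.min(cols) {
                assert_eq!(qr.ndarray.data[i * cols + j], 0.0, "entry ({}, {}) should be zeroed", i, j);
            }
        }
        let _q = qr.retrieve_q(); // the (approximately) orthogonal factor
    }
}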