51 commits
3bbed50  Generalize get_ld and get_ncols in blas/tools.hpp (Thoemi09, May 8, 2025)
8b0d8a3  Update blas/scal.hpp (Thoemi09, May 8, 2025)
afca7ec  Update blas/ger.hpp (Thoemi09, May 8, 2025)
31d8304  Add an outer product function to linalg (Thoemi09, May 8, 2025)
9584be1  Update blas/dot.hpp and linalg/dot.hpp (Thoemi09, May 8, 2025)
ceb52f9  Add blas::get_array to blas/tools.hpp (Thoemi09, May 9, 2025)
5a86f16  Allow nda::Array types in mem::common_addr_space (Thoemi09, May 9, 2025)
5783a39  Update blas/gemv.hpp and introduce linalg/matvecmul.hpp (Thoemi09, May 9, 2025)
19a66d2  Update blas/gemm.hpp and linalg/matmul.hpp (Thoemi09, May 13, 2025)
14c0941  Update blas/gemm_batch.hpp (Thoemi09, May 13, 2025)
be5709a  Update lapack/geqp3.hpp, lapack/orgqr.hpp and lapack/ungqr.hpp (Thoemi09, May 13, 2025)
3c120fc  Update lapack/gesvd.hpp (Thoemi09, May 13, 2025)
42609d8  Add linalg::svd and linalg::svd_in_place (Thoemi09, May 13, 2025)
12bcfb5  Update lapack/gtsv.hpp (Thoemi09, May 13, 2025)
18d9980  Update lapack/getrf.hpp, lapack/getrs.hpp and lapack/getri.hpp (Thoemi09, May 13, 2025)
d2dfd70  Update lapack/gelss.hpp and lapack/gelss_worker.hpp (Thoemi09, May 14, 2025)
a29b571  Add linalg::solve and linalg::solve_in_place (Thoemi09, May 14, 2025)
2ce350c  Update linalg/norm.hpp (Thoemi09, May 14, 2025)
551c2f8  Update linalg/cross_product.hpp (Thoemi09, May 14, 2025)
6806401  Move is_matrix_square and is_matrix_diagonal from linalg/det_and_inve… (Thoemi09, May 14, 2025)
e798dcd  Split linalg/det_and_inverse.hpp into linalg/det.hpp and linalg/inv.hpp (Thoemi09, May 14, 2025)
f0a0865  Update linalg/inv.hpp (Thoemi09, May 15, 2025)
304ed37  Update linalg/det.hpp (Thoemi09, May 15, 2025)
910b62d  Remove compiler warning in mem and update docs (Thoemi09, May 22, 2025)
e9978ed  Do not throw in assign_from_ndarray and fill_with_scalar (Thoemi09, May 22, 2025)
5e030e1  Add blas/gerc.hpp (Thoemi09, May 16, 2025)
28d1025  Add lapack::syev and lapack::heev wrappers (Thoemi09, May 19, 2025)
7ee182c  Add linalg::eigh and linalg::eigvalsh (Thoemi09, May 19, 2025)
49ef113  Add lapack::sygv and lapack::hegv wrappers (Thoemi09, May 19, 2025)
daaa5b1  Overload linalg::eigh and linalg::eigvalsh for generalized eigenvalue… (Thoemi09, May 20, 2025)
b856252  Update tests in nda_cublas.cpp (Thoemi09, May 22, 2025)
a9d0268  Remove getri declarations from lapack/interface/cusolver_interface.hpp (Thoemi09, May 23, 2025)
483738b  Add missing include to lapack/gelss_worker.hpp (Thoemi09, May 23, 2025)
5736f3b  Add additional check to lapack/gesvd.hpp for device implementation (Thoemi09, May 23, 2025)
37f4ec1  Update tests in nda_culapack.cpp (Thoemi09, May 23, 2025)
18c5bb5  Minor test updates in nda_linear_algebra.cpp (Thoemi09, May 26, 2025)
229ac12  Add compile time check to assign_from_ndarray (Thoemi09, May 26, 2025)
70e5830  Add test for linalg routines on the device (Thoemi09, May 26, 2025)
84b4b98  [doc] Update docs of BLAS, LAPACK and linalg routines (Thoemi09, May 27, 2025)
f236859  [doc] Use sections instead of subsections in examples (Thoemi09, May 27, 2025)
e725e82  [ghactions] Update to gcc-15 in macos runner (Thoemi09, May 27, 2025)
bc17721  Add float32 support for main blas routines (blackwer, Jul 23, 2025)
d77ea2b  Add float32 support for some lapack routines (blackwer, Jul 24, 2025)
06380eb  test_linear_algebra: handle promotion and precision issues in libc++ (blackwer, Jul 24, 2025)
9e50a7e  test_linear_algebra: more precision check improvements (blackwer, Jul 24, 2025)
ed0b15b  add float overloads for MKL blas functions, including batches (blackwer, Jul 24, 2025)
0601056  Add float overloads for more LAPACK functions (blackwer, Jul 24, 2025)
ad18c62  tests: more eps_close for fp32 lapack tests (blackwer, Jul 25, 2025)
a6e3088  tests: small tweaks to linear algebra tests (blackwer, Jul 25, 2025)
4934c16  tests: Set 'eps_close' in linalg/lapack based on error analysis (blackwer, Jul 28, 2025)
05eadcb  Merge remote-tracking branch 'origin/unstable' into float-support (blackwer, Aug 7, 2025)
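Taken together, these commits move nda's free-standing linear algebra routines into the nda::linalg namespace (inv, det, matmul, matvecmul, outer product, svd, solve, eigh, eigvalsh), rework the underlying BLAS/LAPACK wrappers, and add float32 support. A minimal usage sketch, using only names that appear in the commit messages and in the diffs below; the exact signatures are not shown on this page and are assumptions:

```cpp
#include <nda/nda.hpp>

int main() {
  // same fill pattern as in benchmarks/small_inv.cpp below
  nda::matrix<double> W(4, 4);
  for (int i = 0; i < 4; ++i)
    for (int j = 0; j < 4; ++j) W(i, j) = (i > j ? 0.5 + i + 2.5 * j : i * 0.8 - j - 0.5);

  auto Wi = nda::linalg::inv(W);        // replaces the old free function nda::inverse
  auto Id = nda::linalg::matmul(W, Wi); // should be close to the identity matrix
}
```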
2 changes: 1 addition & 1 deletion benchmarks/small_inv.cpp
@@ -12,7 +12,7 @@ static void inv(benchmark::State &state) {
for (int i = 0; i < N; ++i)
for (int j = 0; j < N; ++j) W(i, j) = (i > j ? 0.5 + i + 2.5 * j : i * 0.8 - j - 0.5);

while (state.KeepRunning()) { benchmark::DoNotOptimize(Wi = inverse(W)); }
while (state.KeepRunning()) { benchmark::DoNotOptimize(Wi = nda::linalg::inv(W)); }
}

BENCHMARK_TEMPLATE(inv, 1);
31 changes: 20 additions & 11 deletions c++/nda/_impl_basic_array_view_common.hpp
@@ -438,11 +438,13 @@ auto &operator=(R const &rhs) noexcept
private:
// Implementation of the assignment from an n-dimensional array type.
template <typename RHS>
void assign_from_ndarray(RHS const &rhs) { // FIXME noexcept {
void assign_from_ndarray(RHS const &rhs) noexcept {
#ifdef NDA_ENFORCE_BOUNDCHECK
if (this->shape() != rhs.shape())
NDA_RUNTIME_ERROR << "Error in assign_from_ndarray: Size mismatch:"
<< "\n LHS.shape() = " << this->shape() << "\n RHS.shape() = " << rhs.shape();
if (this->shape() != rhs.shape()) {
std::cerr << "Error in assign_from_ndarray: Size mismatch:"
<< "\n LHS.shape() = " << this->shape() << "\n RHS.shape() = " << rhs.shape() << std::endl;
std::terminate();
}
#endif
// compile-time check if assignment is possible
static_assert(std::is_assignable_v<value_type &, get_value_t<RHS>>, "Error in assign_from_ndarray: Incompatible value types");
@@ -453,6 +455,10 @@ void assign_from_ndarray(RHS const &rhs) { // FIXME noexcept {
// do both operands have the same stride order?
static constexpr bool same_stride_order = get_layout_info<self_t>.stride_order == get_layout_info<RHS>.stride_order;

// compile-time check for device arrays to avoid runtime errors
static_assert(!(mem::on_device<self_t> or mem::on_device<RHS>) or (both_in_memory and same_stride_order and have_same_value_type_v<self_t, RHS>),
"Error in assign_from_ndarray: Assignment to/from device arrays is not supported for the given types.");

// prefer optimized options if possible
if constexpr (both_in_memory and same_stride_order) {
if (rhs.empty()) return;
@@ -470,7 +476,10 @@ void assign_from_ndarray(RHS const &rhs) { // FIXME noexcept {
auto [n_bl_dst, bl_size_dst, bl_str_dst] = *bl_layout_dst;
auto [n_bl_src, bl_size_src, bl_str_src] = *bl_layout_src;
// check that the total memory size is the same
if (n_bl_dst * bl_size_dst != n_bl_src * bl_size_src) NDA_RUNTIME_ERROR << "Error in assign_from_ndarray: Incompatible block sizes";
if (n_bl_dst * bl_size_dst != n_bl_src * bl_size_src) {
std::cerr << "Error in assign_from_ndarray: Incompatible block sizes" << std::endl;
std::terminate();
}
// if either destination or source consists of a single block, we can chunk it up to make the layouts compatible
if (n_bl_dst == 1 && n_bl_src > 1) {
n_bl_dst = n_bl_src;
@@ -494,15 +503,15 @@ void assign_from_ndarray(RHS const &rhs) { // FIXME noexcept {
}
// otherwise fallback to elementwise assignment
if constexpr (mem::on_device<self_t> || mem::on_device<RHS>) {
NDA_RUNTIME_ERROR << "Error in assign_from_ndarray: Fallback to elementwise assignment not implemented for arrays/views on the GPU";
std::cerr << "Error in assign_from_ndarray: Elementwise assignment not implemented for arrays/views on the GPU" << std::endl;
std::terminate();
}
nda::for_each(shape(), [this, &rhs](auto const &...args) { (*this)(args...) = rhs(args...); });
}

// Implementation to fill a view/array with a constant scalar value.
template <typename Scalar>
void fill_with_scalar(Scalar const &scalar) noexcept {
// we make a special implementation if the array is strided in 1d or contiguous
if constexpr (mem::on_host<self_t>) {
if constexpr (has_layout_strided_1d<self_t>) {
const long L = size();
@@ -517,23 +526,23 @@ void fill_with_scalar(Scalar const &scalar) noexcept {
} else {
for (auto &x : *this) x = scalar;
}
} else if constexpr (mem::on_device<self_t> or mem::on_unified<self_t>) { // on device
if constexpr (has_layout_strided_1d<self_t>) { // possibly contiguous
} else if constexpr (mem::on_device<self_t> or mem::on_unified<self_t>) {
if constexpr (has_layout_strided_1d<self_t>) {
if constexpr (has_contiguous_layout<self_t>) {
mem::fill_n<mem::get_addr_space<self_t>>(data(), size(), value_type(scalar));
} else {
const long stri = indexmap().min_stride();
mem::fill2D_n<mem::get_addr_space<self_t>>(data(), stri, 1, size(), value_type(scalar));
}
} else {
// check for 2D layout
auto bl_layout = get_block_layout(*this);
if (bl_layout) {
auto [n_bl, bl_size, bl_str] = *bl_layout;
mem::fill2D_n<mem::get_addr_space<self_t>>(data(), bl_str, bl_size, n_bl, value_type(scalar));
} else {
// MAM: implement recursive call to fill_with_scalar on (i,nda::ellipsis{})
NDA_RUNTIME_ERROR << "fill_with_scalar: Not implemented yet for general layout. ";
std::cerr << "Error in fill_with_scalar: Only block strided arrays/views are supported on the GPU";
std::terminate();
}
}
}
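Note on the two commits touching this file ("Do not throw in assign_from_ndarray and fill_with_scalar" and "Add compile time check to assign_from_ndarray"): unsupported device assignments are now rejected at compile time by the static_assert above, and the remaining runtime checks print to std::cerr and call std::terminate instead of throwing, which is what allows the function to be marked noexcept. A hedged illustration, assuming a CUDA-enabled build and that nda::cuarray is the device-array alias used in the nda tests (template parameters are guesses):

```cpp
#include <nda/nda.hpp>

void sketch() {
  nda::cuarray<double, 2> d(4, 4); // array in device memory (assumed alias)
  nda::array<float, 2>    h(4, 4); // host array with a different value type

  // d = h; // with this PR, this trips the static_assert in assign_from_ndarray
  //        // (have_same_value_type_v is false), so the mismatch is reported at
  //        // compile time rather than through a runtime error on the GPU path.
}
```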
11 changes: 6 additions & 5 deletions c++/nda/arithmetic.hpp
@@ -12,8 +12,9 @@

#include "./concepts.hpp"
#include "./declarations.hpp"
#include "./linalg/inv.hpp"
#include "./linalg/matmul.hpp"
#include "./linalg/det_and_inverse.hpp"
#include "./linalg/matvecmul.hpp"
#include "./macros.hpp"
#include "./stdutil/complex.hpp"
#include "./traits.hpp"
@@ -420,10 +421,10 @@ namespace nda {
static_assert(r_algebra != 'A', "Error in nda::operator*: Can not multiply a matrix by an array");
if constexpr (r_algebra == 'M')
// matrix * matrix
return matmul(std::forward<L>(l), std::forward<R>(r));
return linalg::matmul(std::forward<L>(l), std::forward<R>(r));
else
// matrix * vector
return matvecmul(std::forward<L>(l), std::forward<R>(r));
return linalg::matvecmul(std::forward<L>(l), std::forward<R>(r));
}
}

@@ -495,7 +496,7 @@ namespace nda {
// two matrices: M / M
if constexpr (l_algebra == 'M') {
static_assert(r_algebra == 'M', "Error in nda::operator*: Can not divide a matrix by an array/vector");
return std::forward<L>(l) * inverse(matrix<get_value_t<R>>{std::forward<R>(r)});
return std::forward<L>(l) * linalg::inv(matrix<get_value_t<R>>{std::forward<R>(r)});
}
}

@@ -532,7 +533,7 @@ namespace nda {
Array auto operator/(S &&s, A &&a) { // NOLINT (S&& is mandatory for proper concept Array <: typename to work)
static constexpr char algebra = get_algebra<A>;
if constexpr (algebra == 'M')
return s * inverse(matrix<get_value_t<A>>{std::forward<A>(a)});
return s * linalg::inv(matrix<get_value_t<A>>{std::forward<A>(a)});
else
return expr<'/', std::decay_t<S>, A>{s, std::forward<A>(a)};
}
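With these changes the matrix overloads of the arithmetic operators forward to the new linalg routines: matrix * matrix calls linalg::matmul, matrix * vector calls linalg::matvecmul, and dividing by a matrix multiplies by linalg::inv of it. A short sketch of the resulting semantics (variable names are illustrative):

```cpp
#include <nda/nda.hpp>

void sketch(nda::matrix<double> const &A, nda::matrix<double> const &B, nda::vector<double> const &x) {
  nda::matrix<double> C = A * B;   // dispatches to nda::linalg::matmul(A, B)
  nda::vector<double> y = A * x;   // dispatches to nda::linalg::matvecmul(A, x)
  nda::matrix<double> D = A / B;   // evaluates as A * nda::linalg::inv(B)
  nda::matrix<double> E = 2.0 / B; // evaluates as 2.0 * nda::linalg::inv(B)
}
```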
11 changes: 5 additions & 6 deletions c++/nda/basic_array.hpp
@@ -14,14 +14,16 @@
#include "./basic_array_view.hpp"
#include "./basic_functions.hpp"
#include "./concepts.hpp"
#include "./exceptions.hpp"
#include "./iterators.hpp"
#include "./layout/for_each.hpp"
#include "./layout/permutation.hpp"
#include "./layout/range.hpp"
#include "./layout/slice_static.hpp"
#include "./layout_transforms.hpp"
#include "./macros.hpp"
#include "./matrix_functions.hpp"
#include "./mem/address_space.hpp"
#include "./mem/fill.hpp"
#include "./mem/memcpy.hpp"
#include "./mem/policies.hpp"
#include "./stdutil/array.hpp"
@@ -31,17 +33,14 @@
#include <array>
#include <complex>
#include <concepts>
#include <exception>
#include <initializer_list>
#include <iostream>
#include <random>
#include <ranges>
#include <type_traits>
#include <utility>

#ifdef NDA_ENFORCE_BOUNDCHECK
#include <exception>
#include <iostream>
#endif // NDA_ENFORCE_BOUNDCHECK

namespace nda {

/**
13 changes: 5 additions & 8 deletions c++/nda/basic_array_view.hpp
@@ -14,18 +14,18 @@
#include "./clef.hpp"
#include "./concepts.hpp"
#include "./declarations.hpp"
#include "./exceptions.hpp"
#include "./iterators.hpp"
#include "layout/slice_static.hpp"
#include "./layout/for_each.hpp"
#include "./layout/idx_map.hpp"
#include "./layout/permutation.hpp"
#include "./layout/range.hpp"
#include "./layout/slice_static.hpp"
#include "./macros.hpp"
#include "./matrix_functions.hpp"
#include "./mem/address_space.hpp"
#include "./mem/fill.hpp"
#include "./mem/memcpy.hpp"
#include "./mem/memset.hpp"
#include "./mem/fill.hpp"
#include "./mem/policies.hpp"
#include "./traits.hpp"

@@ -34,16 +34,13 @@
#include <algorithm>
#include <array>
#include <cstring>
#include <exception>
#include <iostream>
#include <memory>
#include <ranges>
#include <type_traits>
#include <utility>

#ifdef NDA_ENFORCE_BOUNDCHECK
#include <exception>
#include <iostream>
#endif // NDA_ENFORCE_BOUNDCHECK

namespace std {

/**