diff --git a/cpp/open3d/core/CMakeLists.txt b/cpp/open3d/core/CMakeLists.txt
index 210cc33058a..3a04ca72164 100644
--- a/cpp/open3d/core/CMakeLists.txt
+++ b/cpp/open3d/core/CMakeLists.txt
@@ -48,6 +48,7 @@ target_sources(core PRIVATE
     linalg/LeastSquares.cpp
     linalg/LU.cpp
     linalg/Matmul.cpp
+    linalg/Gramian.cpp
     linalg/Solve.cpp
     linalg/SVD.cpp
     linalg/Tri.cpp
@@ -83,6 +84,7 @@ target_sources(core_impl PRIVATE
     linalg/LeastSquaresCPU.cpp
     linalg/LUCPU.cpp
     linalg/MatmulCPU.cpp
+    linalg/GramianCPU.cpp
     linalg/SolveCPU.cpp
     linalg/SVDCPU.cpp
     linalg/TriCPU.cpp
@@ -102,6 +104,7 @@ open3d_sycl_target_sources(core_impl PRIVATE
     linalg/LeastSquaresSYCL.cpp
     linalg/LUSYCL.cpp
     linalg/MatmulSYCL.cpp
+    linalg/GramianSYCL.cpp
     linalg/SolveSYCL.cpp
     linalg/SVDSYCL.cpp
     linalg/TriSYCL.cpp
@@ -129,6 +132,7 @@ if (BUILD_CUDA_MODULE)
         linalg/LinalgUtils.cpp
         linalg/LUCUDA.cpp
         linalg/MatmulCUDA.cpp
+        linalg/GramianCUDA.cpp
         linalg/SolveCUDA.cpp
         linalg/SVDCUDA.cpp
         linalg/TriCUDA.cu
diff --git a/cpp/open3d/core/Tensor.cpp b/cpp/open3d/core/Tensor.cpp
index 1fff67cc853..aa98ca18154 100644
--- a/cpp/open3d/core/Tensor.cpp
+++ b/cpp/open3d/core/Tensor.cpp
@@ -25,6 +25,7 @@
 #include "open3d/core/kernel/IndexReduction.h"
 #include "open3d/core/kernel/Kernel.h"
 #include "open3d/core/linalg/Det.h"
+#include "open3d/core/linalg/Gramian.h"
 #include "open3d/core/linalg/Inverse.h"
 #include "open3d/core/linalg/LU.h"
 #include "open3d/core/linalg/LeastSquares.h"
@@ -1057,6 +1058,40 @@ Tensor Tensor::T() const {
     }
 }
 
+Tensor Tensor::Gram() const {
+    int64_t n_dims = NumDims();
+    if (n_dims <= 0) {
+        return *this;
+    } else if (n_dims == 1) {
+        return Reshape({1, -1}).Gram();
+    } else if (n_dims == 2) {
+        Tensor output;
+        core::Gram(*this, output);
+        return output;
+    } else {
+        utility::LogError(
+                "Tensor::Gram() expects a Tensor with <= 2 dimensions, but "
+                "the Tensor has {} dimensions.", n_dims);
+    }
+}
+
+Tensor Tensor::RowGram() const {
+    int64_t n_dims = NumDims();
+    if (n_dims <= 0) {
+        return *this;
+    } else if (n_dims == 1) {
+        return Reshape({1, -1}).RowGram();
+    } else if (n_dims == 2) {
+        Tensor output;
+        core::RowGram(*this, output);
+        return output;
+    } else {
+        utility::LogError(
+                "Tensor::RowGram() expects a Tensor with <= 2 dimensions, "
+                "but the Tensor has {} dimensions.", n_dims);
+    }
+}
+
 double Tensor::Det() const {
     AssertTensorDtypes(*this, {Float32, Float64});
     return core::Det(*this);
diff --git a/cpp/open3d/core/Tensor.h b/cpp/open3d/core/Tensor.h
index 73de4e72df2..6f4fed49aa9 100644
--- a/cpp/open3d/core/Tensor.h
+++ b/cpp/open3d/core/Tensor.h
@@ -608,6 +608,24 @@ class Tensor : public IsDevice {
     /// 0-D and 1-D Tensor remains the same.
     Tensor T() const;
 
+    /// \brief Computes the Gram matrix (Gramian) of this tensor. Expects
+    /// the input to be a <= 2-D Tensor.
+    ///
+    /// \return The Gramian (A.T @ A) of the given tensor. For a 2D tensor
+    /// of shape {m, n}, this is a square matrix of shape {n, n}. 1D tensors
+    /// of shape {n} are reshaped to {1, n} before the operation. For 0D
+    /// tensors, the returned tensor is the same as the input.
+    Tensor Gram() const;
+
+    /// \brief Computes the row Gram matrix (A @ A.T) of this tensor.
+    /// Expects the input to be a <= 2-D Tensor.
+    ///
+    /// \return The row Gramian of the given tensor. For a 2D tensor of
+    /// shape {m, n}, this is a square matrix of shape {m, m}. 1D tensors
+    /// of shape {n} are reshaped to {1, n} before the operation. For 0D
+    /// tensors, the returned tensor is the same as the input.
+    Tensor RowGram() const;
+
     /// \brief Compute the determinant of a 2D square tensor.
     /// \return returns the determinant of the matrix (double).
     double Det() const;
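A minimal usage sketch of the new API, following the semantics documented above (not part of the patch; values mirror the unit tests added later in this diff):

    #include "open3d/core/Tensor.h"

    using namespace open3d;

    void GramUsageSketch() {
        // 2 x 3 input matrix.
        core::Tensor A = core::Tensor::Init<float>({{1, 2, 3}, {4, 5, 6}});

        core::Tensor G = A.Gram();     // A.T @ A -> shape {3, 3}
        core::Tensor R = A.RowGram();  // A @ A.T -> shape {2, 2}

        // A 1-D tensor is treated as a {1, n} row vector first, so the
        // Gram matrix of a vector is its {n, n} outer product.
        core::Tensor v = core::Tensor::Init<float>({1, 2, 3});
        core::Tensor Gv = v.Gram();  // shape {3, 3}
    }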
diff --git a/cpp/open3d/core/linalg/Gramian.cpp b/cpp/open3d/core/linalg/Gramian.cpp
new file mode 100644
index 00000000000..7865e100d36
--- /dev/null
+++ b/cpp/open3d/core/linalg/Gramian.cpp
@@ -0,0 +1,128 @@
+// ----------------------------------------------------------------------------
+// -                        Open3D: www.open3d.org                            -
+// ----------------------------------------------------------------------------
+// Copyright (c) 2018-2024 www.open3d.org
+// SPDX-License-Identifier: MIT
+// ----------------------------------------------------------------------------
+
+#include "open3d/core/linalg/Gramian.h"
+
+#include <unordered_map>
+
+#include "open3d/core/CUDAUtils.h"
+
+namespace open3d {
+namespace core {
+
+void Gram(const Tensor& A, Tensor& output) {
+    const Device device = A.GetDevice();
+    const Dtype dtype_original = A.GetDtype();
+    Dtype dtype;
+
+    if (dtype_original != core::Float32 && dtype_original != core::Float64) {
+        utility::LogDebug("Converting from {} to Float32 dtype.",
+                          dtype_original.ToString());
+        dtype = core::Float32;
+    } else {
+        dtype = dtype_original;
+    }
+
+    // Check shapes.
+    SizeVector A_shape = A.GetShape();
+
+    if (A_shape.size() != 2) {
+        utility::LogError("Tensor A must be 2D, but got {}D.", A_shape.size());
+    }
+
+    // Dispatch to backends.
+    int64_t m = A_shape[0];
+    int64_t n = A_shape[1];
+
+    if (m == 0 || n == 0) {
+        utility::LogError(
+                "Tensor shapes should not contain dimensions with zero.");
+    }
+
+    Tensor A_contiguous = A.Contiguous().To(dtype);
+    void* A_data = A_contiguous.GetDataPtr();
+
+    output = Tensor::Empty({n, n}, dtype, device);
+    void* B_data = output.GetDataPtr();
+
+    if (device.IsSYCL()) {
+#ifdef BUILD_SYCL_MODULE
+        GramSYCL(A_data, B_data, m, n, dtype, device);
+#else
+        utility::LogError("Unimplemented device.");
+#endif
+    } else if (device.IsCUDA()) {
+#ifdef BUILD_CUDA_MODULE
+        CUDAScopedDevice scoped_device(device);
+        GramCUDA(A_data, B_data, m, n, dtype, device);
+#else
+        utility::LogError("Unimplemented device.");
+#endif
+    } else {
+        GramCPU(A_data, B_data, m, n, dtype);
+    }
+
+    output = output.To(dtype_original);
+}
+
+void RowGram(const Tensor& A, Tensor& output) {
+    const Device device = A.GetDevice();
+    const Dtype dtype_original = A.GetDtype();
+    Dtype dtype;
+
+    if (dtype_original != core::Float32 && dtype_original != core::Float64) {
+        utility::LogDebug("Converting from {} to Float32 dtype.",
+                          dtype_original.ToString());
+        dtype = core::Float32;
+    } else {
+        dtype = dtype_original;
+    }
+
+    // Check shapes.
+    SizeVector A_shape = A.GetShape();
+
+    if (A_shape.size() != 2) {
+        utility::LogError("Tensor A must be 2D, but got {}D.", A_shape.size());
+    }
+
+    // Dispatch to backends.
+    int64_t m = A_shape[0];
+    int64_t n = A_shape[1];
+
+    if (m == 0 || n == 0) {
+        utility::LogError(
+                "Tensor shapes should not contain dimensions with zero.");
+    }
+
+    Tensor A_contiguous = A.Contiguous().To(dtype);
+    void* A_data = A_contiguous.GetDataPtr();
+
+    output = Tensor::Empty({m, m}, dtype, device);
+    void* B_data = output.GetDataPtr();
+
+    if (device.IsSYCL()) {
+#ifdef BUILD_SYCL_MODULE
+        RowGramSYCL(A_data, B_data, m, n, dtype, device);
+#else
+        utility::LogError("Unimplemented device.");
+#endif
+    } else if (device.IsCUDA()) {
+#ifdef BUILD_CUDA_MODULE
+        CUDAScopedDevice scoped_device(device);
+        RowGramCUDA(A_data, B_data, m, n, dtype, device);
+#else
+        utility::LogError("Unimplemented device.");
+#endif
+    } else {
+        RowGramCPU(A_data, B_data, m, n, dtype);
+    }
+
+    output = output.To(dtype_original);
+}
+
+}  // namespace core
+}  // namespace open3d
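A sketch of the dtype round-trip performed by the dispatcher above: non-float inputs are promoted to Float32 for the gemm, and the result is cast back to the original dtype. Illustration only; the Int32 input is an assumed example:

    #include "open3d/core/Tensor.h"

    using namespace open3d;

    void GramDtypeSketch() {
        // Int32 input: Gram() computes in Float32 internally, then
        // output.To(dtype_original) converts the result back to Int32.
        core::Tensor A = core::Tensor::Init<int32_t>({{1, 2}, {3, 4}});
        core::Tensor G = A.Gram();  // dtype is Int32 again
        // Large integer inputs may lose precision, since the values
        // round-trip through Float32.
    }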
diff --git a/cpp/open3d/core/linalg/Gramian.h b/cpp/open3d/core/linalg/Gramian.h
new file mode 100644
index 00000000000..179af5ccb09
--- /dev/null
+++ b/cpp/open3d/core/linalg/Gramian.h
@@ -0,0 +1,52 @@
+// ----------------------------------------------------------------------------
+// -                        Open3D: www.open3d.org                            -
+// ----------------------------------------------------------------------------
+// Copyright (c) 2018-2024 www.open3d.org
+// SPDX-License-Identifier: MIT
+// ----------------------------------------------------------------------------
+
+#pragma once
+
+#include "open3d/core/Tensor.h"
+
+namespace open3d {
+namespace core {
+
+/// Computes the Gram matrix of a Tensor (B = A.T @ A).
+void Gram(const Tensor& A, Tensor& B);
+
+/// Computes the row Gram matrix of a Tensor (B = A @ A.T).
+void RowGram(const Tensor& A, Tensor& B);
+
+#ifdef BUILD_SYCL_MODULE
+void GramSYCL(void* A_data,
+              void* B_data,
+              int64_t m,
+              int64_t n,
+              Dtype dtype,
+              const Device& device);
+void RowGramSYCL(void* A_data,
+                 void* B_data,
+                 int64_t m,
+                 int64_t n,
+                 Dtype dtype,
+                 const Device& device);
+#endif
+#ifdef BUILD_CUDA_MODULE
+void GramCUDA(void* A_data,
+              void* B_data,
+              int64_t m,
+              int64_t n,
+              Dtype dtype,
+              const Device& device);
+void RowGramCUDA(void* A_data,
+                 void* B_data,
+                 int64_t m,
+                 int64_t n,
+                 Dtype dtype,
+                 const Device& device);
+#endif
+void GramCPU(void* A_data, void* B_data, int64_t m, int64_t n, Dtype dtype);
+void RowGramCPU(void* A_data, void* B_data, int64_t m, int64_t n, Dtype dtype);
+
+}  // namespace core
+}  // namespace open3d
diff --git a/cpp/open3d/core/linalg/GramianCPU.cpp b/cpp/open3d/core/linalg/GramianCPU.cpp
new file mode 100644
index 00000000000..ae2471ba560
--- /dev/null
+++ b/cpp/open3d/core/linalg/GramianCPU.cpp
@@ -0,0 +1,35 @@
+// ----------------------------------------------------------------------------
+// -                        Open3D: www.open3d.org                            -
+// ----------------------------------------------------------------------------
+// Copyright (c) 2018-2024 www.open3d.org
+// SPDX-License-Identifier: MIT
+// ----------------------------------------------------------------------------
+
+#include "open3d/core/linalg/BlasWrapper.h"
+#include "open3d/core/linalg/Gramian.h"
+#include "open3d/core/linalg/LinalgUtils.h"
+
+namespace open3d {
+namespace core {
+
+void GramCPU(void* A_data, void* B_data, int64_t m, int64_t n, Dtype dtype) {
+    DISPATCH_LINALG_DTYPE_TO_TEMPLATE(dtype, [&]() {
+        scalar_t alpha = 1, beta = 0;
+        gemm_cpu<scalar_t>(CblasColMajor, CblasNoTrans, CblasTrans, n, n, m,
+                           alpha, static_cast<const scalar_t*>(A_data), n,
+                           static_cast<const scalar_t*>(A_data), n, beta,
+                           static_cast<scalar_t*>(B_data), n);
+    });
+}
+
+void RowGramCPU(void* A_data, void* B_data, int64_t m, int64_t n, Dtype dtype) {
+    DISPATCH_LINALG_DTYPE_TO_TEMPLATE(dtype, [&]() {
+        scalar_t alpha = 1, beta = 0;
+        gemm_cpu<scalar_t>(CblasColMajor, CblasTrans, CblasNoTrans, m, m, n,
+                           alpha, static_cast<const scalar_t*>(A_data), n,
+                           static_cast<const scalar_t*>(A_data), n, beta,
+                           static_cast<scalar_t*>(B_data), m);
+    });
+}
+
+}  // namespace core
+}  // namespace open3d
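The gemm calls above use a standard identity: a row-major m x n matrix A, reinterpreted as column-major with leading dimension n, is exactly A.T, so B = A.T @ A becomes a single gemm(NoTrans, Trans) over one buffer. A self-contained, loop-based check of that mapping (illustration only, not part of the patch):

    #include <cstdio>

    int main() {
        const int m = 2, n = 3;
        const float A[m * n] = {1, 2, 3, 4, 5, 6};  // row-major 2 x 3
        float B[n * n] = {0};
        // B[i][j] = sum_k A[k][i] * A[k][j], i.e. (A.T @ A)[i][j]; this is
        // what gemm(ColMajor, NoTrans, Trans, n, n, m, ..., lda = n)
        // computes when A's buffer is viewed as the column-major n x m
        // matrix A.T.
        for (int i = 0; i < n; ++i)
            for (int j = 0; j < n; ++j)
                for (int k = 0; k < m; ++k)
                    B[i * n + j] += A[k * n + i] * A[k * n + j];
        // Expected: 17 22 27 22 29 36 27 36 45 (matches the unit tests).
        for (int i = 0; i < n * n; ++i) std::printf("%g ", B[i]);
        std::printf("\n");
        return 0;
    }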
diff --git a/cpp/open3d/core/linalg/GramianCUDA.cpp b/cpp/open3d/core/linalg/GramianCUDA.cpp
new file mode 100644
index 00000000000..56aedcc0ed5
--- /dev/null
+++ b/cpp/open3d/core/linalg/GramianCUDA.cpp
@@ -0,0 +1,55 @@
+// ----------------------------------------------------------------------------
+// -                        Open3D: www.open3d.org                            -
+// ----------------------------------------------------------------------------
+// Copyright (c) 2018-2024 www.open3d.org
+// SPDX-License-Identifier: MIT
+// ----------------------------------------------------------------------------
+
+#include "open3d/core/linalg/BlasWrapper.h"
+#include "open3d/core/linalg/Gramian.h"
+#include "open3d/core/linalg/LinalgUtils.h"
+#include "open3d/utility/Logging.h"
+
+namespace open3d {
+namespace core {
+
+void GramCUDA(void* A_data,
+              void* B_data,
+              int64_t m,
+              int64_t n,
+              Dtype dtype,
+              const Device& device) {
+    cublasHandle_t handle = CuBLASContext::GetInstance().GetHandle(device);
+    DISPATCH_LINALG_DTYPE_TO_TEMPLATE(dtype, [&]() {
+        scalar_t alpha = 1, beta = 0;
+        OPEN3D_CUBLAS_CHECK(
+                gemm_cuda<scalar_t>(handle, CUBLAS_OP_N, CUBLAS_OP_T, n, n, m,
+                                    &alpha,
+                                    static_cast<const scalar_t*>(A_data), n,
+                                    static_cast<const scalar_t*>(A_data), n,
+                                    &beta, static_cast<scalar_t*>(B_data), n),
+                "cuda gemm failed");
+    });
+}
+
+void RowGramCUDA(void* A_data,
+                 void* B_data,
+                 int64_t m,
+                 int64_t n,
+                 Dtype dtype,
+                 const Device& device) {
+    cublasHandle_t handle = CuBLASContext::GetInstance().GetHandle(device);
+    DISPATCH_LINALG_DTYPE_TO_TEMPLATE(dtype, [&]() {
+        scalar_t alpha = 1, beta = 0;
+        OPEN3D_CUBLAS_CHECK(
+                gemm_cuda<scalar_t>(handle, CUBLAS_OP_T, CUBLAS_OP_N, m, m, n,
+                                    &alpha,
+                                    static_cast<const scalar_t*>(A_data), n,
+                                    static_cast<const scalar_t*>(A_data), n,
+                                    &beta, static_cast<scalar_t*>(B_data), m),
+                "cuda gemm failed");
+    });
+}
+
+}  // namespace core
+}  // namespace open3d
diff --git a/cpp/open3d/core/linalg/GramianSYCL.cpp b/cpp/open3d/core/linalg/GramianSYCL.cpp
new file mode 100644
index 00000000000..4530f69470f
--- /dev/null
+++ b/cpp/open3d/core/linalg/GramianSYCL.cpp
@@ -0,0 +1,52 @@
+// ----------------------------------------------------------------------------
+// -                        Open3D: www.open3d.org                            -
+// ----------------------------------------------------------------------------
+// Copyright (c) 2018-2024 www.open3d.org
+// SPDX-License-Identifier: MIT
+// ----------------------------------------------------------------------------
+
+#include <sycl/sycl.hpp>
+
+#include "oneapi/mkl.hpp"
+#include "open3d/core/SYCLContext.h"
+#include "open3d/core/linalg/Gramian.h"
+#include "open3d/core/linalg/LinalgUtils.h"
+#include "open3d/utility/Logging.h"
+
+namespace open3d {
+namespace core {
+
+void GramSYCL(void* A_data,
+              void* B_data,
+              int64_t m,
+              int64_t n,
+              Dtype dtype,
+              const Device& device) {
+    using namespace oneapi::mkl;
+    sycl::queue queue = sy::SYCLContext::GetInstance().GetDefaultQueue(device);
+    DISPATCH_LINALG_DTYPE_TO_TEMPLATE(dtype, [&]() {
+        scalar_t alpha = 1, beta = 0;
+        blas::column_major::gemm(queue, transpose::N, transpose::T, n, n, m,
+                                 alpha, static_cast<const scalar_t*>(A_data),
+                                 n, static_cast<const scalar_t*>(A_data), n,
+                                 beta, static_cast<scalar_t*>(B_data), n)
+                .wait_and_throw();
+    });
+}
+
+void RowGramSYCL(void* A_data,
+                 void* B_data,
+                 int64_t m,
+                 int64_t n,
+                 Dtype dtype,
+                 const Device& device) {
+    using namespace oneapi::mkl;
+    sycl::queue queue = sy::SYCLContext::GetInstance().GetDefaultQueue(device);
+    DISPATCH_LINALG_DTYPE_TO_TEMPLATE(dtype, [&]() {
+        scalar_t alpha = 1, beta = 0;
+        blas::column_major::gemm(queue, transpose::T, transpose::N, m, m, n,
+                                 alpha, static_cast<const scalar_t*>(A_data),
+                                 n, static_cast<const scalar_t*>(A_data), n,
+                                 beta, static_cast<scalar_t*>(B_data), m)
+                .wait_and_throw();
+    });
+}
+
+}  // namespace core
+}  // namespace open3d
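All three backends issue the same column-major gemm, so a given input should produce matching Gramians on every device. A hedged sketch of a cross-device consistency check (device availability depends on the build; AllClose is the existing Tensor comparison helper):

    #include "open3d/core/Tensor.h"

    using namespace open3d;

    void GramCrossDeviceSketch() {
        core::Tensor A = core::Tensor::Init<float>({{1, 2, 3}, {4, 5, 6}});
        core::Tensor G_cpu = A.Gram();

        // Assumed: a CUDA-enabled build with a visible GPU.
        core::Tensor G_gpu =
                A.To(core::Device("CUDA:0")).Gram().To(core::Device("CPU:0"));

        // Both equal A.T @ A, so they should match elementwise.
        bool same = G_cpu.AllClose(G_gpu);
        (void)same;
    }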
diff --git a/cpp/pybind/core/tensor.cpp b/cpp/pybind/core/tensor.cpp
index e9a7be5919a..be314f6361a 100644
--- a/cpp/pybind/core/tensor.cpp
+++ b/cpp/pybind/core/tensor.cpp
@@ -570,6 +570,13 @@ tuple `output` tensor of shape {n,n} and `ipiv` tensor of shape {n}, where
     tensor.def("__matmul__", &Tensor::Matmul,
                "Computes matrix multiplication of a"
                " 2D tensor with another tensor of compatible shape.");
+    tensor.def("gram", &Tensor::Gram,
+               "Computes the Gram matrix of a given <=2D tensor. 1D tensors "
+               "of shape {n} are expanded to 2D by reshaping them to {1, n}.");
+    tensor.def(
+            "row_gram", &Tensor::RowGram,
+            "Computes the row Gram matrix of a given <=2D tensor. 1D tensors "
+            "of shape {n} are expanded to 2D by reshaping them to {1, n}.");
     tensor.def("lstsq", &Tensor::LeastSquares,
                "Solves the linear system AX = B with QR decomposition and "
                "returns X. A is a (m, n) matrix with m >= n.",
diff --git a/cpp/tests/core/Linalg.cpp b/cpp/tests/core/Linalg.cpp
index 3bd9775e534..6e4866a4ace 100644
--- a/cpp/tests/core/Linalg.cpp
+++ b/cpp/tests/core/Linalg.cpp
@@ -72,6 +72,86 @@ TEST_P(LinalgPermuteDevices, Matmul) {
     EXPECT_ANY_THROW(A.Matmul(core::Tensor::Zeros({2, 4}, dtype)));
 }
 
+TEST_P(LinalgPermuteDevices, Gram) {
+    const float EPSILON = 1e-8;
+
+    core::Device device = GetParam();
+    core::Dtype dtype = core::Float32;
+
+    // Gram test.
+    core::Tensor A =
+            core::Tensor::Init<float>({{1, 2, 3}, {4, 5, 6}}, device);
+
+    core::Tensor B = A.Gram();
+    EXPECT_EQ(B.GetShape(), core::SizeVector({3, 3}));
+    std::vector<float> B_data = B.ToFlatVector<float>();
+    std::vector<float> B_gt = {17, 22, 27, 22, 29, 36, 27, 36, 45};
+    for (int i = 0; i < 9; ++i) {
+        EXPECT_NEAR(B_data[i], B_gt[i], EPSILON);
+    }
+
+    // The Gram matrix must equal the result of A.T @ A.
+    core::Tensor C = A.T().Matmul(A);
+    std::vector<float> C_data = C.ToFlatVector<float>();
+    for (int i = 0; i < 9; ++i) {
+        EXPECT_NEAR(C_data[i], B_gt[i], EPSILON);
+    }
+
+    // Non-contiguous Gram test.
+    core::Tensor A_slice = A.Slice(1, 0, 3, 2);
+    core::Tensor B_slice = A_slice.Gram();
+    EXPECT_EQ(B_slice.GetShape(), core::SizeVector({2, 2}));
+
+    std::vector<float> B_slice_data = B_slice.ToFlatVector<float>();
+    std::vector<float> B_slice_gt = {17, 27, 27, 45};
+    for (int i = 0; i < 4; ++i) {
+        EXPECT_NEAR(B_slice_data[i], B_slice_gt[i], EPSILON);
+    }
+
+    // Incompatible shape test.
+    EXPECT_ANY_THROW(core::Tensor::Zeros({3, 0}, dtype, device).Gram());
+    EXPECT_ANY_THROW(core::Tensor::Zeros({3, 3, 3}, dtype, device).Gram());
+}
+
+TEST_P(LinalgPermuteDevices, RowGram) {
+    const float EPSILON = 1e-8;
+
+    core::Device device = GetParam();
+    core::Dtype dtype = core::Float32;
+
+    // RowGram test.
+    core::Tensor A =
+            core::Tensor::Init<float>({{1, 2, 3}, {4, 5, 6}}, device);
+
+    core::Tensor B = A.RowGram();
+    EXPECT_EQ(B.GetShape(), core::SizeVector({2, 2}));
+    std::vector<float> B_data = B.ToFlatVector<float>();
+    std::vector<float> B_gt = {14, 32, 32, 77};
+    for (int i = 0; i < 4; ++i) {
+        EXPECT_NEAR(B_data[i], B_gt[i], EPSILON);
+    }
+
+    // The row Gram matrix must equal the result of A @ A.T.
+    core::Tensor C = A.Matmul(A.T());
+    std::vector<float> C_data = C.ToFlatVector<float>();
+    for (int i = 0; i < 4; ++i) {
+        EXPECT_NEAR(C_data[i], B_gt[i], EPSILON);
+    }
+
+    // Non-contiguous RowGram test.
+    core::Tensor A_slice = A.Slice(1, 0, 3, 2);
+    core::Tensor B_slice = A_slice.RowGram();
+    EXPECT_EQ(B_slice.GetShape(), core::SizeVector({2, 2}));
+
+    std::vector<float> B_slice_data = B_slice.ToFlatVector<float>();
+    std::vector<float> B_slice_gt = {10, 22, 22, 52};
+    for (int i = 0; i < 4; ++i) {
+        EXPECT_NEAR(B_slice_data[i], B_slice_gt[i], EPSILON);
+    }
+
+    // Incompatible shape test.
+    EXPECT_ANY_THROW(core::Tensor::Zeros({3, 0}, dtype, device).RowGram());
+    EXPECT_ANY_THROW(core::Tensor::Zeros({3, 3, 3}, dtype, device).RowGram());
+}
+
 TEST_P(LinalgPermuteDevices, AddMM) {
     const float EPSILON = 1e-8;