diff --git a/cpp/open3d/core/CMakeLists.txt b/cpp/open3d/core/CMakeLists.txt
index 210cc33058a..3a04ca72164 100644
--- a/cpp/open3d/core/CMakeLists.txt
+++ b/cpp/open3d/core/CMakeLists.txt
@@ -48,6 +48,7 @@ target_sources(core PRIVATE
     linalg/LeastSquares.cpp
     linalg/LU.cpp
     linalg/Matmul.cpp
+    linalg/Gramian.cpp
     linalg/Solve.cpp
     linalg/SVD.cpp
     linalg/Tri.cpp
@@ -83,6 +84,7 @@ target_sources(core_impl PRIVATE
     linalg/LeastSquaresCPU.cpp
     linalg/LUCPU.cpp
     linalg/MatmulCPU.cpp
+    linalg/GramianCPU.cpp
     linalg/SolveCPU.cpp
     linalg/SVDCPU.cpp
     linalg/TriCPU.cpp
@@ -102,6 +104,7 @@ open3d_sycl_target_sources(core_impl PRIVATE
     linalg/LeastSquaresSYCL.cpp
     linalg/LUSYCL.cpp
     linalg/MatmulSYCL.cpp
+    linalg/GramianSYCL.cpp
     linalg/SolveSYCL.cpp
     linalg/SVDSYCL.cpp
     linalg/TriSYCL.cpp
@@ -129,6 +132,7 @@ if (BUILD_CUDA_MODULE)
         linalg/LinalgUtils.cpp
         linalg/LUCUDA.cpp
         linalg/MatmulCUDA.cpp
+        linalg/GramianCUDA.cpp
         linalg/SolveCUDA.cpp
         linalg/SVDCUDA.cpp
         linalg/TriCUDA.cu
diff --git a/cpp/open3d/core/Tensor.cpp b/cpp/open3d/core/Tensor.cpp
index 1fff67cc853..aa98ca18154 100644
--- a/cpp/open3d/core/Tensor.cpp
+++ b/cpp/open3d/core/Tensor.cpp
@@ -25,6 +25,7 @@
 #include "open3d/core/kernel/IndexReduction.h"
 #include "open3d/core/kernel/Kernel.h"
 #include "open3d/core/linalg/Det.h"
+#include "open3d/core/linalg/Gramian.h"
 #include "open3d/core/linalg/Inverse.h"
 #include "open3d/core/linalg/LU.h"
 #include "open3d/core/linalg/LeastSquares.h"
@@ -1057,6 +1058,40 @@ Tensor Tensor::T() const {
     }
 }
 
+Tensor Tensor::Gram() const {
+    int64_t n_dims = NumDims();
+    if (n_dims <= 0) {
+        return *this;
+    } else if (n_dims == 1) {
+        return Reshape({1, -1}).Gram();
+    } else if (n_dims == 2) {
+        Tensor output;
+        core::Gram(*this, output);
+        return output;
+    } else {
+        utility::LogError(
+                "Tensor::Gram() expects a Tensor with <= 2 dimensions, but "
+                "the Tensor has {} dimensions.", n_dims);
+    }
+}
+
+Tensor Tensor::RowGram() const {
+    int64_t n_dims = NumDims();
+    if (n_dims <= 0) {
+        return *this;
+    } else if (n_dims == 1) {
+        return Reshape({1, -1}).RowGram();
+    } else if (n_dims == 2) {
+        Tensor output;
+        core::RowGram(*this, output);
+        return output;
+    } else {
+        utility::LogError(
+                "Tensor::RowGram() expects a Tensor with <= 2 dimensions, "
+                "but the Tensor has {} dimensions.", n_dims);
+    }
+}
+
 double Tensor::Det() const {
     AssertTensorDtypes(*this, {Float32, Float64});
     return core::Det(*this);
diff --git a/cpp/open3d/core/Tensor.h b/cpp/open3d/core/Tensor.h
index 73de4e72df2..6f4fed49aa9 100644
--- a/cpp/open3d/core/Tensor.h
+++ b/cpp/open3d/core/Tensor.h
@@ -608,6 +608,24 @@ class Tensor : public IsDevice {
     /// 0-D and 1-D Tensor remains the same.
     Tensor T() const;
 
+    /// \brief Computes the Gram matrix (Gramian) of this tensor. Expects
+    /// the input to be a <= 2-D Tensor.
+    ///
+    /// \return The Gramian (A.T @ A) of the given tensor. For a 2D tensor
+    /// of shape {m, n}, this is a square matrix of shape {n, n}. 1D tensors
+    /// of shape {n} are reshaped to {1, n} before the operation. For 0D
+    /// tensors, the returned tensor is the same as the input.
+    Tensor Gram() const;
+
+    /// \brief Computes the row Gram matrix (A @ A.T) of this tensor.
+    /// Expects the input to be a <= 2-D Tensor.
+    ///
+    /// \return The row Gramian of the given tensor. For a 2D tensor of
+    /// shape {m, n}, this is a square matrix of shape {m, m}. 1D tensors
+    /// of shape {n} are reshaped to {1, n} before the operation. For 0D
+    /// tensors, the returned tensor is the same as the input.
+    Tensor RowGram() const;
+
     /// \brief Compute the determinant of a 2D square tensor.
     /// \return returns the determinant of the matrix (double).
     double Det() const;
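A minimal usage sketch of the new API, following the semantics documented above (not part of the patch; values mirror the unit tests added later in this diff):

    #include "open3d/core/Tensor.h"

    using namespace open3d;

    void GramUsageSketch() {
        // 2 x 3 input matrix.
        core::Tensor A = core::Tensor::Init<float>({{1, 2, 3}, {4, 5, 6}});

        core::Tensor G = A.Gram();     // A.T @ A -> shape {3, 3}
        core::Tensor R = A.RowGram();  // A @ A.T -> shape {2, 2}

        // A 1-D tensor is treated as a {1, n} row vector first, so the
        // Gram matrix of a vector is its {n, n} outer product.
        core::Tensor v = core::Tensor::Init<float>({1, 2, 3});
        core::Tensor Gv = v.Gram();  // shape {3, 3}
    }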
diff --git a/cpp/open3d/core/linalg/Gramian.cpp b/cpp/open3d/core/linalg/Gramian.cpp
new file mode 100644
index 00000000000..7865e100d36
--- /dev/null
+++ b/cpp/open3d/core/linalg/Gramian.cpp
@@ -0,0 +1,128 @@
+// ----------------------------------------------------------------------------
+// -                        Open3D: www.open3d.org                            -
+// ----------------------------------------------------------------------------
+// Copyright (c) 2018-2024 www.open3d.org
+// SPDX-License-Identifier: MIT
+// ----------------------------------------------------------------------------
+
+#include "open3d/core/linalg/Gramian.h"
+
+#include <unordered_map>
+
+#include "open3d/core/CUDAUtils.h"
+
+namespace open3d {
+namespace core {
+
+void Gram(const Tensor& A, Tensor& output) {
+    const Device device = A.GetDevice();
+    const Dtype dtype_original = A.GetDtype();
+    Dtype dtype;
+
+    if (dtype_original != core::Float32 && dtype_original != core::Float64) {
+        utility::LogDebug("Converting from {} to Float32 dtype.",
+                          dtype_original.ToString());
+        dtype = core::Float32;
+    } else {
+        dtype = dtype_original;
+    }
+
+    // Check shapes.
+    SizeVector A_shape = A.GetShape();
+
+    if (A_shape.size() != 2) {
+        utility::LogError("Tensor A must be 2D, but got {}D.", A_shape.size());
+    }
+
+    // Dispatch to backends.
+    int64_t m = A_shape[0];
+    int64_t n = A_shape[1];
+
+    if (m == 0 || n == 0) {
+        utility::LogError(
+                "Tensor shapes should not contain dimensions with zero.");
+    }
+
+    Tensor A_contiguous = A.Contiguous().To(dtype);
+    void* A_data = A_contiguous.GetDataPtr();
+
+    output = Tensor::Empty({n, n}, dtype, device);
+    void* B_data = output.GetDataPtr();
+
+    if (device.IsSYCL()) {
+#ifdef BUILD_SYCL_MODULE
+        GramSYCL(A_data, B_data, m, n, dtype, device);
+#else
+        utility::LogError("Unimplemented device.");
+#endif
+    } else if (device.IsCUDA()) {
+#ifdef BUILD_CUDA_MODULE
+        CUDAScopedDevice scoped_device(device);
+        GramCUDA(A_data, B_data, m, n, dtype, device);
+#else
+        utility::LogError("Unimplemented device.");
+#endif
+    } else {
+        GramCPU(A_data, B_data, m, n, dtype);
+    }
+
+    output = output.To(dtype_original);
+}
+
+void RowGram(const Tensor& A, Tensor& output) {
+    const Device device = A.GetDevice();
+    const Dtype dtype_original = A.GetDtype();
+    Dtype dtype;
+
+    if (dtype_original != core::Float32 && dtype_original != core::Float64) {
+        utility::LogDebug("Converting from {} to Float32 dtype.",
+                          dtype_original.ToString());
+        dtype = core::Float32;
+    } else {
+        dtype = dtype_original;
+    }
+
+    // Check shapes.
+    SizeVector A_shape = A.GetShape();
+
+    if (A_shape.size() != 2) {
+        utility::LogError("Tensor A must be 2D, but got {}D.", A_shape.size());
+    }
+
+    // Dispatch to backends.
+    int64_t m = A_shape[0];
+    int64_t n = A_shape[1];
+
+    if (m == 0 || n == 0) {
+        utility::LogError(
+                "Tensor shapes should not contain dimensions with zero.");
+    }
+
+    Tensor A_contiguous = A.Contiguous().To(dtype);
+    void* A_data = A_contiguous.GetDataPtr();
+
+    output = Tensor::Empty({m, m}, dtype, device);
+    void* B_data = output.GetDataPtr();
+
+    if (device.IsSYCL()) {
+#ifdef BUILD_SYCL_MODULE
+        RowGramSYCL(A_data, B_data, m, n, dtype, device);
+#else
+        utility::LogError("Unimplemented device.");
+#endif
+    } else if (device.IsCUDA()) {
+#ifdef BUILD_CUDA_MODULE
+        CUDAScopedDevice scoped_device(device);
+        RowGramCUDA(A_data, B_data, m, n, dtype, device);
+#else
+        utility::LogError("Unimplemented device.");
+#endif
+    } else {
+        RowGramCPU(A_data, B_data, m, n, dtype);
+    }
+
+    output = output.To(dtype_original);
+}
+
+}  // namespace core
+}  // namespace open3d
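A sketch of the dtype round-trip performed by the dispatcher above: non-float inputs are promoted to Float32 for the gemm, and the result is cast back to the original dtype. Illustration only; the Int32 input is an assumed example:

    #include "open3d/core/Tensor.h"

    using namespace open3d;

    void GramDtypeSketch() {
        // Int32 input: Gram() computes in Float32 internally, then
        // output.To(dtype_original) converts the result back to Int32.
        core::Tensor A = core::Tensor::Init<int32_t>({{1, 2}, {3, 4}});
        core::Tensor G = A.Gram();  // dtype is Int32 again
        // Large integer inputs may lose precision, since the values
        // round-trip through Float32.
    }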
diff --git a/cpp/open3d/core/linalg/Gramian.h b/cpp/open3d/core/linalg/Gramian.h
new file mode 100644
index 00000000000..179af5ccb09
--- /dev/null
+++ b/cpp/open3d/core/linalg/Gramian.h
@@ -0,0 +1,52 @@
+// ----------------------------------------------------------------------------
+// -                        Open3D: www.open3d.org                            -
+// ----------------------------------------------------------------------------
+// Copyright (c) 2018-2024 www.open3d.org
+// SPDX-License-Identifier: MIT
+// ----------------------------------------------------------------------------
+
+#pragma once
+
+#include "open3d/core/Tensor.h"
+
+namespace open3d {
+namespace core {
+
+/// Computes the Gram matrix of a Tensor (B = A.T @ A).
+void Gram(const Tensor& A, Tensor& B);
+
+/// Computes the row Gram matrix of a Tensor (B = A @ A.T).
+void RowGram(const Tensor& A, Tensor& B);
+
+#ifdef BUILD_SYCL_MODULE
+void GramSYCL(void* A_data,
+              void* B_data,
+              int64_t m,
+              int64_t n,
+              Dtype dtype,
+              const Device& device);
+void RowGramSYCL(void* A_data,
+                 void* B_data,
+                 int64_t m,
+                 int64_t n,
+                 Dtype dtype,
+                 const Device& device);
+#endif
+#ifdef BUILD_CUDA_MODULE
+void GramCUDA(void* A_data,
+              void* B_data,
+              int64_t m,
+              int64_t n,
+              Dtype dtype,
+              const Device& device);
+void RowGramCUDA(void* A_data,
+                 void* B_data,
+                 int64_t m,
+                 int64_t n,
+                 Dtype dtype,
+                 const Device& device);
+#endif
+void GramCPU(void* A_data, void* B_data, int64_t m, int64_t n, Dtype dtype);
+void RowGramCPU(void* A_data, void* B_data, int64_t m, int64_t n, Dtype dtype);
+
+}  // namespace core
+}  // namespace open3d
diff --git a/cpp/open3d/core/linalg/GramianCPU.cpp b/cpp/open3d/core/linalg/GramianCPU.cpp
new file mode 100644
index 00000000000..ae2471ba560
--- /dev/null
+++ b/cpp/open3d/core/linalg/GramianCPU.cpp
@@ -0,0 +1,35 @@
+// ----------------------------------------------------------------------------
+// -                        Open3D: www.open3d.org                            -
+// ----------------------------------------------------------------------------
+// Copyright (c) 2018-2024 www.open3d.org
+// SPDX-License-Identifier: MIT
+// ----------------------------------------------------------------------------
+
+#include "open3d/core/linalg/BlasWrapper.h"
+#include "open3d/core/linalg/Gramian.h"
+#include "open3d/core/linalg/LinalgUtils.h"
+
+namespace open3d {
+namespace core {
+
+void GramCPU(void* A_data, void* B_data, int64_t m, int64_t n, Dtype dtype) {
+    DISPATCH_LINALG_DTYPE_TO_TEMPLATE(dtype, [&]() {
+        scalar_t alpha = 1, beta = 0;
+        gemm_cpu<scalar_t>(CblasColMajor, CblasNoTrans, CblasTrans, n, n, m,
+                           alpha, static_cast<const scalar_t*>(A_data), n,
+                           static_cast<const scalar_t*>(A_data), n, beta,
+                           static_cast<scalar_t*>(B_data), n);
+    });
+}
+
+void RowGramCPU(void* A_data, void* B_data, int64_t m, int64_t n, Dtype dtype) {
+    DISPATCH_LINALG_DTYPE_TO_TEMPLATE(dtype, [&]() {
+        scalar_t alpha = 1, beta = 0;
+        gemm_cpu<scalar_t>(CblasColMajor, CblasTrans, CblasNoTrans, m, m, n,
+                           alpha, static_cast<const scalar_t*>(A_data), n,
+                           static_cast<const scalar_t*>(A_data), n, beta,
+                           static_cast<scalar_t*>(B_data), m);
+    });
+}
+
+}  // namespace core
+}  // namespace open3d
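The gemm calls above use a standard identity: a row-major m x n matrix A, reinterpreted as column-major with leading dimension n, is exactly A.T, so B = A.T @ A becomes a single gemm(NoTrans, Trans) over one buffer. A self-contained, loop-based check of that mapping (illustration only, not part of the patch):

    #include <cstdio>

    int main() {
        const int m = 2, n = 3;
        const float A[m * n] = {1, 2, 3, 4, 5, 6};  // row-major 2 x 3
        float B[n * n] = {0};
        // B[i][j] = sum_k A[k][i] * A[k][j], i.e. (A.T @ A)[i][j]; this is
        // what gemm(ColMajor, NoTrans, Trans, n, n, m, ..., lda = n)
        // computes when A's buffer is viewed as the column-major n x m
        // matrix A.T.
        for (int i = 0; i < n; ++i)
            for (int j = 0; j < n; ++j)
                for (int k = 0; k < m; ++k)
                    B[i * n + j] += A[k * n + i] * A[k * n + j];
        // Expected: 17 22 27 22 29 36 27 36 45 (matches the unit tests).
        for (int i = 0; i < n * n; ++i) std::printf("%g ", B[i]);
        std::printf("\n");
        return 0;
    }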
diff --git a/cpp/open3d/core/linalg/GramianCUDA.cpp b/cpp/open3d/core/linalg/GramianCUDA.cpp
new file mode 100644
index 00000000000..56aedcc0ed5
--- /dev/null
+++ b/cpp/open3d/core/linalg/GramianCUDA.cpp
@@ -0,0 +1,55 @@
+// ----------------------------------------------------------------------------
+// -                        Open3D: www.open3d.org                            -
+// ----------------------------------------------------------------------------
+// Copyright (c) 2018-2024 www.open3d.org
+// SPDX-License-Identifier: MIT
+// ----------------------------------------------------------------------------
+
+#include "open3d/core/linalg/BlasWrapper.h"
+#include "open3d/core/linalg/Gramian.h"
+#include "open3d/core/linalg/LinalgUtils.h"
+#include "open3d/utility/Logging.h"
+
+namespace open3d {
+namespace core {
+
+void GramCUDA(void* A_data,
+              void* B_data,
+              int64_t m,
+              int64_t n,
+              Dtype dtype,
+              const Device& device) {
+    cublasHandle_t handle = CuBLASContext::GetInstance().GetHandle(device);
+    DISPATCH_LINALG_DTYPE_TO_TEMPLATE(dtype, [&]() {
+        scalar_t alpha = 1, beta = 0;
+        OPEN3D_CUBLAS_CHECK(
+                gemm_cuda<scalar_t>(handle, CUBLAS_OP_N, CUBLAS_OP_T, n, n, m,
+                                    &alpha,
+                                    static_cast<const scalar_t*>(A_data), n,
+                                    static_cast<const scalar_t*>(A_data), n,
+                                    &beta, static_cast<scalar_t*>(B_data), n),
+                "cuda gemm failed");
+    });
+}
+
+void RowGramCUDA(void* A_data,
+                 void* B_data,
+                 int64_t m,
+                 int64_t n,
+                 Dtype dtype,
+                 const Device& device) {
+    cublasHandle_t handle = CuBLASContext::GetInstance().GetHandle(device);
+    DISPATCH_LINALG_DTYPE_TO_TEMPLATE(dtype, [&]() {
+        scalar_t alpha = 1, beta = 0;
+        OPEN3D_CUBLAS_CHECK(
+                gemm_cuda<scalar_t>(handle, CUBLAS_OP_T, CUBLAS_OP_N, m, m, n,
+                                    &alpha,
+                                    static_cast<const scalar_t*>(A_data), n,
+                                    static_cast<const scalar_t*>(A_data), n,
+                                    &beta, static_cast<scalar_t*>(B_data), m),
+                "cuda gemm failed");
+    });
+}
+
+}  // namespace core
+}  // namespace open3d
diff --git a/cpp/open3d/core/linalg/GramianSYCL.cpp b/cpp/open3d/core/linalg/GramianSYCL.cpp
new file mode 100644
index 00000000000..4530f69470f
--- /dev/null
+++ b/cpp/open3d/core/linalg/GramianSYCL.cpp
@@ -0,0 +1,52 @@
+// ----------------------------------------------------------------------------
+// -                        Open3D: www.open3d.org                            -
+// ----------------------------------------------------------------------------
+// Copyright (c) 2018-2024 www.open3d.org
+// SPDX-License-Identifier: MIT
+// ----------------------------------------------------------------------------
+
+#include <sycl/sycl.hpp>
+
+#include "oneapi/mkl.hpp"
+#include "open3d/core/SYCLContext.h"
+#include "open3d/core/linalg/Gramian.h"
+#include "open3d/core/linalg/LinalgUtils.h"
+#include "open3d/utility/Logging.h"
+
+namespace open3d {
+namespace core {
+
+void GramSYCL(void* A_data,
+              void* B_data,
+              int64_t m,
+              int64_t n,
+              Dtype dtype,
+              const Device& device) {
+    using namespace oneapi::mkl;
+    sycl::queue queue = sy::SYCLContext::GetInstance().GetDefaultQueue(device);
+    DISPATCH_LINALG_DTYPE_TO_TEMPLATE(dtype, [&]() {
+        scalar_t alpha = 1, beta = 0;
+        blas::column_major::gemm(queue, transpose::N, transpose::T, n, n, m,
+                                 alpha, static_cast<const scalar_t*>(A_data),
+                                 n, static_cast<const scalar_t*>(A_data), n,
+                                 beta, static_cast<scalar_t*>(B_data), n)
+                .wait_and_throw();
+    });
+}
+
+void RowGramSYCL(void* A_data,
+                 void* B_data,
+                 int64_t m,
+                 int64_t n,
+                 Dtype dtype,
+                 const Device& device) {
+    using namespace oneapi::mkl;
+    sycl::queue queue = sy::SYCLContext::GetInstance().GetDefaultQueue(device);
+    DISPATCH_LINALG_DTYPE_TO_TEMPLATE(dtype, [&]() {
+        scalar_t alpha = 1, beta = 0;
+        blas::column_major::gemm(queue, transpose::T, transpose::N, m, m, n,
+                                 alpha, static_cast<const scalar_t*>(A_data),
+                                 n, static_cast<const scalar_t*>(A_data), n,
+                                 beta, static_cast<scalar_t*>(B_data), m)
+                .wait_and_throw();
+    });
+}
+
+}  // namespace core
+}  // namespace open3d
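All three backends issue the same column-major gemm, so a given input should produce matching Gramians on every device. A hedged sketch of a cross-device consistency check (device availability depends on the build; AllClose is the existing Tensor comparison helper):

    #include "open3d/core/Tensor.h"

    using namespace open3d;

    void GramCrossDeviceSketch() {
        core::Tensor A = core::Tensor::Init<float>({{1, 2, 3}, {4, 5, 6}});
        core::Tensor G_cpu = A.Gram();

        // Assumed: a CUDA-enabled build with a visible GPU.
        core::Tensor G_gpu =
                A.To(core::Device("CUDA:0")).Gram().To(core::Device("CPU:0"));

        // Both equal A.T @ A, so they should match elementwise.
        bool same = G_cpu.AllClose(G_gpu);
        (void)same;
    }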
diff --git a/cpp/pybind/core/tensor.cpp b/cpp/pybind/core/tensor.cpp
index e9a7be5919a..be314f6361a 100644
--- a/cpp/pybind/core/tensor.cpp
+++ b/cpp/pybind/core/tensor.cpp
@@ -570,6 +570,13 @@ tuple `output` tensor of shape {n,n} and `ipiv` tensor of shape {n}, where
     tensor.def("__matmul__", &Tensor::Matmul,
                "Computes matrix multiplication of a"
                " 2D tensor with another tensor of compatible shape.");
+    tensor.def("gram", &Tensor::Gram,
+               "Computes the Gram matrix of a given <=2D tensor. 1D tensors "
+               "of shape {n} are expanded to 2D by reshaping them to {1, n}.");
+    tensor.def(
+            "row_gram", &Tensor::RowGram,
+            "Computes the row Gram matrix of a given <=2D tensor. 1D tensors "
+            "of shape {n} are expanded to 2D by reshaping them to {1, n}.");
     tensor.def("lstsq", &Tensor::LeastSquares,
                "Solves the linear system AX = B with QR decomposition and "
                "returns X. A is a (m, n) matrix with m >= n.",
diff --git a/cpp/tests/core/Linalg.cpp b/cpp/tests/core/Linalg.cpp
index 3bd9775e534..6e4866a4ace 100644
--- a/cpp/tests/core/Linalg.cpp
+++ b/cpp/tests/core/Linalg.cpp
@@ -72,6 +72,86 @@ TEST_P(LinalgPermuteDevices, Matmul) {
     EXPECT_ANY_THROW(A.Matmul(core::Tensor::Zeros({2, 4}, dtype)));
 }
 
+TEST_P(LinalgPermuteDevices, Gram) {
+    const float EPSILON = 1e-8;
+
+    core::Device device = GetParam();
+    core::Dtype dtype = core::Float32;
+
+    // Gram test.
+    core::Tensor A =
+            core::Tensor::Init<float>({{1, 2, 3}, {4, 5, 6}}, device);
+
+    core::Tensor B = A.Gram();
+    EXPECT_EQ(B.GetShape(), core::SizeVector({3, 3}));
+    std::vector<float> B_data = B.ToFlatVector<float>();
+    std::vector<float> B_gt = {17, 22, 27, 22, 29, 36, 27, 36, 45};
+    for (int i = 0; i < 9; ++i) {
+        EXPECT_NEAR(B_data[i], B_gt[i], EPSILON);
+    }
+
+    // The Gram matrix must equal the result of A.T @ A.
+    core::Tensor C = A.T().Matmul(A);
+    std::vector<float> C_data = C.ToFlatVector<float>();
+    for (int i = 0; i < 9; ++i) {
+        EXPECT_NEAR(C_data[i], B_gt[i], EPSILON);
+    }
+
+    // Non-contiguous Gram test.
+    core::Tensor A_slice = A.Slice(1, 0, 3, 2);
+    core::Tensor B_slice = A_slice.Gram();
+    EXPECT_EQ(B_slice.GetShape(), core::SizeVector({2, 2}));
+
+    std::vector<float> B_slice_data = B_slice.ToFlatVector<float>();
+    std::vector<float> B_slice_gt = {17, 27, 27, 45};
+    for (int i = 0; i < 4; ++i) {
+        EXPECT_NEAR(B_slice_data[i], B_slice_gt[i], EPSILON);
+    }
+
+    // Incompatible shape test.
+    EXPECT_ANY_THROW(core::Tensor::Zeros({3, 0}, dtype, device).Gram());
+    EXPECT_ANY_THROW(core::Tensor::Zeros({3, 3, 3}, dtype, device).Gram());
+}
+
+TEST_P(LinalgPermuteDevices, RowGram) {
+    const float EPSILON = 1e-8;
+
+    core::Device device = GetParam();
+    core::Dtype dtype = core::Float32;
+
+    // RowGram test.
+    core::Tensor A =
+            core::Tensor::Init<float>({{1, 2, 3}, {4, 5, 6}}, device);
+
+    core::Tensor B = A.RowGram();
+    EXPECT_EQ(B.GetShape(), core::SizeVector({2, 2}));
+    std::vector<float> B_data = B.ToFlatVector<float>();
+    std::vector<float> B_gt = {14, 32, 32, 77};
+    for (int i = 0; i < 4; ++i) {
+        EXPECT_NEAR(B_data[i], B_gt[i], EPSILON);
+    }
+
+    // The row Gram matrix must equal the result of A @ A.T.
+    core::Tensor C = A.Matmul(A.T());
+    std::vector<float> C_data = C.ToFlatVector<float>();
+    for (int i = 0; i < 4; ++i) {
+        EXPECT_NEAR(C_data[i], B_gt[i], EPSILON);
+    }
+
+    // Non-contiguous RowGram test.
+    core::Tensor A_slice = A.Slice(1, 0, 3, 2);
+    core::Tensor B_slice = A_slice.RowGram();
+    EXPECT_EQ(B_slice.GetShape(), core::SizeVector({2, 2}));
+
+    std::vector<float> B_slice_data = B_slice.ToFlatVector<float>();
+    std::vector<float> B_slice_gt = {10, 22, 22, 52};
+    for (int i = 0; i < 4; ++i) {
+        EXPECT_NEAR(B_slice_data[i], B_slice_gt[i], EPSILON);
+    }
+
+    // Incompatible shape test.
+    EXPECT_ANY_THROW(core::Tensor::Zeros({3, 0}, dtype, device).RowGram());
+    EXPECT_ANY_THROW(core::Tensor::Zeros({3, 3, 3}, dtype, device).RowGram());
+}
+
 TEST_P(LinalgPermuteDevices, AddMM) {
     const float EPSILON = 1e-8;