apache · andishgar · Sep 17, 2025 · Sep 23, 2025 · Sep 26, 2025 · Sep 26, 2025
diff --git a/cpp/src/arrow/sparse_tensor_test.cc b/cpp/src/arrow/sparse_tensor_test.cc
@@ -413,6 +413,99 @@ TEST_F(TestSparseCOOTensor, TestToTensor) {
   ASSERT_TRUE(tensor.Equals(*dense_tensor));
 }
 
+// Typed fixture checking that SparseCOOTensor::Make does not store elements
+// equal to negative zero: every dense input below holds exactly two values
+// that compare unequal to zero (4.0 and -1.0); all other entries are +0.0/-0.0.
+template <typename ValueType>
+class TestSparseCOOTensorCreationFromNegativeZero
+    : public TestSparseTensorBase<ValueType> {
+ public:
+  using ValueCType = typename ValueType::c_type;
+
+  void SetUp() override { type_ = TypeTraits<ValueType>::type_singleton(); }
+
+  // 1-D input: the two stored entries are expected at positions 4 and 9.
+  void FromVector() {
+    std::vector<ValueCType> values{
+        -0.0, -0.0, 0.0, -0.0, 4.0, +0.0, -0.0, -0.0, -0.0, -1.0, 0.0, -0.0,
+    };
+    const std::vector<int64_t> dims = {12};
+    auto values_buffer = Buffer::FromVector(values);
+    ASSERT_OK_AND_ASSIGN(auto dense, Tensor::Make(type_, values_buffer, dims));
+    ASSERT_OK_AND_ASSIGN(auto sparse, SparseCOOTensor::Make(*dense, int64()));
+    ASSERT_EQ(2, sparse->non_zero_length());
+    auto coo_index =
+        internal::checked_pointer_cast<SparseCOOIndex>(sparse->sparse_index());
+    const auto& coords = coo_index->indices();
+    AssertCOOIndex(coords, 0, {4});
+    AssertCOOIndex(coords, 1, {9});
+    // Converting back must yield a tensor that compares equal to the input.
+    ASSERT_OK_AND_ASSIGN(auto round_tripped, sparse->ToTensor());
+    ASSERT_TRUE(round_tripped->Equals(*dense));
+  }
+
+  // 4x3 input with default strides: stored entries expected at (1, 1) and (3, 0).
+  void FromContiguousTensor() {
+    // clang-format off
+    std::vector<ValueCType> values{
+      -0.0,  0.0, -0.0,
+      +0.0,  4.0, -0.0,
+      -0.0, -0.0,  0.0,
+      -1.0, -0.0, -0.0,
+      };
+    // clang-format on
+    const std::vector<int64_t> dims = {4, 3};
+    auto values_buffer = Buffer::FromVector(values);
+    ASSERT_OK_AND_ASSIGN(auto dense, Tensor::Make(type_, values_buffer, dims));
+    ASSERT_OK_AND_ASSIGN(auto sparse, SparseCOOTensor::Make(*dense, int64()));
+    ASSERT_EQ(2, sparse->non_zero_length());
+    auto coo_index =
+        internal::checked_pointer_cast<SparseCOOIndex>(sparse->sparse_index());
+    const auto& coords = coo_index->indices();
+    AssertCOOIndex(coords, 0, {1, 1});
+    AssertCOOIndex(coords, 1, {3, 0});
+    ASSERT_OK_AND_ASSIGN(auto round_tripped, sparse->ToTensor());
+    ASSERT_TRUE(round_tripped->Equals(*dense));
+  }
+
+  // 4x3 view over a buffer laid out with 5 elements per row: the explicit
+  // strides skip the last two columns of each row, so the 1.0/2.0/3.0/4.0
+  // values sitting in those columns are not part of the tensor.
+  void FromNonContiguousTensor() {
+    // clang-format off
+    std::vector<ValueCType> values{
+      -0.0, -0.0,  0.0,  1.0,  2.0,
+      -0.0,  4.0,  0.0,  0.0, -0.0,
+      -0.0, +0.0, -0.0,  3.0,  4.0,
+      -1.0, -0.0, -0.0, -0.0, +0.0,
+      };
+    // clang-format on
+    const std::vector<int64_t> dims = {4, 3};
+    const std::vector<int64_t> byte_strides = {type_->byte_width() * 5,
+                                               type_->byte_width()};
+    auto values_buffer = Buffer::FromVector(values);
+    ASSERT_OK_AND_ASSIGN(auto dense,
+                         Tensor::Make(type_, values_buffer, dims, byte_strides));
+    ASSERT_OK_AND_ASSIGN(auto sparse, SparseCOOTensor::Make(*dense, int64()));
+    ASSERT_EQ(12, sparse->size());
+    ASSERT_EQ(2, sparse->non_zero_length());
+    auto coo_index =
+        internal::checked_pointer_cast<SparseCOOIndex>(sparse->sparse_index());
+    const auto& coords = coo_index->indices();
+    AssertCOOIndex(coords, 0, {1, 1});
+    AssertCOOIndex(coords, 1, {3, 0});
+    ASSERT_OK_AND_ASSIGN(auto round_tripped, sparse->ToTensor());
+    ASSERT_TRUE(round_tripped->Equals(*dense));
+  }
+
+ private:
+  std::shared_ptr<DataType> type_;
+};
+
+// Instantiate the fixture for every type listed in RealArrowTypes.
+TYPED_TEST_SUITE(TestSparseCOOTensorCreationFromNegativeZero, RealArrowTypes);
+
+TYPED_TEST(TestSparseCOOTensorCreationFromNegativeZero, FromVector) {
+  this->FromVector();
+}
+
+TYPED_TEST(TestSparseCOOTensorCreationFromNegativeZero, FromContiguousTensor) {
+  this->FromContiguousTensor();
+}
+
+TYPED_TEST(TestSparseCOOTensorCreationFromNegativeZero, FromNonContiguousTensor) {
+  this->FromNonContiguousTensor();
+}
+
 template <typename ValueType>
 class TestSparseCOOTensorEquality : public TestSparseTensorBase<ValueType> {
  public:
@@ -869,6 +962,51 @@ TEST_F(TestSparseCSRMatrix, TestToTensor) {
   ASSERT_TRUE(tensor.Equals(*dense_tensor));
 }
 
+// Typed fixture checking that SparseCSRMatrix::Make does not store elements
+// equal to negative zero.
+template <typename ValueType>
+class TestSparseCSRTensorCreationFromNegativeZero
+    : public TestSparseTensorBase<ValueType> {
+ public:
+  using ValueCType = typename ValueType::c_type;
+
+  void SetUp() override { type_ = TypeTraits<ValueType>::type_singleton(); }
+
+  // 4x3 input whose only entries unequal to zero are 4.0 at (1, 1) and -1.0 at
+  // (3, 0); everything else is +0.0 or -0.0.
+  void FromTensor() {
+    // clang-format off
+    std::vector<ValueCType> values{
+      -0.0, -0.0,  0.0,
+      -0.0,  4.0, -0.0,
+      +0.0, -0.0, -0.0,
+      -1.0, -0.0, +0.0,
+      };
+    // clang-format on
+    const std::vector<int64_t> dims = {4, 3};
+    auto values_buffer = Buffer::FromVector(values);
+    ASSERT_OK_AND_ASSIGN(auto dense, Tensor::Make(type_, values_buffer, dims));
+    ASSERT_OK_AND_ASSIGN(auto sparse, SparseCSRMatrix::Make(*dense, int64()));
+    ASSERT_EQ(2, sparse->non_zero_length());
+    auto csr_index =
+        internal::checked_pointer_cast<SparseCSRIndex>(sparse->sparse_index());
+    const auto* row_ptr = csr_index->indptr()->data()->template data_as<int64_t>();
+    const auto* col_idx = csr_index->indices()->data()->template data_as<int64_t>();
+    // One entry has been seen once row 1 is complete, two once row 3 is.
+    ASSERT_EQ(row_ptr[2], 1);
+    ASSERT_EQ(row_ptr[4], 2);
+    // Column of each stored entry, in row order.
+    ASSERT_EQ(col_idx[0], 1);
+    ASSERT_EQ(col_idx[1], 0);
+    // Converting back must yield a tensor that compares equal to the input.
+    ASSERT_OK_AND_ASSIGN(auto round_tripped, sparse->ToTensor());
+    ASSERT_TRUE(round_tripped->Equals(*dense));
+  }
+
+ private:
+  std::shared_ptr<DataType> type_;
+};
+
+// Instantiate the fixture for every type listed in RealArrowTypes.
+TYPED_TEST_SUITE(TestSparseCSRTensorCreationFromNegativeZero, RealArrowTypes);
+
+TYPED_TEST(TestSparseCSRTensorCreationFromNegativeZero, FromTensor) {
+  this->FromTensor();
+}
+
 template <typename ValueType>
 class TestSparseCSRMatrixEquality : public TestSparseTensorBase<ValueType> {
  public:
@@ -1204,6 +1342,51 @@ TEST_F(TestSparseCSCMatrix, TestToTensor) {
   ASSERT_TRUE(tensor.Equals(*dense_tensor));
 }
 
+// Typed fixture checking that SparseCSCMatrix::Make does not store elements
+// equal to negative zero.
+template <typename ValueType>
+class TestSparseCSCTensorCreationFromNegativeZero
+    : public TestSparseTensorBase<ValueType> {
+ public:
+  using ValueCType = typename ValueType::c_type;
+
+  void SetUp() override { type_ = TypeTraits<ValueType>::type_singleton(); }
+
+  // 4x3 input whose only entries unequal to zero are 4.0 at (1, 1) and -1.0 at
+  // (3, 0); everything else is +0.0 or -0.0.
+  void FromTensor() {
+    // clang-format off
+    std::vector<ValueCType> values{
+      -0.0, -0.0, +0.0,
+      -0.0,  4.0, -0.0,
+      -0.0,  0.0, -0.0,
+      -1.0, -0.0, -0.0,
+      };
+    // clang-format on
+    const std::vector<int64_t> dims = {4, 3};
+    auto values_buffer = Buffer::FromVector(values);
+    ASSERT_OK_AND_ASSIGN(auto dense, Tensor::Make(type_, values_buffer, dims));
+    ASSERT_OK_AND_ASSIGN(auto sparse, SparseCSCMatrix::Make(*dense, int64()));
+    ASSERT_EQ(2, sparse->non_zero_length());
+    auto csc_index =
+        internal::checked_pointer_cast<SparseCSCIndex>(sparse->sparse_index());
+    const auto* col_ptr = csc_index->indptr()->data()->template data_as<int64_t>();
+    const auto* row_idx = csc_index->indices()->data()->template data_as<int64_t>();
+    // One entry has been seen once column 0 is complete, two once column 1 is.
+    ASSERT_EQ(col_ptr[1], 1);
+    ASSERT_EQ(col_ptr[2], 2);
+    // Row of each stored entry, in column order.
+    ASSERT_EQ(row_idx[0], 3);
+    ASSERT_EQ(row_idx[1], 1);
+    // Converting back must yield a tensor that compares equal to the input.
+    ASSERT_OK_AND_ASSIGN(auto round_tripped, sparse->ToTensor());
+    ASSERT_TRUE(round_tripped->Equals(*dense));
+  }
+
+ private:
+  std::shared_ptr<DataType> type_;
+};
+
+// Instantiate the fixture for every type listed in RealArrowTypes.
+TYPED_TEST_SUITE(TestSparseCSCTensorCreationFromNegativeZero, RealArrowTypes);
+
+TYPED_TEST(TestSparseCSCTensorCreationFromNegativeZero, FromTensor) {
+  this->FromTensor();
+}
+
 template <typename ValueType>
 class TestSparseCSCMatrixEquality : public TestSparseTensorBase<ValueType> {
  public:
@@ -1479,6 +1662,53 @@ TEST_F(TestSparseCSFTensor, CreationFromZeroTensor) {
   ASSERT_TRUE(t->Equals(*t_zero));
 }
 
+// Typed fixture checking that SparseCSFTensor::Make does not store elements
+// equal to negative zero.
+template <typename ValueType>
+class TestSparseCSFTensorCreationFromNegativeZero
+    : public TestSparseTensorBase<ValueType> {
+ public:
+  using ValueCType = typename ValueType::c_type;
+
+  void SetUp() override { type_ = TypeTraits<ValueType>::type_singleton(); }
+
+  // 3x4 input whose only entries unequal to zero are 4.0 at (1, 0) and -1.0 at
+  // (2, 1); everything else is +0.0 or -0.0.
+  void FromTensor() {
+    // clang-format off
+    std::vector<ValueCType> values{
+      -0.0, -0.0,  0.0, -0.0,
+       4.0, +0.0, -0.0, -0.0,
+       0.0, -1.0, -0.0, -0.0,
+      };
+    // clang-format on
+    const std::vector<int64_t> dims = {3, 4};
+    auto values_buffer = Buffer::FromVector(values);
+    ASSERT_OK_AND_ASSIGN(auto dense, Tensor::Make(type_, values_buffer, dims));
+    ASSERT_OK_AND_ASSIGN(auto sparse, SparseCSFTensor::Make(*dense, int64()));
+    ASSERT_EQ(2, sparse->non_zero_length());
+    auto csf_index =
+        internal::checked_pointer_cast<SparseCSFIndex>(sparse->sparse_index());
+    const auto* level0_ptr =
+        csf_index->indptr()[0]->data()->template data_as<int64_t>();
+    const auto* rows = csf_index->indices()[0]->data()->template data_as<int64_t>();
+    const auto* cols = csf_index->indices()[1]->data()->template data_as<int64_t>();
+    ASSERT_EQ(level0_ptr[1], 1);
+    ASSERT_EQ(level0_ptr[2], 2);
+    // Coordinates of the two stored entries: (1, 0) and (2, 1).
+    EXPECT_EQ(rows[0], 1);
+    EXPECT_EQ(rows[1], 2);
+    EXPECT_EQ(cols[0], 0);
+    EXPECT_EQ(cols[1], 1);
+    // Converting back must yield a tensor that compares equal to the input.
+    ASSERT_OK_AND_ASSIGN(auto round_tripped, sparse->ToTensor());
+    ASSERT_TRUE(round_tripped->Equals(*dense));
+  }
+
+ private:
+  std::shared_ptr<DataType> type_;
+};
+
+// Instantiate the fixture for every type listed in RealArrowTypes.
+TYPED_TEST_SUITE(TestSparseCSFTensorCreationFromNegativeZero, RealArrowTypes);
+
+TYPED_TEST(TestSparseCSFTensorCreationFromNegativeZero, FromTensor) {
+  this->FromTensor();
+}
+
 template <typename IndexValueType>
 class TestSparseCSFTensorForIndexValueType
     : public TestSparseCSFTensorBase<IndexValueType> {

diff --git a/cpp/src/arrow/tensor.cc b/cpp/src/arrow/tensor.cc
@@ -35,6 +35,7 @@
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/int_util_overflow.h"
 #include "arrow/util/logging_internal.h"
+#include "arrow/util/macros.h"
 #include "arrow/util/unreachable.h"
 #include "arrow/visit_type_inline.h"
 
@@ -85,7 +86,7 @@ Status ComputeColumnMajorStrides(const FixedWidthType& type,
   if (!shape.empty() && shape.back() > 0) {
     total = byte_width;
     for (size_t i = 0; i < ndim - 1; ++i) {
-      if (internal::MultiplyWithOverflow(total, shape[i], &total)) {
+      if (ARROW_PREDICT_FALSE(internal::MultiplyWithOverflow(total, shape[i], &total))) {
         return Status::Invalid(
             "Column-major strides computed from shape would not fit in 64-bit "
             "integer");
@@ -485,13 +486,14 @@ namespace {
 template <typename TYPE>
 int64_t StridedTensorCountNonZero(int dim_index, int64_t offset, const Tensor& tensor) {
   using c_type = typename TYPE::c_type;
-  const c_type zero = c_type(0);
   int64_t nnz = 0;
   if (dim_index == tensor.ndim() - 1) {
     for (int64_t i = 0; i < tensor.shape()[dim_index]; ++i) {
       const auto* ptr = tensor.raw_data() + offset + i * tensor.strides()[dim_index];
       auto& elem = *reinterpret_cast<const c_type*>(ptr);
-      if (elem != zero) ++nnz;
+      if (internal::is_not_zero<TYPE>(elem)) {
+        ++nnz;
+      }
     }
     return nnz;
   }
@@ -507,7 +509,7 @@ int64_t ContiguousTensorCountNonZero(const Tensor& tensor) {
   using c_type = typename TYPE::c_type;
   auto* data = reinterpret_cast<const c_type*>(tensor.raw_data());
   return std::count_if(data, data + tensor.size(),
-                       [](const c_type& x) { return x != 0; });
+                       [](const c_type& x) { return internal::is_not_zero<TYPE>(x); });
 }
 
 template <typename TYPE>

diff --git a/cpp/src/arrow/tensor.h b/cpp/src/arrow/tensor.h
@@ -55,6 +55,13 @@ constexpr bool is_tensor_supported(Type::type type_id) {
 
 namespace internal {
 
+/// \brief Return true when `value` compares unequal to zero.
+///
+/// The comparison is the built-in `!=` on `ValueDataType::c_type`, so for
+/// floating-point c_types both +0.0 and -0.0 are reported as zero.
+///
+/// TODO(GH-47578): Enable HalfFloatType
+template <typename ValueDataType>
+inline bool is_not_zero(typename ValueDataType::c_type value) {
+  using CType = typename ValueDataType::c_type;
+  return value != static_cast<CType>(0);
+}
+
 ARROW_EXPORT
 Status ComputeRowMajorStrides(const FixedWidthType& type,
                               const std::vector<int64_t>& shape,

diff --git a/cpp/src/arrow/tensor/converter.h b/cpp/src/arrow/tensor/converter.h
@@ -20,6 +20,9 @@
 #include "arrow/sparse_tensor.h"  // IWYU pragma: export
 
 #include <memory>
+#include <utility>
+
+#include "arrow/visit_type_inline.h"
 
 namespace arrow {
 namespace internal {
@@ -63,5 +66,56 @@ Result<std::shared_ptr<Tensor>> MakeTensorFromSparseCSCMatrix(
 Result<std::shared_ptr<Tensor>> MakeTensorFromSparseCSFTensor(
     MemoryPool* pool, const SparseCSFTensor* sparse_tensor);
 
+/// \brief Callable adapter around a converter object.
+///
+/// Calling the adapter with a value type and an index type passes both to
+/// `Converter::Convert` and returns the resulting Status. The converter is
+/// held by reference, so it must outlive the adapter.
+template <typename Converter>
+struct ConverterVisitor {
+  explicit ConverterVisitor(Converter& converter) : converter{converter} {}
+
+  template <typename ValueType, typename IndexType>
+  Status operator()(const ValueType& value, const IndexType& index_type) {
+    return converter.Convert(value, index_type);
+  }
+
+  /// Converter that receives every call.
+  Converter& converter;
+};
+
+/// \brief Second dispatch stage: selects on the tensor's value type.
+///
+/// The first overload is enabled only for value types accepted by
+/// `enable_if_number` and invokes `function(value_type, index_type)`. Every
+/// other value type falls through to the `DataType` overload, which reports
+/// an Invalid status without calling `function`.
+struct ValueTypeVisitor {
+  template <typename ValueType, typename IndexType, typename Function>
+  enable_if_number<ValueType, Status> Visit(const ValueType& value_type,
+                                            const IndexType& index_type,
+                                            Function&& function) {
+    // `function` is a forwarding reference: forward it so it is invoked with
+    // the value category the caller supplied instead of always as an lvalue.
+    return std::forward<Function>(function)(value_type, index_type);
+  }
+
+  template <typename IndexType, typename Function>
+  Status Visit(const DataType& value_type, const IndexType&, Function&&) {
+    return Status::Invalid("Invalid value type: ", value_type.name(),
+                           ". Expected a number.");
+  }
+};
+
+/// \brief First dispatch stage: selects on the sparse index type.
+///
+/// For index types accepted by `enable_if_integer`, the value type is handed
+/// to `VisitTypeInline` together with a `ValueTypeVisitor`, the index type
+/// and the callable. Any other index type yields an Invalid status and the
+/// callable is never used.
+struct IndexAndValueTypeVisitor {
+  template <typename IndexType, typename Function>
+  enable_if_integer<IndexType, Status> Visit(const IndexType& index_type,
+                                             const std::shared_ptr<DataType>& value_type,
+                                             Function&& function) {
+    ValueTypeVisitor value_stage;
+    return VisitTypeInline(*value_type, &value_stage, index_type,
+                           std::forward<Function>(function));
+  }
+
+  template <typename Function>
+  Status Visit(const DataType& type, const std::shared_ptr<DataType>&, Function&&) {
+    return Status::Invalid("Invalid index type: ", type.name(), ". Expected integer.");
+  }
+};
+
+/// \brief Invoke `function` according to a (value type, index type) pair.
+///
+/// Hands `index_type` to `VisitTypeInline` with an `IndexAndValueTypeVisitor`,
+/// passing `value_type` and the callable along.
+///
+/// \param[in] value_type type of the tensor values
+/// \param[in] index_type type of the sparse indices
+/// \param[in] function callable passed through to the visitors
+/// \return the Status produced by the visitors
+template <typename Function>
+Status VisitValueAndIndexType(const std::shared_ptr<DataType>& value_type,
+                              const std::shared_ptr<DataType>& index_type,
+                              Function&& function) {
+  IndexAndValueTypeVisitor index_stage;
+  return VisitTypeInline(*index_type, &index_stage, value_type,
+                         std::forward<Function>(function));
+}
+
 }  // namespace internal
 }  // namespace arrow