Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
230 changes: 230 additions & 0 deletions cpp/src/arrow/sparse_tensor_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,99 @@ TEST_F(TestSparseCOOTensor, TestToTensor) {
ASSERT_TRUE(tensor.Equals(*dense_tensor));
}

// Typed fixture checking SparseCOOTensor::Make on dense inputs that are mostly
// signed zeros (-0.0 / +0.0): only the two genuinely non-zero values may end up
// in the sparse tensor, and converting back must compare equal to the input.
template <typename ValueType>
class TestSparseCOOTensorCreationFromNegativeZero
    : public TestSparseTensorBase<ValueType> {
 public:
  using ValueCType = typename ValueType::c_type;

  void SetUp() override { type_ = TypeTraits<ValueType>::type_singleton(); }

  // 1-D input: the non-zero values are 4.0 at position 4 and -1.0 at position 9.
  void FromVector() {
    std::vector<ValueCType> values{
        -0.0, -0.0, 0.0, -0.0, 4.0, +0.0, -0.0, -0.0, -0.0, -1.0, 0.0, -0.0,
    };
    std::vector<int64_t> dims = {12};
    auto storage = Buffer::FromVector(values);
    ASSERT_OK_AND_ASSIGN(auto dense, Tensor::Make(type_, storage, dims));
    ASSERT_OK_AND_ASSIGN(auto sparse, SparseCOOTensor::Make(*dense, int64()));
    ASSERT_EQ(2, sparse->non_zero_length());

    auto coo_index =
        internal::checked_pointer_cast<SparseCOOIndex>(sparse->sparse_index());
    AssertCOOIndex(coo_index->indices(), 0, {4});
    AssertCOOIndex(coo_index->indices(), 1, {9});

    // Round trip: the rebuilt dense tensor must be Equals() to the original.
    ASSERT_OK_AND_ASSIGN(auto round_trip, sparse->ToTensor());
    ASSERT_TRUE(round_trip->Equals(*dense));
  }

  // 4x3 input with default strides: non-zero values sit at (1, 1) and (3, 0).
  void FromContiguousTensor() {
    // clang-format off
    std::vector<ValueCType> values{
      -0.0,  0.0, -0.0,
      +0.0,  4.0, -0.0,
      -0.0, -0.0,  0.0,
      -1.0, -0.0, -0.0,
    };
    // clang-format on
    std::vector<int64_t> dims = {4, 3};
    auto storage = Buffer::FromVector(values);
    ASSERT_OK_AND_ASSIGN(auto dense, Tensor::Make(type_, storage, dims));
    ASSERT_OK_AND_ASSIGN(auto sparse, SparseCOOTensor::Make(*dense, int64()));
    ASSERT_EQ(2, sparse->non_zero_length());

    auto coo_index =
        internal::checked_pointer_cast<SparseCOOIndex>(sparse->sparse_index());
    AssertCOOIndex(coo_index->indices(), 0, {1, 1});
    AssertCOOIndex(coo_index->indices(), 1, {3, 0});

    ASSERT_OK_AND_ASSIGN(auto round_trip, sparse->ToTensor());
    ASSERT_TRUE(round_trip->Equals(*dense));
  }

  // The backing storage has 4 rows of 5 elements, but the shape/strides passed
  // to Tensor::Make expose only the first 3 columns of each row. The values in
  // the two trailing columns (1.0, 2.0, 3.0, 4.0, ...) are therefore outside
  // the tensor and must not be counted.
  void FromNonContiguousTensor() {
    // clang-format off
    std::vector<ValueCType> values{
      -0.0, -0.0,  0.0,  1.0,  2.0,
      -0.0,  4.0,  0.0,  0.0, -0.0,
      -0.0, +0.0, -0.0,  3.0,  4.0,
      -1.0, -0.0, -0.0, -0.0, +0.0,
    };
    // clang-format on
    std::vector<int64_t> dims = {4, 3};
    auto storage = Buffer::FromVector(values);
    // Row stride spans 5 elements of storage; column stride is one element.
    const auto element_width = type_->byte_width();
    ASSERT_OK_AND_ASSIGN(
        auto dense,
        Tensor::Make(type_, storage, dims, {element_width * 5, element_width}));
    ASSERT_OK_AND_ASSIGN(auto sparse, SparseCOOTensor::Make(*dense, int64()));
    ASSERT_EQ(12, sparse->size());
    ASSERT_EQ(2, sparse->non_zero_length());

    auto coo_index =
        internal::checked_pointer_cast<SparseCOOIndex>(sparse->sparse_index());
    AssertCOOIndex(coo_index->indices(), 0, {1, 1});
    AssertCOOIndex(coo_index->indices(), 1, {3, 0});

    ASSERT_OK_AND_ASSIGN(auto round_trip, sparse->ToTensor());
    ASSERT_TRUE(round_trip->Equals(*dense));
  }

 private:
  // Arrow data type singleton for ValueType, set in SetUp().
  std::shared_ptr<DataType> type_;
};

// Register the COO negative-zero fixture as a typed suite over RealArrowTypes.
TYPED_TEST_SUITE(TestSparseCOOTensorCreationFromNegativeZero, RealArrowTypes);

// Each typed test below only forwards to the fixture method of the same name,
// where the actual data and assertions live.
TYPED_TEST(TestSparseCOOTensorCreationFromNegativeZero, FromVector) {
this->FromVector();
}

TYPED_TEST(TestSparseCOOTensorCreationFromNegativeZero, FromContiguousTensor) {
this->FromContiguousTensor();
}

TYPED_TEST(TestSparseCOOTensorCreationFromNegativeZero, FromNonContiguousTensor) {
this->FromNonContiguousTensor();
}

template <typename ValueType>
class TestSparseCOOTensorEquality : public TestSparseTensorBase<ValueType> {
public:
Expand Down Expand Up @@ -869,6 +962,51 @@ TEST_F(TestSparseCSRMatrix, TestToTensor) {
ASSERT_TRUE(tensor.Equals(*dense_tensor));
}

// Typed fixture checking SparseCSRMatrix::Make on a dense matrix made almost
// entirely of signed zeros: only 4.0 and -1.0 may be kept as non-zero entries.
template <typename ValueType>
class TestSparseCSRTensorCreationFromNegativeZero
    : public TestSparseTensorBase<ValueType> {
 public:
  using ValueCType = typename ValueType::c_type;

  void SetUp() override { type_ = TypeTraits<ValueType>::type_singleton(); }

  // 4x3 input: 4.0 is at (row 1, column 1) and -1.0 is at (row 3, column 0).
  void FromTensor() {
    // clang-format off
    std::vector<ValueCType> values{
      -0.0, -0.0,  0.0,
      -0.0,  4.0, -0.0,
      +0.0, -0.0, -0.0,
      -1.0, -0.0, +0.0,
    };
    // clang-format on
    std::vector<int64_t> dims = {4, 3};
    auto storage = Buffer::FromVector(values);
    ASSERT_OK_AND_ASSIGN(auto dense, Tensor::Make(type_, storage, dims));
    ASSERT_OK_AND_ASSIGN(auto sparse, SparseCSRMatrix::Make(*dense, int64()));
    ASSERT_EQ(2, sparse->non_zero_length());

    auto csr_index =
        internal::checked_pointer_cast<SparseCSRIndex>(sparse->sparse_index());
    const auto* row_offsets = csr_index->indptr()->data()->template data_as<int64_t>();
    const auto* column_ids = csr_index->indices()->data()->template data_as<int64_t>();
    // One entry has been seen once row 1 is done, two once row 3 is done.
    ASSERT_EQ(row_offsets[2], 1);
    ASSERT_EQ(row_offsets[4], 2);
    // Column of each stored entry, in row order.
    ASSERT_EQ(column_ids[0], 1);
    ASSERT_EQ(column_ids[1], 0);

    // Round trip: the rebuilt dense tensor must be Equals() to the original.
    ASSERT_OK_AND_ASSIGN(auto round_trip, sparse->ToTensor());
    ASSERT_TRUE(round_trip->Equals(*dense));
  }

 private:
  // Arrow data type singleton for ValueType, set in SetUp().
  std::shared_ptr<DataType> type_;
};

// Register the CSR negative-zero fixture as a typed suite over RealArrowTypes.
TYPED_TEST_SUITE(TestSparseCSRTensorCreationFromNegativeZero, RealArrowTypes);

// Forwards to the fixture method, which holds the data and assertions.
TYPED_TEST(TestSparseCSRTensorCreationFromNegativeZero, FromTensor) {
this->FromTensor();
}

template <typename ValueType>
class TestSparseCSRMatrixEquality : public TestSparseTensorBase<ValueType> {
public:
Expand Down Expand Up @@ -1204,6 +1342,51 @@ TEST_F(TestSparseCSCMatrix, TestToTensor) {
ASSERT_TRUE(tensor.Equals(*dense_tensor));
}

// Typed fixture checking SparseCSCMatrix::Make on a dense matrix made almost
// entirely of signed zeros: only 4.0 and -1.0 may be kept as non-zero entries.
template <typename ValueType>
class TestSparseCSCTensorCreationFromNegativeZero
    : public TestSparseTensorBase<ValueType> {
 public:
  using ValueCType = typename ValueType::c_type;

  void SetUp() override { type_ = TypeTraits<ValueType>::type_singleton(); }

  // 4x3 input: column 0 holds -1.0 in row 3 and column 1 holds 4.0 in row 1.
  void FromTensor() {
    // clang-format off
    std::vector<ValueCType> values{
      -0.0, -0.0, +0.0,
      -0.0,  4.0, -0.0,
      -0.0,  0.0, -0.0,
      -1.0, -0.0, -0.0,
    };
    // clang-format on
    std::vector<int64_t> dims = {4, 3};
    auto storage = Buffer::FromVector(values);
    ASSERT_OK_AND_ASSIGN(auto dense, Tensor::Make(type_, storage, dims));
    ASSERT_OK_AND_ASSIGN(auto sparse, SparseCSCMatrix::Make(*dense, int64()));
    ASSERT_EQ(2, sparse->non_zero_length());

    auto csc_index =
        internal::checked_pointer_cast<SparseCSCIndex>(sparse->sparse_index());
    const auto* column_offsets = csc_index->indptr()->data()->template data_as<int64_t>();
    const auto* row_ids = csc_index->indices()->data()->template data_as<int64_t>();
    // One entry has been seen once column 0 is done, two once column 1 is done.
    ASSERT_EQ(column_offsets[1], 1);
    ASSERT_EQ(column_offsets[2], 2);
    // Row of each stored entry, in column order.
    ASSERT_EQ(row_ids[0], 3);
    ASSERT_EQ(row_ids[1], 1);

    // Round trip: the rebuilt dense tensor must be Equals() to the original.
    ASSERT_OK_AND_ASSIGN(auto round_trip, sparse->ToTensor());
    ASSERT_TRUE(round_trip->Equals(*dense));
  }

 private:
  // Arrow data type singleton for ValueType, set in SetUp().
  std::shared_ptr<DataType> type_;
};

// Register the CSC negative-zero fixture as a typed suite over RealArrowTypes.
TYPED_TEST_SUITE(TestSparseCSCTensorCreationFromNegativeZero, RealArrowTypes);

// Forwards to the fixture method, which holds the data and assertions.
TYPED_TEST(TestSparseCSCTensorCreationFromNegativeZero, FromTensor) {
this->FromTensor();
}

template <typename ValueType>
class TestSparseCSCMatrixEquality : public TestSparseTensorBase<ValueType> {
public:
Expand Down Expand Up @@ -1479,6 +1662,53 @@ TEST_F(TestSparseCSFTensor, CreationFromZeroTensor) {
ASSERT_TRUE(t->Equals(*t_zero));
}

// Typed fixture checking SparseCSFTensor::Make on a dense tensor made almost
// entirely of signed zeros: only 4.0 and -1.0 may be kept as non-zero entries.
template <typename ValueType>
class TestSparseCSFTensorCreationFromNegativeZero
    : public TestSparseTensorBase<ValueType> {
 public:
  using ValueCType = typename ValueType::c_type;

  void SetUp() override { type_ = TypeTraits<ValueType>::type_singleton(); }

  // 3x4 input: 4.0 is at (row 1, column 0) and -1.0 is at (row 2, column 1).
  void FromTensor() {
    // clang-format off
    std::vector<ValueCType> values{
      -0.0, -0.0,  0.0, -0.0,
       4.0, +0.0, -0.0, -0.0,
       0.0, -1.0, -0.0, -0.0,
    };
    // clang-format on
    std::vector<int64_t> dims = {3, 4};
    auto storage = Buffer::FromVector(values);
    ASSERT_OK_AND_ASSIGN(auto dense, Tensor::Make(type_, storage, dims));
    ASSERT_OK_AND_ASSIGN(auto sparse, SparseCSFTensor::Make(*dense, int64()));
    ASSERT_EQ(2, sparse->non_zero_length());

    auto csf_index =
        internal::checked_pointer_cast<SparseCSFIndex>(sparse->sparse_index());
    auto level_offsets = csf_index->indptr()[0]->data()->template data_as<int64_t>();
    auto row_ids = csf_index->indices()[0]->data()->template data_as<int64_t>();
    auto column_ids = csf_index->indices()[1]->data()->template data_as<int64_t>();
    // The offsets are fatal checks; the per-entry coordinates below are
    // non-fatal so that every mismatch gets reported in a single run.
    ASSERT_EQ(level_offsets[1], 1);
    ASSERT_EQ(level_offsets[2], 2);
    EXPECT_EQ(row_ids[0], 1);
    EXPECT_EQ(row_ids[1], 2);
    EXPECT_EQ(column_ids[0], 0);
    EXPECT_EQ(column_ids[1], 1);

    // Round trip: the rebuilt dense tensor must be Equals() to the original.
    ASSERT_OK_AND_ASSIGN(auto round_trip, sparse->ToTensor());
    ASSERT_TRUE(round_trip->Equals(*dense));
  }

 private:
  // Arrow data type singleton for ValueType, set in SetUp().
  std::shared_ptr<DataType> type_;
};

// Register the CSF negative-zero fixture as a typed suite over RealArrowTypes.
TYPED_TEST_SUITE(TestSparseCSFTensorCreationFromNegativeZero, RealArrowTypes);

// Forwards to the fixture method, which holds the data and assertions.
TYPED_TEST(TestSparseCSFTensorCreationFromNegativeZero, FromTensor) {
this->FromTensor();
}

template <typename IndexValueType>
class TestSparseCSFTensorForIndexValueType
: public TestSparseCSFTensorBase<IndexValueType> {
Expand Down
10 changes: 6 additions & 4 deletions cpp/src/arrow/tensor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include "arrow/util/checked_cast.h"
#include "arrow/util/int_util_overflow.h"
#include "arrow/util/logging_internal.h"
#include "arrow/util/macros.h"
#include "arrow/util/unreachable.h"
#include "arrow/visit_type_inline.h"

Expand Down Expand Up @@ -85,7 +86,7 @@ Status ComputeColumnMajorStrides(const FixedWidthType& type,
if (!shape.empty() && shape.back() > 0) {
total = byte_width;
for (size_t i = 0; i < ndim - 1; ++i) {
if (internal::MultiplyWithOverflow(total, shape[i], &total)) {
if (ARROW_PREDICT_FALSE(internal::MultiplyWithOverflow(total, shape[i], &total))) {
return Status::Invalid(
"Column-major strides computed from shape would not fit in 64-bit "
"integer");
Expand Down Expand Up @@ -485,13 +486,14 @@ namespace {
template <typename TYPE>
int64_t StridedTensorCountNonZero(int dim_index, int64_t offset, const Tensor& tensor) {
using c_type = typename TYPE::c_type;
const c_type zero = c_type(0);
int64_t nnz = 0;
if (dim_index == tensor.ndim() - 1) {
for (int64_t i = 0; i < tensor.shape()[dim_index]; ++i) {
const auto* ptr = tensor.raw_data() + offset + i * tensor.strides()[dim_index];
auto& elem = *reinterpret_cast<const c_type*>(ptr);
if (elem != zero) ++nnz;
if (internal::is_not_zero<TYPE>(elem)) {
++nnz;
}
}
return nnz;
}
Expand All @@ -507,7 +509,7 @@ int64_t ContiguousTensorCountNonZero(const Tensor& tensor) {
using c_type = typename TYPE::c_type;
auto* data = reinterpret_cast<const c_type*>(tensor.raw_data());
return std::count_if(data, data + tensor.size(),
[](const c_type& x) { return x != 0; });
[](const c_type& x) { return internal::is_not_zero<TYPE>(x); });
}

template <typename TYPE>
Expand Down
7 changes: 7 additions & 0 deletions cpp/src/arrow/tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,13 @@ constexpr bool is_tensor_supported(Type::type type_id) {

namespace internal {

// TODO(GH-47578): Enable HalfFloatType
/// \brief Tell whether a tensor element is different from zero.
///
/// The element is compared, as a value of ValueDataType::c_type, against a
/// zero of that same type using operator!=. For floating-point c_types this
/// means a negative zero is reported as zero, since -0.0 compares equal to 0.0.
///
/// \param[in] value the element to test
/// \return true if value != 0, false otherwise
template <typename ValueDataType>
inline bool is_not_zero(typename ValueDataType::c_type value) {
  using CType = typename ValueDataType::c_type;
  return value != static_cast<CType>(0);
}

ARROW_EXPORT
Status ComputeRowMajorStrides(const FixedWidthType& type,
const std::vector<int64_t>& shape,
Expand Down
54 changes: 54 additions & 0 deletions cpp/src/arrow/tensor/converter.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
#include "arrow/sparse_tensor.h" // IWYU pragma: export

#include <memory>
#include <utility>

#include "arrow/visit_type_inline.h"

namespace arrow {
namespace internal {
Expand Down Expand Up @@ -63,5 +66,56 @@ Result<std::shared_ptr<Tensor>> MakeTensorFromSparseCSCMatrix(
Result<std::shared_ptr<Tensor>> MakeTensorFromSparseCSFTensor(
MemoryPool* pool, const SparseCSFTensor* sparse_tensor);

/// \brief Callable wrapper that routes a (value type, index type) pair to
/// Converter::Convert.
///
/// Only a reference to the converter is stored, so the converter must outlive
/// this visitor.
template <typename Converter>
struct ConverterVisitor {
  // Borrowed converter that performs the actual conversion.
  Converter& converter;

  explicit ConverterVisitor(Converter& converter) : converter(converter) {}

  /// Invoke `converter.Convert(value, index_type)` and return its Status.
  template <typename ValueType, typename IndexType>
  Status operator()(const ValueType& value, const IndexType& index_type) {
    return this->converter.Convert(value, index_type);
  }
};

/// \brief Visitor over the value type: numeric types are passed on to the
/// supplied function, any other type produces Status::Invalid.
struct ValueTypeVisitor {
  /// Fallback for value types that do not satisfy enable_if_number: the
  /// function is never called and an error naming the type is returned.
  template <typename IndexType, typename Function>
  Status Visit(const DataType& value_type, const IndexType& /*index_type*/,
               Function&& /*function*/) {
    return Status::Invalid("Invalid value type: ", value_type.name(),
                           ". Expected a number.");
  }

  /// Numeric value type: call `function(value_type, index_type)` and return
  /// its result.
  template <typename ValueType, typename IndexType, typename Function>
  enable_if_number<ValueType, Status> Visit(const ValueType& value_type,
                                            const IndexType& index_type,
                                            Function&& function) {
    return function(value_type, index_type);
  }
};

/// \brief Visitor over the index type: integer index types continue with a
/// second dispatch on the value type, any other type produces Status::Invalid.
struct IndexAndValueTypeVisitor {
  /// Fallback for index types that do not satisfy enable_if_integer: the
  /// function is never called and an error naming the type is returned.
  template <typename Function>
  Status Visit(const DataType& type, const std::shared_ptr<DataType>& /*value_type*/,
               Function&& /*function*/) {
    return Status::Invalid("Invalid index type: ", type.name(), ". Expected integer.");
  }

  /// Integer index type: hand over to ValueTypeVisitor through VisitTypeInline,
  /// passing the index type and the function along as extra arguments.
  template <typename IndexType, typename Function>
  enable_if_integer<IndexType, Status> Visit(const IndexType& index_type,
                                             const std::shared_ptr<DataType>& value_type,
                                             Function&& function) {
    ValueTypeVisitor value_visitor;
    return VisitTypeInline(*value_type, &value_visitor, index_type,
                           std::forward<Function>(function));
  }
};

/// \brief Dispatch `function` on a pair of runtime value and index types.
///
/// The index type is visited first with IndexAndValueTypeVisitor, which in
/// turn visits the value type. Per those visitors, `function` is called as
/// `function(value_type, index_type)` when the index type is an integer and
/// the value type is a number; otherwise a Status::Invalid is returned.
///
/// \param[in] value_type runtime value type; dereferenced, so must not be null
/// \param[in] index_type runtime index type; dereferenced, so must not be null
/// \param[in] function callable forwarded down to the value-type visitor
template <typename Function>
Status VisitValueAndIndexType(const std::shared_ptr<DataType>& value_type,
                              const std::shared_ptr<DataType>& index_type,
                              Function&& function) {
  IndexAndValueTypeVisitor index_visitor;
  return VisitTypeInline(*index_type, &index_visitor, value_type,
                         std::forward<Function>(function));
}

} // namespace internal
} // namespace arrow
Loading
Loading