From 5861ab7351b165d016cb2e381c5209367c975236 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 19 Nov 2025 23:04:40 -0500 Subject: [PATCH 1/7] working update --- r/R/convert-array-stream.R | 22 +- r/bootstrap.R | 4 - r/src/init.c | 2 + r/src/nanoarrow.hpp | 1114 ++++++++++++++++++++++++++++++++++++ r/src/nanoarrow_cpp.cc | 80 +++ r/src/nanoarrow_ipc.hpp | 209 +++++++ r/tools/make-callentries.R | 2 +- 7 files changed, 1412 insertions(+), 21 deletions(-) create mode 100644 r/src/nanoarrow.hpp create mode 100644 r/src/nanoarrow_ipc.hpp diff --git a/r/R/convert-array-stream.R b/r/R/convert-array-stream.R index b1f3e85e5..c8e73aedb 100644 --- a/r/R/convert-array-stream.R +++ b/r/R/convert-array-stream.R @@ -72,31 +72,21 @@ convert_array_stream <- function(array_stream, to = NULL, size = NULL, n = Inf) } else { # Otherwise, we need to collect all batches and calculate the total length # before calling nanoarrow_c_convert_array_stream(). - batches <- collect_array_stream( - array_stream, - n, - schema = schema, - validate = FALSE - ) + batch_info <- .Call(nanoarrow_c_collect_array_stream, array_stream, n) # If there is exactly one batch, use convert_array(). Converting a single # array currently takes a more efficient code path for types that can be # converted as ALTREP (e.g., strings). - if (length(batches) == 1L) { - return(.Call(nanoarrow_c_convert_array, batches[[1]], to)) + if (batch_info$n == 1L) { + array <- batch_info$stream$get_next(schema) + return(.Call(nanoarrow_c_convert_array, array, to)) } - # Otherwise, compute the final size, create another array stream, - # and call convert_array_stream() with a known size. Using .Call() - # directly because we have already type checked the inputs. - size <- .Call(nanoarrow_c_array_list_total_length, batches) - basic_stream <- .Call(nanoarrow_c_basic_array_stream, batches, schema, FALSE) - .Call( nanoarrow_c_convert_array_stream, - basic_stream, + batch_info$stream, to, - as.double(size), + as.double(batch_info$size), Inf ) } diff --git a/r/bootstrap.R b/r/bootstrap.R index c285f487b..041a4b407 100644 --- a/r/bootstrap.R +++ b/r/bootstrap.R @@ -60,7 +60,3 @@ stopifnot(file.exists("../CMakeLists.txt") && run_bundler()) f <- "src/flatcc/portable/pdiagnostic.h" lines <- readLines(f) writeLines(gsub("^#pragma", "/**/#pragma", lines), f) - -# Remove unused files -unused_files <- list.files("src", "\\.hpp$", full.names = TRUE) -unlink(unused_files) diff --git a/r/src/init.c b/r/src/init.c index 4d2109a2f..70eec867f 100644 --- a/r/src/init.c +++ b/r/src/init.c @@ -61,6 +61,7 @@ extern SEXP nanoarrow_c_ipc_array_reader_buffer(SEXP buffer_xptr); extern SEXP nanoarrow_c_ipc_array_reader_connection(SEXP con); extern SEXP nanoarrow_c_ipc_writer_connection(SEXP con); extern SEXP nanoarrow_c_ipc_writer_write_stream(SEXP writer_xptr, SEXP array_stream_xptr); +extern SEXP nanoarrow_c_collect_array_stream(SEXP array_stream_xptr, SEXP n_sexp); extern SEXP nanoarrow_c_allocate_schema(void); extern SEXP nanoarrow_c_allocate_array(void); extern SEXP nanoarrow_c_allocate_array_stream(void); @@ -144,6 +145,7 @@ static const R_CallMethodDef CallEntries[] = { {"nanoarrow_c_ipc_writer_connection", (DL_FUNC)&nanoarrow_c_ipc_writer_connection, 1}, {"nanoarrow_c_ipc_writer_write_stream", (DL_FUNC)&nanoarrow_c_ipc_writer_write_stream, 2}, + {"nanoarrow_c_collect_array_stream", (DL_FUNC)&nanoarrow_c_collect_array_stream, 2}, {"nanoarrow_c_allocate_schema", (DL_FUNC)&nanoarrow_c_allocate_schema, 0}, {"nanoarrow_c_allocate_array", (DL_FUNC)&nanoarrow_c_allocate_array, 0}, {"nanoarrow_c_allocate_array_stream", (DL_FUNC)&nanoarrow_c_allocate_array_stream, 0}, diff --git a/r/src/nanoarrow.hpp b/r/src/nanoarrow.hpp new file mode 100644 index 000000000..6f224f66f --- /dev/null +++ b/r/src/nanoarrow.hpp @@ -0,0 +1,1114 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +/// \defgroup nanoarrow_hpp Nanoarrow C++ Helpers +/// +/// The utilities provided in this file are intended to support C++ users +/// of the nanoarrow C library such that C++-style resource allocation +/// and error handling can be used with nanoarrow data structures. +/// These utilities are not intended to mirror the nanoarrow C API. + + + + + + + +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef NANOARROW_HPP_EXCEPTION_HPP_INCLUDED +#define NANOARROW_HPP_EXCEPTION_HPP_INCLUDED + +#include +#include + +#include "nanoarrow.h" + +NANOARROW_CXX_NAMESPACE_BEGIN + +/// \defgroup nanoarrow_hpp-errors Error handling helpers +/// +/// Most functions in the C API return an ArrowErrorCode to communicate +/// possible failure. Except where documented, it is usually not safe to +/// continue after a non-zero value has been returned. While the +/// nanoarrow C++ helpers do not throw any exceptions of their own, +/// these helpers are provided to facilitate using the nanoarrow C++ helpers +/// in frameworks where this is a useful error handling idiom. +/// +/// @{ + +class Exception : public std::exception { + public: + Exception(const std::string& msg) : msg_(msg) {} + const char* what() const noexcept { return msg_.c_str(); } + + private: + std::string msg_; +}; + +#if defined(NANOARROW_DEBUG) +#define _NANOARROW_THROW_NOT_OK_IMPL(NAME, EXPR, EXPR_STR) \ + do { \ + const int NAME = (EXPR); \ + if (NAME) { \ + throw nanoarrow::Exception( \ + std::string(EXPR_STR) + std::string(" failed with errno ") + \ + std::to_string(NAME) + std::string("\n * ") + std::string(__FILE__) + \ + std::string(":") + std::to_string(__LINE__) + std::string("\n")); \ + } \ + } while (0) +#else +#define _NANOARROW_THROW_NOT_OK_IMPL(NAME, EXPR, EXPR_STR) \ + do { \ + const int NAME = (EXPR); \ + if (NAME) { \ + throw nanoarrow::Exception(std::string(EXPR_STR) + \ + std::string(" failed with errno ") + \ + std::to_string(NAME)); \ + } \ + } while (0) +#endif + +#define NANOARROW_THROW_NOT_OK(EXPR) \ + _NANOARROW_THROW_NOT_OK_IMPL(_NANOARROW_MAKE_NAME(errno_status_, __COUNTER__), EXPR, \ + #EXPR) + +/// @} + +NANOARROW_CXX_NAMESPACE_END + +#endif +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef NANOARROW_HPP_OPERATORS_HPP_INCLUDED +#define NANOARROW_HPP_OPERATORS_HPP_INCLUDED + +#include +#include +#include + +#include "nanoarrow.h" + +NANOARROW_CXX_NAMESPACE_BEGIN + +namespace literals { + +/// \defgroup nanoarrow_hpp-string_view_helpers ArrowStringView helpers +/// +/// Factories and equality comparison for ArrowStringView. +/// +/// @{ + +/// \brief User literal operator allowing ArrowStringView construction like "str"_asv +#if !defined(__clang__) && (defined(__GNUC__) && __GNUC__ < 6) +inline ArrowStringView operator"" _asv(const char* data, size_t size_bytes) { + return {data, static_cast(size_bytes)}; +} +#else +inline ArrowStringView operator""_asv(const char* data, size_t size_bytes) { + return {data, static_cast(size_bytes)}; +} +#endif +// N.B. older GCC requires the space above, newer Clang forbids the space + +// @} + +} // namespace literals + +NANOARROW_CXX_NAMESPACE_END + +/// \brief Equality comparison operator between ArrowStringView +/// \ingroup nanoarrow_hpp-string_view_helpers +inline bool operator==(ArrowStringView l, ArrowStringView r) { + if (l.size_bytes != r.size_bytes) return false; + return memcmp(l.data, r.data, l.size_bytes) == 0; +} + +#endif +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef NANOARROW_HPP_UNIQUE_HPP_INCLUDED +#define NANOARROW_HPP_UNIQUE_HPP_INCLUDED + +#include + +#include "nanoarrow.h" + +NANOARROW_CXX_NAMESPACE_BEGIN + +namespace internal { + +/// \defgroup nanoarrow_hpp-unique_base Base classes for Unique wrappers +/// +/// @{ + +template +static inline void init_pointer(T* data); + +template +static inline void move_pointer(T* src, T* dst); + +template +static inline void release_pointer(T* data); + +template <> +inline void init_pointer(struct ArrowSchema* data) { + data->release = nullptr; +} + +template <> +inline void move_pointer(struct ArrowSchema* src, struct ArrowSchema* dst) { + ArrowSchemaMove(src, dst); +} + +template <> +inline void release_pointer(struct ArrowSchema* data) { + if (data->release != nullptr) { + data->release(data); + } +} + +template <> +inline void init_pointer(struct ArrowArray* data) { + data->release = nullptr; +} + +template <> +inline void move_pointer(struct ArrowArray* src, struct ArrowArray* dst) { + ArrowArrayMove(src, dst); +} + +template <> +inline void release_pointer(struct ArrowArray* data) { + if (data->release != nullptr) { + data->release(data); + } +} + +template <> +inline void init_pointer(struct ArrowArrayStream* data) { + data->release = nullptr; +} + +template <> +inline void move_pointer(struct ArrowArrayStream* src, struct ArrowArrayStream* dst) { + ArrowArrayStreamMove(src, dst); +} + +template <> +inline void release_pointer(ArrowArrayStream* data) { + if (data->release != nullptr) { + data->release(data); + } +} + +template <> +inline void init_pointer(struct ArrowBuffer* data) { + ArrowBufferInit(data); +} + +template <> +inline void move_pointer(struct ArrowBuffer* src, struct ArrowBuffer* dst) { + ArrowBufferMove(src, dst); +} + +template <> +inline void release_pointer(struct ArrowBuffer* data) { + ArrowBufferReset(data); +} + +template <> +inline void init_pointer(struct ArrowBitmap* data) { + ArrowBitmapInit(data); +} + +template <> +inline void move_pointer(struct ArrowBitmap* src, struct ArrowBitmap* dst) { + ArrowBitmapMove(src, dst); +} + +template <> +inline void release_pointer(struct ArrowBitmap* data) { + ArrowBitmapReset(data); +} + +template <> +inline void init_pointer(struct ArrowArrayView* data) { + ArrowArrayViewInitFromType(data, NANOARROW_TYPE_UNINITIALIZED); +} + +template <> +inline void move_pointer(struct ArrowArrayView* src, struct ArrowArrayView* dst) { + ArrowArrayViewMove(src, dst); +} + +template <> +inline void release_pointer(struct ArrowArrayView* data) { + ArrowArrayViewReset(data); +} + +/// \brief A unique_ptr-like base class for stack-allocatable objects +/// \tparam T The object type +template +class Unique { + public: + /// \brief Construct an invalid instance of T holding no resources + Unique() { + memset(&data_, 0, sizeof(data_)); + init_pointer(&data_); + } + + /// \brief Move and take ownership of data + Unique(T* data) { + memset(&data_, 0, sizeof(data_)); + move_pointer(data, &data_); + } + + /// \brief Move and take ownership of data wrapped by rhs + Unique(Unique&& rhs) : Unique(rhs.get()) {} + Unique& operator=(Unique&& rhs) { + reset(rhs.get()); + return *this; + } + + // These objects are not copyable + Unique(const Unique& rhs) = delete; + + /// \brief Get a pointer to the data owned by this object + T* get() noexcept { return &data_; } + const T* get() const noexcept { return &data_; } + + /// \brief Use the pointer operator to access fields of this object + T* operator->() noexcept { return &data_; } + const T* operator->() const noexcept { return &data_; } + + /// \brief Call data's release callback if valid + void reset() { release_pointer(&data_); } + + /// \brief Call data's release callback if valid and move ownership of the data + /// pointed to by data + void reset(T* data) { + reset(); + move_pointer(data, &data_); + } + + /// \brief Move ownership of this object to the data pointed to by out + void move(T* out) { move_pointer(&data_, out); } + + ~Unique() { reset(); } + + protected: + T data_; +}; + +/// @} + +} // namespace internal + +/// \defgroup nanoarrow_hpp-unique Unique object wrappers +/// +/// The Arrow C Data interface, the Arrow C Stream interface, and the +/// nanoarrow C library use stack-allocatable objects, some of which +/// require initialization or cleanup. +/// +/// @{ + +/// \brief Class wrapping a unique struct ArrowSchema +using UniqueSchema = internal::Unique; + +/// \brief Class wrapping a unique struct ArrowArray +using UniqueArray = internal::Unique; + +/// \brief Class wrapping a unique struct ArrowArrayStream +using UniqueArrayStream = internal::Unique; + +/// \brief Class wrapping a unique struct ArrowBuffer +using UniqueBuffer = internal::Unique; + +/// \brief Class wrapping a unique struct ArrowBitmap +using UniqueBitmap = internal::Unique; + +/// \brief Class wrapping a unique struct ArrowArrayView +using UniqueArrayView = internal::Unique; + +/// @} + +NANOARROW_CXX_NAMESPACE_END + +#endif +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef NANOARROW_HPP_ARRAY_STREAM_HPP_INCLUDED +#define NANOARROW_HPP_ARRAY_STREAM_HPP_INCLUDED + +#include + + + +NANOARROW_CXX_NAMESPACE_BEGIN + +/// \defgroup nanoarrow_hpp-array-stream ArrayStream helpers +/// +/// These classes provide simple ArrowArrayStream implementations that +/// can be extended to help simplify the process of creating a valid +/// ArrowArrayStream implementation or used as-is for testing. +/// +/// @{ + +/// @brief Export an ArrowArrayStream from a standard C++ class +/// @tparam T A class with methods `int GetSchema(ArrowSchema*)`, `int +/// GetNext(ArrowArray*)`, and `const char* GetLastError()` +/// +/// This class allows a standard C++ class to be exported to a generic ArrowArrayStream +/// consumer by mapping C callback invocations to method calls on an instance of the +/// object whose lifecycle is owned by the ArrowArrayStream. See VectorArrayStream for +/// minimal useful example of this pattern. +/// +/// The methods must be accessible to the ArrayStreamFactory, either as public methods or +/// by declaring ArrayStreamFactory a friend. Implementors are encouraged (but +/// not required) to implement a ToArrayStream(ArrowArrayStream*) that creates a new +/// instance owned by the ArrowArrayStream and moves the relevant data to that instance. +/// +/// An example implementation might be: +/// +/// \code +/// class StreamImpl { +/// public: +/// // Public methods (e.g., constructor) used from C++ to initialize relevant data +/// +/// // Idiomatic exporter to move data + lifecycle responsibility to an instance +/// // managed by the ArrowArrayStream callbacks +/// void ToArrayStream(struct ArrowArrayStream* out) { +/// ArrayStreamFactory::InitArrayStream(new StreamImpl(...), out); +/// } +/// +/// private: +/// // Make relevant methods available to the ArrayStreamFactory +/// friend class ArrayStreamFactory; +/// +/// // Method implementations (called from C, not normally interacted with from C++) +/// int GetSchema(struct ArrowSchema* schema) { return ENOTSUP; } +/// int GetNext(struct ArrowArray* array) { return ENOTSUP; } +/// const char* GetLastError() { nullptr; } +/// }; +/// \endcode +/// +/// An example usage might be: +/// +/// \code +/// // Call constructor and/or public methods to initialize relevant data +/// StreamImpl impl; +/// +/// // Export to ArrowArrayStream after data are finalized +/// UniqueArrayStream stream; +/// impl.ToArrayStream(stream.get()); +/// \endcode +template +class ArrayStreamFactory { + public: + /// \brief Take ownership of instance and populate callbacks of out + static void InitArrayStream(T* instance, struct ArrowArrayStream* out) { + out->get_schema = &get_schema_wrapper; + out->get_next = &get_next_wrapper; + out->get_last_error = &get_last_error_wrapper; + out->release = &release_wrapper; + out->private_data = instance; + } + + private: + static int get_schema_wrapper(struct ArrowArrayStream* stream, + struct ArrowSchema* schema) { + return reinterpret_cast(stream->private_data)->GetSchema(schema); + } + + static int get_next_wrapper(struct ArrowArrayStream* stream, struct ArrowArray* array) { + return reinterpret_cast(stream->private_data)->GetNext(array); + } + + static const char* get_last_error_wrapper(struct ArrowArrayStream* stream) { + return reinterpret_cast(stream->private_data)->GetLastError(); + } + + static void release_wrapper(struct ArrowArrayStream* stream) { + delete reinterpret_cast(stream->private_data); + stream->release = nullptr; + stream->private_data = nullptr; + } +}; + +/// \brief An empty array stream +/// +/// This class can be constructed from an struct ArrowSchema and implements a default +/// get_next() method that always marks the output ArrowArray as released. +class EmptyArrayStream { + public: + /// \brief Create an EmptyArrayStream from an ArrowSchema + /// + /// Takes ownership of schema. + EmptyArrayStream(struct ArrowSchema* schema) : schema_(schema) { + ArrowErrorInit(&error_); + } + + /// \brief Export to ArrowArrayStream + void ToArrayStream(struct ArrowArrayStream* out) { + EmptyArrayStream* impl = new EmptyArrayStream(schema_.get()); + ArrayStreamFactory::InitArrayStream(impl, out); + } + + private: + UniqueSchema schema_; + struct ArrowError error_; + + friend class ArrayStreamFactory; + + int GetSchema(struct ArrowSchema* schema) { + return ArrowSchemaDeepCopy(schema_.get(), schema); + } + + int GetNext(struct ArrowArray* array) { + array->release = nullptr; + return NANOARROW_OK; + } + + const char* GetLastError() { return error_.message; } +}; + +/// \brief Implementation of an ArrowArrayStream backed by a vector of UniqueArray objects +class VectorArrayStream { + public: + /// \brief Create a VectorArrayStream from an ArrowSchema + vector of UniqueArray + /// + /// Takes ownership of schema and moves arrays if possible. + VectorArrayStream(struct ArrowSchema* schema, std::vector arrays) + : offset_(0), schema_(schema), arrays_(std::move(arrays)) {} + + /// \brief Create a one-shot VectorArrayStream from an ArrowSchema + ArrowArray + /// + /// Takes ownership of schema and array. + VectorArrayStream(struct ArrowSchema* schema, struct ArrowArray* array) + : offset_(0), schema_(schema) { + arrays_.emplace_back(array); + } + + /// \brief Export to ArrowArrayStream + void ToArrayStream(struct ArrowArrayStream* out) { + VectorArrayStream* impl = new VectorArrayStream(schema_.get(), std::move(arrays_)); + ArrayStreamFactory::InitArrayStream(impl, out); + } + + private: + int64_t offset_; + UniqueSchema schema_; + std::vector arrays_; + + friend class ArrayStreamFactory; + + int GetSchema(struct ArrowSchema* schema) { + return ArrowSchemaDeepCopy(schema_.get(), schema); + } + + int GetNext(struct ArrowArray* array) { + if (offset_ < static_cast(arrays_.size())) { + arrays_[offset_++].move(array); + } else { + array->release = nullptr; + } + + return NANOARROW_OK; + } + + const char* GetLastError() { return ""; } +}; + +/// @} + +NANOARROW_CXX_NAMESPACE_END + +#endif +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef NANOARROW_HPP_BUFFER_HPP_INCLUDED +#define NANOARROW_HPP_BUFFER_HPP_INCLUDED + +#include +#include +#include "nanoarrow.h" + +NANOARROW_CXX_NAMESPACE_BEGIN + +namespace internal { +template +static inline void DeallocateWrappedBuffer(struct ArrowBufferAllocator* allocator, + uint8_t* ptr, int64_t size) { + NANOARROW_UNUSED(ptr); + NANOARROW_UNUSED(size); + auto obj = reinterpret_cast(allocator->private_data); + delete obj; +} +} // namespace internal + +/// \defgroup nanoarrow_hpp-buffer Buffer helpers +/// +/// Helpers to wrap buffer-like C++ objects as ArrowBuffer objects that can +/// be used to build ArrowArray objects. +/// +/// @{ + +/// \brief Initialize a buffer wrapping an arbitrary C++ object +/// +/// Initializes a buffer with a release callback that deletes the moved obj +/// when ArrowBufferReset is called. This version is useful for wrapping +/// an object whose .data() member is missing or unrelated to the buffer +/// value that is destined for a the buffer of an ArrowArray. T must be movable. +template +static inline void BufferInitWrapped(struct ArrowBuffer* buffer, T obj, + const uint8_t* data, int64_t size_bytes) { + T* obj_moved = new T(std::move(obj)); + buffer->data = const_cast(data); + buffer->size_bytes = size_bytes; + buffer->capacity_bytes = 0; + buffer->allocator = + ArrowBufferDeallocator(&internal::DeallocateWrappedBuffer, obj_moved); +} + +/// \brief Initialize a buffer wrapping a C++ sequence +/// +/// Specifically, this uses obj.data() to set the buffer address and +/// obj.size() * sizeof(T::value_type) to set the buffer size. This works +/// for STL containers like std::vector, std::array, and std::string. +/// This function moves obj and ensures it is deleted when ArrowBufferReset +/// is called. +template +void BufferInitSequence(struct ArrowBuffer* buffer, T obj) { + // Move before calling .data() (matters sometimes). + T* obj_moved = new T(std::move(obj)); + buffer->data = + const_cast(reinterpret_cast(obj_moved->data())); + buffer->size_bytes = obj_moved->size() * sizeof(typename T::value_type); + buffer->capacity_bytes = 0; + buffer->allocator = + ArrowBufferDeallocator(&internal::DeallocateWrappedBuffer, obj_moved); +} + +/// @} + +NANOARROW_CXX_NAMESPACE_END + +#endif +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef NANOARROW_HPP_VIEW_HPP_INCLUDED +#define NANOARROW_HPP_VIEW_HPP_INCLUDED + +#include +#include + + +#include "nanoarrow.h" + +NANOARROW_CXX_NAMESPACE_BEGIN + +namespace internal { +struct Nothing {}; + +template +class Maybe { + public: + Maybe() : is_something_(false) {} + Maybe(Nothing) : Maybe() {} + + Maybe(T something) // NOLINT(google-explicit-constructor) + : is_something_(true), something_(something) {} + + explicit constexpr operator bool() const { return is_something_; } + + const T& operator*() const { return something_; } + + friend inline bool operator==(Maybe l, Maybe r) { + if (l.is_something_) { + return r.is_something_ && l.something_ == r.something_; + } else if (r.is_something_) { + return l.is_something_ && l.something_ == r.something_; + } else { + return l.is_something_ == r.is_something_; + } + } + friend inline bool operator!=(Maybe l, Maybe r) { return !(l == r); } + + T value_or(T val) const { return is_something_ ? something_ : val; } + + private: + // When support for gcc 4.8 is dropped, we should also assert + // is_trivially_copyable::value + static_assert(std::is_trivially_destructible::value, ""); + + bool is_something_{}; + T something_{}; +}; + +template +struct RandomAccessRange { + Get get; + int64_t offset; + int64_t size; + + using value_type = decltype(std::declval()(0)); + + struct const_iterator { + int64_t i; + const RandomAccessRange* range; + bool operator==(const_iterator other) const { return i == other.i; } + bool operator!=(const_iterator other) const { return i != other.i; } + const_iterator& operator++() { return ++i, *this; } + value_type operator*() const { return range->get(i); } + }; + + const_iterator begin() const { return {offset, this}; } + const_iterator end() const { return {offset + size, this}; } +}; + +template +struct InputRange { + Next next; + using ValueOrFalsy = decltype(std::declval()()); + + static_assert(std::is_constructible::value, ""); + static_assert(std::is_default_constructible::value, ""); + using value_type = decltype(*std::declval()); + + struct iterator { + InputRange* range; + ValueOrFalsy stashed; + + bool operator==(iterator other) const { + return static_cast(stashed) == static_cast(other.stashed); + } + bool operator!=(iterator other) const { return !(*this == other); } + + iterator& operator++() { + stashed = range->next(); + return *this; + } + value_type operator*() const { return *stashed; } + }; + + iterator begin() { return {this, next()}; } + iterator end() { return {this, ValueOrFalsy()}; } +}; +} // namespace internal + +/// \defgroup nanoarrow_hpp-range_for Range-for helpers +/// +/// The Arrow C Data interface and the Arrow C Stream interface represent +/// data which can be iterated through using C++'s range-for statement. +/// +/// @{ + +/// \brief An object convertible to any empty optional +constexpr internal::Nothing NA{}; + +/// \brief A range-for compatible wrapper for ArrowArray of fixed size type +/// +/// Provides a sequence of optional copied from each non-null +/// slot of the wrapped array (null slots result in empty optionals). +template +class ViewArrayAs { + private: + struct Get { + const uint8_t* validity; + const void* values; + + internal::Maybe operator()(int64_t i) const { + if (validity == nullptr || ArrowBitGet(validity, i)) { + if (std::is_same::value) { + return ArrowBitGet(static_cast(values), i); + } else { + return static_cast(values)[i]; + } + } + return NA; + } + }; + + internal::RandomAccessRange range_; + + public: + ViewArrayAs(const ArrowArrayView* array_view) + : range_{ + Get{ + array_view->buffer_views[0].data.as_uint8, + array_view->buffer_views[1].data.data, + }, + array_view->offset, + array_view->length, + } {} + + ViewArrayAs(const ArrowArray* array) + : range_{ + Get{ + static_cast(array->buffers[0]), + array->buffers[1], + }, + array->offset, + array->length, + } {} + + using value_type = typename internal::RandomAccessRange::value_type; + using const_iterator = typename internal::RandomAccessRange::const_iterator; + const_iterator begin() const { return range_.begin(); } + const_iterator end() const { return range_.end(); } + value_type operator[](int64_t i) const { return range_.get(i); } +}; + +/// \brief A range-for compatible wrapper for ArrowArray of binary or utf8 +/// +/// Provides a sequence of optional referencing each non-null +/// slot of the wrapped array (null slots result in empty optionals). Large +/// binary and utf8 arrays can be wrapped by specifying 64 instead of 32 for +/// the template argument. +template +class ViewArrayAsBytes { + private: + static_assert(OffsetSize == 32 || OffsetSize == 64, ""); + using OffsetType = typename std::conditional::type; + + struct Get { + const uint8_t* validity; + const void* offsets; + const char* data; + + internal::Maybe operator()(int64_t i) const { + auto* offsets = static_cast(this->offsets); + if (validity == nullptr || ArrowBitGet(validity, i)) { + return ArrowStringView{data + offsets[i], offsets[i + 1] - offsets[i]}; + } + return NA; + } + }; + + internal::RandomAccessRange range_; + + public: + ViewArrayAsBytes(const ArrowArrayView* array_view) + : range_{ + Get{ + array_view->buffer_views[0].data.as_uint8, + array_view->buffer_views[1].data.data, + array_view->buffer_views[2].data.as_char, + }, + array_view->offset, + array_view->length, + } {} + + ViewArrayAsBytes(const ArrowArray* array) + : range_{ + Get{ + static_cast(array->buffers[0]), + array->buffers[1], + static_cast(array->buffers[2]), + }, + array->offset, + array->length, + } {} + + using value_type = typename internal::RandomAccessRange::value_type; + using const_iterator = typename internal::RandomAccessRange::const_iterator; + const_iterator begin() const { return range_.begin(); } + const_iterator end() const { return range_.end(); } + value_type operator[](int64_t i) const { return range_.get(i); } +}; + +class ViewBinaryViewArrayAsBytes { + private: + struct Get { + const uint8_t* validity; + const union ArrowBinaryView* inline_data; + const void** variadic_buffers; + + internal::Maybe operator()(int64_t i) const { + if (validity == nullptr || ArrowBitGet(validity, i)) { + const union ArrowBinaryView* bv = &inline_data[i]; + if (bv->inlined.size <= NANOARROW_BINARY_VIEW_INLINE_SIZE) { + return ArrowStringView{reinterpret_cast(bv->inlined.data), + bv->inlined.size}; + } + + return ArrowStringView{ + reinterpret_cast(variadic_buffers[bv->ref.buffer_index]) + + bv->ref.offset, + bv->ref.size}; + } + return NA; + } + }; + + internal::RandomAccessRange range_; + + public: + ViewBinaryViewArrayAsBytes(const ArrowArrayView* array_view) + : range_{ + Get{ + array_view->buffer_views[0].data.as_uint8, + array_view->buffer_views[1].data.as_binary_view, + array_view->variadic_buffers, + }, + array_view->offset, + array_view->length, + } {} + + ViewBinaryViewArrayAsBytes(const ArrowArray* array) + : range_{ + Get{static_cast(array->buffers[0]), + static_cast(array->buffers[1]), + array->buffers + NANOARROW_BINARY_VIEW_FIXED_BUFFERS}, + array->offset, + array->length, + } {} + + using value_type = typename internal::RandomAccessRange::value_type; + using const_iterator = typename internal::RandomAccessRange::const_iterator; + const_iterator begin() const { return range_.begin(); } + const_iterator end() const { return range_.end(); } + value_type operator[](int64_t i) const { return range_.get(i); } +}; + +/// \brief A range-for compatible wrapper for ArrowArray of fixed size binary +/// +/// Provides a sequence of optional referencing each non-null +/// slot of the wrapped array (null slots result in empty optionals). +class ViewArrayAsFixedSizeBytes { + private: + struct Get { + const uint8_t* validity; + const char* data; + int fixed_size; + + internal::Maybe operator()(int64_t i) const { + if (validity == nullptr || ArrowBitGet(validity, i)) { + return ArrowStringView{data + i * fixed_size, fixed_size}; + } + return NA; + } + }; + + internal::RandomAccessRange range_; + + public: + ViewArrayAsFixedSizeBytes(const ArrowArrayView* array_view, int fixed_size) + : range_{ + Get{ + array_view->buffer_views[0].data.as_uint8, + array_view->buffer_views[1].data.as_char, + fixed_size, + }, + array_view->offset, + array_view->length, + } {} + + ViewArrayAsFixedSizeBytes(const ArrowArray* array, int fixed_size) + : range_{ + Get{ + static_cast(array->buffers[0]), + static_cast(array->buffers[1]), + fixed_size, + }, + array->offset, + array->length, + } {} + + using value_type = typename internal::RandomAccessRange::value_type; + using const_iterator = typename internal::RandomAccessRange::const_iterator; + const_iterator begin() const { return range_.begin(); } + const_iterator end() const { return range_.end(); } + value_type operator[](int64_t i) const { return range_.get(i); } +}; + +/// \brief A range-for compatible wrapper for ArrowArrayStream +/// +/// Provides a sequence of ArrowArray& referencing the most recent array drawn +/// from the wrapped stream. (Each array may be moved from if necessary.) +/// When streams terminate due to an error, the error code and message are +/// available for inspection through the code() and error() member functions +/// respectively. Failure to inspect the error code will result in +/// an assertion failure. The number of arrays drawn from the stream is also +/// available through the count() member function. +class ViewArrayStream { + public: + ViewArrayStream(ArrowArrayStream* stream, ArrowErrorCode* code, ArrowError* error) + : code_{code}, error_{error} { + // Using a slightly more verbose constructor to silence a warning that occurs + // on some versions of MSVC. + range_.next.self = this; + range_.next.stream = stream; + } + + ViewArrayStream(ArrowArrayStream* stream, ArrowError* error) + : ViewArrayStream{stream, &internal_code_, error} {} + + ViewArrayStream(ArrowArrayStream* stream) + : ViewArrayStream{stream, &internal_code_, &internal_error_} {} + + // disable copy/move of this view, since its error references may point into itself + ViewArrayStream(ViewArrayStream&&) = delete; + ViewArrayStream& operator=(ViewArrayStream&&) = delete; + ViewArrayStream(const ViewArrayStream&) = delete; + ViewArrayStream& operator=(const ViewArrayStream&) = delete; + + // ensure the error code of this stream was accessed at least once + ~ViewArrayStream() { NANOARROW_DCHECK(code_was_accessed_); } + + private: + struct Next { + ViewArrayStream* self; + ArrowArrayStream* stream; + UniqueArray array; + + ArrowArray* operator()() { + array.reset(); + *self->code_ = ArrowArrayStreamGetNext(stream, array.get(), self->error_); + + if (array->release != nullptr) { + NANOARROW_DCHECK(*self->code_ == NANOARROW_OK); + ++self->count_; + return array.get(); + } + + return nullptr; + } + }; + + internal::InputRange range_; + ArrowErrorCode* code_; + ArrowError* error_; + ArrowError internal_error_ = {}; + ArrowErrorCode internal_code_; + bool code_was_accessed_ = false; + int count_ = 0; + + public: + using value_type = typename internal::InputRange::value_type; + using iterator = typename internal::InputRange::iterator; + iterator begin() { return range_.begin(); } + iterator end() { return range_.end(); } + + /// The error code which caused this stream to terminate, if any. + ArrowErrorCode code() { + code_was_accessed_ = true; + return *code_; + } + /// The error message which caused this stream to terminate, if any. + ArrowError* error() { return error_; } + + /// The number of arrays streamed so far. + int count() const { return count_; } +}; + +/// @} + +NANOARROW_CXX_NAMESPACE_END + +#endif diff --git a/r/src/nanoarrow_cpp.cc b/r/src/nanoarrow_cpp.cc index 9c0e38d68..c8df594f9 100644 --- a/r/src/nanoarrow_cpp.cc +++ b/r/src/nanoarrow_cpp.cc @@ -26,6 +26,9 @@ #include #include +#include "nanoarrow.hpp" +#include "nanoarrow/r.h" + // Without this infrastructure, it's possible to check that all objects // are released by running devtools::test(); gc() in a fresh session and // making sure that nanoarrow:::preserved_count() is zero afterward. @@ -201,3 +204,80 @@ extern "C" void nanoarrow_preserve_and_release_on_other_thread(SEXP obj) { std::thread worker([obj] { nanoarrow_release_sexp(obj); }); worker.join(); } + +struct ArrayVector { + nanoarrow::UniqueSchema schema; + nanoarrow::UniqueArray batch; + std::vector vec; +}; + +static void release_array_vector_xptr(SEXP array_vector_xptr) { + auto ptr = reinterpret_cast(R_ExternalPtrAddr(array_vector_xptr)); + if (ptr != NULL) { + delete ptr; + } +} + +extern "C" SEXP nanoarrow_c_collect_array_stream(SEXP array_stream_xptr, SEXP n_sexp) { + struct ArrowArrayStream* array_stream = + nanoarrow_array_stream_from_xptr(array_stream_xptr); + + double n_real = REAL(n_sexp)[0]; + int n; + if (R_FINITE(n_real)) { + n = (int)n_real; + } else { + n = INT_MAX; + } + + auto array_vector = new ArrayVector(); + SEXP array_vector_xptr = + PROTECT(R_MakeExternalPtr(array_vector, R_NilValue, R_NilValue)); + R_RegisterCFinalizer(array_vector_xptr, &release_array_vector_xptr); + + struct ArrowError error; + ArrowErrorInit(&error); + int code = ArrowArrayStreamGetSchema(array_stream, array_vector->schema.get(), &error); + if (code != NANOARROW_OK) { + Rf_error("ArrowArrayStreamGetSchema() failed (%d): %s", code, error.message); + } + + int64_t n_actual = 0; + int64_t size = 0; + while (n > 0) { + code = ArrowArrayStreamGetNext(array_stream, array_vector->batch.get(), &error); + if (code != NANOARROW_OK) { + Rf_error("ArrowArrayStreamGetNext() failed (%d): %s", code, error.message); + } + + if (array_vector->batch->release == nullptr) { + break; + } + + size += array_vector->batch->length; + ++n_actual; + --n; + array_vector->vec.push_back(std::move(array_vector->batch)); + array_vector->batch.reset(); + + R_CheckUserInterrupt(); + } + + SEXP array_stream_out_xptr = PROTECT(nanoarrow_array_stream_owning_xptr()); + struct ArrowArrayStream* array_stream_out = + nanoarrow_output_array_stream_from_xptr(array_stream_out_xptr); + + nanoarrow::VectorArrayStream(array_vector->schema.get(), std::move(array_vector->vec)) + .ToArrayStream(array_stream_out); + + SEXP size_sexp = PROTECT(Rf_ScalarReal(size)); + SEXP n_actual_sexp = PROTECT(Rf_ScalarReal(n_actual)); + const char* names[] = {"stream", "size", "n", ""}; + SEXP out = PROTECT(Rf_mkNamed(VECSXP, names)); + SET_VECTOR_ELT(out, 0, array_stream_out_xptr); + SET_VECTOR_ELT(out, 1, size_sexp); + SET_VECTOR_ELT(out, 2, n_actual_sexp); + + UNPROTECT(5); + return out; +} diff --git a/r/src/nanoarrow_ipc.hpp b/r/src/nanoarrow_ipc.hpp new file mode 100644 index 000000000..7311aca19 --- /dev/null +++ b/r/src/nanoarrow_ipc.hpp @@ -0,0 +1,209 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef NANOARROW_IPC_HPP_INCLUDED +#define NANOARROW_IPC_HPP_INCLUDED + +#include "nanoarrow.hpp" +#include "nanoarrow_ipc.h" + +namespace nanoarrow { + +namespace internal { + +template <> +inline void init_pointer(struct ArrowIpcSharedBuffer* data) { + init_pointer(&data->private_src); +} + +template <> +inline void move_pointer(struct ArrowIpcSharedBuffer* src, + struct ArrowIpcSharedBuffer* dst) { + move_pointer(&src->private_src, &dst->private_src); +} + +template <> +inline void release_pointer(struct ArrowIpcSharedBuffer* data) { + ArrowIpcSharedBufferReset(data); +} + +template <> +inline void init_pointer(struct ArrowIpcDecoder* data) { + data->private_data = nullptr; +} + +template <> +inline void move_pointer(struct ArrowIpcDecoder* src, struct ArrowIpcDecoder* dst) { + memcpy(dst, src, sizeof(struct ArrowIpcDecoder)); + src->private_data = nullptr; +} + +template <> +inline void release_pointer(struct ArrowIpcDecoder* data) { + ArrowIpcDecoderReset(data); +} + +template <> +inline void init_pointer(struct ArrowIpcFooter* data) { + ArrowIpcFooterInit(data); +} + +template <> +inline void move_pointer(struct ArrowIpcFooter* src, struct ArrowIpcFooter* dst) { + ArrowSchemaMove(&src->schema, &dst->schema); + ArrowBufferMove(&src->record_batch_blocks, &dst->record_batch_blocks); +} + +template <> +inline void release_pointer(struct ArrowIpcFooter* data) { + ArrowIpcFooterReset(data); +} + +template <> +inline void init_pointer(struct ArrowIpcEncoder* data) { + data->private_data = nullptr; +} + +template <> +inline void move_pointer(struct ArrowIpcEncoder* src, struct ArrowIpcEncoder* dst) { + memcpy(dst, src, sizeof(struct ArrowIpcEncoder)); + src->private_data = nullptr; +} + +template <> +inline void release_pointer(struct ArrowIpcEncoder* data) { + ArrowIpcEncoderReset(data); +} + +template <> +inline void init_pointer(struct ArrowIpcDecompressor* data) { + data->private_data = nullptr; +} + +template <> +inline void move_pointer(struct ArrowIpcDecompressor* src, + struct ArrowIpcDecompressor* dst) { + memcpy(dst, src, sizeof(struct ArrowIpcDecompressor)); + src->release = nullptr; +} + +template <> +inline void release_pointer(struct ArrowIpcDecompressor* data) { + if (data->release != nullptr) { + data->release(data); + } +} + +template <> +inline void init_pointer(struct ArrowIpcInputStream* data) { + data->release = nullptr; +} + +template <> +inline void move_pointer(struct ArrowIpcInputStream* src, + struct ArrowIpcInputStream* dst) { + memcpy(dst, src, sizeof(struct ArrowIpcInputStream)); + src->release = nullptr; +} + +template <> +inline void release_pointer(struct ArrowIpcInputStream* data) { + if (data->release != nullptr) { + data->release(data); + } +} + +template <> +inline void init_pointer(struct ArrowIpcOutputStream* data) { + data->release = nullptr; +} + +template <> +inline void move_pointer(struct ArrowIpcOutputStream* src, + struct ArrowIpcOutputStream* dst) { + memcpy(dst, src, sizeof(struct ArrowIpcOutputStream)); + src->release = nullptr; +} + +template <> +inline void release_pointer(struct ArrowIpcOutputStream* data) { + if (data->release != nullptr) { + data->release(data); + } +} + +template <> +inline void init_pointer(struct ArrowIpcWriter* data) { + data->private_data = nullptr; +} + +template <> +inline void move_pointer(struct ArrowIpcWriter* src, struct ArrowIpcWriter* dst) { + memcpy(dst, src, sizeof(struct ArrowIpcWriter)); + src->private_data = nullptr; +} + +template <> +inline void release_pointer(struct ArrowIpcWriter* data) { + ArrowIpcWriterReset(data); +} + +} // namespace internal +} // namespace nanoarrow + +namespace nanoarrow { + +namespace ipc { + +/// \defgroup nanoarrow_ipc_hpp-unique Unique object wrappers +/// +/// Extends the unique object wrappers in nanoarrow.hpp to include C structs +/// defined in the nanoarrow_ipc.h header. +/// +/// @{ + +/// \brief Class wrapping a unique struct ArrowIpcSharedBuffer +using UniqueSharedBuffer = internal::Unique; + +/// \brief Class wrapping a unique struct ArrowIpcDecoder +using UniqueDecoder = internal::Unique; + +/// \brief Class wrapping a unique struct ArrowIpcFooter +using UniqueFooter = internal::Unique; + +/// \brief Class wrapping a unique struct ArrowIpcEncoder +using UniqueEncoder = internal::Unique; + +/// \brief Class wrapping a unique struct ArrowIpcDecompressor +using UniqueDecompressor = internal::Unique; + +/// \brief Class wrapping a unique struct ArrowIpcInputStream +using UniqueInputStream = internal::Unique; + +/// \brief Class wrapping a unique struct ArrowIpcOutputStream +using UniqueOutputStream = internal::Unique; + +/// \brief Class wrapping a unique struct ArrowIpcWriter +using UniqueWriter = internal::Unique; + +/// @} + +} // namespace ipc + +} // namespace nanoarrow + +#endif diff --git a/r/tools/make-callentries.R b/r/tools/make-callentries.R index 403169c3c..552ff05fb 100644 --- a/r/tools/make-callentries.R +++ b/r/tools/make-callentries.R @@ -21,7 +21,7 @@ library(tidyverse) -src_files <- list.files("src", "\\.(c|cpp)$", full.names = TRUE) %>% +src_files <- list.files("src", "\\.(c|cc)$", full.names = TRUE) %>% setdiff("src/init.c") src_sources <- src_files %>% set_names() %>% map_chr(readr::read_file) From 9f0d45faae235ec389ea980c4be2c1ef12ff98b5 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 20 Nov 2025 12:05:00 -0700 Subject: [PATCH 2/7] clang --- r/src/nanoarrow.hpp | 9 --------- 1 file changed, 9 deletions(-) diff --git a/r/src/nanoarrow.hpp b/r/src/nanoarrow.hpp index 6f224f66f..c256ce10b 100644 --- a/r/src/nanoarrow.hpp +++ b/r/src/nanoarrow.hpp @@ -22,12 +22,6 @@ /// and error handling can be used with nanoarrow data structures. /// These utilities are not intended to mirror the nanoarrow C API. - - - - - - // Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information @@ -417,8 +411,6 @@ NANOARROW_CXX_NAMESPACE_END #include - - NANOARROW_CXX_NAMESPACE_BEGIN /// \defgroup nanoarrow_hpp-array-stream ArrayStream helpers @@ -707,7 +699,6 @@ NANOARROW_CXX_NAMESPACE_END #include #include - #include "nanoarrow.h" NANOARROW_CXX_NAMESPACE_BEGIN From 62b39dcbffc0aebbb031ca4c52f0e7afc11ecc8f Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 3 Dec 2025 20:45:13 -0600 Subject: [PATCH 3/7] doc --- r/src/nanoarrow_cpp.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/r/src/nanoarrow_cpp.cc b/r/src/nanoarrow_cpp.cc index c8df594f9..112c46c26 100644 --- a/r/src/nanoarrow_cpp.cc +++ b/r/src/nanoarrow_cpp.cc @@ -205,12 +205,17 @@ extern "C" void nanoarrow_preserve_and_release_on_other_thread(SEXP obj) { worker.join(); } +// Collector utility for iterating over and collecting batches +// Keeping this all in a single object reduces the amount of C++ deletion +// we need to keep track of. struct ArrayVector { nanoarrow::UniqueSchema schema; nanoarrow::UniqueArray batch; std::vector vec; }; +// Use an external pointer to handle deleting the ArrayVector in +// the event of a longjmp static void release_array_vector_xptr(SEXP array_vector_xptr) { auto ptr = reinterpret_cast(R_ExternalPtrAddr(array_vector_xptr)); if (ptr != NULL) { @@ -218,6 +223,9 @@ static void release_array_vector_xptr(SEXP array_vector_xptr) { } } +// Collects the entire array stream and collects the total number of rows and +// total number of batches so that the R code on the end of this can decide +// how best to proceed. extern "C" SEXP nanoarrow_c_collect_array_stream(SEXP array_stream_xptr, SEXP n_sexp) { struct ArrowArrayStream* array_stream = nanoarrow_array_stream_from_xptr(array_stream_xptr); From 9716fc2c7b7bdf0558523ce50afbaa06a2c6e571 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 3 Dec 2025 20:49:08 -0600 Subject: [PATCH 4/7] Update r/src/nanoarrow_cpp.cc Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- r/src/nanoarrow_cpp.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/src/nanoarrow_cpp.cc b/r/src/nanoarrow_cpp.cc index 112c46c26..83ebe07ac 100644 --- a/r/src/nanoarrow_cpp.cc +++ b/r/src/nanoarrow_cpp.cc @@ -218,7 +218,7 @@ struct ArrayVector { // the event of a longjmp static void release_array_vector_xptr(SEXP array_vector_xptr) { auto ptr = reinterpret_cast(R_ExternalPtrAddr(array_vector_xptr)); - if (ptr != NULL) { + if (ptr != nullptr) { delete ptr; } } From d011a065f32a7bc50cc3d0278d4a0836fd2c1f1b Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 3 Dec 2025 20:49:21 -0600 Subject: [PATCH 5/7] Update r/tools/make-callentries.R Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- r/tools/make-callentries.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/tools/make-callentries.R b/r/tools/make-callentries.R index 552ff05fb..f40fe6541 100644 --- a/r/tools/make-callentries.R +++ b/r/tools/make-callentries.R @@ -21,7 +21,7 @@ library(tidyverse) -src_files <- list.files("src", "\\.(c|cc)$", full.names = TRUE) %>% +src_files <- list.files("src", "\\.(c|cc|cpp)$", full.names = TRUE) %>% setdiff("src/init.c") src_sources <- src_files %>% set_names() %>% map_chr(readr::read_file) From 3b889fe7620607787365b073554256d52f922aa1 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 3 Dec 2025 20:51:11 -0600 Subject: [PATCH 6/7] remove --- r/src/nanoarrow.hpp | 1105 --------------------------------------- r/src/nanoarrow_ipc.hpp | 209 -------- 2 files changed, 1314 deletions(-) delete mode 100644 r/src/nanoarrow.hpp delete mode 100644 r/src/nanoarrow_ipc.hpp diff --git a/r/src/nanoarrow.hpp b/r/src/nanoarrow.hpp deleted file mode 100644 index c256ce10b..000000000 --- a/r/src/nanoarrow.hpp +++ /dev/null @@ -1,1105 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -/// \defgroup nanoarrow_hpp Nanoarrow C++ Helpers -/// -/// The utilities provided in this file are intended to support C++ users -/// of the nanoarrow C library such that C++-style resource allocation -/// and error handling can be used with nanoarrow data structures. -/// These utilities are not intended to mirror the nanoarrow C API. - -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#ifndef NANOARROW_HPP_EXCEPTION_HPP_INCLUDED -#define NANOARROW_HPP_EXCEPTION_HPP_INCLUDED - -#include -#include - -#include "nanoarrow.h" - -NANOARROW_CXX_NAMESPACE_BEGIN - -/// \defgroup nanoarrow_hpp-errors Error handling helpers -/// -/// Most functions in the C API return an ArrowErrorCode to communicate -/// possible failure. Except where documented, it is usually not safe to -/// continue after a non-zero value has been returned. While the -/// nanoarrow C++ helpers do not throw any exceptions of their own, -/// these helpers are provided to facilitate using the nanoarrow C++ helpers -/// in frameworks where this is a useful error handling idiom. -/// -/// @{ - -class Exception : public std::exception { - public: - Exception(const std::string& msg) : msg_(msg) {} - const char* what() const noexcept { return msg_.c_str(); } - - private: - std::string msg_; -}; - -#if defined(NANOARROW_DEBUG) -#define _NANOARROW_THROW_NOT_OK_IMPL(NAME, EXPR, EXPR_STR) \ - do { \ - const int NAME = (EXPR); \ - if (NAME) { \ - throw nanoarrow::Exception( \ - std::string(EXPR_STR) + std::string(" failed with errno ") + \ - std::to_string(NAME) + std::string("\n * ") + std::string(__FILE__) + \ - std::string(":") + std::to_string(__LINE__) + std::string("\n")); \ - } \ - } while (0) -#else -#define _NANOARROW_THROW_NOT_OK_IMPL(NAME, EXPR, EXPR_STR) \ - do { \ - const int NAME = (EXPR); \ - if (NAME) { \ - throw nanoarrow::Exception(std::string(EXPR_STR) + \ - std::string(" failed with errno ") + \ - std::to_string(NAME)); \ - } \ - } while (0) -#endif - -#define NANOARROW_THROW_NOT_OK(EXPR) \ - _NANOARROW_THROW_NOT_OK_IMPL(_NANOARROW_MAKE_NAME(errno_status_, __COUNTER__), EXPR, \ - #EXPR) - -/// @} - -NANOARROW_CXX_NAMESPACE_END - -#endif -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#ifndef NANOARROW_HPP_OPERATORS_HPP_INCLUDED -#define NANOARROW_HPP_OPERATORS_HPP_INCLUDED - -#include -#include -#include - -#include "nanoarrow.h" - -NANOARROW_CXX_NAMESPACE_BEGIN - -namespace literals { - -/// \defgroup nanoarrow_hpp-string_view_helpers ArrowStringView helpers -/// -/// Factories and equality comparison for ArrowStringView. -/// -/// @{ - -/// \brief User literal operator allowing ArrowStringView construction like "str"_asv -#if !defined(__clang__) && (defined(__GNUC__) && __GNUC__ < 6) -inline ArrowStringView operator"" _asv(const char* data, size_t size_bytes) { - return {data, static_cast(size_bytes)}; -} -#else -inline ArrowStringView operator""_asv(const char* data, size_t size_bytes) { - return {data, static_cast(size_bytes)}; -} -#endif -// N.B. older GCC requires the space above, newer Clang forbids the space - -// @} - -} // namespace literals - -NANOARROW_CXX_NAMESPACE_END - -/// \brief Equality comparison operator between ArrowStringView -/// \ingroup nanoarrow_hpp-string_view_helpers -inline bool operator==(ArrowStringView l, ArrowStringView r) { - if (l.size_bytes != r.size_bytes) return false; - return memcmp(l.data, r.data, l.size_bytes) == 0; -} - -#endif -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#ifndef NANOARROW_HPP_UNIQUE_HPP_INCLUDED -#define NANOARROW_HPP_UNIQUE_HPP_INCLUDED - -#include - -#include "nanoarrow.h" - -NANOARROW_CXX_NAMESPACE_BEGIN - -namespace internal { - -/// \defgroup nanoarrow_hpp-unique_base Base classes for Unique wrappers -/// -/// @{ - -template -static inline void init_pointer(T* data); - -template -static inline void move_pointer(T* src, T* dst); - -template -static inline void release_pointer(T* data); - -template <> -inline void init_pointer(struct ArrowSchema* data) { - data->release = nullptr; -} - -template <> -inline void move_pointer(struct ArrowSchema* src, struct ArrowSchema* dst) { - ArrowSchemaMove(src, dst); -} - -template <> -inline void release_pointer(struct ArrowSchema* data) { - if (data->release != nullptr) { - data->release(data); - } -} - -template <> -inline void init_pointer(struct ArrowArray* data) { - data->release = nullptr; -} - -template <> -inline void move_pointer(struct ArrowArray* src, struct ArrowArray* dst) { - ArrowArrayMove(src, dst); -} - -template <> -inline void release_pointer(struct ArrowArray* data) { - if (data->release != nullptr) { - data->release(data); - } -} - -template <> -inline void init_pointer(struct ArrowArrayStream* data) { - data->release = nullptr; -} - -template <> -inline void move_pointer(struct ArrowArrayStream* src, struct ArrowArrayStream* dst) { - ArrowArrayStreamMove(src, dst); -} - -template <> -inline void release_pointer(ArrowArrayStream* data) { - if (data->release != nullptr) { - data->release(data); - } -} - -template <> -inline void init_pointer(struct ArrowBuffer* data) { - ArrowBufferInit(data); -} - -template <> -inline void move_pointer(struct ArrowBuffer* src, struct ArrowBuffer* dst) { - ArrowBufferMove(src, dst); -} - -template <> -inline void release_pointer(struct ArrowBuffer* data) { - ArrowBufferReset(data); -} - -template <> -inline void init_pointer(struct ArrowBitmap* data) { - ArrowBitmapInit(data); -} - -template <> -inline void move_pointer(struct ArrowBitmap* src, struct ArrowBitmap* dst) { - ArrowBitmapMove(src, dst); -} - -template <> -inline void release_pointer(struct ArrowBitmap* data) { - ArrowBitmapReset(data); -} - -template <> -inline void init_pointer(struct ArrowArrayView* data) { - ArrowArrayViewInitFromType(data, NANOARROW_TYPE_UNINITIALIZED); -} - -template <> -inline void move_pointer(struct ArrowArrayView* src, struct ArrowArrayView* dst) { - ArrowArrayViewMove(src, dst); -} - -template <> -inline void release_pointer(struct ArrowArrayView* data) { - ArrowArrayViewReset(data); -} - -/// \brief A unique_ptr-like base class for stack-allocatable objects -/// \tparam T The object type -template -class Unique { - public: - /// \brief Construct an invalid instance of T holding no resources - Unique() { - memset(&data_, 0, sizeof(data_)); - init_pointer(&data_); - } - - /// \brief Move and take ownership of data - Unique(T* data) { - memset(&data_, 0, sizeof(data_)); - move_pointer(data, &data_); - } - - /// \brief Move and take ownership of data wrapped by rhs - Unique(Unique&& rhs) : Unique(rhs.get()) {} - Unique& operator=(Unique&& rhs) { - reset(rhs.get()); - return *this; - } - - // These objects are not copyable - Unique(const Unique& rhs) = delete; - - /// \brief Get a pointer to the data owned by this object - T* get() noexcept { return &data_; } - const T* get() const noexcept { return &data_; } - - /// \brief Use the pointer operator to access fields of this object - T* operator->() noexcept { return &data_; } - const T* operator->() const noexcept { return &data_; } - - /// \brief Call data's release callback if valid - void reset() { release_pointer(&data_); } - - /// \brief Call data's release callback if valid and move ownership of the data - /// pointed to by data - void reset(T* data) { - reset(); - move_pointer(data, &data_); - } - - /// \brief Move ownership of this object to the data pointed to by out - void move(T* out) { move_pointer(&data_, out); } - - ~Unique() { reset(); } - - protected: - T data_; -}; - -/// @} - -} // namespace internal - -/// \defgroup nanoarrow_hpp-unique Unique object wrappers -/// -/// The Arrow C Data interface, the Arrow C Stream interface, and the -/// nanoarrow C library use stack-allocatable objects, some of which -/// require initialization or cleanup. -/// -/// @{ - -/// \brief Class wrapping a unique struct ArrowSchema -using UniqueSchema = internal::Unique; - -/// \brief Class wrapping a unique struct ArrowArray -using UniqueArray = internal::Unique; - -/// \brief Class wrapping a unique struct ArrowArrayStream -using UniqueArrayStream = internal::Unique; - -/// \brief Class wrapping a unique struct ArrowBuffer -using UniqueBuffer = internal::Unique; - -/// \brief Class wrapping a unique struct ArrowBitmap -using UniqueBitmap = internal::Unique; - -/// \brief Class wrapping a unique struct ArrowArrayView -using UniqueArrayView = internal::Unique; - -/// @} - -NANOARROW_CXX_NAMESPACE_END - -#endif -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#ifndef NANOARROW_HPP_ARRAY_STREAM_HPP_INCLUDED -#define NANOARROW_HPP_ARRAY_STREAM_HPP_INCLUDED - -#include - -NANOARROW_CXX_NAMESPACE_BEGIN - -/// \defgroup nanoarrow_hpp-array-stream ArrayStream helpers -/// -/// These classes provide simple ArrowArrayStream implementations that -/// can be extended to help simplify the process of creating a valid -/// ArrowArrayStream implementation or used as-is for testing. -/// -/// @{ - -/// @brief Export an ArrowArrayStream from a standard C++ class -/// @tparam T A class with methods `int GetSchema(ArrowSchema*)`, `int -/// GetNext(ArrowArray*)`, and `const char* GetLastError()` -/// -/// This class allows a standard C++ class to be exported to a generic ArrowArrayStream -/// consumer by mapping C callback invocations to method calls on an instance of the -/// object whose lifecycle is owned by the ArrowArrayStream. See VectorArrayStream for -/// minimal useful example of this pattern. -/// -/// The methods must be accessible to the ArrayStreamFactory, either as public methods or -/// by declaring ArrayStreamFactory a friend. Implementors are encouraged (but -/// not required) to implement a ToArrayStream(ArrowArrayStream*) that creates a new -/// instance owned by the ArrowArrayStream and moves the relevant data to that instance. -/// -/// An example implementation might be: -/// -/// \code -/// class StreamImpl { -/// public: -/// // Public methods (e.g., constructor) used from C++ to initialize relevant data -/// -/// // Idiomatic exporter to move data + lifecycle responsibility to an instance -/// // managed by the ArrowArrayStream callbacks -/// void ToArrayStream(struct ArrowArrayStream* out) { -/// ArrayStreamFactory::InitArrayStream(new StreamImpl(...), out); -/// } -/// -/// private: -/// // Make relevant methods available to the ArrayStreamFactory -/// friend class ArrayStreamFactory; -/// -/// // Method implementations (called from C, not normally interacted with from C++) -/// int GetSchema(struct ArrowSchema* schema) { return ENOTSUP; } -/// int GetNext(struct ArrowArray* array) { return ENOTSUP; } -/// const char* GetLastError() { nullptr; } -/// }; -/// \endcode -/// -/// An example usage might be: -/// -/// \code -/// // Call constructor and/or public methods to initialize relevant data -/// StreamImpl impl; -/// -/// // Export to ArrowArrayStream after data are finalized -/// UniqueArrayStream stream; -/// impl.ToArrayStream(stream.get()); -/// \endcode -template -class ArrayStreamFactory { - public: - /// \brief Take ownership of instance and populate callbacks of out - static void InitArrayStream(T* instance, struct ArrowArrayStream* out) { - out->get_schema = &get_schema_wrapper; - out->get_next = &get_next_wrapper; - out->get_last_error = &get_last_error_wrapper; - out->release = &release_wrapper; - out->private_data = instance; - } - - private: - static int get_schema_wrapper(struct ArrowArrayStream* stream, - struct ArrowSchema* schema) { - return reinterpret_cast(stream->private_data)->GetSchema(schema); - } - - static int get_next_wrapper(struct ArrowArrayStream* stream, struct ArrowArray* array) { - return reinterpret_cast(stream->private_data)->GetNext(array); - } - - static const char* get_last_error_wrapper(struct ArrowArrayStream* stream) { - return reinterpret_cast(stream->private_data)->GetLastError(); - } - - static void release_wrapper(struct ArrowArrayStream* stream) { - delete reinterpret_cast(stream->private_data); - stream->release = nullptr; - stream->private_data = nullptr; - } -}; - -/// \brief An empty array stream -/// -/// This class can be constructed from an struct ArrowSchema and implements a default -/// get_next() method that always marks the output ArrowArray as released. -class EmptyArrayStream { - public: - /// \brief Create an EmptyArrayStream from an ArrowSchema - /// - /// Takes ownership of schema. - EmptyArrayStream(struct ArrowSchema* schema) : schema_(schema) { - ArrowErrorInit(&error_); - } - - /// \brief Export to ArrowArrayStream - void ToArrayStream(struct ArrowArrayStream* out) { - EmptyArrayStream* impl = new EmptyArrayStream(schema_.get()); - ArrayStreamFactory::InitArrayStream(impl, out); - } - - private: - UniqueSchema schema_; - struct ArrowError error_; - - friend class ArrayStreamFactory; - - int GetSchema(struct ArrowSchema* schema) { - return ArrowSchemaDeepCopy(schema_.get(), schema); - } - - int GetNext(struct ArrowArray* array) { - array->release = nullptr; - return NANOARROW_OK; - } - - const char* GetLastError() { return error_.message; } -}; - -/// \brief Implementation of an ArrowArrayStream backed by a vector of UniqueArray objects -class VectorArrayStream { - public: - /// \brief Create a VectorArrayStream from an ArrowSchema + vector of UniqueArray - /// - /// Takes ownership of schema and moves arrays if possible. - VectorArrayStream(struct ArrowSchema* schema, std::vector arrays) - : offset_(0), schema_(schema), arrays_(std::move(arrays)) {} - - /// \brief Create a one-shot VectorArrayStream from an ArrowSchema + ArrowArray - /// - /// Takes ownership of schema and array. - VectorArrayStream(struct ArrowSchema* schema, struct ArrowArray* array) - : offset_(0), schema_(schema) { - arrays_.emplace_back(array); - } - - /// \brief Export to ArrowArrayStream - void ToArrayStream(struct ArrowArrayStream* out) { - VectorArrayStream* impl = new VectorArrayStream(schema_.get(), std::move(arrays_)); - ArrayStreamFactory::InitArrayStream(impl, out); - } - - private: - int64_t offset_; - UniqueSchema schema_; - std::vector arrays_; - - friend class ArrayStreamFactory; - - int GetSchema(struct ArrowSchema* schema) { - return ArrowSchemaDeepCopy(schema_.get(), schema); - } - - int GetNext(struct ArrowArray* array) { - if (offset_ < static_cast(arrays_.size())) { - arrays_[offset_++].move(array); - } else { - array->release = nullptr; - } - - return NANOARROW_OK; - } - - const char* GetLastError() { return ""; } -}; - -/// @} - -NANOARROW_CXX_NAMESPACE_END - -#endif -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#ifndef NANOARROW_HPP_BUFFER_HPP_INCLUDED -#define NANOARROW_HPP_BUFFER_HPP_INCLUDED - -#include -#include -#include "nanoarrow.h" - -NANOARROW_CXX_NAMESPACE_BEGIN - -namespace internal { -template -static inline void DeallocateWrappedBuffer(struct ArrowBufferAllocator* allocator, - uint8_t* ptr, int64_t size) { - NANOARROW_UNUSED(ptr); - NANOARROW_UNUSED(size); - auto obj = reinterpret_cast(allocator->private_data); - delete obj; -} -} // namespace internal - -/// \defgroup nanoarrow_hpp-buffer Buffer helpers -/// -/// Helpers to wrap buffer-like C++ objects as ArrowBuffer objects that can -/// be used to build ArrowArray objects. -/// -/// @{ - -/// \brief Initialize a buffer wrapping an arbitrary C++ object -/// -/// Initializes a buffer with a release callback that deletes the moved obj -/// when ArrowBufferReset is called. This version is useful for wrapping -/// an object whose .data() member is missing or unrelated to the buffer -/// value that is destined for a the buffer of an ArrowArray. T must be movable. -template -static inline void BufferInitWrapped(struct ArrowBuffer* buffer, T obj, - const uint8_t* data, int64_t size_bytes) { - T* obj_moved = new T(std::move(obj)); - buffer->data = const_cast(data); - buffer->size_bytes = size_bytes; - buffer->capacity_bytes = 0; - buffer->allocator = - ArrowBufferDeallocator(&internal::DeallocateWrappedBuffer, obj_moved); -} - -/// \brief Initialize a buffer wrapping a C++ sequence -/// -/// Specifically, this uses obj.data() to set the buffer address and -/// obj.size() * sizeof(T::value_type) to set the buffer size. This works -/// for STL containers like std::vector, std::array, and std::string. -/// This function moves obj and ensures it is deleted when ArrowBufferReset -/// is called. -template -void BufferInitSequence(struct ArrowBuffer* buffer, T obj) { - // Move before calling .data() (matters sometimes). - T* obj_moved = new T(std::move(obj)); - buffer->data = - const_cast(reinterpret_cast(obj_moved->data())); - buffer->size_bytes = obj_moved->size() * sizeof(typename T::value_type); - buffer->capacity_bytes = 0; - buffer->allocator = - ArrowBufferDeallocator(&internal::DeallocateWrappedBuffer, obj_moved); -} - -/// @} - -NANOARROW_CXX_NAMESPACE_END - -#endif -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#ifndef NANOARROW_HPP_VIEW_HPP_INCLUDED -#define NANOARROW_HPP_VIEW_HPP_INCLUDED - -#include -#include - -#include "nanoarrow.h" - -NANOARROW_CXX_NAMESPACE_BEGIN - -namespace internal { -struct Nothing {}; - -template -class Maybe { - public: - Maybe() : is_something_(false) {} - Maybe(Nothing) : Maybe() {} - - Maybe(T something) // NOLINT(google-explicit-constructor) - : is_something_(true), something_(something) {} - - explicit constexpr operator bool() const { return is_something_; } - - const T& operator*() const { return something_; } - - friend inline bool operator==(Maybe l, Maybe r) { - if (l.is_something_) { - return r.is_something_ && l.something_ == r.something_; - } else if (r.is_something_) { - return l.is_something_ && l.something_ == r.something_; - } else { - return l.is_something_ == r.is_something_; - } - } - friend inline bool operator!=(Maybe l, Maybe r) { return !(l == r); } - - T value_or(T val) const { return is_something_ ? something_ : val; } - - private: - // When support for gcc 4.8 is dropped, we should also assert - // is_trivially_copyable::value - static_assert(std::is_trivially_destructible::value, ""); - - bool is_something_{}; - T something_{}; -}; - -template -struct RandomAccessRange { - Get get; - int64_t offset; - int64_t size; - - using value_type = decltype(std::declval()(0)); - - struct const_iterator { - int64_t i; - const RandomAccessRange* range; - bool operator==(const_iterator other) const { return i == other.i; } - bool operator!=(const_iterator other) const { return i != other.i; } - const_iterator& operator++() { return ++i, *this; } - value_type operator*() const { return range->get(i); } - }; - - const_iterator begin() const { return {offset, this}; } - const_iterator end() const { return {offset + size, this}; } -}; - -template -struct InputRange { - Next next; - using ValueOrFalsy = decltype(std::declval()()); - - static_assert(std::is_constructible::value, ""); - static_assert(std::is_default_constructible::value, ""); - using value_type = decltype(*std::declval()); - - struct iterator { - InputRange* range; - ValueOrFalsy stashed; - - bool operator==(iterator other) const { - return static_cast(stashed) == static_cast(other.stashed); - } - bool operator!=(iterator other) const { return !(*this == other); } - - iterator& operator++() { - stashed = range->next(); - return *this; - } - value_type operator*() const { return *stashed; } - }; - - iterator begin() { return {this, next()}; } - iterator end() { return {this, ValueOrFalsy()}; } -}; -} // namespace internal - -/// \defgroup nanoarrow_hpp-range_for Range-for helpers -/// -/// The Arrow C Data interface and the Arrow C Stream interface represent -/// data which can be iterated through using C++'s range-for statement. -/// -/// @{ - -/// \brief An object convertible to any empty optional -constexpr internal::Nothing NA{}; - -/// \brief A range-for compatible wrapper for ArrowArray of fixed size type -/// -/// Provides a sequence of optional copied from each non-null -/// slot of the wrapped array (null slots result in empty optionals). -template -class ViewArrayAs { - private: - struct Get { - const uint8_t* validity; - const void* values; - - internal::Maybe operator()(int64_t i) const { - if (validity == nullptr || ArrowBitGet(validity, i)) { - if (std::is_same::value) { - return ArrowBitGet(static_cast(values), i); - } else { - return static_cast(values)[i]; - } - } - return NA; - } - }; - - internal::RandomAccessRange range_; - - public: - ViewArrayAs(const ArrowArrayView* array_view) - : range_{ - Get{ - array_view->buffer_views[0].data.as_uint8, - array_view->buffer_views[1].data.data, - }, - array_view->offset, - array_view->length, - } {} - - ViewArrayAs(const ArrowArray* array) - : range_{ - Get{ - static_cast(array->buffers[0]), - array->buffers[1], - }, - array->offset, - array->length, - } {} - - using value_type = typename internal::RandomAccessRange::value_type; - using const_iterator = typename internal::RandomAccessRange::const_iterator; - const_iterator begin() const { return range_.begin(); } - const_iterator end() const { return range_.end(); } - value_type operator[](int64_t i) const { return range_.get(i); } -}; - -/// \brief A range-for compatible wrapper for ArrowArray of binary or utf8 -/// -/// Provides a sequence of optional referencing each non-null -/// slot of the wrapped array (null slots result in empty optionals). Large -/// binary and utf8 arrays can be wrapped by specifying 64 instead of 32 for -/// the template argument. -template -class ViewArrayAsBytes { - private: - static_assert(OffsetSize == 32 || OffsetSize == 64, ""); - using OffsetType = typename std::conditional::type; - - struct Get { - const uint8_t* validity; - const void* offsets; - const char* data; - - internal::Maybe operator()(int64_t i) const { - auto* offsets = static_cast(this->offsets); - if (validity == nullptr || ArrowBitGet(validity, i)) { - return ArrowStringView{data + offsets[i], offsets[i + 1] - offsets[i]}; - } - return NA; - } - }; - - internal::RandomAccessRange range_; - - public: - ViewArrayAsBytes(const ArrowArrayView* array_view) - : range_{ - Get{ - array_view->buffer_views[0].data.as_uint8, - array_view->buffer_views[1].data.data, - array_view->buffer_views[2].data.as_char, - }, - array_view->offset, - array_view->length, - } {} - - ViewArrayAsBytes(const ArrowArray* array) - : range_{ - Get{ - static_cast(array->buffers[0]), - array->buffers[1], - static_cast(array->buffers[2]), - }, - array->offset, - array->length, - } {} - - using value_type = typename internal::RandomAccessRange::value_type; - using const_iterator = typename internal::RandomAccessRange::const_iterator; - const_iterator begin() const { return range_.begin(); } - const_iterator end() const { return range_.end(); } - value_type operator[](int64_t i) const { return range_.get(i); } -}; - -class ViewBinaryViewArrayAsBytes { - private: - struct Get { - const uint8_t* validity; - const union ArrowBinaryView* inline_data; - const void** variadic_buffers; - - internal::Maybe operator()(int64_t i) const { - if (validity == nullptr || ArrowBitGet(validity, i)) { - const union ArrowBinaryView* bv = &inline_data[i]; - if (bv->inlined.size <= NANOARROW_BINARY_VIEW_INLINE_SIZE) { - return ArrowStringView{reinterpret_cast(bv->inlined.data), - bv->inlined.size}; - } - - return ArrowStringView{ - reinterpret_cast(variadic_buffers[bv->ref.buffer_index]) + - bv->ref.offset, - bv->ref.size}; - } - return NA; - } - }; - - internal::RandomAccessRange range_; - - public: - ViewBinaryViewArrayAsBytes(const ArrowArrayView* array_view) - : range_{ - Get{ - array_view->buffer_views[0].data.as_uint8, - array_view->buffer_views[1].data.as_binary_view, - array_view->variadic_buffers, - }, - array_view->offset, - array_view->length, - } {} - - ViewBinaryViewArrayAsBytes(const ArrowArray* array) - : range_{ - Get{static_cast(array->buffers[0]), - static_cast(array->buffers[1]), - array->buffers + NANOARROW_BINARY_VIEW_FIXED_BUFFERS}, - array->offset, - array->length, - } {} - - using value_type = typename internal::RandomAccessRange::value_type; - using const_iterator = typename internal::RandomAccessRange::const_iterator; - const_iterator begin() const { return range_.begin(); } - const_iterator end() const { return range_.end(); } - value_type operator[](int64_t i) const { return range_.get(i); } -}; - -/// \brief A range-for compatible wrapper for ArrowArray of fixed size binary -/// -/// Provides a sequence of optional referencing each non-null -/// slot of the wrapped array (null slots result in empty optionals). -class ViewArrayAsFixedSizeBytes { - private: - struct Get { - const uint8_t* validity; - const char* data; - int fixed_size; - - internal::Maybe operator()(int64_t i) const { - if (validity == nullptr || ArrowBitGet(validity, i)) { - return ArrowStringView{data + i * fixed_size, fixed_size}; - } - return NA; - } - }; - - internal::RandomAccessRange range_; - - public: - ViewArrayAsFixedSizeBytes(const ArrowArrayView* array_view, int fixed_size) - : range_{ - Get{ - array_view->buffer_views[0].data.as_uint8, - array_view->buffer_views[1].data.as_char, - fixed_size, - }, - array_view->offset, - array_view->length, - } {} - - ViewArrayAsFixedSizeBytes(const ArrowArray* array, int fixed_size) - : range_{ - Get{ - static_cast(array->buffers[0]), - static_cast(array->buffers[1]), - fixed_size, - }, - array->offset, - array->length, - } {} - - using value_type = typename internal::RandomAccessRange::value_type; - using const_iterator = typename internal::RandomAccessRange::const_iterator; - const_iterator begin() const { return range_.begin(); } - const_iterator end() const { return range_.end(); } - value_type operator[](int64_t i) const { return range_.get(i); } -}; - -/// \brief A range-for compatible wrapper for ArrowArrayStream -/// -/// Provides a sequence of ArrowArray& referencing the most recent array drawn -/// from the wrapped stream. (Each array may be moved from if necessary.) -/// When streams terminate due to an error, the error code and message are -/// available for inspection through the code() and error() member functions -/// respectively. Failure to inspect the error code will result in -/// an assertion failure. The number of arrays drawn from the stream is also -/// available through the count() member function. -class ViewArrayStream { - public: - ViewArrayStream(ArrowArrayStream* stream, ArrowErrorCode* code, ArrowError* error) - : code_{code}, error_{error} { - // Using a slightly more verbose constructor to silence a warning that occurs - // on some versions of MSVC. - range_.next.self = this; - range_.next.stream = stream; - } - - ViewArrayStream(ArrowArrayStream* stream, ArrowError* error) - : ViewArrayStream{stream, &internal_code_, error} {} - - ViewArrayStream(ArrowArrayStream* stream) - : ViewArrayStream{stream, &internal_code_, &internal_error_} {} - - // disable copy/move of this view, since its error references may point into itself - ViewArrayStream(ViewArrayStream&&) = delete; - ViewArrayStream& operator=(ViewArrayStream&&) = delete; - ViewArrayStream(const ViewArrayStream&) = delete; - ViewArrayStream& operator=(const ViewArrayStream&) = delete; - - // ensure the error code of this stream was accessed at least once - ~ViewArrayStream() { NANOARROW_DCHECK(code_was_accessed_); } - - private: - struct Next { - ViewArrayStream* self; - ArrowArrayStream* stream; - UniqueArray array; - - ArrowArray* operator()() { - array.reset(); - *self->code_ = ArrowArrayStreamGetNext(stream, array.get(), self->error_); - - if (array->release != nullptr) { - NANOARROW_DCHECK(*self->code_ == NANOARROW_OK); - ++self->count_; - return array.get(); - } - - return nullptr; - } - }; - - internal::InputRange range_; - ArrowErrorCode* code_; - ArrowError* error_; - ArrowError internal_error_ = {}; - ArrowErrorCode internal_code_; - bool code_was_accessed_ = false; - int count_ = 0; - - public: - using value_type = typename internal::InputRange::value_type; - using iterator = typename internal::InputRange::iterator; - iterator begin() { return range_.begin(); } - iterator end() { return range_.end(); } - - /// The error code which caused this stream to terminate, if any. - ArrowErrorCode code() { - code_was_accessed_ = true; - return *code_; - } - /// The error message which caused this stream to terminate, if any. - ArrowError* error() { return error_; } - - /// The number of arrays streamed so far. - int count() const { return count_; } -}; - -/// @} - -NANOARROW_CXX_NAMESPACE_END - -#endif diff --git a/r/src/nanoarrow_ipc.hpp b/r/src/nanoarrow_ipc.hpp deleted file mode 100644 index 7311aca19..000000000 --- a/r/src/nanoarrow_ipc.hpp +++ /dev/null @@ -1,209 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#ifndef NANOARROW_IPC_HPP_INCLUDED -#define NANOARROW_IPC_HPP_INCLUDED - -#include "nanoarrow.hpp" -#include "nanoarrow_ipc.h" - -namespace nanoarrow { - -namespace internal { - -template <> -inline void init_pointer(struct ArrowIpcSharedBuffer* data) { - init_pointer(&data->private_src); -} - -template <> -inline void move_pointer(struct ArrowIpcSharedBuffer* src, - struct ArrowIpcSharedBuffer* dst) { - move_pointer(&src->private_src, &dst->private_src); -} - -template <> -inline void release_pointer(struct ArrowIpcSharedBuffer* data) { - ArrowIpcSharedBufferReset(data); -} - -template <> -inline void init_pointer(struct ArrowIpcDecoder* data) { - data->private_data = nullptr; -} - -template <> -inline void move_pointer(struct ArrowIpcDecoder* src, struct ArrowIpcDecoder* dst) { - memcpy(dst, src, sizeof(struct ArrowIpcDecoder)); - src->private_data = nullptr; -} - -template <> -inline void release_pointer(struct ArrowIpcDecoder* data) { - ArrowIpcDecoderReset(data); -} - -template <> -inline void init_pointer(struct ArrowIpcFooter* data) { - ArrowIpcFooterInit(data); -} - -template <> -inline void move_pointer(struct ArrowIpcFooter* src, struct ArrowIpcFooter* dst) { - ArrowSchemaMove(&src->schema, &dst->schema); - ArrowBufferMove(&src->record_batch_blocks, &dst->record_batch_blocks); -} - -template <> -inline void release_pointer(struct ArrowIpcFooter* data) { - ArrowIpcFooterReset(data); -} - -template <> -inline void init_pointer(struct ArrowIpcEncoder* data) { - data->private_data = nullptr; -} - -template <> -inline void move_pointer(struct ArrowIpcEncoder* src, struct ArrowIpcEncoder* dst) { - memcpy(dst, src, sizeof(struct ArrowIpcEncoder)); - src->private_data = nullptr; -} - -template <> -inline void release_pointer(struct ArrowIpcEncoder* data) { - ArrowIpcEncoderReset(data); -} - -template <> -inline void init_pointer(struct ArrowIpcDecompressor* data) { - data->private_data = nullptr; -} - -template <> -inline void move_pointer(struct ArrowIpcDecompressor* src, - struct ArrowIpcDecompressor* dst) { - memcpy(dst, src, sizeof(struct ArrowIpcDecompressor)); - src->release = nullptr; -} - -template <> -inline void release_pointer(struct ArrowIpcDecompressor* data) { - if (data->release != nullptr) { - data->release(data); - } -} - -template <> -inline void init_pointer(struct ArrowIpcInputStream* data) { - data->release = nullptr; -} - -template <> -inline void move_pointer(struct ArrowIpcInputStream* src, - struct ArrowIpcInputStream* dst) { - memcpy(dst, src, sizeof(struct ArrowIpcInputStream)); - src->release = nullptr; -} - -template <> -inline void release_pointer(struct ArrowIpcInputStream* data) { - if (data->release != nullptr) { - data->release(data); - } -} - -template <> -inline void init_pointer(struct ArrowIpcOutputStream* data) { - data->release = nullptr; -} - -template <> -inline void move_pointer(struct ArrowIpcOutputStream* src, - struct ArrowIpcOutputStream* dst) { - memcpy(dst, src, sizeof(struct ArrowIpcOutputStream)); - src->release = nullptr; -} - -template <> -inline void release_pointer(struct ArrowIpcOutputStream* data) { - if (data->release != nullptr) { - data->release(data); - } -} - -template <> -inline void init_pointer(struct ArrowIpcWriter* data) { - data->private_data = nullptr; -} - -template <> -inline void move_pointer(struct ArrowIpcWriter* src, struct ArrowIpcWriter* dst) { - memcpy(dst, src, sizeof(struct ArrowIpcWriter)); - src->private_data = nullptr; -} - -template <> -inline void release_pointer(struct ArrowIpcWriter* data) { - ArrowIpcWriterReset(data); -} - -} // namespace internal -} // namespace nanoarrow - -namespace nanoarrow { - -namespace ipc { - -/// \defgroup nanoarrow_ipc_hpp-unique Unique object wrappers -/// -/// Extends the unique object wrappers in nanoarrow.hpp to include C structs -/// defined in the nanoarrow_ipc.h header. -/// -/// @{ - -/// \brief Class wrapping a unique struct ArrowIpcSharedBuffer -using UniqueSharedBuffer = internal::Unique; - -/// \brief Class wrapping a unique struct ArrowIpcDecoder -using UniqueDecoder = internal::Unique; - -/// \brief Class wrapping a unique struct ArrowIpcFooter -using UniqueFooter = internal::Unique; - -/// \brief Class wrapping a unique struct ArrowIpcEncoder -using UniqueEncoder = internal::Unique; - -/// \brief Class wrapping a unique struct ArrowIpcDecompressor -using UniqueDecompressor = internal::Unique; - -/// \brief Class wrapping a unique struct ArrowIpcInputStream -using UniqueInputStream = internal::Unique; - -/// \brief Class wrapping a unique struct ArrowIpcOutputStream -using UniqueOutputStream = internal::Unique; - -/// \brief Class wrapping a unique struct ArrowIpcWriter -using UniqueWriter = internal::Unique; - -/// @} - -} // namespace ipc - -} // namespace nanoarrow - -#endif From 4c4bf129de4b2c6e4fc0153c7dd0fbc4f46385b4 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 3 Dec 2025 20:51:31 -0600 Subject: [PATCH 7/7] ignore --- r/src/.gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/r/src/.gitignore b/r/src/.gitignore index c5bf0ca6e..5093a88f8 100644 --- a/r/src/.gitignore +++ b/r/src/.gitignore @@ -22,5 +22,7 @@ nanoarrow.c nanoarrow.h nanoarrow_ipc.h nanoarrow_ipc.c +nanoarrow.hpp +nanoarrow_ipc.hpp flatcc* Makevars