55#include < sparrow/c_interface.hpp>
66#include < sparrow/record_batch.hpp>
77
8+ #include " sparrow_ipc/compression.hpp"
9+ #include " sparrow_ipc/utils.hpp"
10+
811namespace sparrow_ipc
912{
1013 // Creates a Flatbuffers Decimal type from a format string
@@ -164,6 +167,42 @@ namespace sparrow_ipc
164167 [[nodiscard]] std::vector<org::apache::arrow::flatbuf::FieldNode>
165168 create_fieldnodes (const sparrow::record_batch& record_batch);
166169
170+ namespace details
171+ {
172+ template <typename Func>
173+ void fill_buffers_impl (
174+ const sparrow::arrow_proxy& arrow_proxy,
175+ std::vector<org::apache::arrow::flatbuf::Buffer>& flatbuf_buffers,
176+ int64_t & offset,
177+ Func&& get_buffer_size
178+ )
179+ {
180+ const auto & buffers = arrow_proxy.buffers ();
181+ for (const auto & buffer : buffers)
182+ {
183+ int64_t size = get_buffer_size (buffer);
184+ flatbuf_buffers.emplace_back (offset, size);
185+ offset += utils::align_to_8 (size);
186+ }
187+ for (const auto & child : arrow_proxy.children ())
188+ {
189+ fill_buffers_impl (child, flatbuf_buffers, offset, get_buffer_size);
190+ }
191+ }
192+
193+ template <typename Func>
194+ std::vector<org::apache::arrow::flatbuf::Buffer> get_buffers_impl (const sparrow::record_batch& record_batch, Func&& fill_buffers_func)
195+ {
196+ std::vector<org::apache::arrow::flatbuf::Buffer> buffers;
197+ int64_t offset = 0 ;
198+ for (const auto & column : record_batch.columns ())
199+ {
200+ const auto & arrow_proxy = sparrow::detail::array_access::get_arrow_proxy (column);
201+ fill_buffers_func (arrow_proxy, buffers, offset);
202+ }
203+ return buffers;
204+ }
205+ } // namespace details
167206
168207 /* *
169208 * @brief Recursively fills a vector of FlatBuffer Buffer objects with buffer information from an Arrow
@@ -205,6 +244,67 @@ namespace sparrow_ipc
205244 [[nodiscard]] std::vector<org::apache::arrow::flatbuf::Buffer>
206245 get_buffers (const sparrow::record_batch& record_batch);
207246
247+ /* *
248+ * @brief Recursively populates a vector with compressed buffer metadata from an Arrow proxy.
249+ *
250+ * This function traverses the Arrow proxy and its children, compressing each buffer and recording
251+ * its metadata (offset and size) in the provided vector. The offset is updated to ensure proper
252+ * alignment for each subsequent buffer.
253+ *
254+ * @param arrow_proxy The Arrow proxy containing the buffers to be compressed.
255+ * @param flatbuf_compressed_buffers A vector to store the resulting compressed buffer metadata.
256+ * @param offset The current offset in the buffer layout, which will be updated by the function.
257+ * @param compression_type The compression algorithm to use.
258+ */
259+ void fill_compressed_buffers (
260+ const sparrow::arrow_proxy& arrow_proxy,
261+ std::vector<org::apache::arrow::flatbuf::Buffer>& flatbuf_compressed_buffers,
262+ int64_t & offset,
263+ const CompressionType compression_type
264+ );
265+
266+ /* *
267+ * @brief Retrieves metadata describing the layout of compressed buffers within a record batch.
268+ *
269+ * This function processes a record batch to determine the metadata (offset and size)
270+ * for each of its buffers, assuming they are compressed using the specified algorithm.
271+ * This metadata accounts for each compressed buffer being prefixed by its 8-byte
272+ * uncompressed size and padded to ensure 8-byte alignment.
273+ *
274+ * @param record_batch The record batch whose buffers' compressed metadata is to be retrieved.
275+ * @param compression_type The compression algorithm that would be applied (e.g., LZ4_FRAME, ZSTD).
276+ * @return A vector of FlatBuffer Buffer objects, each describing the offset and
277+ * size of a corresponding compressed buffer within a larger message body.
278+ */
279+ [[nodiscard]] std::vector<org::apache::arrow::flatbuf::Buffer>
280+ get_compressed_buffers (const sparrow::record_batch& record_batch, const CompressionType compression_type);
281+
282+ /* *
283+ * @brief Calculates the total size of the body section for an Arrow array.
284+ *
285+ * This function recursively computes the total size needed for all buffers
286+ * in an Arrow array structure, including buffers from child arrays. Each
287+ * buffer size is aligned to 8-byte boundaries as required by the Arrow format.
288+ *
289+ * @param arrow_proxy The Arrow array proxy containing buffers and child arrays
290+ * @param compression The compression type to use when serializing
291+ * @return int64_t The total aligned size in bytes of all buffers in the array hierarchy
292+ */
293+ [[nodiscard]] int64_t calculate_body_size (const sparrow::arrow_proxy& arrow_proxy, std::optional<CompressionType> compression = std::nullopt );
294+
295+ /* *
296+ * @brief Calculates the total body size of a record batch by summing the body sizes of all its columns.
297+ *
298+ * This function iterates through all columns in the given record batch and accumulates
299+ * the body size of each column's underlying Arrow array proxy. The body size represents
300+ * the total memory required for the serialized data content of the record batch.
301+ *
302+ * @param record_batch The sparrow record batch containing columns to calculate size for
303+ * @param compression The compression type to use when serializing
304+ * @return int64_t The total body size in bytes of all columns in the record batch
305+ */
306+ [[nodiscard]] int64_t calculate_body_size (const sparrow::record_batch& record_batch, std::optional<CompressionType> compression = std::nullopt );
307+
208308 /* *
209309 * @brief Creates a FlatBuffer message containing a serialized Apache Arrow RecordBatch.
210310 *
@@ -222,5 +322,5 @@ namespace sparrow_ipc
222322 * @note Variadic buffer counts is not currently implemented (set to 0)
223323 */
224324 [[nodiscard]] flatbuffers::FlatBufferBuilder
225- get_record_batch_message_builder (const sparrow::record_batch& record_batch, std::optional<org::apache::arrow::flatbuf:: CompressionType> compression = std::nullopt );
325+ get_record_batch_message_builder (const sparrow::record_batch& record_batch, std::optional<CompressionType> compression = std::nullopt );
226326}
0 commit comments