diff --git a/src/include/reltoaltrep.hpp b/src/include/reltoaltrep.hpp index 17b2e35ca..9270181b3 100644 --- a/src/include/reltoaltrep.hpp +++ b/src/include/reltoaltrep.hpp @@ -17,6 +17,8 @@ struct AltrepRelationWrapper { duckdb::unique_ptr Materialize(); + void MarkColumnAsTransformed(); + const bool allow_materialization; const size_t n_rows; const size_t n_cells; @@ -24,6 +26,13 @@ struct AltrepRelationWrapper { rel_extptr_t rel_eptr; duckdb::shared_ptr rel; duckdb::unique_ptr res; + + R_xlen_t rowcount; + bool rowcount_retrieved; + + size_t ncols; + size_t cols_transformed; + }; } diff --git a/src/reltoaltrep.cpp b/src/reltoaltrep.cpp index bbb5de1c4..c9b2cc27e 100644 --- a/src/reltoaltrep.cpp +++ b/src/reltoaltrep.cpp @@ -101,13 +101,24 @@ AltrepRelationWrapper *AltrepRelationWrapper::Get(SEXP x) { } AltrepRelationWrapper::AltrepRelationWrapper(rel_extptr_t rel_, bool allow_materialization_, size_t n_rows_, size_t n_cells_) - : allow_materialization(allow_materialization_), n_rows(n_rows_), n_cells(n_cells_), rel_eptr(rel_), rel(rel_->rel) { + : allow_materialization(allow_materialization_), n_rows(n_rows_), n_cells(n_cells_), rel_eptr(rel_), rel(rel_->rel), rowcount(0), rowcount_retrieved(false), ncols(0), cols_transformed(0) { } bool AltrepRelationWrapper::HasQueryResult() const { return (bool)res; } +void AltrepRelationWrapper::MarkColumnAsTransformed() { + // AltrepRelationWrapper keeps tabs on how many of the columns have been transformed + // to their R-representation + cols_transformed++; + // If all of the columns have been transformed, we can reset + // the query-result pointer and free the memory + if (cols_transformed == ncols) { + res.reset(); + } +} + MaterializedQueryResult *AltrepRelationWrapper::GetQueryResult() { if (!res) { if (!allow_materialization || n_cells == 0) { @@ -149,6 +160,10 @@ MaterializedQueryResult *AltrepRelationWrapper::GetQueryResult() { cpp11::stop("Query execution was interrupted"); } + + rowcount = ((MaterializedQueryResult *)res.get())->RowCount(); + rowcount_retrieved = true; + signal_handler.Disable(); } D_ASSERT(res); @@ -196,7 +211,7 @@ duckdb::unique_ptr AltrepRelationWrapper::Materialize() { struct AltrepRownamesWrapper { - AltrepRownamesWrapper(duckdb::shared_ptr rel_p) : rel(rel_p) { + AltrepRownamesWrapper(duckdb::shared_ptr rel_p) : rel(rel_p), rowlen_data_retrieved(false) { rowlen_data[0] = NA_INTEGER; } @@ -206,6 +221,7 @@ struct AltrepRownamesWrapper { int32_t rowlen_data[2]; duckdb::shared_ptr rel; + bool rowlen_data_retrieved; }; struct AltrepVectorWrapper { @@ -228,6 +244,8 @@ struct AltrepVectorWrapper { duckdb_r_transform(chunk.data[column_index], dest, dest_offset, chunk.size(), false); dest_offset += chunk.size(); } + + rel->MarkColumnAsTransformed(); } return DATAPTR(transformed_vector); } @@ -306,16 +324,27 @@ const void *RelToAltrep::RownamesDataptrOrNull(SEXP x) { void *RelToAltrep::DoRownamesDataptrGet(SEXP x) { auto rownames_wrapper = AltrepRownamesWrapper::Get(x); + + // the query has been materialized, return the rowcount + // (and void recomputing the query if it's been reset) + if (rownames_wrapper->rowlen_data_retrieved) { + return rownames_wrapper->rowlen_data; + } + auto row_count = rownames_wrapper->rel->GetQueryResult()->RowCount(); if (row_count > (idx_t)NumericLimits::Maximum()) { cpp11::stop("Integer overflow for row.names attribute"); } rownames_wrapper->rowlen_data[1] = -row_count; + rownames_wrapper->rowlen_data_retrieved = true; return rownames_wrapper->rowlen_data; } R_xlen_t RelToAltrep::VectorLength(SEXP x) { BEGIN_CPP11 + if (AltrepVectorWrapper::Get(x)->rel->rowcount_retrieved) { + return AltrepVectorWrapper::Get(x)->rel->rowcount; + } return AltrepVectorWrapper::Get(x)->rel->GetQueryResult()->RowCount(); END_CPP11_EX(0) } @@ -404,6 +433,8 @@ size_t DoubleToSize(double d) { auto relation_wrapper = make_shared_ptr(rel, allow_materialization, DoubleToSize(n_rows), DoubleToSize(n_cells)); + relation_wrapper->ncols = drel->Columns().size(); + cpp11::writable::list data_frame; data_frame.reserve(ncols); @@ -427,6 +458,8 @@ size_t DoubleToSize(double d) { } SET_NAMES(data_frame, StringsToSexp(names)); + relation_wrapper->ncols = drel->Columns().size(); + // Row names cpp11::external_pointer ptr(new AltrepRownamesWrapper(relation_wrapper)); R_SetExternalPtrTag(ptr, RStrings::get().duckdb_row_names_sym);