Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions r/inst/test-data/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->

# Extra test files for R

## complex-map.arrows

Sourced from the [Overture Maps Foundation](https://overturemaps.org) "divisions_area" table using
[SedonaDB](https://sedona.apache.org/sedonadb).

```r
library(sedonadb)

sd_sql("SET datafusion.execution.batch_size = 1024")

sd_read_parquet("/Volumes/data/overture/data/theme=divisions/type=division_area/") |>
sd_to_view("division_area", overwrite = TRUE)

sd_sql("SELECT ROW_NUMBER() OVER (ORDER BY names.primary) as idx, names FROM division_area WHERE names.common IS NOT NULL") |>
sd_compute() |> sd_to_view("names_with_common", overwrite = TRUE)

sd_sql("SELECT * FROM names_with_common WHERE (idx % 100) = 0 ORDER BY idx") |>
nanoarrow::write_nanoarrow("inst/test-data/complex-map.arrows")
```
Binary file added r/inst/test-data/complex-map.arrows
Binary file not shown.
43 changes: 34 additions & 9 deletions r/src/materialize.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,17 +64,41 @@ static int has_attrib_safe(SEXP x, SEXP sym) {
}

R_xlen_t nanoarrow_data_frame_size(SEXP x) {
if (Rf_length(x) > 0) {
// This both avoids materializing the row.names attribute and
// makes this work with struct-style vctrs that don't have a
// row.names attribute but that always have one or more element
// If this is actually a data frame, check the rownames. ALTREP ensures
// that "expanding" these rownames by accessing the attribute does not
// actually compute millions of strings.
if (Rf_inherits(x, "data.frame")) {
return Rf_xlength(Rf_getAttrib(x, R_RowNamesSymbol));
}

// If we are here, this is a struct style vctr. If we have a plain vctr
// as the first element we can just compute its length from the Rf_xlength()
// of the first column.
if (Rf_length(x) > 0 && !Rf_isObject(VECTOR_ELT(x, 0))) {
return Rf_xlength(VECTOR_ELT(x, 0));
} else {
// Since ALTREP was introduced, materializing the row.names attribute is
// usually deferred such that values in the form c(NA, -nrow), 1:nrow, or
// as.character(1:nrow) are never actually computed when the length is
// taken.
return Rf_xlength(Rf_getAttrib(x, R_RowNamesSymbol));
// Otherwise, we need S3 dispatch to compute the length for us.
SEXP length_sym = PROTECT(Rf_install("length"));
SEXP call_sexp = PROTECT(Rf_lang2(length_sym, x));
SEXP length_sexp = PROTECT(Rf_eval(call_sexp, R_BaseEnv));
if (Rf_xlength(length_sexp) != 1) {
Rf_error("length() return value with size != 1");
}

R_xlen_t out = 0;
switch (TYPEOF(length_sexp)) {
case INTSXP:
out = INTEGER(length_sexp)[0];
break;
case REALSXP:
out = (R_xlen_t)REAL(length_sexp)[0];
break;
default:
Rf_error("length() return value with unexpected type");
}

UNPROTECT(3);
return out;
}
}

Expand Down Expand Up @@ -559,6 +583,7 @@ static int nanoarrow_materialize_list_of(struct RConverter* converter,
case NANOARROW_TYPE_NA:
return NANOARROW_OK;
case NANOARROW_TYPE_LIST:
case NANOARROW_TYPE_MAP:
for (int64_t i = 0; i < dst->length; i++) {
if (!ArrowArrayViewIsNull(src->array_view, src->offset + i)) {
offset = offsets[raw_src_offset + i];
Expand Down
34 changes: 34 additions & 0 deletions r/tests/testthat/test-convert-array-stream.R
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,40 @@ test_that("convert array stream works for fixed_size_list_of() with parent nulls
)
})

test_that("complex nested struct/map combination can be read", {
skip_if_not_installed("arrow")
skip_if_not_installed("vctrs")

complex_map_file <- system.file("test-data/complex-map.arrows", package = "nanoarrow")
df_from_arrow <- as.data.frame(arrow::read_ipc_stream(complex_map_file))
df_from_nanoarrow <- as.data.frame(read_nanoarrow(complex_map_file))

expect_identical(
df_from_nanoarrow$idx,
as.double(df_from_arrow$idx)
)

expect_identical(
df_from_nanoarrow$names$primary,
df_from_arrow$names$primary
)

expect_identical(
df_from_nanoarrow$names$common[[3000]],
as.data.frame(df_from_arrow$names$common[[3000]])
)

expect_identical(
lengths(df_from_nanoarrow$names$rules),
lengths(df_from_arrow$names$rules)
)

expect_identical(
df_from_nanoarrow$names$rules[[4238]][c("variant", "language", "value")],
as.data.frame(df_from_arrow$names$rules[[4238]][c("variant", "language", "value")])
)
})

test_that("convert array stream respects the value of n", {
batches <- list(
data.frame(x = 1:5),
Expand Down
35 changes: 35 additions & 0 deletions r/tests/testthat/test-convert-array.R
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,17 @@ test_that("convert to vector works for tibble", {
)
})

test_that("convert to vector works for a rcrd-sytle vctr with complex columns", {
skip_if_not_installed("vctrs")

rcrd <- vctrs::new_rcrd(list(x = data.frame(y = 1:10)))
rcrd_array <- as_nanoarrow_array(vctrs::vec_data(rcrd))
expect_identical(
convert_array(rcrd_array, rcrd),
rcrd
)
})

test_that("convert to vector works for nanoarrow_vctr()", {
array <- as_nanoarrow_array(c("one", "two", "three"))

Expand Down Expand Up @@ -1084,6 +1095,30 @@ test_that("convert to vector works for null -> vctrs::list_of()", {
)
})

test_that("convert to vector works for map -> vctrs::list_of", {
skip_if_not_installed("arrow")
skip_if_not_installed("vctrs")

values <- vctrs::list_of(
data.frame(key = "key1", value = 1L),
data.frame(key = c("key2", "key3"), value = c(2L, 3L)),
NULL
)

array_list <- as_nanoarrow_array(
arrow::Array$create(
values,
type = arrow::map_of(arrow::string(), arrow::int32())
)
)

# Default conversion
expect_identical(
convert_array(array_list),
values
)
})

test_that("convert to vector works for fixed_size_list_of() -> matrix()", {
mat <- matrix(1:6, ncol = 2, byrow = TRUE)
array <- as_nanoarrow_array(mat)
Expand Down
Loading