Skip to content

Commit

Permalink
[python/r] Enforce dataframe domain lower bound == 0 (#3300)
Browse files Browse the repository at this point in the history
* [python/r] Enforce dataframe domain lower bound == 0

* python unit-test cases

* R-side assertion

* R-side unit tests

* update a few existing unit-test cases

* R-side code-review feedback [skip ci]

* Python-side code-review feedback
  • Loading branch information
johnkerl authored Nov 7, 2024
1 parent 208c0ab commit 017aad0
Show file tree
Hide file tree
Showing 5 changed files with 111 additions and 11 deletions.
12 changes: 12 additions & 0 deletions apis/python/src/tiledbsoma/_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,18 @@ def create(
slot_core_max_domain, extent, saturated_md
)

if index_column_name == "soma_joinid":
lower = slot_core_current_domain[0]
upper = slot_core_current_domain[1]
if lower != 0:
raise ValueError(
f"domain for soma_joinid must have lower bound of 0; got {lower}"
)
if upper < 0:
raise ValueError(
f"domain for soma_joinid must have upper bound >= 0; got {upper}"
)

# Here is our Arrow data API for communicating schema info between
# Python/R and C++ libtiledbsoma:
#
Expand Down
35 changes: 35 additions & 0 deletions apis/python/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1864,3 +1864,38 @@ def test_presence_matrix(tmp_path):
actual = soma_df.read().concat().to_pandas()

assert actual.equals(df)


def test_bounds_on_somajoinid_domain(tmp_path):
uri = tmp_path.as_posix()

schema = pa.schema(
[
("soma_joinid", pa.int64()),
("mystring", pa.string()),
("myint", pa.int32()),
("myfloat", pa.float32()),
]
)

with pytest.raises(ValueError):
soma.DataFrame.create(
uri,
schema=schema,
domain=[[2, 99]],
)

with pytest.raises(ValueError):
soma.DataFrame.create(
uri,
schema=schema,
domain=[[0, -1]],
)

soma.DataFrame.create(
uri,
schema=schema,
domain=[[0, 99]],
)

assert soma.DataFrame.exists(uri)
9 changes: 9 additions & 0 deletions apis/r/R/SOMADataFrame.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,15 @@ SOMADataFrame <- R6::R6Class(
length(attr_column_names) > 0
)

if ("soma_joinid" %in% index_column_names && !is.null(domain)) {
lower <- domain[["soma_joinid"]][1]
upper <- domain[["soma_joinid"]][2]
stopifnot(
"The lower bound for soma_joinid domain must be 0" = lower == 0,
"The upper bound for soma_joinid domain must be >= 0" = upper >= 0
)
}

# Parse the tiledb/create/ subkeys of the platform_config into a handy,
# typed, queryable data structure.
tiledb_create_options <- TileDBCreateOptions$new(platform_config)
Expand Down
50 changes: 47 additions & 3 deletions apis/r/tests/testthat/test-SOMADataFrame.R
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,50 @@ test_that("Basic mechanics with default index_column_names", {
gc()
})

test_that("soma_joinid domain lower bound must be zero", {
uri <- withr::local_tempdir("soma-dataframe-soma-joinid-domain-lower-bound")

index_column_names <- c("soma_joinid")

asch <- arrow::schema(
arrow::field("soma_joinid", arrow::int64(), nullable = FALSE),
arrow::field("mystring", arrow::large_utf8(), nullable = FALSE),
arrow::field("myint", arrow::int32(), nullable = FALSE),
arrow::field("myfloat", arrow::float32(), nullable = FALSE)
)

expect_error(
SOMADataFrameCreate(
uri,
asch,
index_column_names = index_column_names,
domain = list(soma_joinid=c(2, 99))
)
)

expect_error(
SOMADataFrameCreate(
uri,
asch,
index_column_names = index_column_names,
domain = list(soma_joinid=c(0, -1))
)
)

expect_no_condition(
SOMADataFrameCreate(
uri,
asch,
index_column_names = index_column_names,
domain = list(soma_joinid=c(0, 99))
)
)

sdf <- SOMADataFrameOpen(uri)
expect_true(sdf$exists())
sdf$close()
})

test_that("creation with all supported dimension data types", {
skip_if(!extended_tests())
sch <- arrow::schema(
Expand Down Expand Up @@ -605,9 +649,9 @@ test_that("SOMADataFrame timestamped ops", {
arrow::field("valdbl", arrow::float64(), nullable = FALSE)
)
if (dir.exists(uri)) unlink(uri, recursive = TRUE)
sdf <- SOMADataFrameCreate(uri = uri, schema = sch, domain = list(soma_joinid = c(1, 100)))
sdf <- SOMADataFrameCreate(uri = uri, schema = sch, domain = list(soma_joinid = c(0, 99)))
rb1 <- arrow::record_batch(
soma_joinid = bit64::as.integer64(1L:3L),
soma_joinid = bit64::as.integer64(0L:2L),
valint = 1L:3L,
valdbl = 100 * (1:3),
schema = sch
Expand All @@ -625,7 +669,7 @@ test_that("SOMADataFrame timestamped ops", {
t20 <- Sys.time()
sdf <- SOMADataFrameOpen(uri = uri, mode = "WRITE")
rb2 <- arrow::record_batch(
soma_joinid = bit64::as.integer64(4L:6L),
soma_joinid = bit64::as.integer64(3L:5L),
valint = 4L:6L,
valdbl = 100 * (4:6),
schema = sch
Expand Down
16 changes: 8 additions & 8 deletions apis/r/tests/testthat/test-shape.R
Original file line number Diff line number Diff line change
Expand Up @@ -327,14 +327,14 @@ test_that("SOMADataFrame domain mods", {
index_column_names <- c("soma_joinid", "mystring", "myint", "myfloat")

domain_for_create <- list(
soma_joinid = c(1, 4),
soma_joinid = c(0, 3),
mystring = NULL,
myint = c(20, 50),
myfloat = c(0.0, 6.0)
)

table <- arrow::arrow_table(
soma_joinid = 1L:4L,
soma_joinid = 0L:3L,
mystring = c("a", "b", "a", "b"),
myint = c(20, 30, 40, 50),
myfloat = c(1.0, 2.5, 4.0, 5.5),
Expand All @@ -355,7 +355,7 @@ test_that("SOMADataFrame domain mods", {

# Check "expand" to same
new_domain <- list(
soma_joinid = c(1, 4),
soma_joinid = c(0, 3),
mystring = NULL,
myint = c(20, 50),
myfloat = c(0.0, 6.0)
Expand All @@ -364,23 +364,23 @@ test_that("SOMADataFrame domain mods", {

# Shrink
new_domain <- list(
soma_joinid = c(1, 3),
soma_joinid = c(0, 2),
mystring = NULL,
myint = c(20, 50),
myfloat = c(0.0, 6.0)
)
expect_error(sdf$change_domain(new_domain))

new_domain <- list(
soma_joinid = c(1, 4),
soma_joinid = c(0, 3),
mystring = NULL,
myint = c(20, 40),
myfloat = c(0.0, 6.0)
)
expect_error(sdf$change_domain(new_domain))

new_domain <- list(
soma_joinid = c(1, 4),
soma_joinid = c(0, 3),
mystring = NULL,
myint = c(20, 50),
myfloat = c(2.0, 6.0)
Expand All @@ -389,7 +389,7 @@ test_that("SOMADataFrame domain mods", {

# String domain cannot be specified
new_domain <- list(
soma_joinid = c(1, 4),
soma_joinid = c(0, 3),
mystring = c("a", "z"),
myint = c(20, 50),
myfloat = c(0.0, 6.0)
Expand All @@ -400,7 +400,7 @@ test_that("SOMADataFrame domain mods", {

# String domain cannot be specified
new_domain <- list(
soma_joinid = c(1, 9),
soma_joinid = c(0, 8),
mystring = c("", ""),
myint = c(20, 50),
myfloat = c(0.0, 10.0)
Expand Down

0 comments on commit 017aad0

Please sign in to comment.