Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[python/r] Enforce dataframe domain lower bound == 0 #3300

Merged
merged 7 commits into from
Nov 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions apis/python/src/tiledbsoma/_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,18 @@
slot_core_max_domain, extent, saturated_md
)

if index_column_name == "soma_joinid":
lower = slot_core_current_domain[0]
upper = slot_core_current_domain[1]
johnkerl marked this conversation as resolved.
Show resolved Hide resolved
if lower != 0:
raise ValueError(
f"domain for soma_joinid must have lower bound of 0; got {lower}"
)
if upper < 0:
raise ValueError(

Check warning on line 306 in apis/python/src/tiledbsoma/_dataframe.py

View check run for this annotation

Codecov / codecov/patch

apis/python/src/tiledbsoma/_dataframe.py#L306

Added line #L306 was not covered by tests
f"domain for soma_joinid must have upper bound >= 0; got {upper}"
)
nguyenv marked this conversation as resolved.
Show resolved Hide resolved

# Here is our Arrow data API for communicating schema info between
# Python/R and C++ libtiledbsoma:
#
Expand Down
35 changes: 35 additions & 0 deletions apis/python/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1864,3 +1864,38 @@ def test_presence_matrix(tmp_path):
actual = soma_df.read().concat().to_pandas()

assert actual.equals(df)


def test_bounds_on_somajoinid_domain(tmp_path):
uri = tmp_path.as_posix()

schema = pa.schema(
[
("soma_joinid", pa.int64()),
("mystring", pa.string()),
("myint", pa.int32()),
("myfloat", pa.float32()),
]
)

with pytest.raises(ValueError):
soma.DataFrame.create(
uri,
schema=schema,
domain=[[2, 99]],
)

with pytest.raises(ValueError):
soma.DataFrame.create(
uri,
schema=schema,
domain=[[0, -1]],
)

soma.DataFrame.create(
uri,
schema=schema,
domain=[[0, 99]],
)

assert soma.DataFrame.exists(uri)
9 changes: 9 additions & 0 deletions apis/r/R/SOMADataFrame.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,15 @@ SOMADataFrame <- R6::R6Class(
length(attr_column_names) > 0
)

if ("soma_joinid" %in% index_column_names && !is.null(domain)) {
lower <- domain[["soma_joinid"]][1]
upper <- domain[["soma_joinid"]][2]
stopifnot(
"The lower bound for soma_joinid domain must be 0" = lower == 0,
"The upper bound for soma_joinid domain must be >= 0" = upper >= 0
)
}

# Parse the tiledb/create/ subkeys of the platform_config into a handy,
# typed, queryable data structure.
tiledb_create_options <- TileDBCreateOptions$new(platform_config)
Expand Down
50 changes: 47 additions & 3 deletions apis/r/tests/testthat/test-SOMADataFrame.R
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,50 @@ test_that("Basic mechanics with default index_column_names", {
gc()
})

test_that("soma_joinid domain lower bound must be zero", {
uri <- withr::local_tempdir("soma-dataframe-soma-joinid-domain-lower-bound")

index_column_names <- c("soma_joinid")

asch <- arrow::schema(
arrow::field("soma_joinid", arrow::int64(), nullable = FALSE),
arrow::field("mystring", arrow::large_utf8(), nullable = FALSE),
arrow::field("myint", arrow::int32(), nullable = FALSE),
arrow::field("myfloat", arrow::float32(), nullable = FALSE)
)

expect_error(
SOMADataFrameCreate(
uri,
asch,
index_column_names = index_column_names,
domain = list(soma_joinid=c(2, 99))
)
)

expect_error(
SOMADataFrameCreate(
uri,
asch,
index_column_names = index_column_names,
domain = list(soma_joinid=c(0, -1))
)
)

expect_no_condition(
SOMADataFrameCreate(
uri,
asch,
index_column_names = index_column_names,
domain = list(soma_joinid=c(0, 99))
)
)

sdf <- SOMADataFrameOpen(uri)
expect_true(sdf$exists())
sdf$close()
})

test_that("creation with all supported dimension data types", {
skip_if(!extended_tests())
sch <- arrow::schema(
Expand Down Expand Up @@ -605,9 +649,9 @@ test_that("SOMADataFrame timestamped ops", {
arrow::field("valdbl", arrow::float64(), nullable = FALSE)
)
if (dir.exists(uri)) unlink(uri, recursive = TRUE)
sdf <- SOMADataFrameCreate(uri = uri, schema = sch, domain = list(soma_joinid = c(1, 100)))
sdf <- SOMADataFrameCreate(uri = uri, schema = sch, domain = list(soma_joinid = c(0, 99)))
rb1 <- arrow::record_batch(
soma_joinid = bit64::as.integer64(1L:3L),
soma_joinid = bit64::as.integer64(0L:2L),
valint = 1L:3L,
valdbl = 100 * (1:3),
schema = sch
Expand All @@ -625,7 +669,7 @@ test_that("SOMADataFrame timestamped ops", {
t20 <- Sys.time()
sdf <- SOMADataFrameOpen(uri = uri, mode = "WRITE")
rb2 <- arrow::record_batch(
soma_joinid = bit64::as.integer64(4L:6L),
soma_joinid = bit64::as.integer64(3L:5L),
valint = 4L:6L,
valdbl = 100 * (4:6),
schema = sch
Expand Down
16 changes: 8 additions & 8 deletions apis/r/tests/testthat/test-shape.R
Original file line number Diff line number Diff line change
Expand Up @@ -327,14 +327,14 @@ test_that("SOMADataFrame domain mods", {
index_column_names <- c("soma_joinid", "mystring", "myint", "myfloat")

domain_for_create <- list(
soma_joinid = c(1, 4),
soma_joinid = c(0, 3),
mystring = NULL,
myint = c(20, 50),
myfloat = c(0.0, 6.0)
)

table <- arrow::arrow_table(
soma_joinid = 1L:4L,
soma_joinid = 0L:3L,
mystring = c("a", "b", "a", "b"),
myint = c(20, 30, 40, 50),
myfloat = c(1.0, 2.5, 4.0, 5.5),
Expand All @@ -355,7 +355,7 @@ test_that("SOMADataFrame domain mods", {

# Check "expand" to same
new_domain <- list(
soma_joinid = c(1, 4),
soma_joinid = c(0, 3),
mystring = NULL,
myint = c(20, 50),
myfloat = c(0.0, 6.0)
Expand All @@ -364,23 +364,23 @@ test_that("SOMADataFrame domain mods", {

# Shrink
new_domain <- list(
soma_joinid = c(1, 3),
soma_joinid = c(0, 2),
mystring = NULL,
myint = c(20, 50),
myfloat = c(0.0, 6.0)
)
expect_error(sdf$change_domain(new_domain))

new_domain <- list(
soma_joinid = c(1, 4),
soma_joinid = c(0, 3),
mystring = NULL,
myint = c(20, 40),
myfloat = c(0.0, 6.0)
)
expect_error(sdf$change_domain(new_domain))

new_domain <- list(
soma_joinid = c(1, 4),
soma_joinid = c(0, 3),
mystring = NULL,
myint = c(20, 50),
myfloat = c(2.0, 6.0)
Expand All @@ -389,7 +389,7 @@ test_that("SOMADataFrame domain mods", {

# String domain cannot be specified
new_domain <- list(
soma_joinid = c(1, 4),
soma_joinid = c(0, 3),
mystring = c("a", "z"),
myint = c(20, 50),
myfloat = c(0.0, 6.0)
Expand All @@ -400,7 +400,7 @@ test_that("SOMADataFrame domain mods", {

# String domain cannot be specified
new_domain <- list(
soma_joinid = c(1, 9),
soma_joinid = c(0, 8),
mystring = c("", ""),
myint = c(20, 50),
myfloat = c(0.0, 10.0)
Expand Down
Loading