Skip to content

Commit

Permalink
[python/r] Move beyond the new-shape feature flag (#3301)
Browse files Browse the repository at this point in the history
* Python side

* R side

* CI YAML
  • Loading branch information
johnkerl authored Nov 7, 2024
1 parent fd0578d commit 4a2cbcb
Show file tree
Hide file tree
Showing 20 changed files with 356 additions and 553 deletions.
9 changes: 1 addition & 8 deletions .github/workflows/python-ci-single.yml
Original file line number Diff line number Diff line change
Expand Up @@ -128,14 +128,7 @@ jobs:
env:
TILEDB_SOMA_INIT_BUFFER_BYTES: 33554432 # accommodate tiny runners

- name: Run pytests for Python without new shape
shell: bash
# Setting PYTHONPATH ensures the tests load the in-tree source code under apis/python/src
# instead of the copy we `pip install`ed to site-packages above. That's needed for the code
# coverage analysis to work.
run: export SOMA_PY_NEW_SHAPE=false; PYTHONPATH=$(pwd)/apis/python/src python -m pytest --cov=apis/python/src --cov-report=xml apis/python/tests -v --durations=20 --maxfail=50

- name: Run pytests for Python with new shape
- name: Run pytests for Python
shell: bash
# Setting PYTHONPATH ensures the tests load the in-tree source code under apis/python/src
# instead of the copy we `pip install`ed to site-packages above. That's needed for the code
Expand Down
7 changes: 1 addition & 6 deletions .github/workflows/r-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -148,12 +148,7 @@ jobs:
# -e "devtools::install(upgrade = FALSE)" \
# -e "testthat::test_local('tests/testthat', load_package = 'installed')"

- name: Test without new shape
if: ${{ matrix.covr == 'no' }}
run: export SOMA_R_NEW_SHAPE=false && cd apis/r/tests && Rscript testthat.R

# https://github.com/single-cell-data/TileDB-SOMA/issues/2407
- name: Test with new shape
- name: Test
if: ${{ matrix.covr == 'no' }}
run: cd apis/r/tests && Rscript testthat.R

Expand Down
9 changes: 1 addition & 8 deletions .github/workflows/r-python-interop-testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -110,14 +110,7 @@ jobs:
- name: Update Packages
run: Rscript -e 'update.packages(ask=FALSE)'

- name: Interop tests with new-shape feature flag off
- name: Interop tests
run: python -m pytest apis/system/tests/
env:
TILEDB_SOMA_INIT_BUFFER_BYTES: 33554432 # accommodate tiny runners

- name: Interop tests with new-shape feature flag on
run: python -m pytest apis/system/tests/
env:
TILEDB_SOMA_INIT_BUFFER_BYTES: 33554432 # accommodate tiny runners
SOMA_PY_NEW_SHAPE: true
SOMA_R_NEW_SHAPE: true
28 changes: 11 additions & 17 deletions apis/python/src/tiledbsoma/_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,6 @@
from somacore import options
from typing_extensions import Self

from tiledbsoma._flags import NEW_SHAPE_FEATURE_FLAG_ENABLED

from . import _arrow_types, _util
from . import pytiledbsoma as clib
from ._constants import SOMA_JOINID
Expand Down Expand Up @@ -308,16 +306,12 @@ def create(
# [4] core current domain hi

index_column_schema.append(pa_field)
if NEW_SHAPE_FEATURE_FLAG_ENABLED:

index_column_data[pa_field.name] = [
*slot_core_max_domain,
extent,
*slot_core_current_domain,
]

else:
index_column_data[pa_field.name] = [*slot_core_current_domain, extent]
index_column_data[pa_field.name] = [
*slot_core_max_domain,
extent,
*slot_core_current_domain,
]

index_column_info = pa.RecordBatch.from_pydict(
index_column_data, schema=pa.schema(index_column_schema)
Expand Down Expand Up @@ -916,7 +910,7 @@ def _fill_out_slot_soma_domain(
# will (and must) ignore these when creating the TileDB schema.
slot_domain = "", ""
elif np.issubdtype(dtype, NPInteger):
if is_max_domain or not NEW_SHAPE_FEATURE_FLAG_ENABLED:
if is_max_domain:
# Core max domain is immutable. If unspecified, it should be as big
# as possible since it can never be resized.
iinfo = np.iinfo(cast(NPInteger, dtype))
Expand All @@ -936,7 +930,7 @@ def _fill_out_slot_soma_domain(
# not 0.
slot_domain = 0, 0
elif np.issubdtype(dtype, NPFloating):
if is_max_domain or not NEW_SHAPE_FEATURE_FLAG_ENABLED:
if is_max_domain:
finfo = np.finfo(cast(NPFloating, dtype))
slot_domain = finfo.min, finfo.max
saturated_range = True
Expand All @@ -957,31 +951,31 @@ def _fill_out_slot_soma_domain(
# value representable by domain type. Reduce domain max by 1 tile extent
# to allow for expansion.
elif dtype == "datetime64[s]":
if is_max_domain or not NEW_SHAPE_FEATURE_FLAG_ENABLED:
if is_max_domain:
iinfo = np.iinfo(cast(NPInteger, np.int64))
slot_domain = np.datetime64(iinfo.min + 1, "s"), np.datetime64(
iinfo.max - 1000000, "s"
)
else:
slot_domain = np.datetime64(0, "s"), np.datetime64(0, "s")
elif dtype == "datetime64[ms]":
if is_max_domain or not NEW_SHAPE_FEATURE_FLAG_ENABLED:
if is_max_domain:
iinfo = np.iinfo(cast(NPInteger, np.int64))
slot_domain = np.datetime64(iinfo.min + 1, "ms"), np.datetime64(
iinfo.max - 1000000, "ms"
)
else:
slot_domain = np.datetime64(0, "ms"), np.datetime64(0, "ms")
elif dtype == "datetime64[us]":
if is_max_domain or not NEW_SHAPE_FEATURE_FLAG_ENABLED:
if is_max_domain:
iinfo = np.iinfo(cast(NPInteger, np.int64))
slot_domain = np.datetime64(iinfo.min + 1, "us"), np.datetime64(
iinfo.max - 1000000, "us"
)
else:
slot_domain = np.datetime64(0, "us"), np.datetime64(0, "us")
elif dtype == "datetime64[ns]":
if is_max_domain or not NEW_SHAPE_FEATURE_FLAG_ENABLED:
if is_max_domain:
iinfo = np.iinfo(cast(NPInteger, np.int64))
slot_domain = np.datetime64(iinfo.min + 1, "ns"), np.datetime64(
iinfo.max - 1000000, "ns"
Expand Down
6 changes: 3 additions & 3 deletions apis/python/src/tiledbsoma/_dense_nd_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from ._arrow_types import pyarrow_to_carrow_type
from ._common_nd_array import NDArray
from ._exception import SOMAError, map_exception_for_create
from ._flags import DENSE_ARRAYS_CAN_HAVE_CURRENT_DOMAIN, NEW_SHAPE_FEATURE_FLAG_ENABLED
from ._flags import DENSE_ARRAYS_CAN_HAVE_CURRENT_DOMAIN
from ._tdb_handles import DenseNDArrayWrapper
from ._types import OpenTimestamp, Slice, StatusAndReason
from ._util import dense_indices_to_shape
Expand Down Expand Up @@ -122,7 +122,7 @@ def create(
if dim_shape is None:
raise ValueError("DenseNDArray shape slots must be numeric")

if NEW_SHAPE_FEATURE_FLAG_ENABLED and DENSE_ARRAYS_CAN_HAVE_CURRENT_DOMAIN:
if DENSE_ARRAYS_CAN_HAVE_CURRENT_DOMAIN:
dim_capacity, dim_extent = cls._dim_capacity_and_extent(
dim_name,
# The user specifies current domain -- this is the max domain
Expand Down Expand Up @@ -368,7 +368,7 @@ def tiledbsoma_upgrade_shape(
1.15 release notes. Raises an error if the new shape exceeds maxshape in
any dimension. Raises an error if the array already has a shape.
"""
if NEW_SHAPE_FEATURE_FLAG_ENABLED and DENSE_ARRAYS_CAN_HAVE_CURRENT_DOMAIN:
if DENSE_ARRAYS_CAN_HAVE_CURRENT_DOMAIN:
if check_only:
return self._handle.tiledbsoma_can_upgrade_shape(newshape)
else:
Expand Down
2 changes: 0 additions & 2 deletions apis/python/src/tiledbsoma/_flags.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@
# removed once https://github.com/single-cell-data/TileDB-SOMA/issues/2407 is
# complete.

NEW_SHAPE_FEATURE_FLAG_ENABLED = os.getenv("SOMA_PY_NEW_SHAPE") != "false"

DENSE_ARRAYS_CAN_HAVE_CURRENT_DOMAIN = clib.embedded_version_triple() >= (2, 27, 0)

# Temporary for # https://github.com/single-cell-data/TileDB-SOMA/issues/2407:
Expand Down
15 changes: 5 additions & 10 deletions apis/python/src/tiledbsoma/_point_cloud_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
_revise_domain_for_extent,
)
from ._exception import SOMAError, map_exception_for_create
from ._flags import NEW_SHAPE_FEATURE_FLAG_ENABLED
from ._query_condition import QueryCondition
from ._read_iters import TableReadIter
from ._spatial_dataframe import SpatialDataFrame
Expand Down Expand Up @@ -215,16 +214,12 @@ def create(
# [4] core current domain hi

index_column_schema.append(pa_field)
if NEW_SHAPE_FEATURE_FLAG_ENABLED:

index_column_data[pa_field.name] = [
*slot_core_max_domain,
extent,
*slot_core_current_domain,
]

else:
index_column_data[pa_field.name] = [*slot_core_current_domain, extent]
index_column_data[pa_field.name] = [
*slot_core_max_domain,
extent,
*slot_core_current_domain,
]

index_column_info = pa.RecordBatch.from_pydict(
index_column_data, schema=pa.schema(index_column_schema)
Expand Down
60 changes: 24 additions & 36 deletions apis/python/src/tiledbsoma/_sparse_nd_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@
from somacore.options import PlatformConfig
from typing_extensions import Self

from tiledbsoma._flags import NEW_SHAPE_FEATURE_FLAG_ENABLED

from . import _util

# This package's pybind11 code
Expand Down Expand Up @@ -162,41 +160,31 @@ def create(
# [3] core current domain lo
# [4] core current domain hi

if NEW_SHAPE_FEATURE_FLAG_ENABLED:
dim_capacity, dim_extent = cls._dim_capacity_and_extent(
dim_name,
# The user specifies current domain -- this is the max domain
# which is taken from the max ranges for the dim datatype.
# We pass None here to detect those.
None,
len(shape),
TileDBCreateOptions.from_platform_config(platform_config),
)

if dim_shape == 0:
raise ValueError("SparseNDArray shape slots must be at least 1")
if dim_shape is None:
# Core current-domain semantics are (lo, hi) with both
# inclusive, with lo <= hi. This means smallest is (0, 0)
# which is shape 1, not 0.
dim_shape = 1

index_column_data[pa_field.name] = [
0,
dim_capacity - 1,
dim_extent,
0,
dim_shape - 1,
]
dim_capacity, dim_extent = cls._dim_capacity_and_extent(
dim_name,
# The user specifies current domain -- this is the max domain
# which is taken from the max ranges for the dim datatype.
# We pass None here to detect those.
None,
len(shape),
TileDBCreateOptions.from_platform_config(platform_config),
)

else:
dim_capacity, dim_extent = cls._dim_capacity_and_extent(
dim_name,
dim_shape,
len(shape),
TileDBCreateOptions.from_platform_config(platform_config),
)
index_column_data[pa_field.name] = [0, dim_capacity - 1, dim_extent]
if dim_shape == 0:
raise ValueError("SparseNDArray shape slots must be at least 1")
if dim_shape is None:
# Core current-domain semantics are (lo, hi) with both
# inclusive, with lo <= hi. This means smallest is (0, 0)
# which is shape 1, not 0.
dim_shape = 1

index_column_data[pa_field.name] = [
0,
dim_capacity - 1,
dim_extent,
0,
dim_shape - 1,
]

index_column_info = pa.RecordBatch.from_pydict(
index_column_data, schema=pa.schema(index_column_schema)
Expand Down
27 changes: 9 additions & 18 deletions apis/python/src/tiledbsoma/io/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,6 @@
NotCreateableError,
SOMAError,
)
from .._flags import (
DENSE_ARRAYS_CAN_HAVE_CURRENT_DOMAIN,
NEW_SHAPE_FEATURE_FLAG_ENABLED,
)
from .._soma_array import SOMAArray
from .._soma_object import AnySOMAObject, SOMAObject
from .._tdb_handles import RawHandle
Expand Down Expand Up @@ -1211,9 +1207,7 @@ def _write_dataframe_impl(
try:
# Note: tiledbsoma.io creates dataframes with soma_joinid being the one
# and only index column.
domain = None
if NEW_SHAPE_FEATURE_FLAG_ENABLED:
domain = ((0, shape - 1),)
domain = ((0, shape - 1),)
soma_df = DataFrame.create(
df_uri,
schema=arrow_table.schema,
Expand Down Expand Up @@ -1318,17 +1312,14 @@ def _create_from_matrix(
try:
shape: Sequence[Union[int, None]] = ()
# A SparseNDArray must be appendable in soma.io.
if NEW_SHAPE_FEATURE_FLAG_ENABLED:
# Instead of
# shape = tuple(int(e) for e in matrix.shape)
# we consult the registration mapping. This is important
# in the case when multiple H5ADs/AnnDatas are being
# ingested to an experiment which doesn't pre-exist.
shape = (axis_0_mapping.get_shape(), axis_1_mapping.get_shape())
elif cls.is_sparse or DENSE_ARRAYS_CAN_HAVE_CURRENT_DOMAIN:
shape = tuple(None for _ in matrix.shape)
else:
shape = matrix.shape

# Instead of
# shape = tuple(int(e) for e in matrix.shape)
# we consult the registration mapping. This is important
# in the case when multiple H5ADs/AnnDatas are being
# ingested to an experiment which doesn't pre-exist.
shape = (axis_0_mapping.get_shape(), axis_1_mapping.get_shape())

soma_ndarray = cls.create(
uri,
type=pa.from_numpy_dtype(matrix.dtype),
Expand Down
7 changes: 3 additions & 4 deletions apis/python/tests/test_basic_anndata_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -1336,10 +1336,9 @@ def test_nan_append(conftest_pbmc_small, dtype, nans, new_obs_ids):
var_field_name="var_id",
)

if tiledbsoma._flags.NEW_SHAPE_FEATURE_FLAG_ENABLED:
nobs = rd.get_obs_shape()
nvars = rd.get_var_shapes()
tiledbsoma.io.resize_experiment(SOMA_URI, nobs=nobs, nvars=nvars)
nobs = rd.get_obs_shape()
nvars = rd.get_var_shapes()
tiledbsoma.io.resize_experiment(SOMA_URI, nobs=nobs, nvars=nvars)

# Append the second anndata object
tiledbsoma.io.from_anndata(
Expand Down
9 changes: 2 additions & 7 deletions apis/python/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,7 @@ def test_dataframe(tmp_path, arrow_schema):
pydict["quux"] = [True, False, False, True, False]
rb = pa.Table.from_pydict(pydict)

if soma._flags.NEW_SHAPE_FEATURE_FLAG_ENABLED:
sdf.tiledbsoma_resize_soma_joinid_shape(len(rb))
sdf.tiledbsoma_resize_soma_joinid_shape(len(rb))

sdf.write(rb)

Expand All @@ -95,11 +94,7 @@ def test_dataframe(tmp_path, arrow_schema):
assert sdf.count == 5
assert len(sdf) == 5

# More to come on https://github.com/single-cell-data/TileDB-SOMA/issues/2407
assert (
sdf.tiledbsoma_has_upgraded_domain
== soma._flags.NEW_SHAPE_FEATURE_FLAG_ENABLED
)
assert sdf.tiledbsoma_has_upgraded_domain

with pytest.raises(AttributeError):
assert sdf.shape is None
Expand Down
Loading

0 comments on commit 4a2cbcb

Please sign in to comment.