Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[python/r] Move beyond the new-shape feature flag #3301

Merged
merged 3 commits into from
Nov 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 1 addition & 8 deletions .github/workflows/python-ci-single.yml
Original file line number Diff line number Diff line change
Expand Up @@ -128,14 +128,7 @@ jobs:
env:
TILEDB_SOMA_INIT_BUFFER_BYTES: 33554432 # accommodate tiny runners

- name: Run pytests for Python without new shape
shell: bash
# Setting PYTHONPATH ensures the tests load the in-tree source code under apis/python/src
# instead of the copy we `pip install`ed to site-packages above. That's needed for the code
# coverage analysis to work.
run: export SOMA_PY_NEW_SHAPE=false; PYTHONPATH=$(pwd)/apis/python/src python -m pytest --cov=apis/python/src --cov-report=xml apis/python/tests -v --durations=20 --maxfail=50

- name: Run pytests for Python with new shape
- name: Run pytests for Python
shell: bash
# Setting PYTHONPATH ensures the tests load the in-tree source code under apis/python/src
# instead of the copy we `pip install`ed to site-packages above. That's needed for the code
Expand Down
7 changes: 1 addition & 6 deletions .github/workflows/r-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -148,12 +148,7 @@ jobs:
# -e "devtools::install(upgrade = FALSE)" \
# -e "testthat::test_local('tests/testthat', load_package = 'installed')"

- name: Test without new shape
if: ${{ matrix.covr == 'no' }}
run: export SOMA_R_NEW_SHAPE=false && cd apis/r/tests && Rscript testthat.R

# https://github.com/single-cell-data/TileDB-SOMA/issues/2407
- name: Test with new shape
- name: Test
if: ${{ matrix.covr == 'no' }}
run: cd apis/r/tests && Rscript testthat.R

Expand Down
9 changes: 1 addition & 8 deletions .github/workflows/r-python-interop-testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -110,14 +110,7 @@ jobs:
- name: Update Packages
run: Rscript -e 'update.packages(ask=FALSE)'

- name: Interop tests with new-shape feature flag off
- name: Interop tests
run: python -m pytest apis/system/tests/
env:
TILEDB_SOMA_INIT_BUFFER_BYTES: 33554432 # accommodate tiny runners

- name: Interop tests with new-shape feature flag on
run: python -m pytest apis/system/tests/
env:
TILEDB_SOMA_INIT_BUFFER_BYTES: 33554432 # accommodate tiny runners
SOMA_PY_NEW_SHAPE: true
SOMA_R_NEW_SHAPE: true
28 changes: 11 additions & 17 deletions apis/python/src/tiledbsoma/_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,6 @@
from somacore import options
from typing_extensions import Self

from tiledbsoma._flags import NEW_SHAPE_FEATURE_FLAG_ENABLED

from . import _arrow_types, _util
from . import pytiledbsoma as clib
from ._constants import SOMA_JOINID
Expand Down Expand Up @@ -308,16 +306,12 @@ def create(
# [4] core current domain hi

index_column_schema.append(pa_field)
if NEW_SHAPE_FEATURE_FLAG_ENABLED:

index_column_data[pa_field.name] = [
*slot_core_max_domain,
extent,
*slot_core_current_domain,
]

else:
index_column_data[pa_field.name] = [*slot_core_current_domain, extent]
index_column_data[pa_field.name] = [
*slot_core_max_domain,
extent,
*slot_core_current_domain,
]

index_column_info = pa.RecordBatch.from_pydict(
index_column_data, schema=pa.schema(index_column_schema)
Expand Down Expand Up @@ -916,7 +910,7 @@ def _fill_out_slot_soma_domain(
# will (and must) ignore these when creating the TileDB schema.
slot_domain = "", ""
elif np.issubdtype(dtype, NPInteger):
if is_max_domain or not NEW_SHAPE_FEATURE_FLAG_ENABLED:
if is_max_domain:
# Core max domain is immutable. If unspecified, it should be as big
# as possible since it can never be resized.
iinfo = np.iinfo(cast(NPInteger, dtype))
Expand All @@ -936,7 +930,7 @@ def _fill_out_slot_soma_domain(
# not 0.
slot_domain = 0, 0
elif np.issubdtype(dtype, NPFloating):
if is_max_domain or not NEW_SHAPE_FEATURE_FLAG_ENABLED:
if is_max_domain:
finfo = np.finfo(cast(NPFloating, dtype))
slot_domain = finfo.min, finfo.max
saturated_range = True
Expand All @@ -957,31 +951,31 @@ def _fill_out_slot_soma_domain(
# value representable by domain type. Reduce domain max by 1 tile extent
# to allow for expansion.
elif dtype == "datetime64[s]":
if is_max_domain or not NEW_SHAPE_FEATURE_FLAG_ENABLED:
if is_max_domain:
iinfo = np.iinfo(cast(NPInteger, np.int64))
slot_domain = np.datetime64(iinfo.min + 1, "s"), np.datetime64(
iinfo.max - 1000000, "s"
)
else:
slot_domain = np.datetime64(0, "s"), np.datetime64(0, "s")
elif dtype == "datetime64[ms]":
if is_max_domain or not NEW_SHAPE_FEATURE_FLAG_ENABLED:
if is_max_domain:
iinfo = np.iinfo(cast(NPInteger, np.int64))
slot_domain = np.datetime64(iinfo.min + 1, "ms"), np.datetime64(
iinfo.max - 1000000, "ms"
)
else:
slot_domain = np.datetime64(0, "ms"), np.datetime64(0, "ms")
elif dtype == "datetime64[us]":
if is_max_domain or not NEW_SHAPE_FEATURE_FLAG_ENABLED:
if is_max_domain:
iinfo = np.iinfo(cast(NPInteger, np.int64))
slot_domain = np.datetime64(iinfo.min + 1, "us"), np.datetime64(
iinfo.max - 1000000, "us"
)
else:
slot_domain = np.datetime64(0, "us"), np.datetime64(0, "us")
elif dtype == "datetime64[ns]":
if is_max_domain or not NEW_SHAPE_FEATURE_FLAG_ENABLED:
if is_max_domain:
iinfo = np.iinfo(cast(NPInteger, np.int64))
slot_domain = np.datetime64(iinfo.min + 1, "ns"), np.datetime64(
iinfo.max - 1000000, "ns"
Expand Down
6 changes: 3 additions & 3 deletions apis/python/src/tiledbsoma/_dense_nd_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from ._arrow_types import pyarrow_to_carrow_type
from ._common_nd_array import NDArray
from ._exception import SOMAError, map_exception_for_create
from ._flags import DENSE_ARRAYS_CAN_HAVE_CURRENT_DOMAIN, NEW_SHAPE_FEATURE_FLAG_ENABLED
from ._flags import DENSE_ARRAYS_CAN_HAVE_CURRENT_DOMAIN
from ._tdb_handles import DenseNDArrayWrapper
from ._types import OpenTimestamp, Slice, StatusAndReason
from ._util import dense_indices_to_shape
Expand Down Expand Up @@ -122,7 +122,7 @@
if dim_shape is None:
raise ValueError("DenseNDArray shape slots must be numeric")

if NEW_SHAPE_FEATURE_FLAG_ENABLED and DENSE_ARRAYS_CAN_HAVE_CURRENT_DOMAIN:
if DENSE_ARRAYS_CAN_HAVE_CURRENT_DOMAIN:
dim_capacity, dim_extent = cls._dim_capacity_and_extent(
dim_name,
# The user specifies current domain -- this is the max domain
Expand Down Expand Up @@ -368,7 +368,7 @@
1.15 release notes. Raises an error if the new shape exceeds maxshape in
any dimension. Raises an error if the array already has a shape.
"""
if NEW_SHAPE_FEATURE_FLAG_ENABLED and DENSE_ARRAYS_CAN_HAVE_CURRENT_DOMAIN:
if DENSE_ARRAYS_CAN_HAVE_CURRENT_DOMAIN:

Check warning on line 371 in apis/python/src/tiledbsoma/_dense_nd_array.py

View check run for this annotation

Codecov / codecov/patch

apis/python/src/tiledbsoma/_dense_nd_array.py#L371

Added line #L371 was not covered by tests
if check_only:
return self._handle.tiledbsoma_can_upgrade_shape(newshape)
else:
Expand Down
2 changes: 0 additions & 2 deletions apis/python/src/tiledbsoma/_flags.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@
# removed once https://github.com/single-cell-data/TileDB-SOMA/issues/2407 is
# complete.

NEW_SHAPE_FEATURE_FLAG_ENABLED = os.getenv("SOMA_PY_NEW_SHAPE") != "false"

DENSE_ARRAYS_CAN_HAVE_CURRENT_DOMAIN = clib.embedded_version_triple() >= (2, 27, 0)

# Temporary for # https://github.com/single-cell-data/TileDB-SOMA/issues/2407:
Expand Down
15 changes: 5 additions & 10 deletions apis/python/src/tiledbsoma/_point_cloud_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
_revise_domain_for_extent,
)
from ._exception import SOMAError, map_exception_for_create
from ._flags import NEW_SHAPE_FEATURE_FLAG_ENABLED
from ._query_condition import QueryCondition
from ._read_iters import TableReadIter
from ._spatial_dataframe import SpatialDataFrame
Expand Down Expand Up @@ -215,16 +214,12 @@ def create(
# [4] core current domain hi

index_column_schema.append(pa_field)
if NEW_SHAPE_FEATURE_FLAG_ENABLED:

index_column_data[pa_field.name] = [
*slot_core_max_domain,
extent,
*slot_core_current_domain,
]

else:
index_column_data[pa_field.name] = [*slot_core_current_domain, extent]
index_column_data[pa_field.name] = [
*slot_core_max_domain,
extent,
*slot_core_current_domain,
]

index_column_info = pa.RecordBatch.from_pydict(
index_column_data, schema=pa.schema(index_column_schema)
Expand Down
60 changes: 24 additions & 36 deletions apis/python/src/tiledbsoma/_sparse_nd_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@
from somacore.options import PlatformConfig
from typing_extensions import Self

from tiledbsoma._flags import NEW_SHAPE_FEATURE_FLAG_ENABLED

from . import _util

# This package's pybind11 code
Expand Down Expand Up @@ -162,41 +160,31 @@
# [3] core current domain lo
# [4] core current domain hi

if NEW_SHAPE_FEATURE_FLAG_ENABLED:
dim_capacity, dim_extent = cls._dim_capacity_and_extent(
dim_name,
# The user specifies current domain -- this is the max domain
# which is taken from the max ranges for the dim datatype.
# We pass None here to detect those.
None,
len(shape),
TileDBCreateOptions.from_platform_config(platform_config),
)

if dim_shape == 0:
raise ValueError("SparseNDArray shape slots must be at least 1")
if dim_shape is None:
# Core current-domain semantics are (lo, hi) with both
# inclusive, with lo <= hi. This means smallest is (0, 0)
# which is shape 1, not 0.
dim_shape = 1

index_column_data[pa_field.name] = [
0,
dim_capacity - 1,
dim_extent,
0,
dim_shape - 1,
]
dim_capacity, dim_extent = cls._dim_capacity_and_extent(
dim_name,
# The user specifies current domain -- this is the max domain
# which is taken from the max ranges for the dim datatype.
# We pass None here to detect those.
None,
len(shape),
TileDBCreateOptions.from_platform_config(platform_config),
)

else:
dim_capacity, dim_extent = cls._dim_capacity_and_extent(
dim_name,
dim_shape,
len(shape),
TileDBCreateOptions.from_platform_config(platform_config),
)
index_column_data[pa_field.name] = [0, dim_capacity - 1, dim_extent]
if dim_shape == 0:
raise ValueError("SparseNDArray shape slots must be at least 1")
if dim_shape is None:
# Core current-domain semantics are (lo, hi) with both
# inclusive, with lo <= hi. This means smallest is (0, 0)
# which is shape 1, not 0.
dim_shape = 1

Check warning on line 179 in apis/python/src/tiledbsoma/_sparse_nd_array.py

View check run for this annotation

Codecov / codecov/patch

apis/python/src/tiledbsoma/_sparse_nd_array.py#L179

Added line #L179 was not covered by tests

index_column_data[pa_field.name] = [
0,
dim_capacity - 1,
dim_extent,
0,
dim_shape - 1,
]

index_column_info = pa.RecordBatch.from_pydict(
index_column_data, schema=pa.schema(index_column_schema)
Expand Down
27 changes: 9 additions & 18 deletions apis/python/src/tiledbsoma/io/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,6 @@
NotCreateableError,
SOMAError,
)
from .._flags import (
DENSE_ARRAYS_CAN_HAVE_CURRENT_DOMAIN,
NEW_SHAPE_FEATURE_FLAG_ENABLED,
)
from .._soma_array import SOMAArray
from .._soma_object import AnySOMAObject, SOMAObject
from .._tdb_handles import RawHandle
Expand Down Expand Up @@ -1211,9 +1207,7 @@ def _write_dataframe_impl(
try:
# Note: tiledbsoma.io creates dataframes with soma_joinid being the one
# and only index column.
domain = None
if NEW_SHAPE_FEATURE_FLAG_ENABLED:
domain = ((0, shape - 1),)
domain = ((0, shape - 1),)
soma_df = DataFrame.create(
df_uri,
schema=arrow_table.schema,
Expand Down Expand Up @@ -1318,17 +1312,14 @@ def _create_from_matrix(
try:
shape: Sequence[Union[int, None]] = ()
# A SparseNDArray must be appendable in soma.io.
if NEW_SHAPE_FEATURE_FLAG_ENABLED:
# Instead of
# shape = tuple(int(e) for e in matrix.shape)
# we consult the registration mapping. This is important
# in the case when multiple H5ADs/AnnDatas are being
# ingested to an experiment which doesn't pre-exist.
shape = (axis_0_mapping.get_shape(), axis_1_mapping.get_shape())
elif cls.is_sparse or DENSE_ARRAYS_CAN_HAVE_CURRENT_DOMAIN:
shape = tuple(None for _ in matrix.shape)
else:
shape = matrix.shape

# Instead of
# shape = tuple(int(e) for e in matrix.shape)
# we consult the registration mapping. This is important
# in the case when multiple H5ADs/AnnDatas are being
# ingested to an experiment which doesn't pre-exist.
shape = (axis_0_mapping.get_shape(), axis_1_mapping.get_shape())

soma_ndarray = cls.create(
uri,
type=pa.from_numpy_dtype(matrix.dtype),
Expand Down
7 changes: 3 additions & 4 deletions apis/python/tests/test_basic_anndata_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -1336,10 +1336,9 @@ def test_nan_append(conftest_pbmc_small, dtype, nans, new_obs_ids):
var_field_name="var_id",
)

if tiledbsoma._flags.NEW_SHAPE_FEATURE_FLAG_ENABLED:
nobs = rd.get_obs_shape()
nvars = rd.get_var_shapes()
tiledbsoma.io.resize_experiment(SOMA_URI, nobs=nobs, nvars=nvars)
nobs = rd.get_obs_shape()
nvars = rd.get_var_shapes()
tiledbsoma.io.resize_experiment(SOMA_URI, nobs=nobs, nvars=nvars)

# Append the second anndata object
tiledbsoma.io.from_anndata(
Expand Down
9 changes: 2 additions & 7 deletions apis/python/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,7 @@ def test_dataframe(tmp_path, arrow_schema):
pydict["quux"] = [True, False, False, True, False]
rb = pa.Table.from_pydict(pydict)

if soma._flags.NEW_SHAPE_FEATURE_FLAG_ENABLED:
sdf.tiledbsoma_resize_soma_joinid_shape(len(rb))
sdf.tiledbsoma_resize_soma_joinid_shape(len(rb))

sdf.write(rb)

Expand All @@ -95,11 +94,7 @@ def test_dataframe(tmp_path, arrow_schema):
assert sdf.count == 5
assert len(sdf) == 5

# More to come on https://github.com/single-cell-data/TileDB-SOMA/issues/2407
assert (
sdf.tiledbsoma_has_upgraded_domain
== soma._flags.NEW_SHAPE_FEATURE_FLAG_ENABLED
)
assert sdf.tiledbsoma_has_upgraded_domain

with pytest.raises(AttributeError):
assert sdf.shape is None
Expand Down
Loading
Loading