diff --git a/.coveragerc_omit b/.coveragerc_omit
index 79315301..e670259b 100644
--- a/.coveragerc_omit
+++ b/.coveragerc_omit
@@ -10,4 +10,4 @@ omit =
     vitessce/data_utils/anndata.py
     vitessce/data_utils/ome.py
     vitessce/data_utils/entities.py
-    vitessce/data_utils/multivec.py
\ No newline at end of file
+    vitessce/data_utils/multivec.py
diff --git a/.coveragerc_real b/.coveragerc_real
index d40e6d91..890ac4b9 100644
--- a/.coveragerc_real
+++ b/.coveragerc_real
@@ -1,3 +1,2 @@
 [report]
 exclude_lines =
-
\ No newline at end of file
diff --git a/.envrc b/.envrc
index 40ee06b8..1a180ae5 100644
--- a/.envrc
+++ b/.envrc
@@ -1 +1 @@
-use conda vitessce-python-dev
\ No newline at end of file
+use conda vitessce-python-dev
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
deleted file mode 100644
index 42915059..00000000
--- a/.github/workflows/lint.yml
+++ /dev/null
@@ -1,15 +0,0 @@
-name: Lint
-
-on: [push, pull_request]
-
-jobs:
-  lint:
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/checkout@v2
-    - uses: actions/setup-python@v2
-      with:
-        python-version: '3.9'
-        architecture: 'x64'
-    - run: pip install .[dev]
-    - run: flake8
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 00000000..a9535797
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,29 @@
+fail_fast: false
+default_language_version:
+  python: python3
+default_stages:
+  - commit
+  - push
+minimum_pre_commit_version: 2.16.0
+repos:
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.3.5
+    hooks:
+      - id: ruff
+        types_or: [python, pyi, jupyter]
+        args: [--fix, --exit-non-zero-on-fix]
+      - id: ruff-format
+        types_or: [python, pyi, jupyter]
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.5.0
+    hooks:
+      - id: detect-private-key
+      - id: check-ast
+      - id: end-of-file-fixer
+      - id: mixed-line-ending
+        args: [--fix=lf]
+      - id: trailing-whitespace
+      - id: check-case-conflict
+      # Check that there are no merge conflicts (could be generated by template sync)
+      - id: check-merge-conflict
+        args: [--assume-in-merge]
diff --git a/README.md b/README.md
index 0cd244e1..47a8f3c1 100644
--- a/README.md
+++ b/README.md
@@ -43,24 +43,10 @@ In this repository, there are multiple conda environments for different purposes
 ## Linting and testing
 
 ```sh
-flake8
+pre-commit run --all-files
 pytest
 ```
 
-### Formatting
-
-One file:
-
-```sh
-autopep8 --in-place --aggressive ./path/to/file.py
-```
-
-All `.py` files:
-
-```sh
-find . -name '*.py' -exec autopep8 --in-place --aggressive '{}' \;
-```
-
 
 ## Documentation
 
@@ -115,7 +101,7 @@ vw = vc.widget(proxy=True)
 vw
 ```
 
-or 
+or
 
 ```py
 vc.display(proxy=True, host_name="http://localhost:8888")
diff --git a/binder/environment.yml b/binder/environment.yml
index fb0a9e7d..92ed49ea 100644
--- a/binder/environment.yml
+++ b/binder/environment.yml
@@ -15,4 +15,4 @@ dependencies:
   - pip:
     - loompy
     - scanpy
-    - vitessce[all]
\ No newline at end of file
+    - vitessce[all]
diff --git a/demos/.envrc b/demos/.envrc
index 1f3424c5..a6d639fc 100644
--- a/demos/.envrc
+++ b/demos/.envrc
@@ -1 +1 @@
-use conda vitessce-python-demos
\ No newline at end of file
+use conda vitessce-python-demos
diff --git a/demos/README.md b/demos/README.md
index d9fc32fb..4d3bc778 100644
--- a/demos/README.md
+++ b/demos/README.md
@@ -1,6 +1,6 @@
 ## Processing scripts for Vitessce demo datasets
 
-Previously, we developed custom processing scripts for Vitessce demo data in the `vitessce-data` repository.
+Previously, we developed custom processing scripts for Vitessce demo data in the `vitessce-data` repository. However, now that there are consensus single-cell file formats (e.g., h5ad) and an ecosystem of data processing packages in the community, we aim to leverage those. Writing demo dataset processing code using Scanpy and AnnData in small Snakemake workflows in this repository should allow us to iterate more quickly and share more demos. @@ -91,4 +91,4 @@ Configure the Google Cloud CLI by running `gcloud auth login` ([reference](https ```sh snakemake --cores all --rerun-triggers mtime --config upload=true -``` \ No newline at end of file +``` diff --git a/demos/Snakefile b/demos/Snakefile index f41382be..4d24c262 100644 --- a/demos/Snakefile +++ b/demos/Snakefile @@ -26,7 +26,7 @@ subworkflow codeluppi_2018: subworkflow codeluppi_2018_via_zarr: workdir: "codeluppi-2018-via-zarr" - + subworkflow combat_2022: workdir: "combat-2022" @@ -131,4 +131,4 @@ rule fill_templates: for subworkflow_dir in params.subworkflow_dirs: if os.path.exists(os.path.join(subworkflow_dir, 'vitessce.template.json')): shell(f"python fill_template.py -d {subworkflow_dir} -t local > {os.path.join(subworkflow_dir, 'vitessce.local.json')}") - shell(f"python fill_template.py -d {subworkflow_dir} -t remote -v {BUCKET_VERSION} > {os.path.join(subworkflow_dir, 'vitessce.remote.json')}") \ No newline at end of file + shell(f"python fill_template.py -d {subworkflow_dir} -t remote -v {BUCKET_VERSION} > {os.path.join(subworkflow_dir, 'vitessce.remote.json')}") diff --git a/demos/codeluppi-2018-via-zarr/vitessce.template.json b/demos/codeluppi-2018-via-zarr/vitessce.template.json index 8df96cef..badadf12 100644 --- a/demos/codeluppi-2018-via-zarr/vitessce.template.json +++ b/demos/codeluppi-2018-via-zarr/vitessce.template.json @@ -238,4 +238,4 @@ "h": 2 } ] -} \ No newline at end of file +} diff --git a/demos/codeluppi-2018/Snakefile b/demos/codeluppi-2018/Snakefile index 9a0dd78a..64c79c10 100644 --- a/demos/codeluppi-2018/Snakefile +++ b/demos/codeluppi-2018/Snakefile @@ -129,4 +129,4 @@ rule download_molecules_hdf5: shell: ''' curl -L --retry 999 --retry-delay 3 -C - -o {output} {params.file_url} - ''' \ No newline at end of file + ''' diff --git a/demos/codeluppi-2018/vitessce.template.json b/demos/codeluppi-2018/vitessce.template.json index ecbd6b8a..b745cd16 100644 --- a/demos/codeluppi-2018/vitessce.template.json +++ b/demos/codeluppi-2018/vitessce.template.json @@ -276,4 +276,4 @@ "h": 2 } ] -} \ No newline at end of file +} diff --git a/demos/combat-2022/Snakefile b/demos/combat-2022/Snakefile index a305702f..82c12783 100644 --- a/demos/combat-2022/Snakefile +++ b/demos/combat-2022/Snakefile @@ -35,4 +35,3 @@ rule download_data: ''' curl -L --retry 999 --retry-delay 3 -C - -o {output} {params.file_url} ''' - diff --git a/demos/combat-2022/config.yml b/demos/combat-2022/config.yml index 750ff227..97dd07d6 100644 --- a/demos/combat-2022/config.yml +++ b/demos/combat-2022/config.yml @@ -1,2 +1,2 @@ output: -- combat_2022_cell.h5ad.zarr \ No newline at end of file +- combat_2022_cell.h5ad.zarr diff --git a/demos/combat-2022/vitessce.template.json b/demos/combat-2022/vitessce.template.json index 4cb8107d..7df5fa3a 100644 --- a/demos/combat-2022/vitessce.template.json +++ b/demos/combat-2022/vitessce.template.json @@ -204,4 +204,4 @@ "uid": "G" } ] -} \ No newline at end of file +} diff --git a/demos/common.smk b/demos/common.smk index 5929ad96..9f110451 100644 --- a/demos/common.smk +++ b/demos/common.smk @@ -14,4 +14,4 
@@ def flatten(l): return [item for sublist in l for item in sublist] def is_aws(output_path): - return not output_path.endswith('.ome.zarr') \ No newline at end of file + return not output_path.endswith('.ome.zarr') diff --git a/demos/create_demo.sh b/demos/create_demo.sh index 66adcf2b..68055ffc 100755 --- a/demos/create_demo.sh +++ b/demos/create_demo.sh @@ -35,4 +35,4 @@ Add the following to the main demos Snakefile: subworkflow $SUBWORKFLOW_KEY: workdir: "$SUBWORKFLOW_DIR" -EOF \ No newline at end of file +EOF diff --git a/demos/eng-2019/Snakefile b/demos/eng-2019/Snakefile index 14a4d5fa..0be9033e 100644 --- a/demos/eng-2019/Snakefile +++ b/demos/eng-2019/Snakefile @@ -33,4 +33,4 @@ rule download_file: shell: ''' curl -L -o {output} {BASE_URL}.{wildcards.prefix}.json - ''' \ No newline at end of file + ''' diff --git a/demos/eng-2019/vitessce.template.json b/demos/eng-2019/vitessce.template.json index 89cc2dd7..c640ddc6 100644 --- a/demos/eng-2019/vitessce.template.json +++ b/demos/eng-2019/vitessce.template.json @@ -191,4 +191,4 @@ "h": 4 } ] -} \ No newline at end of file +} diff --git a/demos/environment.yml b/demos/environment.yml index cbeb9d20..59164c5a 100644 --- a/demos/environment.yml +++ b/demos/environment.yml @@ -28,4 +28,4 @@ dependencies: - generate-tiff-offsets>=0.1.7 - obonet==0.2.5 - leidenalg - - ome-zarr==0.2.1 \ No newline at end of file + - ome-zarr==0.2.1 diff --git a/demos/habib-2017/Snakefile b/demos/habib-2017/Snakefile index 7a0b3c20..a1e8ca42 100644 --- a/demos/habib-2017/Snakefile +++ b/demos/habib-2017/Snakefile @@ -35,4 +35,3 @@ rule download_data: ''' curl -L -o {output} {params.file_url} ''' - diff --git a/demos/habib-2017/config.yml b/demos/habib-2017/config.yml index 9bec2170..ab11cd33 100644 --- a/demos/habib-2017/config.yml +++ b/demos/habib-2017/config.yml @@ -1,2 +1,2 @@ output: -- habib_2017_nature_methods.h5ad.zarr \ No newline at end of file +- habib_2017_nature_methods.h5ad.zarr diff --git a/demos/habib-2017/vitessce.template.json b/demos/habib-2017/vitessce.template.json index 81d30b71..76efded8 100644 --- a/demos/habib-2017/vitessce.template.json +++ b/demos/habib-2017/vitessce.template.json @@ -120,4 +120,4 @@ "uid": "F" } ] -} \ No newline at end of file +} diff --git a/demos/human-lymph-node-10x-visium/Snakefile b/demos/human-lymph-node-10x-visium/Snakefile index ec961d19..747779be 100644 --- a/demos/human-lymph-node-10x-visium/Snakefile +++ b/demos/human-lymph-node-10x-visium/Snakefile @@ -20,4 +20,3 @@ rule create_zarr: -oa {output.adata} \ -oi {output.img} ''' - diff --git a/demos/human-lymph-node-10x-visium/config.yml b/demos/human-lymph-node-10x-visium/config.yml index 48bfa600..7baa1c68 100644 --- a/demos/human-lymph-node-10x-visium/config.yml +++ b/demos/human-lymph-node-10x-visium/config.yml @@ -1,3 +1,3 @@ output: - human_lymph_node_10x_visium.h5ad.zarr -- human_lymph_node_10x_visium.ome.zarr \ No newline at end of file +- human_lymph_node_10x_visium.ome.zarr diff --git a/demos/human-lymph-node-10x-visium/vitessce.template.json b/demos/human-lymph-node-10x-visium/vitessce.template.json index f03ae7da..3bb2ca06 100644 --- a/demos/human-lymph-node-10x-visium/vitessce.template.json +++ b/demos/human-lymph-node-10x-visium/vitessce.template.json @@ -197,4 +197,4 @@ "x": 4, "y": 6, "w": 2, "h": 6 } ] -} \ No newline at end of file +} diff --git a/demos/kuppe-2022/Snakefile b/demos/kuppe-2022/Snakefile index 4dfe8f08..4addb7a6 100644 --- a/demos/kuppe-2022/Snakefile +++ b/demos/kuppe-2022/Snakefile @@ -122,4 +122,4 @@ rule 
download_atac: shell: ''' curl -L --retry 999 --retry-delay 3 -C - -o {output} "{params.file_url}" - ''' \ No newline at end of file + ''' diff --git a/demos/kuppe-2022/config.yml b/demos/kuppe-2022/config.yml index f6c5fa7d..58f4369d 100644 --- a/demos/kuppe-2022/config.yml +++ b/demos/kuppe-2022/config.yml @@ -3,4 +3,4 @@ output: - kuppe_2022_nature.atac.h5ad.zarr - kuppe_2022_nature.joint.h5ad.zarr - kuppe_2022_nature.visium.h5ad.zarr -- kuppe_2022_nature.visium.ome.zarr \ No newline at end of file +- kuppe_2022_nature.visium.ome.zarr diff --git a/demos/kuppe-2022/vitessce.template.json b/demos/kuppe-2022/vitessce.template.json index 08be5a92..f0bf2a2b 100644 --- a/demos/kuppe-2022/vitessce.template.json +++ b/demos/kuppe-2022/vitessce.template.json @@ -351,4 +351,4 @@ "uid": "I" } ] -} \ No newline at end of file +} diff --git a/demos/marshall-2022/Snakefile b/demos/marshall-2022/Snakefile index e5efd46c..0f554e11 100644 --- a/demos/marshall-2022/Snakefile +++ b/demos/marshall-2022/Snakefile @@ -38,4 +38,4 @@ rule download_adata: shell: ''' curl -L -o {output} "{params.file_url}" - ''' \ No newline at end of file + ''' diff --git a/demos/marshall-2022/vitessce.template.json b/demos/marshall-2022/vitessce.template.json index 33c63549..3592a3b0 100644 --- a/demos/marshall-2022/vitessce.template.json +++ b/demos/marshall-2022/vitessce.template.json @@ -149,4 +149,4 @@ "uid": "H" } ] -} \ No newline at end of file +} diff --git a/demos/meta-2022-azimuth/vitessce.template.json b/demos/meta-2022-azimuth/vitessce.template.json index 496f1b8b..e794582d 100644 --- a/demos/meta-2022-azimuth/vitessce.template.json +++ b/demos/meta-2022-azimuth/vitessce.template.json @@ -116,7 +116,7 @@ }, "uid": "C" }, - + { "component": "featureList", "h": 6, @@ -137,4 +137,4 @@ "uid": "F" } ] -} \ No newline at end of file +} diff --git a/demos/satija-2020/Snakefile b/demos/satija-2020/Snakefile index c1bd0e25..44faaf23 100644 --- a/demos/satija-2020/Snakefile +++ b/demos/satija-2020/Snakefile @@ -100,4 +100,4 @@ rule download_cl_obo: shell: ''' curl -L -o {output} {params.file_url} - ''' \ No newline at end of file + ''' diff --git a/demos/satija-2020/vitessce.template.json b/demos/satija-2020/vitessce.template.json index 7cfff396..5ed862c4 100644 --- a/demos/satija-2020/vitessce.template.json +++ b/demos/satija-2020/vitessce.template.json @@ -132,4 +132,4 @@ "y": 4 } ] -} \ No newline at end of file +} diff --git a/demos/sn-atac-seq-hubmap-2020/Snakefile b/demos/sn-atac-seq-hubmap-2020/Snakefile index bfa4094a..32e09646 100644 --- a/demos/sn-atac-seq-hubmap-2020/Snakefile +++ b/demos/sn-atac-seq-hubmap-2020/Snakefile @@ -32,4 +32,4 @@ rule download_file: shell: ''' curl -L -o {output} {BASE_URL}.{wildcards.prefix}.json - ''' \ No newline at end of file + ''' diff --git a/demos/sn-atac-seq-hubmap-2020/vitessce.template.json b/demos/sn-atac-seq-hubmap-2020/vitessce.template.json index e5aa4478..11bebe3e 100644 --- a/demos/sn-atac-seq-hubmap-2020/vitessce.template.json +++ b/demos/sn-atac-seq-hubmap-2020/vitessce.template.json @@ -84,4 +84,4 @@ } ], "initStrategy": "auto" -} \ No newline at end of file +} diff --git a/demos/wang-2018/Snakefile b/demos/wang-2018/Snakefile index 71dda5b9..07ccc2f4 100644 --- a/demos/wang-2018/Snakefile +++ b/demos/wang-2018/Snakefile @@ -41,4 +41,4 @@ rule download_file: shell: ''' curl -L -o {output} {BASE_URL}.{wildcards.prefix}.json - ''' \ No newline at end of file + ''' diff --git a/demos/wang-2018/vitessce.template.json b/demos/wang-2018/vitessce.template.json index 
728c9a70..a8581acc 100644 --- a/demos/wang-2018/vitessce.template.json +++ b/demos/wang-2018/vitessce.template.json @@ -97,4 +97,4 @@ "h": 2 } ] -} \ No newline at end of file +} diff --git a/docs/_static/stylesheet.css b/docs/_static/stylesheet.css index 161ad7d2..fbec9730 100644 --- a/docs/_static/stylesheet.css +++ b/docs/_static/stylesheet.css @@ -36,4 +36,4 @@ legend { z-index: 201; text-align: center; margin: 0 auto; -} \ No newline at end of file +} diff --git a/docs/api_data.rst b/docs/api_data.rst index 34fffd81..f6bb43e4 100644 --- a/docs/api_data.rst +++ b/docs/api_data.rst @@ -30,4 +30,4 @@ vitessce.data_utils .. automodule:: vitessce.data_utils.ome :members: .. automodule:: vitessce.data_utils.anndata - :members: \ No newline at end of file + :members: diff --git a/docs/data_examples.rst b/docs/data_examples.rst index 673cb77f..8bc2d393 100644 --- a/docs/data_examples.rst +++ b/docs/data_examples.rst @@ -7,4 +7,4 @@ Data preparation examples notebooks/data_export_s3 notebooks/data_export_files - notebooks/widget_brain_with_base_dir \ No newline at end of file + notebooks/widget_brain_with_base_dir diff --git a/docs/data_options.rst b/docs/data_options.rst index d600efde..84dd3eea 100644 --- a/docs/data_options.rst +++ b/docs/data_options.rst @@ -105,6 +105,3 @@ Jupyter process: remote service like Colab/Binder; Files: remote & accessed via =================================================================================== Unfortunately, this will not work because the remote server cannot access the files that are on another machine behind SSH. - - - diff --git a/docs/notebooks/.envrc b/docs/notebooks/.envrc index ec2131bf..95ca7919 100644 --- a/docs/notebooks/.envrc +++ b/docs/notebooks/.envrc @@ -1 +1 @@ -use conda vitessce-python-notebooks \ No newline at end of file +use conda vitessce-python-notebooks diff --git a/docs/notebooks/environment.yml b/docs/notebooks/environment.yml index 6193a115..24f31b70 100644 --- a/docs/notebooks/environment.yml +++ b/docs/notebooks/environment.yml @@ -21,4 +21,4 @@ dependencies: - pip - pip: - loompy>=3.0.6 - - scikit-misc>=0.1.3 \ No newline at end of file + - scikit-misc>=0.1.3 diff --git a/docs/widget_examples.rst b/docs/widget_examples.rst index 173af5a4..48e7770f 100644 --- a/docs/widget_examples.rst +++ b/docs/widget_examples.rst @@ -10,4 +10,4 @@ Widget examples notebooks/widget_pbmc notebooks/widget_loom notebooks/widget_from_dict - notebooks/widget_pbmc_remote.ipynb \ No newline at end of file + notebooks/widget_pbmc_remote.ipynb diff --git a/pyproject.toml b/pyproject.toml index 5bac98af..d06bd70b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,12 +46,11 @@ dev = [ 'pytest>=6.2.4', 'loompy>=3.0.6', 'coverage>=6.3.2', - 'flake8==3.8.4', ] docs = [ 'sphinx==4.2.0', 'sphinx-rtd-theme==1.0.0', - + # Pin sub-dependencies of sphinx # Reference: https://github.com/sphinx-doc/sphinx/issues/11130 'sphinxcontrib-applehelp==1.0.4', @@ -60,7 +59,7 @@ docs = [ 'sphinxcontrib-jsmath==1.0.1', 'sphinxcontrib-qthelp==1.0.3', 'sphinxcontrib-serializinghtml==1.1.5', - + 'nbsphinx==0.8.8', 'nbclean==0.3.2', # Pin sqlalchemy to before 1.4 to fix issue importing nbclean.NotebookCleaner @@ -95,3 +94,64 @@ repository = "https://github.com/vitessce/vitessce-python" [tool.setuptools] packages = ["vitessce", "vitessce.data_utils"] + +[tool.ruff] +line-length = 120 +src = ["src"] +exclude = ["docs", "tests", "demos", "binder", "README.md"] +force-exclude = true +extend-include = ["*.ipynb"] + +[tool.ruff.format] +docstring-code-format = true + 
+
+[tool.ruff.lint]
+select = [
+    "F",  # Errors detected by Pyflakes
+    "E",  # Error detected by Pycodestyle
+    "W",  # Warning detected by Pycodestyle
+    "I",  # isort
+    "D",  # pydocstyle
+    "B",  # flake8-bugbear
+    "TID",  # flake8-tidy-imports
+    "C4",  # flake8-comprehensions
+    "BLE",  # flake8-blind-except
+    "UP",  # pyupgrade
+    "RUF100",  # Report unused noqa directives
+]
+ignore = [
+    # line too long -> we accept long comment lines; formatter gets rid of long code lines
+    "E501",
+    # Do not assign a lambda expression, use a def -> lambda expression assignments are convenient
+    "E731",
+    # allow I, O, l as variable names -> I is the identity matrix
+    "E741",
+    # Missing docstring in public package
+    "D104",
+    # Missing docstring in public module
+    "D100",
+    # Missing docstring in __init__
+    "D107",
+    # Errors from function calls in argument defaults. These are fine when the result is immutable.
+    "B008",
+    # __magic__ methods are often self-explanatory, allow missing docstrings
+    "D105",
+    # first line should end with a period [Bug: doesn't work with single-line docstrings]
+    "D400",
+    # First line should be in imperative mood; try rephrasing
+    "D401",
+    ## Disable one in each pair of mutually incompatible rules
+    # We don’t want a blank line before a class docstring
+    "D203",
+    # We want docstrings to start immediately after the opening triple quote
+    "D213",
+]
+
+[tool.ruff.lint.pydocstyle]
+convention = "numpy"
+
+[tool.ruff.lint.per-file-ignores]
+"docs/*" = ["I"]
+"vitessce/*" = ["D"]
+"tests/*" = ["D"]
+"*/__init__.py" = ["F401"]
diff --git a/pytest.ini b/pytest.ini
index d52d0f7c..53405db8 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -1,2 +1,2 @@
 [pytest]
-addopts = --doctest-modules --verbose --ignore=docs --ignore=demos
\ No newline at end of file
+addopts = --doctest-modules --verbose --ignore=docs --ignore=demos
diff --git a/setup.cfg b/setup.cfg
index b20c7454..8f4c17ff 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -16,4 +16,4 @@ exclude =
     ./js/node_modules/,
     ./docs/notebooks/.ipynb_checkpoints/,
     ./build/,
-    ./.ipynb_checkpoints/
\ No newline at end of file
+    ./.ipynb_checkpoints/
diff --git a/tests/data/test.ome.zarr/.zgroup b/tests/data/test.ome.zarr/.zgroup
index 3b7daf22..3f3fad2d 100644
--- a/tests/data/test.ome.zarr/.zgroup
+++ b/tests/data/test.ome.zarr/.zgroup
@@ -1,3 +1,3 @@
 {
     "zarr_format": 2
-}
\ No newline at end of file
+}
diff --git a/vitessce/__init__.py b/vitessce/__init__.py
index bc8ae8db..9df72c28 100644
--- a/vitessce/__init__.py
+++ b/vitessce/__init__.py
@@ -2,36 +2,33 @@
 from warnings import warn
 
 from .config import (
-    VitessceConfig,
+    CoordinationLevel,
     VitessceChainableConfig,
+    VitessceConfig,
     VitessceConfigDatasetFile,
     hconcat,
     vconcat,
-    CoordinationLevel,
 )
-
-from .utils import (
-    get_initial_coordination_scope_prefix,
-    get_initial_coordination_scope_name,
+from .config_converter import (
+    CellBrowserToAnndataZarrConverter,  # only exported for testing.
+    convert_cell_browser_project_to_anndata,
 )
-
-from .repr import make_repr
-
 from .constants import (
+    BASE_URL_PLACEHOLDER,
     CoordinationType,
-    ViewType,
     DataType,
     FileType,
+    ViewType,
+)
+from .constants import (  # For backwards compatibility, also export ViewType as Component
     ViewType as Component,
-    BASE_URL_PLACEHOLDER,
 )
-
-from .config_converter import (
-    CellBrowserToAnndataZarrConverter,  # only exported for testing.
- convert_cell_browser_project_to_anndata, +from .repr import make_repr +from .utils import ( + get_initial_coordination_scope_name, + get_initial_coordination_scope_prefix, ) - from .wrappers import AbstractWrapper # We allow installation without all of the dependencies that the widget requires. @@ -39,28 +36,28 @@ try: from .widget import VitessceWidget, data_server except ModuleNotFoundError as e: # pragma: no cover - warn(f'Extra installs are necessary to use widgets: {e}') + warn(f"Extra installs are necessary to use widgets: {e}", stacklevel=1) try: from .wrappers import ( - OmeTiffWrapper, - OmeZarrWrapper, - MultiImageWrapper, - CsvWrapper, AnnDataWrapper, - MultivecZarrWrapper, + CsvWrapper, ImageOmeTiffWrapper, - ObsSegmentationsOmeTiffWrapper, ImageOmeZarrWrapper, + MultiImageWrapper, + MultivecZarrWrapper, + ObsSegmentationsOmeTiffWrapper, ObsSegmentationsOmeZarrWrapper, + OmeTiffWrapper, + OmeZarrWrapper, ) except ModuleNotFoundError as e: # pragma: no cover - warn(f'Extra installs are necessary to use wrappers: {e}') + warn(f"Extra installs are necessary to use wrappers: {e}", stacklevel=1) try: from .export import ( - export_to_s3, export_to_files, + export_to_s3, ) except ModuleNotFoundError as e: # pragma: no cover - warn(f'Extra installs are necessary to use exports: {e}') + warn(f"Extra installs are necessary to use exports: {e}", stacklevel=1) diff --git a/vitessce/config.py b/vitessce/config.py index 4663ef38..71b15393 100644 --- a/vitessce/config.py +++ b/vitessce/config.py @@ -1,22 +1,20 @@ -import sys -import inspect import copy as copy_module -import black +import inspect +import sys from collections import OrderedDict -from .constants import ( - norm_enum, - CoordinationType as ct, - ViewType as cm, # TODO: change to vt - FileType as ft -) +import black -from .repr import make_repr, make_params_repr +from .constants import CoordinationType as ct +from .constants import FileType as ft +from .constants import ViewType as cm # TODO: change to vt +from .constants import norm_enum +from .repr import make_params_repr, make_repr from .utils import create_prefixed_get_next_scope_numeric def _get_next_scope(prev_scopes): - chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" next_char_indices = [0] def next(): @@ -63,9 +61,7 @@ def __init__(self, file_type, url=None, coordination_values=None, options=None, :type options: dict or list or None :param data_type: Deprecated / not used. Only included for backwards compatibility with the old API. 
""" - self.file = { - "fileType": file_type - } + self.file = {"fileType": file_type} if url: self.file["url"] = url if options: @@ -112,10 +108,7 @@ def __init__(self, uid, name, base_dir=None): self.base_dir = base_dir def _to_py_params(self): - return { - "uid": self.dataset["uid"], - "name": self.dataset["name"] - } + return {"uid": self.dataset["uid"], "name": self.dataset["name"]} def get_name(self): """ @@ -157,21 +150,20 @@ def add_file(self, file_type, url=None, coordination_values=None, options=None, from vitessce import VitessceConfig, DataType as dt, FileType as ft - vc = VitessceConfig(schema_version="1.0.15", name='My Config') - my_dataset = ( - vc.add_dataset(name='My Dataset') - .add_file( - url="http://example.com/cells.json", - data_type=dt.CELLS, - file_type=ft.CELLS_JSON, - ) + vc = VitessceConfig(schema_version="1.0.15", name="My Config") + my_dataset = vc.add_dataset(name="My Dataset").add_file( + url="http://example.com/cells.json", + data_type=dt.CELLS, + file_type=ft.CELLS_JSON, ) """ - file_type_str = norm_enum(file_type, ft) - self._add_file(VitessceConfigDatasetFile( - url=url, file_type=file_type_str, coordination_values=coordination_values, options=options)) + self._add_file( + VitessceConfigDatasetFile( + url=url, file_type=file_type_str, coordination_values=coordination_values, options=options + ) + ) return self def _add_file(self, obj): @@ -230,10 +222,7 @@ def get_routes(self): def get_stores(self, base_url=None): stores = {} for obj in self.objs: - stores = { - **stores, - **obj.get_stores(base_url) - } + stores = {**stores, **obj.get_stores(base_url)} return stores @@ -272,7 +261,7 @@ def hconcat(*views): from vitessce import VitessceConfig, ViewType as vt, hconcat, vconcat vc = VitessceConfig(schema_version="1.0.15") - my_dataset = vc.add_dataset(name='My Dataset') + my_dataset = vc.add_dataset(name="My Dataset") v1 = vc.add_view(vt.SPATIAL, dataset=my_dataset) v2 = vc.add_view(vt.SPATIAL, dataset=my_dataset) v3 = vc.add_view(vt.SPATIAL, dataset=my_dataset) @@ -315,7 +304,7 @@ def vconcat(*views): from vitessce import VitessceConfig, ViewType as vt, hconcat, vconcat vc = VitessceConfig(schema_version="1.0.15") - my_dataset = vc.add_dataset(name='My Dataset') + my_dataset = vc.add_dataset(name="My Dataset") v1 = vc.add_view(vt.SPATIAL, dataset=my_dataset) v2 = vc.add_view(vt.SPATIAL, dataset=my_dataset) v3 = vc.add_view(vt.SPATIAL, dataset=my_dataset) @@ -425,6 +414,7 @@ def process_level(parent_type, parent_scope, level_type, level_val): for next_level_type, next_level_val in level_val["children"].items(): process_level(level_type, level_val["scope"], next_level_type, next_level_val) # Else is the base case: no children + # End process_level inner function for top_level_type, top_level_val in scopes.items(): @@ -472,7 +462,7 @@ def __init__(self, component, coordination_scopes, x, y, w, h): "x": x, "y": y, "w": w, - "h": h + "h": h, } def _to_py_params(self): @@ -481,14 +471,12 @@ def _to_py_params(self): "x": self.view["x"], "y": self.view["y"], "w": self.view["w"], - "h": self.view["h"] + "h": self.view["h"], } # Only include coordination_scopes if there are coordination scopes other than # the coorindation scope for the 'dataset' coordination type. 
non_dataset_coordination_scopes = { - c_type: c_scope - for c_type, c_scope in self.view["coordinationScopes"].items() - if c_type != ct.DATASET.value + c_type: c_scope for c_type, c_scope in self.view["coordinationScopes"].items() if c_type != ct.DATASET.value } if len(non_dataset_coordination_scopes) > 0: params_dict["coordination_scopes"] = non_dataset_coordination_scopes @@ -522,7 +510,7 @@ def use_coordination(self, *c_scopes, allow_multiple_scopes_per_type=False): from vitessce import VitessceConfig, ViewType as vt, CoordinationType as ct vc = VitessceConfig(schema_version="1.0.15") - my_dataset = vc.add_dataset(name='My Dataset') + my_dataset = vc.add_dataset(name="My Dataset") v1 = vc.add_view(vt.SPATIAL, dataset=my_dataset) v2 = vc.add_view(vt.SPATIAL, dataset=my_dataset) zoom_scope, x_scope, y_scope = vc.add_coordination( @@ -540,8 +528,8 @@ def use_coordination(self, *c_scopes, allow_multiple_scopes_per_type=False): assert isinstance(c_scope, VitessceConfigCoordinationScope) existing_value = self.view["coordinationScopes"].get(c_scope.c_type) new_value = c_scope.c_scope - if (existing_value is not None and allow_multiple_scopes_per_type): - if (isinstance(existing_value, list)): + if existing_value is not None and allow_multiple_scopes_per_type: + if isinstance(existing_value, list): self.view["coordinationScopes"][c_scope.c_type] = existing_value + [new_value] else: self.view["coordinationScopes"][c_scope.c_type] = [existing_value, new_value] @@ -565,13 +553,15 @@ def use_coordination_by_dict(self, scopes): from vitessce import VitessceConfig, ViewType as vt, CoordinationType as ct vc = VitessceConfig(schema_version="1.0.15") - my_dataset = vc.add_dataset(name='My Dataset') + my_dataset = vc.add_dataset(name="My Dataset") spatial_view = vc.add_view(vt.SPATIAL, dataset=my_dataset) - scopes = vc.add_coordination_by_dict({ - ct.SPATIAL_ZOOM: 2, - ct.SPATIAL_TARGET_X: 0, - ct.SPATIAL_TARGET_Y: 0, - }) + scopes = vc.add_coordination_by_dict( + { + ct.SPATIAL_ZOOM: 2, + ct.SPATIAL_TARGET_X: 0, + ct.SPATIAL_TARGET_Y: 0, + } + ) spatial_view.use_coordination_by_dict(scopes) """ if "coordinationScopes" not in self.view["coordinationScopes"] or self.view["coordinationScopes"] is None: @@ -603,14 +593,16 @@ def use_meta_coordination(self, meta_scope): from vitessce import VitessceConfig, ViewType as vt, CoordinationType as ct vc = VitessceConfig(schema_version="1.0.15") - my_dataset = vc.add_dataset(name='My Dataset') + my_dataset = vc.add_dataset(name="My Dataset") spatial_view = vc.add_view(vt.SPATIAL, dataset=my_dataset) lc_view = vc.add_view(vt.LAYER_CONTROLLER, dataset=my_dataset) - scopes = vc.add_coordination_by_dict({ - ct.SPATIAL_ZOOM: 2, - ct.SPATIAL_TARGET_X: 0, - ct.SPATIAL_TARGET_Y: 0, - }) + scopes = vc.add_coordination_by_dict( + { + ct.SPATIAL_ZOOM: 2, + ct.SPATIAL_TARGET_X: 0, + ct.SPATIAL_TARGET_Y: 0, + } + ) meta_scopes = vc.add_meta_coordination() meta_scopes.use_coordination_by_dict(scopes) @@ -682,6 +674,7 @@ def __or__(self, other): def __truediv__(self, other): return vconcat(self, other) + # would import as CL for convenience @@ -739,7 +732,7 @@ def set_value(self, c_value): from vitessce import VitessceConfig, ViewType as vt, CoordinationType as ct vc = VitessceConfig(schema_version="1.0.15") - my_dataset = vc.add_dataset(name='My Dataset') + my_dataset = vc.add_dataset(name="My Dataset") v1 = vc.add_view(vt.SPATIAL, dataset=my_dataset) v2 = vc.add_view(vt.SPATIAL, dataset=my_dataset) zoom_scope, x_scope, y_scope = vc.add_coordination( @@ -811,14 +804,16 @@ def 
use_coordination_by_dict(self, scopes): from vitessce import VitessceConfig, ViewType as vt, CoordinationType as ct vc = VitessceConfig(schema_version="1.0.15") - my_dataset = vc.add_dataset(name='My Dataset') + my_dataset = vc.add_dataset(name="My Dataset") spatial_view = vc.add_view(vt.SPATIAL, dataset=my_dataset) lc_view = vc.add_view(vt.LAYER_CONTROLLER, dataset=my_dataset) - scopes = vc.add_coordination_by_dict({ - ct.SPATIAL_ZOOM: 2, - ct.SPATIAL_TARGET_X: 0, - ct.SPATIAL_TARGET_Y: 0, - }) + scopes = vc.add_coordination_by_dict( + { + ct.SPATIAL_ZOOM: 2, + ct.SPATIAL_TARGET_X: 0, + ct.SPATIAL_TARGET_Y: 0, + } + ) meta_scopes = vc.add_meta_coordination() meta_scopes.use_coordination_by_dict(scopes) @@ -861,7 +856,7 @@ def __init__(self, schema_version, name=None, description=None, base_dir=None): from vitessce import VitessceConfig - vc = VitessceConfig(schema_version="1.0.15", name='My Config') + vc = VitessceConfig(schema_version="1.0.15", name="My Config") """ self.config = { "version": schema_version, @@ -870,7 +865,7 @@ def __init__(self, schema_version, name=None, description=None, base_dir=None): "datasets": [], "coordinationSpace": {}, "layout": [], - "initStrategy": "auto" + "initStrategy": "auto", } self.get_next_scope = _get_next_scope @@ -930,17 +925,13 @@ def add_dataset(self, name="", uid=None, files=None, objs=None): from vitessce import VitessceConfig, DataType as dt, FileType as ft - vc = VitessceConfig(schema_version="1.0.15", name='My Config') - my_dataset = ( - vc.add_dataset(name='My Dataset') - .add_file( - url="http://example.com/cells.json", - file_type=ft.CELLS_JSON, - ) + vc = VitessceConfig(schema_version="1.0.15", name="My Config") + my_dataset = vc.add_dataset(name="My Dataset").add_file( + url="http://example.com/cells.json", + file_type=ft.CELLS_JSON, ) """ - uid = uid if uid is not None else self.get_next_scope( - [d.dataset['uid'] for d in self.config["datasets"]]) + uid = uid if uid is not None else self.get_next_scope([d.dataset["uid"] for d in self.config["datasets"]]) assert isinstance(uid, str) vcd = VitessceConfigDataset(uid, name, base_dir=self.base_dir) self.config["datasets"].append(vcd) @@ -994,7 +985,19 @@ def get_datasets(self): """ return self.config["datasets"] - def add_view(self, view_type, dataset=None, dataset_uid=None, x=0, y=0, w=1, h=1, mapping=None, coordination_scopes=None, props=None): + def add_view( + self, + view_type, + dataset=None, + dataset_uid=None, + x=0, + y=0, + w=1, + h=1, + mapping=None, + coordination_scopes=None, + props=None, + ): """ Add a view to the config. @@ -1024,13 +1027,12 @@ def add_view(self, view_type, dataset=None, dataset_uid=None, x=0, y=0, w=1, h=1 from vitessce import VitessceConfig, ViewType as vt vc = VitessceConfig(schema_version="1.0.15") - my_dataset = vc.add_dataset(name='My Dataset') + my_dataset = vc.add_dataset(name="My Dataset") v1 = vc.add_view(vt.SPATIAL, dataset=my_dataset) v2 = vc.add_view(vt.SCATTERPLOT, dataset=my_dataset, mapping="X_umap") """ # User should only provide dataset or dataset_uid, but not both. 
- assert isinstance(dataset, VitessceConfigDataset) or isinstance( - dataset_uid, str) + assert isinstance(dataset, VitessceConfigDataset) or isinstance(dataset_uid, str) assert dataset is None or dataset_uid is None component = view_type assert type(component) in [str, cm] @@ -1038,31 +1040,32 @@ def add_view(self, view_type, dataset=None, dataset_uid=None, x=0, y=0, w=1, h=1 if dataset is None: dataset = self.get_dataset_by_uid(dataset_uid) if dataset is None: - raise ValueError( - "A dataset with the provided dataset_uid could not be found.") + raise ValueError("A dataset with the provided dataset_uid could not be found.") component_str = norm_enum(component, cm) # Find the coordination scope name associated with the dataset - dataset_matches = [ - scope_name - for scope_name, dataset_scope in self.config["coordinationSpace"][ct.DATASET.value].items() - if dataset_scope.c_value == dataset.dataset["uid"] - ] if ct.DATASET.value in self.config["coordinationSpace"].keys() else [] + dataset_matches = ( + [ + scope_name + for scope_name, dataset_scope in self.config["coordinationSpace"][ct.DATASET.value].items() + if dataset_scope.c_value == dataset.dataset["uid"] + ] + if ct.DATASET.value in self.config["coordinationSpace"].keys() + else [] + ) if len(dataset_matches) == 1: dataset_scope = dataset_matches[0] else: raise ValueError( - "No coordination scope matching the dataset parameter could be found in the coordination space.") + "No coordination scope matching the dataset parameter could be found in the coordination space." + ) # Set up the view's dataset coordination scope based on the dataset parameter. - internal_coordination_scopes = { - ct.DATASET.value: dataset_scope - } + internal_coordination_scopes = {ct.DATASET.value: dataset_scope} if coordination_scopes is not None: internal_coordination_scopes.update(coordination_scopes) - vcv = VitessceConfigView( - component_str, internal_coordination_scopes, x, y, w, h) + vcv = VitessceConfigView(component_str, internal_coordination_scopes, x, y, w, h) # Use the mapping parameter if component is scatterplot and the mapping is not None if mapping is not None: @@ -1092,7 +1095,7 @@ def add_coordination(self, *c_types): from vitessce import VitessceConfig, ViewType as vt, CoordinationType as ct vc = VitessceConfig(schema_version="1.0.15") - my_dataset = vc.add_dataset(name='My Dataset') + my_dataset = vc.add_dataset(name="My Dataset") v1 = vc.add_view(vt.SPATIAL, dataset=my_dataset) v2 = vc.add_view(vt.SPATIAL, dataset=my_dataset) zoom_scope, x_scope, y_scope = vc.add_coordination( @@ -1109,10 +1112,12 @@ def add_coordination(self, *c_types): result = [] for c_type in c_types: c_type_str = norm_enum(c_type, ct) - prev_scopes = list(self.config["coordinationSpace"][c_type_str].keys( - )) if c_type_str in self.config["coordinationSpace"].keys() else [] - scope = VitessceConfigCoordinationScope( - c_type_str, self.get_next_scope(prev_scopes)) + prev_scopes = ( + list(self.config["coordinationSpace"][c_type_str].keys()) + if c_type_str in self.config["coordinationSpace"].keys() + else [] + ) + scope = VitessceConfigCoordinationScope(c_type_str, self.get_next_scope(prev_scopes)) if scope.c_type not in self.config["coordinationSpace"]: self.config["coordinationSpace"][scope.c_type] = {} self.config["coordinationSpace"][scope.c_type][scope.c_scope] = scope @@ -1132,14 +1137,16 @@ def add_meta_coordination(self): from vitessce import VitessceConfig, ViewType as vt, CoordinationType as ct vc = VitessceConfig(schema_version="1.0.15") - my_dataset = 
vc.add_dataset(name='My Dataset') + my_dataset = vc.add_dataset(name="My Dataset") spatial_view = vc.add_view(vt.SPATIAL, dataset=my_dataset) lc_view = vc.add_view(vt.LAYER_CONTROLLER, dataset=my_dataset) - scopes = vc.add_coordination_by_dict({ - ct.SPATIAL_ZOOM: 2, - ct.SPATIAL_TARGET_X: 0, - ct.SPATIAL_TARGET_Y: 0, - }) + scopes = vc.add_coordination_by_dict( + { + ct.SPATIAL_ZOOM: 2, + ct.SPATIAL_TARGET_X: 0, + ct.SPATIAL_TARGET_Y: 0, + } + ) meta_scopes = vc.add_meta_coordination() meta_scopes.use_coordination_by_dict(scopes) @@ -1158,8 +1165,12 @@ def add_meta_coordination(self): self.config["coordinationSpace"][ct.META_COORDINATION_SCOPES.value] = {} if ct.META_COORDINATION_SCOPES_BY.value not in self.config["coordinationSpace"]: self.config["coordinationSpace"][ct.META_COORDINATION_SCOPES_BY.value] = {} - self.config["coordinationSpace"][ct.META_COORDINATION_SCOPES.value][meta_container.meta_scope.c_scope] = meta_container.meta_scope - self.config["coordinationSpace"][ct.META_COORDINATION_SCOPES_BY.value][meta_container.meta_by_scope.c_scope] = meta_container.meta_by_scope + self.config["coordinationSpace"][ct.META_COORDINATION_SCOPES.value][meta_container.meta_scope.c_scope] = ( + meta_container.meta_scope + ) + self.config["coordinationSpace"][ct.META_COORDINATION_SCOPES_BY.value][meta_container.meta_by_scope.c_scope] = ( + meta_container.meta_by_scope + ) return meta_container def add_coordination_by_dict(self, input_val): @@ -1177,16 +1188,17 @@ def add_coordination_by_dict(self, input_val): from vitessce import VitessceConfig, ViewType as vt, CoordinationType as ct vc = VitessceConfig(schema_version="1.0.15") - my_dataset = vc.add_dataset(name='My Dataset') + my_dataset = vc.add_dataset(name="My Dataset") spatial_view = vc.add_view(vt.SPATIAL, dataset=my_dataset) lc_view = vc.add_view(vt.LAYER_CONTROLLER, dataset=my_dataset) - scopes = vc.add_coordination_by_dict({ - ct.SPATIAL_ZOOM: 2, - ct.SPATIAL_TARGET_X: 0, - ct.SPATIAL_TARGET_Y: 0, - }) + scopes = vc.add_coordination_by_dict( + { + ct.SPATIAL_ZOOM: 2, + ct.SPATIAL_TARGET_X: 0, + ct.SPATIAL_TARGET_Y: 0, + } + ) """ - # Developer notes """ /* @@ -1267,6 +1279,7 @@ def add_coordination_by_dict(self, input_val): } */ """ + def process_level(level): result = {} if level is None: @@ -1281,29 +1294,32 @@ def process_level(level): if next_level_or_initial_value.is_cached(): result[c_type_str] = next_level_or_initial_value.get_cached() else: + def map_func(next_el): - (dummy_scope, ) = self.add_coordination(c_type_str) + (dummy_scope,) = self.add_coordination(c_type_str) # noqa: B023 # TODO: set a better initial value for dummy cases. - dummy_scope.set_value('__dummy__') + dummy_scope.set_value("__dummy__") return { "scope": dummy_scope, "children": process_level(next_el), } + processed_level = list(map(map_func, next_level)) next_level_or_initial_value.set_cached(processed_level) result[c_type_str] = processed_level else: - raise ValueError('Expected CoordinationLevel.value to be an array.') + raise ValueError("Expected CoordinationLevel.value to be an array.") else: # Base case. initial_value = next_level_or_initial_value if isinstance(initial_value, VitessceConfigCoordinationScope): result[c_type_str] = {"scope": initial_value} else: - (scope, ) = self.add_coordination(c_type_str) + (scope,) = self.add_coordination(c_type_str) scope.set_value(initial_value) result[c_type_str] = {"scope": scope} return result + # End process_level function # Begin recursion. 
@@ -1328,14 +1344,17 @@ def link_views_by_dict(self, views, input_val, meta=True, scope_prefix=None): from vitessce import VitessceConfig, ViewType as vt, CoordinationType as ct vc = VitessceConfig(schema_version="1.0.15") - my_dataset = vc.add_dataset(name='My Dataset') + my_dataset = vc.add_dataset(name="My Dataset") spatial_view = vc.add_view(vt.SPATIAL, dataset=my_dataset) lc_view = vc.add_view(vt.LAYER_CONTROLLER, dataset=my_dataset) - scopes = vc.link_views_by_dict([spatial_view, lc_view], { - ct.SPATIAL_ZOOM: 2, - ct.SPATIAL_TARGET_X: 0, - ct.SPATIAL_TARGET_Y: 0, - }) + scopes = vc.link_views_by_dict( + [spatial_view, lc_view], + { + ct.SPATIAL_ZOOM: 2, + ct.SPATIAL_TARGET_X: 0, + ct.SPATIAL_TARGET_Y: 0, + }, + ) """ if scope_prefix: self.get_next_scope = create_prefixed_get_next_scope_numeric(scope_prefix) @@ -1411,7 +1430,7 @@ def layout(self, view_concat): from vitessce import VitessceConfig, ViewType as vt, hconcat, vconcat vc = VitessceConfig(schema_version="1.0.15") - my_dataset = vc.add_dataset(name='My Dataset') + my_dataset = vc.add_dataset(name="My Dataset") v1 = vc.add_view(vt.SPATIAL, dataset=my_dataset) v2 = vc.add_view(vt.SPATIAL, dataset=my_dataset) v3 = vc.add_view(vt.SPATIAL, dataset=my_dataset) @@ -1423,11 +1442,11 @@ def layout(self, view_concat): from vitessce import VitessceConfig, ViewType as vt vc = VitessceConfig(schema_version="1.0.15") - my_dataset = vc.add_dataset(name='My Dataset') + my_dataset = vc.add_dataset(name="My Dataset") v1 = vc.add_view(vt.SPATIAL, dataset=my_dataset) v2 = vc.add_view(vt.SPATIAL, dataset=my_dataset) v3 = vc.add_view(vt.SPATIAL, dataset=my_dataset) - vc.layout(v1 | (v2 / v3)) # * magic * (alternative syntax) + vc.layout(v1 | (v2 / v3)) # * magic * (alternative syntax) """ def _layout(obj, x_min, x_max, y_min, y_max): @@ -1439,13 +1458,7 @@ def _layout(obj, x_min, x_max, y_min, y_max): views = obj.views num_views = len(views) for i in range(num_views): - _layout( - views[i], - x_min + (w / num_views) * i, - x_min + (w / num_views) * (i + 1), - y_min, - y_max - ) + _layout(views[i], x_min + (w / num_views) * i, x_min + (w / num_views) * (i + 1), y_min, y_max) elif isinstance(obj, VitessceConfigViewVConcat): views = obj.views num_views = len(views) @@ -1478,13 +1491,14 @@ def to_dict(self, base_url=None): return { **self.config, "datasets": [d.to_dict(base_url) for d in self.config["datasets"]], - "coordinationSpace": dict([ - (c_type, dict([ - (c_scope_name, c_scope.c_value) for c_scope_name, c_scope in c_scopes.items() - ])) for c_type, c_scopes in self.config["coordinationSpace"].items() - ]), + "coordinationSpace": dict( # noqa: C404 + [ + (c_type, dict([(c_scope_name, c_scope.c_value) for c_scope_name, c_scope in c_scopes.items()])) # noqa: C404 + for c_type, c_scopes in self.config["coordinationSpace"].items() + ] + ), # TODO: compute the x,y,w,h values if not explicitly defined - "layout": [c.to_dict() for c in self.config["layout"]] + "layout": [c.to_dict() for c in self.config["layout"]], } def get_routes(self): @@ -1508,10 +1522,7 @@ def get_stores(self, base_url=None): """ stores = {} for d in self.config["datasets"]: - stores = { - **stores, - **d.get_stores(base_url) - } + stores = {**stores, **d.get_stores(base_url)} return stores def to_python(self): @@ -1523,48 +1534,43 @@ def to_python(self): """ classes_to_import = OrderedDict() classes_to_import[VitessceChainableConfig.__name__] = True - code_block = f'{VitessceChainableConfig.__name__}({make_params_repr(self._to_py_params())})' + code_block = 
f"{VitessceChainableConfig.__name__}({make_params_repr(self._to_py_params())})" for vcd in self.config["datasets"]: - vcd_file_list_contents = ', '.join( - [repr(f) for f in vcd._get_files()]) - vcd_obj_list_contents = ', '.join( - [repr(f) for f in vcd._get_objects()]) + vcd_file_list_contents = ", ".join([repr(f) for f in vcd._get_files()]) + vcd_obj_list_contents = ", ".join([repr(f) for f in vcd._get_objects()]) add_dataset_func = self.add_dataset.__name__ add_dataset_params_list = [ make_params_repr(vcd._to_py_params()), ] if len(vcd._get_files()) > 0: - add_dataset_params_list.append( - f'files=[{vcd_file_list_contents}]') + add_dataset_params_list.append(f"files=[{vcd_file_list_contents}]") classes_to_import[VitessceConfigDatasetFile.__name__] = True if len(vcd._get_objects()) > 0: - add_dataset_params_list.append( - f'objs=[{vcd_obj_list_contents}]') - add_dataset_params = ', '.join(add_dataset_params_list) - code_block += f'.{add_dataset_func}({add_dataset_params})' + add_dataset_params_list.append(f"objs=[{vcd_obj_list_contents}]") + add_dataset_params = ", ".join(add_dataset_params_list) + code_block += f".{add_dataset_func}({add_dataset_params})" for obj in vcd._get_objects(): - if "vitessce" in sys.modules and obj.__class__.__name__ in dict(inspect.getmembers(sys.modules["vitessce"])): + if "vitessce" in sys.modules and obj.__class__.__name__ in dict( + inspect.getmembers(sys.modules["vitessce"]) + ): classes_to_import[obj.__class__.__name__] = True for c_type, c_obj in self.config["coordinationSpace"].items(): if c_type != ct.DATASET.value: - for c_scope_name, c_scope in c_obj.items(): + for c_scope_name, c_scope in c_obj.items(): # noqa:B007 set_coordination_func = self.set_coordination_value.__name__ - set_coordination_params = make_params_repr( - c_scope._to_py_params()) - code_block += f'.{set_coordination_func}({set_coordination_params})' + set_coordination_params = make_params_repr(c_scope._to_py_params()) + code_block += f".{set_coordination_func}({set_coordination_params})" for vcv in self.config["layout"]: - dataset_for_view = self.get_dataset_by_coordination_scope_name( - vcv.get_coordination_scope(ct.DATASET.value)) + dataset_for_view = self.get_dataset_by_coordination_scope_name(vcv.get_coordination_scope(ct.DATASET.value)) if dataset_for_view is not None: dataset_uid = dataset_for_view.get_uid() elif len(self.config["datasets"]) == 1: # If there is only one dataset available, assume it is the dataset for this view. 
dataset_uid = self.config["datasets"][0].get_uid() else: - raise ValueError( - "At least one dataset must be present in the config before adding a view.") + raise ValueError("At least one dataset must be present in the config before adding a view.") add_view_params_dict = { "dataset_uid": dataset_uid, } @@ -1573,9 +1579,8 @@ def to_python(self): add_view_params_dict["props"] = vcv.get_props() add_view_func = self.add_view.__name__ add_view_params = make_params_repr(add_view_params_dict) - code_block += f'.{add_view_func}({add_view_params})' - formatted_code_block = black.format_str( - code_block, mode=black.FileMode()) + code_block += f".{add_view_func}({add_view_params})" + formatted_code_block = black.format_str(code_block, mode=black.FileMode()) return list(classes_to_import), formatted_code_block @staticmethod @@ -1595,8 +1600,7 @@ def from_dict(config): vc = VitessceConfig.from_dict(my_existing_config) """ - vc = VitessceConfig( - schema_version=config["version"], name=config["name"], description=config["description"]) + vc = VitessceConfig(schema_version=config["version"], name=config["name"], description=config["description"]) # Add each dataset from the incoming config. for d in config["datasets"]: @@ -1606,30 +1610,28 @@ def from_dict(config): file_type=f["fileType"], url=f.get("url"), coordination_values=f.get("coordinationValues"), - options=f.get("options") + options=f.get("options"), ) - if 'coordinationSpace' in config: - for c_type in config['coordinationSpace'].keys(): + if "coordinationSpace" in config: + for c_type in config["coordinationSpace"].keys(): if c_type != ct.DATASET.value: - c_obj = config['coordinationSpace'][c_type] - vc.config['coordinationSpace'][c_type] = {} + c_obj = config["coordinationSpace"][c_type] + vc.config["coordinationSpace"][c_type] = {} for c_scope_name, c_scope_value in c_obj.items(): - scope = VitessceConfigCoordinationScope( - c_type, c_scope_name) + scope = VitessceConfigCoordinationScope(c_type, c_scope_name) scope.set_value(c_scope_value) - vc.config['coordinationSpace'][c_type][c_scope_name] = scope + vc.config["coordinationSpace"][c_type][c_scope_name] = scope - for c in config['layout']: - c_coord_scopes = c['coordinationScopes'] if 'coordinationScopes' in c.keys() else { - } + for c in config["layout"]: + c_coord_scopes = c["coordinationScopes"] if "coordinationScopes" in c.keys() else {} if len(config["datasets"]) > 1 and ct.DATASET.value not in c_coord_scopes: raise ValueError( - "Multiple datasets are present, so every view must have an explicit dataset coordination scope.") - new_view = VitessceConfigView( - c['component'], c_coord_scopes, c['x'], c['y'], c['w'], c['h']) - if 'props' in c.keys(): - new_view.set_props(**c['props']) - vc.config['layout'].append(new_view) + "Multiple datasets are present, so every view must have an explicit dataset coordination scope." + ) + new_view = VitessceConfigView(c["component"], c_coord_scopes, c["x"], c["y"], c["w"], c["h"]) + if "props" in c.keys(): + new_view.set_props(**c["props"]) + vc.config["layout"].append(new_view) return vc @@ -1678,13 +1680,14 @@ def widget(self, **kwargs): from vitessce import VitessceConfig, ViewType as vt, CoordinationType as ct vc = VitessceConfig(schema_version="1.0.15") - my_dataset = vc.add_dataset(name='My Dataset') + my_dataset = vc.add_dataset(name="My Dataset") v1 = vc.add_view(vt.SPATIAL, dataset=my_dataset) vc.layout(v1) vw = vc.widget() vw """ from .widget import VitessceWidget # TODO: Move import back to top when this is factored out. 
+ return VitessceWidget(self, **kwargs) def web_app(self, **kwargs): @@ -1710,12 +1713,13 @@ def web_app(self, **kwargs): from vitessce import VitessceConfig, ViewType as vt, CoordinationType as ct vc = VitessceConfig(schema_version="1.0.15") - my_dataset = vc.add_dataset(name='My Dataset') + my_dataset = vc.add_dataset(name="My Dataset") v1 = vc.add_view(vt.SPATIAL, dataset=my_dataset) vc.layout(v1) vc.web_app() """ from .widget import launch_vitessce_io # TODO: Move import back to top when this is factored out. + return launch_vitessce_io(self, **kwargs) def display(self, **kwargs): @@ -1736,12 +1740,13 @@ def display(self, **kwargs): from vitessce import VitessceConfig, ViewType as vt, CoordinationType as ct vc = VitessceConfig(schema_version="1.0.15") - my_dataset = vc.add_dataset(name='My Dataset') + my_dataset = vc.add_dataset(name="My Dataset") v1 = vc.add_view(vt.SPATIAL, dataset=my_dataset) vc.layout(v1) vc.display() """ from .widget import ipython_display + return ipython_display(self, **kwargs) def export(self, to, *args, **kwargs): @@ -1759,13 +1764,14 @@ def export(self, to, *args, **kwargs): from vitessce import VitessceConfig, ViewType as cvtm, CoordinationType as ct vc = VitessceConfig(schema_version="1.0.15") - my_dataset = vc.add_dataset(name='My Dataset') + my_dataset = vc.add_dataset(name="My Dataset") v1 = vc.add_view(vt.SPATIAL, dataset=my_dataset) vc.layout(v1) config_dict = vc.export(to="S3") """ - from .export import (export_to_s3, export_to_files) # TODO: Move import back to top when this is factored out. + from .export import export_to_files, export_to_s3 # TODO: Move import back to top when this is factored out. + if to == "S3": return export_to_s3(self, *args, **kwargs) elif to == "files": @@ -1790,13 +1796,14 @@ def __init__(self, schema_version, **kwargs): from vitessce import VitessceChainableConfig - vc = VitessceChainableConfig(schema_version='1.0.15', name='My Config') + vc = VitessceChainableConfig(schema_version="1.0.15", name="My Config") """ super().__init__(schema_version, **kwargs) def __copy__(self): new_vc = VitessceChainableConfig( - schema_version=self.config["version"], name=self.config["name"], description=self.config["description"]) + schema_version=self.config["version"], name=self.config["name"], description=self.config["description"] + ) new_vc.config = self.config.copy() return new_vc diff --git a/vitessce/config_converter.py b/vitessce/config_converter.py index b3b57bf5..72b2130e 100644 --- a/vitessce/config_converter.py +++ b/vitessce/config_converter.py @@ -1,11 +1,12 @@ -import requests -from jsonschema import validate, ValidationError -import scanpy as sc -import pandas as pd -import anndata import gzip import io +import anndata +import pandas as pd +import requests +import scanpy as sc +from jsonschema import ValidationError, validate + from vitessce.data_utils import ( optimize_adata, ) @@ -45,10 +46,10 @@ def _validate_config(self): "properties": { "fname": {"type": "string"}, }, - "required": ["fname"] + "required": ["fname"], }, }, - "required": ["outMatrix"] + "required": ["outMatrix"], }, "coords": { "type": "array", @@ -58,12 +59,12 @@ def _validate_config(self): "shortLabel": {"type": "string"}, "textFname": {"type": "string"}, }, - "required": ["shortLabel"] + "required": ["shortLabel"], }, - "minItems": 1 + "minItems": 1, }, }, - "required": ["fileVersions", "coords"] + "required": ["fileVersions", "coords"], } try: @@ -82,7 +83,7 @@ def download_config(self): self.cellbrowser_config = response.json() print(f"Successfully 
fetched configuration: {config_url}.") return self._validate_config() - except Exception as e: + except Exception as e: # noqa: BLE001 print(f"Could not get configuration for dataset {self.project_name} because: {e}.") return False @@ -111,12 +112,12 @@ def _load_expr_matrix(self): print("Loading expression matrix into Anndata object ...") - with gzip.open(gzip_file, 'rt') as f: - expr_matrix = pd.read_csv(f, sep='\t', index_col=0).T # transpose it, because of Scanpy + with gzip.open(gzip_file, "rt") as f: + expr_matrix = pd.read_csv(f, sep="\t", index_col=0).T # transpose it, because of Scanpy # Now create anndata object - self.adata = anndata.AnnData(X=expr_matrix, dtype='float32') - self.adata.var['gene'] = self.adata.var_names + self.adata = anndata.AnnData(X=expr_matrix, dtype="float32") + self.adata.var["gene"] = self.adata.var_names # Filter out nan values self.adata = self.adata[~self.adata.obs.index.isnull(), :] @@ -125,7 +126,9 @@ def _load_expr_matrix(self): first_gene = list(self.adata.var_names)[0] if len(first_gene.split("|")) == 2: # TODO: sometimes we might want to keep them both - print("This dataset uses the format identifier|symbol for the ad.obs gene names (e.g. “ENSG0123123.3|HOX3”). We are keeping only the symbol.") + print( + "This dataset uses the format identifier|symbol for the ad.obs gene names (e.g. “ENSG0123123.3|HOX3”). We are keeping only the symbol." + ) self.adata.var_names = [x.split("|")[1] for x in list(self.adata.var_names)] def _load_coordinates(self): @@ -138,12 +141,12 @@ def _load_coordinates(self): in their name will taken into an account. """ coordinate_types = { - 'tsne': 'X_tsne', - 't-sne': 'X_tsne', - 'umap': 'X_umap', - 'pca': 'X_pca', - 'spatial': 'X_spatial', - 'segmentations': 'X_segmentations' + "tsne": "X_tsne", + "t-sne": "X_tsne", + "umap": "X_umap", + "pca": "X_pca", + "spatial": "X_spatial", + "segmentations": "X_segmentations", } coord_urls = {} @@ -154,8 +157,8 @@ def _load_coordinates(self): short_label = obj["shortLabel"].lower() for term, key in coordinate_types.items(): if term in short_label: - if 'textFname' in obj: - coord_urls[key] = obj['textFname'] + if "textFname" in obj: + coord_urls[key] = obj["textFname"] else: # if textFname is not defined, that means that the name of file is the shortLabel labels = obj["shortLabel"].split(" ") @@ -166,15 +169,15 @@ def _load_coordinates(self): print(f"Successful extraction of the following coordinates and URLS: {coord_urls}") - for (coord_type, url_suffix) in coord_urls.items(): + for coord_type, url_suffix in coord_urls.items(): try: print(f"Adding {coord_type} to Anndata object ...") response = requests.get("/".join([self.url_prefix, url_suffix])) response.raise_for_status() embedding_file = io.BytesIO(response.content) - with gzip.open(embedding_file, 'rt') as f: - coords = pd.read_csv(f, sep='\t', index_col=0) + with gzip.open(embedding_file, "rt") as f: + coords = pd.read_csv(f, sep="\t", index_col=0) # Ensure the indices are of the same type coords.index = coords.index.astype(str) @@ -215,7 +218,7 @@ def _load_cell_metadata(self): # Create a BytesIO object from the content meta_file = io.BytesIO(response.content) - meta = pd.read_csv(meta_file, sep='\t', index_col=0) + meta = pd.read_csv(meta_file, sep="\t", index_col=0) self.adata.obs = meta # remove space from obs column names to avoid Vitessce breaking downstream @@ -253,7 +256,11 @@ def _filter_data(self): sc.pp.log1p(self.adata) # If marker genes are defined, keep only marker genes in the Anndata object - if "topMarkers" 
in self.cellbrowser_config and len(self.cellbrowser_config["topMarkers"].keys()) > 0 and self.keep_only_marker_genes: + if ( + "topMarkers" in self.cellbrowser_config + and len(self.cellbrowser_config["topMarkers"].keys()) > 0 + and self.keep_only_marker_genes + ): print("Filtering out all non-marker genes from Anndata object ...") marker_genes = [gene for sublist in self.cellbrowser_config["topMarkers"].values() for gene in sublist] self.adata = self.adata[:, self.adata.var_names.isin(marker_genes)] @@ -293,7 +300,7 @@ def export_anndata_object(self): for col in self.adata.obs.columns.tolist(): if self.adata.obs[col].dtype == object: - self.adata.obs[col] = self.adata.obs[col].astype('category') + self.adata.obs[col] = self.adata.obs[col].astype("category") return optimize_adata( self.adata, diff --git a/vitessce/constants.py b/vitessce/constants.py index 5ca75390..2eefd826 100644 --- a/vitessce/constants.py +++ b/vitessce/constants.py @@ -1,6 +1,5 @@ from enum import Enum - BASE_URL_PLACEHOLDER = "{{ base_url }}" @@ -31,37 +30,59 @@ class CoordinationType(DocEnum): The term coordination type refers to a parameter to be coordinated, and its programming-language-like type. For example, the ``SPATIAL_ZOOM`` coordination type represents a coordination of the zoom level of a spatial view, which can take a float value. """ + META_COORDINATION_SCOPES = "metaCoordinationScopes", "Shared representation of view-level coordinationScopes." - META_COORDINATION_SCOPES_BY = "metaCoordinationScopesBy", "Shared representation of view-level coordinationScopesBy." + META_COORDINATION_SCOPES_BY = ( + "metaCoordinationScopesBy", + "Shared representation of view-level coordinationScopesBy.", + ) DATASET = "dataset", "The identifier for the dataset associated with a view." OBS_TYPE = "obsType", "The type of entity represented by each observation." FEATURE_TYPE = "featureType", "The type of entity represented by each feature." FEATURE_VALUE_TYPE = "featureValueType", "The type of value stored in the observation-by-feature matrix." - OBS_LABELS_TYPE = 'obsLabelsType', "Feature for displaying additional obs sets' data in heatmap/scatterplot/spatial tooltips." + OBS_LABELS_TYPE = ( + "obsLabelsType", + "Feature for displaying additional obs sets' data in heatmap/scatterplot/spatial tooltips.", + ) EMBEDDING_TYPE = "embeddingType", "The type of embedding used for a scatterplot view, such as PCA or t-SNE." EMBEDDING_ZOOM = "embeddingZoom", "The zoom level of an embedding scatterplot view." EMBEDDING_ROTATION = "embeddingRotation", "The rotation of an embedding scatterplot view." EMBEDDING_TARGET_X = "embeddingTargetX", "The x-coordinate of the center of an embedding scatterplot view." EMBEDDING_TARGET_Y = "embeddingTargetY", "The y-coordinate of the center of an embedding scatterplot view." EMBEDDING_TARGET_Z = "embeddingTargetZ", "The z-coordinate of the center of an embedding scatterplot view." - EMBEDDING_OBS_SET_POLYGONS_VISIBLE = 'embeddingObsSetPolygonsVisible', "Whether polygon boundaries for each selected obsSet are visible in the embedding scatterplt." - EMBEDDING_OBS_SET_LABELS_VISIBLE = 'embeddingObsSetLabelsVisible', "Whether labels for each selected obsSet are visible in the embedding scatterplot." - EMBEDDING_OBS_SET_LABEL_SIZE = 'embeddingObsSetLabelSize', "The size of labels for selected obsSets in the embedding scatterplot." 
- EMBEDDING_OBS_OPACITY = 'embeddingObsOpacity', "Opacity of cells in embedding (points in scatterplot view)" - EMBEDDING_OBS_RADIUS = 'embeddingObsRadius', "Radius of cells in embedding (points in scatterplot view)" - EMBEDDING_OBS_RADIUS_MODE = 'embeddingObsRadiusMode', "Radius mode of cells in embedding (points in scatterplot view) - auto or manual" - EMBEDDING_OBS_OPACITY_MODE = 'embeddingObsOpacityMode', "Opacity mode of cells in embedding (points in scatterplot view) - auto or manual" - EMBEDDING_CELL_OPACITY = 'embeddingCellOpacity', "Deprecated" - EMBEDDING_CELL_RADIUS = 'embeddingCellRadius', "Deprecated" - EMBEDDING_CELL_RADIUS_MODE = 'embeddingCellRadiusMode', "Deprecated" - EMBEDDING_CELL_OPACITY_MODE = 'embeddingCellOpacityMode', "Deprecated" + EMBEDDING_OBS_SET_POLYGONS_VISIBLE = ( + "embeddingObsSetPolygonsVisible", + "Whether polygon boundaries for each selected obsSet are visible in the embedding scatterplot.", + ) + EMBEDDING_OBS_SET_LABELS_VISIBLE = ( + "embeddingObsSetLabelsVisible", + "Whether labels for each selected obsSet are visible in the embedding scatterplot.", + ) + EMBEDDING_OBS_SET_LABEL_SIZE = ( + "embeddingObsSetLabelSize", + "The size of labels for selected obsSets in the embedding scatterplot.", + ) + EMBEDDING_OBS_OPACITY = "embeddingObsOpacity", "Opacity of cells in embedding (points in scatterplot view)" + EMBEDDING_OBS_RADIUS = "embeddingObsRadius", "Radius of cells in embedding (points in scatterplot view)" + EMBEDDING_OBS_RADIUS_MODE = ( + "embeddingObsRadiusMode", + "Radius mode of cells in embedding (points in scatterplot view) - auto or manual", + ) + EMBEDDING_OBS_OPACITY_MODE = ( + "embeddingObsOpacityMode", + "Opacity mode of cells in embedding (points in scatterplot view) - auto or manual", + ) + EMBEDDING_CELL_OPACITY = "embeddingCellOpacity", "Deprecated" + EMBEDDING_CELL_RADIUS = "embeddingCellRadius", "Deprecated" + EMBEDDING_CELL_RADIUS_MODE = "embeddingCellRadiusMode", "Deprecated" + EMBEDDING_CELL_OPACITY_MODE = "embeddingCellOpacityMode", "Deprecated" SPATIAL_ZOOM = "spatialZoom", "The zoom level of a spatial view." - SPATIAL_ROTATION_X = 'spatialRotationX', "The x rotation of a 3d spatial view." - SPATIAL_ROTATION_Y = 'spatialRotationY', "The y rotation of a 3d spatial view." - SPATIAL_ROTATION_Z = 'spatialRotationZ', "The z rotation of a 3d spatial view." - SPATIAL_ROTATION_ORBIT = 'spatialRotationOrbit', "The rotation orbit in degrees of a 3d spatial view." - SPATIAL_ORBIT_AXIS = 'spatialOrbitAxis', "The orbital axis of a 3d spatial view." - SPATIAL_AXIS_FIXED = 'spatialAxisFixed', "Boolean for whether or not the target axis of a spatial view is fixed." + SPATIAL_ROTATION_X = "spatialRotationX", "The x rotation of a 3d spatial view." + SPATIAL_ROTATION_Y = "spatialRotationY", "The y rotation of a 3d spatial view." + SPATIAL_ROTATION_Z = "spatialRotationZ", "The z rotation of a 3d spatial view." + SPATIAL_ROTATION_ORBIT = "spatialRotationOrbit", "The rotation orbit in degrees of a 3d spatial view." + SPATIAL_ORBIT_AXIS = "spatialOrbitAxis", "The orbital axis of a 3d spatial view." + SPATIAL_AXIS_FIXED = "spatialAxisFixed", "Boolean for whether or not the target axis of a spatial view is fixed." SPATIAL_TARGET_X = "spatialTargetX", "The x-coordinate of the center of a spatial view." SPATIAL_TARGET_Y = "spatialTargetY", "The y-coordinate of the center of a spatial view." SPATIAL_TARGET_Z = "spatialTargetZ", "The z-coordinate of the center of a spatial view." 
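Each `CoordinationType` member above is a two-element tuple: the serialized string value followed by a human-readable description, which is why the reformatter wraps the longer members in parentheses. A minimal sketch of the `(value, doc)` enum pattern these definitions rely on, assuming `DocEnum` follows the standard custom-`__new__` recipe (its real definition lives near the top of `vitessce/constants.py` and is not shown in this diff):

```py
# Sketch of the (value, docstring) enum pattern; DocEnum here is an
# assumption modeled on the standard recipe, not copied from the repo.
from enum import Enum

class DocEnum(Enum):
    def __new__(cls, value, doc=None):
        member = object.__new__(cls)
        member._value_ = value  # the serialized string, e.g. "spatialZoom"
        if doc is not None:
            member.__doc__ = doc  # the human-readable description
        return member

class Example(DocEnum):
    SPATIAL_ZOOM = "spatialZoom", "The zoom level of a spatial view."

assert Example.SPATIAL_ZOOM.value == "spatialZoom"
assert "zoom level" in Example.SPATIAL_ZOOM.__doc__
```

Because the added parentheses only regroup the tuple, the enum machinery still receives the same `(value, doc)` pair, so the reformatted members behave identically.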
@@ -83,7 +104,7 @@ class CoordinationType(DocEnum): FEATURE_HIGHLIGHT = "featureHighlight", "A subset of genes to highlight." FEATURE_SELECTION = "featureSelection", "A subset of genes to select." FEATURE_VALUE_COLORMAP = "featureValueColormap", "The colormap to use for the gene expression scale." - FEATURE_VALUE_TRANSFORM = 'featureValueTransform', "Function to use to transform feature values." + FEATURE_VALUE_TRANSFORM = "featureValueTransform", "Function to use to transform feature values." FEATURE_VALUE_COLORMAP_RANGE = "featureValueColormapRange", "The range of gene expression values to map." GENE_FILTER = "geneFilter", "Deprecated" GENE_HIGHLIGHT = "geneHighlight", "Deprecated" @@ -92,42 +113,70 @@ class CoordinationType(DocEnum): GENE_EXPRESSION_COLORMAP_RANGE = "geneExpressionColormapRange", "Deprecated" OBS_COLOR_ENCODING = "obsColorEncoding", "The color encoding to use for cell entities." CELL_COLOR_ENCODING = "cellColorEncoding", "Deprecated" - SPATIAL_IMAGE_LAYER = 'spatialImageLayer', "Layer definitions for the imagery in the spatial view." - SPATIAL_SEGMENTATION_LAYER = 'spatialSegmentationLayer', "Layer definitions for the segmentations in the spatial view." - SPATIAL_POINT_LAYER = 'spatialPointLayer', "Layer definitions for the points in the spatial view." - SPATIAL_NEIGHBORHOOD_LAYER = 'spatialNeighborhoodLayer', "Layer definitions for the neighborhoods in the spatial view." - SPATIAL_RASTER_LAYERS = 'spatialRasterLayers', "Deprecated" - SPATIAL_CELLS_LAYER = 'spatialCellsLayer', "Deprecated" - SPATIAL_MOLECULES_LAYER = 'spatialMoleculesLayer', "Deprecated" - SPATIAL_NEIGHBORHOODS_LAYER = 'spatialNeighborhoodsLayer', "Deprecated" + SPATIAL_IMAGE_LAYER = "spatialImageLayer", "Layer definitions for the imagery in the spatial view." + SPATIAL_SEGMENTATION_LAYER = ( + "spatialSegmentationLayer", + "Layer definitions for the segmentations in the spatial view.", + ) + SPATIAL_POINT_LAYER = "spatialPointLayer", "Layer definitions for the points in the spatial view." + SPATIAL_NEIGHBORHOOD_LAYER = ( + "spatialNeighborhoodLayer", + "Layer definitions for the neighborhoods in the spatial view.", + ) + SPATIAL_RASTER_LAYERS = "spatialRasterLayers", "Deprecated" + SPATIAL_CELLS_LAYER = "spatialCellsLayer", "Deprecated" + SPATIAL_MOLECULES_LAYER = "spatialMoleculesLayer", "Deprecated" + SPATIAL_NEIGHBORHOODS_LAYER = "spatialNeighborhoodsLayer", "Deprecated" GENOMIC_ZOOM_X = "genomicZoomX", "The zoom level of a higlass view, X dimension." GENOMIC_ZOOM_Y = "genomicZoomY", "The zoom level of a higlass view, Y dimension." GENOMIC_TARGET_X = "genomicTargetX", "The x-coordinate of the center of a higlass view." GENOMIC_TARGET_Y = "genomicTargetY", "The y-coordinate of the center of a higlass view." ADDITIONAL_CELL_SETS = "additionalCellSets", "Deprecated" ADDITIONAL_OBS_SETS = "additionalObsSets", "User-defined cell sets." - MOLECULE_HIGHLIGHT = 'moleculeHighlight', "Deprecated" - GATING_FEATURE_SELECTION_X = 'gatingFeatureSelectionX', "Feature for the x-axis of the gating scatterplot." - GATING_FEATURE_SELECTION_Y = 'gatingFeatureSelectionY', "Feature for the y-axis of the gating scatterplot." - FEATURE_VALUE_TRANSFORM_COEFFICIENT = 'featureValueTransformCoefficient', "Coefficient to transform values in the gating scatterplot." - TOOLTIPS_VISIBLE = 'tooltipsVisible', "Boolean for whether or not tooltips are visible, used by the scatterplot, spatial, and heatmap views." 
+ MOLECULE_HIGHLIGHT = "moleculeHighlight", "Deprecated" + GATING_FEATURE_SELECTION_X = "gatingFeatureSelectionX", "Feature for the x-axis of the gating scatterplot." + GATING_FEATURE_SELECTION_Y = "gatingFeatureSelectionY", "Feature for the y-axis of the gating scatterplot." + FEATURE_VALUE_TRANSFORM_COEFFICIENT = ( + "featureValueTransformCoefficient", + "Coefficient to transform values in the gating scatterplot.", + ) + TOOLTIPS_VISIBLE = ( + "tooltipsVisible", + "Boolean for whether or not tooltips are visible, used by the scatterplot, spatial, and heatmap views.", + ) class ViewType(DocEnum): """ An enum type representing a view type in the visualization layout. """ + SCATTERPLOT = "scatterplot", "The scatterplot component can be used for visualization of 2-dimensional embeddings." - SPATIAL = "spatial", "The spatial component can be used for visualization of cells, molecules, or images in spatial coordinates." + SPATIAL = ( + "spatial", + "The spatial component can be used for visualization of cells, molecules, or images in spatial coordinates.", + ) DESCRIPTION = "description", "The description component can display short informational text about a dataset." STATUS = "status", "The status component can display contextual information such as hover states or error messages." HEATMAP = "heatmap", "The heatmap component can be used to view a cell by gene expression matrix." - LAYER_CONTROLLER = "layerController", "The layer controller can be used to manipulate channel settings of the images rendered by the spatial component." - GENOMIC_PROFILES = "genomicProfiles", "The higlass component can be used to visualize genome-wide ATAC-seq profiles." + LAYER_CONTROLLER = ( + "layerController", + "The layer controller can be used to manipulate channel settings of the images rendered by the spatial component.", + ) + GENOMIC_PROFILES = ( + "genomicProfiles", + "The higlass component can be used to visualize genome-wide ATAC-seq profiles.", + ) OBS_SETS = "obsSets", "Observation sets" OBS_SET_SIZES = "obsSetSizes", "Observation set sizes bar plot" - OBS_SET_FEATURE_VALUE_DISTRIBUTION = "obsSetFeatureValueDistribution", "Violin plot visualizing the distribution of feature values per observation set." - FEATURE_VALUE_HISTOGRAM = "featureValueHistogram", "Histogram visualizing the distribution of values for a selected feature across all observations." + OBS_SET_FEATURE_VALUE_DISTRIBUTION = ( + "obsSetFeatureValueDistribution", + "Violin plot visualizing the distribution of feature values per observation set.", + ) + FEATURE_VALUE_HISTOGRAM = ( + "featureValueHistogram", + "Histogram visualizing the distribution of values for a selected feature across all observations.", + ) FEATURE_LIST = "featureList", "The feature list selector" GATING = "gating", "A gating scatterplot" CELL_SETS = "cellSets", "Deprecated" @@ -140,6 +189,7 @@ class DataType(DocEnum): """ An enum type representing the type of data contained in a file. """ + OBS_LABELS = "obsLabels", "Alternate label for each observation." OBS_EMBEDDING = "obsEmbedding", "Embedding coordinates for each observation." OBS_LOCATIONS = "obsLocations", "Spatial coordinates for each observation." @@ -161,23 +211,45 @@ class FileType(DocEnum): """ An enum type representing the file format or schema to which a file conforms. 
""" + ANNDATA_ZARR = "anndata.zarr", "Joint file type for AnnData objects" - OBS_EMBEDDING_CSV = 'obsEmbedding.csv', "File type for obsEmbedding values stored in a CSV file" - OBS_LOCATIONS_CSV = 'obsLocations.csv', "File type for obsLocations values stored in a CSV file" - OBS_LABELS_CSV = 'obsLabels.csv', "File type for obsLabels values stored in a CSV file" - FEATURE_LABELS_CSV = 'featureLabels.csv', "File type for featureLabels values stored in a CSV file" - OBS_FEATURE_MATRIX_CSV = 'obsFeatureMatrix.csv', "File type for obsFeatureMatrix stored in a CSV file" - OBS_SEGMENTATIONS_JSON = 'obsSegmentations.json', "File type for obsSegmentations polygons stored in a JSON file" - OBS_SETS_CSV = 'obsSets.csv', "File type for obsSets stored in a CSV file" - OBS_SETS_JSON = 'obsSets.json', "File type for obsSets stored in a JSON file" + OBS_EMBEDDING_CSV = "obsEmbedding.csv", "File type for obsEmbedding values stored in a CSV file" + OBS_LOCATIONS_CSV = "obsLocations.csv", "File type for obsLocations values stored in a CSV file" + OBS_LABELS_CSV = "obsLabels.csv", "File type for obsLabels values stored in a CSV file" + FEATURE_LABELS_CSV = "featureLabels.csv", "File type for featureLabels values stored in a CSV file" + OBS_FEATURE_MATRIX_CSV = "obsFeatureMatrix.csv", "File type for obsFeatureMatrix stored in a CSV file" + OBS_SEGMENTATIONS_JSON = "obsSegmentations.json", "File type for obsSegmentations polygons stored in a JSON file" + OBS_SETS_CSV = "obsSets.csv", "File type for obsSets stored in a CSV file" + OBS_SETS_JSON = "obsSets.json", "File type for obsSets stored in a JSON file" IMAGE_OME_ZARR = "image.ome-zarr", "File type for images stored as OME-NGFF Zarr stores." - OBS_FEATURE_MATRIX_ANNDATA_ZARR = 'obsFeatureMatrix.anndata.zarr', "File type for obsFeatureMatrix stored in an AnnData object saved to a Zarr store" - OBS_SETS_ANNDATA_ZARR = 'obsSets.anndata.zarr', "File type for obsSets stored in an AnnData object saved to a Zarr store" - OBS_EMBEDDING_ANNDATA_ZARR = 'obsEmbedding.anndata.zarr', "File type for obsEmbedding values stored in an AnnData object saved to a Zarr store" - OBS_LOCATIONS_ANNDATA_ZARR = 'obsLocations.anndata.zarr', "File type for obsLocations values stored in an AnnData object saved to a Zarr store" - OBS_SEGMENTATIONS_ANNDATA_ZARR = 'obsSegmentations.anndata.zarr', "File type for obsSegmentations polygons stored in an AnnData object saved to a Zarr store" - OBS_LABELS_ANNDATA_ZARR = 'obsLabels.anndata.zarr', "File type for obsLabels stored in an AnnData object saved to a Zarr store" - FEATURE_LABELS_ANNDATA_ZARR = 'featureLabels.anndata.zarr', "File type for featureLabels stored in an AnnData object saved to a Zarr store" + OBS_FEATURE_MATRIX_ANNDATA_ZARR = ( + "obsFeatureMatrix.anndata.zarr", + "File type for obsFeatureMatrix stored in an AnnData object saved to a Zarr store", + ) + OBS_SETS_ANNDATA_ZARR = ( + "obsSets.anndata.zarr", + "File type for obsSets stored in an AnnData object saved to a Zarr store", + ) + OBS_EMBEDDING_ANNDATA_ZARR = ( + "obsEmbedding.anndata.zarr", + "File type for obsEmbedding values stored in an AnnData object saved to a Zarr store", + ) + OBS_LOCATIONS_ANNDATA_ZARR = ( + "obsLocations.anndata.zarr", + "File type for obsLocations values stored in an AnnData object saved to a Zarr store", + ) + OBS_SEGMENTATIONS_ANNDATA_ZARR = ( + "obsSegmentations.anndata.zarr", + "File type for obsSegmentations polygons stored in an AnnData object saved to a Zarr store", + ) + OBS_LABELS_ANNDATA_ZARR = ( + "obsLabels.anndata.zarr", + "File type 
for obsLabels stored in an AnnData object saved to a Zarr store", + ) + FEATURE_LABELS_ANNDATA_ZARR = ( + "featureLabels.anndata.zarr", + "File type for featureLabels stored in an AnnData object saved to a Zarr store", + ) EXPRESSION_MATRIX_ZARR = "expression-matrix.zarr", "The Zarr-based expression matrix file type." CELLS_JSON = "cells.json", "The JSON-based cells file type." MOLECULES_JSON = "molecules.json", "The JSON-based molecules file type." @@ -189,7 +261,10 @@ class FileType(DocEnum): GENOMIC_PROFILES_ZARR = "genomic-profiles.zarr", "The Zarr-based genomic profile (multivec) file type." ANNDATA_CELLS_ZARR = "anndata-cells.zarr", "The Zarr-based cells file type from an anndata object." ANNDATA_CELL_SETS_ZARR = "anndata-cell-sets.zarr", "The Zarr-based cell-sets file type from an anndata object." - ANNDATA_EXPRESSION_MATRIX_ZARR = "anndata-expression-matrix.zarr", "The Zarr-based expression matrix file type from an anndata object." + ANNDATA_EXPRESSION_MATRIX_ZARR = ( + "anndata-expression-matrix.zarr", + "The Zarr-based expression matrix file type from an anndata object.", + ) OBS_SEGMENTATIONS_CELLS_JSON = "obsSegmentations.cells.json", "The JSON-based cells file type for obsSegmentations." OBS_LOCATIONS_CELLS_JSON = "obsLocations.cells.json", "The JSON-based cells file type for obsLocations." OBS_EMBEDDING_CELLS_JSON = "obsEmbedding.cells.json", "The JSON-based cells file type for obsEmbedding." diff --git a/vitessce/data_utils/__init__.py b/vitessce/data_utils/__init__.py index 88077ca1..7106da33 100644 --- a/vitessce/data_utils/__init__.py +++ b/vitessce/data_utils/__init__.py @@ -1,18 +1,18 @@ from .anndata import ( + VAR_CHUNK_SIZE, optimize_adata, optimize_arr, - to_dense, - to_uint8, sort_var_axis, + to_dense, to_diamond, - VAR_CHUNK_SIZE, + to_uint8, +) +from .multivec import ( + adata_to_multivec_zarr, ) from .ome import ( - rgb_img_to_ome_zarr, + multiplex_img_to_ome_tiff, multiplex_img_to_ome_zarr, rgb_img_to_ome_tiff, - multiplex_img_to_ome_tiff, -) -from .multivec import ( - adata_to_multivec_zarr, + rgb_img_to_ome_zarr, ) diff --git a/vitessce/data_utils/anndata.py b/vitessce/data_utils/anndata.py index a9743670..fa87ed96 100644 --- a/vitessce/data_utils/anndata.py +++ b/vitessce/data_utils/anndata.py @@ -1,6 +1,6 @@ import numpy as np -from anndata import AnnData import scipy.cluster +from anndata import AnnData from scipy.sparse import issparse VAR_CHUNK_SIZE = 10 @@ -21,35 +21,39 @@ def cast_arr(arr): orig_min = np.min(arr) # Try casting float to int for better downstream compression. - if arr.dtype.kind == 'f': - cast_arr = arr.astype(f'<i{arr.dtype.itemsize}') - if np.array_equal(arr, cast_arr): - arr = cast_arr - # Try casting signed int to unsigned int. - if arr.dtype.kind == 'i' and orig_min >= 0: - arr = arr.astype(f'<u{arr.dtype.itemsize}') - # Try casting to a smaller itemsize. - next_itemsize = arr.dtype.itemsize // 2 - while arr.dtype.itemsize > next_itemsize: - next_dtype = np.dtype(f'<{arr.dtype.kind}{next_itemsize}') - next_dtype_info = np.iinfo(next_dtype) if arr.dtype.kind == 'u' or arr.dtype.kind == 'i' else np.finfo(next_dtype) - if next_dtype_info.min <= orig_min and next_dtype_info.max >= orig_max: - arr = arr.astype(next_dtype) - elif arr.dtype.itemsize == 8 and (arr.dtype.kind == 'u' or arr.dtype.kind == 'i'): - print(f"WARNING: Not casting array with dtype {arr.dtype.name}, but Zarr.js suggests avoiding int64 and uint64") + if arr.dtype.kind == "f": + cast_arr = arr.astype(f"<i{arr.dtype.itemsize}") + if np.array_equal(arr, cast_arr): + arr = cast_arr + # Try casting signed int to unsigned int. + if arr.dtype.kind == "i" and orig_min >= 0: + arr = arr.astype(f"<u{arr.dtype.itemsize}") + # Try casting to a smaller itemsize. + next_itemsize = arr.dtype.itemsize // 2 + while arr.dtype.itemsize > next_itemsize: + next_dtype = np.dtype(f"<{arr.dtype.kind}{next_itemsize}") + next_dtype_info = ( np.iinfo(next_dtype) if arr.dtype.kind == "u" or arr.dtype.kind == "i" else np.finfo(next_dtype) ) + if next_dtype_info.min <= orig_min and next_dtype_info.max >= orig_max: + arr = arr.astype(next_dtype) + elif arr.dtype.itemsize == 8 and (arr.dtype.kind == "u" or arr.dtype.kind == "i"): + print( f"WARNING: Not casting array with dtype {arr.dtype.name}, but Zarr.js suggests avoiding int64 and uint64" ) # Check for float16 usage. 
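For context on the `cast_arr` hunk reconstructed above: the function narrows an array's dtype so that the resulting Zarr chunks stay small and remain readable by Zarr.js, which lacks 64-bit integer and float16 typed arrays. A standalone sketch of the same downcasting idea (not the library function itself; the equality check and loop condition are assumptions consistent with the hunk):

```py
# Standalone sketch of the downcasting strategy in cast_arr: floats
# holding integral values become ints, non-negative signed ints become
# unsigned, then the itemsize is halved while the value range still fits.
import numpy as np

def shrink_dtype(arr):
    if arr.dtype.kind == "f":
        as_int = arr.astype(f"<i{arr.dtype.itemsize}")
        if np.array_equal(arr, as_int):  # lossless float-to-int cast
            arr = as_int
    if arr.dtype.kind == "i" and arr.min() >= 0:
        arr = arr.astype(f"<u{arr.dtype.itemsize}")
    while arr.dtype.itemsize > 1:
        smaller = np.dtype(f"<{arr.dtype.kind}{arr.dtype.itemsize // 2}")
        info = np.iinfo(smaller) if smaller.kind in "iu" else np.finfo(smaller)
        if info.min <= arr.min() and arr.max() <= info.max:
            arr = arr.astype(smaller)
        else:
            break
    if arr.dtype == np.float16:  # Zarr.js has no Float16Array
        arr = arr.astype("<f4")
    return arr

assert shrink_dtype(np.array([0.0, 255.0])).dtype == np.uint8
```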
- if arr.dtype.kind == 'f' and arr.dtype.itemsize == 2: + if arr.dtype.kind == "f" and arr.dtype.itemsize == 2: # Zarr.js does not have a Float16Array type - arr = arr.astype('<f4') + arr = arr.astype("<f4") diff --git a/vitessce/data_utils/entities.py b/vitessce/data_utils/entities.py --- a/vitessce/data_utils/entities.py +++ b/vitessce/data_utils/entities.py if padding_len > 0: arr = np.concatenate((arr, np.zeros((padding_len,)))) # Set the array in the Zarr store. - chromosomes_group[chr_name][str( resolution)][profile_index, :] = arr + chromosomes_group[chr_name][str(resolution)][profile_index, :] = arr diff --git a/vitessce/data_utils/multivec.py b/vitessce/data_utils/multivec.py index cea834cd..4fa5865d 100644 --- a/vitessce/data_utils/multivec.py +++ b/vitessce/data_utils/multivec.py @@ -1,14 +1,26 @@ import math -import zarr + import numpy as np import pandas as pd +import zarr from tqdm import tqdm from .anndata import to_dense from .entities import GenomicProfiles -def adata_to_multivec_zarr(adata, output_path, obs_set_col, obs_set_name, obs_set_vals=None, var_interval_col="interval", layer_key=None, assembly="hg38", starting_resolution=5000, chr_subset=None): +def adata_to_multivec_zarr( + adata, + output_path, + obs_set_col, + obs_set_name, + obs_set_vals=None, + var_interval_col="interval", + layer_key=None, + assembly="hg38", + starting_resolution=5000, + chr_subset=None, +): """ Convert an AnnData object containing a cell-by-bin matrix to a Multivec-Zarr store. @@ -45,35 +57,31 @@ def adata_to_multivec_zarr(adata, output_path, obs_set_col, obs_set_name, obs_se # The bin dataframe consists of one column like chrName:binStart-binEnd def convert_bin_name_to_chr_name(bin_name): try: - return bin_name[:bin_name.index(':')] + return bin_name[: bin_name.index(":")] except ValueError: return np.nan def convert_bin_name_to_chr_start(bin_name): try: - return int(bin_name[bin_name.index(':') + 1:bin_name.index('-')]) + return int(bin_name[bin_name.index(":") + 1 : bin_name.index("-")]) except ValueError: return np.nan def convert_bin_name_to_chr_end(bin_name): try: - return int(bin_name[bin_name.index('-') + 1:]) + return int(bin_name[bin_name.index("-") + 1 :]) except ValueError: return np.nan # Keep only the interval column in_bins_df = in_bins_df[[var_interval_col]] in_bins_df = in_bins_df.rename(columns={var_interval_col: "interval"}) - in_bins_df["chr_name"] = in_bins_df["interval"].apply( convert_bin_name_to_chr_name) - in_bins_df["chr_start"] = in_bins_df["interval"].apply( convert_bin_name_to_chr_start) - in_bins_df["chr_end"] = in_bins_df["interval"].apply( convert_bin_name_to_chr_end) + in_bins_df["chr_name"] = in_bins_df["interval"].apply(convert_bin_name_to_chr_name) + in_bins_df["chr_start"] = in_bins_df["interval"].apply(convert_bin_name_to_chr_start) + in_bins_df["chr_end"] = in_bins_df["interval"].apply(convert_bin_name_to_chr_end) # Drop any rows that had incorrect bin strings (missing a chromosome name, bin start, or bin end value). - in_bins_df = in_bins_df.dropna( subset=["chr_name", "chr_start", "chr_end"]).copy() + in_bins_df = in_bins_df.dropna(subset=["chr_name", "chr_start", "chr_end"]).copy() # Ensure that the columns have the expected types. 
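The three `convert_bin_name_to_chr_*` helpers above slice bin names of the form `chrName:binStart-binEnd`, and the rounding step in the next hunk snaps each bin onto the `starting_resolution` grid. A worked example, using a hypothetical interval and the default 5000 bp resolution:

```py
# Worked example of the bin-name parsing and rounding performed by the
# code above; the interval string is hypothetical.
import math

interval = "chr1:12345-17344"
chr_name = interval[: interval.index(":")]                                 # "chr1"
chr_start = int(interval[interval.index(":") + 1 : interval.index("-")])  # 12345
chr_end = int(interval[interval.index("-") + 1 :])                        # 17344

resolution = 5000
start_round = math.floor(chr_start / resolution) * resolution + 1  # 10001
end_round = start_round + resolution - 1                           # 15000
print(f"{chr_name}:{start_round}-{end_round}")  # chr1:10001-15000
```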
in_bins_df["chr_name"] = in_bins_df["chr_name"].astype(str) @@ -82,13 +90,15 @@ def convert_bin_name_to_chr_end(bin_name): in_bins_df = in_bins_df.reset_index(drop=True) - interval_sizes = in_bins_df.apply(lambda row: row["chr_end"] - row["chr_start"], axis='columns') + interval_sizes = in_bins_df.apply(lambda row: row["chr_end"] - row["chr_start"], axis="columns") max_interval = interval_sizes.max() if max_interval > starting_resolution: raise ValueError("Starting resolution is smaller than largest interval.") # Round bins - in_bins_df["chr_start_round"] = in_bins_df["chr_start"].apply(lambda x: math.floor(x / starting_resolution) * starting_resolution + 1) + in_bins_df["chr_start_round"] = in_bins_df["chr_start"].apply( + lambda x: math.floor(x / starting_resolution) * starting_resolution + 1 + ) in_bins_df["chr_end_round"] = in_bins_df["chr_start_round"].apply(lambda x: x + starting_resolution - 1) # TODO: do the values need to be scaled based on the ratio of the original size of the interval to the rounded size? @@ -96,7 +106,9 @@ def convert_bin_name_to_chr_end(bin_name): in_bins_df["chr_start"] = in_bins_df["chr_start_round"] in_bins_df["chr_end"] = in_bins_df["chr_end_round"] in_bins_df = in_bins_df.drop(columns=["chr_start_round", "chr_end_round"]) - in_bins_df["interval"] = in_bins_df.apply(lambda r: f"{r['chr_name']}:{r['chr_start']}-{r['chr_end']}", axis='columns') + in_bins_df["interval"] = in_bins_df.apply( + lambda r: f"{r['chr_name']}:{r['chr_start']}-{r['chr_end']}", axis="columns" + ) # Use provided obs_set_vals if present, since these may be ordered # in a particular way. @@ -105,7 +117,7 @@ def convert_bin_name_to_chr_end(bin_name): cluster_paths = [[obs_set_name, cluster_id] for cluster_id in cluster_ids] # Create the Zarr store for the outputs. - out_f = zarr.open(output_path, mode='w') + out_f = zarr.open(output_path, mode="w") genomic_profiles = GenomicProfiles( out_f, profile_paths=cluster_paths, assembly=assembly, starting_resolution=starting_resolution @@ -140,70 +152,64 @@ def convert_bin_name_to_chr_end(bin_name): # Create a list of the "ground truth" bins (all bins from position 0 to the end of the chromosome). # We will join the input bins onto this dataframe to determine which bins are missing. chr_bins_gt_df = pd.DataFrame() - chr_bins_gt_df["chr_start"] = np.arange(0, math.ceil( - chr_len / starting_resolution)) * starting_resolution - chr_bins_gt_df["chr_end"] = chr_bins_gt_df["chr_start"] + \ - starting_resolution + chr_bins_gt_df["chr_start"] = np.arange(0, math.ceil(chr_len / starting_resolution)) * starting_resolution + chr_bins_gt_df["chr_end"] = chr_bins_gt_df["chr_start"] + starting_resolution chr_bins_gt_df["chr_start"] = chr_bins_gt_df["chr_start"] + 1 - chr_bins_gt_df["chr_start"] = chr_bins_gt_df["chr_start"].astype( - int) + chr_bins_gt_df["chr_start"] = chr_bins_gt_df["chr_start"].astype(int) chr_bins_gt_df["chr_end"] = chr_bins_gt_df["chr_end"].astype(int) chr_bins_gt_df["chr_name"] = chr_name - chr_bins_gt_df[0] = chr_bins_gt_df.apply(lambda r: f"{r['chr_name']}:{r['chr_start']}-{r['chr_end']}", axis='columns') + chr_bins_gt_df[0] = chr_bins_gt_df.apply( + lambda r: f"{r['chr_name']}:{r['chr_start']}-{r['chr_end']}", axis="columns" + ) # We will add a new column "i", which should match the _old_ index, so that we will be able join with the data matrix on the original indices. 
# For the new rows, we will add values for the "i" column that are greater than any of the original indices, # to prevent any joining with the incoming data matrix onto these bins for which the data is missing. chr_bins_in_df = chr_bins_in_df.reset_index(drop=True) chr_bins_in_df["i"] = chr_bins_in_df.index.values - chr_bins_gt_df["i"] = chr_bins_gt_df.index.values + \ - (in_mtx.shape[1] + 1) + chr_bins_gt_df["i"] = chr_bins_gt_df.index.values + (in_mtx.shape[1] + 1) # Set the full bin string column as the index of both data frames. chr_bins_gt_df = chr_bins_gt_df.set_index(0) chr_bins_in_df = chr_bins_in_df.set_index("interval") # Join the input bin subset dataframe right onto the full bin ground truth dataframe. - chr_bins_in_join_df = chr_bins_in_df.join( - chr_bins_gt_df, how='right', lsuffix="", rsuffix="_gt") + chr_bins_in_join_df = chr_bins_in_df.join(chr_bins_gt_df, how="right", lsuffix="", rsuffix="_gt") # The bins which were not present in the input will have NaN values in the "i" column. # For these rows, we replace the NaN values with the much higher "i_gt" values which will not match to any index of the data matrix. chr_bins_in_join_df["i"] = chr_bins_in_join_df.apply( - lambda r: r['i'] if pd.notna(r['i']) else r['i_gt'], axis='columns').astype(int) + lambda r: r["i"] if pd.notna(r["i"]) else r["i_gt"], axis="columns" + ).astype(int) # Clean up the joined data frame by removing unnecessary columns. - chr_bins_in_join_df = chr_bins_in_join_df.drop( - columns=['chr_name', 'chr_start', 'chr_end', 'i_gt']) + chr_bins_in_join_df = chr_bins_in_join_df.drop(columns=["chr_name", "chr_start", "chr_end", "i_gt"]) chr_bins_in_join_df = chr_bins_in_join_df.rename( - columns={'chr_name_gt': 'chr_name', 'chr_start_gt': 'chr_start', 'chr_end_gt': 'chr_end'}) + columns={"chr_name_gt": "chr_name", "chr_start_gt": "chr_start", "chr_end_gt": "chr_end"} + ) # Create a dataframe from the data matrix, so that we can join to the joined bins dataframe. chr_mtx_df = pd.DataFrame(data=chr_mtx.T) - chr_bins_i_df = chr_bins_in_join_df.drop( - columns=['chr_name', 'chr_start', 'chr_end']) + chr_bins_i_df = chr_bins_in_join_df.drop(columns=["chr_name", "chr_start", "chr_end"]) # Join the data matrix dataframe and the bins dataframe. # Bins that are missing from the data matrix will have "i" values higher than any of the data matrix dataframe row indices, # and therefore the data values for these bins in the resulting joined dataframe will all be NaN. - chr_mtx_join_df = chr_bins_i_df.join( - chr_mtx_df, how='left', on='i') + chr_mtx_join_df = chr_bins_i_df.join(chr_mtx_df, how="left", on="i") # We fill in these NaN values with 0. chr_mtx_join_df = chr_mtx_join_df.fillna(value=0.0) # Drop the "i" column, since it is not necessary now that we have done the join. - chr_mtx_join_df = chr_mtx_join_df.drop(columns=['i']) + chr_mtx_join_df = chr_mtx_join_df.drop(columns=["i"]) # Obtain the new full data matrix, which contains values for all bins of the chromosome. chr_mtx = chr_mtx_join_df.values.T # Fill in the Zarr store with data for each cluster. for cluster_index, cluster_id in enumerate(cluster_ids): # Get the list of cells in the current cluster. 
- cluster_df = in_barcodes_df.loc[in_barcodes_df[obs_set_col] - == cluster_id] + cluster_df = in_barcodes_df.loc[in_barcodes_df[obs_set_col] == cluster_id] cluster_cell_ids = cluster_df.index.values.tolist() - cluster_cells_tf = ( - in_barcodes_df.index.to_series().isin(cluster_cell_ids)).values + cluster_cells_tf = (in_barcodes_df.index.to_series().isin(cluster_cell_ids)).values # Get the rows of the data matrix corresponding to the cells in this cluster. cluster_cell_by_bin_mtx = chr_mtx[cluster_cells_tf, :] @@ -216,5 +222,4 @@ def convert_bin_name_to_chr_end(bin_name): profile_len = math.ceil(chr_len / starting_resolution) # TODO: raise warning if the cluster_profile length is longer than profile_len? - genomic_profiles.add_profile( - cluster_profile[0:profile_len], chr_name, cluster_index) + genomic_profiles.add_profile(cluster_profile[0:profile_len], chr_name, cluster_index) diff --git a/vitessce/data_utils/ome.py b/vitessce/data_utils/ome.py index 633d48b3..29ec548c 100644 --- a/vitessce/data_utils/ome.py +++ b/vitessce/data_utils/ome.py @@ -2,6 +2,7 @@ import zarr from ome_zarr.writer import write_image from tifffile import TiffWriter + from .anndata import cast_arr @@ -15,15 +16,15 @@ def rgb_img_to_ome_tiff(img_arr, output_path, img_name="Image", axes="CYX"): :param str img_name: The name of the image to include in the omero.name NGFF metadata field. :param str axes: The array axis ordering. By default, "CYX" """ - img_arr = img_arr.astype(np.dtype('uint8')) + img_arr = img_arr.astype(np.dtype("uint8")) tiff_writer = TiffWriter(output_path, ome=True) tiff_writer.write( img_arr, metadata={ - 'axes': axes, - 'Channel': {'Name': ['R', 'G', 'B']}, - } + "axes": axes, + "Channel": {"Name": ["R", "G", "B"]}, + }, ) tiff_writer.close() @@ -42,9 +43,9 @@ def multiplex_img_to_ome_tiff(img_arr, channel_names, output_path, axes="CYX"): tiff_writer.write( img_arr, metadata={ - 'axes': axes, - 'Channel': {'Name': channel_names}, - } + "axes": axes, + "Channel": {"Name": channel_names}, + }, ) tiff_writer.close() @@ -60,14 +61,9 @@ def rgb_img_to_ome_zarr(img_arr, output_path, img_name="Image", chunks=(1, 256, :param tuple[int] chunks: The chunk sizes of each axis. By default, (1, 256, 256). :param str axes: The array axis ordering. 
By default, "cyx" """ - img_arr = img_arr.astype(np.dtype('uint8')) + img_arr = img_arr.astype(np.dtype("uint8")) - default_window = { - "start": 0, - "min": 0, - "max": 255, - "end": 255 - } + default_window = {"start": 0, "min": 0, "max": 255, "end": 255} z_root = zarr.open_group(output_path, mode="w") @@ -75,7 +71,7 @@ def rgb_img_to_ome_zarr(img_arr, output_path, img_name="Image", chunks=(1, 256, image=img_arr, group=z_root, axes=axes, - storage_options=dict(chunks=chunks), + storage_options={"chunks": chunks}, **kwargs, ) z_root.attrs["omero"] = { @@ -83,26 +79,16 @@ def rgb_img_to_ome_zarr(img_arr, output_path, img_name="Image", chunks=(1, 256, "version": "0.3", "rdefs": {}, "channels": [ - { - "label": "R", - "color": "FF0000", - "window": default_window - }, - { - "label": "G", - "color": "00FF00", - "window": default_window - }, - { - "label": "B", - "color": "0000FF", - "window": default_window - } - ] + {"label": "R", "color": "FF0000", "window": default_window}, + {"label": "G", "color": "00FF00", "window": default_window}, + {"label": "B", "color": "0000FF", "window": default_window}, + ], } -def multiplex_img_to_ome_zarr(img_arr, channel_names, output_path, img_name="Image", chunks=(1, 256, 256), axes="cyx", channel_colors=None): +def multiplex_img_to_ome_zarr( + img_arr, channel_names, output_path, img_name="Image", chunks=(1, 256, 256), axes="cyx", channel_colors=None +): """ Convert a multiplexed image to OME-Zarr v0.3. @@ -118,23 +104,15 @@ def multiplex_img_to_ome_zarr(img_arr, channel_names, output_path, img_name="Ima """ img_arr = cast_arr(img_arr) - dtype_info = np.iinfo(img_arr.dtype) if img_arr.dtype.kind == 'u' or img_arr.dtype.kind == 'i' else np.finfo(img_arr.dtype) + dtype_info = ( + np.iinfo(img_arr.dtype) if img_arr.dtype.kind == "u" or img_arr.dtype.kind == "i" else np.finfo(img_arr.dtype) + ) - default_window = { - "start": 0, - "min": 0, - "max": dtype_info.max, - "end": dtype_info.max - } + default_window = {"start": 0, "min": 0, "max": dtype_info.max, "end": dtype_info.max} z_root = zarr.open_group(output_path, mode="w") - write_image( - image=img_arr, - group=z_root, - axes=axes, - storage_options=dict(chunks=chunks) - ) + write_image(image=img_arr, group=z_root, axes=axes, storage_options={"chunks": chunks}) z_root.attrs["omero"] = { "name": img_name, "version": "0.3", @@ -143,9 +121,8 @@ def multiplex_img_to_ome_zarr(img_arr, channel_names, output_path, img_name="Ima { "label": channel_name, "color": channel_colors[channel_name] if channel_colors is not None else "FFFFFF", - "window": default_window + "window": default_window, } - for channel_name - in channel_names - ] + for channel_name in channel_names + ], } diff --git a/vitessce/export.py b/vitessce/export.py index d8311e3a..55fe8100 100644 --- a/vitessce/export.py +++ b/vitessce/export.py @@ -6,10 +6,10 @@ from starlette.routing import Mount from starlette.staticfiles import StaticFiles -from .routes import JsonRoute, FileRoute +from .routes import FileRoute, JsonRoute -def export_to_s3(config, s3, bucket_name, prefix=''): +def export_to_s3(config, s3, bucket_name, prefix=""): """ :param config: The Vitessce view config to export to S3. :type config: VitessceConfig @@ -21,9 +21,7 @@ def export_to_s3(config, s3, bucket_name, prefix=''): :returns: The config as a dict, with S3 urls filled in. 
:rtype: dict """ - - base_url = f"https://{bucket_name}.s3.amazonaws.com" + \ - ("/" + prefix if len(prefix) > 0 else "") + base_url = f"https://{bucket_name}.s3.amazonaws.com" + ("/" + prefix if len(prefix) > 0 else "") bucket = s3.Bucket(bucket_name) config_dict = config.to_dict(base_url=base_url) routes = config.get_routes() @@ -50,16 +48,15 @@ def export_to_s3(config, s3, bucket_name, prefix=''): if route not in uploaded_routes: uploaded_routes.append(route) static_dir = route_app.directory - for root, dirs, files in os.walk(static_dir): + for root, dirs, files in os.walk(static_dir): # noqa: B007 for filename in files: - file_key = key + \ - join(root, filename)[len(static_dir):] + file_key = key + join(root, filename)[len(static_dir) :] bucket.upload_file(join(root, filename), file_key) return config_dict -def export_to_files(config, base_url, out_dir='.'): +def export_to_files(config, base_url, out_dir="."): """ :param config: The Vitessce view config to export to files. :type config: VitessceConfig @@ -69,7 +66,6 @@ def export_to_files(config, base_url, out_dir='.'): :returns: The config as a dict, with urls filled in. :rtype: dict """ - config_dict = config.to_dict(base_url=base_url) routes = config.get_routes() for route in routes: @@ -79,7 +75,7 @@ def export_to_files(config, base_url, out_dir='.'): if isinstance(route, JsonRoute): data_json = route.data_json os.makedirs(os.path.dirname(out_path), exist_ok=True) - with open(out_path, 'w') as f: + with open(out_path, "w") as f: json.dump(data_json, f) elif isinstance(route, FileRoute): local_file_path = route.file_path @@ -90,10 +86,9 @@ def export_to_files(config, base_url, out_dir='.'): if isinstance(route_app, StaticFiles): static_dir = route_app.directory - for root, dirs, files in os.walk(static_dir): + for root, dirs, files in os.walk(static_dir): # noqa: B007 for filename in files: - file_key = out_path + \ - join(root, filename)[len(static_dir):] + file_key = out_path + join(root, filename)[len(static_dir) :] os.makedirs(os.path.dirname(file_key), exist_ok=True) copyfile(join(root, filename), file_key) diff --git a/vitessce/repr.py b/vitessce/repr.py index 25287f31..3a45dfca 100644 --- a/vitessce/repr.py +++ b/vitessce/repr.py @@ -2,21 +2,21 @@ def make_repr(init_locals, class_def=None): - ''' + """ >>> from .wrappers import MultiImageWrapper - >>> orig = MultiImageWrapper('IMAGE_WRAPPERS', foo='bar') + >>> orig = MultiImageWrapper("IMAGE_WRAPPERS", foo="bar") >>> orig_repr = repr(orig) >>> print(orig_repr) MultiImageWrapper(image_wrappers='IMAGE_WRAPPERS', foo='bar') >>> evalled = eval(orig_repr) >>> assert orig_repr == repr(evalled) - ''' + """ # Get the class definition from locals. clazz = None - if '__class__' in init_locals: + if "__class__" in init_locals: # Requires superclass to be initialized. - clazz = init_locals.pop('__class__') - elif 'self' in init_locals and hasattr(init_locals['self'], '__class__'): + clazz = init_locals.pop("__class__") + elif "self" in init_locals and hasattr(init_locals["self"], "__class__"): clazz = init_locals["self"].__class__ # pragma: no cover elif class_def is not None: clazz = class_def @@ -24,8 +24,8 @@ def make_repr(init_locals, class_def=None): raise ValueError("make_repr could not locate the class definition") # pragma: no cover # Remove self from locals. - if 'self' in init_locals: - del init_locals['self'] + if "self" in init_locals: + del init_locals["self"] # Get the class name. 
class_name = clazz.__name__ @@ -50,23 +50,20 @@ def make_repr(init_locals, class_def=None): pass # Convert the kwargs dict to named args. - if 'kwargs' in init_locals: - kwargs = init_locals.pop('kwargs') + if "kwargs" in init_locals: + kwargs = init_locals.pop("kwargs") else: kwargs = {} - args = { - **init_locals, - **kwargs - } - params = ', '.join([f'{k}={repr(v)}' for k, v in args.items()]) - return f'{class_name}({params})' + args = {**init_locals, **kwargs} + params = ", ".join([f"{k}={repr(v)}" for k, v in args.items()]) + return f"{class_name}({params})" def make_params_repr(args): - ''' - >>> print(make_params_repr({ "uid": 1, "name": "My Dataset"})) + """ + >>> print(make_params_repr({"uid": 1, "name": "My Dataset"})) uid=1, name='My Dataset' - ''' - params = ', '.join([f'{k}={repr(v)}' for k, v in args.items()]) + """ + params = ", ".join([f"{k}={repr(v)}" for k, v in args.items()]) return params diff --git a/vitessce/routes.py b/vitessce/routes.py index ed4fc966..73fc53c7 100644 --- a/vitessce/routes.py +++ b/vitessce/routes.py @@ -1,7 +1,7 @@ +from pathlib import Path -from starlette.routing import Route from starlette.responses import StreamingResponse -from pathlib import Path +from starlette.routing import Route # Adapted from https://gist.github.com/tombulled/712fd8e19ed0618c5f9f7d5f5f543782 @@ -11,10 +11,7 @@ def ranged(file, start=0, end=None, block_size=65535): file.seek(start) while True: - data_length = ( - min(block_size, end - start - consumed) - if end else block_size - ) + data_length = min(block_size, end - start - consumed) if end else block_size if data_length <= 0: break data = file.read(data_length) @@ -23,42 +20,42 @@ def ranged(file, start=0, end=None, block_size=65535): consumed += data_length yield data - if hasattr(file, 'close'): + if hasattr(file, "close"): file.close() def range_repsonse(request, file_path): path = Path(file_path) - file = path.open('rb') + file = path.open("rb") file_size = path.stat().st_size - content_range = request.headers.get('range') + content_range = request.headers.get("range") content_length = file_size status_code = 200 headers = {} if content_range is not None: content_range = content_range.strip().lower() - content_ranges = content_range.split('=')[-1] - range_start, range_end, * \ - _ = map(str.strip, (content_ranges + '-').split('-')) + content_ranges = content_range.split("=")[-1] + range_start, range_end, *_ = map(str.strip, (content_ranges + "-").split("-")) range_start = max(0, int(range_start)) if range_start else 0 - range_end = min(file_size - 1, int(range_end) - ) if range_end else file_size - 1 + range_end = min(file_size - 1, int(range_end)) if range_end else file_size - 1 content_length = (range_end - range_start) + 1 file = ranged(file, start=range_start, end=range_end + 1) status_code = 206 - headers['Content-Range'] = f'bytes {range_start}-{range_end}/{file_size}' + headers["Content-Range"] = f"bytes {range_start}-{range_end}/{file_size}" response = StreamingResponse( file, - media_type='tiff', + media_type="tiff", status_code=status_code, ) - response.headers.update({ - 'Accept-Ranges': 'bytes', - 'Content-Length': str(content_length), - **headers, - }) + response.headers.update( + { + "Accept-Ranges": "bytes", + "Content-Length": str(content_length), + **headers, + } + ) return response diff --git a/vitessce/utils.py b/vitessce/utils.py index fdb31c27..ec9cebd5 100644 --- a/vitessce/utils.py +++ b/vitessce/utils.py @@ -11,7 +11,6 @@ def get_next_scope_numeric(prev_scopes): def 
create_prefixed_get_next_scope_numeric(prefix): - def inner_get_next_scope(prev_scopes): next_scope_int = 0 next_scope_str = None diff --git a/vitessce/widget.py b/vitessce/widget.py index 473dc3b9..81300158 100644 --- a/vitessce/widget.py +++ b/vitessce/widget.py @@ -1,21 +1,21 @@ import importlib.util -from urllib.parse import quote_plus import json - -# Widget dependencies -import anywidget -from traitlets import Unicode, Dict, List, Int, Bool +import socket import time import uuid +from threading import Thread +from urllib.parse import quote_plus -# Server dependencies -from uvicorn import Config, Server - +# Widget dependencies +import anywidget from starlette.applications import Starlette from starlette.middleware import Middleware from starlette.middleware.cors import CORSMiddleware -from threading import Thread -import socket +from traitlets import Bool, Dict, Int, List, Unicode + +# Server dependencies +from uvicorn import Config, Server MAX_PORT_TRIES = 1000 DEFAULT_PORT = 8000 @@ -25,8 +25,7 @@ class BackgroundServer: # Reference: https://github.com/gosling-lang/gos/blob/main/gosling/data/_background_server.py#L10 def __init__(self, routes): middleware = [ - Middleware(CORSMiddleware, allow_origins=[ '*'], allow_methods=["OPTIONS", "GET"], allow_headers=['Range']) + Middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["OPTIONS", "GET"], allow_headers=["Range"]) ] self.app = Starlette(debug=True, routes=routes, middleware=middleware) self.port = None @@ -49,12 +48,7 @@ def start(self, port=None, timeout=1, daemon=True, log_level="warning"): if self.thread is not None: return self - config = Config( app=self.app, port=port, timeout_keep_alive=timeout, log_level=log_level ) + config = Config(app=self.app, port=port, timeout_keep_alive=timeout, log_level=log_level) self.port = config.port self.server = Server(config=config) self.thread = Thread(target=self.server.run, daemon=daemon) @@ -87,7 +81,7 @@ def register(self, config): def is_port_in_use(port): with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - return s.connect_ex(('localhost', port)) == 0 + return s.connect_ex(("localhost", port)) == 0 def get_base_url_and_port(port, next_port, proxy=False, base_url=None, host_name=None): @@ -104,9 +98,8 @@ if base_url is None: if proxy: - if importlib.util.find_spec('jupyter_server_proxy') is None: - raise ValueError( "To use the widget through a proxy, jupyter-server-proxy must be installed.") + if importlib.util.find_spec("jupyter_server_proxy") is None: + raise ValueError("To use the widget through a proxy, jupyter-server-proxy must be installed.") if host_name is None: base_url = f"proxy/{use_port}" else: @@ -125,15 +118,16 @@ def serve_routes(config, routes, use_port): server.start(port=use_port) -def launch_vitessce_io(config, theme='light', port=None, base_url=None, host_name=None, proxy=False, open=True): +def launch_vitessce_io(config, theme="light", port=None, base_url=None, host_name=None, proxy=False, open=True): import webbrowser + base_url, use_port, _ = get_base_url_and_port( - port, DEFAULT_PORT, proxy=proxy, base_url=base_url, host_name=host_name) + port, DEFAULT_PORT, proxy=proxy, base_url=base_url, host_name=host_name + ) config_dict = config.to_dict(base_url=base_url) routes = config.get_routes() serve_routes(config, routes, use_port) - vitessce_url = f"http://vitessce.io/#?theme={theme}&url=data:," + quote_plus( 
json.dumps(config_dict)) + vitessce_url = f"http://vitessce.io/#?theme={theme}&url=data:," + quote_plus(json.dumps(config_dict)) if open: webbrowser.open(vitessce_url) return vitessce_url @@ -382,6 +376,7 @@ class VitessceWidget(anywidget.AnyWidget): """ A class to represent a Jupyter widget for Vitessce. """ + _esm = ESM # Widget specific property. @@ -390,22 +385,35 @@ class VitessceWidget(anywidget.AnyWidget): # It is synced back to Python from the frontend *any* time the model is touched. config = Dict({}).tag(sync=True) height = Int(600).tag(sync=True) - theme = Unicode('auto').tag(sync=True) + theme = Unicode("auto").tag(sync=True) proxy = Bool(False).tag(sync=True) - uid = Unicode('').tag(sync=True) + uid = Unicode("").tag(sync=True) has_host_name = Bool(False).tag(sync=True) next_port = DEFAULT_PORT - js_package_version = Unicode('3.3.12').tag(sync=True) + js_package_version = Unicode("3.3.12").tag(sync=True) js_dev_mode = Bool(False).tag(sync=True) - custom_js_url = Unicode('').tag(sync=True) + custom_js_url = Unicode("").tag(sync=True) plugin_esm = Unicode(DEFAULT_PLUGIN_ESM).tag(sync=True) remount_on_uid_change = Bool(True).tag(sync=True) - store_urls = List(trait=Unicode(''), default_value=[]).tag(sync=True) - - def __init__(self, config, height=600, theme='auto', uid=None, port=None, proxy=False, js_package_version='3.3.12', js_dev_mode=False, custom_js_url='', plugin_esm=DEFAULT_PLUGIN_ESM, remount_on_uid_change=True): + store_urls = List(trait=Unicode(""), default_value=[]).tag(sync=True) + + def __init__( + self, + config, + height=600, + theme="auto", + uid=None, + port=None, + proxy=False, + js_package_version="3.3.12", + js_dev_mode=False, + custom_js_url="", + plugin_esm=DEFAULT_PLUGIN_ESM, + remount_on_uid_change=True, + ): """ Construct a new Vitessce widget. @@ -430,9 +438,9 @@ def __init__(self, config, height=600, theme='auto', uid=None, port=None, proxy= vw = vc.widget() vw """ - base_url, use_port, VitessceWidget.next_port = get_base_url_and_port( - port, VitessceWidget.next_port, proxy=proxy) + port, VitessceWidget.next_port, proxy=proxy + ) self.config_obj = config self.port = use_port config_dict = config.to_dict(base_url=base_url) @@ -442,37 +450,45 @@ def __init__(self, config, height=600, theme='auto', uid=None, port=None, proxy= uid_str = get_uid_str(uid) - super(VitessceWidget, self).__init__( - config=config_dict, height=height, theme=theme, proxy=proxy, - js_package_version=js_package_version, js_dev_mode=js_dev_mode, custom_js_url=custom_js_url, - plugin_esm=plugin_esm, remount_on_uid_change=remount_on_uid_change, - uid=uid_str, store_urls=list(self._stores.keys()) + super().__init__( + config=config_dict, + height=height, + theme=theme, + proxy=proxy, + js_package_version=js_package_version, + js_dev_mode=js_dev_mode, + custom_js_url=custom_js_url, + plugin_esm=plugin_esm, + remount_on_uid_change=remount_on_uid_change, + uid=uid_str, + store_urls=list(self._stores.keys()), ) serve_routes(config, routes, use_port) def _get_coordination_value(self, coordination_type, coordination_scope): - obj = self.config['coordinationSpace'][coordination_type] + obj = self.config["coordinationSpace"][coordination_type] obj_scopes = list(obj.keys()) if coordination_scope is not None: if coordination_scope in obj_scopes: return obj[coordination_scope] else: raise ValueError( - f"The specified coordination scope '{coordination_scope}' could not be found for the coordination type '{coordination_type}'. 
Known coordination scopes are {obj_scopes}") + f"The specified coordination scope '{coordination_scope}' could not be found for the coordination type '{coordination_type}'. Known coordination scopes are {obj_scopes}" + ) else: if len(obj_scopes) == 1: auto_coordination_scope = obj_scopes[0] return obj[auto_coordination_scope] elif len(obj_scopes) > 1: raise ValueError( - f"The coordination scope could not be automatically determined because multiple coordination scopes exist for the coordination type '{coordination_type}'. Please specify one of {obj_scopes} using the scope parameter.") + f"The coordination scope could not be automatically determined because multiple coordination scopes exist for the coordination type '{coordination_type}'. Please specify one of {obj_scopes} using the scope parameter." + ) else: - raise ValueError( - f"No coordination scopes were found for the coordination type '{coordination_type}'.") + raise ValueError(f"No coordination scopes were found for the coordination type '{coordination_type}'.") def get_cell_selection(self, scope=None): - return self._get_coordination_value('cellSelection', scope) + return self._get_coordination_value("cellSelection", scope) def close(self): self.config_obj.stop_server(self.port) @@ -488,15 +504,32 @@ def _zarr_get(self, params, buffers): buffers = [] return {"success": len(buffers) == 1}, buffers + # Launch Vitessce using plain HTML representation (no ipywidgets) -def ipython_display(config, height=600, theme='auto', base_url=None, host_name=None, uid=None, port=None, proxy=False, js_package_version='3.3.12', js_dev_mode=False, custom_js_url='', plugin_esm=DEFAULT_PLUGIN_ESM, remount_on_uid_change=True): - from IPython.display import display, HTML +def ipython_display( + config, + height=600, + theme="auto", + base_url=None, + host_name=None, + uid=None, + port=None, + proxy=False, + js_package_version="3.3.12", + js_dev_mode=False, + custom_js_url="", + plugin_esm=DEFAULT_PLUGIN_ESM, + remount_on_uid_change=True, +): + from IPython.display import HTML, display + uid_str = "vitessce" + get_uid_str(uid) base_url, use_port, _ = get_base_url_and_port( - port, DEFAULT_PORT, proxy=proxy, base_url=base_url, host_name=host_name) + port, DEFAULT_PORT, proxy=proxy, base_url=base_url, host_name=host_name + ) config_dict = config.to_dict(base_url=base_url) routes = config.get_routes() serve_routes(config, routes, use_port) @@ -562,21 +595,28 @@ def ipython_display(config, height=600, theme='auto', base_url=None, host_name=N } """ - HTML_STR = f""" + HTML_STR = ( + f"""
""" + ) display(HTML(HTML_STR)) diff --git a/vitessce/wrappers.py b/vitessce/wrappers.py index f8f70269..a28166d3 100644 --- a/vitessce/wrappers.py +++ b/vitessce/wrappers.py @@ -1,16 +1,23 @@ import os -from os.path import join import tempfile -from uuid import uuid4 +from os.path import join from pathlib import PurePath, PurePosixPath +from uuid import uuid4 + import zarr from .constants import ( - norm_enum, - ViewType as cm, - FileType as ft, DataType as dt, ) +from .constants import ( + FileType as ft, +) +from .constants import ( + ViewType as cm, +) +from .constants import ( + norm_enum, +) from .repr import make_repr @@ -39,15 +46,14 @@ def __init__(self, **kwargs): :param str out_dir: The path to a local directory used for data processing outputs. By default, uses a temp. directory. :param dict request_init: options to be passed along with every fetch request from the browser, like `{ "header": { "Authorization": "Bearer dsfjalsdfa1431" } }` """ - self.out_dir = kwargs['out_dir'] if 'out_dir' in kwargs else tempfile.mkdtemp( - ) + self.out_dir = kwargs["out_dir"] if "out_dir" in kwargs else tempfile.mkdtemp() self.routes = [] self.is_remote = False # TODO: change to needs_localhost_serving for clarity self.is_store = False # TODO: change to needs_store_registration for clarity self.file_def_creators = [] self.base_dir = None self.stores = {} - self._request_init = kwargs['request_init'] if 'request_init' in kwargs else None + self._request_init = kwargs["request_init"] if "request_init" in kwargs else None def __repr__(self): return self._repr @@ -117,10 +123,10 @@ def get_out_dir_route(self, dataset_uid, obj_i): if not self.is_remote: out_dir = self._get_out_dir(dataset_uid, obj_i) # TODO: Move imports back to top when this is factored out. - from starlette.staticfiles import StaticFiles from starlette.routing import Mount - return [Mount(self._get_route_str(dataset_uid, obj_i), - app=StaticFiles(directory=out_dir, html=False))] + from starlette.staticfiles import StaticFiles + + return [Mount(self._get_route_str(dataset_uid, obj_i), app=StaticFiles(directory=out_dir, html=False))] return [] def get_local_dir_url(self, base_url, dataset_uid, obj_i, local_dir_path, local_dir_uid): @@ -171,10 +177,10 @@ def get_local_dir_route(self, dataset_uid, obj_i, local_dir_path, local_dir_uid) route_path = file_path_to_url_path(local_dir_path) local_dir_path = join(self.base_dir, local_dir_path) # TODO: Move imports back to top when this is factored out. - from starlette.staticfiles import StaticFiles from starlette.routing import Mount - return [Mount(route_path, - app=StaticFiles(directory=local_dir_path, html=False))] + from starlette.staticfiles import StaticFiles + + return [Mount(route_path, app=StaticFiles(directory=local_dir_path, html=False))] return [] def _get_url(self, base_url, dataset_uid, obj_i, *args): @@ -200,7 +206,8 @@ def auto_view_config(self, vc): :type vc: VitessceConfig """ raise NotImplementedError( - "Auto view configuration has not yet been implemented for this data object wrapper class.") + "Auto view configuration has not yet been implemented for this data object wrapper class." 
+ ) class MultiImageWrapper(AbstractWrapper): @@ -220,42 +227,36 @@ def __init__(self, image_wrappers, use_physical_size_scaling=False, **kwargs): def convert_and_save(self, dataset_uid, obj_i, base_dir=None): for image in self.image_wrappers: image.convert_and_save(dataset_uid, obj_i, base_dir=base_dir) - file_def_creator = self.make_raster_file_def_creator( - dataset_uid, obj_i) + file_def_creator = self.make_raster_file_def_creator(dataset_uid, obj_i) routes = self.make_raster_routes() self.file_def_creators.append(file_def_creator) self.routes += routes def make_raster_routes(self): obj_routes = [] - for num, image in enumerate(self.image_wrappers): + for num, image in enumerate(self.image_wrappers): # noqa: B007 obj_routes = obj_routes + image.get_routes() return obj_routes def make_raster_file_def_creator(self, dataset_uid, obj_i): - def raster_file_def_creator(base_url): raster_json = { "schemaVersion": "0.0.2", "usePhysicalSizeScaling": self.use_physical_size_scaling, "images": [], - "renderLayers": [] + "renderLayers": [], } for image in self.image_wrappers: image_json = image.make_image_def(dataset_uid, obj_i, base_url) - raster_json['images'].append(image_json) - raster_json['renderLayers'].append(image.name) + raster_json["images"].append(image_json) + raster_json["renderLayers"].append(image.name) - return { - "fileType": ft.RASTER_JSON.value, - "options": raster_json - } + return {"fileType": ft.RASTER_JSON.value, "options": raster_json} return raster_file_def_creator class OmeTiffWrapper(AbstractWrapper): - """ Wrap an OME-TIFF File by creating an instance of the ``OmeTiffWrapper`` class. @@ -269,8 +270,17 @@ class OmeTiffWrapper(AbstractWrapper): :param \\*\\*kwargs: Keyword arguments inherited from :class:`~vitessce.wrappers.AbstractWrapper` """ - def __init__(self, img_path=None, offsets_path=None, img_url=None, offsets_url=None, name="", transformation_matrix=None, is_bitmask=False, - **kwargs): + def __init__( + self, + img_path=None, + offsets_path=None, + img_url=None, + offsets_url=None, + name="", + transformation_matrix=None, + is_bitmask=False, + **kwargs, + ): super().__init__(**kwargs) self._repr = make_repr(locals()) self.name = name @@ -283,16 +293,14 @@ def __init__(self, img_path=None, offsets_path=None, img_url=None, offsets_url=N self.local_img_uid = make_unique_filename(".ome.tif") self.local_offsets_uid = make_unique_filename(".offsets.json") if img_url is not None and (img_path is not None or offsets_path is not None): - raise ValueError( - "Did not expect img_path or offsets_path to be provided with img_url") + raise ValueError("Did not expect img_path or offsets_path to be provided with img_url") def convert_and_save(self, dataset_uid, obj_i, base_dir=None): # Only create out-directory if needed if not self.is_remote: super().convert_and_save(dataset_uid, obj_i, base_dir=base_dir) - file_def_creator = self.make_raster_file_def_creator( - dataset_uid, obj_i) + file_def_creator = self.make_raster_file_def_creator(dataset_uid, obj_i) routes = self.make_raster_routes(dataset_uid, obj_i) self.file_def_creators.append(file_def_creator) @@ -303,14 +311,16 @@ def make_raster_routes(self, dataset_uid, obj_i): return [] else: # TODO: Move imports back to top when this is factored out. 
- from .routes import range_repsonse, JsonRoute, FileRoute from generate_tiff_offsets import get_offsets from starlette.responses import UJSONResponse + from .routes import FileRoute, JsonRoute, range_repsonse + offsets = get_offsets(self._img_path) async def response_func(req): return UJSONResponse(offsets) + if self.base_dir is None: local_img_path = self._img_path local_img_route_path = self._get_route_str(dataset_uid, obj_i, self.local_img_uid) @@ -342,10 +352,8 @@ def raster_file_def_creator(base_url): "images": [self.make_image_def(dataset_uid, obj_i, base_url)], } - return { - "fileType": ft.RASTER_JSON.value, - "options": raster_json - } + return {"fileType": ft.RASTER_JSON.value, "options": raster_json} + return raster_file_def_creator def create_image_json(self, img_url, offsets_url=None): @@ -359,9 +367,7 @@ def create_image_json(self, img_url, offsets_url=None): # Do not include offsets in base_dir mode. metadata["omeTiffOffsetsUrl"] = offsets_url if self._transformation_matrix is not None: - metadata["transform"] = { - "matrix": self._transformation_matrix - } + metadata["transform"] = {"matrix": self._transformation_matrix} metadata["isBitmask"] = self.is_bitmask # Only attach metadata if there is some - otherwise schema validation fails. if len(metadata.keys()) > 0: @@ -373,19 +379,16 @@ def get_img_url(self, base_url="", dataset_uid="", obj_i=""): return self._img_url if self.base_dir is not None: return self._get_url_simple(base_url, file_path_to_url_path(self._img_path, prepend_slash=False)) - return self._get_url(base_url, dataset_uid, - obj_i, self.local_img_uid) + return self._get_url(base_url, dataset_uid, obj_i, self.local_img_uid) def get_offsets_url(self, base_url="", dataset_uid="", obj_i=""): if self._offsets_url is not None or self.is_remote: return self._offsets_url - offsets_url = self._get_url( - base_url, dataset_uid, obj_i, self.local_offsets_uid) + offsets_url = self._get_url(base_url, dataset_uid, obj_i, self.local_offsets_uid) return offsets_url class ImageOmeTiffWrapper(AbstractWrapper): - """ Wrap an OME-TIFF File by creating an instance of the ``ImageOmeTiffWrapper`` class. Intended to be used with the spatialBeta and layerControllerBeta views. 
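# A minimal usage sketch for the OmeTiffWrapper signature reformatted above;
# the file path and layer name are hypothetical values. The constructor only
# records them; validation raises ValueError if img_url is combined with
# img_path or offsets_path, per the check shown in the hunk above.
from vitessce.wrappers import OmeTiffWrapper

img_wrapper = OmeTiffWrapper(
    img_path="./data/my_image.ome.tif",  # hypothetical local path
    name="My image",
)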
@@ -398,7 +401,16 @@ class ImageOmeTiffWrapper(AbstractWrapper): :param \\*\\*kwargs: Keyword arguments inherited from :class:`~vitessce.wrappers.AbstractWrapper` """ - def __init__(self, img_path=None, offsets_path=None, img_url=None, offsets_url=None, coordinate_transformations=None, coordination_values=None, **kwargs): + def __init__( + self, + img_path=None, + offsets_path=None, + img_url=None, + offsets_url=None, + coordinate_transformations=None, + coordination_values=None, + **kwargs, + ): super().__init__(**kwargs) self._repr = make_repr(locals()) self._img_path = img_path @@ -411,16 +423,14 @@ def __init__(self, img_path=None, offsets_path=None, img_url=None, offsets_url=N self.local_img_uid = make_unique_filename(".ome.tif") self.local_offsets_uid = make_unique_filename(".offsets.json") if img_url is not None and (img_path is not None or offsets_path is not None): - raise ValueError( - "Did not expect img_path or offsets_path to be provided with img_url") + raise ValueError("Did not expect img_path or offsets_path to be provided with img_url") def convert_and_save(self, dataset_uid, obj_i, base_dir=None): # Only create out-directory if needed if not self.is_remote: super().convert_and_save(dataset_uid, obj_i, base_dir=base_dir) - file_def_creator = self.make_raster_file_def_creator( - dataset_uid, obj_i) + file_def_creator = self.make_raster_file_def_creator(dataset_uid, obj_i) routes = self.make_raster_routes(dataset_uid, obj_i) self.file_def_creators.append(file_def_creator) @@ -431,14 +441,16 @@ def make_raster_routes(self, dataset_uid, obj_i): return [] else: # TODO: Move imports back to top when this is factored out. - from .routes import range_repsonse, JsonRoute, FileRoute from generate_tiff_offsets import get_offsets from starlette.responses import UJSONResponse + from .routes import FileRoute, JsonRoute, range_repsonse + offsets = get_offsets(self._img_path) async def response_func(req): return UJSONResponse(offsets) + if self.base_dir is None: local_img_path = self._img_path local_img_route_path = self._get_route_str(dataset_uid, obj_i, self.local_img_uid) @@ -478,6 +490,7 @@ def raster_file_def_creator(base_url): if self._coordination_values is not None: file_def["coordinationValues"] = self._coordination_values return file_def + return raster_file_def_creator def get_img_url(self, base_url="", dataset_uid="", obj_i=""): @@ -485,19 +498,16 @@ def get_img_url(self, base_url="", dataset_uid="", obj_i=""): return self._img_url if self.base_dir is not None: return self._get_url_simple(base_url, file_path_to_url_path(self._img_path, prepend_slash=False)) - return self._get_url(base_url, dataset_uid, - obj_i, self.local_img_uid) + return self._get_url(base_url, dataset_uid, obj_i, self.local_img_uid) def get_offsets_url(self, base_url="", dataset_uid="", obj_i=""): if self._offsets_url is not None or self.is_remote: return self._offsets_url - offsets_url = self._get_url( - base_url, dataset_uid, obj_i, self.local_offsets_uid) + offsets_url = self._get_url(base_url, dataset_uid, obj_i, self.local_offsets_uid) return offsets_url class ObsSegmentationsOmeTiffWrapper(AbstractWrapper): - """ Wrap an OME-TIFF File by creating an instance of the ``ObsSegmentationsOmeTiffWrapper`` class. Intended to be used with the spatialBeta and layerControllerBeta views. 
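# A minimal sketch of the expanded ImageOmeTiffWrapper constructor above,
# assuming a hypothetical local OME-TIFF and offsets file. The
# coordinate_transformations value is passed through unchanged to the file
# definition options; the NGFF-style dict shown here is illustrative only.
from vitessce.wrappers import ImageOmeTiffWrapper

image = ImageOmeTiffWrapper(
    img_path="./data/image.ome.tif",           # hypothetical path
    offsets_path="./data/image.offsets.json",  # hypothetical path
    coordinate_transformations=[{"type": "scale", "scale": [1.0, 1.0, 1.0, 0.5, 0.5]}],
)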
@@ -511,7 +521,17 @@ class ObsSegmentationsOmeTiffWrapper(AbstractWrapper): :param \\*\\*kwargs: Keyword arguments inherited from :class:`~vitessce.wrappers.AbstractWrapper` """ - def __init__(self, img_path=None, offsets_path=None, img_url=None, offsets_url=None, coordinate_transformations=None, obs_types_from_channel_names=None, coordination_values=None, **kwargs): + def __init__( + self, + img_path=None, + offsets_path=None, + img_url=None, + offsets_url=None, + coordinate_transformations=None, + obs_types_from_channel_names=None, + coordination_values=None, + **kwargs, + ): super().__init__(**kwargs) self._repr = make_repr(locals()) self._img_path = img_path @@ -525,16 +545,14 @@ def __init__(self, img_path=None, offsets_path=None, img_url=None, offsets_url=N self.local_img_uid = make_unique_filename(".ome.tif") self.local_offsets_uid = make_unique_filename(".offsets.json") if img_url is not None and (img_path is not None or offsets_path is not None): - raise ValueError( - "Did not expect img_path or offsets_path to be provided with img_url") + raise ValueError("Did not expect img_path or offsets_path to be provided with img_url") def convert_and_save(self, dataset_uid, obj_i, base_dir=None): # Only create out-directory if needed if not self.is_remote: super().convert_and_save(dataset_uid, obj_i, base_dir=base_dir) - file_def_creator = self.make_raster_file_def_creator( - dataset_uid, obj_i) + file_def_creator = self.make_raster_file_def_creator(dataset_uid, obj_i) routes = self.make_raster_routes(dataset_uid, obj_i) self.file_def_creators.append(file_def_creator) @@ -545,14 +563,16 @@ def make_raster_routes(self, dataset_uid, obj_i): return [] else: # TODO: Move imports back to top when this is factored out. - from .routes import range_repsonse, JsonRoute, FileRoute from generate_tiff_offsets import get_offsets from starlette.responses import UJSONResponse + from .routes import FileRoute, JsonRoute, range_repsonse + offsets = get_offsets(self._img_path) async def response_func(req): return UJSONResponse(offsets) + if self.base_dir is None: local_img_path = self._img_path local_img_route_path = self._get_route_str(dataset_uid, obj_i, self.local_img_uid) @@ -595,6 +615,7 @@ def raster_file_def_creator(base_url): if self._coordination_values is not None: file_def["coordinationValues"] = self._coordination_values return file_def + return raster_file_def_creator def get_img_url(self, base_url="", dataset_uid="", obj_i=""): @@ -602,19 +623,16 @@ def get_img_url(self, base_url="", dataset_uid="", obj_i=""): return self._img_url if self.base_dir is not None: return self._get_url_simple(base_url, file_path_to_url_path(self._img_path, prepend_slash=False)) - return self._get_url(base_url, dataset_uid, - obj_i, self.local_img_uid) + return self._get_url(base_url, dataset_uid, obj_i, self.local_img_uid) def get_offsets_url(self, base_url="", dataset_uid="", obj_i=""): if self._offsets_url is not None or self.is_remote: return self._offsets_url - offsets_url = self._get_url( - base_url, dataset_uid, obj_i, self.local_offsets_uid) + offsets_url = self._get_url(base_url, dataset_uid, obj_i, self.local_offsets_uid) return offsets_url class CsvWrapper(AbstractWrapper): - """ Wrap a CSV file by creating an instance of the ``CsvWrapper`` class. 
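# A minimal sketch of the ObsSegmentationsOmeTiffWrapper constructor
# reformatted above; the bitmask path is a hypothetical value. With
# obs_types_from_channel_names=True, observation types are derived from the
# image's channel names rather than supplied explicitly.
from vitessce.wrappers import ObsSegmentationsOmeTiffWrapper

segmentations = ObsSegmentationsOmeTiffWrapper(
    img_path="./data/segmentations.bitmask.ome.tif",  # hypothetical path
    obs_types_from_channel_names=True,
)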
@@ -626,8 +644,7 @@ class CsvWrapper(AbstractWrapper): :param \\*\\*kwargs: Keyword arguments inherited from :class:`~vitessce.wrappers.AbstractWrapper` """ - def __init__(self, csv_path=None, csv_url=None, data_type=None, options=None, coordination_values=None, - **kwargs): + def __init__(self, csv_path=None, csv_url=None, data_type=None, options=None, coordination_values=None, **kwargs): super().__init__(**kwargs) self._repr = make_repr(locals()) self._csv_path = csv_path @@ -640,19 +657,16 @@ def __init__(self, csv_path=None, csv_url=None, data_type=None, options=None, co if data_type is None: raise ValueError("Expected data_type to be provided") if csv_url is not None and csv_path is not None: - raise ValueError( - "Did not expect csv_url to be provided with csv_path") + raise ValueError("Did not expect csv_url to be provided with csv_path") if csv_url is None and csv_path is None: - raise ValueError( - "Expected csv_url or csv_path to be provided") + raise ValueError("Expected csv_url or csv_path to be provided") def convert_and_save(self, dataset_uid, obj_i, base_dir=None): # Only create out-directory if needed if not self.is_remote: super().convert_and_save(dataset_uid, obj_i, base_dir=base_dir) - file_def_creator = self.make_csv_file_def_creator( - dataset_uid, obj_i) + file_def_creator = self.make_csv_file_def_creator(dataset_uid, obj_i) routes = self.make_csv_routes(dataset_uid, obj_i) self.file_def_creators.append(file_def_creator) @@ -663,9 +677,10 @@ def make_csv_routes(self, dataset_uid, obj_i): return [] else: # TODO: Move imports back to top when this is factored out. - from .routes import FileRoute from starlette.responses import FileResponse + from .routes import FileRoute + if self.base_dir is not None: local_csv_path = join(self.base_dir, self._csv_path) local_csv_route_path = file_path_to_url_path(self._csv_path) @@ -675,6 +690,7 @@ def make_csv_routes(self, dataset_uid, obj_i): async def response_func(req): return FileResponse(local_csv_path, filename=os.path.basename(self._csv_path)) + routes = [ FileRoute(local_csv_route_path, response_func, local_csv_path), ] @@ -691,6 +707,7 @@ def csv_file_def_creator(base_url): if self._coordination_values is not None: file_def["coordinationValues"] = self._coordination_values return file_def + return csv_file_def_creator def get_csv_url(self, base_url="", dataset_uid="", obj_i=""): @@ -698,12 +715,10 @@ def get_csv_url(self, base_url="", dataset_uid="", obj_i=""): return self._csv_url if self.base_dir is not None: return self._get_url_simple(base_url, file_path_to_url_path(self._csv_path, prepend_slash=False)) - return self._get_url(base_url, dataset_uid, - obj_i, self.local_csv_uid) + return self._get_url(base_url, dataset_uid, obj_i, self.local_csv_uid) class OmeZarrWrapper(AbstractWrapper): - """ Wrap an OME-NGFF Zarr store by creating an instance of the ``OmeZarrWrapper`` class. 
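# A minimal sketch of CsvWrapper's validation as restructured above:
# data_type is required, and exactly one of csv_path or csv_url must be
# provided. The path and data type below are hypothetical.
from vitessce.wrappers import CsvWrapper

csv_wrapper = CsvWrapper(
    csv_path="./data/umap.csv",  # hypothetical path
    data_type="obsEmbedding",
)
# Passing both csv_path and csv_url, or omitting data_type, raises ValueError
# per the checks in the hunk above.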
@@ -716,11 +731,9 @@ def __init__(self, img_path=None, img_url=None, name="", is_bitmask=False, **kwa super().__init__(**kwargs) self._repr = make_repr(locals()) if img_url is not None and img_path is not None: - raise ValueError( - "Did not expect img_path to be provided with img_url") + raise ValueError("Did not expect img_path to be provided with img_url") if img_url is None and img_path is None: - raise ValueError( - "Expected either img_url or img_path to be provided") + raise ValueError("Expected either img_url or img_path to be provided") self._img_path = img_path self._img_url = img_url self.name = name @@ -736,8 +749,7 @@ def convert_and_save(self, dataset_uid, obj_i, base_dir=None): if not self.is_remote: super().convert_and_save(dataset_uid, obj_i, base_dir=base_dir) - file_def_creator = self.make_image_file_def_creator( - dataset_uid, obj_i) + file_def_creator = self.make_image_file_def_creator(dataset_uid, obj_i) routes = self.make_image_routes(dataset_uid, obj_i) self.file_def_creators.append(file_def_creator) @@ -756,10 +768,8 @@ def get_img_url(self, base_url="", dataset_uid="", obj_i=""): def make_image_file_def_creator(self, dataset_uid, obj_i): def image_file_def_creator(base_url): - return { - "fileType": "image.ome-zarr", - "url": self.get_img_url(base_url, dataset_uid, obj_i) - } + return {"fileType": "image.ome-zarr", "url": self.get_img_url(base_url, dataset_uid, obj_i)} + return image_file_def_creator # The following two functions will be used when OmeZarrWrapper @@ -784,7 +794,6 @@ def create_image_json(self, img_url): class ImageOmeZarrWrapper(AbstractWrapper): - """ Wrap an OME-NGFF Zarr store by creating an instance of the ``ImageOmeZarrWrapper`` class. Intended to be used with the spatialBeta and layerControllerBeta views. 
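# A minimal sketch of the mutually exclusive img_path/img_url check
# restructured above; the URL is hypothetical.
from vitessce.wrappers import OmeZarrWrapper

ome_zarr = OmeZarrWrapper(img_url="https://example.com/image.ome.zarr", name="Remote image")
# Providing neither argument, or both, raises ValueError.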
@@ -795,15 +804,15 @@ class ImageOmeZarrWrapper(AbstractWrapper): :param \\*\\*kwargs: Keyword arguments inherited from :class:`~vitessce.wrappers.AbstractWrapper` """ - def __init__(self, img_path=None, img_url=None, coordinate_transformations=None, coordination_values=None, **kwargs): + def __init__( + self, img_path=None, img_url=None, coordinate_transformations=None, coordination_values=None, **kwargs + ): super().__init__(**kwargs) self._repr = make_repr(locals()) if img_url is not None and img_path is not None: - raise ValueError( - "Did not expect img_path to be provided with img_url") + raise ValueError("Did not expect img_path to be provided with img_url") if img_url is None and img_path is None: - raise ValueError( - "Expected either img_url or img_path to be provided") + raise ValueError("Expected either img_url or img_path to be provided") self._img_path = img_path self._img_url = img_url self._coordinate_transformations = coordinate_transformations @@ -819,8 +828,7 @@ def convert_and_save(self, dataset_uid, obj_i, base_dir=None): if not self.is_remote: super().convert_and_save(dataset_uid, obj_i, base_dir=base_dir) - file_def_creator = self.make_image_file_def_creator( - dataset_uid, obj_i) + file_def_creator = self.make_image_file_def_creator(dataset_uid, obj_i) routes = self.make_image_routes(dataset_uid, obj_i) self.file_def_creators.append(file_def_creator) @@ -843,10 +851,7 @@ def image_file_def_creator(base_url): if self._coordinate_transformations is not None: options["coordinateTransformations"] = self._coordinate_transformations - file_def = { - "fileType": "image.ome-zarr", - "url": self.get_img_url(base_url, dataset_uid, obj_i) - } + file_def = {"fileType": "image.ome-zarr", "url": self.get_img_url(base_url, dataset_uid, obj_i)} if len(options.keys()) > 0: file_def["options"] = options @@ -858,7 +863,6 @@ def image_file_def_creator(base_url): class ObsSegmentationsOmeZarrWrapper(AbstractWrapper): - """ Wrap an OME-NGFF Zarr store by creating an instance of the ``ObsSegmentationsOmeZarrWrapper`` class. Intended to be used with the spatialBeta and layerControllerBeta views. 
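# A minimal sketch of the ImageOmeZarrWrapper constructor reformatted above,
# assuming a hypothetical local OME-NGFF store. coordination_values is passed
# through as the file definition's coordinationValues entry; the key shown
# here is illustrative only.
from vitessce.wrappers import ImageOmeZarrWrapper

image = ImageOmeZarrWrapper(
    img_path="./data/image.ome.zarr",             # hypothetical path
    coordination_values={"fileUid": "my-image"},  # hypothetical value
)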
@@ -870,15 +874,21 @@ class ObsSegmentationsOmeZarrWrapper(AbstractWrapper): :param \\*\\*kwargs: Keyword arguments inherited from :class:`~vitessce.wrappers.AbstractWrapper` """ - def __init__(self, img_path=None, img_url=None, coordinate_transformations=None, coordination_values=None, obs_types_from_channel_names=None, **kwargs): + def __init__( + self, + img_path=None, + img_url=None, + coordinate_transformations=None, + coordination_values=None, + obs_types_from_channel_names=None, + **kwargs, + ): super().__init__(**kwargs) self._repr = make_repr(locals()) if img_url is not None and img_path is not None: - raise ValueError( - "Did not expect img_path to be provided with img_url") + raise ValueError("Did not expect img_path to be provided with img_url") if img_url is None and img_path is None: - raise ValueError( - "Expected either img_url or img_path to be provided") + raise ValueError("Expected either img_url or img_path to be provided") self._img_path = img_path self._img_url = img_url self._coordinate_transformations = coordinate_transformations @@ -895,8 +905,7 @@ def convert_and_save(self, dataset_uid, obj_i, base_dir=None): if not self.is_remote: super().convert_and_save(dataset_uid, obj_i, base_dir=base_dir) - file_def_creator = self.make_image_file_def_creator( - dataset_uid, obj_i) + file_def_creator = self.make_image_file_def_creator(dataset_uid, obj_i) routes = self.make_image_routes(dataset_uid, obj_i) self.file_def_creators.append(file_def_creator) @@ -922,10 +931,7 @@ def image_file_def_creator(base_url): if self._obs_types_from_channel_names is not None: options["obsTypesFromChannelNames"] = self._obs_types_from_channel_names - file_def = { - "fileType": "obsSegmentations.ome-zarr", - "url": self.get_img_url(base_url, dataset_uid, obj_i) - } + file_def = {"fileType": "obsSegmentations.ome-zarr", "url": self.get_img_url(base_url, dataset_uid, obj_i)} if len(options.keys()) > 0: file_def["options"] = options @@ -937,7 +943,31 @@ def image_file_def_creator(base_url): class AnnDataWrapper(AbstractWrapper): - def __init__(self, adata_path=None, adata_url=None, adata_store=None, obs_feature_matrix_path=None, feature_filter_path=None, initial_feature_filter_path=None, obs_set_paths=None, obs_set_names=None, obs_locations_path=None, obs_segmentations_path=None, obs_embedding_paths=None, obs_embedding_names=None, obs_embedding_dims=None, obs_spots_path=None, obs_points_path=None, feature_labels_path=None, obs_labels_path=None, convert_to_dense=True, coordination_values=None, obs_labels_paths=None, obs_labels_names=None, **kwargs): + def __init__( + self, + adata_path=None, + adata_url=None, + adata_store=None, + obs_feature_matrix_path=None, + feature_filter_path=None, + initial_feature_filter_path=None, + obs_set_paths=None, + obs_set_names=None, + obs_locations_path=None, + obs_segmentations_path=None, + obs_embedding_paths=None, + obs_embedding_names=None, + obs_embedding_dims=None, + obs_spots_path=None, + obs_points_path=None, + feature_labels_path=None, + obs_labels_path=None, + convert_to_dense=True, + coordination_values=None, + obs_labels_paths=None, + obs_labels_names=None, + **kwargs, + ): """ Wrap an AnnData object by creating an instance of the ``AnnDataWrapper`` class. 
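# A minimal sketch of the expanded AnnDataWrapper signature above, assuming a
# hypothetical AnnData-Zarr store with a conventional obs/obsm/X layout.
# Exactly one of adata_path, adata_url, or adata_store may be provided, per
# the num_inputs check that follows in the diff.
from vitessce.wrappers import AnnDataWrapper

adata_wrapper = AnnDataWrapper(
    adata_path="./data/my_data.h5ad.zarr",  # hypothetical path
    obs_set_paths=["obs/cell_type"],
    obs_set_names=["Cell Type"],
    obs_embedding_paths=["obsm/X_umap"],
    obs_embedding_names=["UMAP"],
    obs_feature_matrix_path="X",
)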
@@ -974,16 +1004,14 @@ def __init__(self, adata_path=None, adata_url=None, adata_store=None, obs_featur num_inputs = sum([1 for x in [adata_path, adata_url, adata_store] if x is not None]) if num_inputs > 1: - raise ValueError( - "Expected only one of adata_path, adata_url, or adata_store to be provided") + raise ValueError("Expected only one of adata_path, adata_url, or adata_store to be provided") if num_inputs == 0: - raise ValueError( - "Expected one of adata_path, adata_url, or adata_store to be provided") + raise ValueError("Expected one of adata_path, adata_url, or adata_store to be provided") if adata_path is not None: self.is_remote = False self.is_store = False - self.zarr_folder = 'anndata.zarr' + self.zarr_folder = "anndata.zarr" elif adata_url is not None: self.is_remote = True self.is_store = False @@ -1009,9 +1037,9 @@ def __init__(self, adata_path=None, adata_url=None, adata_store=None, obs_featur self._spatial_points_obsm = obs_points_path self._gene_alias = feature_labels_path # Support legacy provision of single obs labels path - if (obs_labels_path is not None): + if obs_labels_path is not None: self._obs_labels_paths = [obs_labels_path] - self._obs_labels_names = [obs_labels_path.split('/')[-1]] + self._obs_labels_names = [obs_labels_path.split("/")[-1]] else: self._obs_labels_paths = obs_labels_paths self._obs_labels_names = obs_labels_names @@ -1023,8 +1051,7 @@ def convert_and_save(self, dataset_uid, obj_i, base_dir=None): if not self.is_remote: super().convert_and_save(dataset_uid, obj_i, base_dir=base_dir) - file_def_creator = self.make_file_def_creator( - dataset_uid, obj_i) + file_def_creator = self.make_file_def_creator(dataset_uid, obj_i) routes = self.make_anndata_routes(dataset_uid, obj_i) self.file_def_creators.append(file_def_creator) @@ -1049,65 +1076,42 @@ def make_file_def_creator(self, dataset_uid, obj_i): def get_anndata_zarr(base_url): options = {} if self._spatial_centroid_obsm is not None: - options["obsLocations"] = { - "path": self._spatial_centroid_obsm - } + options["obsLocations"] = {"path": self._spatial_centroid_obsm} if self._spatial_polygon_obsm is not None: - options["obsSegmentations"] = { - "path": self._spatial_polygon_obsm - } + options["obsSegmentations"] = {"path": self._spatial_polygon_obsm} if self._spatial_spots_obsm is not None: - options["obsSpots"] = { - "path": self._spatial_spots_obsm - } + options["obsSpots"] = {"path": self._spatial_spots_obsm} if self._spatial_points_obsm is not None: - options["obsPoints"] = { - "path": self._spatial_points_obsm - } + options["obsPoints"] = {"path": self._spatial_points_obsm} if self._mappings_obsm is not None: options["obsEmbedding"] = [] if self._mappings_obsm_names is not None: for key, mapping in zip(self._mappings_obsm_names, self._mappings_obsm): - options["obsEmbedding"].append({ - "path": mapping, - "dims": [0, 1], - "embeddingType": key - }) + options["obsEmbedding"].append({"path": mapping, "dims": [0, 1], "embeddingType": key}) else: for mapping in self._mappings_obsm: - mapping_key = mapping.split('/')[-1] + mapping_key = mapping.split("/")[-1] self._mappings_obsm_names = mapping_key - options["obsEmbedding"].append({ - "path": mapping, - "dims": [0, 1], - "embeddingType": mapping_key - }) + options["obsEmbedding"].append({"path": mapping, "dims": [0, 1], "embeddingType": mapping_key}) if self._mappings_obsm_dims is not None: for dim_i, dim in enumerate(self._mappings_obsm_dims): - options["obsEmbedding"][dim_i]['dims'] = dim + options["obsEmbedding"][dim_i]["dims"] = dim if 
self._cell_set_obs is not None: options["obsSets"] = [] if self._cell_set_obs_names is not None: names = self._cell_set_obs_names else: - names = [obs.split('/')[-1] for obs in self._cell_set_obs] + names = [obs.split("/")[-1] for obs in self._cell_set_obs] for obs, name in zip(self._cell_set_obs, names): - options["obsSets"].append({ - "name": name, - "path": obs - }) + options["obsSets"].append({"name": name, "path": obs}) if self._expression_matrix is not None: - options["obsFeatureMatrix"] = { - "path": self._expression_matrix - } + options["obsFeatureMatrix"] = {"path": self._expression_matrix} if self._gene_var_filter is not None: options["obsFeatureMatrix"]["featureFilterPath"] = self._gene_var_filter if self._matrix_gene_var_filter is not None: options["obsFeatureMatrix"]["initialFeatureFilterPath"] = self._matrix_gene_var_filter if self._gene_alias is not None: - options["featureLabels"] = { - "path": self._gene_alias - } + options["featureLabels"] = {"path": self._gene_alias} if self._obs_labels_paths is not None: if self._obs_labels_names is not None and len(self._obs_labels_paths) == len(self._obs_labels_names): # A name was provided for each path element, so use those values. @@ -1115,7 +1119,7 @@ def get_anndata_zarr(base_url): else: # Names were not provided for each path element, # so fall back to using the final part of each path for the names. - names = [labels_path.split('/')[-1] for labels_path in self._obs_labels_paths] + names = [labels_path.split("/")[-1] for labels_path in self._obs_labels_paths] obs_labels = [] for path, name in zip(self._obs_labels_paths, names): obs_labels.append({"path": path, "obsLabelsType": name}) @@ -1124,45 +1128,43 @@ def get_anndata_zarr(base_url): obj_file_def = { "fileType": ft.ANNDATA_ZARR.value, "url": self.get_zarr_url(base_url, dataset_uid, obj_i), - "options": options + "options": options, } if self._request_init is not None: - obj_file_def['requestInit'] = self._request_init + obj_file_def["requestInit"] = self._request_init if self._coordination_values is not None: - obj_file_def['coordinationValues'] = self._coordination_values + obj_file_def["coordinationValues"] = self._coordination_values return obj_file_def return None + return get_anndata_zarr def auto_view_config(self, vc): dataset = vc.add_dataset().add_object(self) - mapping_name = self._mappings_obsm_names[0] if ( - self._mappings_obsm_names is not None) else self._mappings_obsm[0].split('/')[-1] - scatterplot = vc.add_view( - cm.SCATTERPLOT, dataset=dataset, mapping=mapping_name) + mapping_name = ( + self._mappings_obsm_names[0] + if (self._mappings_obsm_names is not None) + else self._mappings_obsm[0].split("/")[-1] + ) + scatterplot = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=mapping_name) cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset) genes = vc.add_view(cm.FEATURE_LIST, dataset=dataset) heatmap = vc.add_view(cm.HEATMAP, dataset=dataset) if self._spatial_polygon_obsm is not None or self._spatial_centroid_obsm is not None: spatial = vc.add_view(cm.SPATIAL, dataset=dataset) - vc.layout((scatterplot | spatial) - / (heatmap | (cell_sets / genes))) + vc.layout((scatterplot | spatial) / (heatmap | (cell_sets / genes))) else: - vc.layout((scatterplot | (cell_sets / genes)) - / heatmap) + vc.layout((scatterplot | (cell_sets / genes)) / heatmap) class MultivecZarrWrapper(AbstractWrapper): - def __init__(self, zarr_path=None, zarr_url=None, **kwargs): super().__init__(**kwargs) self._repr = make_repr(locals()) if zarr_url is not None and zarr_path is not 
None: - raise ValueError( - "Did not expect zarr_path to be provided with zarr_url") + raise ValueError("Did not expect zarr_path to be provided with zarr_url") if zarr_url is None and zarr_path is None: - raise ValueError( - "Expected either zarr_url or zarr_path to be provided") + raise ValueError("Expected either zarr_url or zarr_path to be provided") self._zarr_path = zarr_path self._zarr_url = zarr_url if self._zarr_path is not None: @@ -1176,8 +1178,7 @@ def convert_and_save(self, dataset_uid, obj_i, base_dir=None): if not self.is_remote: super().convert_and_save(dataset_uid, obj_i, base_dir=base_dir) - file_def_creator = self.make_genomic_profiles_file_def_creator( - dataset_uid, obj_i) + file_def_creator = self.make_genomic_profiles_file_def_creator(dataset_uid, obj_i) routes = self.make_genomic_profiles_routes(dataset_uid, obj_i) self.file_def_creators.append(file_def_creator) @@ -1196,11 +1197,9 @@ def get_zarr_url(self, base_url="", dataset_uid="", obj_i=""): def make_genomic_profiles_file_def_creator(self, dataset_uid, obj_i): def genomic_profiles_file_def_creator(base_url): - obj_file_def = { - "fileType": "genomic-profiles.zarr", - "url": self.get_zarr_url(base_url, dataset_uid, obj_i) - } + obj_file_def = {"fileType": "genomic-profiles.zarr", "url": self.get_zarr_url(base_url, dataset_uid, obj_i)} if self._request_init is not None: - obj_file_def['requestInit'] = self._request_init + obj_file_def["requestInit"] = self._request_init return obj_file_def + return genomic_profiles_file_def_creator
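# A minimal end-to-end sketch tying a wrapper from this module into a config;
# the schema version, names, and store path are hypothetical values.
from vitessce import VitessceConfig
from vitessce.wrappers import MultivecZarrWrapper

vc = VitessceConfig(schema_version="1.0.16", name="Genomic profiles")
dataset = vc.add_dataset(name="My dataset").add_object(
    MultivecZarrWrapper(zarr_path="./data/profiles.multivec.zarr")  # hypothetical path
)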