diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 20905a5..9045846 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -87,14 +87,15 @@ jobs: - name: Unit tests run: | python -m pytest --cov=trisicell ./tests - - name: Upload coverage - if: success() - env: - CODECOV_NAME: ${{ matrix.python }}-${{ matrix.os }} - run: | - codecov --no-color --required --flags unittests + # - name: Upload coverage + # if: success() + # env: + # CODECOV_NAME: ${{ matrix.python }}-${{ matrix.os }} + # run: | + # codecov --no-color --required --flags unittests codeql: + needs: lint name: code quality runs-on: ubuntu-latest permissions: diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 0000000..de9d585 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,12 @@ +cff-version: 1.2.0 +title: "Profiles of expressed mutations in single cells reveal subclonal expansion patterns and therapeutic impact of intratumor heterogeneity" +message: "To cite Trisicell in publications, please cite it as below:" +type: "article" +authors: + - family-names: "Rashidi Mehrabadi" + given-names: "Farid" + orcid: "https://orcid.org/0000-0003-4103-4904" +doi: "10.1101/2021.03.26.437185" +url: "https://doi.org/10.1101/2021.03.26.437185" +journal: "bioRxiv" +year: "2021" diff --git a/CONTRIBUTING.rst b/CODE_OF_CONDUCT.rst similarity index 100% rename from CONTRIBUTING.rst rename to CODE_OF_CONDUCT.rst diff --git a/README.rst b/README.rst index b43fdd8..8d9631c 100644 --- a/README.rst +++ b/README.rst @@ -57,7 +57,7 @@ Trisicell was developed in collaboration between the `Cancer Data Science Labora :alt: Stars .. |Contributions Welcome| image:: https://img.shields.io/static/v1.svg?label=contributions&message=welcome&color=blue&logo=handshake&logoColor=FFFFFF&style=flat-square - :target: https://github.com/faridrashidi/trisicell/blob/master/CONTRIBUTING.rst + :target: https://github.com/faridrashidi/trisicell/blob/master/CODE_OF_CONDUCT.rst :alt: Contributions Welcome .. |Compatible| image:: https://img.shields.io/pypi/pyversions/trisicell.svg?logo=python&logoColor=FFFFFF&style=flat-square&color=blue diff --git a/docs/source/installation.rst b/docs/source/installation.rst index 7c85bff..78444e8 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -44,4 +44,4 @@ If you run into issues, do not hesitate to approach us or raise a .. _PyPI: https://pypi.org/project/trisicell .. _Github: https://github.com/faridrashidi/trisicell .. _`Github issue`: https://github.com/faridrashidi/trisicell/issues/new/choose -.. _contributing guide: https://github.com/faridrashidi/trisicell/blob/master/CONTRIBUTING.rst +.. _contributing guide: https://github.com/faridrashidi/trisicell/blob/master/CODE_OF_CONDUCT.rst diff --git a/requirements.txt b/requirements.txt index f52b9af..08f4bae 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,6 +18,7 @@ pybnb pydot python_sat pyyaml +scanpy scikit_learn scipy seaborn diff --git a/tests/test_datasets.py b/tests/test_datasets.py index b24093c..8334539 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -26,57 +26,20 @@ def test_load_datasets(self): adata = tsc.datasets.test() assert adata.shape == (20, 20) - adata = tsc.datasets.sublines_bwes() - assert adata.shape == (24, 6653) - mdata = tsc.datasets.sublines_bwts() - assert mdata.shape == (33, 55937) - mdata = tsc.datasets.sublines_scrnaseq() - assert mdata.shape == (175, 55851) - mdata = tsc.datasets.treated_actla4() - assert mdata.shape == (508, 58710) - mdata = tsc.datasets.treated_igg_ss2() - assert mdata.shape == (163, 56854) - mdata = tsc.datasets.treated_igg_sw() - assert mdata.shape == (163, 56854) + # adata = tsc.datasets.sublines_bwes() + # assert adata.shape == (24, 6653) + # mdata = tsc.datasets.sublines_bwts() + # assert mdata.shape == (33, 55937) + # mdata = tsc.datasets.sublines_scrnaseq() + # assert mdata.shape == (175, 55851) + # mdata = tsc.datasets.treated_actla4() + # assert mdata.shape == (508, 58710) + # mdata = tsc.datasets.treated_igg_ss2() + # assert mdata.shape == (163, 56854) + # mdata = tsc.datasets.treated_igg_sw() + # assert mdata.shape == (163, 56854) - adata = tsc.datasets.acute_lymphocytic_leukemia1() - assert adata.shape == (111, 20) - adata = tsc.datasets.acute_lymphocytic_leukemia2() - assert adata.shape == (102, 16) - adata = tsc.datasets.acute_lymphocytic_leukemia3() - assert adata.shape == (150, 49) - adata = tsc.datasets.acute_lymphocytic_leukemia4() - assert adata.shape == (143, 78) - adata = tsc.datasets.acute_lymphocytic_leukemia5() - assert adata.shape == (96, 105) - adata = tsc.datasets.acute_lymphocytic_leukemia6() - assert adata.shape == (146, 10) - adata = tsc.datasets.colorectal1() - assert adata.shape == (178, 16) adata = tsc.datasets.colorectal2() assert adata.shape == (78, 25) - # adata = tsc.datasets.colorectal3() - adata = tsc.datasets.erbc() - assert adata.shape == (47, 40) adata = tsc.datasets.high_grade_serous_ovarian_cancer_3celllines() assert adata.shape == (891, 14068) - adata = tsc.datasets.melanoma20() - assert adata.shape == (20, 2367) - adata = tsc.datasets.muscle_invasive_bladder() - assert adata.shape == (44, 443) - adata = tsc.datasets.myeloproliferative_neoplasms18() - assert adata.shape == (58, 18) - adata = tsc.datasets.myeloproliferative_neoplasms78() - assert adata.shape == (58, 78) - adata = tsc.datasets.myeloproliferative_neoplasms712() - assert adata.shape == (58, 712) - adata = tsc.datasets.oligodendroglioma_idh_mutated_tumor() - assert adata.shape == (579, 77) - adata = tsc.datasets.renal_cell_carcinoma() - assert adata.shape == (17, 35) - adata = tsc.datasets.tnbc() - assert adata.shape == (16, 20) - # adata = tsc.datasets.high_grade_serous_ovarian_cancer1() - # adata = tsc.datasets.high_grade_serous_ovarian_cancer2() - # adata = tsc.datasets.high_grade_serous_ovarian_cancer3() - # adata = tsc.datasets.acute_lymphocytic_leukemia_many() diff --git a/trisicell/datasets/__init__.py b/trisicell/datasets/__init__.py index 5b4210b..d5dd597 100644 --- a/trisicell/datasets/__init__.py +++ b/trisicell/datasets/__init__.py @@ -1,33 +1,13 @@ """Datasets Module.""" from trisicell.datasets._datasets import ( - acute_lymphocytic_leukemia1, - acute_lymphocytic_leukemia2, - acute_lymphocytic_leukemia3, - acute_lymphocytic_leukemia4, - acute_lymphocytic_leukemia5, - acute_lymphocytic_leukemia6, - colorectal1, colorectal2, - colorectal3, - erbc, example, - high_grade_serous_ovarian_cancer1, - high_grade_serous_ovarian_cancer2, - high_grade_serous_ovarian_cancer3, high_grade_serous_ovarian_cancer_3celllines, - melanoma20, - muscle_invasive_bladder, - myeloproliferative_neoplasms18, - myeloproliferative_neoplasms78, - myeloproliferative_neoplasms712, - oligodendroglioma_idh_mutated_tumor, - renal_cell_carcinoma, sublines_bwes, sublines_bwts, sublines_scrnaseq, test, - tnbc, treated_actla4, treated_igg_ss2, treated_igg_sw, @@ -35,30 +15,9 @@ from trisicell.datasets._simulate import add_doublets, add_noise, simulate __all__ = ( - acute_lymphocytic_leukemia1, - acute_lymphocytic_leukemia2, - acute_lymphocytic_leukemia3, - acute_lymphocytic_leukemia4, - acute_lymphocytic_leukemia5, - acute_lymphocytic_leukemia6, - colorectal1, colorectal2, - colorectal3, - erbc, example, - high_grade_serous_ovarian_cancer1, - high_grade_serous_ovarian_cancer2, - high_grade_serous_ovarian_cancer3, high_grade_serous_ovarian_cancer_3celllines, - melanoma20, - muscle_invasive_bladder, - myeloproliferative_neoplasms18, - myeloproliferative_neoplasms78, - myeloproliferative_neoplasms712, - oligodendroglioma_idh_mutated_tumor, - renal_cell_carcinoma, - test, - tnbc, simulate, add_noise, add_doublets, @@ -68,4 +27,5 @@ treated_actla4, treated_igg_ss2, treated_igg_sw, + test, ) diff --git a/trisicell/datasets/_datasets.py b/trisicell/datasets/_datasets.py index 4130588..eea976d 100644 --- a/trisicell/datasets/_datasets.py +++ b/trisicell/datasets/_datasets.py @@ -1,7 +1,10 @@ import mudata as md +import scanpy as sc import trisicell as tsc +url = "https://github.com/faridrashidi/trisicell/releases/download/d0.0.1" + def treated_igg_sw(): """Trisicell treated mice (igg, seq-well) scRNAseq data. @@ -35,9 +38,11 @@ def treated_igg_sw(): :func:`trisicell.datasets.treated_igg_ss2`. """ - mdata = md.read_h5mu( - tsc.ul.get_file("trisicell.datasets/data/treated_igg_sw.h5md.gz") + name = "treated_igg_sw.h5md.gz" + sc.readwrite._check_datafile_present_and_download( + f"data/{name}", backup_url=f"{url}/{name}" ) + mdata = md.read_h5mu(f"data/{name}") return mdata @@ -73,9 +78,11 @@ def treated_igg_ss2(): :func:`trisicell.datasets.treated_igg_sw`. """ - mdata = md.read_h5mu( - tsc.ul.get_file("trisicell.datasets/data/treated_igg_ss2.h5md.gz") + name = "treated_igg_ss2.h5md.gz" + sc.readwrite._check_datafile_present_and_download( + f"data/{name}", backup_url=f"{url}/{name}" ) + mdata = md.read_h5mu(f"data/{name}") return mdata @@ -111,9 +118,11 @@ def treated_actla4(): :func:`trisicell.datasets.treated_igg_sw`. """ - mdata = md.read_h5mu( - tsc.ul.get_file("trisicell.datasets/data/treated_actla4.h5md.gz") + name = "treated_actla4.h5md.gz" + sc.readwrite._check_datafile_present_and_download( + f"data/{name}", backup_url=f"{url}/{name}" ) + mdata = md.read_h5mu(f"data/{name}") return mdata @@ -148,9 +157,11 @@ def sublines_scrnaseq(): :func:`trisicell.datasets.sublines_bwts`. """ - mdata = md.read_h5mu( - tsc.ul.get_file("trisicell.datasets/data/sublines_scrnaseq.h5md.gz") + name = "sublines_scrnaseq.h5md.gz" + sc.readwrite._check_datafile_present_and_download( + f"data/{name}", backup_url=f"{url}/{name}" ) + mdata = md.read_h5mu(f"data/{name}") return mdata @@ -186,9 +197,12 @@ def sublines_bwes(): :func:`trisicell.datasets.sublines_scrnaseq`. :func:`trisicell.datasets.sublines_bwts`. """ - adata = tsc.io.read( - tsc.ul.get_file("trisicell.datasets/data/sublines_bwes.h5ad.gz") + + name = "sublines_bwes.h5ad.gz" + sc.readwrite._check_datafile_present_and_download( + f"data/{name}", backup_url=f"{url}/{name}" ) + adata = sc.read_h5ad(f"data/{name}") return adata @@ -223,9 +237,11 @@ def sublines_bwts(): :func:`trisicell.datasets.sublines_scrnaseq`. """ - mdata = md.read_h5mu( - tsc.ul.get_file("trisicell.datasets/data/sublines_bwts.h5md.gz") + name = "sublines_bwts.h5md.gz" + sc.readwrite._check_datafile_present_and_download( + f"data/{name}", backup_url=f"{url}/{name}" ) + mdata = md.read_h5mu(f"data/{name}") return mdata @@ -254,70 +270,14 @@ def test(): return df -def melanoma20(): - """Mouse Melanoma dataset with 20 sublines. - - This dataset was introduced in :cite:`Wolf_2019` and was used in: - - * :cite:`PhISCS-BnB` Figure 1. - - The size is n_cells × n_muts = 20 × 2367 - - Returns - ------- - :class:`anndata.AnnData` - An anndata in which `.X` is the input noisy. - - `.layers['solution_fig1']` is the solution presented in Figure 1 of PhISCS-BnB - paper. - """ - - adata = tsc.io.read(tsc.ul.get_file("trisicell.datasets/real/melanoma20.h5ad")) - return adata - - -def colorectal1(): - """Human Colorectal Cancer (Patient 1). - - This dataset was introduced in :cite:`Leung_2017` and was used in: - - * :cite:`B-SCITE` Figure 8a. - * :cite:`SiFit` Figure 6. - * :cite:`SPhyR` Table 1. - * :cite:`SiCloneFit` Figure 3. - - The size is n_cells × n_muts = 178 × 16 - - Returns - ------- - :class:`anndata.AnnData` - An anndata in which `.X` is the input noisy. - - Notes - ----- - This dataset includes single cells from two sites of the patient body; - 133 single cells from colon as primary tumor site and 45 single cells from liver - as the tumor metastatic site (178 in total). The number of mutations in this - dataset is 16. One can remove the cells in this dataset that carry none of these 16 - mutations before feeding it to our network. After removing cells with zero profile, - the number of cells are 40 and 32 from primary and metastatic sites, respectively - (72 in total). - """ - - adata = tsc.io.read(tsc.ul.get_file("trisicell.datasets/real/colorectal1.h5ad")) - return adata - - def colorectal2(readcount=False): """Human Colorectal Cancer (Patient 2). This dataset was introduced in :cite:`Leung_2017` and was used in: - * :cite:`PhISCS` Figure 7. * :cite:`B-SCITE` Figure 8b. * :cite:`SiCloneFit` Figure 4. * :cite:`SCARLET` Figure 4. - The size is n_cells × n_muts = 78 × 25 Parameters @@ -329,7 +289,6 @@ def colorectal2(readcount=False): ------- :class:`anndata.AnnData` An anndata in which `.X` is the input noisy. - - `.layers['solution_fig7a']` is the solution presented in Figure 7a of PhISCS paper. - `.layers['solution_fig7b']` is the solution presented in Figure 7b of @@ -345,441 +304,9 @@ def colorectal2(readcount=False): ) else: adata = tsc.io.read(tsc.ul.get_file("trisicell.datasets/real/colorectal2.h5ad")) - # FIXME: (86 x 25 in B-SCITE) (182 x 36 SiCloneFit) - # https://github.com/cbg-ethz/infSCITE/blob/master/pat_2.csv - # https://github.com/hzi-bifo/scelestial-paper-materials-devel/tree/master/testing/ - return adata - - -def colorectal3(): - """Human Colorectal Cancer. - - This dataset was introduced in :cite:`Wu_2016` and was used in: - - * :cite:`SiFit` Figure 5. - - The size is n_cells × n_muts = 48 × 77 - - Returns - ------- - :class:`anndata.AnnData` - An anndata in which `.X` is the input noisy. - """ - - # adata = tsc.io.read(tsc.ul.get_file("trisicell.datasets/real/colorectal3.h5ad")) - # TODO: extract - return None - - -def acute_lymphocytic_leukemia1(): - """Human Acute Lymphocytic Leukemia dataset (Patient 1). - - This dataset was introduced in :cite:`Gawad_2014` and was used in: - - * :cite:`B-SCITE` Figure 5. - * :cite:`infSCITE` Figure S16. - - The size is n_cells × n_muts = 111 × 20 - - Returns - ------- - :class:`anndata.AnnData` - An anndata in which `.X` is the input noisy. - """ - - adata = tsc.io.read( - tsc.ul.get_file("trisicell.datasets/real/acute_lymphocytic_leukemia1.h5ad") - ) - return adata - - -def acute_lymphocytic_leukemia2(): - """Human Acute Lymphocytic Leukemia dataset (Patient 2). - - This dataset was introduced in :cite:`Gawad_2014` and was used in: - - * :cite:`PhISCS` Figure 9. - * :cite:`B-SCITE` in Figure 6. - * :cite:`infSCITE` Figure S17. - * :cite:`Phyolin` Table 2. - - The size is n_cells × n_muts = 102 × 16 - - Returns - ------- - :class:`anndata.AnnData` - An anndata in which `.X` is the input noisy. - - - `.layers['solution_fig9']` is the solution presented in Figure 9 of PhISCS - paper. - - `.uns['params_fig9']` is parameters used as input to get 'solution_fig9'. - - `.var` includes information of the bulk samples. - """ - - adata = tsc.io.read( - tsc.ul.get_file("trisicell.datasets/real/acute_lymphocytic_leukemia2.h5ad") - ) - # FIXME: 115 x 16 in B-SCITE? - return adata - - -def acute_lymphocytic_leukemia3(): - """Human Acute Lymphocytic Leukemia dataset (Patient 3). - - This dataset was introduced in :cite:`Gawad_2014` and was used in: - - * :cite:`infSCITE` Figure S18. - * :cite:`SCIPhI` Figure 5. - * :cite:`ScisTree` Figure S3. - - The size is n_cells × n_muts = 150 × 49 - - Returns - ------- - :class:`anndata.AnnData` - An anndata in which `.X` is the input noisy. - """ - - adata = tsc.io.read( - tsc.ul.get_file("trisicell.datasets/real/acute_lymphocytic_leukemia3.h5ad") - ) - # FIXME: 255 x 49 in SCIPhI and ScisTree? - return adata - - -def acute_lymphocytic_leukemia4(): - """Human Acute Lymphocytic Leukemia dataset (Patient 4). - - This dataset was introduced in :cite:`Gawad_2014` and was used in: - - * :cite:`infSCITE` Figure S19. - * :cite:`gpps` Figure 3. - * :cite:`SASC` Figure 6. - - The size is n_cells × n_muts = 143 × 78 - - Returns - ------- - :class:`anndata.AnnData` - An anndata in which `.X` is the input noisy. - """ - - adata = tsc.io.read( - tsc.ul.get_file("trisicell.datasets/real/acute_lymphocytic_leukemia4.h5ad") - ) - return adata - - -def acute_lymphocytic_leukemia5(): - """Human Acute Lymphocytic Leukemia dataset (Patient 5). - - This dataset was introduced in :cite:`Gawad_2014` and was used in: - - * :cite:`infSCITE` Figure S20. - * :cite:`SASC` Figure 7. - - The size is n_cells × n_muts = 96 × 105 - - Returns - ------- - :class:`anndata.AnnData` - An anndata in which `.X` is the input noisy. - """ - - adata = tsc.io.read( - tsc.ul.get_file("trisicell.datasets/real/acute_lymphocytic_leukemia5.h5ad") - ) - return adata - - -def acute_lymphocytic_leukemia6(): - """Human Acute Lymphocytic Leukemia dataset (Patient 6). - - This dataset was introduced in :cite:`Gawad_2014` and was used in: - - * :cite:`infSCITE` Figure S21. - * :cite:`Phyolin` Table 2. - - The size is n_cells × n_muts = 146 × 10 - - Returns - ------- - :class:`anndata.AnnData` - An anndata in which `.X` is the input noisy. - """ - - adata = tsc.io.read( - tsc.ul.get_file("trisicell.datasets/real/acute_lymphocytic_leukemia6.h5ad") - ) - return adata - - -def tnbc(): - """Triple-negative Breast Cancer. - - This dataset was introduced in :cite:`Wang_2014` and was used in: - - * :cite:`SCIPhI` Figure 4. - * :cite:`B-SCITE` Figure 7. - * :cite:`TRaIT` Figure 6. - - The size is n_cells × n_muts = 16 × 20 - - Returns - ------- - :class:`anndata.AnnData` - An anndata in which `.X` is the input noisy. - - Examples - -------- - >>> adata = tsc.datasets.tnbc() - >>> df_in = adata.to_df() - """ - - adata = tsc.io.read(tsc.ul.get_file("trisicell.datasets/real/tnbc.h5ad")) - return adata - - -def erbc(): - """Oestrogen-receptor-positive (ER+) Breast Cancer. - - This dataset was introduced in :cite:`Wang_2014` and was used in: - - * :cite:`SCITE` Figure S8 and S9. - * :cite:`infSCITE` Figure S15. - * :cite:`gpps` Figure 1. - - The size is n_cells × n_muts = 47 × 40 - - Returns - ------- - :class:`anndata.AnnData` - An anndata in which `.X` is the input noisy. - - - `.uns['params_scite']` is parameters inferred by SCITE. - - Examples - -------- - >>> adata = tsc.datasets.erbc() - >>> df_in = adata.to_df() - """ - - adata = tsc.io.read(tsc.ul.get_file("trisicell.datasets/real/erbc.h5ad")) - return adata - - -def muscle_invasive_bladder(): - """Muscle Invasive Bladder Cancer. - - This dataset was introduced in :cite:`Li_2012` and was used in: - - * :cite:`OncoNEM` Figure 6B. - - The size is n_cells × n_muts = 44 × 443 - - Returns - ------- - :class:`anndata.AnnData` - An anndata in which `.X` is the input noisy. - - - `.uns['params_onconem']` is parameters inferred by OncoNEM. - - Examples - -------- - >>> adata = tsc.datasets.muscle_invasive_bladder() - >>> df_in = adata.to_df() - """ - - adata = tsc.io.read( - tsc.ul.get_file("trisicell.datasets/real/muscle_invasive_bladder.h5ad") - ) - return adata - - -def renal_cell_carcinoma(): - """Clear-cell Renal-cell Carcinoma. - - This dataset was introduced in :cite:`Xu_2012` and was used in: - - * :cite:`SCITE` Figure S6 and S7. - * :cite:`infSCITE` Figure S14. - - The size is n_cells × n_muts = 17 × 35 - - Returns - ------- - :class:`anndata.AnnData` - An anndata in which `.X` is the input noisy. - - - `.uns['params_scite']` is parameters inferred by SCITE. - - Examples - -------- - >>> adata = tsc.datasets.renal_cell_carcinoma() - >>> df_in = adata.to_df() - """ - - adata = tsc.io.read( - tsc.ul.get_file("trisicell.datasets/real/renal_cell_carcinoma.h5ad") - ) return adata -def myeloproliferative_neoplasms712(): - """JAK2-Negative Myeloproliferative Neoplasm. - - This dataset was introduced in :cite:`Hou_2012` and was used in: - - * :cite:`OncoNEM` Figure 6D. - - The size is n_cells × n_muts = 58 × 712 - - Returns - ------- - :class:`anndata.AnnData` - An anndata in which `.X` is the input noisy. - - - `.uns['params_onconem']` is parameters inferred by OncoNEM. - """ - - adata = tsc.io.read( - tsc.ul.get_file("trisicell.datasets/real/myeloproliferative_neoplasms712.h5ad") - ) - return adata - - -def myeloproliferative_neoplasms78(): - """JAK2-Negative Myeloproliferative Neoplasm. - - This dataset was introduced in :cite:`Hou_2012` and was used in: - - * :cite:`SCITE` Figure S5. - - The size is n_cells × n_muts = 58 × 78 - - Returns - ------- - :class:`anndata.AnnData` - An anndata in which `.X` is the input noisy. - - Notes - ----- - The original dataset contains 712 mutations but 78 ones were considered as - non-synonymous mutations from the full data. - """ - - adata = tsc.io.read( - tsc.ul.get_file("trisicell.datasets/real/myeloproliferative_neoplasms78.h5ad") - ) - return adata - - -def myeloproliferative_neoplasms18(): - """JAK2-Negative Myeloproliferative Neoplasm. - - This dataset was introduced in :cite:`Hou_2012` and was used in: - - * :cite:`SCITE` Figure S2, S3 and S4. - * :cite:`Kim_2014` Figure 1. - * :cite:`infSCITE` Figure S13. - * :cite:`gpps` Figure 2. - - The size is n_cells × n_muts = 58 × 18 - - Returns - ------- - :class:`anndata.AnnData` - An anndata in which `.X` is the input noisy. - - - `.uns['params_scite']` is parameters inferred by SCITE. - - Notes - ----- - The original dataset contains 712 mutations but 18 ones were considered as - cancer related mutations from the full data. - """ - - adata = tsc.io.read( - tsc.ul.get_file("trisicell.datasets/real/myeloproliferative_neoplasms18.h5ad") - ) - return adata - - -def high_grade_serous_ovarian_cancer1(): - """High Grade Serous Ovarian Cancer (Patient 2). - - This dataset was introduced in :cite:`McPherson_2016` and was used in: - - * :cite:`infSCITE` Figure S22. - - The size is n_cells × n_muts = 588 × 37 - - Returns - ------- - :class:`anndata.AnnData` - An anndata in which `.X` is the input noisy. - """ - - # adata = tsc.io.read( - # tsc.ul.get_file( - # "trisicell.datasets/real/high_grade_serous_ovarian_cancer1.h5ad" - # ) - # ) - # TODO: extract - return None - - -def high_grade_serous_ovarian_cancer2(): - """High Grade Serous Ovarian Cancer (Patient 3). - - This dataset was introduced in :cite:`McPherson_2016` and was used in: - - * :cite:`infSCITE` Figure S23. - - The size is n_cells × n_muts = 672 × 60 - - Returns - ------- - :class:`anndata.AnnData` - An anndata in which `.X` is the input noisy. - """ - - # adata = tsc.io.read( - # tsc.ul.get_file( - # "trisicell.datasets/real/high_grade_serous_ovarian_cancer2.h5ad" - # ) - # ) - # TODO: extract - return None - - -def high_grade_serous_ovarian_cancer3(): - """High Grade Serous Ovarian Cancer (Patient 9). - - This dataset was introduced in :cite:`McPherson_2016` and was used in: - - * :cite:`infSCITE` Figure S24. - * :cite:`scVILP` Figure 5. - * :cite:`SCIPhI` Figure S10. - - The size is n_cells × n_muts = 420 × 37 - - Returns - ------- - :class:`anndata.AnnData` - An anndata in which `.X` is the input noisy. - """ - - # adata = tsc.io.read( - # tsc.ul.get_file( - # "trisicell.datasets/real/high_grade_serous_ovarian_cancer3.h5ad" - # ) - # ) - # TODO: extract - # FIXME: 370 x 43 in scVILP and SCIPhI? - return None - - def high_grade_serous_ovarian_cancer_3celllines(): """High Grade Serous Ovarian Cancer (3 cell lines). @@ -826,62 +353,3 @@ def high_grade_serous_ovarian_cancer_3celllines(): adata = tsc.io.read(tsc.ul.get_file("trisicell.datasets/real/ovarian.h5ad.gz")) return adata - - -def oligodendroglioma_idh_mutated_tumor(): - """Oligodendroglioma IDH-mutated tumor. - - This dataset was introduced in :cite:`Tirosh_2016` and was used in: - - * :cite:`SASC` Figure 5. - - The size is n_cells × n_muts = 579 × 77 - - Returns - ------- - :class:`anndata.AnnData` - An anndata in which `.X` is the input noisy. - """ - - adata = tsc.io.read( - tsc.ul.get_file( - "trisicell.datasets/real/oligodendroglioma_idh_mutated_tumor.h5ad" - ) - ) - return adata - - -def acute_lymphocytic_leukemia_many(): - """Human Acute Lymphocytic Leukemia datasets. - - This dataset was introduced in :cite:`Morita_2020` and was used in: - - * :cite:`Phyolin` Figure 3 and Table 1. - - Returns - ------- - :class:`anndata.AnnData` - An anndata in which `.X` is the input noisy. - """ - - # TODO: extract all - return None - - -def isogenic_fibroblast_cell_line(): - """Isogenic Fibroblast cell line dataset. - - This dataset was introduced in :cite:`Leung_2015` and was used in: - - * :cite:`SCIPhI` Figure S9. - - The size is n_cells × n_muts = 19 × ? - - Returns - ------- - :class:`anndata.AnnData` - An anndata in which `.X` is the input noisy. - """ - - # TODO: extract - return None diff --git a/trisicell/datasets/data/sublines_bwes.h5ad.gz b/trisicell/datasets/data/sublines_bwes.h5ad.gz deleted file mode 100644 index 0a5d134..0000000 Binary files a/trisicell/datasets/data/sublines_bwes.h5ad.gz and /dev/null differ diff --git a/trisicell/datasets/data/sublines_bwts.h5md.gz b/trisicell/datasets/data/sublines_bwts.h5md.gz deleted file mode 100644 index 91aaa3b..0000000 Binary files a/trisicell/datasets/data/sublines_bwts.h5md.gz and /dev/null differ diff --git a/trisicell/datasets/data/sublines_scrnaseq.h5md.gz b/trisicell/datasets/data/sublines_scrnaseq.h5md.gz deleted file mode 100644 index 1fc156a..0000000 Binary files a/trisicell/datasets/data/sublines_scrnaseq.h5md.gz and /dev/null differ diff --git a/trisicell/datasets/data/treated_actla4.h5md.gz b/trisicell/datasets/data/treated_actla4.h5md.gz deleted file mode 100644 index 7093f0d..0000000 Binary files a/trisicell/datasets/data/treated_actla4.h5md.gz and /dev/null differ diff --git a/trisicell/datasets/data/treated_igg_ss2.h5md.gz b/trisicell/datasets/data/treated_igg_ss2.h5md.gz deleted file mode 100644 index 8e1266c..0000000 Binary files a/trisicell/datasets/data/treated_igg_ss2.h5md.gz and /dev/null differ diff --git a/trisicell/datasets/data/treated_igg_sw.h5md.gz b/trisicell/datasets/data/treated_igg_sw.h5md.gz deleted file mode 100644 index 2c72352..0000000 Binary files a/trisicell/datasets/data/treated_igg_sw.h5md.gz and /dev/null differ diff --git a/trisicell/datasets/real/acute_lymphocytic_leukemia1.h5ad b/trisicell/datasets/real/acute_lymphocytic_leukemia1.h5ad deleted file mode 100644 index e6e4016..0000000 Binary files a/trisicell/datasets/real/acute_lymphocytic_leukemia1.h5ad and /dev/null differ diff --git a/trisicell/datasets/real/acute_lymphocytic_leukemia2.h5ad b/trisicell/datasets/real/acute_lymphocytic_leukemia2.h5ad deleted file mode 100644 index 0165612..0000000 Binary files a/trisicell/datasets/real/acute_lymphocytic_leukemia2.h5ad and /dev/null differ diff --git a/trisicell/datasets/real/acute_lymphocytic_leukemia3.h5ad b/trisicell/datasets/real/acute_lymphocytic_leukemia3.h5ad deleted file mode 100644 index 94f4606..0000000 Binary files a/trisicell/datasets/real/acute_lymphocytic_leukemia3.h5ad and /dev/null differ diff --git a/trisicell/datasets/real/acute_lymphocytic_leukemia4.h5ad b/trisicell/datasets/real/acute_lymphocytic_leukemia4.h5ad deleted file mode 100644 index 0eb3fcd..0000000 Binary files a/trisicell/datasets/real/acute_lymphocytic_leukemia4.h5ad and /dev/null differ diff --git a/trisicell/datasets/real/acute_lymphocytic_leukemia5.h5ad b/trisicell/datasets/real/acute_lymphocytic_leukemia5.h5ad deleted file mode 100644 index 0346bee..0000000 Binary files a/trisicell/datasets/real/acute_lymphocytic_leukemia5.h5ad and /dev/null differ diff --git a/trisicell/datasets/real/acute_lymphocytic_leukemia6.h5ad b/trisicell/datasets/real/acute_lymphocytic_leukemia6.h5ad deleted file mode 100644 index c707b9f..0000000 Binary files a/trisicell/datasets/real/acute_lymphocytic_leukemia6.h5ad and /dev/null differ diff --git a/trisicell/datasets/real/colorectal1.h5ad b/trisicell/datasets/real/colorectal1.h5ad deleted file mode 100644 index 7476692..0000000 Binary files a/trisicell/datasets/real/colorectal1.h5ad and /dev/null differ diff --git a/trisicell/datasets/real/erbc.h5ad b/trisicell/datasets/real/erbc.h5ad deleted file mode 100644 index 7dfede4..0000000 Binary files a/trisicell/datasets/real/erbc.h5ad and /dev/null differ diff --git a/trisicell/datasets/real/melanoma20.h5ad b/trisicell/datasets/real/melanoma20.h5ad deleted file mode 100644 index cec426e..0000000 Binary files a/trisicell/datasets/real/melanoma20.h5ad and /dev/null differ diff --git a/trisicell/datasets/real/muscle_invasive_bladder.h5ad b/trisicell/datasets/real/muscle_invasive_bladder.h5ad deleted file mode 100644 index d1b59aa..0000000 Binary files a/trisicell/datasets/real/muscle_invasive_bladder.h5ad and /dev/null differ diff --git a/trisicell/datasets/real/myeloproliferative_neoplasms18.h5ad b/trisicell/datasets/real/myeloproliferative_neoplasms18.h5ad deleted file mode 100644 index 1ab0257..0000000 Binary files a/trisicell/datasets/real/myeloproliferative_neoplasms18.h5ad and /dev/null differ diff --git a/trisicell/datasets/real/myeloproliferative_neoplasms712.h5ad b/trisicell/datasets/real/myeloproliferative_neoplasms712.h5ad deleted file mode 100644 index eaaddfe..0000000 Binary files a/trisicell/datasets/real/myeloproliferative_neoplasms712.h5ad and /dev/null differ diff --git a/trisicell/datasets/real/myeloproliferative_neoplasms78.h5ad b/trisicell/datasets/real/myeloproliferative_neoplasms78.h5ad deleted file mode 100644 index 8bd92b9..0000000 Binary files a/trisicell/datasets/real/myeloproliferative_neoplasms78.h5ad and /dev/null differ diff --git a/trisicell/datasets/real/oligodendroglioma_idh_mutated_tumor.h5ad b/trisicell/datasets/real/oligodendroglioma_idh_mutated_tumor.h5ad deleted file mode 100644 index 76e025e..0000000 Binary files a/trisicell/datasets/real/oligodendroglioma_idh_mutated_tumor.h5ad and /dev/null differ diff --git a/trisicell/datasets/real/renal_cell_carcinoma.h5ad b/trisicell/datasets/real/renal_cell_carcinoma.h5ad deleted file mode 100644 index bd78b60..0000000 Binary files a/trisicell/datasets/real/renal_cell_carcinoma.h5ad and /dev/null differ diff --git a/trisicell/datasets/real/tnbc.h5ad b/trisicell/datasets/real/tnbc.h5ad deleted file mode 100644 index be1ae4f..0000000 Binary files a/trisicell/datasets/real/tnbc.h5ad and /dev/null differ