From 5f2e9c1e3291645ad97ff2bab7f01445dd2384e9 Mon Sep 17 00:00:00 2001 From: LTLA Date: Sat, 28 Dec 2024 13:16:00 -0800 Subject: [PATCH] Simplify the factorization of the labels. --- setup.cfg | 2 +- src/singler/train_single.py | 4 +--- tests/test_train_single.py | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/setup.cfg b/setup.cfg index 2b8e9cc..fa0c58c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -55,7 +55,7 @@ install_requires = biocframe>=0.5.0 summarizedexperiment>=0.4.0 singlecellexperiment>=0.4.6 - biocutils>=0.1.7 + biocutils>=0.2.0 [options.packages.find] where = src diff --git a/src/singler/train_single.py b/src/singler/train_single.py index 639f592..35e19d9 100644 --- a/src/singler/train_single.py +++ b/src/singler/train_single.py @@ -205,9 +205,7 @@ def train_single( keep.append(i) ref_data = delayedarray.DelayedArray(ref_data)[:,keep] ref_labels = biocutils.subset_sequence(ref_labels, keep) - ref_labels = biocutils.Factor.from_sequence(ref_labels, sort_levels=True) # TODO: add a dtype= option. - unique_labels = ref_labels.levels - label_idx = ref_labels.codes.astype(dtype=numpy.uint32, copy=False) + unique_labels, label_idx = biocutils.factorize(ref_labels, sort_levels=True, dtype=numpy.uint32, fail_missing=True) markers = _identify_genes( ref_data=ref_data, diff --git a/tests/test_train_single.py b/tests/test_train_single.py index 31800e7..596a6c2 100644 --- a/tests/test_train_single.py +++ b/tests/test_train_single.py @@ -12,7 +12,7 @@ def test_train_single_basic(): assert built.num_labels() == 5 assert built.num_markers() < len(features) assert built.features == features - assert built.labels.as_list() == ["A", "B", "C", "D", "E"] + assert built.labels == ["A", "B", "C", "D", "E"] all_markers = built.marker_subset() assert len(all_markers) == built.num_markers()