Skip to content

Commit

Permalink
feat: Cell Ranger 8.0.1
Browse files Browse the repository at this point in the history
  • Loading branch information
10x Genomics committed Aug 21, 2024
1 parent 5b0acf8 commit a039816
Show file tree
Hide file tree
Showing 39 changed files with 4,406 additions and 2,584 deletions.
2,162 changes: 1,189 additions & 973 deletions conda_spec.bzl

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions lib/python/cellranger/cell_calling.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from cellranger.chemistry import (
CHEMISTRY_DESCRIPTION_FIELD,
CHEMISTRY_SC3P_LT,
HT_CHEMISTRIES,
SC3P_V4_CHEMISTRIES,
SC5P_V3_CHEMISTRIES,
)
Expand Down Expand Up @@ -132,8 +133,12 @@ def get_empty_drops_range(chemistry_description: str, num_probe_bcs: int | None)
# The chips used with V4 have roughly twice as many GEMs as the older V3 chips
v4_chemistries = SC3P_V4_CHEMISTRIES + SC5P_V3_CHEMISTRIES
v4_chem_names = [chem[CHEMISTRY_DESCRIPTION_FIELD] for chem in v4_chemistries]
ht_chem_names = [chem[CHEMISTRY_DESCRIPTION_FIELD] for chem in HT_CHEMISTRIES]

if chemistry_description == CHEMISTRY_SC3P_LT[CHEMISTRY_DESCRIPTION_FIELD]:
N_PARTITIONS = 9000
elif chemistry_description in ht_chem_names:
N_PARTITIONS = 160000
elif chemistry_description in v4_chem_names:
N_PARTITIONS = 80000 * num_probe_bcs if num_probe_bcs and num_probe_bcs > 1 else 160000
else:
Expand Down
2 changes: 1 addition & 1 deletion lib/python/cellranger/cell_calling_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -960,7 +960,7 @@ def filter_cellular_barcodes_fixed_cutoff(bc_counts, cutoff: int):
top_n = min(cutoff, nonzero_bcs)
top_bc_idx = np.sort(np.argsort(bc_counts, kind=NP_SORT_KIND)[::-1][0:top_n])
metrics = BarcodeFilterResults.init_with_constant_call(top_n)
metrics.filtered_bcs_cutoff = np.sort(bc_counts)[::-1][top_n]
metrics.filtered_bcs_cutoff = np.sort(bc_counts)[::-1][top_n - 1]
return top_bc_idx, metrics, None


Expand Down
3 changes: 3 additions & 0 deletions lib/python/cellranger/chemistry.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
open(os.path.join(os.path.dirname(__file__), "chemistry_defs.json"))
)

# HT chemistries
HT_CHEMISTRIES = [CHEMISTRY_DEFS["SC3Pv3HT"], CHEMISTRY_DEFS["SC5PHT"]]

# LT v3 Chemistry
CHEMISTRY_SC3P_LT = CHEMISTRY_DEFS["SC3Pv3LT"]

Expand Down
74 changes: 6 additions & 68 deletions lib/python/cellranger/chemistry_defs.json
Original file line number Diff line number Diff line change
Expand Up @@ -234,37 +234,6 @@
}
]
},
"SC3Pv4HT": {
"barcode": [
{
"kind": "gel_bead",
"length": 16,
"offset": 0,
"read_type": "R1",
"whitelist": {
"name": "3M-3pgex-may-2023"
}
}
],
"description": "Single Cell 3' v4 HT",
"endedness": "three_prime",
"name": "SC3Pv4HT",
"rna": {
"length": null,
"offset": 0,
"read_type": "R2"
},
"rna2": null,
"strandedness": "+",
"umi": [
{
"length": 12,
"min_length": 10,
"offset": 16,
"read_type": "R1"
}
]
},
"SC3Pv3LT": {
"barcode": [
{
Expand Down Expand Up @@ -352,7 +321,7 @@
"umi": [
{
"length": 12,
"min_length": null,
"min_length": 10,
"offset": 16,
"read_type": "R1"
}
Expand Down Expand Up @@ -414,7 +383,7 @@
"umi": [
{
"length": 12,
"min_length": null,
"min_length": 10,
"offset": 16,
"read_type": "R1"
}
Expand Down Expand Up @@ -494,7 +463,7 @@
"umi": [
{
"length": 12,
"min_length": null,
"min_length": 10,
"offset": 16,
"read_type": "R1"
}
Expand Down Expand Up @@ -531,37 +500,6 @@
}
]
},
"SC5PHT-v3": {
"barcode": [
{
"kind": "gel_bead",
"length": 16,
"offset": 0,
"read_type": "R1",
"whitelist": {
"name": "3M-5pgex-jan-2023"
}
}
],
"description": "Single Cell 5' HT v3",
"endedness": "five_prime",
"name": "SC5PHT-v3",
"rna": {
"length": null,
"offset": 0,
"read_type": "R2"
},
"rna2": null,
"strandedness": "-",
"umi": [
{
"length": 12,
"min_length": null,
"offset": 16,
"read_type": "R1"
}
]
},
"SC5P-PE": {
"barcode": [
{
Expand Down Expand Up @@ -626,7 +564,7 @@
"umi": [
{
"length": 12,
"min_length": null,
"min_length": 10,
"offset": 16,
"read_type": "R1"
}
Expand Down Expand Up @@ -696,7 +634,7 @@
"umi": [
{
"length": 12,
"min_length": null,
"min_length": 10,
"offset": 16,
"read_type": "R1"
}
Expand Down Expand Up @@ -758,7 +696,7 @@
"umi": [
{
"length": 12,
"min_length": null,
"min_length": 10,
"offset": 16,
"read_type": "R1"
}
Expand Down
41 changes: 27 additions & 14 deletions lib/python/cellranger/feature/feature_assigner.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@
UMI_NUM_TRIES = 10 # Number of initial points to try for GMM-fitting
UMI_MIX_INIT_SD = 0.25 # Initial standard deviation for GMM components
MIN_COUNTS_PER_ANTIBODY = 1000 # Filter out background antibodies
# TODO: revise after background correction is implemented

# Filtering feature assignments based on UMI thresholds and correlation with other tags
COUNTS_DYNAMIC_RANGE = 50.0
Expand Down Expand Up @@ -210,10 +209,10 @@ def _calculate_n_let_diversity_probs(n_let: int, n_tags: int) -> np.ndarray:
return probs


def call_presence_with_gmm_ab(umi_counts: np.ndarray, *, umi_threshold: int = 1) -> np.ndarray:
def call_presence_with_gmm_ab(umi_counts: np.ndarray, *, min_umi_threshold: int = 1) -> np.ndarray:
"""Given the UMI counts for a specific antibody, separate signal from background.
A cell must have at least `umi_threshold` UMIs for this feature to be considered positive.
A cell must have at least `min_umi_threshold` UMIs for this feature to be considered positive.
"""
if np.max(umi_counts) == 0 or max(umi_counts.shape) < 2:
# there are no UMIs (each barcode has 0 count), or umi_counts has fewer than two entries
Expand All @@ -227,7 +226,7 @@ def call_presence_with_gmm_ab(umi_counts: np.ndarray, *, umi_threshold: int = 1)
positive_component = np.argmax(gmm.means_)

# Classify each cell
return (umi_counts >= umi_threshold) & (gmm.predict(log_umi_counts) == positive_component)
return (umi_counts >= min_umi_threshold) & (gmm.predict(log_umi_counts) == positive_component)


# This cannot use a namedtuple because those are immutable.
Expand Down Expand Up @@ -675,7 +674,13 @@ def create_feature_assignments_matrix(self) -> FeatureAssignmentsMatrix:
class GuideAssigner(FeatureAssigner):
"""Sub-class of FeatureAssigner specific to CRISPR Library features."""

def __init__(self, matrix: cr_matrix.CountMatrix, feature_type: str):
def __init__(
self,
matrix: cr_matrix.CountMatrix,
*,
feature_type: str = rna_library.CRISPR_LIBRARY_TYPE,
min_crispr_umi_threshold: int,
):
super().__init__(
matrix,
feature_type,
Expand All @@ -687,6 +692,7 @@ def __init__(self, matrix: cr_matrix.CountMatrix, feature_type: str):

self.feature_mol_name = "guide"
self.feature_bc_name = "protospacer"
self.min_crispr_umi_threshold = min_crispr_umi_threshold

self.method = "GMM"
assert self.method in SUPPORTED_METHODS, f"Method {self.method} not supported"
Expand All @@ -711,22 +717,29 @@ def get_guide_assignments(self) -> dict[bytes, FeatureAssignments]:
log_transform=False, list_feature_ids=[feature_id]
)

in_high_umi_component = GuideAssigner._call_presence(umi_counts, self.method)
in_high_umi_component = GuideAssigner._call_presence(
umi_counts, self.method, min_crispr_umi_threshold=self.min_crispr_umi_threshold
)
assignments[feature_id] = FeatureAssignments(
np.flatnonzero(np.array(in_high_umi_component)), sum(umi_counts), False, None
)
return assignments

@staticmethod
def _call_presence(counts: np.ndarray, method: str = "GMM") -> np.ndarray:
def _call_presence(
counts: np.ndarray,
method: str = "GMM",
*,
min_crispr_umi_threshold: int,
) -> np.ndarray:
"""Classify each cell as positive/negative for a CRISPR feature using a GMM.
A cell must have at least 3 UMIs for this CRISPR feature to be considered positive.
This threshold is used to exclude CRISPR features with only background signal and
no foreground signal. Without this filter, a CRISPR feature with 0 or 1 UMIs in each cell
would call all the cells with one UMI as positive, which renders meaningless the metric
`Cells with one or more protospacers detected`. The threshold value was chosen by being
the smallest sufficient value on experimental data.
A cell must have at least `min_crispr_umi_threshold` UMIs for this CRISPR feature to be
considered positive. This threshold is used to exclude CRISPR features with only background
signal and no foreground signal. Without this filter, a CRISPR feature with 0 or 1 UMIs
in each cell would call all the cells with one UMI as positive, which renders meaningless
the metric `Cells with one or more protospacers detected`. The threshold value was chosen
by being the smallest sufficient value on experimental data.
Args:
counts: feature counts
Expand All @@ -736,7 +749,7 @@ def _call_presence(counts: np.ndarray, method: str = "GMM") -> np.ndarray:
Booleans indicating whether feature is present above background
"""
if method == "GMM":
return call_presence_with_gmm_ab(counts, umi_threshold=3)
return call_presence_with_gmm_ab(counts, min_umi_threshold=min_crispr_umi_threshold)
raise ValueError(f"Method {method} is not supported")

def create_guide_assignments_matrix(self) -> FeatureAssignmentsMatrix:
Expand Down
8 changes: 4 additions & 4 deletions lib/python/cellranger/feature_ref.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import cellranger.hdf5 as cr_h5
from cellranger.feature.antigen.specificity import MHC_ALLELE, TARGETING_ANTIGEN
from cellranger.rna.library import ANTIGEN_LIBRARY_TYPE
from cellranger.targeted.targeted_constants import PROBE_ID_IGNORE_PREFIXES
from cellranger.targeted.targeted_constants import EXCLUDED_PROBE_ID_PREFIXES

FEATURE_TYPE = "feature_type"
# Required HDF5 datasets
Expand Down Expand Up @@ -228,18 +228,18 @@ def __ne__(self, other):

def get_feature_ids_excluding_deprecated_probes(self) -> list[bytes]:
"""Return the list of feature IDs excluding deprecated probes."""
return [f.id for f in self.feature_defs if not f.id.startswith(PROBE_ID_IGNORE_PREFIXES)]
return [f.id for f in self.feature_defs if not f.id.startswith(EXCLUDED_PROBE_ID_PREFIXES)]

def has_deprecated_probes(self) -> bool:
"""Return true if there are deprecated probes in features."""
return any(f.id.startswith(PROBE_ID_IGNORE_PREFIXES) for f in self.feature_defs)
return any(f.id.startswith(EXCLUDED_PROBE_ID_PREFIXES) for f in self.feature_defs)

def get_feature_types_excluding_deprecated_probes(self) -> list[str]:
"""Return the list of feature types excluding deprecated probes."""
return [
f.feature_type
for f in self.feature_defs
if not f.id.startswith(PROBE_ID_IGNORE_PREFIXES)
if not f.id.startswith(EXCLUDED_PROBE_ID_PREFIXES)
]

def get_antigen_control(self) -> tuple | None:
Expand Down
2 changes: 1 addition & 1 deletion lib/python/cellranger/preflight.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@ class _VersionCmd(NamedTuple):
_PACKAGE_VERSION_CMDS = [
_VersionCmd(name="mro", cmd=["mro", "--version"]),
_VersionCmd(name="mrp", cmd=["mrp", "--version"]),
_VersionCmd(name="Anaconda", cmd=["python", "--version"]),
_VersionCmd(name="python", cmd=["python", "--version"]),
_VersionCmd(name="numpy", cmd=["python", "-c", "import numpy; print(numpy.__version__)"]),
_VersionCmd(name="scipy", cmd=["python", "-c", "import scipy; print(scipy.__version__)"]),
_VersionCmd(name="pysam", cmd=["python", "-c", "import pysam; print(pysam.__version__)"]),
Expand Down
11 changes: 5 additions & 6 deletions lib/python/cellranger/targeted/targeted_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,13 +100,12 @@ def get_targeting_method_from_metadata(cls, metadata):
ALL_TARGETING_METHODS = list(TARGETING_METHOD_FILE_NAMES.keys())

# List of gene/probe ID prefixes that are excluded from the filtered_feature_bc_matrix.
PROBE_ID_IGNORE_PREFIXES = (
# Ensure that the corresponding Python and Rust constants are identical.
EXCLUDED_PROBE_ID_PREFIXES = (
b"DEPRECATED",
b"Hum-",
b"IGNORE",
b"INTERGENIC",
b"IR",
b"NC",
b"VAR",
b"VDJ",
b"NC-",
b"VAR_",
b"VDJ_",
)
2 changes: 1 addition & 1 deletion lib/python/tenkit/preflight.py
Original file line number Diff line number Diff line change
Expand Up @@ -564,7 +564,7 @@ def fun():
_PACKAGE_VERSION_CMDS: list[_VersionCmd] = [
_VersionCmd(name="mro", cmd=_call(["mro", "--version"])),
_VersionCmd(name="mrp", cmd=_call(["mrp", "--version"])),
_VersionCmd(name="Anaconda", cmd=_call(["python", "--version"])),
_VersionCmd(name="python", cmd=_call(["python", "--version"])),
_VersionCmd(
name="numpy", cmd=_call(["python", "-c", "import numpy; print(numpy.__version__)"])
),
Expand Down
Loading

0 comments on commit a039816

Please sign in to comment.