Skip to content

Commit

Permalink
Merge pull request #1031 from jiajic/suite_dev
Browse files Browse the repository at this point in the history
feat: labelTransfer & quantile norm
  • Loading branch information
jiajic authored Sep 23, 2024
2 parents e5334e4 + 71e0d31 commit 211fefa
Show file tree
Hide file tree
Showing 6 changed files with 469 additions and 68 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,7 @@ export(writeHMRFresults)
exportMethods("$")
exportMethods("$<-")
exportMethods(interpolateFeature)
exportMethods(labelTransfer)
import(GiottoClass)
import(GiottoUtils)
import(GiottoVisuals)
Expand Down
6 changes: 5 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,19 @@
* remove deprecated `createSpatialEnrich()`. Use `runSpatialEnrich()` instead
* remove deprecated `heatmSpatialCorGenes()`. Use `heatmSpatialCorFeats()` instead
* remove deprecated `runPAGEEnrich_OLD()`. Use `runPAGEEnrich()` instead
* remove `do_pca`, `expression_values`, `feats_to_use` args from `runGiottoHarmony()`. Running PCA during the `harmony::RunHarmony()` call is deprecated.

## Enhancements
* `normalizeGiotto()` now has `'quantile'` normalization
* add `'quantile'` normalization method to `normalizeGiotto()`

## Changes
* `limma`, `plotly`, and `Rtsne` moved to Suggests
* move `progressr` and `jsonlite` dependencies to GiottoUtils v0.1.12
* remove `reshape2` dependency.

## New
* `labelTransfer()` for transferring labels between giotto objects or subsets thereof. Supersedes `doClusterProjection()`

# Giotto 4.1.1

## Bug fixes
Expand Down
328 changes: 327 additions & 1 deletion R/clustering.R
Original file line number Diff line number Diff line change
Expand Up @@ -3262,6 +3262,327 @@ getDendrogramSplits <- function(

# projection ####

#' @name labelTransfer
#' @title Transfer labels/annotations between sets of data via similarity
#' voting
#' @description
#' When two sets of data share an embedding space, transfer the labels from
#' one of the sets to the other based on KNN similarity voting in that space.
#' @param x target object
#' @param y source object. When missing, the source is the `source_cell_ids`
#' subset of `x`.
#' @param spat_unit spatial unit to use. When `NULL`, a default is selected
#' based on `x`.
#' @param feat_type feature type to use. When `NULL`, a default is selected
#' based on `x` and `spat_unit`.
#' @param source_cell_ids cell/spatial IDs with the source labels to transfer
#' @param target_cell_ids cell/spatial IDs to transfer the labels to.
#' When not provided, all cell IDs in `x` that are not in `source_cell_ids`
#' are used.
#' IDs from `source_cell_ids` are always included as well.
#' @param labels metadata column in source with labels to transfer
#' @param k number of k-neighbors to train a KNN classifier (default = 10)
#' @param name metadata column in target to apply the full set of labels to
#' (default = `labels` prefixed with `"trnsfr_"`)
#' @param prob output probabilities together with label predictions
#' (default = TRUE). Probabilities are stored in a column named `name` with
#' the suffix `"_prob"`.
#' @param reduction reduction on cells or features (default = 'cells')
#' @param reduction_method shared reduction method (default = 'pca' space)
#' @param reduction_name name of shared reduction space (default name = 'pca')
#' @param dimensions_to_use dimensions to use in shared reduction space
#' (default = 1:10). Dimensions that do not exist in the reduction are
#' dropped.
#' @param return_gobject logical. Return the giotto object `x` with the new
#' metadata attached (default = TRUE). When `FALSE`, only the table of
#' transferred labels is returned.
#' @returns object `x` with new transferred labels added to metadata. When
#' `return_gobject = FALSE`, a `data.table` with `cell_ID`, the label column
#' (named by `name`), and, when `prob = TRUE`, the probability column.
#' @inheritDotParams FNN::knn -train -test -cl -k -prob
#' @details
#' This function trains a KNN classifier with [FNN::knn()].
#' The training data is from object `y` or `source_cell_ids` subset in `x` and
#' uses existing annotations within the cell metadata.
#' Cells without annotation/labels from `x` or `target_cell_ids` subset in `x`
#' will receive predicted labels (and optional probabilities when
#' `prob = TRUE`).
#'
#' The coordinates used for both training and prediction are always read from
#' the dimension reduction stored in `x`.
#'
#' **IMPORTANT** This projection assumes that you're using the same dimension
#' reduction space (e.g. PCA) and number of dimensions (e.g. first 10 PCs) to
#' train the KNN classifier as you used to create the initial
#' annotations/labels in the source Giotto object.
#'
#' This function can allow you to work with very big data as you can predict
#' cell labels on a smaller & subsetted Giotto object and then project the cell
#' labels to the remaining cells in the target Giotto object. It can also be
#' used to transfer labels from one set of annotated data to another dataset
#' based on expression similarity after joining and integrating.
#'
#' @examples
#' g <- GiottoData::loadGiottoMini("visium")
#' id_subset <- sample(spatIDs(g), 300)
#' n_pred <- nrow(pDataDT(g)) - 300
#'
#' # transfer labels from one object to another ###################
#' g_small <- g[, id_subset]
#' # additional steps to get labels to transfer on smaller object...
#' g <- labelTransfer(g, g_small, labels = "leiden_clus")
#' sum(!g$trnsfr_leiden_clus == g$leiden_clus) / n_pred * 100 # percent wrong
#'
#' # transfer labels between subsets of a single object ###########
#' g <- labelTransfer(g,
#'     labels = "leiden_clus", source_cell_ids = id_subset, name = "knn_leiden2"
#' )
#' sum(!g$knn_leiden2 == g$leiden_clus) / n_pred * 100 # percent wrong
#' @md
NULL

# S4 generic. Methods dispatch on `x` (target) and `y` (source, may be
# missing); all other arguments are passed through `...` to the method.
setGeneric("labelTransfer", function(x, y, ...) standardGeneric("labelTransfer"))

#' @rdname labelTransfer
#' @export
setMethod("labelTransfer", signature(x = "giotto", y = "giotto"), function(
        x, y,
        spat_unit = NULL,
        feat_type = NULL,
        labels,
        k = 10,
        name = paste0("trnsfr_", labels),
        prob = TRUE,
        reduction = "cells",
        reduction_method = "pca",
        reduction_name = "pca",
        dimensions_to_use = 1:10,
        return_gobject = TRUE,
        ...) {
    # Transfer `labels` from source object `y` to target object `x`.
    # Training and test coordinates are both read from the dimension
    # reduction stored in `x`; `y` only contributes the cell IDs and labels.
    # Returns `x` with the new metadata columns, or (when
    # `return_gobject = FALSE`) the data.table of transferred labels.

    # NSE vars
    temp_name <- cell_ID <- temp_name_prob <- NULL

    package_check(pkg_name = "FNN", repository = "CRAN")
    spat_unit <- set_default_spat_unit(x, spat_unit = spat_unit)
    feat_type <- set_default_feat_type(x,
        spat_unit = spat_unit, feat_type = feat_type
    )

    # get data
    cx_src <- getCellMetadata(y,
        spat_unit = spat_unit,
        feat_type = feat_type,
        output = "data.table"
    )
    cx_tgt <- getCellMetadata(x,
        spat_unit = spat_unit,
        feat_type = feat_type,
        output = "data.table"
    )
    dim_coord <- getDimReduction(x,
        spat_unit = spat_unit,
        feat_type = feat_type,
        reduction = reduction,
        reduction_method = reduction_method,
        name = reduction_name,
        output = "matrix"
    )

    # fail early with a clear message instead of a `names<-` error on NULL
    if (!labels %in% colnames(cx_src)) {
        stop("labelTransfer: column '", labels,
            "' not found in the cell metadata of source object `y`",
            call. = FALSE
        )
    }

    # source annotation vector #
    # names : cell_ID
    # values: label
    source_annot_vec <- cx_src[[labels]]
    names(source_annot_vec) <- cx_src[["cell_ID"]]

    # create the matrix from the target object that you want to use for the
    # kNN classifier. The matrix should be the same for the source and target
    # objects (e.g. same PCA space)
    dimensions_to_use <- dimensions_to_use[
        # ensure dims to use exist
        dimensions_to_use %in% seq_len(ncol(dim_coord))
    ]
    if (length(dimensions_to_use) == 0L) {
        stop("labelTransfer: none of `dimensions_to_use` exist in the ",
            "selected dimension reduction",
            call. = FALSE
        )
    }
    # drop = FALSE: keep a matrix (with rownames) for a single dimension
    matrix_to_use <- dim_coord[, dimensions_to_use, drop = FALSE]

    ## create the training and testset from the matrix

    # the training set is those spatial IDs that are in the source
    # (w/ labels) AND target giotto object
    in_common <- rownames(matrix_to_use) %in% names(source_annot_vec)
    if (!any(in_common)) {
        stop("labelTransfer: no cell IDs are shared between source `y` and ",
            "the dimension reduction of target `x`",
            call. = FALSE
        )
    }
    # Source cells without coordinates in `x` cannot be used for training.
    # Dropping them here avoids all-NA training rows.
    source_annot_vec <- source_annot_vec[
        names(source_annot_vec) %in% rownames(matrix_to_use)
    ]
    # rows ordered to pair 1:1 with `source_annot_vec`
    train <- matrix_to_use[names(source_annot_vec), , drop = FALSE]

    # the test set are the remaining cell_IDs that need a label
    test <- matrix_to_use[!in_common, , drop = FALSE]

    if (nrow(test) > 0L) {
        # make prediction
        knnprediction <- FNN::knn(
            train = train,
            test = test,
            cl = source_annot_vec,
            k = k,
            prob = prob,
            ...
        )

        # get prediction results
        knnprediction_vec <- as.vector(knnprediction)
        names(knnprediction_vec) <- rownames(test)

        # add probability information
        if (isTRUE(prob)) {
            probs <- attr(knnprediction, "prob")
            names(probs) <- rownames(test)
        }
    } else {
        # every target cell already carries a source label: nothing to predict
        knnprediction_vec <- character(0L)
        probs <- numeric(0L)
    }

    # create annotation vector for all cell IDs (from source and predicted)
    all_vec <- c(source_annot_vec, knnprediction_vec)
    cx_tgt[, temp_name := all_vec[cell_ID]]

    if (isTRUE(prob)) {
        cx_tgt[, temp_name_prob := probs[cell_ID]]
        cx_tgt <- cx_tgt[, .(cell_ID, temp_name, temp_name_prob)]
        # source cells were not predicted; their labels are taken as certain.
        # Cells that received no label at all keep an NA probability.
        src_ids <- names(source_annot_vec)
        cx_tgt[
            is.na(temp_name_prob) & cell_ID %in% src_ids,
            temp_name_prob := 1
        ]

        data.table::setnames(cx_tgt,
            old = c("temp_name", "temp_name_prob"),
            new = c(name, paste0(name, "_prob"))
        )
    } else {
        cx_tgt <- cx_tgt[, .(cell_ID, temp_name)]
        data.table::setnames(cx_tgt, old = "temp_name", new = name)
    }


    if (return_gobject) {
        x <- addCellMetadata(x,
            spat_unit = spat_unit,
            feat_type = feat_type,
            new_metadata = cx_tgt,
            by_column = TRUE,
            column_cell_ID = "cell_ID"
        )
        return(x)
    } else {
        return(cx_tgt)
    }
})

#' @rdname labelTransfer
#' @export
setMethod("labelTransfer", signature(x = "giotto", y = "missing"), function(
        x,
        spat_unit = NULL,
        feat_type = NULL,
        source_cell_ids,
        target_cell_ids,
        labels,
        k = 10,
        name = paste0("trnsfr_", labels),
        prob = TRUE,
        reduction = "cells",
        reduction_method = "pca",
        reduction_name = "pca",
        dimensions_to_use = 1:10,
        return_gobject = TRUE,
        ...) {
    # Transfer `labels` within a single object `x`: cells in
    # `source_cell_ids` train the KNN classifier and cells in
    # `target_cell_ids` (default: every other cell in the metadata) receive
    # predicted labels. Returns `x` with the new metadata columns, or (when
    # `return_gobject = FALSE`) the data.table of transferred labels.

    # NSE vars
    temp_name <- cell_ID <- temp_name_prob <- NULL

    package_check(pkg_name = "FNN", repository = "CRAN")
    spat_unit <- set_default_spat_unit(x, spat_unit = spat_unit)
    feat_type <- set_default_feat_type(x,
        spat_unit = spat_unit, feat_type = feat_type
    )

    # get data
    cx <- getCellMetadata(x,
        spat_unit = spat_unit,
        feat_type = feat_type,
        output = "data.table"
    )
    dim_coord <- getDimReduction(x,
        spat_unit = spat_unit,
        feat_type = feat_type,
        reduction = reduction,
        reduction_method = reduction_method,
        name = reduction_name,
        output = "matrix"
    )

    # fail early with a clear message instead of a `names<-` error on NULL
    if (!labels %in% colnames(cx)) {
        stop("labelTransfer: column '", labels,
            "' not found in the cell metadata of `x`",
            call. = FALSE
        )
    }

    # target cell IDs (if not provided) are everything not in the source
    # cell IDs
    if (missing(target_cell_ids)) {
        sids <- cx[["cell_ID"]]
        target_cell_ids <- sids[!sids %in% source_cell_ids]
    }

    # Unknown IDs would otherwise surface as NA labels or a bare
    # "subscript out of bounds" error during matrix subsetting.
    known_ids <- intersect(cx[["cell_ID"]], rownames(dim_coord))
    bad_src <- setdiff(source_cell_ids, known_ids)
    if (length(bad_src) > 0L) {
        stop("labelTransfer: ", length(bad_src),
            " of `source_cell_ids` not found in the metadata and dimension ",
            "reduction of `x` (e.g. ",
            paste(utils::head(bad_src, 3L), collapse = ", "), ")",
            call. = FALSE
        )
    }
    bad_tgt <- setdiff(target_cell_ids, known_ids)
    if (length(bad_tgt) > 0L) {
        stop("labelTransfer: ", length(bad_tgt),
            " of `target_cell_ids` not found in the metadata and dimension ",
            "reduction of `x` (e.g. ",
            paste(utils::head(bad_tgt, 3L), collapse = ", "), ")",
            call. = FALSE
        )
    }

    # Source cells keep their existing labels, so they are never predicted.
    # Without this, a source ID repeated in `target_cell_ids` would keep its
    # source label but be reported with a KNN probability.
    target_cell_ids <- setdiff(target_cell_ids, source_cell_ids)

    # source annotation vector #
    # names : cell_ID
    # values: label
    source_annot_vec <- cx[[labels]]
    names(source_annot_vec) <- cx[["cell_ID"]]
    source_annot_vec <- source_annot_vec[source_cell_ids]

    # create the matrix from the target object that you want to use for the
    # kNN classifier. The matrix should be the same for the source and target
    # objects (e.g. same PCA space)
    dimensions_to_use <- dimensions_to_use[
        # ensure dims to use exist
        dimensions_to_use %in% seq_len(ncol(dim_coord))
    ]
    if (length(dimensions_to_use) == 0L) {
        stop("labelTransfer: none of `dimensions_to_use` exist in the ",
            "selected dimension reduction",
            call. = FALSE
        )
    }
    # drop = FALSE: keep a matrix (with rownames) for a single dimension
    matrix_to_use <- dim_coord[, dimensions_to_use, drop = FALSE]

    ## create the training and testset from the matrix

    # the training set is the source cells, with rows ordered to pair 1:1
    # with `source_annot_vec`
    train <- matrix_to_use[names(source_annot_vec), , drop = FALSE]

    # the test set are the cell_IDs that need a label
    test <- matrix_to_use[target_cell_ids, , drop = FALSE]

    if (nrow(test) > 0L) {
        # make prediction
        knnprediction <- FNN::knn(
            train = train,
            test = test,
            cl = source_annot_vec,
            k = k,
            prob = prob,
            ...
        )

        # get prediction results
        knnprediction_vec <- as.vector(knnprediction)
        names(knnprediction_vec) <- rownames(test)

        # add probability information
        if (isTRUE(prob)) {
            probs <- attr(knnprediction, "prob")
            names(probs) <- rownames(test)
        }
    } else {
        # no cells left to predict: only the source labels are reported
        knnprediction_vec <- character(0L)
        probs <- numeric(0L)
    }

    # create annotation vector for all cell IDs (from source and predicted)
    all_vec <- c(source_annot_vec, knnprediction_vec)
    cx[, temp_name := all_vec[cell_ID]]

    if (isTRUE(prob)) {
        cx[, temp_name_prob := probs[cell_ID]]
        cx <- cx[, .(cell_ID, temp_name, temp_name_prob)]
        # source cells were not predicted; their labels are taken as certain.
        # Cells outside both the source and target sets have no label and
        # therefore keep an NA probability instead of a misleading 1.
        src_ids <- names(source_annot_vec)
        cx[
            is.na(temp_name_prob) & cell_ID %in% src_ids,
            temp_name_prob := 1
        ]

        data.table::setnames(cx,
            old = c("temp_name", "temp_name_prob"),
            new = c(name, paste0(name, "_prob"))
        )
    } else {
        cx <- cx[, .(cell_ID, temp_name)]
        data.table::setnames(cx, old = "temp_name", new = name)
    }


    if (return_gobject) {
        x <- addCellMetadata(x,
            spat_unit = spat_unit,
            feat_type = feat_type,
            new_metadata = cx,
            by_column = TRUE,
            column_cell_ID = "cell_ID"
        )
        return(x)
    } else {
        return(cx)
    }
})





#' @title Projection of cluster labels
#' @name doClusterProjection
#' @description Use a fast KNN classifier to predict labels from a smaller
Expand Down Expand Up @@ -3325,10 +3646,15 @@ doClusterProjection <- function(
"cover_tree", "brute"
),
return_gobject = TRUE) {
deprecate_warn(
when = "4.1.2",
what = "doClusterProjection()",
with = "labelTransfer()"
)

# NSE vars
cell_ID <- temp_name_prob <- NULL

# package check for dendextend
package_check(pkg_name = "FNN", repository = "CRAN")

spat_unit <- set_default_spat_unit(
Expand Down
Loading

0 comments on commit 211fefa

Please sign in to comment.