Skip to content

Commit

Permalink
Merge pull request #26 from Irrationone/dev
Browse files Browse the repository at this point in the history
v0.99.1 -- improvements to analysis with nonexistant and rare cell types
  • Loading branch information
Irrationone authored Mar 14, 2019
2 parents 80fbdf8 + ca4a93b commit 3ce5923
Show file tree
Hide file tree
Showing 8 changed files with 69 additions and 106 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Package: cellassign
Version: 0.99.0
Version: 0.99.1
Title: What the Package Does (One Line, Title Case)
Description: What the package does (one paragraph).
Authors@R: c(
Expand Down
58 changes: 28 additions & 30 deletions R/cellassign.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#' @param shrinkage Logical - should the delta parameters
#' have hierarchical shrinkage?
#' @param n_batches Number of data subsample batches to use in inference
#' @param dirichlet_concentration Dirichlet concentration parameter for cell type abundances
#' @param rel_tol_adam The change in Q function value (in pct) below which
#' each optimization round is considered converged
#' @param rel_tol_em The change in log marginal likelihood value (in pct)
Expand Down Expand Up @@ -97,21 +98,22 @@
#' @return
#' An object of class \code{cellassign_fit}. See \code{details}
cellassign <- function(exprs_obj,
marker_gene_info,
s = NULL,
min_delta = log(2),
X = NULL,
B = 10,
shrinkage = FALSE,
n_batches = 1,
rel_tol_adam = 1e-4,
rel_tol_em = 1e-4,
max_iter_adam = 1e5,
max_iter_em = 20,
learning_rate = 0.1,
verbose = TRUE,
sce_assay = "counts",
num_runs = 1) {
marker_gene_info,
s = NULL,
min_delta = 2,
X = NULL,
B = 10,
shrinkage = FALSE,
n_batches = 1,
dirichlet_concentration = 1e-2,
rel_tol_adam = 1e-4,
rel_tol_em = 1e-4,
max_iter_adam = 1e5,
max_iter_em = 20,
learning_rate = 0.1,
verbose = TRUE,
sce_assay = "counts",
num_runs = 1) {

# Work out rho
rho <- NULL
Expand All @@ -123,20 +125,17 @@ cellassign <- function(exprs_obj,
stop("marker_gene_info must either be a matrix or list. See ?cellassign")
}

# Subset genes appropriately
# exprs_obj <- subset_exprs_obj(exprs_obj, rho)

# Get expression input
Y <- extract_expression_matrix(exprs_obj, sce_assay = sce_assay)


# Check X is correct
if(!is.null(X)) {
if(!(is.matrix(X) && is.numeric(X))) {
stop("X must either be NULL or a numeric matrix")
}
}


stopifnot(is.matrix(Y))
stopifnot(is.matrix(rho))
Expand All @@ -150,11 +149,6 @@ cellassign <- function(exprs_obj,
colnames(rho) <- paste0("cell_type_", seq_len(ncol(rho)))

}

# Remove non-expressed genes
val_result <- validate_genes(Y, rho)
Y <- val_result$Y
rho <- val_result$rho

N <- nrow(Y)

Expand All @@ -174,12 +168,16 @@ cellassign <- function(exprs_obj,
message("No size factors supplied - computing from matrix. It is highly recommended to supply size factors calculated using the full gene set")
s <- scran::computeSumFactors(t(Y))
}

# Make sure all size factors are positive
if (any(s <= 0)) {
stop("Cells with size factors <= 0 must be removed prior to analysis.")
}

# Make Dirichlet concentration parameter symmetric if not otherwise specified
if (length(dirichlet_concentration) == 1) {
dirichlet_concentration <- rep(dirichlet_concentration, C)
}

res <- NULL

Expand All @@ -194,16 +192,16 @@ cellassign <- function(exprs_obj,
N = N,
P = P,
B = B,
use_priors = shrinkage,
shrinkage = shrinkage,
verbose = verbose,
n_batches = n_batches,
rel_tol_adam = rel_tol_adam,
rel_tol_em = rel_tol_em,
max_iter_adam = max_iter_adam,
max_iter_em = max_iter_em,
learning_rate = learning_rate,
em_convergence_thres = rel_tol_em,
min_delta = min_delta)
min_delta = min_delta,
dirichlet_concentration = dirichlet_concentration)

return(structure(res, class = "cellassign_fit"))
})
Expand Down
55 changes: 25 additions & 30 deletions R/inference-tensorflow.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ inference_tensorflow <- function(Y,
N,
P,
B = 10,
use_priors,
shrinkage,
verbose = FALSE,
n_batches = 1,
rel_tol_adam = 1e-4,
Expand All @@ -39,8 +39,8 @@ inference_tensorflow <- function(Y,
max_iter_em = 20,
learning_rate = 1e-4,
random_seed = NULL,
em_convergence_thres = 1e-5,
min_delta = log(2)) {
min_delta = 2,
dirichlet_concentration = rep(1e-2, C)) {
tf$reset_default_graph()

tfd <- tf$contrib$distributions
Expand All @@ -66,7 +66,7 @@ inference_tensorflow <- function(Y,
# Variables

## Shrinkage prior on delta
if (use_priors) {
if (shrinkage) {
delta_log_mean <- tf$Variable(0, dtype = tf$float64)
delta_log_variance <- tf$Variable(1, dtype = tf$float64) # May need to bound this or put a prior over this
}
Expand All @@ -77,9 +77,7 @@ inference_tensorflow <- function(Y,

beta <- tf$Variable(tf$random_normal(shape(G,P), mean = 0, stddev = 1, seed = random_seed, dtype = tf$float64), dtype = tf$float64)

# total_concentration <- tf$Variable(tf$random_uniform(shape(1), minval = 0.5, maxval = 10, seed = random_seed, dtype = tf$float64), dtype = tf$float64,
# constraint = function(x) tf$clip_by_value(x, tf$constant(1e-2, dtype = tf$float64), tf$constant(Inf, dtype = tf$float64)))

theta_logit <- tf$Variable(tf$random_normal(shape(C), mean = 0, stddev = 1, seed = random_seed, dtype = tf$float64), dtype = tf$float64)

## Spline variables
a <- tf$exp(tf$Variable(tf$zeros(shape = B, dtype = tf$float64)))
Expand All @@ -90,6 +88,7 @@ inference_tensorflow <- function(Y,

# Transformed variables
delta = tf$exp(delta_log)
theta_log = tf$nn$log_softmax(theta_logit)

# Model likelihood
base_mean <- tf$transpose(tf$einsum('np,gp->gn', X_, beta) + tf$log(s_)) #+ tf$add(tf$log(s_), tf$log(control_pct_), name = "s_to_control"))
Expand All @@ -100,10 +99,6 @@ inference_tensorflow <- function(Y,

mu_cng = tf$transpose(mu_ngc, shape(2,0,1))

# if (random_effects) {
# mu_cng <- mu_cng + psi_times_W
# }

mu_cngb <- tf$tile(tf$expand_dims(mu_cng, axis = 3L), c(1L, 1L, 1L, B))

phi_cng <- tf$reduce_sum(a * tf$exp(-b * tf$square(mu_cngb - basis_means)), 3L) + LOWER_BOUND
Expand All @@ -123,31 +118,32 @@ inference_tensorflow <- function(Y,
Y__ = tf$stack(Y_tensor_list, axis = 2)

y_log_prob_raw <- nb_pdf$log_prob(Y__)
y_log_prob <- tf$transpose(y_log_prob_raw, shape(2,0,1))

y_log_prob <- tf$transpose(y_log_prob_raw, shape(0,2,1))
y_log_prob_sum <- tf$reduce_sum(y_log_prob, 2L) + theta_log
p_y_on_c_unorm <- tf$transpose(y_log_prob_sum, shape(1,0))

gamma_fixed = tf$placeholder(dtype = tf$float64, shape = shape(NULL,C))
p_y_on_c_unorm <- tf$reduce_sum(y_log_prob, 2L)

Q1 = -tf$einsum('nc,cng->', gamma_fixed, y_log_prob)

Q = -tf$einsum('nc,cn->', gamma_fixed, p_y_on_c_unorm)

p_y_on_c_norm <- tf$reshape(tf$reduce_logsumexp(p_y_on_c_unorm, 0L), shape(1,-1))

gamma <- tf$transpose(tf$exp(p_y_on_c_unorm - p_y_on_c_norm))

## Priors
if (use_priors) {
if (shrinkage) {
delta_log_prior <- tfd$Normal(loc = delta_log_mean * rho_,
scale = delta_log_variance)
delta_log_prob <- -tf$reduce_sum(delta_log_prior$log_prob(delta_log))

}

theta_log_prior <- tfd$Dirichlet(concentration = tf$constant(dirichlet_concentration, dtype = tf$float64))
theta_log_prob <- -theta_log_prior$log_prob(tf$exp(theta_log))

## End priors

Q = Q1
if (use_priors) {
Q <- Q + theta_log_prob
if (shrinkage) {
Q <- Q + delta_log_prob
}

Expand All @@ -156,12 +152,10 @@ inference_tensorflow <- function(Y,
train = optimizer$minimize(Q)

# Marginal log likelihood for monitoring convergence
eta_y = tf$reduce_sum(y_log_prob, 2L)

L_y1 = tf$reduce_sum(tf$reduce_logsumexp(eta_y, 0L))
L_y = tf$reduce_sum(tf$reduce_logsumexp(p_y_on_c_unorm, 0L))

L_y <- L_y1
if (use_priors) {
L_y <- L_y - theta_log_prob
if (shrinkage) {
L_y <- L_y - delta_log_prob
}

Expand Down Expand Up @@ -201,7 +195,7 @@ inference_tensorflow <- function(Y,

if(mi %% 20 == 0) {
if (verbose) {
message(paste(mi, sess$run(Q1, feed_dict = gfd)))
message(paste(mi, sess$run(Q, feed_dict = gfd)))
}
Q_new <- sess$run(Q, feed_dict = gfd)
Q_diff = -(Q_new - Q_old) / abs(Q_old)
Expand All @@ -218,17 +212,17 @@ inference_tensorflow <- function(Y,
ll_old <- ll
log_liks <- c(log_liks, ll)

if (ll_diff < em_convergence_thres) {
if (ll_diff < rel_tol_em) {
break
}
}

# Finished EM - peel off final values
variable_list <- list(delta, beta, phi, gamma, mu_ngc, a)
variable_names <- c("delta", "beta", "phi", "gamma", "mu", "a")
variable_list <- list(delta, beta, phi, gamma, mu_ngc, a, tf$exp(theta_log))
variable_names <- c("delta", "beta", "phi", "gamma", "mu", "a", "theta")


if (use_priors) {
if (shrinkage) {
variable_list <- c(variable_list, list(delta_log_mean, delta_log_variance))
variable_names <- c(variable_names, "ld_mean", "ld_var")
}
Expand All @@ -246,6 +240,7 @@ inference_tensorflow <- function(Y,
rownames(mle_params$delta) <- rownames(rho)
colnames(mle_params$delta) <- colnames(rho)
rownames(mle_params$beta) <- rownames(rho)
names(mle_params$theta) <- colnames(rho)


cell_type <- get_mle_cell_type(mle_params$gamma)
Expand Down
18 changes: 0 additions & 18 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -121,24 +121,6 @@ initialize_X <- function(X, N, verbose = FALSE) {
return(X)
}

#' Extract expression matrix from expression object
#'
#' @return The expression matrix and marker gene matrix, with only expressed genes, for input to \code{cellassign}
#'
#' @keywords internal
validate_genes <- function(Y, rho) {
expressed_genes <- colSums(Y) > 0
if (sum(expressed_genes) < ncol(Y)) {
message(paste("Removing genes", paste(colnames(Y)[!expressed_genes], collapse = ", "),
"with <= 0 total counts."))

Y <- Y[,expressed_genes]
rho <- rho[expressed_genes,]
}

return(list(Y=Y, rho=rho))
}

#' Makes sure the exprs obj is of correct size
# subset_exprs_obj <- function(exprs_obj, rho) {
# G_e <- NULL
Expand Down
11 changes: 7 additions & 4 deletions man/cellassign.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions man/inference_tensorflow.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 0 additions & 15 deletions man/validate_genes.Rd

This file was deleted.

10 changes: 5 additions & 5 deletions vignettes/introduction-to-cellassign.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -100,11 +100,11 @@ We then call `cellassign` using the `cellassign()` function, passing in the abov
s <- sizeFactors(example_sce)
fit <- cellassign(exprs_obj = example_sce,
marker_gene_info = example_rho,
s = s,
learning_rate = 1e-2,
shrinkage = TRUE,
verbose = FALSE)
marker_gene_info = example_rho,
s = s,
learning_rate = 1e-2,
shrinkage = TRUE,
verbose = FALSE)
```

This returns a `cellassign_fit` object:
Expand Down

0 comments on commit 3ce5923

Please sign in to comment.