Skip to content

Commit

Permalink
Fixes #63; when sample sheet updates, the collating and following cou…
Browse files Browse the repository at this point in the history
…nting steps should follow accordingly
  • Loading branch information
borauyar committed Mar 29, 2022
1 parent 8e8dc75 commit 2592bf6
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 3 deletions.
13 changes: 12 additions & 1 deletion scripts/collate_read_counts.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@
# Positional command-line arguments:
#   1. input_dir   - directory searched for files ending in ".read_counts.csv"
#   2. colDataFile - table whose row names are the samples to keep
#   3. out_file    - path the collated count table is written to
args <- commandArgs(trailingOnly = TRUE)

# Fail early with a clear message instead of carrying NA paths forward
# (e.g. when the script is still called with the old two-argument form).
if (length(args) < 3) {
  stop("Expected 3 arguments: <input_dir> <colDataFile> <out_file>",
       call. = FALSE)
}

input_dir <- args[1]
colDataFile <- args[2]
out_file <- args[3]

# Dots are escaped so they match a literal "." rather than any character.
count_files <- dir(input_dir, pattern = "\\.read_counts\\.csv$",
                   full.names = TRUE)

Expand All @@ -38,6 +39,16 @@ counts_all <- as.data.frame(Reduce(function(dtf1, dtf2)
# move the V1 column into the row names and drop it from the table
rownames(counts_all) <- counts_all$V1
counts_all$V1 <- NULL

# subset to only keep the counts for the samples in the
# colDataFile (which is the same as the sample sheet)
colData <- read.table(colDataFile, header = TRUE, row.names = 1)

# every sample listed in colData must have a column in the collated counts;
# otherwise stop and list the missing samples, separated so they are readable
missing_samples <- setdiff(rownames(colData), colnames(counts_all))
if (length(missing_samples) > 0) {
  stop("ERROR collating counts for samples in the colData file.\n",
       "The count data for the following samples are missing: ",
       paste(missing_samples, collapse = ", "))
}

# keep only the colData samples, in the order they appear in colData
counts_all <- subset(counts_all, select = rownames(colData))

# save results to out file
write.table(counts_all, out_file, quote = FALSE,
            sep = '\t')
Expand Down
5 changes: 3 additions & 2 deletions snakefile.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,7 +512,8 @@ def hisat2_file_arguments(args):

rule collate_read_counts:
input:
expand(os.path.join(MAPPED_READS_DIR, MAPPER, "{sample}.read_counts.csv"), sample = SAMPLES)
colDataFile = rules.translate_sample_sheet_for_report.output,
count_files = expand(os.path.join(MAPPED_READS_DIR, MAPPER, "{sample}.read_counts.csv"), sample = SAMPLES)
output:
os.path.join(COUNTS_DIR, "raw_counts", MAPPER, "counts.tsv")
resources:
Expand All @@ -522,7 +523,7 @@ def hisat2_file_arguments(args):
mapped_dir = os.path.join(MAPPED_READS_DIR, MAPPER),
script = os.path.join(SCRIPTS_DIR, "collate_read_counts.R")
shell:
"{RSCRIPT_EXEC} {params.script} {params.mapped_dir} {output} >> {log} 2>&1"
"{RSCRIPT_EXEC} {params.script} {params.mapped_dir} {input.colDataFile} {output} >> {log} 2>&1"

# create a normalized counts table including all samples
# using the median-of-ratios normalization procedure of
Expand Down

0 comments on commit 2592bf6

Please sign in to comment.