Skip to content

Commit

Permalink
add files
Browse files Browse the repository at this point in the history
  • Loading branch information
stevekm committed Jun 23, 2017
0 parents commit 2409036
Show file tree
Hide file tree
Showing 9 changed files with 4,822 additions and 0 deletions.
14 changes: 14 additions & 0 deletions compile-report.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/usr/bin/env Rscript

## USAGE: compile-report.R /path/to/peaks_dir
## Renders peaks-report.Rmd (looked up in the current working directory),
## passing the peaks directory to the report as its `input_dir` parameter.
## Requires pandoc version 1.13+
# module load pandoc/1.13.1

# ~~~~~ GET SCRIPT ARGS ~~~~~~~ #
args <- commandArgs(trailingOnly = TRUE)
# fail early with a usage message instead of passing NA into the report
if (length(args) < 1) {
  stop("USAGE: compile-report.R /path/to/peaks_dir", call. = FALSE)
}
# only the first argument is used; any further arguments are ignored
input_dir <- args[1]


# ~~~~~ COMPILE ~~~~~ #
Rmdfile <- "peaks-report.Rmd"
rmarkdown::render(input = Rmdfile, params = list(input_dir = input_dir))
108 changes: 108 additions & 0 deletions peaks-report.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
---
title: "Peaks per Sample Report"
author: "`r system('whoami', intern = TRUE)`"
date: "`r format(Sys.time(), '%B %d, %Y')`"
output:
  html_document:
    keep_md: true
    df_print: paged
params:
  input_dir: "/path/to/peaks_dir"
---

```{r setup, include=FALSE}
# ~~~~~ CHUNK OPTIONS ~~~~~ #
# defaults applied to the chunks of this report: do not echo the source code
# and do not show warnings in the rendered output
knitr::opts_chunk$set(echo = FALSE, warning = FALSE)
# ~~~~~ LIBRARIES ~~~~~ #
library("ggplot2")
library("reshape2")
library("DT")
library("scales")
# ~~~~~ FUNCTIONS ~~~~~ #
find_all_beds <- function (input_dirs, name_pattern = FALSE) {
  # Find all .bed files under the supplied dirs (searched recursively).
  #
  # input_dirs:   character vector of directories to search
  # name_pattern: FALSE (default) to keep every .bed file, or a character
  #               vector of exact file basenames to keep (e.g. "peaks.bed");
  #               NULL is treated like FALSE
  # returns:      character vector of matching file paths, each prefixed with
  #               the directory it was found under
  #
  # `pattern` is a regular expression: escape the dot and anchor at the end so
  # only names ending in ".bed" match (not e.g. "x.bedgraph" or "nobed.txt")
  bed_files <- dir(input_dirs, pattern = "\\.bed$", full.names = TRUE, recursive = TRUE)
  # identical() keeps the condition a single TRUE/FALSE even when several
  # basenames are supplied; `name_pattern != FALSE` would be a vector there
  if (!is.null(name_pattern) && !identical(name_pattern, FALSE)) {
    bed_files <- bed_files[basename(bed_files) %in% as.character(name_pattern)]
  }
  bed_files
}
get_numlines <- function(input_file) {
  # read the whole file with readLines() and report how many lines it holds
  file_lines <- readLines(input_file)
  n_lines <- length(file_lines)
  n_lines
}
mycat <- function(text){
  # print text for the report, inserting a space in front of every newline
  padded <- gsub(pattern = "\n", replacement = " \n", x = text)
  cat(padded)
}
split_sampleID_col <- function(df, new_colnames = NA, sample_colname = "sample", split_char = "-"){
  # Split the sample ID column of df on split_char and return df with the
  # resulting pieces prepended as new leading columns.
  #
  # df:             data frame holding the sample ID column
  # new_colnames:   names for the new columns, one per piece; NA (default)
  #                 or NULL keeps the automatic names (V1, V2, ...)
  # sample_colname: name of the column in df that holds the sample IDs
  # split_char:     separator handed to strsplit() (treated as a regex there)
  # returns:        data frame with the split columns followed by all of df
  #
  # as.character() lets a factor ID column be split as well
  id_parts <- strsplit(x = as.character(df[[sample_colname]]), split = split_char)
  # one row per sample; keep the pieces as character, matching make_bed_df
  split_df <- as.data.frame(do.call(rbind, id_parts), stringsAsFactors = FALSE)
  # all() collapses the check to a single TRUE/FALSE: a vector of several names
  # would otherwise make the `if` condition longer than one (an error in
  # R >= 4.2)
  if (!all(is.na(new_colnames))) colnames(split_df) <- new_colnames
  cbind(split_df, df)
}
make_bed_df <- function(input_dir){
  # Create a data frame with one row per "peaks.bed" file found under input_dir.
  #
  # input_dir: directory searched (recursively) for files named "peaks.bed"
  # returns:   data frame with columns patient, status, mark (split from the
  #            sample ID), sample (name of the dir holding the bed file) and
  #            peaks (number of lines in the bed file)
  # stops with an error when no "peaks.bed" file is found
  bed_files <- find_all_beds(input_dirs = input_dir, name_pattern = "peaks.bed")
  # without this guard an empty search fails later with an unrelated error
  if (length(bed_files) == 0) {
    stop("no 'peaks.bed' files found under: ", paste(input_dir, collapse = ", "), call. = FALSE)
  }
  # sample ID is saved in the dirname of each bed file
  samples <- basename(dirname(bed_files))
  # vapply() guarantees one integer line count per file
  bed_df <- data.frame(sample = samples,
                       peaks = vapply(X = bed_files, FUN = get_numlines, FUN.VALUE = integer(1)),
                       # file = bed_files,
                       stringsAsFactors = FALSE)
  # split the sample ID's into separate columns
  bed_df <- split_sampleID_col(df = bed_df, new_colnames = c("patient", "status", "mark")) # ABC-R-H3K9AC
  # reindex the rownames
  rownames(bed_df) <- seq_len(nrow(bed_df))
  bed_df
}
make_peaks_barplot <- function(df){
  # build a flipped (horizontal) bar chart with one bar per sample whose
  # length is that sample's value in the peaks column
  # re order the df
  # bed_df[order(bed_df[["peaks"]], decreasing = TRUE), ]
  plot_base <- ggplot(data = df, aes(x = sample, y = peaks))
  bars <- plot_base + geom_bar(stat = 'identity') + coord_flip()
  titled <- bars + ggtitle("Peaks per Sample")
  # label the peaks axis with the `comma` formatter
  titled + scale_y_continuous(labels = comma)
}
# ~~~~~ GLOBALS ~~~~~ #
# resolve the report parameter to an absolute path; mustWork = TRUE stops the
# render with a clear error when the directory does not exist
input_dir <- normalizePath(params$input_dir, mustWork = TRUE)
# per-sample peak counts, and the bar plot drawn from them
bed_df <- make_bed_df(input_dir = input_dir)
peaks_barplot <- make_peaks_barplot(df = bed_df)
```

# Input dir

```{r}
# print the normalized input directory path into the report
mycat(input_dir)
```

# Peaks per Sample

## Plot

```{r, fig.height = 18}
# draw the bar plot that was built in the setup chunk
peaks_barplot
```

## Table

```{r}
# table of the per-sample peak counts, shown without a row-name column
# NOTE(review): pageLength = -1 presumably shows all rows on one page — confirm
datatable(data = bed_df, rownames = FALSE, options = list(pageLength = -1))
```

100 changes: 100 additions & 0 deletions test/Sample1-D-H3K27AC/peaks.bed
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
chr1 713404 714771 PEAK_000000006879 5.43218
chr1 762076 763696 PEAK_000000009942 4.73610
chr1 805301 805587 PEAK_000000018007 2.99584
chr1 839714 842477 PEAK_000000015416 3.27085
chr1 856369 856864 PEAK_000000034513 2.52153
chr1 858393 861419 PEAK_000000013148 3.77847
chr1 875351 878268 PEAK_000000015147 3.31411
chr1 893645 896889 PEAK_000000003419 6.19332
chr1 901689 902860 PEAK_000000010313 4.63175
chr1 934368 937436 PEAK_000000017213 3.05593
chr1 948124 949646 PEAK_000000009603 4.82685
chr1 954523 956380 PEAK_000000000880 7.14794
chr1 968147 969541 PEAK_000000014654 3.41174
chr1 974418 976164 PEAK_000000037159 2.48553
chr1 994390 995547 PEAK_000000002158 6.54421
chr1 1003650 1006104 PEAK_000000011767 4.20425
chr1 1050525 1052368 PEAK_000000006930 5.42175
chr1 1072282 1073285 PEAK_000000014210 3.50851
chr1 1092848 1094301 PEAK_000000006902 5.42760
chr1 1105587 1107720 PEAK_000000042428 2.42152
chr1 1136244 1137052 PEAK_000000010470 4.58344
chr1 1140653 1142169 PEAK_000000031526 2.56725
chr1 1143840 1145034 PEAK_000000032154 2.55792
chr1 1147654 1149392 PEAK_000000021174 2.82053
chr1 1166113 1168342 PEAK_000000001375 6.85551
chr1 1208078 1209925 PEAK_000000000123 8.23012
chr1 1240557 1244841 PEAK_000000008691 5.04208
chr1 1259462 1260984 PEAK_000000007869 5.22741
chr1 1283563 1285374 PEAK_000000000063 8.58599
chr1 1288279 1288515 PEAK_000000032162 2.55787
chr1 1306160 1307980 PEAK_000000037146 2.48562
chr1 1309853 1311572 PEAK_000000005586 5.69834
chr1 1333342 1336045 PEAK_000000004572 5.92366
chr1 1341487 1343777 PEAK_000000003950 6.06451
chr1 1368998 1371808 PEAK_000000014547 3.43596
chr1 1373100 1374030 PEAK_000000040526 2.44325
chr1 1406334 1408471 PEAK_000000004957 5.83228
chr1 1440626 1440846 PEAK_000000028336 2.62401
chr1 1446478 1448450 PEAK_000000006910 5.42488
chr1 1475465 1476360 PEAK_000000005356 5.74826
chr1 1508757 1511179 PEAK_000000002165 6.54159
chr1 1534959 1535876 PEAK_000000014091 3.53432
chr1 1549467 1552067 PEAK_000000000679 7.30268
chr1 1564643 1565080 PEAK_000000049659 2.34029
chr1 1565949 1567416 PEAK_000000015905 3.20072
chr1 1589876 1591162 PEAK_000000012383 4.01229
chr1 1609034 1609548 PEAK_000000035140 2.51249
chr1 1623066 1624605 PEAK_000000013084 3.80028
chr1 1655486 1655994 PEAK_000000013468 3.69653
chr1 1677089 1677831 PEAK_000000015922 3.19824
chr1 1702846 1703786 PEAK_000000049108 2.34671
chr1 1708526 1712270 PEAK_000000007659 5.27064
chr1 1714036 1714653 PEAK_000000040436 2.44420
chr1 1765283 1766236 PEAK_000000052526 2.29971
chr1 1772925 1773268 PEAK_000000051732 2.31138
chr1 1792039 1792938 PEAK_000000045313 2.38872
chr1 1819973 1823412 PEAK_000000000022 8.96714
chr1 1839750 1841467 PEAK_000000010323 4.62888
chr1 1849915 1850748 PEAK_000000010575 4.55140
chr1 1950860 1951292 PEAK_000000016615 3.11655
chr1 1956661 1957971 PEAK_000000038894 2.46306
chr1 1959060 1960527 PEAK_000000021343 2.81364
chr1 1976148 1977208 PEAK_000000041117 2.43639
chr1 1981318 1981581 PEAK_000000041520 2.43204
chr1 2064258 2065186 PEAK_000000022206 2.78181
chr1 2120628 2121651 PEAK_000000011314 4.34242
chr1 2125581 2126838 PEAK_000000003933 6.06731
chr1 2135908 2137180 PEAK_000000016024 3.18550
chr1 2143863 2144767 PEAK_000000015608 3.24085
chr1 2158552 2161581 PEAK_000000012689 3.92487
chr1 2165597 2165828 PEAK_000000043032 2.41447
chr1 2186803 2187637 PEAK_000000048717 2.35126
chr1 2221231 2224317 PEAK_000000031089 2.57444
chr1 2225406 2226346 PEAK_000000032541 2.55142
chr1 2230994 2233972 PEAK_000000021175 2.82051
chr1 2245921 2247678 PEAK_000000009078 4.95317
chr1 2292273 2293280 PEAK_000000048268 2.35298
chr1 2294703 2295755 PEAK_000000019171 2.91808
chr1 2305614 2307110 PEAK_000000026298 2.66705
chr1 2310710 2311453 PEAK_000000040564 2.44298
chr1 2318794 2324171 PEAK_000000010442 4.59511
chr1 2343418 2344951 PEAK_000000008491 5.08851
chr1 2456670 2458588 PEAK_000000002370 6.48110
chr1 2477809 2480927 PEAK_000000015120 3.31970
chr1 2486308 2488901 PEAK_000000011418 4.31155
chr1 2509416 2510137 PEAK_000000014105 3.52990
chr1 2516463 2518929 PEAK_000000008687 5.04303
chr1 2573750 2575204 PEAK_000000001506 6.79734
chr1 2585412 2585829 PEAK_000000046217 2.37794
chr1 2986302 2987201 PEAK_000000024021 2.72394
chr1 3370911 3371822 PEAK_000000010589 4.54649
chr1 3433162 3433460 PEAK_000000032872 2.54587
chr1 3446869 3448453 PEAK_000000007042 5.39841
chr1 3454635 3455174 PEAK_000000046434 2.37530
chr1 3486714 3487406 PEAK_000000028502 2.62084
chr1 3491633 3492766 PEAK_000000026382 2.66486
chr1 3495003 3498181 PEAK_000000038069 2.47337
chr1 3509826 3510537 PEAK_000000016141 3.17296
chr1 3513670 3515269 PEAK_000000018382 2.96975
chr1 3525280 3528673 PEAK_000000013869 3.58733
Loading

0 comments on commit 2409036

Please sign in to comment.