forked from igordot/sns
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
34 changed files
with
5,242 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
|
||
# Created by https://www.gitignore.io/ | ||
|
||
### OSX ### | ||
*.DS_Store | ||
.AppleDouble | ||
.LSOverride | ||
|
||
# Icon must end with two \r | ||
Icon | ||
|
||
# Thumbnails | ||
._* | ||
|
||
# Files that might appear in the root of a volume | ||
.DocumentRevisions-V100 | ||
.fseventsd | ||
.Spotlight-V100 | ||
.TemporaryItems | ||
.Trashes | ||
.VolumeIcon.icns | ||
.com.apple.timemachine.donotpresent | ||
|
||
# Directories potentially created on remote AFP share | ||
.AppleDB | ||
.AppleDesktop | ||
Network Trash Folder | ||
Temporary Items | ||
.apdisk | ||
|
||
|
||
### Linux ### | ||
*~ | ||
|
||
# Temporary files that can be created if a process still holds an open handle to a deleted file | ||
.fuse_hidden* | ||
|
||
# KDE directory preferences | ||
.directory | ||
|
||
# Linux trash folder which might appear on any partition or disk | ||
.Trash-* | ||
|
||
|
||
### Windows ### | ||
# Windows image file caches | ||
Thumbs.db | ||
ehthumbs.db | ||
|
||
# Folder config file | ||
Desktop.ini | ||
|
||
# Recycle Bin used on file shares | ||
$RECYCLE.BIN/ | ||
|
||
# Windows Installer files | ||
*.cab | ||
*.msi | ||
*.msm | ||
*.msp | ||
|
||
# Windows shortcuts | ||
*.lnk | ||
|
||
|
||
### R ### | ||
# History files | ||
.Rhistory | ||
.Rapp.history | ||
|
||
# Session Data files | ||
.RData | ||
|
||
# Example code in package build process | ||
*-Ex.R | ||
|
||
# Output files from R CMD build | ||
/*.tar.gz | ||
|
||
# Output files from R CMD check | ||
/*.Rcheck/ | ||
|
||
# RStudio files | ||
.Rproj.user/ | ||
|
||
# produced vignettes | ||
vignettes/*.html | ||
vignettes/*.pdf | ||
|
||
# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 | ||
.httr-oauth | ||
|
||
# knitr and R markdown default cache directories | ||
/*_cache/ | ||
/cache/ | ||
|
||
# Temporary files created by R markdown | ||
*.utf8.md | ||
*.knit.md | ||
|
||
|
||
### Perl ### | ||
/blib/ | ||
/.build/ | ||
_build/ | ||
cover_db/ | ||
inc/ | ||
Build | ||
!Build/ | ||
Build.bat | ||
.last_cover_stats | ||
/Makefile | ||
/Makefile.old | ||
/MANIFEST.bak | ||
/META.yml | ||
/META.json | ||
/MYMETA.* | ||
nytprof.out | ||
/pm_to_blib | ||
*.o | ||
*.bs | ||
/_eumm/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
#!/usr/bin/env perl | ||
|
||
use strict; | ||
use warnings; | ||
use List::MoreUtils qw(uniq); | ||
|
||
# Usage text shown when no directory argument is supplied.
# The file name mentioned here must match the one main() actually writes.
my $HELP = <<HELP;
Find FASTQ files in a given directory (must have "_R1" or "_1" in file name).
Extract sample names and paired reads based on file names.
Generate sample table file samples.fastq-raw.csv in current directory.
If run multiple times, it will add new samples to the sample table.
usage: gather-fastqs dir
HELP

# A directory argument is required; print the usage text and exit otherwise.
if (!$ARGV[0]) {
    die $HELP;
}

main();
|
||
# Build or extend the sample table from the FASTQ files found in a directory.
#
# Reads the search directory from $ARGV[0], resolves it to an absolute path,
# and searches up to two levels deep (following symlinks) for regular files
# named "*_R1*.fastq.gz" or "*_1.fastq.gz". For every such R1 file the R2 file
# name is derived and kept only if that file exists, the sample name is
# derived from the file name, and a "sample,R1,R2" line is appended to
# samples.fastq-raw.csv in the current directory. The table is then sorted and
# de-duplicated so that repeated runs only add new entries.
#
# Progress and summary counts are printed to STDERR.
# Dies if the directory or an R1 file does not exist, or if the sample table
# cannot be written or de-duplicated.
sub main {
    my $search_dir = $ARGV[0];

    # convert dir from relative to absolute (done in-process rather than via
    # a shell so that paths with spaces or shell metacharacters work)
    require Cwd;
    my $resolved_dir = eval { Cwd::abs_path($search_dir) };
    if ( defined $resolved_dir ) {
        $search_dir = $resolved_dir;
    }

    # check that dir exists
    unless ( -d $search_dir ) {
        die "\n\n ERROR! $search_dir DOES NOT EXIST \n\n";
    }

    # find fastqs in given directory
    # The two -name tests are grouped so that "-type f" applies to both;
    # the list form of open bypasses the shell entirely.
    my @find_cmd = (
        'find', '-L', $search_dir, '-maxdepth', '2', '-type', 'f',
        '(', '-name', '*_R1*.fastq.gz', '-o', '-name', '*_1.fastq.gz', ')',
    );
    open(my $find_fh, '-|', @find_cmd)
        or die "\n\n ERROR! CANNOT RUN find: $! \n\n";
    chomp(my @fastqs = <$find_fh>);
    # a non-zero exit (e.g. an unreadable subdirectory) is reported, not fatal
    close($find_fh)
        or warn " WARNING: find reported a problem while searching $search_dir \n";

    # plain string sort, same ordering as "LC_ALL=C sort"
    @fastqs = sort @fastqs;

    # counters for single and paired reads, plus the set of sample names
    my $reads_se = 0;
    my $reads_pe = 0;
    my %seen_sample;

    # sample table file (opened for append so earlier runs are preserved)
    my $filename = "samples.fastq-raw.csv";
    open(my $fh, ">>", $filename)
        or die "\n\n ERROR! CANNOT OPEN $filename: $! \n\n";

    # process each fastq
    for my $fastq_r1 (@fastqs) {

        # check that R1 exists
        unless ( -e $fastq_r1 ) {
            die "\n\n ERROR! $fastq_r1 DOES NOT EXIST \n\n";
        }

        # generate R2 filename
        my $fastq_r2 = $fastq_r1;
        $fastq_r2 =~ s/(.*)_R1_00(.*\.fastq\.gz)/${1}_R2_00${2}/;
        $fastq_r2 =~ s/(.*)_R1\.fastq\.gz/${1}_R2.fastq.gz/;
        $fastq_r2 =~ s/(.*)_1\.fastq\.gz/${1}_2.fastq.gz/;

        # blank if R2 does not exist
        unless ( -e $fastq_r2 ) {
            $fastq_r2 = "";
        }

        # blank if R2 is same as R1 (in case of not standard file name, for example)
        if ( $fastq_r1 eq $fastq_r2 ) {
            $fastq_r2 = "";
        }

        # count based on read type
        if ( length($fastq_r2) ) {
            $reads_pe++;
        }
        else {
            $reads_se++;
        }

        # extract sample name
        my $sample = $fastq_r1;
        # remove directory structure
        $sample =~ s{.*/}{};
        # bcl2fastq2 format (with S sample number)
        $sample =~ s/_S[0-9]{1,3}_L00[0-9]_R1.*//;
        # bcl2fastq format with 2 barcodes
        $sample =~ s/_[ACTG]{6,}-[ACTG]{6,}_L00[0-9]_R1.*//;
        # bcl2fastq format with 1 barcode
        $sample =~ s/_[ACTG]{4,}_L00[0-9]_R1.*//;
        # no barcodes
        $sample =~ s/_L00[0-9]_R[12].*//;
        # no barcodes or lane ("_R1"/"_R2" suffix)
        $sample =~ s/_R[12]\.fastq\.gz//;
        # no barcodes or lane ("_1"/"_2" suffix)
        $sample =~ s/_[12]\.fastq\.gz//;

        $seen_sample{$sample} = 1;

        # show progress
        print STDERR " SAMPLE : $sample \n";
        print STDERR " FASTQ R1 : $fastq_r1 \n";
        print STDERR " FASTQ R2 : $fastq_r2 \n";

        # print sample table line
        print {$fh} "${sample},${fastq_r1},${fastq_r2}\n";
    }

    # buffered write errors only surface at close
    close($fh)
        or die "\n\n ERROR! CANNOT WRITE $filename: $! \n\n";

    # remove duplicate entries ($filename is a fixed name, so the shell is safe here)
    system("cat $filename | LC_ALL=C sort | uniq > ${filename}.tmp && mv -f ${filename}.tmp $filename") == 0
        or die "\n\n ERROR! CANNOT DE-DUPLICATE $filename \n\n";

    # get number of unique sample names
    my $num_samples = keys %seen_sample;

    # print stats
    print STDERR "\n";
    print STDERR " NUMBER OF SAMPLES : $num_samples \n";
    print STDERR " NUMBER OF SINGLE FILES : $reads_se \n";
    print STDERR " NUMBER OF PAIRED FILES : $reads_pe \n";

    return;
}
|
||
|
||
|
||
# end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
#!/usr/bin/env perl | ||
|
||
use strict; | ||
use warnings; | ||
use File::Basename; | ||
|
||
# Usage message, printed when the genome argument is missing.
my $HELP = <<HELP;
Generate the initial settings.txt file for a specified genome.
usage: generate-settings genome
* genome = name (hg19, mm10, etc.) or exact dir (contains genome.fa and genes.gtf)
HELP

# Bail out with the usage message unless a genome was given.
die $HELP unless $ARGV[0];

main();
|
||
# Write the initial settings.txt for the genome given in $ARGV[0].
#
# If the argument contains a "/", it is used as the genome directory as-is;
# otherwise it is treated as a genome name under /ifs/home/id460/ref/.
# The resulting "GENOME-DIR|<dir>" line overwrites settings.txt in the current
# directory. The GENOME-DIR and REF-FASTA values are then queried through
# scripts/get-set-setting.sh (located next to this file) and printed to STDERR
# so the user can confirm them.
#
# Dies if settings.txt cannot be written. A failing helper script only
# produces a warning and an empty value.
sub main {
    my $genome_arg = $ARGV[0];

    # pipeline directory (the directory that this file is in)
    my $pipeline_dir = dirname(__FILE__);
    my $settings_script = "${pipeline_dir}/scripts/get-set-setting.sh";

    # settings file
    my $settings_file = "settings.txt";

    # use the directory if one was given, otherwise look the genome name up
    # in the id460 reference dir
    my $genome_dir = ( $genome_arg =~ m{/} )
        ? $genome_arg
        : "/ifs/home/id460/ref/${genome_arg}";
    my $genome_dir_setting = "GENOME-DIR|${genome_dir}\n";

    # save genome setting
    open(my $fh, ">", $settings_file)
        or die "\n\n ERROR! CANNOT OPEN $settings_file: $! \n\n";
    print {$fh} $genome_dir_setting;
    # buffered write errors only surface at close
    close($fh)
        or die "\n\n ERROR! CANNOT WRITE $settings_file: $! \n\n";

    # get values to make sure they were set properly
    # NOTE(review): the REF-FASTA query presumably also validates the genome
    # dir inside the helper script - confirm against get-set-setting.sh
    my $settings_genome = _read_setting($settings_script, $settings_file, "GENOME-DIR");
    my $settings_fasta = _read_setting($settings_script, $settings_file, "REF-FASTA");

    # print values
    print STDERR "\n";
    print STDERR " REF DIR : $settings_genome \n";
    print STDERR " REF FASTA : $settings_fasta \n";

    return;
}

# Run "bash <script> <settings_file> <key>" without a shell and return its
# complete standard output (empty string if the command could not be run).
sub _read_setting {
    my ($script, $settings_file, $key) = @_;

    # list-form pipe open: arguments are passed verbatim, so a pipeline
    # directory containing spaces or metacharacters is safe
    my $pipe;
    unless ( open($pipe, '-|', 'bash', $script, $settings_file, $key) ) {
        warn " WARNING: could not run bash $script: $! \n";
        return '';
    }
    my $value = do { local $/; <$pipe> };
    close($pipe)
        or warn " WARNING: $script failed for setting $key \n";

    return defined $value ? $value : '';
}
|
||
|
||
|
||
# end |
Oops, something went wrong.