-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
17 changed files
with
1,575 additions
and
692 deletions.
There are no files selected for viewing
319 changes: 319 additions & 0 deletions
319
HPC_analysis/output/Popgen/angsd/all/F0B_allJuveniles_pcangsd.cov
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# sign into a node | ||
ssh himem04 | ||
|
||
# load | ||
module load bio/pcangsd | ||
|
||
# source activate as guided when loading | ||
|
||
# call pcangsd.py to create .cov files | ||
pcangsd.py -plink all_final -e 2 -threads 64 -o ./pcangsd/all_final_pcangsd |
394 changes: 394 additions & 0 deletions
394
HPC_analysis/output/Popgen/angsd/all/all_final_pcangsd.cov
Large diffs are not rendered by default.
Oops, something went wrong.
101 changes: 101 additions & 0 deletions
101
HPC_analysis/output/Popgen/angsd/all/broodstock_final_pcangsd.cov
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,29 +1,22 @@ | ||
#!/bin/bash | ||
#SBATCH --job-name="F0B_angsd_run_dupscoord" | ||
#SBATCH --job-name="angsdF0alljuvOM" | ||
#SBATCH -t 500:00:00 | ||
#SBATCH --mail-type=ALL | ||
#SBATCH --mem=160GB | ||
#SBATCH --mem=180GB | ||
#SBATCH -c 20 | ||
#SBATCH -p medmem | ||
#SBATCH [email protected] | ||
#SBATCH --output=./Airradians_lcWGS/snakemake_pipeline/angsd/output/F0_Broodstock/coord_dups/"%x_out.%j" | ||
#SBATCH --error=./Airradians_lcWGS/snakemake_pipeline/angsd/output/F0_Broodstock/coord_dups/"%x_err.%j" | ||
#SBATCH --output=./Airradians_lcWGS/snakemake_pipeline/angsd/output/F0_all_juveniles/"%x_out.%j" | ||
#SBATCH --error=./Airradians_lcWGS/snakemake_pipeline/angsd/output/F0_all_juveniles/"%x_err.%j" | ||
|
||
# Set-up | ||
|
||
|
||
# output dir | ||
mkdir -p angsd/output | ||
|
||
|
||
## load module | ||
module load bio/angsd/0.933 | ||
module load bio/samtools/1.15.1 | ||
|
||
#IDS=('F1_Broodstock' 'F1_Juveniles' 'F2_Juveniles' 'F2_Broodstock' 'F3_Juveniles') # loop strings | ||
|
||
## loop vars by treatment (for loop #2) | ||
#TREATMENT_ALL=('LOWpCO2' 'MODpCO2' 'HIGHpCO2') # a list of the common str on file names and outputs in the for loop below | ||
|
||
module load bio/angsd/0.940 | ||
module load bio/samtools/1.19 | ||
|
||
## dir shortcuts | ||
DATDIR=~/Airradians_lcWGS/snakemake_pipeline/angsd/data # Path to generation and life stage bam files | ||
|
@@ -57,15 +50,14 @@ MINMAF=0.05 # Minimum minor allele frequency filter | |
# objective: input a strata / metadata file to identify the treatments associated with ids downstream | ||
|
||
|
||
mkdir -p $OUTDIR/F0_Broodstock # make director for ansd output files | ||
mkdir -p $OUTDIR/FO_Broodstock/coord_dups | ||
mkdir -p $OUTDIR/all_juveniles # make directory for angsd output files (NOTE: angsd below writes to $OUTDIR/F0_all_juveniles - confirm this path) | ||
|
||
# nav to dir | ||
cd $DATDIR/F0_Broodstock/merged_bams/coord_dups_removed # NAV TO THE REMOVED DUPLICATES BY QUERYNAME! | ||
ls ./*.bam | sort | uniq > ./merged_bamlist.txt # create a list of all bam files in merged_bams | ||
cd $DATDIR/Master_query_dups_removed # NAV TO THE REMOVED DUPLICATES BY QUERYNAME! | ||
#ls *.bam | sort | uniq > ./merged_bamlist.txt # create a list of all bam files in merged_bams | ||
|
||
# calculate number of individuals | ||
nIND=$(wc -l $DATDIR/F0_Broodstock/merged_bams/coord_dups_removed/merged_bamlist.txt | awk '{print $1}') # call and count lines of the bamlist with delimiter id | ||
nIND=$(wc -l $DATDIR/Master_query_dups_removed/F0_all_juveniles_OM_bamlist.txt | awk '{print $1}') # call and count lines of the bamlist with delimiter id | ||
minIND=$(echo "(${nIND} * 0.95)" | bc) # minimum number of individuals is 95% of the total count | ||
minINDRd=$(printf "%.0f" ${minIND}) # round that | ||
|
||
|
@@ -74,11 +66,11 @@ MINDP=$(echo "(${minINDRd} * 5)" | bc) # min coverage of 5X accoutning for the m | |
MAXDP=$(echo "(${minINDRd} * 20)" | bc) # max total depth: 20X per individual per locus, multiplied by the minimum number of individuals (95% of the total) required for genotype calls | ||
|
||
# call unique outbase | ||
OUTBASE='F0Broodstock_doMaf'$DOMAF'_minMaf'$MINMAF'_majorminor'$DOMAJORMINOR'_minind'$minINDRd'_minD5x'$MINDP'_maxD20x'$MAXDP'minDind'$SETMINDEPTHIND'_maxDind'$SETMAXDEPTHIND'_minq'$MINQ'_minmapQ' # Build base name of output files | ||
OUTBASE='F0_OM_all_juveniles_doMaf'$DOMAF'_minMaf'$MINMAF'_majorminor'$DOMAJORMINOR'_minind'$minINDRd'_minD5x'$MINDP'_maxD20x'$MAXDP'minDind'$SETMINDEPTHIND'_maxDind'$SETMAXDEPTHIND'_minq'$MINQ'_minmapQ' # Build base name of output files | ||
|
||
# run angsd | ||
angsd -b $DATDIR/F0_Broodstock/merged_bams/coord_dups_removed/merged_bamlist.txt \ | ||
-ref $REFDIR/Argopecten_irradians_irradians_genome.fasta \ | ||
angsd -b $DATDIR/Master_query_dups_removed/F0_all_juveniles_OM_bamlist.txt \ | ||
-ref $REFDIR/GCA_041381155.1_Ai_NY_genomic.fna \ | ||
-GL $GENOLIKE \ | ||
-doGlf $DOGLF \ | ||
-doMaf $DOMAF \ | ||
|
@@ -103,11 +95,11 @@ angsd -b $DATDIR/F0_Broodstock/merged_bams/coord_dups_removed/merged_bamlist.txt | |
-minMapQ $MINMAPQ \ | ||
-minMaf $MINMAF \ | ||
-SNP_pval 1e-6 \ | ||
-P -20 \ | ||
-P 2 \ | ||
-remove_bads 1 \ | ||
-only_proper_pairs 1 \ | ||
-uniqueOnly 1 \ | ||
-C 50 \ | ||
-out $OUTDIR/F0_Broodstock/coord_dups/$OUTBASE \ | ||
>& $OUTDIR/F0_Broodstock/coord_dups/$OUTBASE'.log'; | ||
-out $OUTDIR/F0_all_juveniles/$OUTBASE \ | ||
>& $OUTDIR/F0_all_juveniles/$OUTBASE'.log'; | ||
#done |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,29 +1,22 @@ | ||
#!/bin/bash | ||
#SBATCH --job-name="F0B_angsd_run_dupsquery" | ||
#SBATCH --job-name="angsdF0alljuv" | ||
#SBATCH -t 500:00:00 | ||
#SBATCH --mail-type=ALL | ||
#SBATCH --mem=160GB | ||
#SBATCH --mem=180GB | ||
#SBATCH -c 20 | ||
#SBATCH -p medmem | ||
#SBATCH [email protected] | ||
#SBATCH --output=./Airradians_lcWGS/snakemake_pipeline/angsd/output/F0_Broodstock/query_dups/"%x_out.%j" | ||
#SBATCH --error=./Airradians_lcWGS/snakemake_pipeline/angsd/output/F0_Broodstock/query_dups/"%x_err.%j" | ||
#SBATCH --output=./Airradians_lcWGS/snakemake_pipeline/angsd/output/F0_all_juveniles/"%x_out.%j" | ||
#SBATCH --error=./Airradians_lcWGS/snakemake_pipeline/angsd/output/F0_all_juveniles/"%x_err.%j" | ||
|
||
# Set-up | ||
|
||
|
||
# output dir | ||
mkdir -p angsd/output | ||
|
||
|
||
## load module | ||
module load bio/angsd/0.933 | ||
module load bio/samtools/1.15.1 | ||
|
||
#IDS=('F1_Broodstock' 'F1_Juveniles' 'F2_Juveniles' 'F2_Broodstock' 'F3_Juveniles') # loop strings | ||
|
||
## loop vars by treatment (for loop #2) | ||
#TREATMENT_ALL=('LOWpCO2' 'MODpCO2' 'HIGHpCO2') # a list of the common str on file names and outputs in the for loop below | ||
|
||
module load bio/angsd/0.940 | ||
module load bio/samtools/1.19 | ||
|
||
## dir shortcuts | ||
DATDIR=~/Airradians_lcWGS/snakemake_pipeline/angsd/data # Path to generation and life stage bam files | ||
|
@@ -57,15 +50,14 @@ MINMAF=0.05 # Minimum minor allele frequency filter | |
# objective: input a strata / metadata file to identify the treatments associated with ids downstream | ||
|
||
|
||
mkdir -p $OUTDIR/F0_Broodstock # make director for ansd output files | ||
mkdir -p $OUTDIR/FO_Broodstock/query_dups | ||
mkdir -p $OUTDIR/all_juveniles # make directory for angsd output files (NOTE: angsd below writes to $OUTDIR/F0_all_juveniles - confirm this path) | ||
|
||
# nav to dir | ||
cd $DATDIR/F0_Broodstock/merged_bams/query_dups_removed # NAV TO THE REMOVED DUPLICATES BY QUERYNAME! | ||
ls ./*.bam | sort | uniq > ./merged_bamlist.txt # create a list of all bam files in merged_bams | ||
cd $DATDIR/Master_query_dups_removed # NAV TO THE REMOVED DUPLICATES BY QUERYNAME! | ||
#ls *.bam | sort | uniq > ./merged_bamlist.txt # create a list of all bam files in merged_bams | ||
|
||
# calculate number of individuals | ||
nIND=$(wc -l $DATDIR/F0_Broodstock/merged_bams/query_dups_removed/merged_bamlist.txt | awk '{print $1}') # call and count lines of the bamlist with delimiter id | ||
nIND=$(wc -l $DATDIR/Master_query_dups_removed/F0_all_juveniles_bamlist.txt | awk '{print $1}') # call and count lines of the bamlist with delimiter id | ||
minIND=$(echo "(${nIND} * 0.95)" | bc) # minimum number of individuals is 95% of the total count | ||
minINDRd=$(printf "%.0f" ${minIND}) # round that | ||
|
||
|
@@ -74,11 +66,11 @@ MINDP=$(echo "(${minINDRd} * 5)" | bc) # min coverage of 5X accoutning for the m | |
MAXDP=$(echo "(${minINDRd} * 20)" | bc) # max total depth: 20X per individual per locus, multiplied by the minimum number of individuals (95% of the total) required for genotype calls | ||
|
||
# call unique outbase | ||
OUTBASE='F0Broodstock_doMaf'$DOMAF'_minMaf'$MINMAF'_majorminor'$DOMAJORMINOR'_minind'$minINDRd'_minD5x'$MINDP'_maxD20x'$MAXDP'minDind'$SETMINDEPTHIND'_maxDind'$SETMAXDEPTHIND'_minq'$MINQ'_minmapQ' # Build base name of output files | ||
OUTBASE='F0_all_juveniles_doMaf'$DOMAF'_minMaf'$MINMAF'_majorminor'$DOMAJORMINOR'_minind'$minINDRd'_minD5x'$MINDP'_maxD20x'$MAXDP'minDind'$SETMINDEPTHIND'_maxDind'$SETMAXDEPTHIND'_minq'$MINQ'_minmapQ' # Build base name of output files | ||
|
||
# run angsd | ||
angsd -b $DATDIR/F0_Broodstock/merged_bams/query_dups_removed/merged_bamlist.txt \ | ||
-ref $REFDIR/Argopecten_irradians_irradians_genome.fasta \ | ||
angsd -b $DATDIR/Master_query_dups_removed/F0_all_juveniles_bamlist.txt \ | ||
-ref $REFDIR/GCA_041381155.1_Ai_NY_genomic.fna \ | ||
-GL $GENOLIKE \ | ||
-doGlf $DOGLF \ | ||
-doMaf $DOMAF \ | ||
|
@@ -103,11 +95,11 @@ angsd -b $DATDIR/F0_Broodstock/merged_bams/query_dups_removed/merged_bamlist.txt | |
-minMapQ $MINMAPQ \ | ||
-minMaf $MINMAF \ | ||
-SNP_pval 1e-6 \ | ||
-P -20 \ | ||
-P 20 \ | ||
-remove_bads 1 \ | ||
-only_proper_pairs 1 \ | ||
-uniqueOnly 1 \ | ||
-C 50 \ | ||
-out $OUTDIR/F0_Broodstock/query_dups/$OUTBASE \ | ||
>& $OUTDIR/F0_Broodstock/query_dups/$OUTBASE'.log'; | ||
-out $OUTDIR/F0_all_juveniles/$OUTBASE \ | ||
>& $OUTDIR/F0_all_juveniles/$OUTBASE'.log'; | ||
#done |
165 changes: 0 additions & 165 deletions
165
HPC_analysis/scripts/lcWGS/F1_Broodstock_angsd_dupsquery.sh
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.