Skip to content

Commit

Permalink
organization update
Browse files Browse the repository at this point in the history
  • Loading branch information
SamGurr committed Nov 12, 2024
1 parent 8313750 commit 2b7c2c9
Show file tree
Hide file tree
Showing 17 changed files with 1,575 additions and 692 deletions.
319 changes: 319 additions & 0 deletions HPC_analysis/output/Popgen/angsd/all/F0B_allJuveniles_pcangsd.cov

Large diffs are not rendered by default.

10 changes: 10 additions & 0 deletions HPC_analysis/output/Popgen/angsd/all/README
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# sign into a node
ssh himem04

# load
module load bio/pcangsd

# source activate as guided when loading

# call pcangsd.py to create .cov files
pcangsd.py -plink all_final -e 2 -threads 64 -o ./pcangsd/all_final_pcangsd
394 changes: 394 additions & 0 deletions HPC_analysis/output/Popgen/angsd/all/all_final_pcangsd.cov

Large diffs are not rendered by default.

101 changes: 101 additions & 0 deletions HPC_analysis/output/Popgen/angsd/all/broodstock_final_pcangsd.cov

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,29 +1,22 @@
#!/bin/bash
#SBATCH --job-name="F0B_angsd_run_dupscoord"
#SBATCH --job-name="angsdF0alljuvOM"
#SBATCH -t 500:00:00
#SBATCH --mail-type=ALL
#SBATCH --mem=160GB
#SBATCH --mem=180GB
#SBATCH -c 20
#SBATCH -p medmem
#SBATCH [email protected]
#SBATCH --output=./Airradians_lcWGS/snakemake_pipeline/angsd/output/F0_Broodstock/coord_dups/"%x_out.%j"
#SBATCH --error=./Airradians_lcWGS/snakemake_pipeline/angsd/output/F0_Broodstock/coord_dups/"%x_err.%j"
#SBATCH --output=./Airradians_lcWGS/snakemake_pipeline/angsd/output/F0_all_juveniles/"%x_out.%j"
#SBATCH --error=./Airradians_lcWGS/snakemake_pipeline/angsd/output/F0_all_juveniles/"%x_err.%j"

# Set-up


# output dir
mkdir -p angsd/output


## load module
module load bio/angsd/0.933
module load bio/samtools/1.15.1

#IDS=('F1_Broodstock' 'F1_Juveniles' 'F2_Juveniles' 'F2_Broodstock' 'F3_Juveniles') # loop strings

## loop vars by treatment (for loop #2)
#TREATMENT_ALL=('LOWpCO2' 'MODpCO2' 'HIGHpCO2') # a list of the common str on file names and outputs in the for loop below

module load bio/angsd/0.940
module load bio/samtools/1.19

## dir shortcuts
DATDIR=~/Airradians_lcWGS/snakemake_pipeline/angsd/data # Path to generation and life stage bam files
Expand Down Expand Up @@ -57,15 +50,14 @@ MINMAF=0.05 # Minimum minor allele frequency filter
# objective to input a strata / metadata to identify treatments associated with ids downstream..


# Create the directories that the angsd -out and log paths below write into.
mkdir -p "$OUTDIR/F0_Broodstock" # make directory for angsd output files
mkdir -p "$OUTDIR/F0_Broodstock/coord_dups" # was 'FO_Broodstock' (letter O), which did not match the F0_Broodstock output path
mkdir -p "$OUTDIR/F0_all_juveniles" # make directory for angsd output files; was 'all_juveniles', but angsd writes to F0_all_juveniles

# nav to dir
cd $DATDIR/F0_Broodstock/merged_bams/coord_dups_removed # NAV TO THE REMOVED DUPLICATES BY QUERYNAME!
ls ./*.bam | sort | uniq > ./merged_bamlist.txt # create a list of all bam files in merged_bams
cd $DATDIR/Master_query_dups_removed # NAV TO THE REMOVED DUPLICATES BY QUERYNAME!
#ls *.bam | sort | uniq > ./merged_bamlist.txt # create a list of all bam files in merged_bams

# calculate the number of individuals and the minimum-individual threshold
nIND=$(wc -l $DATDIR/F0_Broodstock/merged_bams/coord_dups_removed/merged_bamlist.txt | awk '{print $1}') # count lines of the bamlist; awk keeps only the count field of the wc output
nIND=$(wc -l $DATDIR/Master_query_dups_removed/F0_all_juveniles_OM_bamlist.txt | awk '{print $1}') # count lines of the bamlist; awk keeps only the count field of the wc output
minIND=$(echo "(${nIND} * 0.95)" | bc) # min individuals is 95% of the total count - NOTE(review): other comments say 90%, confirm the intended threshold
minINDRd=$(printf "%.0f" ${minIND}) # round to a whole number

Expand All @@ -74,11 +66,11 @@ MINDP=$(echo "(${minINDRd} * 5)" | bc) # min coverage of 5X accoutning for the m
MAXDP=$(echo "(${minINDRd} * 20)" | bc) # max coverage of 20X accounting for the minimum num of individual for genotype calls as 90% and assuming per indiv per loci

# build a unique outbase that records the filter settings in the output file names
# NOTE(review): the name ends in '_minmapQ' with no value after it, and there is no '_' between $MAXDP and 'minDind' - confirm whether a mapping-quality value and a separator were intended
OUTBASE='F0Broodstock_doMaf'$DOMAF'_minMaf'$MINMAF'_majorminor'$DOMAJORMINOR'_minind'$minINDRd'_minD5x'$MINDP'_maxD20x'$MAXDP'minDind'$SETMINDEPTHIND'_maxDind'$SETMAXDEPTHIND'_minq'$MINQ'_minmapQ' # Build base name of output files
OUTBASE='F0_OM_all_juveniles_doMaf'$DOMAF'_minMaf'$MINMAF'_majorminor'$DOMAJORMINOR'_minind'$minINDRd'_minD5x'$MINDP'_maxD20x'$MAXDP'minDind'$SETMINDEPTHIND'_maxDind'$SETMAXDEPTHIND'_minq'$MINQ'_minmapQ' # Build base name of output files

# run angsd
angsd -b $DATDIR/F0_Broodstock/merged_bams/coord_dups_removed/merged_bamlist.txt \
-ref $REFDIR/Argopecten_irradians_irradians_genome.fasta \
angsd -b $DATDIR/Master_query_dups_removed/F0_all_juveniles_OM_bamlist.txt \
-ref $REFDIR/GCA_041381155.1_Ai_NY_genomic.fna \
-GL $GENOLIKE \
-doGlf $DOGLF \
-doMaf $DOMAF \
Expand All @@ -103,11 +95,11 @@ angsd -b $DATDIR/F0_Broodstock/merged_bams/coord_dups_removed/merged_bamlist.txt
-minMapQ $MINMAPQ \
-minMaf $MINMAF \
-SNP_pval 1e-6 \
-P -20 \
-P 2 \
-remove_bads 1 \
-only_proper_pairs 1 \
-uniqueOnly 1 \
-C 50 \
-out $OUTDIR/F0_Broodstock/coord_dups/$OUTBASE \
>& $OUTDIR/F0_Broodstock/coord_dups/$OUTBASE'.log';
-out $OUTDIR/F0_all_juveniles/$OUTBASE \
>& $OUTDIR/F0_all_juveniles/$OUTBASE'.log';
#done
Original file line number Diff line number Diff line change
@@ -1,29 +1,22 @@
#!/bin/bash
#SBATCH --job-name="F0B_angsd_run_dupsquery"
#SBATCH --job-name="angsdF0alljuv"
#SBATCH -t 500:00:00
#SBATCH --mail-type=ALL
#SBATCH --mem=160GB
#SBATCH --mem=180GB
#SBATCH -c 20
#SBATCH -p medmem
#SBATCH [email protected]
#SBATCH --output=./Airradians_lcWGS/snakemake_pipeline/angsd/output/F0_Broodstock/query_dups/"%x_out.%j"
#SBATCH --error=./Airradians_lcWGS/snakemake_pipeline/angsd/output/F0_Broodstock/query_dups/"%x_err.%j"
#SBATCH --output=./Airradians_lcWGS/snakemake_pipeline/angsd/output/F0_all_juveniles/"%x_out.%j"
#SBATCH --error=./Airradians_lcWGS/snakemake_pipeline/angsd/output/F0_all_juveniles/"%x_err.%j"

# Set-up


# output dir
mkdir -p angsd/output


## load module
module load bio/angsd/0.933
module load bio/samtools/1.15.1

#IDS=('F1_Broodstock' 'F1_Juveniles' 'F2_Juveniles' 'F2_Broodstock' 'F3_Juveniles') # loop strings

## loop vars by treatment (for loop #2)
#TREATMENT_ALL=('LOWpCO2' 'MODpCO2' 'HIGHpCO2') # a list of the common str on file names and outputs in the for loop below

module load bio/angsd/0.940
module load bio/samtools/1.19

## dir shortcuts
DATDIR=~/Airradians_lcWGS/snakemake_pipeline/angsd/data # Path to generation and life stage bam files
Expand Down Expand Up @@ -57,15 +50,14 @@ MINMAF=0.05 # Minimum minor allele frequency filter
# objective to input a strata / metadata to identify treatments associated with ids downstream..


# Create the directories that the angsd -out and log paths below write into.
mkdir -p "$OUTDIR/F0_Broodstock" # make directory for angsd output files
mkdir -p "$OUTDIR/F0_Broodstock/query_dups" # was 'FO_Broodstock' (letter O), which did not match the F0_Broodstock output path
mkdir -p "$OUTDIR/F0_all_juveniles" # make directory for angsd output files; was 'all_juveniles', but angsd writes to F0_all_juveniles

# nav to dir
cd $DATDIR/F0_Broodstock/merged_bams/query_dups_removed # NAV TO THE REMOVED DUPLICATES BY QUERYNAME!
ls ./*.bam | sort | uniq > ./merged_bamlist.txt # create a list of all bam files in merged_bams
cd $DATDIR/Master_query_dups_removed # NAV TO THE REMOVED DUPLICATES BY QUERYNAME!
#ls *.bam | sort | uniq > ./merged_bamlist.txt # create a list of all bam files in merged_bams

# calculate the number of individuals and the minimum-individual threshold
nIND=$(wc -l $DATDIR/F0_Broodstock/merged_bams/query_dups_removed/merged_bamlist.txt | awk '{print $1}') # count lines of the bamlist; awk keeps only the count field of the wc output
nIND=$(wc -l $DATDIR/Master_query_dups_removed/F0_all_juveniles_bamlist.txt | awk '{print $1}') # count lines of the bamlist; awk keeps only the count field of the wc output
minIND=$(echo "(${nIND} * 0.95)" | bc) # min individuals is 95% of the total count - NOTE(review): other comments say 90%, confirm the intended threshold
minINDRd=$(printf "%.0f" ${minIND}) # round to a whole number

Expand All @@ -74,11 +66,11 @@ MINDP=$(echo "(${minINDRd} * 5)" | bc) # min coverage of 5X accoutning for the m
MAXDP=$(echo "(${minINDRd} * 20)" | bc) # max coverage of 20X accounting for the minimum num of individual for genotype calls as 90% and assuming per indiv per loci

# build a unique outbase that records the filter settings in the output file names
# NOTE(review): the name ends in '_minmapQ' with no value after it, and there is no '_' between $MAXDP and 'minDind' - confirm whether a mapping-quality value and a separator were intended
OUTBASE='F0Broodstock_doMaf'$DOMAF'_minMaf'$MINMAF'_majorminor'$DOMAJORMINOR'_minind'$minINDRd'_minD5x'$MINDP'_maxD20x'$MAXDP'minDind'$SETMINDEPTHIND'_maxDind'$SETMAXDEPTHIND'_minq'$MINQ'_minmapQ' # Build base name of output files
OUTBASE='F0_all_juveniles_doMaf'$DOMAF'_minMaf'$MINMAF'_majorminor'$DOMAJORMINOR'_minind'$minINDRd'_minD5x'$MINDP'_maxD20x'$MAXDP'minDind'$SETMINDEPTHIND'_maxDind'$SETMAXDEPTHIND'_minq'$MINQ'_minmapQ' # Build base name of output files

# run angsd
angsd -b $DATDIR/F0_Broodstock/merged_bams/query_dups_removed/merged_bamlist.txt \
-ref $REFDIR/Argopecten_irradians_irradians_genome.fasta \
angsd -b $DATDIR/Master_query_dups_removed/F0_all_juveniles_bamlist.txt \
-ref $REFDIR/GCA_041381155.1_Ai_NY_genomic.fna \
-GL $GENOLIKE \
-doGlf $DOGLF \
-doMaf $DOMAF \
Expand All @@ -103,11 +95,11 @@ angsd -b $DATDIR/F0_Broodstock/merged_bams/query_dups_removed/merged_bamlist.txt
-minMapQ $MINMAPQ \
-minMaf $MINMAF \
-SNP_pval 1e-6 \
-P -20 \
-P 20 \
-remove_bads 1 \
-only_proper_pairs 1 \
-uniqueOnly 1 \
-C 50 \
-out $OUTDIR/F0_Broodstock/query_dups/$OUTBASE \
>& $OUTDIR/F0_Broodstock/query_dups/$OUTBASE'.log';
-out $OUTDIR/F0_all_juveniles/$OUTBASE \
>& $OUTDIR/F0_all_juveniles/$OUTBASE'.log';
#done
165 changes: 0 additions & 165 deletions HPC_analysis/scripts/lcWGS/F1_Broodstock_angsd_dupsquery.sh

This file was deleted.

Loading

0 comments on commit 2b7c2c9

Please sign in to comment.