# etc/settings.yaml.in

# Input and output locations. The values below are placeholders;
# replace each one with the real path for your data set.
locations:
  reads-dir: '/path/to/reads/'
  output-dir: '/path/to/output/'
  genome-fasta: '/path/to/genome.fasta'
  cdna-fasta: '/path/to/sample.cdna.fasta'
  gtf-file: '/path/to/sample.gtf'

# The "organism" field is needed for GO term analysis. Leave it as an
# empty string if you are not interested in GO analysis. Otherwise set
# it to the first letter of the genus followed by the species name
# (e.g. hsapiens, mmusculus, dmelanogaster, celegans).
organism: ''

mapping:
  # Options are star or hisat2.
  mapper: 'hisat2'
  # Genome build version identifier that is used in hisat2 index file
  # names.
  genome_build: 'GRCm38'

coverage:
  # Options are megadepth or bamCoverage.
  tool: 'megadepth'

# Configuration for read counting, which uses
# GenomicAlignments::summarizeOverlaps.
# The feature type and the grouping attribute must match what is
# available in the GTF file. Most GTF files use "exon" or "CDS" as
# features, and "gene_id" or "transcript_id" for combining features.
counting:
  # Other options are "IntersectionStrict" and "IntersectionNotEmpty".
  counting_mode: 'Union'
  # Boolean. When true, reads that map to more than one feature are not
  # counted (see the inter.feature argument of summarizeOverlaps).
  drop_nonunique: true
  # Other options are "forward" and "reverse" for strand-specific
  # read counting.
  strandedness: 'unspecific'
  feature: 'exon'
  group_feature_by: 'gene_id'
  # How many reads to process at a time (this impacts memory
  # consumption).
  yield_size: 2000000

#DEanalyses:
#  # The names of analyses can be anything but they have to be unique
#  # for each combination of case control group comparisons.
#  # Every analysis also needs a covariates line with a quoted character
#  # string value.
#
#  analysis1:
#    description: "A couple of sentences describing the purpose to perform this particular comparison and the selection of covariates."
#    # If multiple sample names are provided, they must be separated by comma.
#    case_sample_groups: "HBR"
#    control_sample_groups: "UHR"
#
#    # Comma-separated list of additional covariates to control for
#    # in differential expression analysis (e.g. batch, age,
#    # temperature, sequencing_technology etc.). Each one must
#    # correspond to a column in the sample_sheet.csv file.
#    covariates: ""
#
#    # Self-contained reports ship all JavaScript code as part of the result file,
#    # so the result file stays functional over time. This is the default setting.
#    # For a more modular approach that references external sources, set this to FALSE.
#    self_contained: TRUE
#
#  analysis2:
#    description: "Every analysis has its distinct description, which is then shown in the introduction section of the report."
#    case_sample_groups: "UHR"
#    control_sample_groups: "HBR"
#
#    # Comma-separated list of additional covariates to control for
#    # in differential expression analysis (e.g. batch, age,
#    # temperature, sequencing_technology etc.). Each one must
#    # correspond to a column in the sample_sheet.csv file.
#    covariates: ''

# Settings that control how the pipeline is executed.
execution:
  # Boolean: write true or false. A bare yes/no is only treated as a
  # boolean by YAML 1.1 parsers; YAML 1.2 parsers read it as a string.
  submit-to-cluster: false
  jobs: 6
  nice: 19
  mem_mb: 64000
  # NOTE(review): these settings presumably only apply when
  # submit-to-cluster is true -- confirm against the pipeline code.
  cluster:
    missing-file-timeout: 120
    memory: 8G
    stack: 128M
    queue: all.q
    # This is the literal string "none", not a YAML null.
    contact-email: none
    log-dir: '.'
    args: ''
  # Per-rule resource settings.
  # NOTE(review): "__default__" presumably applies to rules that are
  # not listed here, and "memory" values look like megabytes (compare
  # mem_mb and disk_mb) -- confirm against the pipeline code.
  rules:
    __default__:
      threads: 1
      memory: 2000
    translate_sample_sheet_for_report:
      threads: 1
      memory: 500
    trim_qc_reads_pe:
      threads: 1
      memory: 4000
    trim_qc_reads_se:
      threads: 1
      memory: 4000
    star_index:
      threads: 2
      memory: 32000
    hisat2_index:
      threads: 2
      memory: 32000
    salmon_index:
      threads: 8
      memory: 5000
    salmon_quant:
      threads: 8
      memory: 6000
    counts_from_SALMON:
      threads: 1
      memory: 200
    collate_read_counts:
      threads: 1
      memory: 200
    norm_counts_deseq:
      threads: 1
      memory: 1000
    star_map:
      threads: 2
      memory: 16000
    hisat2_map:
      threads: 2
      memory: 8000
      disk_mb: 7000
    index_bam:
      threads: 2
      memory: 500
    multiqc:
      threads: 2
      memory: 500
      disk_mb: 20000
    coverage_bamCoverage:
      threads: 1
      memory: 4000
    coverage_megadepth:
      threads: 2
      memory: 4000
    count_reads:
      threads: 1
      memory: 6000
    check_annotation_files:
      threads: 1
      memory: 6000
    report1:
      threads: 1
      memory: 4000
    deseq_collate_report1:
      threads: 1
      memory: 4000
    report2:
      threads: 1
      memory: 4000
    deseq_collate_report2:
      threads: 1
      memory: 4000
    report3:
      threads: 1
      memory: 4000
    deseq_collate_report3:
      threads: 1
      memory: 4000

# External tools. Each "executable" value is a placeholder that is
# filled in when this template is rendered (presumably at configure
# time -- confirm). The placeholders are quoted so that the template
# is valid YAML and the substituted value is always read as a string.
# "args" holds additional command-line arguments for the tool.
tools:
  gunzip:
    executable: '@GUNZIP@'
    args: ''
  multiqc:
    executable: '@MULTIQC@'
    args: ''
  star_map:
    executable: '@STAR@'
    args: ''
  star_index:
    executable: '@STAR@'
    # If STAR reports an error, the error message will likely suggest
    # which parameter to adjust, for example:
    # args: '--limitGenomeGenerateRAM 31000000000'
    args: ''
  hisat2:
    executable: '@HISAT2@'
    # If low alignment rates are observed with the "--fast" setting,
    # other options to try are "--sensitive" or "--very-sensitive".
    args: '--fast'
  hisat2-build:
    executable: '@HISAT2_BUILD@'
    args: ''
  samtools:
    executable: '@SAMTOOLS@'
    args: ''
  salmon_index:
    executable: '@SALMON@'
    args: 'index'
  salmon_quant:
    executable: '@SALMON@'
    args: 'quant'
  Rscript:
    executable: '@RSCRIPT@'
    args: '--vanilla'
  sed:
    executable: '@SED@'
    args: ''
  fastp:
    executable: '@FASTP@'
    # Please refer to the fastp manual about setting adapter sequences:
    # https://github.com/OpenGene/fastp#adapters
    args: '--adapter_sequence=AGATCGGAAGAGCACACGTCTGAACTCCAGTCA --adapter_sequence_r2=AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT'
  bamCoverage:
    executable: '@BAMCOVERAGE@'
    # See https://deeptools.readthedocs.io/en/develop/content/tools/bamCoverage.html
    # for more detailed arguments used for normalising coverage data
    # files.
    args: '--normalizeUsing BPM --numberOfProcessors 2'
  megadepth:
    executable: '@MEGADEPTH@'
    args: ''