diff --git a/.dockstore.yml b/.dockstore.yml
index 263f8e54d89..c51407c4d95 100644
--- a/.dockstore.yml
+++ b/.dockstore.yml
@@ -240,6 +240,7 @@ workflows:
       branches:
         - master
         - ah_var_store
+        - vs_1520_loftee
       tags:
         - /.*/
   - name: GvsCreateVATFilesFromBigQuery
@@ -343,7 +344,7 @@ workflows:
       branches:
         - master
         - ah_var_store
-        - vs_1739_cmrg_learnings
+        - vs_1520_loftee
       tags:
         - /.*/
   - name: GvsIngestTieout
diff --git a/scripts/variantstore/scripts/variant_annotation_table/schema/variant_transcript_schema.json b/scripts/variantstore/scripts/variant_annotation_table/schema/variant_transcript_schema.json
index 067910322b1..8a420eca440 100644
--- a/scripts/variantstore/scripts/variant_annotation_table/schema/variant_transcript_schema.json
+++ b/scripts/variantstore/scripts/variant_annotation_table/schema/variant_transcript_schema.json
@@ -431,6 +431,48 @@
     "type": "String",
     "mode": "Nullable"
   },
+  {
+    "description": "HGNC (HUGO Gene Nomenclature Committee) Symbol",
+    "name": "hgnc_symbol",
+    "type": "String",
+    "mode": "Nullable"
+  },
+  {
+    "description": "HGNC_ID (HUGO Gene Nomenclature Committee) Identifier",
+    "name": "hgnc_id",
+    "type": "Integer",
+    "mode": "Nullable"
+  },
+  {
+    "description": "LOFTEE Loss-of-function annotation (HC = High Confidence; LC = Low Confidence)",
+    "name": "LoF",
+    "type": "String",
+    "mode": "Nullable"
+  },
+  {
+    "description": "LOFTEE Reason for LoF not being HC",
+    "name": "LoF_filter",
+    "type": "String",
+    "mode": "Repeated"
+  },
+  {
+    "description": "LOFTEE Possible warning flags for LoF",
+    "name": "LoF_flags",
+    "type": "String",
+    "mode": "Repeated"
+  },
+  {
+    "description": "LOFTEE Info used for LoF annotation",
+    "name": "LoF_info",
+    "type": "String",
+    "mode": "Repeated"
+  },
+  {
+    "description": "GERP (Genomic Evolutionary Rate Profiling) Scores",
+    "name": "GERP",
+    "type": "Float",
+    "mode": "Repeated"
+  },
   {
     "description": "gnomAD: 'Total' frequency",
     "name": "gnomad_all_af",
diff --git a/scripts/variantstore/scripts/variant_annotation_table/schema/vat_schema.json b/scripts/variantstore/scripts/variant_annotation_table/schema/vat_schema.json
index f8d829b6048..241f06b8529 100644
--- a/scripts/variantstore/scripts/variant_annotation_table/schema/vat_schema.json
+++ b/scripts/variantstore/scripts/variant_annotation_table/schema/vat_schema.json
@@ -431,6 +431,48 @@
     "type": "String",
     "mode": "Nullable"
   },
+  {
+    "description": "HGNC (HUGO Gene Nomenclature Committee) Symbol",
+    "name": "hgnc_symbol",
+    "type": "String",
+    "mode": "Nullable"
+  },
+  {
+    "description": "HGNC_ID (HUGO Gene Nomenclature Committee) Identifier",
+    "name": "hgnc_id",
+    "type": "Integer",
+    "mode": "Nullable"
+  },
+  {
+    "description": "LOFTEE Loss-of-function annotation (HC = High Confidence; LC = Low Confidence)",
+    "name": "LoF",
+    "type": "String",
+    "mode": "Nullable"
+  },
+  {
+    "description": "LOFTEE Reason for LoF not being HC",
+    "name": "LoF_filter",
+    "type": "String",
+    "mode": "Repeated"
+  },
+  {
+    "description": "LOFTEE Possible warning flags for LoF",
+    "name": "LoF_flags",
+    "type": "String",
+    "mode": "Repeated"
+  },
+  {
+    "description": "LOFTEE Info used for LoF annotation",
+    "name": "LoF_info",
+    "type": "String",
+    "mode": "Repeated"
+  },
+  {
+    "description": "GERP (Genomic Evolutionary Rate Profiling) Scores",
+    "name": "GERP",
+    "type": "Float",
+    "mode": "Repeated"
+  },
   {
     "description": "gnomAD: 'Total' frequency",
     "name": "gnomad_all_af",
diff --git a/scripts/variantstore/scripts/variant_annotation_table/schema/vep_loftee_115_cooked.json b/scripts/variantstore/scripts/variant_annotation_table/schema/vep_loftee_115_cooked.json
new file mode 100644
index 00000000000..a14cefb5c77
--- /dev/null
+++ b/scripts/variantstore/scripts/variant_annotation_table/schema/vep_loftee_115_cooked.json
@@ -0,0 +1,137 @@
+[
+  {
+    "name": "vid",
+    "type": "STRING",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "Uploaded_variation",
+    "type": "STRING",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "Location",
+    "type": "STRING",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "Allele",
+    "type": "STRING",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "Gene",
+    "type": "STRING",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "Feature",
+    "type": "STRING",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "Feature_type",
+    "type": "STRING",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "Consequence",
+    "type": "STRING",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "cDNA_position",
+    "type": "STRING",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "CDS_position",
+    "type": "STRING",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "Protein_position",
+    "type": "STRING",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "Amino_acids",
+    "type": "STRING",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "Codons",
+    "type": "STRING",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "Existing_variation",
+    "type": "STRING",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "IMPACT",
+    "type": "STRING",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "DISTANCE",
+    "type": "STRING",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "STRAND",
+    "type": "STRING",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "FLAGS",
+    "type": "STRING",
+    "mode": "REPEATED"
+  },
+  {
+    "name": "HGNC_SYMBOL",
+    "type": "STRING",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "SYMBOL_SOURCE",
+    "type": "STRING",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "HGNC_ID",
+    "type": "INTEGER",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "SOURCE",
+    "type": "STRING",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "LoF",
+    "type": "STRING",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "LoF_filter",
+    "type": "STRING",
+    "mode": "REPEATED"
+  },
+  {
+    "name": "LoF_flags",
+    "type": "STRING",
+    "mode": "REPEATED"
+  },
+  {
+    "name": "LoF_info",
+    "type": "STRING",
+    "mode": "REPEATED"
+  },
+  {
+    "name": "GERP",
+    "type": "FLOAT",
+    "mode": "REPEATED"
+  }
+]
"mode": "NULLABLE" + }, + { + "name": "IMPACT", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "DISTANCE", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "STRAND", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "FLAGS", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "SYMBOL", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "SYMBOL_SOURCE", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "HGNC_ID", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "SOURCE", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "LoF", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "LoF_filter", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "LoF_flags", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "LoF_info", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "GERP", + "type": "STRING", + "mode": "NULLABLE" + } +] \ No newline at end of file diff --git a/scripts/variantstore/variant-annotations-table/GvsCreateVATfromVDS.wdl b/scripts/variantstore/variant-annotations-table/GvsCreateVATfromVDS.wdl index 467c08a3953..77762bffe55 100644 --- a/scripts/variantstore/variant-annotations-table/GvsCreateVATfromVDS.wdl +++ b/scripts/variantstore/variant-annotations-table/GvsCreateVATfromVDS.wdl @@ -35,6 +35,12 @@ workflow GvsCreateVATfromVDS { String? gatk_docker String? variants_docker String? variants_nirvana_docker + String? vep_loftee_docker + + String? vep_loftee_data_table_raw + String? vep_loftee_data_table_cooked + + String loftee_references_dir = "gs://gvs-internal/loftee/" } parameter_meta { @@ -84,6 +90,7 @@ workflow GvsCreateVATfromVDS { String effective_variants_docker = select_first([variants_docker, GetToolVersions.variants_docker]) String effective_gatk_docker = select_first([gatk_docker, GetToolVersions.gatk_docker]) String effective_variants_nirvana_docker = select_first([variants_nirvana_docker, GetToolVersions.variants_nirvana_docker]) + String effective_vep_loftee_docker = select_first([vep_loftee_docker, GetToolVersions.vep_loftee_docker]) String effective_hail_version = select_first([hail_version, GetToolVersions.hail_version]) String effective_google_project = select_first([workspace_gcs_project, GetToolVersions.google_project]) @@ -265,6 +272,18 @@ workflow GvsCreateVATfromVDS { variants_docker = effective_variants_docker, } + call GenerateVepAndLofteeAnnotations { + input: + vep_loftee_docker = effective_vep_loftee_docker, + vep_cache = loftee_references_dir + "homo_sapiens_vep_115_GRCh38.tar.gz", + loftee_human_ancestor_fa_gz = loftee_references_dir + "human_ancestor.fa.gz", + loftee_human_ancestor_fa_gz_fai = loftee_references_dir + "human_ancestor.fa.gz.fai", + loftee_human_ancestor_fa_gz_gzi = loftee_references_dir + "human_ancestor.fa.gz.gzi", + loftee_gerp_scores = loftee_references_dir + "gerp_conservation_scores.homo_sapiens.GRCh38.bw", + loftee_phylo_csf_database = loftee_references_dir + "loftee.sql", + input_vcf = StripCustomAnnotationsFromSitesOnlyVCF.output_vcf, + } + ## Use Nirvana to annotate the sites-only VCF and include the AC/AN/AF calculations as custom annotations call AnnotateVCF { input: @@ -291,7 +310,27 @@ workflow GvsCreateVATfromVDS { output_path = genes_output_path, variants_docker = effective_variants_docker, } + } + call BigQueryLoadRawVepAndLofteeAnnotations { + input: + vep_loftee_raw_output = GenerateVepAndLofteeAnnotations.output_file, + project_id = project_id, + dataset_name = dataset_name, + raw_data_table = 
+    call BigQueryLoadRawVepAndLofteeAnnotations {
+        input:
+            vep_loftee_raw_output = GenerateVepAndLofteeAnnotations.output_file,
+            project_id = project_id,
+            dataset_name = dataset_name,
+            raw_data_table = select_first([vep_loftee_data_table_raw, "vep_loftee_data_table_raw"]),
+            raw_data_table_schema = MakeSubpopulationFilesAndReadSchemaFiles.vep_loftee_raw_schema_json_file,
+            variants_docker = effective_variants_docker,
+    }
+
+    call BigQueryCookVepAndLofteeRawAnnotations {
+        input:
+            go = BigQueryLoadRawVepAndLofteeAnnotations.done,
+            project_id = project_id,
+            dataset_name = dataset_name,
+            raw_data_table = select_first([vep_loftee_data_table_raw, "vep_loftee_data_table_raw"]),
+            cooked_data_table = select_first([vep_loftee_data_table_cooked, "vep_loftee_data_table_cooked"]),
+            cooked_data_table_schema = MakeSubpopulationFilesAndReadSchemaFiles.vep_loftee_cooked_schema_json_file,
+            variants_docker = effective_variants_docker,
+    }

     call Utils.MergeTsvs {
@@ -314,6 +353,7 @@ workflow GvsCreateVATfromVDS {
         variant_transcript_schema = MakeSubpopulationFilesAndReadSchemaFiles.variant_transcript_schema_json_file,
         genes_schema = MakeSubpopulationFilesAndReadSchemaFiles.genes_schema_json_file,
         mane_table_name = LoadManeDataIntoBigQuery.mane_table,
+        vep_loftee_cooked_table_name = BigQueryCookVepAndLofteeRawAnnotations.cooked_table_name,
         project_id = project_id,
         dataset_name = dataset_name,
         variant_transcripts_path = variant_transcripts_output_path,
@@ -343,6 +383,8 @@ workflow GvsCreateVATfromVDS {
         vat_table_name = DeduplicateVatInBigQuery.vat_table,
         output_path = effective_output_path,
         merge_vcfs_disk_size_override = merge_vcfs_disk_size_override,
+        # This precondition seems wrong / misleading. This task is actually gated on DeduplicateVatInBigQuery,
+        # as it should be.
         precondition_met = BigQueryLoadJson.done,
         cloud_sdk_docker = effective_cloud_sdk_docker,
         cloud_sdk_slim_docker = effective_cloud_sdk_slim_docker,
@@ -508,6 +550,8 @@ task MakeSubpopulationFilesAndReadSchemaFiles {
         String vat_schema_json_filename = "vat_schema.json"
         String variant_transcript_schema_json_filename = "variant_transcript_schema.json"
         String genes_schema_json_filename = "genes_schema.json"
+        String vep_loftee_115_raw_schema_json_filename = "vep_loftee_115_raw.json"
+        String vep_loftee_115_cooked_schema_json_filename = "vep_loftee_115_cooked.json"
         String variants_docker
     }
     String output_ancestry_filename = "ancestry_mapping.tsv"
@@ -539,6 +583,8 @@ task MakeSubpopulationFilesAndReadSchemaFiles {
         File vat_schema_json_file = vat_schema_json_filename
         File variant_transcript_schema_json_file = variant_transcript_schema_json_filename
         File genes_schema_json_file = genes_schema_json_filename
+        File vep_loftee_raw_schema_json_file = vep_loftee_115_raw_schema_json_filename
+        File vep_loftee_cooked_schema_json_file = vep_loftee_115_cooked_schema_json_filename

         File ancestry_mapping_list = output_ancestry_filename
         File custom_annotations_template_file = custom_annotations_template_filename
@@ -732,6 +778,342 @@ for line in sys.stdin:
     }
 }

+task GenerateVepAndLofteeAnnotations {
+    input {
+        String vep_loftee_docker
+        # TODO: make a reference disk for this stuff; some of these references are quite large.
+        File vep_cache
+        File loftee_human_ancestor_fa_gz
+        File loftee_human_ancestor_fa_gz_fai
+        File loftee_human_ancestor_fa_gz_gzi
+        File loftee_gerp_scores
+        File loftee_phylo_csf_database
+        File input_vcf
+        File monitoring_script = "gs://gvs_quickstart_storage/cromwell_monitoring_script.sh"
+        Float memory_mib = 8 * 1024
+        # The memory headroom left for other processes including the Batch agent.
+        Float overhead_memory_mib = 1.6 * 1024
+    }
+
+    command <<<
+        # Prepend date, time and pwd to xtrace log entries.
+        PS4='\D{+%F %T} \w $ '
+        set -o errexit -o nounset -o pipefail -o xtrace
+
+        echo "MEM_SIZE is ${MEM_SIZE:-}"
+        echo "MEM_UNIT is ${MEM_UNIT:-}"
+
+        if [[ -z "${MEM_UNIT:-}" ]]
+        then
+            vep_memory_kib=$(python -c "from math import floor; print(int(floor((~{memory_mib} - ~{overhead_memory_mib}) * 1024)))")
+        elif [[ ${MEM_UNIT} == "GB" ]]
+        then
+            vep_memory_kib=$(python -c "from math import floor; print(int(floor(((${MEM_SIZE} * 1024) - ~{overhead_memory_mib}) * 1024)))")
+        else
+            echo "Unexpected memory unit: ${MEM_UNIT}" 1>&2
+            exit 1
+        fi
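+        # Worked example with the default inputs: memory_mib = 8192 and overhead_memory_mib = 1638.4,
+        # so vep_memory_kib = floor((8192 - 1638.4) * 1024) = 6710886 KiB, i.e. roughly 6.4 GiB left for VEP.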
+        echo "memory_mib is ~{memory_mib}"
+        echo "overhead_memory_mib is ~{overhead_memory_mib}"
+        echo "vep_memory_kib is ${vep_memory_kib}"
+
+        bash ~{monitoring_script} > monitoring.log &
+
+        if grep -E -v -q '^#' ~{input_vcf}
+        then
+            # Only copy these references if there are actually data lines in the VCF to be processed.
+            # Most of the shards in 20/X/Y integration runs don't have any work to do and don't need
+            # to localize the references.
+            #
+            # gcloud storage cp ~{vep_cache} ~{loftee_human_ancestor_fa_gz} ~{loftee_human_ancestor_fa_gz_fai} ~{loftee_human_ancestor_fa_gz_gzi} ~{loftee_gerp_scores} ~{loftee_phylo_csf_database} .
+            #
+            # TODO yeah that would be nice but there's no gcloud on the VEP + LOFTEE image. These references
+            # *really* should be on a reference disk.
+            tar xzf ~{vep_cache}
+
+            LOFTEE_PATH=/opt/vep/src/loftee-1.0.4_GRCh38
+            args=(
+
+                # Some logging please.
+                --verbose
+                --warning_file warnings.txt
+
+                # Explicitly turn off forking as LOFTEE might not deal well with that.
+                --fork 1
+
+                # Breaks out data into their own columns that otherwise would be nested (semicolon delimited) in the "Extra" column.
+                --tab
+
+                # Force writing versions on Ensembl transcripts for VAT compatibility.
+                --transcript_version
+
+                # Emit HGNC symbols and IDs.
+                --symbol
+
+                # Basic LOFTEE plugin setup
+                --plugin LoF,loftee_path:$LOFTEE_PATH,gerp_bigwig:~{loftee_gerp_scores},human_ancestor_fa:~{loftee_human_ancestor_fa_gz},conservation_file:~{loftee_phylo_csf_database},check_complete_cds:false
+                --dir_plugins $LOFTEE_PATH
+
+                # Basic VEP cache setup
+                --cache
+                --offline
+                --dir_cache .
+
+                # For GERP (Genomic Evolutionary Rate Profiling) score output.
+                --custom file=~{loftee_gerp_scores},short_name=GERP,format=bigwig,num_records=all
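+                # With num_records=all the overlapping bigwig values appear to come back as a comma-separated
+                # list of per-base scores; BigQueryCookVepAndLofteeRawAnnotations later splits that string into
+                # a REPEATED FLOAT64 column.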
+ echo "Killed" >& 2 + echo "java.lang.OutOfMemoryError" >& 2 + exit 1 + else + echo "VEP + LOFTEE run complete." + fi + else + echo "No data found for processing in VCF, exit 0." + touch "vep_loftee_raw_output.txt" + fi + + >>> + + runtime { + preemptible: 2 + maxRetries: 3 + noAddress: true + docker: vep_loftee_docker + memory: "8 GB" + disks: "local-disk 500 HDD" + } + + output { + File output_file = "vep_loftee_raw_output.txt" + File monitoring_log = "monitoring.log" + File? warnings = "warnings.txt" + Boolean done = true + } +} + +task BigQueryLoadRawVepAndLofteeAnnotations { + input { + String variants_docker + Array[File] vep_loftee_raw_output + String project_id + String dataset_name + String raw_data_table + File raw_data_table_schema + } + + parameter_meta { + vep_loftee_raw_output: { + localization_optional: true + } + } + + command <<< + # Prepend date, time and pwd to xtrace log entries. + PS4='\D{+%F %T} \w $ ' + set -o errexit -o nounset -o pipefail -o xtrace + + set +o errexit + bq --apilog=false show --project_id=~{project_id} ~{dataset_name}.~{raw_data_table} > /dev/null + BQ_SHOW_RC=$? + set -o errexit + + if [ $BQ_SHOW_RC -ne 0 ]; then + echo "Creating raw VEP + LOFTEE table ~{dataset_name}.~{raw_data_table}" + + # 3 day TTL for this table + DATE=$((3 * 24 * 60 * 60)) + bq --apilog=false mk --expiration=$DATE --project_id=~{project_id} ~{dataset_name}.~{raw_data_table} ~{raw_data_table_schema} + fi + + num_rows=$(bq --apilog=false show --project_id=~{project_id} --format json ~{dataset_name}.~{raw_data_table} | jq -r .numRows) + if ((num_rows != 0)) + then + echo "Found preexisting table with data, not adding more raw data." + else + echo "Raw data table is empty, copying VEP output to be loaded." + gcloud storage cp ~{sep=' ' vep_loftee_raw_output} . + for file in ~{sep=' ' vep_loftee_raw_output} + do + filename=$(basename $file) + if [ ! -e load_file.txt ] + then + # Do a wee bit of processing of the raw output to create a load file for raw VEP + LOFTEE data + # - Remove lines beginning with '##'. + # - Remove the leading '#' from the one line that should be left with a single leading '#' so + # the line can serve as a TSV header. + sed -E '/^##/d' $filename | sed -E 's/^#//' > load_file.txt + fi + grep -E -v '^#' $filename >> load_file.txt + done + + bq --apilog=false load --project_id=~{project_id} --source_format=CSV --field_delimiter='\t' \ + --skip_leading_rows=1 --null_marker="-" ~{dataset_name}.~{raw_data_table} load_file.txt + + echo "VEP + LOFTEE raw data loading complete." + fi + >>> + + runtime { + docker: variants_docker + preemptible: 2 + memory: "7 GB" + disks: "local-disk 1000 HDD" + } + + output { + Boolean done = true + } +} + +task BigQueryCookVepAndLofteeRawAnnotations { + input { + Boolean go + String variants_docker + String project_id + String dataset_name + String raw_data_table + String cooked_data_table + File cooked_data_table_schema + } + + command <<< + # Prepend date, time and pwd to xtrace log entries. + PS4='\D{+%F %T} \w $ ' + set -o errexit -o nounset -o pipefail -o xtrace + + set +o errexit + bq --apilog=false show --project_id=~{project_id} ~{dataset_name}.~{cooked_data_table} > /dev/null + BQ_SHOW_RC=$? 
+            bq --apilog=false load --project_id=~{project_id} --source_format=CSV --field_delimiter='\t' \
+                --skip_leading_rows=1 --null_marker="-" ~{dataset_name}.~{raw_data_table} load_file.txt
+
+            echo "VEP + LOFTEE raw data loading complete."
+        fi
+    >>>
+
+    runtime {
+        docker: variants_docker
+        preemptible: 2
+        memory: "7 GB"
+        disks: "local-disk 1000 HDD"
+    }
+
+    output {
+        Boolean done = true
+    }
+}
+
+task BigQueryCookVepAndLofteeRawAnnotations {
+    input {
+        Boolean go
+        String variants_docker
+        String project_id
+        String dataset_name
+        String raw_data_table
+        String cooked_data_table
+        File cooked_data_table_schema
+    }
+
+    command <<<
+        # Prepend date, time and pwd to xtrace log entries.
+        PS4='\D{+%F %T} \w $ '
+        set -o errexit -o nounset -o pipefail -o xtrace
+
+        set +o errexit
+        bq --apilog=false show --project_id=~{project_id} ~{dataset_name}.~{cooked_data_table} > /dev/null
+        BQ_SHOW_RC=$?
+        set -o errexit
+
+        if [ $BQ_SHOW_RC -ne 0 ]; then
+            echo 'Creating "cooked" VEP + LOFTEE table ~{dataset_name}.~{cooked_data_table}'
+
+            # 3 day TTL for this table
+            TTL_SECONDS=$((3 * 24 * 60 * 60))
+            bq --apilog=false mk --expiration=$TTL_SECONDS --project_id=~{project_id} ~{dataset_name}.~{cooked_data_table} ~{cooked_data_table_schema}
+        fi
+
+        num_rows=$(bq --apilog=false show --project_id=~{project_id} --format json ~{dataset_name}.~{cooked_data_table} | jq -r .numRows)
+        if ((num_rows != 0))
+        then
+            echo "Found preexisting table with data, not adding more cooked data."
+        else
+
+            bq --apilog=false query --nouse_legacy_sql --destination_table=~{dataset_name}.~{cooked_data_table} --replace \
+                --project_id=~{project_id} '
+
+            SELECT * EXCEPT(row_number) FROM (
+                SELECT
+                    -- Make a VID-compatible string from the data in Uploaded_variation.
+                    -- VEP appears to use a different convention for the encoding of indel positions than what is used in GVS:
+                    -- VEP indel positions are based on the first *discrepant* base and not the first base mentioned, which in the
+                    -- GVS convention agrees between reference and allele. Correct for that in the VID-building code below:
+                    -- subtract 1 if the variant is an indel.
+                    REGEXP_EXTRACT(Uploaded_variation, "^chr([^_]+)") || "-" ||
+                    -- A Location specified with a "-" range is an indel. Single-base deletions are a special case with a single
+                    -- position, but like all deletions they have a NULL Allele so look for that as well.
+                    IF ((Location LIKE "%-%") OR (Allele IS NULL),
+                        -- If this is an indel, decrement the position by one for VAT compatibility.
+                        CAST((CAST(REGEXP_EXTRACT(Uploaded_variation, "_(\\d+)") AS INT64) - 1) AS STRING),
+                        -- Else SNPs use position without adjustment.
+                        REGEXP_EXTRACT(Uploaded_variation, "_(\\d+)")) ||
+                    "-" || REGEXP_EXTRACT(Uploaded_variation, "_([ACGT]+)/") || "-" ||
+                    REGEXP_EXTRACT(Uploaded_variation, "([ACGT]+)$") AS vid,
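+                    -- For illustration, with a hypothetical SNP row: Uploaded_variation = "chr20_63669_G/A",
+                    -- Location = "chr20:63669", Allele = "A" is not an indel, so the vid comes out as
+                    -- "20-63669-G-A"; for an indel the extracted position would instead be decremented by one.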
+                    Uploaded_variation,
+                    Location,
+                    Allele,
+                    Gene,
+                    Feature,
+                    Feature_type,
+                    Consequence,
+                    cDNA_position,
+                    CDS_position,
+                    Protein_position,
+                    Amino_acids,
+                    Codons,
+                    Existing_variation,
+                    IMPACT,
+                    DISTANCE,
+                    STRAND,
+                    -- FLAGS can be multi-valued so SPLIT to make this REPEATED.
+                    SPLIT(FLAGS, ",") AS FLAGS,
+                    SYMBOL AS HGNC_SYMBOL,
+                    SYMBOL_SOURCE,
+                    -- HGNC IDs are formatted like HGNC:1234; we only want the number part.
+                    CAST(SPLIT(HGNC_ID, ":")[OFFSET(1)] AS INTEGER) AS HGNC_ID,
+                    SOURCE,
+                    LoF,
+                    -- These three appear to sometimes be multi-valued so SPLIT to make them REPEATEDs.
+                    SPLIT(LoF_filter, ",") AS LoF_filter,
+                    SPLIT(LoF_flags, ",") AS LoF_flags,
+                    SPLIT(LoF_info, ",") AS LoF_info,
+                    -- Split and cast the GERP string to REPEATED FLOAT64s.
+                    (
+                        SELECT
+                            ARRAY_AGG(SAFE_CAST(s AS FLOAT64))
+                        FROM
+                            UNNEST(SPLIT(GERP, ",")) AS s
+                    ) AS GERP,
+
+                    -- Use the ROW_NUMBER() magic to squash duplicates. A small number of deletions span interval boundaries
+                    -- and are assigned to two different VEP processing shards. This duplicate data would cause problems when
+                    -- we try to assign these annotations back to the variant transcript table.
+                    ROW_NUMBER()
+                        -- The expression below uses Uploaded_variation rather than vid because BigQuery claims to not be able to
+                        -- find the vid identifier. Uploaded_variation contains equivalent information to vid in a different format.
+                        OVER (PARTITION BY Uploaded_variation, Feature) AS row_number
+
+                FROM
+                    ~{project_id}.~{dataset_name}.~{raw_data_table}
+            )
+
+            WHERE row_number = 1
+
+            '
+        fi
+
+    >>>
+
+    runtime {
+        docker: variants_docker
+        memory: "7 GB"
+        disks: "local-disk 1000 HDD"
+    }
+
+    output {
+        Boolean done = true
+        String cooked_table_name = cooked_data_table
+    }
+}
+

 task AnnotateVCF {
     input {
@@ -1049,6 +1431,7 @@ task BigQueryLoadJson {
         File variant_transcript_schema
         File genes_schema
         String mane_table_name
+        String vep_loftee_cooked_table_name
         String project_id
         String dataset_name
         String variant_transcripts_path
@@ -1101,6 +1484,30 @@ task BigQueryLoadJson {
     bq --apilog=false --project_id=~{project_id} query --format=csv --use_legacy_sql=false ~{bq_labels} \
         'UPDATE `~{dataset_name}.~{variant_transcript_table}` vtt SET vtt.mane_plus_clinical_name = mane.name FROM `~{dataset_name}.~{mane_table_name}` mane WHERE vtt.transcript = mane.Ensembl_nuc AND mane.MANE_status = "MANE Plus Clinical" AND vtt.transcript is not null;'

+    echo "Adding VEP + LOFTEE annotation data to the pre-vat table ~{dataset_name}.~{variant_transcript_table}"
+    bq --apilog=false --project_id=~{project_id} query --format=csv --use_legacy_sql=false ~{bq_labels} '
+
+    UPDATE `~{dataset_name}.~{variant_transcript_table}` vtt SET
+
+        vtt.hgnc_symbol = vep.hgnc_symbol,
+        vtt.hgnc_id = vep.hgnc_id,
+        vtt.LoF = vep.LoF,
+        vtt.LoF_filter = vep.LoF_filter,
+        vtt.LoF_flags = vep.LoF_flags,
+        vtt.LoF_info = vep.LoF_info,
+        vtt.GERP = vep.GERP
+
+    FROM `~{dataset_name}.~{vep_loftee_cooked_table_name}` vep WHERE
+
+        vtt.transcript IS NOT NULL AND
+        vep.Feature_type IS NOT NULL AND
+        vtt.vid = vep.vid AND
+        -- Do not consider version numbers when matching on transcripts. In Quickstart about 25% of the transcripts are
+        -- mismatched on version number, with VEP having newer versions.
+        SPLIT(vtt.transcript, ".")[OFFSET(0)] = SPLIT(vep.Feature, ".")[OFFSET(0)]
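+        -- e.g. a hypothetical VAT transcript "ENST00000367770.5" still matches the VEP Feature
+        -- "ENST00000367770.8" because both sides reduce to "ENST00000367770".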
+
+    '
+
     set +o errexit
     bq --apilog=false show --project_id=~{project_id} ~{dataset_name}.~{genes_table} > /dev/null
     BQ_SHOW_RC=$?
@@ -1251,7 +1658,14 @@ task BigQueryLoadJson {
             v.clinvar_rcv_classifications,
             v.clinvar_rcv_num_stars,
             v.mane_select_name,
-            v.mane_plus_clinical_name
+            v.mane_plus_clinical_name,
+            v.hgnc_symbol,
+            v.hgnc_id,
+            v.LoF,
+            v.LoF_filter,
+            v.LoF_flags,
+            v.LoF_info,
+            v.GERP
         FROM `~{dataset_name}.~{variant_transcript_table}` as v
         left join (SELECT gene_symbol, ANY_VALUE(gene_omim_id) AS gene_omim_id, ANY_VALUE(omim_phenotypes_id) AS omim_phenotypes_id, ANY_VALUE(omim_phenotypes_name) AS omim_phenotypes_name FROM `~{dataset_name}.~{genes_table}` group by gene_symbol) as g
@@ -1263,7 +1677,7 @@ task BigQueryLoadJson {
         memory: "3 GB"
         preemptible: 3
         cpu: "1"
-        disks: "local-disk 100 HDD"
+        disks: "local-disk 1000 HDD"
     }

     output {
diff --git a/scripts/variantstore/variant-annotations-table/vep_loftee/Dockerfile b/scripts/variantstore/variant-annotations-table/vep_loftee/Dockerfile
new file mode 100644
index 00000000000..7e938c73147
--- /dev/null
+++ b/scripts/variantstore/variant-annotations-table/vep_loftee/Dockerfile
@@ -0,0 +1,261 @@
+ARG BRANCH=release/115
+
+###################################################
+# Stage 1 - docker container to build ensembl-vep #
+###################################################
+FROM ubuntu:22.04 AS builder
+
+# Update aptitude and install some required packages
+# a lot of them are required for Bio::DB::BigFile
+RUN apt-get update && apt-get -y install \
+    build-essential \
+    git \
+    libpng-dev \
+    zlib1g-dev \
+    libbz2-dev \
+    liblzma-dev \
+    perl \
+    perl-base \
+    unzip \
+    wget \
+    curl \
+    libncurses5-dev \
+    libncursesw5-dev \
+    libcurl4-openssl-dev && \
+    rm -rf /var/lib/apt/lists/*
+
+# Setup VEP environment
+ENV OPT=/opt/vep
+ENV OPT_SRC=$OPT/src
+ENV HTSLIB_DIR=$OPT_SRC/htslib
+ENV SAMTOOLS_DIR=$OPT_SRC/samtools
+ENV HTS_VERSION=1.9
+ARG BRANCH
+
+# samtools
+WORKDIR /tmp
+RUN wget -q https://github.com/samtools/samtools/releases/download/$HTS_VERSION/samtools-$HTS_VERSION.tar.bz2 -O samtools-$HTS_VERSION.tar.bz2 && \
+    tar -xjf samtools-$HTS_VERSION.tar.bz2
+WORKDIR /tmp/samtools-$HTS_VERSION
+RUN ./configure --prefix=$SAMTOOLS_DIR && make && make install && rm -r Makefile *.c
+
+# Working directory
+WORKDIR $OPT_SRC
+
+# loftee
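+# LOFTEE 1.0.4, GRCh38 branch; the unzipped directory name matches the LOFTEE_PATH
+# (/opt/vep/src/loftee-1.0.4_GRCh38) referenced in stage 2 and in the WDL task.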
+ENV LOFTEE_ZIP=v1.0.4_GRCh38.zip
+RUN wget -q https://github.com/konradjk/loftee/archive/refs/tags/${LOFTEE_ZIP} && \
+    unzip ${LOFTEE_ZIP} && \
+    rm ${LOFTEE_ZIP}
+
+# Add ensembl-vep files from current context
+ADD . ensembl-vep
+
+# For release branches, raise an error if VEP version does not match the branch name
+RUN if expr "$BRANCH" : "^release/.*" > /dev/null ; \
+    then \
+        branch_version=$(echo $BRANCH | sed -E 's|release/([0-9]+).*|\1|g'); \
+        vep_version=$(grep VEP_VERSION */modules/Bio/EnsEMBL/VEP/Constants.pm | grep -Eo '[0-9]+'); \
+        if [ $branch_version -ne $vep_version ]; then \
+            echo "ERROR: VEP version $vep_version does not match version in branch name '$BRANCH'"; exit 1; \
+        fi; \
+    fi
+
+# Clone/download repositories/libraries
+RUN if [ "$BRANCH" = "main" ]; \
+    then export BRANCH_OPT=""; \
+    else export BRANCH_OPT="-b $BRANCH"; \
+    fi && \
+    # Get ensembl cpanfile in order to get the list of the required Perl libraries
+    wget -q "https://raw.githubusercontent.com/Ensembl/ensembl/$BRANCH/cpanfile" -O "ensembl_cpanfile" && \
+    # Clone ensembl-variation git repository and compile C code
+    git clone $BRANCH_OPT --depth 1 https://github.com/Ensembl/ensembl-variation.git && \
+    mkdir var_c_code && \
+    cp ensembl-variation/C_code/*.c ensembl-variation/C_code/Makefile var_c_code/ && \
+    rm -rf ensembl-variation && \
+    chmod u+x var_c_code/* && \
+    # Clone bioperl-ext git repository - used by Haplosaurus
+    git clone --depth 1 https://github.com/bioperl/bioperl-ext.git && \
+    # Download ensembl-xs - it contains compiled versions of certain key subroutines used in VEP
+    wget https://github.com/Ensembl/ensembl-xs/archive/2.3.2.zip -O ensembl-xs.zip && \
+    unzip -q ensembl-xs.zip && mv ensembl-xs-2.3.2 ensembl-xs && rm -rf ensembl-xs.zip && \
+    # Clone/download other repositories: bioperl-live is needed so the cpanm dependencies installation from the ensembl-vep/cpanfile file takes less disk space
+    ensembl-vep/travisci/get_dependencies.sh && \
+    # Only keep the bioperl-live "Bio" library
+    mv bioperl-live bioperl-live_bak && mkdir bioperl-live && mv bioperl-live_bak/Bio bioperl-live/ && rm -rf bioperl-live_bak && \
+    ## A lot of cleanup on the imported libraries, in order to reduce the docker image ##
+    rm -rf Bio-HTS/.??* Bio-HTS/Changes Bio-HTS/DISCLAIMER Bio-HTS/MANIFEST* Bio-HTS/README Bio-HTS/scripts Bio-HTS/t Bio-HTS/travisci \
+        bioperl-ext/.??* bioperl-ext/Bio/SeqIO bioperl-ext/Bio/Tools bioperl-ext/Makefile.PL bioperl-ext/README* bioperl-ext/t bioperl-ext/examples \
+        ensembl-xs/.??* ensembl-xs/TODO ensembl-xs/Changes ensembl-xs/INSTALL ensembl-xs/MANIFEST ensembl-xs/README ensembl-xs/t ensembl-xs/travisci \
+        htslib/.??* htslib/INSTALL htslib/NEWS htslib/README* htslib/test && \
+    # Only keep needed kent-335_base libraries for VEP - used by Bio::DB::BigFile (bigWig parsing)
+    mv kent-335_base kent-335_base_bak && mkdir -p kent-335_base/src && \
+    cp -R kent-335_base_bak/src/lib kent-335_base_bak/src/inc kent-335_base_bak/src/jkOwnLib kent-335_base/src/ && \
+    cp kent-335_base_bak/src/*.sh kent-335_base/src/ && \
+    rm -rf kent-335_base_bak
+
+# Setup bioperl-ext
+WORKDIR bioperl-ext/Bio/Ext/Align/
+RUN perl -pi -e"s|(cd libs.+)CFLAGS=\\\'|\$1CFLAGS=\\\'-fPIC |" Makefile.PL
+
+# Install htslib binaries (for 'bgzip' and 'tabix')
+# htslib requires the packages 'zlib1g-dev', 'libbz2-dev' and 'liblzma-dev'
+WORKDIR $HTSLIB_DIR
+RUN make install && rm -f Makefile *.c
+
+# Compile Variation LD C scripts
+WORKDIR $OPT_SRC/var_c_code
+RUN make && rm -f Makefile *.c
+
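+# Everything below runs in a fresh Ubuntu image; only the sources and artifacts staged under $OPT_SRC
+# in the builder are carried forward (see the COPY --from=builder below), keeping the final image smaller.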
+
+###################################################
+# Stage 2 - docker container to run ensembl-vep   #
+###################################################
+FROM ubuntu:22.04
+
+# Update aptitude and install some required packages
+# a lot of them are required for Bio::DB::BigFile
+RUN apt-get update && apt-get -y install \
+    build-essential \
+    cpanminus \
+    curl \
+    libmysqlclient-dev \
+    libdbd-mysql-perl \
+    libpng-dev \
+    libssl-dev \
+    zlib1g-dev \
+    libbz2-dev \
+    liblzma-dev \
+    locales \
+    openssl \
+    perl \
+    perl-base \
+    unzip \
+    vim && \
+    apt-get -y purge manpages-dev && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Setup VEP environment
+ENV OPT=/opt/vep
+ENV OPT_SRC=$OPT/src
+ENV LOFTEE_PATH=$OPT_SRC/loftee-1.0.4_GRCh38
+ENV PERL5LIB_TMP=${LOFTEE_PATH}:$OPT_SRC/ensembl-vep:$OPT_SRC/ensembl-vep/modules:/plugins
+ENV PERL5LIB=$PERL5LIB_TMP:$OPT_SRC/bioperl-live
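+# PERL5LIB temporarily includes bioperl-live for the dependency installs below; bioperl-live is deleted
+# after the cpanm runs (INSTALL.pl reinstalls BioPerl), so PERL5LIB is later reset to PERL5LIB_TMP.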
+ENV KENT_SRC=$OPT/src/kent-335_base/src
+ENV HTSLIB_DIR=$OPT_SRC/htslib
+ENV DEPS=$OPT_SRC
+ENV PATH=$OPT_SRC/samtools/bin:$OPT_SRC/ensembl-vep:$OPT_SRC/var_c_code:$PATH
+ENV LANG_VAR=en_US.UTF-8
+ARG BRANCH
+
+# Create vep user
+RUN useradd -r -m -U -d "$OPT" -s /bin/bash -c "VEP User" -p '' vep && \
+    chmod a+rx $OPT && \
+    usermod -a -G sudo vep && \
+    mkdir -p $OPT_SRC
+USER vep
+
+# Copy downloaded libraries (stage 1) to this image (stage 2)
+COPY --chown=vep:vep --from=builder $OPT_SRC $OPT_SRC
+#############################################################
+
+# Change user to root for the following compilations/installations
+USER root
+
+# Install bioperl-ext, faster alignments for haplo (XS-based BioPerl extensions to C libraries)
+WORKDIR $OPT_SRC/bioperl-ext/Bio/Ext/Align/
+RUN perl Makefile.PL && make && make install && rm -f Makefile*
+
+# Install ensembl-xs, faster run using re-implementation in C of some of the Perl subroutines
+WORKDIR $OPT_SRC/ensembl-xs
+RUN perl Makefile.PL && make && make install && rm -f Makefile* cpanfile
+
+WORKDIR $OPT_SRC
+# Install/compile more libraries
+RUN export MACHTYPE=$(uname -m) && \
+    ensembl-vep/travisci/build_c.sh && \
+    # Remove unused Bio-DB-HTS files
+    rm -rf Bio-HTS/cpanfile Bio-HTS/Build.PL Bio-HTS/Build Bio-HTS/_build Bio-HTS/INSTALL.pl && \
+    # Install ensembl perl dependencies (cpanm)
+    cpanm --installdeps --with-recommends --notest --cpanfile ensembl_cpanfile . && \
+    cpanm --installdeps --with-recommends --notest --cpanfile ensembl-vep/cpanfile . && \
+    # Delete bioperl and cpanfiles after the cpanm installs as bioperl will be reinstalled by the INSTALL.pl script
+    rm -rf bioperl-live ensembl_cpanfile ensembl-vep/cpanfile && \
+    # Configure "locale", see https://github.com/rocker-org/rocker/issues/19
+    echo "$LANG_VAR UTF-8" >> /etc/locale.gen && locale-gen en_US.utf8 && \
+    /usr/sbin/update-locale LANG=$LANG_VAR && \
+    # Copy htslib executables. It also requires the packages 'zlib1g-dev', 'libbz2-dev' and 'liblzma-dev'
+    cp $HTSLIB_DIR/bgzip $HTSLIB_DIR/tabix $HTSLIB_DIR/htsfile /usr/local/bin/ && \
+    # additional perl module for loftee
+    cpanm DBD::SQLite && \
+    # Remove CPAN cache
+    rm -rf /root/.cpanm
+
+ENV LC_ALL=$LANG_VAR
+ENV LANG=$LANG_VAR
+
+# Switch back to vep user
+USER vep
+ENV PERL5LIB=$PERL5LIB_TMP
+
+# Setup Docker environment for when users run VEP and INSTALL.pl in Docker image:
+# - skip VEP updates in INSTALL.pl
+ENV VEP_NO_UPDATE=1
+# - avoid Faidx/HTSLIB installation in INSTALL.pl
+ENV VEP_NO_HTSLIB=1
+# - skip plugin installation in INSTALL.pl
+ENV VEP_NO_PLUGINS=1
+# - set plugins directory for VEP and INSTALL.pl
+ENV VEP_DIR_PLUGINS=/plugins
+ENV VEP_PLUGINSDIR=$VEP_DIR_PLUGINS
+WORKDIR $VEP_DIR_PLUGINS
+
+# Update bash profile
+WORKDIR $OPT_SRC/ensembl-vep
+RUN echo >> $OPT/.profile && \
+    echo PATH=$PATH:\$PATH >> $OPT/.profile && \
+    echo export PATH >> $OPT/.profile && \
+    # Install Ensembl API and plugins
+    ./INSTALL.pl --auto ap --plugins all --skip_plugins LoF --pluginsdir $VEP_DIR_PLUGINS --no_update --no_htslib && \
+    # Remove ensembl-vep's travisci folder
+    rm -rf travisci
+
+# Install dependencies for VEP plugins:
+USER root
+ENV PLUGIN_DEPS="https://raw.githubusercontent.com/Ensembl/VEP_plugins/$BRANCH/config"
+# - Ubuntu packages
+RUN curl -O "$PLUGIN_DEPS/ubuntu-packages.txt" && \
+    apt-get update && apt-get install -y --no-install-recommends \
+        $(sed -e s/\#.*//g ubuntu-packages.txt) && \
+    rm -rf /var/lib/apt/lists/* ubuntu-packages.txt
+# - Symlink python to python2
+RUN ln -s /usr/bin/python2 /usr/bin/python
+# - Perl modules
+RUN curl -O "$PLUGIN_DEPS/cpanfile" && \
+    cpanm --installdeps --with-recommends . && \
+    rm -rf /root/.cpanm cpanfile
+# - Python packages
+RUN curl -O https://raw.githubusercontent.com/paulfitz/mysql-connector-c/master/include/my_config.h && \
+    mv my_config.h /usr/include/mysql/my_config.h
+RUN curl -O "$PLUGIN_DEPS/requirements.txt" && \
+    python2 -m pip install --no-cache-dir -r requirements.txt && \
+    rm requirements.txt
+
+# Install GeneSplicer binary
+USER vep
+WORKDIR $VEP_DIR_PLUGINS
+RUN curl -O ftp://ftp.ccb.jhu.edu/pub/software/genesplicer/GeneSplicer.tar.gz && \
+    tar -xzf GeneSplicer.tar.gz && \
+    rm GeneSplicer.tar.gz && \
+    cd GeneSplicer/sources && \
+    make && \
+    mv genesplicer .. && \
+    rm -rf GeneSplicer/*/
+ENV PATH=$VEP_DIR_PLUGINS/GeneSplicer:$PATH
+
+# Set working directory as symlink to $OPT/.vep (containing VEP cache and data)
+USER root
+RUN ln -s $OPT/.vep /data
+USER vep
+WORKDIR /data
diff --git a/scripts/variantstore/variant-annotations-table/vep_loftee/README.md b/scripts/variantstore/variant-annotations-table/vep_loftee/README.md
new file mode 100644
index 00000000000..223dcf918d2
--- /dev/null
+++ b/scripts/variantstore/variant-annotations-table/vep_loftee/README.md
@@ -0,0 +1,30 @@
+The Dockerfile in this directory is used to build a Docker image for Ensembl VEP 115 with GRCh38 LOFTEE support.
+This file is intended to replace the Dockerfile at `docker/Dockerfile` in the Ensembl VEP repo. On an x86 VM
+the image can be built from the root of the Ensembl VEP repo with the command:
+
+```
+docker build -f docker/Dockerfile .
+```
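+
+(Untested assumption: on a non-x86 host it may be possible to cross-build the same image with BuildKit.)
+
+```
+docker buildx build --platform linux/amd64 -f docker/Dockerfile .
+```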
+
+Get the image id from `docker images` and assign:
+
+```
+IMAGE_ID=
+```
+
+Then:
+
+```
+TAG="$(date -Idate)-${IMAGE_ID}"
+BASE_REPO="broad-dsde-methods/gvs"
+REPO_WITH_TAG="${BASE_REPO}/loftee:${TAG}"
+docker tag "${IMAGE_ID}" "${REPO_WITH_TAG}"
+
+# Tag and push
+GAR_TAG="us-central1-docker.pkg.dev/${REPO_WITH_TAG}"
+docker tag "${REPO_WITH_TAG}" "${GAR_TAG}"
+
+docker push "${GAR_TAG}"
+
+echo "Docker image pushed to \"${GAR_TAG}\""
+```
diff --git a/scripts/variantstore/wdl/GvsUtils.wdl b/scripts/variantstore/wdl/GvsUtils.wdl
index 388535907de..873d18cbaac 100644
--- a/scripts/variantstore/wdl/GvsUtils.wdl
+++ b/scripts/variantstore/wdl/GvsUtils.wdl
@@ -131,12 +131,13 @@ task GetToolVersions {
     # GVS generally uses the smallest `alpine` version of the Google Cloud SDK as it suffices for most tasks, but
     # there are a handful of tasks that require the larger GNU libc-based `slim`.
     String cloud_sdk_slim_docker = "gcr.io/google.com/cloudsdktool/cloud-sdk:524.0.0-slim"
-    String variants_docker = "us-central1-docker.pkg.dev/broad-dsde-methods/gvs/variants:2025-10-28-alpine-4a74414607d9"
+    String variants_docker = "us-central1-docker.pkg.dev/broad-dsde-methods/gvs/variants:2025-12-04-alpine-11ede1e609a0"
     String variants_nirvana_docker = "us.gcr.io/broad-dsde-methods/variantstore:nirvana_2022_10_19"
     String gatk_docker = "us-central1-docker.pkg.dev/broad-dsde-methods/gvs/gatk:2025-10-17-gatkbase-0a4709121758"
     String real_time_genomics_docker = "docker.io/realtimegenomics/rtg-tools:latest"
     String gotc_imputation_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623"
     String plink_docker = "us-central1-docker.pkg.dev/broad-dsde-methods/gvs/plink2:2024-04-23-slim-a0a65f52cc0e"
+    String vep_loftee_docker = "us-central1-docker.pkg.dev/broad-dsde-methods/gvs/loftee:2025-12-10-3e71c688e658"

     String workspace_bucket = read_string(workspace_bucket_output)
     String workspace_id = read_string(workspace_id_output)
diff --git a/scripts/variantstore/wdl/test/GvsQuickstartIntegration.wdl b/scripts/variantstore/wdl/test/GvsQuickstartIntegration.wdl
index ed8e8f5e106..fb6cc781cd2 100644
--- a/scripts/variantstore/wdl/test/GvsQuickstartIntegration.wdl
+++ b/scripts/variantstore/wdl/test/GvsQuickstartIntegration.wdl
@@ -38,8 +38,8 @@ workflow GvsQuickstartIntegration {
     }

     String expected_subdir = if (!chr20_X_Y_only) then "all_chrs/" else ""
-    File expected_output_prefix = "gs://gvs-internal-quickstart/integration/2025-07-21/" + expected_subdir
-    File truth_data_prefix = "gs://gvs-internal-quickstart/integration/test_data/2025-07-21/" + expected_subdir
+    File expected_output_prefix = "gs://gvs-internal-quickstart/integration/2025-12-05/" + expected_subdir
+    File truth_data_prefix = "gs://gvs-internal-quickstart/integration/test_data/2025-12-05/" + expected_subdir

     # WDL 1.0 trick to set a variable ('none') to be undefined.
     if (false) {
diff --git a/src/main/resources/placate_codeql.js b/src/main/resources/placate_codeql.js
new file mode 100644
index 00000000000..af400fa033e
--- /dev/null
+++ b/src/main/resources/placate_codeql.js
@@ -0,0 +1,9 @@
+`
+
+This file exists solely to prevent CodeQL from failing its "Analyze (javascript-typescript)" action with:
+
+CodeQL detected code written in Java/Kotlin, Python and GitHub Actions, but not any written in JavaScript/TypeScript. Confirm that there is some source code for JavaScript/TypeScript in the project.
+
+Because apart from the contents of this file, there is currently no JavaScript or TypeScript in the GATK repo.
+
+`;