diff --git a/defaults/auspice_config.json b/defaults/auspice_config.json index 7e0894a28..3c646bcdc 100644 --- a/defaults/auspice_config.json +++ b/defaults/auspice_config.json @@ -10,6 +10,16 @@ "title": "Emerging Lineage", "type": "categorical" }, + { + "key": "immune_escape", + "title": "Immune Escape vs BA.2", + "type": "continuous" + }, + { + "key": "ace2_binding", + "title": "ACE2 binding vs BA.2", + "type": "continuous" + }, { "key": "pango_lineage", "title": "PANGO Lineage", diff --git a/defaults/parameters.yaml b/defaults/parameters.yaml index ae71149fa..4cf6391b4 100644 --- a/defaults/parameters.yaml +++ b/defaults/parameters.yaml @@ -47,6 +47,7 @@ sanitize_metadata: - "GC-Content=gc_content" reference_node_name: "USA/WA1/2020" +nextclade_dataset: sars-cov-2 # Define files used for external configuration. Common examples consist of a # list of strains to include and exclude from analyses, a reference sequence to diff --git a/docs/src/reference/change_log.md b/docs/src/reference/change_log.md index abd7d707f..5372f596c 100644 --- a/docs/src/reference/change_log.md +++ b/docs/src/reference/change_log.md @@ -5,7 +5,9 @@ We also use this change log to document new features that maintain backward comp ## New features since last version update -- 30 January 2022: Include new clade 23A correspoding to Pango lineage XBB.1.5. See [PR 1043](https://github.com/nextstrain/ncov/pull/1043) for the rationale behind this clade update. +- 16 March 2023: Add a build configuration option, `nextclade_dataset`, to allow users to change the Nextclade dataset used for alignment and quality control. For example, setting `nextclade_dataset: sars-cov-2-21L` will use the BA.2 (Nextstrain 21L) dataset that provides immune escape and ACE2 binding scores. [See the workflow configuration guide for more details](https://docs.nextstrain.org/projects/ncov/en/latest/reference/workflow-config-file.html#nextclade-dataset). 
[PR 1046](https://github.com/nextstrain/ncov/pull/1046) + +- 30 January 2023: Include new clade 23A corresponding to Pango lineage XBB.1.5. See [PR 1043](https://github.com/nextstrain/ncov/pull/1043) for the rationale behind this clade update. - 9 December 2022: Add `immune escape` and `ace2_binding` from metadata as colorings for `nextstrain-open` and `nextstrain-gisaid` builds. [PR 1036](https://github.com/nextstrain/ncov/pull/1036) diff --git a/docs/src/reference/workflow-config-file.rst b/docs/src/reference/workflow-config-file.rst index 52a12e831..04878ddf5 100644 --- a/docs/src/reference/workflow-config-file.rst +++ b/docs/src/reference/workflow-config-file.rst @@ -374,6 +374,17 @@ Secondary configuration These parameters are other high-level parameters which may affect multiple Snakemake rules, or modify which rules are run. +nextclade_dataset +----------------- + +- type: string +- description: Name of a Nextclade dataset that appears in the output of ``nextclade dataset list``. The workflow will download the corresponding dataset by running ``nextclade dataset get --name {nextclade_dataset}`` where the value in the curly brackets is the value defined in the configuration file. The final alignment for each build will use the reference sequence and gene map from this dataset. 
+- default: ``sars-cov-2`` +- examples: + + - ``sars-cov-2-21L`` + - ``sars-cov-2-no-recomb`` + default_build_name ------------------ diff --git a/scripts/join-metadata-and-clades.py b/scripts/join-metadata-and-clades.py index 108f7770c..dccba30aa 100644 --- a/scripts/join-metadata-and-clades.py +++ b/scripts/join-metadata-and-clades.py @@ -37,7 +37,9 @@ "deletions": "deletions", "insertions": "insertions", "substitutions": "substitutions", - "aaSubstitutions": "aaSubstitutions" + "aaSubstitutions": "aaSubstitutions", + "immune_escape": "immune_escape", + "ace2_binding": "ace2_binding", } preferred_types = { @@ -137,7 +139,8 @@ def main(): result.loc[np.isnan(div_array)|np.isnan(t), "clock_deviation"] = np.nan for col in list(column_map.values()) + ["clock_deviation"]: - result[col] = result[col].fillna(VALUE_MISSING_DATA) + if col in result: + result[col] = result[col].fillna(VALUE_MISSING_DATA) # Move the new column so that it's next to other clade columns if INSERT_BEFORE_THIS_COLUMN in result.columns: diff --git a/workflow/snakemake_rules/main_workflow.smk b/workflow/snakemake_rules/main_workflow.smk index c532e7177..bfb0bf7aa 100644 --- a/workflow/snakemake_rules/main_workflow.smk +++ b/workflow/snakemake_rules/main_workflow.smk @@ -458,7 +458,7 @@ rule prepare_nextclade: output: nextclade_dataset = "data/sars-cov-2-nextclade-defaults.zip", params: - name = "sars-cov-2", + name = config["nextclade_dataset"], conda: config["conda_environment"] shell: """