From 0b6e138616e42d7d93e6d2b630dfa18d7674ab15 Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Wed, 18 Jun 2025 13:12:47 +1000 Subject: [PATCH 01/14] define workflow --- src/benchcab/data/meorg_jobscript.j2 | 36 ++++++++++++++++++++++------ src/benchcab/internal.py | 16 ++++++++++++- src/benchcab/utils/meorg.py | 2 +- 3 files changed, 45 insertions(+), 9 deletions(-) diff --git a/src/benchcab/data/meorg_jobscript.j2 b/src/benchcab/data/meorg_jobscript.j2 index 1c89caa3..0495e052 100644 --- a/src/benchcab/data/meorg_jobscript.j2 +++ b/src/benchcab/data/meorg_jobscript.j2 @@ -18,19 +18,39 @@ set -ev # Set some things DATA_DIR={{data_dir}} NUM_THREADS={{num_threads}} -MODEL_OUTPUT_ID={{model_output_id}} CACHE_DELAY={{cache_delay}} -MEORG_BIN={{meorg_bin}} +MEORG_BIN={{me.meorg_bin}} +MODEL_OUTPUT_NAME={{me.model_output_name}} +MODEL_OUTPUT_ARGS=() -{% if purge_outputs %} -# Purge existing model outputs -echo "Purging existing outputs from $MODEL_OUTPUT_ID" + +# Create new model output entity +MODEL_OUTPUT_ARGS+=(--state-selection {{ me.state_selection }}) +MODEL_OUTPUT_ARGS+=(--parameter-selection {{ me.parameter_selection }}) +{% if me.is_bundle %} +MODEL_OUTPUT_ARGS+=(--is-bundle) +{% endif %} + +MODEL_OUTPUT_ID=$($MEORG_BIN output query $MODEL_OUTPUT_NAME | head -n 1) +{% if $MODEL_OUTPUT_ID %} +echo "Deleting existing files from model output ID" $MEORG_BIN file detach_all $MODEL_OUTPUT_ID +echo "Update model output ID" +{% else %} +echo "Create new model output ID" {% endif %} +MODEL_OUTPUT_ID="$($MEORG_BIN output create $MODEL_PROFILE_ID $MODEL_OUTPUT_NAME $MODEL_OUTPUT_ARGS)" + +echo "Add experiments to model output" +$MEORG_BIN experiments update $MODEL_OUTPUT_ID {{ model_output_experiment_ids|join(',') }} + +echo "Add benchmarks to model output" +$MEORG_BIN benchmark replace $MODEL_OUTPUT_ID {{ model_output_benchmark_ids|join(',') }} + # Upload the data echo "Uploading data to $MODEL_OUTPUT_ID" -$MEORG_BIN file upload $DATA_DIR/*.nc -n $NUM_THREADS 
--attach_to $MODEL_OUTPUT_ID +$MEORG_BIN file upload $DATA_DIR/*.nc -n $NUM_THREADS $MODEL_OUTPUT_ID # Wait for the cache to transfer to the object store. echo "Waiting for object store transfer ($CACHE_DELAY sec)" @@ -38,6 +58,8 @@ sleep $CACHE_DELAY # Trigger the analysis echo "Triggering analysis on $MODEL_OUTPUT_ID" -$MEORG_BIN analysis start $MODEL_OUTPUT_ID +{% for exp_id in model_output_experiment_ids %} +$MEORG_BIN analysis start $MODEL_OUTPUT_ID {{ exp_id }} +{% endfor %} echo "DONE" \ No newline at end of file diff --git a/src/benchcab/internal.py b/src/benchcab/internal.py index 66af72d8..94772c1f 100644 --- a/src/benchcab/internal.py +++ b/src/benchcab/internal.py @@ -249,9 +249,14 @@ "US-Whs", "US-Wkg", ], + "AU-Tum-P2": ["AU-Tum"] } -FLUXSITE_DEFAULT_EXPERIMENT = "forty-two-site-test" +MEORG_EXPERIMENT_MAP = { + "AU-Tum-P2": "aGKRjGTwckAytEjf5" +} + +FLUXSITE_DEFAULT_EXPERIMENT = "AU-Tum-P2" # "forty-two-site-test" FLUXSITE_DEFAULT_MEORG_MODEL_OUTPUT_ID = False OPTIONAL_COMMANDS = ["fluxsite-bitwise-cmp", "gen_codecov"] @@ -276,6 +281,12 @@ def get_met_forcing_file_names(experiment: str) -> list[str]: return file_names +def get_model_output_name(config: dict): + # format + # R1 - R2 ... 
Rn + # Rx = model.name, if local then only the last part + # Prepend CABLE + pass # Configuration for the client upload MEORG_CLIENT = dict( @@ -285,3 +296,6 @@ def get_met_forcing_file_names(experiment: str) -> list[str]: walltime="01:00:00", storage=["gdata/ks32", "gdata/hh5", "gdata/wd9", "gdata/rp23"], ) + +# MEORG_PROFILE = ("CABLE", "nFcjg4qqHGPkB9sqE") +MEORG_PROFILE = ("test-output", "QhrHMxeQcgbXboong") \ No newline at end of file diff --git a/src/benchcab/utils/meorg.py b/src/benchcab/utils/meorg.py index 9ee35dff..f7bbc94f 100644 --- a/src/benchcab/utils/meorg.py +++ b/src/benchcab/utils/meorg.py @@ -10,7 +10,7 @@ def do_meorg(config: dict, upload_dir: str, benchcab_bin: str, benchcab_job_id: str): - """Perform the upload of model outputs to modelevaluation.org + """Perform the upload of model outputs to modelevaluation.org. Parameters ---------- From 56b399c28ccb02689a51d6a987b70463a5f7863a Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Wed, 6 Aug 2025 12:15:45 +1000 Subject: [PATCH 02/14] working script ncitest --- src/benchcab/data/meorg_jobscript.j2 | 40 ++++++++++++++-------------- src/benchcab/internal.py | 12 ++++----- src/benchcab/utils/meorg.py | 22 ++++++++++++--- 3 files changed, 45 insertions(+), 29 deletions(-) diff --git a/src/benchcab/data/meorg_jobscript.j2 b/src/benchcab/data/meorg_jobscript.j2 index 0495e052..f2ea8e0d 100644 --- a/src/benchcab/data/meorg_jobscript.j2 +++ b/src/benchcab/data/meorg_jobscript.j2 @@ -19,34 +19,30 @@ set -ev DATA_DIR={{data_dir}} NUM_THREADS={{num_threads}} CACHE_DELAY={{cache_delay}} -MEORG_BIN={{me.meorg_bin}} -MODEL_OUTPUT_NAME={{me.model_output_name}} +MEORG_BIN={{meorg_bin}} +MODEL_PROFILE_ID={{ model_prof_id }} +MODEL_OUTPUT_NAME={{ mo.name }} MODEL_OUTPUT_ARGS=() - # Create new model output entity -MODEL_OUTPUT_ARGS+=(--state-selection {{ me.state_selection }}) -MODEL_OUTPUT_ARGS+=(--parameter-selection {{ me.parameter_selection }}) -{% if me.is_bundle %} -MODEL_OUTPUT_ARGS+=(--is-bundle) 
+MODEL_OUTPUT_ARGS+="--state-selection {{ mo.state_selection }}" +MODEL_OUTPUT_ARGS+=" --parameter-selection {{ mo.parameter_selection }}" +{% if mo.is_bundle %} +MODEL_OUTPUT_ARGS+=" --is-bundle" {% endif %} -MODEL_OUTPUT_ID=$($MEORG_BIN output query $MODEL_OUTPUT_NAME | head -n 1) -{% if $MODEL_OUTPUT_ID %} +MODEL_OUTPUT_ID=$($MEORG_BIN output query $MODEL_OUTPUT_NAME | head -n 1 ) +if [ ! -z "${MODEL_OUTPUT_ID}" ] ; then echo "Deleting existing files from model output ID" -$MEORG_BIN file detach_all $MODEL_OUTPUT_ID -echo "Update model output ID" -{% else %} +$MEORG_BIN file delete_all $MODEL_OUTPUT_ID +echo "Updated model output ID" +else echo "Create new model output ID" -{% endif %} - -MODEL_OUTPUT_ID="$($MEORG_BIN output create $MODEL_PROFILE_ID $MODEL_OUTPUT_NAME $MODEL_OUTPUT_ARGS)" +fi +MODEL_OUTPUT_ID="$($MEORG_BIN output create $MODEL_PROFILE_ID $MODEL_OUTPUT_NAME $MODEL_OUTPUT_ARGS | head -n 1 | awk '{print $NF}')" echo "Add experiments to model output" -$MEORG_BIN experiments update $MODEL_OUTPUT_ID {{ model_output_experiment_ids|join(',') }} - -echo "Add benchmarks to model output" -$MEORG_BIN benchmark replace $MODEL_OUTPUT_ID {{ model_output_benchmark_ids|join(',') }} +$MEORG_BIN experiment update $MODEL_OUTPUT_ID {{ model_exp_ids|join(',') }} # Upload the data echo "Uploading data to $MODEL_OUTPUT_ID" @@ -56,10 +52,14 @@ $MEORG_BIN file upload $DATA_DIR/*.nc -n $NUM_THREADS $MODEL_OUTPUT_ID echo "Waiting for object store transfer ($CACHE_DELAY sec)" sleep $CACHE_DELAY +{% for exp_id in model_exp_ids %} +echo "Replace benchmarks to model output" +$MEORG_BIN benchmark update $MODEL_OUTPUT_ID {{ exp_id }} {{ model_benchmark_ids|join(',') }} + # Trigger the analysis echo "Triggering analysis on $MODEL_OUTPUT_ID" -{% for exp_id in model_output_experiment_ids %} $MEORG_BIN analysis start $MODEL_OUTPUT_ID {{ exp_id }} + {% endfor %} echo "DONE" \ No newline at end of file diff --git a/src/benchcab/internal.py b/src/benchcab/internal.py index 
94772c1f..263be377 100644 --- a/src/benchcab/internal.py +++ b/src/benchcab/internal.py @@ -249,14 +249,12 @@ "US-Whs", "US-Wkg", ], - "AU-Tum-P2": ["AU-Tum"] + "AU-Tum-P2": ["AU-Tum"], } -MEORG_EXPERIMENT_MAP = { - "AU-Tum-P2": "aGKRjGTwckAytEjf5" -} +MEORG_EXPERIMENT_MAP = {"AU-Tum-P2": "aGKRjGTwckAytEjf5"} -FLUXSITE_DEFAULT_EXPERIMENT = "AU-Tum-P2" # "forty-two-site-test" +FLUXSITE_DEFAULT_EXPERIMENT = "AU-Tum-P2" # "forty-two-site-test" FLUXSITE_DEFAULT_MEORG_MODEL_OUTPUT_ID = False OPTIONAL_COMMANDS = ["fluxsite-bitwise-cmp", "gen_codecov"] @@ -281,6 +279,7 @@ def get_met_forcing_file_names(experiment: str) -> list[str]: return file_names + def get_model_output_name(config: dict): # format # R1 - R2 ... Rn @@ -288,6 +287,7 @@ def get_model_output_name(config: dict): # Prepend CABLE pass + # Configuration for the client upload MEORG_CLIENT = dict( num_threads=1, # Parallel uploads over 4 cores @@ -298,4 +298,4 @@ def get_model_output_name(config: dict): ) # MEORG_PROFILE = ("CABLE", "nFcjg4qqHGPkB9sqE") -MEORG_PROFILE = ("test-output", "QhrHMxeQcgbXboong") \ No newline at end of file +MEORG_PROFILE = ("test-output", "QhrHMxeQcgbXboong") diff --git a/src/benchcab/utils/meorg.py b/src/benchcab/utils/meorg.py index f7bbc94f..b7356e3a 100644 --- a/src/benchcab/utils/meorg.py +++ b/src/benchcab/utils/meorg.py @@ -7,9 +7,12 @@ import benchcab.utils as bu from benchcab.internal import MEORG_CLIENT +from benchcab.utils import interpolate_file_template -def do_meorg(config: dict, upload_dir: str, benchcab_bin: str, benchcab_job_id: str): +def do_meorg( + config: dict, upload_dir: str, benchcab_bin: str, benchcab_job_id: str = None +): """Perform the upload of model outputs to modelevaluation.org. 
Parameters @@ -80,15 +83,28 @@ def do_meorg(config: dict, upload_dir: str, benchcab_bin: str, benchcab_job_id: logger.info("Uploading outputs to modelevaluation.org") + mo = { + "state_selection": "default", + "parameter_selection": "automated", + "is_bundle": True, + "name": "benchcab_test_workflow", + } + model_prof_id = "QhrHMxeQcgbXboong" + model_exp_ids = ["N4cfSrR49NPCRvAmS"] + model_benchmark_ids = ["8yNz4bHKoqwznLuK2"] + # Submit the outputs client = get_client() meorg_jobid = client.submit( bu.get_installed_root() / "data" / "meorg_jobscript.j2", render=True, dry_run=False, - depends_on=benchcab_job_id, + # depends_on=benchcab_job_id, # Interpolate into the job script - model_output_id=model_output_id, + mo=mo, + model_prof_id=model_prof_id, + model_exp_ids=model_exp_ids, + model_benchmark_ids=model_benchmark_ids, data_dir=upload_dir, cache_delay=MEORG_CLIENT["cache_delay"], mem=MEORG_CLIENT["mem"], From 52d23490de8ef321ac5f17467db3e968c9154064 Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Wed, 13 Aug 2025 13:29:49 +1000 Subject: [PATCH 03/14] set config option for name (coupled for now) --- src/benchcab/benchcab.py | 18 ++++++++++++++++++ src/benchcab/data/config-schema.yml | 4 ++++ src/benchcab/utils/meorg.py | 29 ++++++++++++++++++++++++++--- 3 files changed, 48 insertions(+), 3 deletions(-) diff --git a/src/benchcab/benchcab.py b/src/benchcab/benchcab.py index 419b7a75..daf10f93 100644 --- a/src/benchcab/benchcab.py +++ b/src/benchcab/benchcab.py @@ -153,6 +153,7 @@ def _get_config(self, config_path: str) -> dict: return self._config def _get_models(self, config: dict) -> list[Model]: + model_output_name_idx = None if not self._models: for id, sub_config in enumerate(config["realisations"]): repo = create_repo( @@ -161,6 +162,20 @@ def _get_models(self, config: dict) -> list[Model]: / (sub_config["name"] if sub_config["name"] else Path()), ) self._models.append(Model(repo=repo, model_id=id, **sub_config)) + + if sub_config.get("model_output_name") 
is not None: + if model_output_name_idx is not None: + msg = "More than 1" + raise Exception(msg) + + model_output_name_idx = id + + if model_output_name_idx is None: + msg = "None" + raise Exception(msg) + + config["model_output_name"] = self._models[model_output_name_idx].name + return self._models def _fluxsite_show_task_composition(self, config: dict) -> str: @@ -207,6 +222,9 @@ def fluxsite_submit_job(self, config_path: str, skip: list[str]) -> None: msg = "Path to benchcab executable is undefined." raise RuntimeError(msg) + # TODO: Better method + _ = self._get_models(config) + job_script_path = Path(internal.QSUB_FNAME) logger.info("Creating PBS job script to run fluxsite tasks on compute nodes") diff --git a/src/benchcab/data/config-schema.yml b/src/benchcab/data/config-schema.yml index 654d1a5a..5461e922 100644 --- a/src/benchcab/data/config-schema.yml +++ b/src/benchcab/data/config-schema.yml @@ -51,6 +51,10 @@ realisations: path: type: "string" required: true + model_output_name: + nullable: true + type: "boolean" + required: false name: nullable: true type: "string" diff --git a/src/benchcab/utils/meorg.py b/src/benchcab/utils/meorg.py index b7356e3a..ee5a95ac 100644 --- a/src/benchcab/utils/meorg.py +++ b/src/benchcab/utils/meorg.py @@ -87,11 +87,34 @@ def do_meorg( "state_selection": "default", "parameter_selection": "automated", "is_bundle": True, - "name": "benchcab_test_workflow", + "name": config["model_output_name"], } - model_prof_id = "QhrHMxeQcgbXboong" - model_exp_ids = ["N4cfSrR49NPCRvAmS"] + model_prof_id = "nFcjg4qqHGPkB9sqE" + model_exp_ids = ["jwN9jNMWLEzbT2i9D"] model_benchmark_ids = ["8yNz4bHKoqwznLuK2"] + print( + interpolate_file_template( + "meorg_jobscript.j2", + render=True, + dry_run=False, + # depends_on=benchcab_job_id, + # Interpolate into the job script + mo=mo, + model_prof_id=model_prof_id, + model_exp_ids=model_exp_ids, + model_benchmark_ids=model_benchmark_ids, + data_dir=upload_dir, + 
cache_delay=MEORG_CLIENT["cache_delay"], + mem=MEORG_CLIENT["mem"], + num_threads=MEORG_CLIENT["num_threads"], + walltime=MEORG_CLIENT["walltime"], + storage=MEORG_CLIENT["storage"], + project=config["project"], + modules=config["modules"], + purge_outputs=True, + meorg_bin=meorg_bin, + ) + ) # Submit the outputs client = get_client() From f411043cf43be0fecd7254e02dd64275f60e0b48 Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Mon, 18 Aug 2025 08:25:41 +1000 Subject: [PATCH 04/14] Add functionality for config model output name --- docs/user_guide/config_options.md | 18 ++++---- docs/user_guide/use_cases.md | 40 +++++++---------- src/benchcab/benchcab.py | 17 ------- src/benchcab/config.py | 40 ++++++++++++++--- src/benchcab/data/config-schema.yml | 6 --- src/benchcab/data/test/config-basic.yml | 1 + src/benchcab/data/test/config-optional.yml | 2 +- src/benchcab/data/test/integration_meorg.sh | 2 +- src/benchcab/internal.py | 49 +++++++++++++++------ src/benchcab/utils/meorg.py | 28 ++++++------ tests/test_config.py | 19 +++++--- 11 files changed, 124 insertions(+), 98 deletions(-) diff --git a/docs/user_guide/config_options.md b/docs/user_guide/config_options.md index b5803fe8..ddc6f13a 100644 --- a/docs/user_guide/config_options.md +++ b/docs/user_guide/config_options.md @@ -68,7 +68,6 @@ fluxsite: walltime: 06:00:00 storage: [scratch/a00, gdata/xy11] multiprocess: True - meorg_model_output_id: XXXXXXXX ``` ### [experiment](#experiment) @@ -164,14 +163,6 @@ fluxsites: ``` -### [meorg_model_output_id](#meorg_model_output_id) - -: **Default:** False, _optional key_. :octicons-dash-24: The unique Model Output ID from modelevaluation.org to which output files will be automatically uploaded for analysis. - -A separate upload job will be submitted at the successful completion of benchcab tasks if this key is present, however, the validity is not checked by benchcab at this stage. 
- -Note: It is the user's responsbility to ensure the model output is configured on modelevaluation.org. - ## spatial Contains settings specific to spatial tests. @@ -381,6 +372,13 @@ realisations: : **Default:** _required key, no default_. :octicons-dash-24: Specify the local checkout path of CABLE branch. +### [meorg_model_output_name](#meorg_model_output_name) + +: **Default:** False :octicons-dash-24: The Model Output Name from modelevaluation.org to which output files will be automatically uploaded for analysis. Chosen as the model name from one of the realisations. The user must set only one of the realisations keys as True for the name to be chosen. + +Note: It is the user's responsbility to ensure the model output name does not clash with existing names belonging to other users on modelevaluation.org. + + ### [name](#name) : **Default:** base name of [branch_path](#+repo.svn.branch_path) if an SVN repository is given; the branch name if a git repository is given; the folder name if a local path is given, _optional key_. :octicons-dash-24: An alias name used internally by `benchcab` for the branch. The `name` key also specifies the directory name of the source code when retrieving from SVN, GitHub or local. @@ -506,7 +504,7 @@ codecov: ## meorg_bin -: **Default:** False, _optional key. :octicons-dash-24: Specifies the absolute system path to the ME.org client executable. In the absence of this key it will be inferred from the same directory as benchcab should `meorg_model_output_id` be set in `fluxsite` above. +: **Default:** False, _optional key. :octicons-dash-24: Specifies the absolute system path to the ME.org client executable. In the absence of this key it will be inferred from the same directory as benchcab should `meorg_model_output_name` be set in `realisations` above. 
``` yaml diff --git a/docs/user_guide/use_cases.md b/docs/user_guide/use_cases.md index 123c2ef2..49e16b02 100644 --- a/docs/user_guide/use_cases.md +++ b/docs/user_guide/use_cases.md @@ -35,17 +35,15 @@ realisations: - repo: git: branch: main + model_output_name: True # (1) - repo: git: branch: XXXXX - patch: # (1) + patch: # (2) cable: cable_user: existing_feature: YYYY -fluxsite: - meorg_model_output_id: ZZZZ # (2) - modules: [ intel-compiler/2021.1.1, netcdf/4.7.4, @@ -53,8 +51,8 @@ modules: [ ] ``` -1. Use the option names and values as implemented in the cable namelist file. -2. You need to setup your environment for meorg_client before using this feature. +1. You need to setup your environment for meorg_client before using this feature. +2. Use the option names and values as implemented in the cable namelist file. The evaluation results will be on modelevaluation.org accessible from the Model Output page you've specified @@ -74,17 +72,15 @@ realisations: cable: cable_user: existing_feature: YYYY + model_output_name: True # (2) - repo: git: branch: XXXXX - patch: # (2) + patch: # (3) cable: cable_user: existing_feature: YYYY -fluxsite: - meorg_model_output_id: ZZZZ # (3) - modules: [ intel-compiler/2021.1.1, netcdf/4.7.4, @@ -93,8 +89,8 @@ modules: [ ``` 1. Use the option names and values as implemented in the cable namelist file. -2. Use the option names and values as implemented in the cable namelist file. -3. You need to setup your environment for meorg_client before using this feature. +2. You need to setup your environment for meorg_client before using this feature. +3. Use the option names and values as implemented in the cable namelist file. 
The evaluation results will be on modelevaluation.org accessible from the Model Output page you've specified @@ -110,13 +106,11 @@ realisations: - repo: git: branch: main + model_output_name: True # (2) - repo: git: branch: XXXXX -fluxsite: - meorg_model_output_id: ZZZZ # (1) - modules: [ intel-compiler/2021.1.1, netcdf/4.7.4, @@ -141,21 +135,21 @@ realisations: - repo: git: branch: main + model_output_name: True # (2) - repo: - name: my-feature-off # (2) + name: my-feature-off # (3) local: - path: XXXXX # (3) + path: XXXXX # (4) - repo: name: my-feature-on local: path: XXXXX - patch: # (4) + patch: # (5) cable: cable_user: new_feature: YYYY fluxsite: - meorg_model_output_id: ZZZZ # (5) pbs: # (6) ncpus: 8 mem: 16GB @@ -169,10 +163,10 @@ modules: [ ``` 1. Testing at one flux site only to save time and resources. -2. We are using the same branch twice so we need to name each occurrence differently. -3. Give the full path to your local CABLE repository with your code changes. -4. Use the option names and values as implemented in the cable namelist file. -5. You need to setup your environment for meorg_client before using this feature. +2. You need to setup your environment for meorg_client before using this feature. +3. We are using the same branch twice so we need to name each occurrence differently. +4. Give the full path to your local CABLE repository with your code changes. +5. Use the option names and values as implemented in the cable namelist file. 6. You can reduce the requested resources to reduce the cost of the test. Comparisons of R0 and R1 should show bitwise agreement. R2 and R0 (and R1) comparison on modelevaluation.org shows the impact of the changes. 
\ No newline at end of file diff --git a/src/benchcab/benchcab.py b/src/benchcab/benchcab.py index daf10f93..a3f1e80d 100644 --- a/src/benchcab/benchcab.py +++ b/src/benchcab/benchcab.py @@ -153,7 +153,6 @@ def _get_config(self, config_path: str) -> dict: return self._config def _get_models(self, config: dict) -> list[Model]: - model_output_name_idx = None if not self._models: for id, sub_config in enumerate(config["realisations"]): repo = create_repo( @@ -163,19 +162,6 @@ def _get_models(self, config: dict) -> list[Model]: ) self._models.append(Model(repo=repo, model_id=id, **sub_config)) - if sub_config.get("model_output_name") is not None: - if model_output_name_idx is not None: - msg = "More than 1" - raise Exception(msg) - - model_output_name_idx = id - - if model_output_name_idx is None: - msg = "None" - raise Exception(msg) - - config["model_output_name"] = self._models[model_output_name_idx].name - return self._models def _fluxsite_show_task_composition(self, config: dict) -> str: @@ -222,9 +208,6 @@ def fluxsite_submit_job(self, config_path: str, skip: list[str]) -> None: msg = "Path to benchcab executable is undefined." 
raise RuntimeError(msg) - # TODO: Better method - _ = self._get_models(config) - job_script_path = Path(internal.QSUB_FNAME) logger.info("Creating PBS job script to run fluxsite tasks on compute nodes") diff --git a/src/benchcab/config.py b/src/benchcab/config.py index 14766e19..c3c25c07 100644 --- a/src/benchcab/config.py +++ b/src/benchcab/config.py @@ -10,6 +10,8 @@ import benchcab.utils as bu from benchcab import internal +from benchcab.utils.repo import create_repo +from benchcab.model import Model class ConfigValidationError(Exception): @@ -82,7 +84,7 @@ def read_optional_key(config: dict): Parameters ---------- config : dict - The configuration file with with/without optional keys + The configuration file with without optional keys """ if "project" not in config: @@ -119,12 +121,35 @@ def read_optional_key(config: dict): config["fluxsite"]["pbs"] = internal.FLUXSITE_DEFAULT_PBS | config["fluxsite"].get( "pbs", {} ) - config["fluxsite"]["meorg_model_output_id"] = config["fluxsite"].get( - "meorg_model_output_id", internal.FLUXSITE_DEFAULT_MEORG_MODEL_OUTPUT_ID - ) config["codecov"] = config.get("codecov", False) + return config + + +def add_model_output_name(config: dict): + """Determine model output name from realisations. + + Parameters + ---------- + config : dict + The configuration file with with optional keys + + """ + is_model_output_name = False + for r in config["realisations"]: + assert not is_model_output_name + if r.get("model_output_name"): + is_model_output_name = True + repo = create_repo( + spec=r["repo"], + path=internal.SRC_DIR / (r["name"] if r.get("name") else Path()), + ) + config["model_output_name"] = Model(repo).name + break + assert is_model_output_name + return config + def read_config_file(config_path: str) -> dict: """Load the config file in a dict. @@ -154,6 +179,8 @@ def read_config(config_path: str) -> dict: ---------- config_path : str Path to the configuration file. 
+ is_meorg: str + Whether workflow includes meorg job submission. If true, determine the model output name Returns ------- @@ -169,7 +196,8 @@ def read_config(config_path: str) -> dict: # Read configuration file config = read_config_file(config_path) # Populate configuration dict with optional keys - read_optional_key(config) - # Validate and return. + config = read_optional_key(config) + # Validate. validate_config(config) + config = add_model_output_name(config) return config diff --git a/src/benchcab/data/config-schema.yml b/src/benchcab/data/config-schema.yml index 5461e922..fa104d7b 100644 --- a/src/benchcab/data/config-schema.yml +++ b/src/benchcab/data/config-schema.yml @@ -111,12 +111,6 @@ fluxsite: schema: type: "string" required: false - meorg_model_output_id: - type: - - "boolean" - - "string" - required: false - default: false spatial: type: "dict" diff --git a/src/benchcab/data/test/config-basic.yml b/src/benchcab/data/test/config-basic.yml index 846e64ff..6932c0be 100644 --- a/src/benchcab/data/test/config-basic.yml +++ b/src/benchcab/data/test/config-basic.yml @@ -21,6 +21,7 @@ realisations: - repo: svn: branch_path: trunk + model_output_name: True - repo: svn: branch_path: branches/Users/ccc561/v3.0-YP-changes diff --git a/src/benchcab/data/test/config-optional.yml b/src/benchcab/data/test/config-optional.yml index e36c4365..0948ab98 100644 --- a/src/benchcab/data/test/config-optional.yml +++ b/src/benchcab/data/test/config-optional.yml @@ -3,7 +3,6 @@ project: hh5 fluxsite: experiment: AU-Tum - meorg_model_output_id: False multiprocess: False pbs: ncpus: 6 @@ -31,6 +30,7 @@ realisations: svn: branch_path: trunk name: svn_trunk + model_output_name: True - repo: svn: branch_path: branches/Users/ccc561/v3.0-YP-changes diff --git a/src/benchcab/data/test/integration_meorg.sh b/src/benchcab/data/test/integration_meorg.sh index e90c531c..294b59b9 100644 --- a/src/benchcab/data/test/integration_meorg.sh +++ b/src/benchcab/data/test/integration_meorg.sh @@ 
-31,6 +31,7 @@ realisations: - repo: local: path: $CABLE_DIR + model_output_name: true - repo: git: branch: main @@ -47,7 +48,6 @@ fluxsite: - scratch/$PROJECT - gdata/$PROJECT # This ID is currently configured on the me.org server. - meorg_model_output_id: Sss7qupAHEZ8ovbCv EOL benchcab run -v diff --git a/src/benchcab/internal.py b/src/benchcab/internal.py index 263be377..ececca2d 100644 --- a/src/benchcab/internal.py +++ b/src/benchcab/internal.py @@ -249,13 +249,43 @@ "US-Whs", "US-Wkg", ], - "AU-Tum-P2": ["AU-Tum"], } -MEORG_EXPERIMENT_MAP = {"AU-Tum-P2": "aGKRjGTwckAytEjf5"} +# Map experiment with their IDs and benchmarks +# For now, each experiment is associated with 3 benchmarks (1lin, 3km27, LSTM) +MEORG_EXPERIMENT_ID_MAP = { + "AU-Tum": { + "experiment": "aGKRjGTwckAytEjf5", + "benchmarks": ["J9BBQCJdsuehsmMf2", "N5X2rjmp96baXrrJ3", "Q7Xu6yGGYdzvvAwbn"], + }, + "AU-How": { + "experiment": "XfC6MTEMm23C4m4iL", + "benchmarks": ["tdrQrKmaihmWdZSZu", "qZWhR3g7JfGhKWPa7", "p2SFiZdQw6ChQK6pr"], + }, + "FI-Hyy": { + "experiment": "nXpDC2Yt7RhhwSKor", + "benchmarks": ["Ym7gwY4k2J2pvDKDJ", "xYA3tSrL2bCeEmvai", "kXFt8mCMtHG4rsnJz"], + }, + "US-Var": { + "experiment": "sD9N2dKx4Jca8B82T", + "benchmarks": ["NbMEBX4sPNHNYkTtq", "X3FoGtYvWmjCyRHGd", "uejBLuHnf4RxAqZXH"], + }, + "US-Whs": { + "experiment": "aWDKqBoTe88ssinuc", + "benchmarks": ["QWsdgXGCWYx7HobXJ", "C42GurGaYDdSRrc2x", "zdnCDJXJzuSheP6T5"], + }, + "five-site-test": { + "experiment": "Nb37QxkAz3FczWDd7", + "benchmarks": ["PP4rFWJGiixFZP8q4", "8kWgyuSkwAKyghsFp", "DYWQuYvxZDgEsp4iX"], + }, + "forty-two-site-test": { + "experiment": "s6k22L3WajmiS9uGv", + "benchmarks": ["zKRrfM7bJpxWPcQ3L", "LMvzc2WL5Qa5jKTpv", "D3XqYwQgH88Tx6NCW"], + }, +} + -FLUXSITE_DEFAULT_EXPERIMENT = "AU-Tum-P2" # "forty-two-site-test" -FLUXSITE_DEFAULT_MEORG_MODEL_OUTPUT_ID = False +FLUXSITE_DEFAULT_EXPERIMENT = "AU-Tum" # "forty-two-site-test" OPTIONAL_COMMANDS = ["fluxsite-bitwise-cmp", "gen_codecov"] @@ -280,14 +310,6 @@ def 
get_met_forcing_file_names(experiment: str) -> list[str]: return file_names -def get_model_output_name(config: dict): - # format - # R1 - R2 ... Rn - # Rx = model.name, if local then only the last part - # Prepend CABLE - pass - - # Configuration for the client upload MEORG_CLIENT = dict( num_threads=1, # Parallel uploads over 4 cores @@ -297,5 +319,4 @@ def get_model_output_name(config: dict): storage=["gdata/ks32", "gdata/hh5", "gdata/wd9", "gdata/rp23"], ) -# MEORG_PROFILE = ("CABLE", "nFcjg4qqHGPkB9sqE") -MEORG_PROFILE = ("test-output", "QhrHMxeQcgbXboong") +MEORG_PROFILE = {"name": "CABLE", "id": "nFcjg4qqHGPkB9sqE"} diff --git a/src/benchcab/utils/meorg.py b/src/benchcab/utils/meorg.py index ee5a95ac..a855453d 100644 --- a/src/benchcab/utils/meorg.py +++ b/src/benchcab/utils/meorg.py @@ -6,7 +6,7 @@ from meorg_client.client import Client as MeorgClient import benchcab.utils as bu -from benchcab.internal import MEORG_CLIENT +from benchcab.internal import MEORG_CLIENT, MEORG_PROFILE, MEORG_EXPERIMENT_ID_MAP from benchcab.utils import interpolate_file_template @@ -32,12 +32,12 @@ def do_meorg( """ logger = bu.get_logger() - model_output_id = config["fluxsite"]["meorg_model_output_id"] + model_output_name = config["model_output_name"] num_threads = MEORG_CLIENT["num_threads"] # Check if a model output id has been assigned - if model_output_id == False: - logger.info("No model_output_id found in fluxsite configuration.") + if config.get("model_output_name") is None: + logger.info("No model_output_name resolved in configuration.") logger.info("NOT uploading to modelevaluation.org") return False @@ -63,7 +63,7 @@ def do_meorg( if MeorgClient().is_initialised() == False: logger.warn( - "A model_output_id has been supplied, but the meorg_client is not initialised." + "A model_output_name has been supplied, but the meorg_client is not initialised." ) logger.warn( "To initialise, run `meorg initialise` in the installation environment." 
@@ -72,15 +72,16 @@ def do_meorg( "Once initialised, the outputs from this run can be uploaded with the following command:" ) logger.warn( - f"meorg file upload {upload_dir}/*.nc -n {num_threads} --attach_to {model_output_id}" + f"meorg file upload {upload_dir}/*.nc -n {num_threads} --attach_to {model_output_name}" ) logger.warn("Then the analysis can be triggered with:") - logger.warn(f"meorg analysis start {model_output_id}") + logger.warn(f"meorg analysis start {model_output_name}") return False # Finally, attempt the upload! else: + experiment = config["fluxsite"]["experiment"] logger.info("Uploading outputs to modelevaluation.org") mo = { @@ -89,9 +90,8 @@ def do_meorg( "is_bundle": True, "name": config["model_output_name"], } - model_prof_id = "nFcjg4qqHGPkB9sqE" - model_exp_ids = ["jwN9jNMWLEzbT2i9D"] - model_benchmark_ids = ["8yNz4bHKoqwznLuK2"] + model_exp_id = MEORG_EXPERIMENT_ID_MAP[experiment]["experiment"] + model_benchmark_ids = MEORG_EXPERIMENT_ID_MAP[experiment]["benchmarks"] print( interpolate_file_template( "meorg_jobscript.j2", @@ -100,8 +100,8 @@ def do_meorg( # depends_on=benchcab_job_id, # Interpolate into the job script mo=mo, - model_prof_id=model_prof_id, - model_exp_ids=model_exp_ids, + model_prof_id=MEORG_PROFILE["id"], + model_exp_ids=[model_exp_id], model_benchmark_ids=model_benchmark_ids, data_dir=upload_dir, cache_delay=MEORG_CLIENT["cache_delay"], @@ -125,8 +125,8 @@ def do_meorg( # depends_on=benchcab_job_id, # Interpolate into the job script mo=mo, - model_prof_id=model_prof_id, - model_exp_ids=model_exp_ids, + model_prof_id=MEORG_PROFILE["id"], + model_exp_ids=[model_exp_id], model_benchmark_ids=model_benchmark_ids, data_dir=upload_dir, cache_delay=MEORG_CLIENT["cache_delay"], diff --git a/tests/test_config.py b/tests/test_config.py index e995d995..63bfe18c 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -53,7 +53,7 @@ def no_optional_config() -> dict: return { "modules": ["intel-compiler/2021.1.1", "netcdf/4.7.4", 
"openmpi/4.1.0"], "realisations": [ - {"repo": {"svn": {"branch_path": "trunk"}}}, + {"repo": {"svn": {"branch_path": "trunk"}}, "model_output_name": True}, { "repo": { "svn": {"branch_path": "branches/Users/ccc561/v3.0-YP-changes"} @@ -75,7 +75,6 @@ def all_optional_default_config(no_optional_config) -> dict: "experiment": bi.FLUXSITE_DEFAULT_EXPERIMENT, "multiprocess": bi.FLUXSITE_DEFAULT_MULTIPROCESS, "pbs": bi.FLUXSITE_DEFAULT_PBS, - "meorg_model_output_id": bi.FLUXSITE_DEFAULT_MEORG_MODEL_OUTPUT_ID }, "science_configurations": bi.DEFAULT_SCIENCE_CONFIGURATIONS, "spatial": { @@ -107,7 +106,6 @@ def all_optional_custom_config(no_optional_config) -> dict: "walltime": "10:00:00", "storage": ["scratch/$PROJECT"], }, - "meorg_model_output_id": False }, "science_configurations": [ { @@ -199,15 +197,24 @@ def test_no_project_name( ) +def test_add_model_output_name(no_optional_config): + """Test addition of correct model output name.""" + output_config = bc.add_model_output_name(no_optional_config) + assert output_config == no_optional_config | {"model_output_name": "trunk"} + + @pytest.mark.parametrize( - ("config_str", "output_config"), + ("config_str", "output_config_str"), [ ("config-basic.yml", "all_optional_default_config"), ("config-optional.yml", "all_optional_custom_config"), ], indirect=["config_str"], ) -def test_read_config(config_path, output_config, request): +def test_read_config(config_path, output_config_str, request): """Test overall behaviour of read_config.""" + output_config = request.getfixturevalue(output_config_str) | { + "model_output_name": "trunk" + } config = bc.read_config(config_path) - assert pformat(config) == pformat(request.getfixturevalue(output_config)) + assert pformat(config) == pformat(output_config) From 5305362fdf9e4c2bc1d7f8c0af5951c0a594651b Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Mon, 18 Aug 2025 08:57:12 +1000 Subject: [PATCH 05/14] modifications for pr compatibility --- docs/user_guide/config_options.md | 2 +- 
src/benchcab/benchcab.py | 1 - src/benchcab/config.py | 6 +++++- src/benchcab/utils/meorg.py | 25 +------------------------ tests/test_config.py | 6 +++++- 5 files changed, 12 insertions(+), 28 deletions(-) diff --git a/docs/user_guide/config_options.md b/docs/user_guide/config_options.md index ddc6f13a..ccc1c5db 100644 --- a/docs/user_guide/config_options.md +++ b/docs/user_guide/config_options.md @@ -374,7 +374,7 @@ realisations: ### [meorg_model_output_name](#meorg_model_output_name) -: **Default:** False :octicons-dash-24: The Model Output Name from modelevaluation.org to which output files will be automatically uploaded for analysis. Chosen as the model name from one of the realisations. The user must set only one of the realisations keys as True for the name to be chosen. +: **Default:** False :octicons-dash-24: Chosen as the model name for one of the realisations. This would be the Model Output to which output files will be automatically uploaded for analysis. The user must set only one of the realisations keys as `true` for the name to be chosen. Note: It is the user's responsibility to ensure the model output name does not clash with existing names belonging to other users on modelevaluation.org.
diff --git a/src/benchcab/benchcab.py b/src/benchcab/benchcab.py index a3f1e80d..419b7a75 100644 --- a/src/benchcab/benchcab.py +++ b/src/benchcab/benchcab.py @@ -161,7 +161,6 @@ def _get_models(self, config: dict) -> list[Model]: / (sub_config["name"] if sub_config["name"] else Path()), ) self._models.append(Model(repo=repo, model_id=id, **sub_config)) - return self._models def _fluxsite_show_task_composition(self, config: dict) -> str: diff --git a/src/benchcab/config.py b/src/benchcab/config.py index c3c25c07..c4002549 100644 --- a/src/benchcab/config.py +++ b/src/benchcab/config.py @@ -6,6 +6,7 @@ from pathlib import Path import yaml +import copy from cerberus import Validator import benchcab.utils as bu @@ -136,10 +137,13 @@ def add_model_output_name(config: dict): The configuration file with with optional keys """ + # pure function + config = copy.deepcopy(config) + is_model_output_name = False for r in config["realisations"]: assert not is_model_output_name - if r.get("model_output_name"): + if r.pop("model_output_name", None): is_model_output_name = True repo = create_repo( spec=r["repo"], diff --git a/src/benchcab/utils/meorg.py b/src/benchcab/utils/meorg.py index a855453d..0dad722e 100644 --- a/src/benchcab/utils/meorg.py +++ b/src/benchcab/utils/meorg.py @@ -92,29 +92,6 @@ def do_meorg( } model_exp_id = MEORG_EXPERIMENT_ID_MAP[experiment]["experiment"] model_benchmark_ids = MEORG_EXPERIMENT_ID_MAP[experiment]["benchmarks"] - print( - interpolate_file_template( - "meorg_jobscript.j2", - render=True, - dry_run=False, - # depends_on=benchcab_job_id, - # Interpolate into the job script - mo=mo, - model_prof_id=MEORG_PROFILE["id"], - model_exp_ids=[model_exp_id], - model_benchmark_ids=model_benchmark_ids, - data_dir=upload_dir, - cache_delay=MEORG_CLIENT["cache_delay"], - mem=MEORG_CLIENT["mem"], - num_threads=MEORG_CLIENT["num_threads"], - walltime=MEORG_CLIENT["walltime"], - storage=MEORG_CLIENT["storage"], - project=config["project"], - 
modules=config["modules"], - purge_outputs=True, - meorg_bin=meorg_bin, - ) - ) # Submit the outputs client = get_client() @@ -122,7 +99,7 @@ def do_meorg( bu.get_installed_root() / "data" / "meorg_jobscript.j2", render=True, dry_run=False, - # depends_on=benchcab_job_id, + depends_on=benchcab_job_id, # Interpolate into the job script mo=mo, model_prof_id=MEORG_PROFILE["id"], diff --git a/tests/test_config.py b/tests/test_config.py index 63bfe18c..b9afeac6 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -200,7 +200,10 @@ def test_no_project_name( def test_add_model_output_name(no_optional_config): """Test addition of correct model output name.""" output_config = bc.add_model_output_name(no_optional_config) - assert output_config == no_optional_config | {"model_output_name": "trunk"} + + del no_optional_config["realisations"][0]["model_output_name"] + no_optional_config = no_optional_config | {"model_output_name": "trunk"} + assert output_config == no_optional_config @pytest.mark.parametrize( @@ -216,5 +219,6 @@ def test_read_config(config_path, output_config_str, request): output_config = request.getfixturevalue(output_config_str) | { "model_output_name": "trunk" } + del output_config["realisations"][0]["model_output_name"] config = bc.read_config(config_path) assert pformat(config) == pformat(output_config) From 69c2f2a942e4835bda439f3a7f9d7442518fd24b Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Mon, 18 Aug 2025 09:07:45 +1000 Subject: [PATCH 06/14] fix au-tum id --- src/benchcab/internal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/benchcab/internal.py b/src/benchcab/internal.py index ececca2d..e2b099d7 100644 --- a/src/benchcab/internal.py +++ b/src/benchcab/internal.py @@ -255,7 +255,7 @@ # For now, each experiment is associated with 3 benchmarks (1lin, 3km27, LSTM) MEORG_EXPERIMENT_ID_MAP = { "AU-Tum": { - "experiment": "aGKRjGTwckAytEjf5", + "experiment": "jwN9jNMWLEzbT2i9D", "benchmarks": 
["J9BBQCJdsuehsmMf2", "N5X2rjmp96baXrrJ3", "Q7Xu6yGGYdzvvAwbn"], }, "AU-How": { From 11a51742ca9bd4678c23263a84b9cf9b6fb5b595 Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Mon, 18 Aug 2025 09:08:31 +1000 Subject: [PATCH 07/14] reset default experiment to 42 site test --- src/benchcab/internal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/benchcab/internal.py b/src/benchcab/internal.py index e2b099d7..4d9b0167 100644 --- a/src/benchcab/internal.py +++ b/src/benchcab/internal.py @@ -285,7 +285,7 @@ } -FLUXSITE_DEFAULT_EXPERIMENT = "AU-Tum" # "forty-two-site-test" +FLUXSITE_DEFAULT_EXPERIMENT = "forty-two-site-test" OPTIONAL_COMMANDS = ["fluxsite-bitwise-cmp", "gen_codecov"] From 87dd853e78e8f2a12c4e2fbb61ad73dc90f309d2 Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Mon, 25 Aug 2025 09:44:36 +1000 Subject: [PATCH 08/14] resolve pr issues --- docs/user_guide/config_options.md | 2 +- src/benchcab/config.py | 61 +++++++++++++++++++--- src/benchcab/data/meorg_jobscript.j2 | 13 +++-- src/benchcab/data/test/config-basic.yml | 2 +- src/benchcab/data/test/config-optional.yml | 4 +- src/benchcab/internal.py | 3 +- tests/test_config.py | 44 ++++++++++++---- 7 files changed, 103 insertions(+), 26 deletions(-) diff --git a/docs/user_guide/config_options.md b/docs/user_guide/config_options.md index ccc1c5db..e842378b 100644 --- a/docs/user_guide/config_options.md +++ b/docs/user_guide/config_options.md @@ -372,7 +372,7 @@ realisations: : **Default:** _required key, no default_. :octicons-dash-24: Specify the local checkout path of CABLE branch. -### [meorg_model_output_name](#meorg_model_output_name) +### [model_output_name](#model_output_name) : **Default:** False :octicons-dash-24: Chosen as the model name for one of the realisations. This would be the Model Output to which output files will be automatically uploaded for analysis. The user must set only one of the realisations keys as `true` for the name to be chosen. 
diff --git a/src/benchcab/config.py b/src/benchcab/config.py index c4002549..b1e3743a 100644 --- a/src/benchcab/config.py +++ b/src/benchcab/config.py @@ -13,6 +13,7 @@ from benchcab import internal from benchcab.utils.repo import create_repo from benchcab.model import Model +from typing import Optional class ConfigValidationError(Exception): @@ -85,7 +86,7 @@ def read_optional_key(config: dict): Parameters ---------- config : dict - The configuration file with without optional keys + The configuration file with, or without optional keys """ if "project" not in config: @@ -128,13 +129,47 @@ def read_optional_key(config: dict): return config +def is_valid_model_output_name(name: str) -> Optional[str]: + """Validate model output name against github issue standards. + + Standard: - + + Parameters + ---------- + name: str + The model output name + + Returns + ------- + Optional[str] + If model output name does not meet standard, then return error message + + """ + if len(name) == 0: + return "Model output name is empty" + + if len(name) > 255: + return "Model output name has length more than allowed limit on me.org (255)" + + if " " in name: + return "Model output name cannot have spaces" + + name_keywords = name.split("-") + + if not name_keywords[0].isdigit(): + return "Model output name does not start with number" + + if len(name_keywords) == 1: + return "Model output name does not contain keyword after number" + + def add_model_output_name(config: dict): """Determine model output name from realisations. 
Parameters ---------- config : dict - The configuration file with with optional keys + The configuration file with optional keys """ # pure function @@ -145,11 +180,23 @@ def add_model_output_name(config: dict): assert not is_model_output_name if r.pop("model_output_name", None): is_model_output_name = True - repo = create_repo( - spec=r["repo"], - path=internal.SRC_DIR / (r["name"] if r.get("name") else Path()), - ) - config["model_output_name"] = Model(repo).name + + mo_name = None + if r.get("name"): + mo_name = r["name"] + else: + repo = create_repo( + spec=r["repo"], + path=internal.SRC_DIR / (r["name"] if r.get("name") else Path()), + ) + mo_name = Model(repo).name + + msg = is_valid_model_output_name(mo_name) + + if msg is not None: + raise Exception(msg) + + config["model_output_name"] = mo_name break assert is_model_output_name return config diff --git a/src/benchcab/data/meorg_jobscript.j2 b/src/benchcab/data/meorg_jobscript.j2 index f2ea8e0d..4578ee81 100644 --- a/src/benchcab/data/meorg_jobscript.j2 +++ b/src/benchcab/data/meorg_jobscript.j2 @@ -31,15 +31,19 @@ MODEL_OUTPUT_ARGS+=" --parameter-selection {{ mo.parameter_selection }}" MODEL_OUTPUT_ARGS+=" --is-bundle" {% endif %} +echo "Querying whether $MODEL_OUTPUT_NAME already exists on me.org" MODEL_OUTPUT_ID=$($MEORG_BIN output query $MODEL_OUTPUT_NAME | head -n 1 ) if [ ! 
-z "${MODEL_OUTPUT_ID}" ] ; then +# Re-run analysis on the same model output ID, cleaning up existing files echo "Deleting existing files from model output ID" $MEORG_BIN file delete_all $MODEL_OUTPUT_ID -echo "Updated model output ID" +echo -n "Updated" else -echo "Create new model output ID" +echo -n "Created" fi +echo " $MODEL_OUTPUT_NAME on me.org and given this ID: $MODEL_OUTPUT_ID" + MODEL_OUTPUT_ID="$($MEORG_BIN output create $MODEL_PROFILE_ID $MODEL_OUTPUT_NAME $MODEL_OUTPUT_ARGS | head -n 1 | awk '{print $NF}')" echo "Add experiments to model output" $MEORG_BIN experiment update $MODEL_OUTPUT_ID {{ model_exp_ids|join(',') }} @@ -53,7 +57,7 @@ echo "Waiting for object store transfer ($CACHE_DELAY sec)" sleep $CACHE_DELAY {% for exp_id in model_exp_ids %} -echo "Replace benchmarks to model output" +echo "Add benchmarks to model output" $MEORG_BIN benchmark update $MODEL_OUTPUT_ID {{ exp_id }} {{ model_benchmark_ids|join(',') }} # Trigger the analysis @@ -62,4 +66,5 @@ $MEORG_BIN analysis start $MODEL_OUTPUT_ID {{ exp_id }} {% endfor %} -echo "DONE" \ No newline at end of file +MEORG_BASE_URL_DEV="${MEORG_BASE_URL_DEV:-https://modelevaluation.org/api/}" +echo "Files transferred to me.org. Analysis in progress. 
Open ${MEORG_BASE_URL_DEV}/display/${MODEL_OUTPUT_ID} to see results" \ No newline at end of file diff --git a/src/benchcab/data/test/config-basic.yml b/src/benchcab/data/test/config-basic.yml index 6932c0be..81fe155f 100644 --- a/src/benchcab/data/test/config-basic.yml +++ b/src/benchcab/data/test/config-basic.yml @@ -20,7 +20,7 @@ realisations: - repo: svn: - branch_path: trunk + branch_path: 123-sample model_output_name: True - repo: svn: diff --git a/src/benchcab/data/test/config-optional.yml b/src/benchcab/data/test/config-optional.yml index 0948ab98..09fbc119 100644 --- a/src/benchcab/data/test/config-optional.yml +++ b/src/benchcab/data/test/config-optional.yml @@ -28,8 +28,8 @@ science_configurations: realisations: - repo: svn: - branch_path: trunk - name: svn_trunk + branch_path: 123-sample + name: 123-sample-optional model_output_name: True - repo: svn: diff --git a/src/benchcab/internal.py b/src/benchcab/internal.py index 4d9b0167..1e7ee3b7 100644 --- a/src/benchcab/internal.py +++ b/src/benchcab/internal.py @@ -284,6 +284,7 @@ }, } +MEORG_PROFILE = {"name": "CABLE", "id": "nFcjg4qqHGPkB9sqE"} FLUXSITE_DEFAULT_EXPERIMENT = "forty-two-site-test" @@ -318,5 +319,3 @@ def get_met_forcing_file_names(experiment: str) -> list[str]: walltime="01:00:00", storage=["gdata/ks32", "gdata/hh5", "gdata/wd9", "gdata/rp23"], ) - -MEORG_PROFILE = {"name": "CABLE", "id": "nFcjg4qqHGPkB9sqE"} diff --git a/tests/test_config.py b/tests/test_config.py index b9afeac6..25341148 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -53,7 +53,7 @@ def no_optional_config() -> dict: return { "modules": ["intel-compiler/2021.1.1", "netcdf/4.7.4", "openmpi/4.1.0"], "realisations": [ - {"repo": {"svn": {"branch_path": "trunk"}}, "model_output_name": True}, + {"repo": {"svn": {"branch_path": "123-sample"}}, "model_output_name": True}, { "repo": { "svn": {"branch_path": "branches/Users/ccc561/v3.0-YP-changes"} @@ -122,7 +122,7 @@ def 
all_optional_custom_config(no_optional_config) -> dict: }, "codecov": True, } - branch_names = ["svn_trunk", "git_branch"] + branch_names = ["123-sample-optional", "git_branch"] for c_r, b_n in zip(config["realisations"], branch_names): c_r["name"] = b_n @@ -202,22 +202,48 @@ def test_add_model_output_name(no_optional_config): output_config = bc.add_model_output_name(no_optional_config) del no_optional_config["realisations"][0]["model_output_name"] - no_optional_config = no_optional_config | {"model_output_name": "trunk"} + no_optional_config = no_optional_config | {"model_output_name": "123-sample"} assert output_config == no_optional_config +def test_valid_valid_output_name(): + """Test reading config for a file that may/may not exist.""" + model_output_name = "123-sample-issue" + msg = bc.is_valid_model_output_name(model_output_name) + assert msg is None + + +@pytest.mark.parametrize( + ("model_output_name", "output_msg"), + [ + ("", "Model output name is empty"), + ( + "l" * 256, + "Model output name has length more than allowed limit on me.org (255)", + ), + ("123-fsd f", "Model output name cannot have spaces"), + ("hello-123", "Model output name does not start with number"), + ("123", "Model output name does not contain keyword after number"), + ], +) +def test_invalid_valid_output_name(model_output_name, output_msg): + """Test reading config for a file that may/may not exist.""" + msg = bc.is_valid_model_output_name(model_output_name) + assert msg == output_msg + + @pytest.mark.parametrize( - ("config_str", "output_config_str"), + ("config_str", "model_output_name", "output_config"), [ - ("config-basic.yml", "all_optional_default_config"), - ("config-optional.yml", "all_optional_custom_config"), + ("config-basic.yml", "123-sample", "all_optional_default_config"), + ("config-optional.yml", "123-sample-optional", "all_optional_custom_config"), ], indirect=["config_str"], ) -def test_read_config(config_path, output_config_str, request): +def 
test_read_config(request, config_path, model_output_name, output_config): """Test overall behaviour of read_config.""" - output_config = request.getfixturevalue(output_config_str) | { - "model_output_name": "trunk" + output_config = request.getfixturevalue(output_config) | { + "model_output_name": model_output_name } del output_config["realisations"][0]["model_output_name"] config = bc.read_config(config_path) From fcf3a2c1d807fa5ed1f9491a2a7cbbd22e3fefd3 Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Wed, 27 Aug 2025 12:20:21 +1000 Subject: [PATCH 09/14] pr issue resolve --- docs/user_guide/config_options.md | 7 +- docs/user_guide/use_cases.md | 8 +- src/benchcab/benchcab.py | 15 +-- src/benchcab/config.py | 60 ++++++------ src/benchcab/data/config-schema.yml | 2 +- src/benchcab/data/meorg_jobscript.j2 | 10 +- src/benchcab/data/test/config-basic.yml | 1 - src/benchcab/data/test/config-optional.yml | 2 +- src/benchcab/data/test/integration_meorg.sh | 2 +- src/benchcab/utils/meorg.py | 14 +-- tests/test_config.py | 101 +++++++++++++------- 11 files changed, 132 insertions(+), 90 deletions(-) diff --git a/docs/user_guide/config_options.md b/docs/user_guide/config_options.md index e842378b..b7e5d2ed 100644 --- a/docs/user_guide/config_options.md +++ b/docs/user_guide/config_options.md @@ -372,9 +372,10 @@ realisations: : **Default:** _required key, no default_. :octicons-dash-24: Specify the local checkout path of CABLE branch. -### [model_output_name](#model_output_name) +### [meorg_output_name](#meorg_output_name) -: **Default:** False :octicons-dash-24: Chosen as the model name for one of the realisations. This would be the Model Output to which output files will be automatically uploaded for analysis. The user must set only one of the realisations keys as `true` for the name to be chosen. + +: **Default:** unset, _optional key_. :octicons-dash-24: Chosen as the model name for one of the realisations. 
This would be the Model Output to which output files will be automatically uploaded for analysis. The user must set only one of the realisations keys as `true` for the name to be chosen. Note: It is the user's responsibility to ensure the model output name does not clash with existing names belonging to other users on modelevaluation.org. @@ -504,7 +505,7 @@ codecov: ## meorg_bin -: **Default:** False, _optional key. :octicons-dash-24: Specifies the absolute system path to the ME.org client executable. In the absence of this key it will be inferred from the same directory as benchcab should `meorg_model_output_name` be set in `realisations` above. +: **Default:** False, _optional key. :octicons-dash-24: Specifies the absolute system path to the ME.org client executable. In the absence of this key it will be inferred from the same directory as benchcab should `meorg_output_name` be set in `realisations` above. ``` yaml diff --git a/docs/user_guide/use_cases.md b/docs/user_guide/use_cases.md index 49e16b02..cb939dd9 100644 --- a/docs/user_guide/use_cases.md +++ b/docs/user_guide/use_cases.md @@ -35,7 +35,7 @@ realisations: - repo: git: branch: main - model_output_name: True # (1) + meorg_output_name: True # (1) - repo: git: branch: XXXXX @@ -72,7 +72,7 @@ realisations: cable: cable_user: existing_feature: YYYY - model_output_name: True # (2) + meorg_output_name: True # (2) - repo: git: branch: XXXXX @@ -106,7 +106,7 @@ realisations: - repo: git: branch: main - model_output_name: True # (2) + meorg_output_name: True # (2) - repo: git: branch: XXXXX @@ -135,7 +135,7 @@ realisations: - repo: git: branch: main - model_output_name: True # (2) + meorg_output_name: True # (2) - repo: name: my-feature-off # (3) local: diff --git a/src/benchcab/benchcab.py b/src/benchcab/benchcab.py index 419b7a75..0175f0be 100644 --- a/src/benchcab/benchcab.py +++ b/src/benchcab/benchcab.py @@ -245,13 +245,14 @@ def fluxsite_submit_job(self, config_path: str, skip: list[str]) -> None:
logger.info("The NetCDF output for each task is written to:") logger.info(f"{internal.FLUXSITE_DIRS['OUTPUT']}/_out.nc") - # Upload to meorg by default - bm.do_meorg( - config, - upload_dir=internal.FLUXSITE_DIRS["OUTPUT"], - benchcab_bin=str(self.benchcab_exe_path), - benchcab_job_id=job_id, - ) + # Upload to meorg if meorg_output_name optional key is passed + if config.get("meorg_output_name") is not None: + bm.do_meorg( + config, + upload_dir=internal.FLUXSITE_DIRS["OUTPUT"], + benchcab_bin=str(self.benchcab_exe_path), + benchcab_job_id=job_id, + ) def gen_codecov(self, config_path: str): """Endpoint for `benchcab codecov`.""" diff --git a/src/benchcab/config.py b/src/benchcab/config.py index b1e3743a..515d5a6f 100644 --- a/src/benchcab/config.py +++ b/src/benchcab/config.py @@ -129,7 +129,7 @@ def read_optional_key(config: dict): return config -def is_valid_model_output_name(name: str) -> Optional[str]: +def is_valid_meorg_output_name(name: str) -> Optional[str]: """Validate model output name against github issue standards. Standard: - @@ -146,24 +146,31 @@ def is_valid_model_output_name(name: str) -> Optional[str]: """ if len(name) == 0: - return "Model output name is empty" + return "Model output name is empty\n" + + msg = "" if len(name) > 255: - return "Model output name has length more than allowed limit on me.org (255)" + msg += "The length of model output name must be shorter than 255 characters. E.g.: 1-length-is-20-chars\n" if " " in name: - return "Model output name cannot have spaces" + msg += "Model output name cannot have spaces. It should use dashes (-) to separate words. E.g. 123-word1-word2\n" name_keywords = name.split("-") if not name_keywords[0].isdigit(): - return "Model output name does not start with number" + msg += "Model output name does not start with number, E.g. 
123-number-before-word\n" if len(name_keywords) == 1: - return "Model output name does not contain keyword after number" + msg += "Model output name does not contain keyword after number, E.g. 123-keyword\n" + + if msg == "": + return None + return f"Errors present when validating model output name:\n{msg}" -def add_model_output_name(config: dict): + +def add_meorg_output_name(config: dict): """Determine model output name from realisations. Parameters @@ -175,30 +182,29 @@ def add_model_output_name(config: dict): # pure function config = copy.deepcopy(config) - is_model_output_name = False + mo_names = [True for r in config["realisations"] if r.get("meorg_output_name")] + + if len(mo_names) > 1: + msg = "More than 1 value set as true" + raise AssertionError(msg) + for r in config["realisations"]: - assert not is_model_output_name - if r.pop("model_output_name", None): - is_model_output_name = True - - mo_name = None - if r.get("name"): - mo_name = r["name"] - else: - repo = create_repo( - spec=r["repo"], - path=internal.SRC_DIR / (r["name"] if r.get("name") else Path()), - ) - mo_name = Model(repo).name - - msg = is_valid_model_output_name(mo_name) + if r.pop("meorg_output_name", None): + # `meorg_output_name` decided either via `name` parameter in a realisation, + # otherwise via `Repo` branch name + repo = create_repo( + spec=r["repo"], + path=internal.SRC_DIR / (r["name"] if r.get("name") else Path()), + ) + mo_name = Model(repo, name=r.get("name")).name + + msg = is_valid_meorg_output_name(mo_name) if msg is not None: raise Exception(msg) - config["model_output_name"] = mo_name - break - assert is_model_output_name + config["meorg_output_name"] = mo_name + return config @@ -250,5 +256,5 @@ def read_config(config_path: str) -> dict: config = read_optional_key(config) # Validate. 
validate_config(config) - config = add_model_output_name(config) + config = add_meorg_output_name(config) return config diff --git a/src/benchcab/data/config-schema.yml b/src/benchcab/data/config-schema.yml index fa104d7b..d5c6dba3 100644 --- a/src/benchcab/data/config-schema.yml +++ b/src/benchcab/data/config-schema.yml @@ -51,7 +51,7 @@ realisations: path: type: "string" required: true - model_output_name: + meorg_output_name: nullable: true type: "boolean" required: false diff --git a/src/benchcab/data/meorg_jobscript.j2 b/src/benchcab/data/meorg_jobscript.j2 index 4578ee81..31cf89d3 100644 --- a/src/benchcab/data/meorg_jobscript.j2 +++ b/src/benchcab/data/meorg_jobscript.j2 @@ -21,7 +21,7 @@ NUM_THREADS={{num_threads}} CACHE_DELAY={{cache_delay}} MEORG_BIN={{meorg_bin}} MODEL_PROFILE_ID={{ model_prof_id }} -MODEL_OUTPUT_NAME={{ mo.name }} +meorg_output_name={{ mo.name }} MODEL_OUTPUT_ARGS=() # Create new model output entity @@ -31,8 +31,8 @@ MODEL_OUTPUT_ARGS+=" --parameter-selection {{ mo.parameter_selection }}" MODEL_OUTPUT_ARGS+=" --is-bundle" {% endif %} -echo "Querying whether $MODEL_OUTPUT_NAME already exists on me.org" -MODEL_OUTPUT_ID=$($MEORG_BIN output query $MODEL_OUTPUT_NAME | head -n 1 ) +echo "Querying whether $meorg_output_name already exists on me.org" +MODEL_OUTPUT_ID=$($MEORG_BIN output query $meorg_output_name | head -n 1 ) if [ ! 
-z "${MODEL_OUTPUT_ID}" ] ; then # Re-run analysis on the same model output ID, cleaning up existing files echo "Deleting existing files from model output ID" @@ -42,9 +42,9 @@ else echo -n "Created" fi -echo " $MODEL_OUTPUT_NAME on me.org and given this ID: $MODEL_OUTPUT_ID" +echo " $meorg_output_name on me.org and given this ID: $MODEL_OUTPUT_ID" -MODEL_OUTPUT_ID="$($MEORG_BIN output create $MODEL_PROFILE_ID $MODEL_OUTPUT_NAME $MODEL_OUTPUT_ARGS | head -n 1 | awk '{print $NF}')" +MODEL_OUTPUT_ID="$($MEORG_BIN output create $MODEL_PROFILE_ID $meorg_output_name $MODEL_OUTPUT_ARGS | head -n 1 | awk '{print $NF}')" echo "Add experiments to model output" $MEORG_BIN experiment update $MODEL_OUTPUT_ID {{ model_exp_ids|join(',') }} diff --git a/src/benchcab/data/test/config-basic.yml b/src/benchcab/data/test/config-basic.yml index 81fe155f..9ab44572 100644 --- a/src/benchcab/data/test/config-basic.yml +++ b/src/benchcab/data/test/config-basic.yml @@ -21,7 +21,6 @@ realisations: - repo: svn: branch_path: 123-sample - model_output_name: True - repo: svn: branch_path: branches/Users/ccc561/v3.0-YP-changes diff --git a/src/benchcab/data/test/config-optional.yml b/src/benchcab/data/test/config-optional.yml index 09fbc119..9a2c351e 100644 --- a/src/benchcab/data/test/config-optional.yml +++ b/src/benchcab/data/test/config-optional.yml @@ -30,7 +30,7 @@ realisations: svn: branch_path: 123-sample name: 123-sample-optional - model_output_name: True + meorg_output_name: True - repo: svn: branch_path: branches/Users/ccc561/v3.0-YP-changes diff --git a/src/benchcab/data/test/integration_meorg.sh b/src/benchcab/data/test/integration_meorg.sh index 294b59b9..2ccc3177 100644 --- a/src/benchcab/data/test/integration_meorg.sh +++ b/src/benchcab/data/test/integration_meorg.sh @@ -31,7 +31,7 @@ realisations: - repo: local: path: $CABLE_DIR - model_output_name: true + meorg_output_name: true - repo: git: branch: main diff --git a/src/benchcab/utils/meorg.py b/src/benchcab/utils/meorg.py 
index 0dad722e..cb8d731f 100644 --- a/src/benchcab/utils/meorg.py +++ b/src/benchcab/utils/meorg.py @@ -32,12 +32,12 @@ def do_meorg( """ logger = bu.get_logger() - model_output_name = config["model_output_name"] + meorg_output_name = config["meorg_output_name"] num_threads = MEORG_CLIENT["num_threads"] # Check if a model output id has been assigned - if config.get("model_output_name") is None: - logger.info("No model_output_name resolved in configuration.") + if config.get("meorg_output_name") is None: + logger.info("No meorg_output_name resolved in configuration.") logger.info("NOT uploading to modelevaluation.org") return False @@ -63,7 +63,7 @@ def do_meorg( if MeorgClient().is_initialised() == False: logger.warn( - "A model_output_name has been supplied, but the meorg_client is not initialised." + "A meorg_output_name has been supplied, but the meorg_client is not initialised." ) logger.warn( "To initialise, run `meorg initialise` in the installation environment." @@ -72,10 +72,10 @@ def do_meorg( "Once initialised, the outputs from this run can be uploaded with the following command:" ) logger.warn( - f"meorg file upload {upload_dir}/*.nc -n {num_threads} --attach_to {model_output_name}" + f"meorg file upload {upload_dir}/*.nc -n {num_threads} --attach_to {meorg_output_name}" ) logger.warn("Then the analysis can be triggered with:") - logger.warn(f"meorg analysis start {model_output_name}") + logger.warn(f"meorg analysis start {meorg_output_name}") return False # Finally, attempt the upload! 
@@ -88,7 +88,7 @@ def do_meorg( "state_selection": "default", "parameter_selection": "automated", "is_bundle": True, - "name": config["model_output_name"], + "name": config["meorg_output_name"], } model_exp_id = MEORG_EXPERIMENT_ID_MAP[experiment]["experiment"] model_benchmark_ids = MEORG_EXPERIMENT_ID_MAP[experiment]["benchmarks"] diff --git a/tests/test_config.py b/tests/test_config.py index 25341148..ae186cbd 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -53,7 +53,7 @@ def no_optional_config() -> dict: return { "modules": ["intel-compiler/2021.1.1", "netcdf/4.7.4", "openmpi/4.1.0"], "realisations": [ - {"repo": {"svn": {"branch_path": "123-sample"}}, "model_output_name": True}, + {"repo": {"svn": {"branch_path": "123-sample"}}}, { "repo": { "svn": {"branch_path": "branches/Users/ccc561/v3.0-YP-changes"} @@ -127,6 +127,8 @@ def all_optional_custom_config(no_optional_config) -> dict: for c_r, b_n in zip(config["realisations"], branch_names): c_r["name"] = b_n + config["realisations"][0]["meorg_output_name"] = True + return config @@ -197,54 +199,87 @@ def test_no_project_name( ) -def test_add_model_output_name(no_optional_config): +def test_add_meorg_output_name(all_optional_custom_config): """Test addition of correct model output name.""" - output_config = bc.add_model_output_name(no_optional_config) + output_config = bc.add_meorg_output_name(all_optional_custom_config) - del no_optional_config["realisations"][0]["model_output_name"] - no_optional_config = no_optional_config | {"model_output_name": "123-sample"} - assert output_config == no_optional_config + del all_optional_custom_config["realisations"][0]["meorg_output_name"] + all_optional_custom_config = all_optional_custom_config | { + "meorg_output_name": "123-sample-optional" + } + assert output_config == all_optional_custom_config -def test_valid_valid_output_name(): - """Test reading config for a file that may/may not exist.""" - model_output_name = "123-sample-issue" - msg = 
bc.is_valid_model_output_name(model_output_name) +def test_empty_model_output_name(): + """Test validating empty model output name.""" + msg = bc.is_valid_meorg_output_name("") + assert msg == "Model output name is empty\n" + + +def test_valid_output_name(): + """Test validating correct model output name.""" + meorg_output_name = "123-sample-issue" + msg = bc.is_valid_meorg_output_name(meorg_output_name) assert msg is None @pytest.mark.parametrize( - ("model_output_name", "output_msg"), + ("meorg_output_name", "output_msg"), [ - ("", "Model output name is empty"), ( - "l" * 256, - "Model output name has length more than allowed limit on me.org (255)", + f"123-{'l'*256}", + "The length of model output name must be shorter than 255 characters. E.g.: 1-length-is-20-chars\n", + ), + ( + "123-fsd f", + "Model output name cannot have spaces. It should use dashes (-) to separate words. E.g. 123-word1-word2\n", + ), + ( + "hello-123", + "Model output name does not start with number, E.g. 123-number-before-word\n", + ), + ( + "123", + "Model output name does not contain keyword after number, E.g. 
123-keyword\n", ), - ("123-fsd f", "Model output name cannot have spaces"), - ("hello-123", "Model output name does not start with number"), - ("123", "Model output name does not contain keyword after number"), ], ) -def test_invalid_valid_output_name(model_output_name, output_msg): - """Test reading config for a file that may/may not exist.""" - msg = bc.is_valid_model_output_name(model_output_name) +def test_invalid_output_name(meorg_output_name, output_msg): + """Test validating incorrect model output name.""" + output_msg = f"Errors present when validating model output name:\n{output_msg}" + msg = bc.is_valid_meorg_output_name(meorg_output_name) assert msg == output_msg -@pytest.mark.parametrize( - ("config_str", "model_output_name", "output_config"), - [ - ("config-basic.yml", "123-sample", "all_optional_default_config"), - ("config-optional.yml", "123-sample-optional", "all_optional_custom_config"), - ], - indirect=["config_str"], -) -def test_read_config(request, config_path, model_output_name, output_config): - """Test overall behaviour of read_config.""" - output_config = request.getfixturevalue(output_config) | { - "model_output_name": model_output_name +@pytest.mark.parametrize("config_str", ["config-basic.yml"], indirect=True) +def test_read_basic_config(config_path, all_optional_default_config): + config = bc.read_config(config_path) + assert pformat(config) == pformat(all_optional_default_config) + + +@pytest.mark.parametrize("config_str", ["config-optional.yml"], indirect=True) +def test_read_optional_config(config_path, all_optional_custom_config): + output_config = all_optional_custom_config | { + "meorg_output_name": "123-sample-optional" } - del output_config["realisations"][0]["model_output_name"] + del output_config["realisations"][0]["meorg_output_name"] config = bc.read_config(config_path) assert pformat(config) == pformat(output_config) + + +# @pytest.mark.parametrize( +# ("config_str", "meorg_output_name", "output_config"), +# [ +# 
("config-basic.yml", "123-sample", "all_optional_default_config"), +# ("config-optional.yml", "123-sample-optional", "all_optional_custom_config"), +# ], +# indirect=["config_str"], +# ) +# def test_read_config(request, config_path, meorg_output_name, output_config): +# """Test overall behaviour of read_config.""" +# output_config = request.getfixturevalue(output_config) | { +# "meorg_output_name": meorg_output_name +# } +# output_config["realisations"][0].pop("meorg_output_name", None) +# config = bc.read_config(config_path) +# assert pformat(config) == pformat(output_config) From b1016451a6aed3db271cdded9ef6dfec163b7d9f Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Wed, 27 Aug 2025 13:20:57 +1000 Subject: [PATCH 10/14] update --- docs/user_guide/config_options.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docs/user_guide/config_options.md b/docs/user_guide/config_options.md index b7e5d2ed..e234babd 100644 --- a/docs/user_guide/config_options.md +++ b/docs/user_guide/config_options.md @@ -379,6 +379,17 @@ realisations: Note: It is the user's responsbility to ensure the model output name does not clash with existing names belonging to other users on modelevaluation.org. +The model output name should also follow the Github issue branch format (i.e. it should start with a digit, with words separated by dashes). Finally, the maximum number of characters allowed for `meorg_output_name` is 50. + +This key is _optional_. No default. 
+ +```yaml +realisations: + - repo: + git: + branch: 123-my-branch + model_output_name: True +``` ### [name](#name) From b344ca312dcc2ab351ac1aec18956e234c2179d8 Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Wed, 3 Sep 2025 09:46:42 +1000 Subject: [PATCH 11/14] check name length --- src/benchcab/config.py | 4 ++-- tests/test_config.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/benchcab/config.py b/src/benchcab/config.py index 515d5a6f..af8b481e 100644 --- a/src/benchcab/config.py +++ b/src/benchcab/config.py @@ -150,8 +150,8 @@ def is_valid_meorg_output_name(name: str) -> Optional[str]: msg = "" - if len(name) > 255: - msg += "The length of model output name must be shorter than 255 characters. E.g.: 1-length-is-20-chars\n" + if len(name) > 50: + msg += "The length of model output name must be shorter than 50 characters. E.g.: 1-length-is-20-chars\n" if " " in name: msg += "Model output name cannot have spaces. It should use dashes (-) to separate words. E.g. 123-word1-word2\n" diff --git a/tests/test_config.py b/tests/test_config.py index ae186cbd..ff318f66 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -227,8 +227,8 @@ def test_valid_output_name(): ("meorg_output_name", "output_msg"), [ ( - f"123-{'l'*256}", - "The length of model output name must be shorter than 255 characters. E.g.: 1-length-is-20-chars\n", + f"123-{'l'*48}", + "The length of model output name must be shorter than 50 characters. 
E.g.: 1-length-is-20-chars\n", ), ( "123-fsd f", From 57d53dc17944facae5e375e8b7e23203f1aebd95 Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Wed, 3 Sep 2025 10:47:52 +1000 Subject: [PATCH 12/14] remove old tests for meorg_output_name --- tests/test_config.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/tests/test_config.py b/tests/test_config.py index ff318f66..85002fff 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -265,21 +265,3 @@ def test_read_optional_config(config_path, all_optional_custom_config): del output_config["realisations"][0]["meorg_output_name"] config = bc.read_config(config_path) assert pformat(config) == pformat(output_config) - - -# @pytest.mark.parametrize( -# ("config_str", "meorg_output_name", "output_config"), -# [ -# ("config-basic.yml", "123-sample", "all_optional_default_config"), -# ("config-optional.yml", "123-sample-optional", "all_optional_custom_config"), -# ], -# indirect=["config_str"], -# ) -# def test_read_config(request, config_path, meorg_output_name, output_config): -# """Test overall behaviour of read_config.""" -# output_config = request.getfixturevalue(output_config) | { -# "meorg_output_name": meorg_output_name -# } -# output_config["realisations"][0].pop("meorg_output_name", None) -# config = bc.read_config(config_path) -# assert pformat(config) == pformat(output_config) From 45e1456a0f5afd1c30971169ee8cfd9e9ad2e591 Mon Sep 17 00:00:00 2001 From: Abhaas Goyal Date: Wed, 17 Sep 2025 13:01:05 +1000 Subject: [PATCH 13/14] Update yml versions --- .conda/benchcab-dev.yaml | 2 +- .conda/meta.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.conda/benchcab-dev.yaml b/.conda/benchcab-dev.yaml index 91a17702..ef0c95c9 100644 --- a/.conda/benchcab-dev.yaml +++ b/.conda/benchcab-dev.yaml @@ -17,7 +17,7 @@ dependencies: - gitpython - jinja2 - hpcpy>=0.5.0 - - meorg_client>=0.3.1 + - meorg_client>=0.5.0 # CI - pytest-cov # Dev Dependencies diff --git 
a/.conda/meta.yaml b/.conda/meta.yaml index ba2f8a82..da256dd0 100644 --- a/.conda/meta.yaml +++ b/.conda/meta.yaml @@ -30,4 +30,4 @@ requirements: - gitpython - jinja2 - hpcpy>=0.5.0 - - meorg_client>=0.3.1 + - meorg_client>=0.5.0 From 726d4e95daf6b212aa3b4cfd5eec792a061bf1a8 Mon Sep 17 00:00:00 2001 From: Abhaas Goyal <46426485+abhaasgoyal@users.noreply.github.com> Date: Mon, 22 Sep 2025 09:44:51 +1000 Subject: [PATCH 14/14] Update docs/user_guide/config_options.md Co-authored-by: Sean Bryan <39685865+SeanBryan51@users.noreply.github.com> --- docs/user_guide/config_options.md | 15 +++++++-- docs/user_guide/index.md | 16 +++++----- src/benchcab/benchcab.py | 2 +- src/benchcab/config.py | 32 +++++++++++++------ src/benchcab/data/test/config-optional.yml | 2 +- .../data/test/pbs_jobscript_default.sh | 2 +- .../test/pbs_jobscript_no_skip_codecov.sh | 2 +- .../data/test/pbs_jobscript_skip_optional.sh | 2 +- .../data/test/pbs_jobscript_verbose.sh | 2 +- src/benchcab/internal.py | 2 +- src/benchcab/utils/meorg.py | 1 + src/benchcab/utils/pbs.py | 2 +- tests/test_benchcab.py | 8 ++--- tests/test_config.py | 9 +++--- 14 files changed, 60 insertions(+), 37 deletions(-) diff --git a/docs/user_guide/config_options.md b/docs/user_guide/config_options.md index e234babd..2c83ed44 100644 --- a/docs/user_guide/config_options.md +++ b/docs/user_guide/config_options.md @@ -375,9 +375,9 @@ realisations: ### [meorg_output_name](#meorg_output_name) -: **Default:** unset, _optional key_. :octicons-dash-24: Chosen as the model name for one of the realisations. This would be the Model Output to which output files will be automatically uploaded for analysis. The user must set only one of the realisations keys as `true` for the name to be chosen. +: **Default:** unset, _optional key_. :octicons-dash-24: Chosen as the model name for one of the realisations, if the user wants to upload the Model Output to me.org for further analysis. 
A `base32` format hash derived from `model_profile_id` and `$USER` is appended to the model name. -Note: It is the user's responsbility to ensure the model output name does not clash with existing names belonging to other users on modelevaluation.org. +Note: It is the user's responsibility to ensure the model output name does not clash with existing names belonging to other users on modelevaluation.org. The realisation name is set via `name` if provided, otherwise the default realisation name of the `Repo`. The model output name should also follow the Github issue branch format (i.e. it should start with a digit, with words separated by dashes). Finally, the maximum number of characters allowed for `meorg_output_name` is 50. @@ -388,8 +388,17 @@ realisations: - repo: git: branch: 123-my-branch - model_output_name: True + meorg_output_name: True + - repo: + git: + branch: 456-my-branch ``` +The uploaded name is derived from the model output name, the model profile ID and the user name; +for the example above it could resolve to `123-my-branch-34akg9` (the hash suffix is appended by default). + + + + ### [name](#name) diff --git a/docs/user_guide/index.md b/docs/user_guide/index.md index 189f69e6..ffd99a78 100644 --- a/docs/user_guide/index.md +++ b/docs/user_guide/index.md @@ -13,7 +13,7 @@ In this guide, we will describe: To use `benchcab`, you need to join the following projects at NCI: - [ks32][ks32_mynci] -- [hh5][hh5_mynci] +- [xp65][xp65_mynci] - [wd9][wd9_mynci] if not part of the [cable][cable_mynci] project ## Quick-start @@ -21,8 +21,8 @@ To use `benchcab`, you need to join the following projects at NCI: To launch a `benchcab` run, execute the following in the command line: ```sh -module use /g/data/hh5/public/modules -module load conda/analysis3-unstable +module use /g/data/xp65/public/modules +module load conda/benchcab cd /scratch/$PROJECT/$USER git clone https://github.com/CABLE-LSM/bench_example.git cd bench_example benchcab run ``` ## Installation -The package is already installed for you in the Conda environments under the hh5 project. 
You simply need to load the module for the conda environment: +The package is already installed for you in the Conda environments under the xp65 project. You simply need to load the module for the conda environment: ```bash - module use /g/data/hh5/public/modules - module load conda/analysis3-unstable + module use /g/data/xp65/public/modules + module load conda/benchcab ``` You need to load the module on each new session at NCI on login or compute nodes. !!! Tip "Save the module location" - You should not put any `module load` or `module add` commands in your `$HOME/.bashrc` file. But you can safely store the `module use /g/data/hh5/public/modules` command in your `$HOME/.bashrc` file. This means you won't have to type this line again in other sessions you open on Gadi. + You should not put any `module load` or `module add` commands in your `$HOME/.bashrc` file. But you can safely store the `module use /g/data/xp65/public/modules` command in your `$HOME/.bashrc` file. This means you won't have to type this line again in other sessions you open on Gadi. ## Usage @@ -306,7 +306,7 @@ Please enter your questions as issues on [the benchcab repository][issues-benchc Alternatively, you can also access the ACCESS-NRI User support via [the ACCESS-Hive forum][forum-support]. 
-[hh5_mynci]: https://my.nci.org.au/mancini/project/hh5 +[xp65_mynci]: https://my.nci.org.au/mancini/project/xp65 [ks32_mynci]: https://my.nci.org.au/mancini/project/ks32 [wd9_mynci]: https://my.nci.org.au/mancini/project/wd9 [cable_mynci]: https://my.nci.org.au/mancini/project/cable diff --git a/src/benchcab/benchcab.py b/src/benchcab/benchcab.py index 0175f0be..5377a172 100644 --- a/src/benchcab/benchcab.py +++ b/src/benchcab/benchcab.py @@ -107,7 +107,7 @@ def _validate_environment(self, project: str, modules: list): """ raise AttributeError(msg) - required_groups = set([project, "ks32", "hh5"]) + required_groups = set([project, "ks32", "xp65"]) groups = [grp.getgrgid(gid).gr_name for gid in os.getgroups()] if not required_groups.issubset(groups): msg = ( diff --git a/src/benchcab/config.py b/src/benchcab/config.py index af8b481e..8cb10f5d 100644 --- a/src/benchcab/config.py +++ b/src/benchcab/config.py @@ -8,8 +8,10 @@ import yaml import copy from cerberus import Validator - +import base64 +import hashlib import benchcab.utils as bu +from benchcab.internal import MEORG_PROFILE from benchcab import internal from benchcab.utils.repo import create_repo from benchcab.model import Model @@ -188,23 +190,33 @@ def add_meorg_output_name(config: dict): msg = "More than 1 value set as true" raise AssertionError(msg) + mo_names = "" for r in config["realisations"]: + # `meorg_output_name` decided either via `name` parameter in a realisation, + # otherwise via `Repo` branch name + repo = create_repo( + spec=r["repo"], + path=internal.SRC_DIR / (r["name"] if r.get("name") else Path()), + ) + mo_name = Model(repo, name=r.get("name")).name + + mo_names += mo_name if r.pop("meorg_output_name", None): - # `meorg_output_name` decided either via `name` parameter in a realisation, - # otherwise via `Repo` branch name - repo = create_repo( - spec=r["repo"], - path=internal.SRC_DIR / (r["name"] if r.get("name") else Path()), - ) - mo_name = Model(repo, name=r.get("name")).name - msg 
= is_valid_meorg_output_name(mo_name) - if msg is not None: raise Exception(msg) config["meorg_output_name"] = mo_name + if "meorg_output_name" in config: + user = os.getenv("USER") + mo_name_hash_input = f"{mo_names}{MEORG_PROFILE['id']}{user}" + # hash in bytes form + mo_name_hash_b = hashlib.sha1(mo_name_hash_input.encode()) + # Convert to str and take first 6 characters + mo_name_hash = base64.b32encode(mo_name_hash_b.digest()).decode()[:6] + config["meorg_output_name"] += f"_{mo_name_hash}" + return config diff --git a/src/benchcab/data/test/config-optional.yml b/src/benchcab/data/test/config-optional.yml index 9a2c351e..915433d5 100644 --- a/src/benchcab/data/test/config-optional.yml +++ b/src/benchcab/data/test/config-optional.yml @@ -1,5 +1,5 @@ # Config with optional data -project: hh5 +project: xp65 fluxsite: experiment: AU-Tum diff --git a/src/benchcab/data/test/pbs_jobscript_default.sh b/src/benchcab/data/test/pbs_jobscript_default.sh index 5a00b7af..ec470d10 100644 --- a/src/benchcab/data/test/pbs_jobscript_default.sh +++ b/src/benchcab/data/test/pbs_jobscript_default.sh @@ -7,7 +7,7 @@ #PBS -P tm70 #PBS -j oe #PBS -m e -#PBS -l storage=gdata/ks32+gdata/hh5+gdata/wd9 +#PBS -l storage=gdata/ks32+gdata/xp65+gdata/wd9 set -ev diff --git a/src/benchcab/data/test/pbs_jobscript_no_skip_codecov.sh b/src/benchcab/data/test/pbs_jobscript_no_skip_codecov.sh index 5d732c3a..0cb3d1f1 100644 --- a/src/benchcab/data/test/pbs_jobscript_no_skip_codecov.sh +++ b/src/benchcab/data/test/pbs_jobscript_no_skip_codecov.sh @@ -7,7 +7,7 @@ #PBS -P tm70 #PBS -j oe #PBS -m e -#PBS -l storage=gdata/ks32+gdata/hh5+gdata/wd9 +#PBS -l storage=gdata/ks32+gdata/xp65+gdata/wd9 set -ev diff --git a/src/benchcab/data/test/pbs_jobscript_skip_optional.sh b/src/benchcab/data/test/pbs_jobscript_skip_optional.sh index d6baeecc..d632abcd 100644 --- a/src/benchcab/data/test/pbs_jobscript_skip_optional.sh +++ b/src/benchcab/data/test/pbs_jobscript_skip_optional.sh @@ -7,7 +7,7 @@ #PBS -P tm70 
#PBS -j oe #PBS -m e -#PBS -l storage=gdata/ks32+gdata/hh5+gdata/wd9 +#PBS -l storage=gdata/ks32+gdata/xp65+gdata/wd9 set -ev diff --git a/src/benchcab/data/test/pbs_jobscript_verbose.sh b/src/benchcab/data/test/pbs_jobscript_verbose.sh index ba342778..840041b0 100644 --- a/src/benchcab/data/test/pbs_jobscript_verbose.sh +++ b/src/benchcab/data/test/pbs_jobscript_verbose.sh @@ -7,7 +7,7 @@ #PBS -P tm70 #PBS -j oe #PBS -m e -#PBS -l storage=gdata/ks32+gdata/hh5+gdata/wd9 +#PBS -l storage=gdata/ks32+gdata/xp65+gdata/wd9 set -ev diff --git a/src/benchcab/internal.py b/src/benchcab/internal.py index 1e7ee3b7..358f96c9 100644 --- a/src/benchcab/internal.py +++ b/src/benchcab/internal.py @@ -317,5 +317,5 @@ def get_met_forcing_file_names(experiment: str) -> list[str]: cache_delay=60 * 5, # 5mins between upload and analysis triggering mem="8G", walltime="01:00:00", - storage=["gdata/ks32", "gdata/hh5", "gdata/wd9", "gdata/rp23"], + storage=["gdata/ks32", "gdata/xp65", "gdata/wd9", "gdata/rp23"], ) diff --git a/src/benchcab/utils/meorg.py b/src/benchcab/utils/meorg.py index cb8d731f..de179203 100644 --- a/src/benchcab/utils/meorg.py +++ b/src/benchcab/utils/meorg.py @@ -5,6 +5,7 @@ from hpcpy import get_client from meorg_client.client import Client as MeorgClient + import benchcab.utils as bu from benchcab.internal import MEORG_CLIENT, MEORG_PROFILE, MEORG_EXPERIMENT_ID_MAP from benchcab.utils import interpolate_file_template diff --git a/src/benchcab/utils/pbs.py b/src/benchcab/utils/pbs.py index 3657fbc1..7581b00a 100644 --- a/src/benchcab/utils/pbs.py +++ b/src/benchcab/utils/pbs.py @@ -32,7 +32,7 @@ def render_job_script( between model output files. 
""" verbose_flag = " -v" if verbose else "" - storage_flags = ["gdata/ks32", "gdata/hh5", "gdata/wd9", *pbs_config["storage"]] + storage_flags = ["gdata/ks32", "gdata/xp65", "gdata/wd9", *pbs_config["storage"]] context = dict( verbose_flag=verbose_flag, diff --git a/tests/test_benchcab.py b/tests/test_benchcab.py index 12b5c69c..0bf10512 100644 --- a/tests/test_benchcab.py +++ b/tests/test_benchcab.py @@ -15,13 +15,13 @@ def _set_user_projects(): "os.getgroups" ) as mocked_groups: type(mocked_getgrid.return_value).gr_name = mock.PropertyMock( - return_value="hh5" + return_value="xp65" ) mocked_groups.return_value = [1] yield -@pytest.fixture(scope="module", params=["hh5", "invalid_project_name"]) +@pytest.fixture(scope="module", params=["xp65", "invalid_project_name"]) def config_project(request): """Get config project name.""" return request.param @@ -42,7 +42,7 @@ def config_project(request): @pytest.mark.parametrize( ("config_project", "pytest_error"), [ - ("hh5", does_not_raise()), + ("xp65", does_not_raise()), (None, pytest.raises(AttributeError, match=no_project_name_msg)), ], ) @@ -57,7 +57,7 @@ def test_project_name(config_project, pytest_error): @pytest.mark.parametrize( ("config_project", "pytest_error"), [ - ("hh5", does_not_raise()), + ("xp65", does_not_raise()), ("invalid_project_name", pytest.raises(PermissionError)), ], ) diff --git a/tests/test_config.py b/tests/test_config.py index 85002fff..3403b549 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -13,7 +13,7 @@ import benchcab.utils as bu from benchcab import internal -NO_OPTIONAL_CONFIG_PROJECT = "hh5" +NO_OPTIONAL_CONFIG_PROJECT = "xp65" OPTIONAL_CONFIG_PROJECT = "ks32" @@ -23,6 +23,7 @@ def _set_project_env_variable(monkeypatch): # Clear existing environment variables first with mock.patch.dict(os.environ, clear=True): monkeypatch.setenv("PROJECT", OPTIONAL_CONFIG_PROJECT) + monkeypatch.setenv("USER", "test") yield @@ -205,12 +206,12 @@ def 
test_add_meorg_output_name(all_optional_custom_config): del all_optional_custom_config["realisations"][0]["meorg_output_name"] all_optional_custom_config = all_optional_custom_config | { - "meorg_output_name": "123-sample-optional" + "meorg_output_name": "123-sample-optional_7J3IEJ" } assert output_config == all_optional_custom_config -def test_empty_model_output_name(): +def test_empty_meorg_output_name(): """Test validating empty model output name.""" msg = bc.is_valid_meorg_output_name("") assert msg == "Model output name is empty\n" @@ -260,7 +261,7 @@ def test_read_basic_config(config_path, all_optional_default_config): @pytest.mark.parametrize("config_str", ["config-optional.yml"], indirect=True) def test_read_optional_config(config_path, all_optional_custom_config): output_config = all_optional_custom_config | { - "meorg_output_name": "123-sample-optional" + "meorg_output_name": "123-sample-optional_7J3IEJ" } del output_config["realisations"][0]["meorg_output_name"] config = bc.read_config(config_path)