Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/user_guide/config_options.md
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,7 @@ realisations:

: **Default:** _required key, no default_. :octicons-dash-24: Specify the local checkout path of CABLE branch.

### [meorg_model_output_name](#meorg_model_output_name)
### [model_output_name](#model_output_name)

: **Default:** False :octicons-dash-24: Chosen as the model name for one of the realisations. This would be the Model Output to which output files will be automatically uploaded for analysis. The user must set only one of the realisations keys as `true` for the name to be chosen.

Expand Down
61 changes: 54 additions & 7 deletions src/benchcab/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from benchcab import internal
from benchcab.utils.repo import create_repo
from benchcab.model import Model
from typing import Optional


class ConfigValidationError(Exception):
Expand Down Expand Up @@ -85,7 +86,7 @@ def read_optional_key(config: dict):
Parameters
----------
config : dict
The configuration file with without optional keys
The configuration file with, or without optional keys

"""
if "project" not in config:
Expand Down Expand Up @@ -128,13 +129,47 @@ def read_optional_key(config: dict):
return config


def is_valid_model_output_name(name: str) -> Optional[str]:
"""Validate model output name against github issue standards.

Standard: <digit>-<words-sep-by-dashes>

Parameters
----------
name: str
The model output name

Returns
-------
Optional[str]
If model output name does not meet standard, then return error message

"""
if len(name) == 0:
return "Model output name is empty"

if len(name) > 255:
return "Model output name has length more than allowed limit on me.org (255)"

if " " in name:
return "Model output name cannot have spaces"

name_keywords = name.split("-")

if not name_keywords[0].isdigit():
return "Model output name does not start with number"

if len(name_keywords) == 1:
return "Model output name does not contain keyword after number"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm suggesting to change the error messages to be more direct, always using an affirmative sentence and importantly giving an example.

Suggested change
return "Model output name does not contain keyword after number"
return "Model output name must contain keywords after the initial number. E.g. 123-fixing-met-file-reading-error"

I also wonder if we want to build an error message that indicates all the mistakes in a given name at once, instead of returning on the first error encountered. Something like the following (incomplete and not formatted for the code):

msg=""
if len(name) == 0:
    msg+="Model output name can not be empty.\n"

if len(name) > 255:
    msg+="The length of model output name must be shorter than 255 characters.\n"

if msg:
    msg+="Example valid name: 123-fixing-met-file-reading-error"

return msg

Actually, the check on len(name)==0 indicates a problem with the code itself rather than the config I suspect so it might be weird to give a valid name example but I think it's a small inconvenience.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the check on len(name)==0, I'm trying to treat the function as independent entity from the code (so if the code does have issues in the future, then it would be caught more easily). Let me know if this check is still not needed.



def add_model_output_name(config: dict):
"""Determine model output name from realisations.

Parameters
----------
config : dict
The configuration file with with optional keys
The configuration file with optional keys

"""
# pure function
Expand All @@ -145,11 +180,23 @@ def add_model_output_name(config: dict):
assert not is_model_output_name
if r.pop("model_output_name", None):
is_model_output_name = True
repo = create_repo(
spec=r["repo"],
path=internal.SRC_DIR / (r["name"] if r.get("name") else Path()),
)
config["model_output_name"] = Model(repo).name

mo_name = None
if r.get("name"):
mo_name = r["name"]
else:
repo = create_repo(
spec=r["repo"],
path=internal.SRC_DIR / (r["name"] if r.get("name") else Path()),
)
mo_name = Model(repo).name
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would revert to the previous implementation but using the name argument in create_repo()

Suggested change
mo_name = None
if r.get("name"):
mo_name = r["name"]
else:
repo = create_repo(
spec=r["repo"],
path=internal.SRC_DIR / (r["name"] if r.get("name") else Path()),
)
mo_name = Model(repo).name
mo_name = None
repo = create_repo(
spec=r["repo"],
path=internal.SRC_DIR / (r["name"] if r.get("name") else Path()),
name=r.get("name"),
)
mo_name = Model(repo).name

The implementation of Model().__init__() should assign to Model(repo).name:

  • the branch name if name is not present in the config
  • the value of name if it's present.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To resolve this, I am using r.get("name") both in Repo and Model initialisations since Model cannot determine the name just from Repo (unless it has a mandatory name parameter, which it doesn't for all the sub classes - for example LocalRepo has name but GitRepo doesn't)

            repo = create_repo(
                spec=r["repo"],
                path=internal.SRC_DIR / (r["name"] if r.get("name") else Path()),
            )
            mo_name = Model(repo, name=r.get("name")).name

For now, I'm not making it mandatory for Repo to have a name parameter (so usage of r.get("name") twice is fine.


msg = is_valid_model_output_name(mo_name)

if msg is not None:
raise Exception(msg)

config["model_output_name"] = mo_name
break
assert is_model_output_name
return config
Expand Down
13 changes: 9 additions & 4 deletions src/benchcab/data/meorg_jobscript.j2
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,19 @@ MODEL_OUTPUT_ARGS+=" --parameter-selection {{ mo.parameter_selection }}"
MODEL_OUTPUT_ARGS+=" --is-bundle"
{% endif %}

echo "Querying whether $MODEL_OUTPUT_NAME already exists on me.org"
MODEL_OUTPUT_ID=$($MEORG_BIN output query $MODEL_OUTPUT_NAME | head -n 1 )
if [ ! -z "${MODEL_OUTPUT_ID}" ] ; then
# Re-run analysis on the same model output ID, cleaning up existing files
echo "Deleting existing files from model output ID"
$MEORG_BIN file delete_all $MODEL_OUTPUT_ID
echo "Updated model output ID"
echo -n "Updated"
else
echo "Create new model output ID"
echo -n "Created"
fi

echo " $MODEL_OUTPUT_NAME on me.org and given this ID: $MODEL_OUTPUT_ID"

MODEL_OUTPUT_ID="$($MEORG_BIN output create $MODEL_PROFILE_ID $MODEL_OUTPUT_NAME $MODEL_OUTPUT_ARGS | head -n 1 | awk '{print $NF}')"
echo "Add experiments to model output"
$MEORG_BIN experiment update $MODEL_OUTPUT_ID {{ model_exp_ids|join(',') }}
Expand All @@ -53,7 +57,7 @@ echo "Waiting for object store transfer ($CACHE_DELAY sec)"
sleep $CACHE_DELAY

{% for exp_id in model_exp_ids %}
echo "Replace benchmarks to model output"
echo "Add benchmarks to model output"
$MEORG_BIN benchmark update $MODEL_OUTPUT_ID {{ exp_id }} {{ model_benchmark_ids|join(',') }}

# Trigger the analysis
Expand All @@ -62,4 +66,5 @@ $MEORG_BIN analysis start $MODEL_OUTPUT_ID {{ exp_id }}

{% endfor %}

echo "DONE"
MEORG_BASE_URL_DEV="${MEORG_BASE_URL_DEV:-https://modelevaluation.org/api/}"
echo "Files transferred to me.org. Analysis in progress. Open ${MEORG_BASE_URL_DEV}/display/${MODEL_OUTPUT_ID} to see results"
2 changes: 1 addition & 1 deletion src/benchcab/data/test/config-basic.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
realisations:
- repo:
svn:
branch_path: trunk
branch_path: 123-sample
model_output_name: True
- repo:
svn:
Expand Down
4 changes: 2 additions & 2 deletions src/benchcab/data/test/config-optional.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ science_configurations:
realisations:
- repo:
svn:
branch_path: trunk
name: svn_trunk
branch_path: 123-sample
name: 123-sample-optional
model_output_name: True
- repo:
svn:
Expand Down
3 changes: 1 addition & 2 deletions src/benchcab/internal.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,7 @@
},
}

MEORG_PROFILE = {"name": "CABLE", "id": "nFcjg4qqHGPkB9sqE"}

FLUXSITE_DEFAULT_EXPERIMENT = "forty-two-site-test"

Expand Down Expand Up @@ -318,5 +319,3 @@ def get_met_forcing_file_names(experiment: str) -> list[str]:
walltime="01:00:00",
storage=["gdata/ks32", "gdata/hh5", "gdata/wd9", "gdata/rp23"],
)

MEORG_PROFILE = {"name": "CABLE", "id": "nFcjg4qqHGPkB9sqE"}
44 changes: 35 additions & 9 deletions tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def no_optional_config() -> dict:
return {
"modules": ["intel-compiler/2021.1.1", "netcdf/4.7.4", "openmpi/4.1.0"],
"realisations": [
{"repo": {"svn": {"branch_path": "trunk"}}, "model_output_name": True},
{"repo": {"svn": {"branch_path": "123-sample"}}, "model_output_name": True},
{
"repo": {
"svn": {"branch_path": "branches/Users/ccc561/v3.0-YP-changes"}
Expand Down Expand Up @@ -122,7 +122,7 @@ def all_optional_custom_config(no_optional_config) -> dict:
},
"codecov": True,
}
branch_names = ["svn_trunk", "git_branch"]
branch_names = ["123-sample-optional", "git_branch"]

for c_r, b_n in zip(config["realisations"], branch_names):
c_r["name"] = b_n
Expand Down Expand Up @@ -202,22 +202,48 @@ def test_add_model_output_name(no_optional_config):
output_config = bc.add_model_output_name(no_optional_config)

del no_optional_config["realisations"][0]["model_output_name"]
no_optional_config = no_optional_config | {"model_output_name": "trunk"}
no_optional_config = no_optional_config | {"model_output_name": "123-sample"}
assert output_config == no_optional_config


def test_valid_valid_output_name():
"""Test reading config for a file that may/may not exist."""
model_output_name = "123-sample-issue"
msg = bc.is_valid_model_output_name(model_output_name)
assert msg is None


@pytest.mark.parametrize(
("model_output_name", "output_msg"),
[
("", "Model output name is empty"),
(
"l" * 256,
"Model output name has length more than allowed limit on me.org (255)",
),
("123-fsd f", "Model output name cannot have spaces"),
("hello-123", "Model output name does not start with number"),
("123", "Model output name does not contain keyword after number"),
],
)
def test_invalid_valid_output_name(model_output_name, output_msg):
"""Test reading config for a file that may/may not exist."""
msg = bc.is_valid_model_output_name(model_output_name)
assert msg == output_msg


@pytest.mark.parametrize(
("config_str", "output_config_str"),
("config_str", "model_output_name", "output_config"),
[
("config-basic.yml", "all_optional_default_config"),
("config-optional.yml", "all_optional_custom_config"),
("config-basic.yml", "123-sample", "all_optional_default_config"),
("config-optional.yml", "123-sample-optional", "all_optional_custom_config"),
],
indirect=["config_str"],
)
def test_read_config(config_path, output_config_str, request):
def test_read_config(request, config_path, model_output_name, output_config):
"""Test overall behaviour of read_config."""
output_config = request.getfixturevalue(output_config_str) | {
"model_output_name": "trunk"
output_config = request.getfixturevalue(output_config) | {
"model_output_name": model_output_name
}
del output_config["realisations"][0]["model_output_name"]
config = bc.read_config(config_path)
Expand Down
Loading