Skip to content

Commit

Permalink
Fix prov files not printing with links in prov index.html (#937)
Browse files Browse the repository at this point in the history
* Consolidate logger messages for prov paths 
* Suppress error for git command when the package is a version build
  • Loading branch information
tomvothecoder authored Feb 14, 2025
1 parent 4e35711 commit dcd5719
Show file tree
Hide file tree
Showing 2 changed files with 201 additions and 138 deletions.
282 changes: 201 additions & 81 deletions e3sm_diags/e3sm_diags_driver.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
#!/usr/bin/env python
# The above line is needed for `test_all_sets.test_all_sets_mpl`.
# Otherwise, OSError: [Errno 8] Exec format error: 'e3sm_diags_driver.py'.
from __future__ import annotations

import os
import subprocess
import sys
import traceback
from typing import Dict, List, Tuple
from datetime import datetime
from typing import Dict, List, Tuple, TypedDict

import dask
import dask.bag as db

import e3sm_diags
from e3sm_diags.logger import custom_logger
from e3sm_diags.logger import LOG_FILENAME, custom_logger
from e3sm_diags.parameter.core_parameter import CoreParameter
from e3sm_diags.parser import SET_TO_PARSER
from e3sm_diags.parser.core_parser import CoreParser
Expand All @@ -20,6 +23,34 @@
logger = custom_logger(__name__)


class ProvPaths(TypedDict):
"""
ProvPaths is a TypedDict that defines the structure for provenance paths.
Attributes
----------
results_dir: str
Path to the diagnostic results.
log_path : str
Path to the log directory.
parameter_files_path : str
Path to the parameter files.
python_script_path : str
Path to the Python script.
env_yml_path : str
Path to the environment YAML file.
index_html_path : str
Path to the provenance index HTML file.
"""

results_dir: str
log_path: str
parameter_files_path: str | None
python_script_path: str | None
env_yml_path: str | None
index_html_path: str | None


def get_default_diags_path(set_name, run_type, print_path=True):
"""
Returns the path for the default diags for plotset set_name.
Expand All @@ -40,132 +71,146 @@ def get_default_diags_path(set_name, run_type, print_path=True):
return pth


def _save_env_yml(results_dir):
def save_provenance(results_dir: str, parser: CoreParser) -> ProvPaths:
"""
Store the provenance in results_dir.
"""
prov_dir = os.path.join(results_dir, "prov")

paths: ProvPaths = {
"results_dir": results_dir,
"log_path": os.path.join(prov_dir, LOG_FILENAME),
"parameter_files_path": None,
"python_script_path": None,
"env_yml_path": None,
"index_html_path": None,
}

paths["parameter_files_path"] = _save_parameter_files(prov_dir, parser)
paths["python_script_path"] = _save_python_script(prov_dir, parser)

# FIXME: Replace Exception with specific exception type.
try:
paths["env_yml_path"] = _save_env_yml(prov_dir)
except Exception:
paths["env_yml_path"] = None
traceback.print_exc()

if not os.path.exists(prov_dir):
os.makedirs(prov_dir, 0o755)

# Create an HTML file to list the contents of the prov dir.
index_html_path = os.path.join(prov_dir, "index.html")
paths["index_html_path"] = index_html_path

with open(index_html_path, "w") as f:
f.write("<html><body><h1>Provenance Files</h1><ul>")

for root, _, files in os.walk(prov_dir):
for file_name in files:
file_path = os.path.relpath(os.path.join(root, file_name), prov_dir)
f.write(
f'<li><a href="{file_path}" target="_blank">{file_name}</a></li>'
)

f.write("</ul></body></html>")

return paths


def _save_env_yml(results_dir: str) -> str | None:
"""
Save the yml to recreate the environment in results_dir.
"""
cmd = "conda env export"
p = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
output, err = p.communicate()

filename = None

if err:
logger.exception("Error when creating env yml file: ")
logger.exception(err)
else:
fnm = os.path.join(results_dir, "environment.yml")
with open(fnm, "w") as f:
filename = os.path.join(results_dir, "environment.yml")

with open(filename, "w") as f:
f.write(output.decode("utf-8"))
logger.info("Saved environment yml file to: {}".format(fnm))

return filename

def _save_parameter_files(results_dir, parser):

def _save_parameter_files(results_dir: str, parser: CoreParser) -> str | None:
"""
Save the command line arguments used, and any py or cfg files.
"""
filepath = os.path.join(results_dir, "cmd_used.txt")
new_filepath = None

cmd_used = " ".join(sys.argv)
fnm = os.path.join(results_dir, "cmd_used.txt")
with open(fnm, "w") as f:
with open(filepath, "w") as f:
f.write(cmd_used)
logger.info("Saved command used to: {}".format(fnm))

args = parser.view_args()

if hasattr(args, "parameters") and args.parameters:
fnm = args.parameters
if not os.path.isfile(fnm):
logger.warning("File does not exist: {}".format(fnm))
else:
with open(fnm, "r") as f:
contents = "".join(f.readlines())
# Remove any path, just keep the filename.
new_fnm = fnm.split("/")[-1]
new_fnm = os.path.join(results_dir, new_fnm)
with open(new_fnm, "w") as f:
f.write(contents)
logger.info("Saved py file to: {}".format(new_fnm))

if hasattr(args, "other_parameters") and args.other_parameters:
fnm = args.other_parameters[0]
if not os.path.isfile(fnm):
logger.warning("File does not exist: {}".format(fnm))
else:
with open(fnm, "r") as f:
contents = "".join(f.readlines())
# Remove any path, just keep the filename.
new_fnm = fnm.split("/")[-1]
new_fnm = os.path.join(results_dir, new_fnm)
with open(new_fnm, "w") as f:
f.write(contents)
logger.info("Saved cfg file to: {}".format(new_fnm))
filepath = args.parameters
elif hasattr(args, "other_parameters") and args.other_parameters:
filepath = args.other_parameters[0]

if not os.path.isfile(filepath):
logger.warning("File does not exist: {}".format(filepath))
else:
with open(filepath, "r") as f:
contents = "".join(f.readlines())

# Remove any path, just keep the filename.
new_filepath = filepath.split("/")[-1]
new_filepath = os.path.join(results_dir, new_filepath)

with open(new_filepath, "w") as f:
f.write(contents)

return new_filepath

def _save_python_script(results_dir, parser):

def _save_python_script(results_dir: str, parser: CoreParser) -> str | None:
"""
When using a Python script to run the
diags via the API, dump a copy of the script.
"""
args = parser.view_args()
# If running the legacy way, there's
# nothing to be saved.

# FIXME: Is this code still needed?
# If running the legacy way, there's nothing to be saved.
if args.parameters:
return
return None

# Get the last argument that has .py in it.
py_files = [f for f in sys.argv if f.endswith(".py")]

# User didn't pass in a Python file, so they maybe ran:
# e3sm_diags -d diags.cfg
if not py_files:
return
return None

fnm = py_files[-1]

if not os.path.isfile(fnm):
logger.warning("File does not exist: {}".format(fnm))
return
return None

with open(fnm, "r") as f:
contents = "".join(f.readlines())
# Remove any path, just keep the filename.
new_fnm = fnm.split("/")[-1]
new_fnm = os.path.join(results_dir, new_fnm)
with open(new_fnm, "w") as f:
f.write(contents)
logger.info("Saved Python script to: {}".format(new_fnm))


def save_provenance(results_dir, parser):
"""
Store the provenance in results_dir.
"""
results_dir = os.path.join(results_dir, "prov")
if not os.path.exists(results_dir):
os.makedirs(results_dir, 0o755)

# Create an HTML file to list the contents of the prov dir.
index_html_path = os.path.join(results_dir, "index.html")

with open(index_html_path, "w") as f:
f.write("<html><body><h1>Provenance Files</h1><ul>")

for file_name in os.listdir(results_dir):
file_path = os.path.join(results_dir, file_name)
if os.path.isfile(file_path):
f.write(
f'<li><a href="{file_name}" target="_blank">{file_name}</a></li>'
)

f.write("</ul></body></html>")

logger.info("Created provenance index HTML file at: {}".format(index_html_path))

try:
_save_env_yml(results_dir)
except Exception:
traceback.print_exc()
# Remove any path, just keep the filename.
new_filepath = fnm.split("/")[-1]
new_filepath = os.path.join(results_dir, new_filepath)

_save_parameter_files(results_dir, parser)
with open(new_filepath, "w") as f:
f.write(contents)

_save_python_script(results_dir, parser)
return new_filepath


# FIXME: B008 Do not perform function call `CoreParser` in argument defaults;
Expand Down Expand Up @@ -363,8 +408,11 @@ def main(parameters=[]) -> List[CoreParameter]: # noqa B006

if not os.path.exists(parameters[0].results_dir):
os.makedirs(parameters[0].results_dir, 0o755)

if not parameters[0].no_viewer: # Only save provenance for full runs.
save_provenance(parameters[0].results_dir, parser)
prov_paths = save_provenance(parameters[0].results_dir, parser)

_log_diagnostic_run_info(prov_paths)

# Perform the diagnostic run
# --------------------------
Expand Down Expand Up @@ -410,5 +458,77 @@ def main(parameters=[]) -> List[CoreParameter]: # noqa B006
return parameters_results


def _log_diagnostic_run_info(prov_paths: ProvPaths):
"""Logs information about the diagnostic run.
This method is useful for tracking the provenance of the diagnostic run
and understanding the context of the diagnostic results.
It logs the following information:
- Timestamp of the run
- Version information (Git branch and commit hash or module version)
- Paths to the provenance files (log, parameter files, Python script,
env yml, index HTML)
Parameters
----------
prov_paths : ProvPaths
The paths to the provenance files.
Notes
-----
The version information is retrieved from the current Git branch and
commit hash. If the Git information is not available, it falls back
to the version defined in the `e3sm_diags` module.
"""
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

try:
branch_name = (
subprocess.check_output(
["git", "rev-parse", "--abbrev-ref", "HEAD"],
cwd=os.path.dirname(__file__),
stderr=subprocess.DEVNULL,
)
.strip()
.decode("utf-8")
)
commit_hash = (
subprocess.check_output(
["git", "rev-parse", "HEAD"],
cwd=os.path.dirname(__file__),
stderr=subprocess.DEVNULL,
)
.strip()
.decode("utf-8")
)
version_info = f"branch {branch_name} with commit {commit_hash}"
except subprocess.CalledProcessError:
version_info = f"version {e3sm_diags.__version__}"

(
results_dir,
log_path,
parameter_files_path,
python_script_path,
env_yml_path,
index_html_path,
) = prov_paths.values()
logger.info(
f"\n{'=' * 80}\n"
f"E3SM Diagnostics Run\n"
f"{'-' * 20}\n"
f"Timestamp: {timestamp}\n"
f"Version Info: {version_info}\n"
f"Results Path: {results_dir}\n"
f"Log Path: {log_path}\n"
f"Parameter Files Path: {parameter_files_path}\n"
f"Python Script Path: {python_script_path}\n"
f"Environment YML Path: {env_yml_path}\n"
f"Provenance Index HTML Path: {index_html_path}\n"
f"{'=' * 80}\n"
)


if __name__ == "__main__":
main()
Loading

0 comments on commit dcd5719

Please sign in to comment.