diff --git a/fre/analysis/base_class.py b/fre/analysis/base_class.py new file mode 100644 index 000000000..81150513a --- /dev/null +++ b/fre/analysis/base_class.py @@ -0,0 +1,42 @@ +import json + + +class AnalysisScript(object): + """Abstract base class for analysis scripts. User-defined analysis scripts + should inhert from this class and override the requires and run_analysis methods. + + Attributes: + description: Longer form description for the analysis. + title: Title that describes the analysis. + """ + def __init__(self): + """Instantiates an object. The user should provide a description and title.""" + raise NotImplementedError("you must override this function.") + self.description = None + self.title = None + + def requires(self): + """Provides metadata describing what is needed for this analysis to run. + + Returns: + A json string describing the metadata. + """ + raise NotImplementedError("you must override this function.") + return json.dumps("{json of metadata MDTF format.}") + + def run_analysis(self, yaml, name, date_range, scripts_dir, output_dir, output_yaml): + """Runs the analysis and generates all plots and associated datasets. + + Args: + yaml: Path to a model yaml + name: Name of the analysis as specified in the yaml + date_range: Time span to use for analysis (YYYY-MM-DD,YYYY-MM-DD) + scripts_dir: Path to a directory to save intermediate scripts + output_dir: Path to a directory to save figures + output_yaml: Path to use as an structured output yaml file + + Returns: + A list of png figures. 
def _process_output(output):
    """Converts a bytes string into a list of non-empty text lines.

    Args:
        output: Bytes string; it is decoded as UTF-8.

    Returns:
        List of strings, one per non-empty line of the decoded output.
    """
    return [line for line in output.decode("utf-8").split("\n") if line]


class VirtualEnvManager(object):
    """Helper class for creating/running simple commands in a virtual environment.

    Attributes:
        path: Path to the root directory of the virtual environment.
        activate: Bash command that activates the virtual environment.
    """
    def __init__(self, path):
        """Stores the environment location and builds its activation command.

        Args:
            path: Path to the root directory of the virtual environment.
        """
        self.path = Path(path)
        self.activate = f"source {self.path / 'bin' / 'activate'}"

    @staticmethod
    def _execute(commands):
        """Runs input commands through bash in a child process.

        The commands are written to a temporary script that starts with
        "set -e", so the first failing command aborts the script.

        Args:
            commands: List of string commands.

        Returns:
            List of string output (stdout and stderr combined).

        Raises:
            CalledProcessError if the script exits with a non-zero status; the
            captured output is printed before the exception is re-raised.
        """
        with TemporaryDirectory() as tmp:
            script_path = Path(tmp) / "script"
            with open(script_path, "w") as script:
                # Without "set -e" bash only reports the status of the last command,
                # so e.g. a failed activation would go unnoticed and the following
                # pip/python commands would run against the outer interpreter.
                script.write("\n".join(["set -e"] + list(commands)))
            try:
                process = run(["bash", str(script_path)], stdout=PIPE, stderr=STDOUT,
                              check=True)
            except CalledProcessError as err:
                for line in _process_output(err.output):
                    print(line)
                raise
            return _process_output(process.stdout)

    def _execute_python_script(self, commands):
        """Runs input python code in bash in a child process.

        Args:
            commands: List of string python code lines.

        Returns:
            List of string output.
        """
        with TemporaryDirectory() as tmp:
            script_path = Path(tmp) / "python_script"
            with open(script_path, "w") as script:
                script.write("\n".join(commands))
            # Execute while the temporary directory (and the script in it) still exists.
            return self._execute([self.activate, f"python3 {str(script_path)}"])

    def create_env(self):
        """Creates the virtual environment (with pip available) at self.path."""
        venv.create(self.path, with_pip=True)

    def destroy_env(self):
        """Destroys the virtual environment.

        Raises:
            NotImplementedError always; this feature is not implemented yet.
        """
        raise NotImplementedError("this feature is not implemented yet.")

    def install_package(self, name):
        """Installs a package in the virtual environment.

        pip itself is upgraded first.

        Args:
            name: String name of the package.

        Returns:
            List of string output.
        """
        # "pip --upgrade pip" is not a valid invocation; the "install" subcommand
        # is required for the upgrade to happen.
        commands = [self.activate, "python3 -m pip install --upgrade pip",
                    f"python3 -m pip install {name}"]
        return self._execute(commands)

    def list_plugins(self):
        """Returns a list of plugins that are available in the virtual environment.

        Returns:
            List of plugins.
        """
        python_script = [
            "from analysis_scripts import available_plugins",
            "for plugin in available_plugins():",
            "    print(plugin)",
        ]
        return self._execute_python_script(python_script)

    def run_analysis_plugin(self, name, catalog, output_directory, config=None):
        """Returns a list of paths to figures created by the plugin from the virtual
        environment.

        Args:
            name: String name of the analysis package.
            catalog: Path to the data catalog.
            output_directory: Path to the output directory.
            config: Optional configuration passed to the plugin; a falsy value is
                    passed on as None.

        Returns:
            List of figure paths.
        """
        # repr() produces valid Python literals even when a value contains quotes
        # or backslashes; pasting the raw text between quotes does not.
        python_script = [
            f"config = {config!r}" if config else "config = None",
            "from analysis_scripts import run_plugin",
            f"paths = run_plugin({str(name)!r}, {str(catalog)!r}, "
            f"{str(output_directory)!r}, config=config)",
            "for path in paths:",
            "    print(path)",
        ]
        return self._execute_python_script(python_script)

    def uninstall_package(self, name):
        """Uninstalls a package from the virtual environment.

        Args:
            name: String name of the package.

        Returns:
            List of string output.
        """
        # "-y" answers pip's confirmation prompt; the child process is not interactive.
        commands = [self.activate, f"python3 -m pip uninstall -y {name}"]
        return self._execute(commands)
+ """ + run_analysis(yaml, name, date_range, scripts_dir, output_dir, output_yaml) @analysis_cli.command() diff --git a/fre/analysis/plugins/__init__.py b/fre/analysis/plugins/__init__.py new file mode 100644 index 000000000..429224d1b --- /dev/null +++ b/fre/analysis/plugins/__init__.py @@ -0,0 +1 @@ +from .esnb import freanalysis_esnb diff --git a/fre/analysis/plugins/esnb.py b/fre/analysis/plugins/esnb.py new file mode 100644 index 000000000..18bd34217 --- /dev/null +++ b/fre/analysis/plugins/esnb.py @@ -0,0 +1,57 @@ +import logging +from pathlib import Path, PurePosixPath +import requests +from ..base_class import AnalysisScript +import esnb.engine + +fre_logger = logging.getLogger(__name__) + +class freanalysis_esnb(AnalysisScript): + """Defines run and report-requirements methods for ESNB flavor usage + """ + + def __init__(self): + self.description = "Wrapper to access analysis framework for ESNB scripts" + self.title = "ESNB" + + def run_analysis(self, config, name, date_range, scripts_dir, output_dir, output_yaml): + """Runs the ESNB analysis specified in the yaml and the runtime options + + Args: + config: Dictionary of specific configuration for the script + name: Name of the analysis as specified in the yaml + date_range: Time span to use for analysis (YYYY-MM-DD,YYYY-MM-DD) + scripts_dir: Path to a directory to save intermediate scripts + output_dir: Path to a directory to save figures + output_yaml: Path to use as an structured output yaml file + """ + + # save notebook to scripts_dir + url = config["notebook_path"] + # convert to the "Raw" URL + # replace 'github.com' with 'raw.githubusercontent.com' and remove '/blob' + raw_url = url.replace("github.com", "raw.githubusercontent.com").replace("/blob/", "/") + local_filename = Path(scripts_dir) / PurePosixPath(url).name + with requests.get(raw_url) as r: + r.raise_for_status() # Check for HTTP errors (404, 500, etc.) 
class UnknownPluginError(Exception):
    """Custom exception for when an invalid plugin name is used."""


def _find_plugin_class(module):
    """Looks for a class that inherits from AnalysisScript.

    Args:
        module: Module object.

    Returns:
        The first class found in the module's namespace that is a (direct or
        indirect) subclass of AnalysisScript, excluding AnalysisScript itself.

    Raises:
        UnknownPluginError if no class is found.
    """
    for attribute in vars(module).values():
        # AnalysisScript itself is usually imported into plugin modules, so it has
        # to be excluded explicitly.
        if (inspect.isclass(attribute) and attribute is not AnalysisScript
                and issubclass(attribute, AnalysisScript)):
            # Return the class so an object can be instantiated from it later.
            return attribute
    raise UnknownPluginError("could not find class that inherits from AnalysisScript")


_sanity_counter = 0  # How many modules the current search has visited.
_maximum_craziness = 100  # This is too much recursion.


def _recursive_search(name, ispkg):
    """Recursively search for a module that has a class that inherits from AnalysisScript.

    The module-level _sanity_counter is incremented on every call and never
    decremented, so it bounds the total number of modules visited since it was
    last reset.

    Args:
        name: String name of the module.
        ispkg: Flag telling whether or not the module is a package.

    Returns:
        Class that inherits from AnalysisScript.

    Raises:
        UnknownPluginError if no class is found.
        ValueError if there is too much recursion.
    """
    global _sanity_counter
    _sanity_counter += 1
    if _sanity_counter > _maximum_craziness:
        raise ValueError(f"recursion level {_sanity_counter} too high.")

    module = importlib.import_module(name)
    try:
        return _find_plugin_class(module)
    except UnknownPluginError:
        if not ispkg:
            # Do not recurse further.
            raise

    paths = module.__spec__.submodule_search_locations
    for _finder, subname, sub_ispkg in pkgutil.iter_modules(paths):
        try:
            return _recursive_search(f"{name}.{subname}", sub_ispkg)
        except UnknownPluginError:
            # Didn't find it, so continue to iterate.
            continue
    # Falling off the end would return None, which the caller's "return" would
    # pass upward as if a class had been found.
    raise UnknownPluginError(f"could not find analysis script class in package '{name}'.")


# Dictionary of found plugins.
_discovered_plugins = {}
for _finder, _pkg_name, _is_pkg in pkgutil.iter_modules():
    if _pkg_name.startswith("freanalysis_") and _is_pkg:
        _sanity_counter = 0
        try:
            _discovered_plugins[_pkg_name] = _recursive_search(_pkg_name, True)
        except UnknownPluginError:
            # A matching package without a plugin class is not a plugin; leave it
            # out rather than registering an unusable entry.
            continue
def available_plugins():
    """Returns a sorted list of the discovered plugin names."""
    # Iterating a dict yields its keys, so no intermediate list is needed.
    return sorted(_discovered_plugins)


def list_plugins():
    """Prints the available plugin names, or a warning when there are none."""
    names = available_plugins()
    if names:
        print("\n".join(["Available plugins:", "-"*32] + names))
    else:
        print("Warning: no plugins found.")


def plugin_requirements(name):
    """Returns a JSON string detailing the plugin's requirement metadata.

    Args:
        name: Name of the plugin.

    Returns:
        The value returned by the plugin object's requires() method (documented
        on the base class as a JSON string of metadata).
    """
    return _plugin_object(name).requires()
def run_analysis(yaml, name, date_range, scripts_dir, output_dir, output_yaml):
    """Runs the analysis and generates all plots and associated datasets.

    The yaml file is expected to contain analysis -> <name> -> specific_config and
    analysis -> <name> -> script_type; both are handed to run_plugin.

    Args:
        yaml: Path to a model yaml
        name: Name of the analysis as specified in the yaml
        date_range: Time span to use for analysis (YYYY-MM-DD,YYYY-MM-DD)
        scripts_dir: Path to a directory to save intermediate scripts
        output_dir: Path to a directory to save figures
        output_yaml: Path to use as an structured output yaml file

    Returns:
        Whatever run_plugin returns for this analysis.

    Raises:
        KeyError if the yaml lacks the analysis entry or one of its two keys.
    """
    # Create the directory for the figures, scripts, and output yaml
    for directory in (Path(output_dir), Path(scripts_dir), Path(output_yaml).parent):
        directory.mkdir(parents=True, exist_ok=True)

    # Parse the pass-through configuration out of the experiment yaml file.
    with open(yaml) as file_:
        config_yaml = safe_load(file_)
    analysis_config = config_yaml["analysis"][name]
    specific_config = analysis_config["specific_config"]
    script_type = analysis_config["script_type"]

    # Run the analysis and hand the plugin's result back to the caller instead of
    # binding it to an unused local.
    return run_plugin(script_type, name, specific_config, date_range, scripts_dir,
                      output_dir, output_yaml)