diff --git a/src/gdpx/cli/compute.py b/src/gdpx/cli/compute.py
new file mode 100644
index 00000000..abb391f8
--- /dev/null
+++ b/src/gdpx/cli/compute.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+
+import logging
+import pathlib
+from typing import Optional, Union, List
+
+from ase.io import write
+
+from ..worker.drive import DriverBasedWorker
+from ..reactor.reactor import AbstractReactor
+
+
+DEFAULT_MAIN_DIRNAME = "MyWorker"
+
+
+def run_worker(
+    structure: List[str],
+    worker: DriverBasedWorker,
+    *,
+    batch: Optional[int] = None,
+    spawn: bool = False,
+    archive: bool = False,
+    directory: Union[str, pathlib.Path] = pathlib.Path.cwd() / DEFAULT_MAIN_DIRNAME,
+):
+    """Build structures, run them with the worker, and save the results."""
+    # some imported packages change `logging.basicConfig`
+    # and accidentally add a StreamHandler to logging.root
+    # so remove it... (iterate over a copy while removing handlers)
+    for h in list(logging.root.handlers):
+        if isinstance(h, logging.StreamHandler) and not isinstance(
+            h, logging.FileHandler
+        ):
+            logging.root.removeHandler(h)
+
+    # set working directory
+    directory = pathlib.Path(directory)
+    if not directory.exists():
+        directory.mkdir()
+
+    # - read structures
+    from gdpx.builder import create_builder
+
+    frames = []
+    for i, s in enumerate(structure):
+        builder = create_builder(s)
+        builder.directory = directory / "init" / f"s{i}"
+        frames.extend(builder.run())
+
+    # - find input frames
+    worker.directory = directory
+
+    _ = worker.run(frames, batch=batch)
+    worker.inspect(resubmit=True)
+    if not spawn and worker.get_number_of_running_jobs() == 0:
+        res_dir = directory / "results"
+        if not res_dir.exists():
+            res_dir.mkdir(exist_ok=True)
+
+        ret = worker.retrieve(include_retrieved=True, use_archive=archive)
+        if not isinstance(worker.driver, AbstractReactor):
+            end_frames = [traj[-1] for traj in ret]
+            write(res_dir / "end_frames.xyz", end_frames)
+        else:
+            ...
+
+        # AtomsNDArray(ret).save_file(res_dir/"trajs.h5")
+    else:
+        print("Results have already been retrieved.")
+
+    return
+
+
+if __name__ == "__main__":
+    ...
diff --git a/src/gdpx/main.py b/src/gdpx/main.py
index 861dcab1..8ceae1c1 100755
--- a/src/gdpx/main.py
+++ b/src/gdpx/main.py
@@ -133,13 +133,9 @@ def main():
         "-b", "--batch", default=None, type=int,
         help="run selected batch number (useful when queue run)"
     )
-    parser_compute.add_argument(
-        "-o", "--output", default="last", choices=["last","traj"],
-        help="retrieve last frame or entire trajectory"
-    )
     parser_compute.add_argument(
         "--spawn", action="store_true",
-        help="If the computation is spawned, it will not save results when all jobs are finished."
+        help="If the computation is spawned, it will not save results until all jobs are finished."
     )
     parser_compute.add_argument(
         "--archive", action="store_true",
@@ -255,10 +251,10 @@ def main():
         from gdpx.selector.interface import run_selection
         run_selection(args.CONFIG, args.structure, args.directory)
     elif args.subcommand == "compute":
-        from gdpx.worker.interface import run_worker
+        from .cli.compute import run_worker
         run_worker(
-            args.STRUCTURE, args.directory, potter, args.output, args.batch,
-            args.spawn, args.archive
+            args.STRUCTURE, potter, batch=args.batch,
+            spawn=args.spawn, archive=args.archive, directory=args.directory
         )
     elif args.subcommand == "explore":
         from .expedition.interface import run_expedition
diff --git a/src/gdpx/worker/interface.py b/src/gdpx/worker/interface.py
index aa2d4009..60a87ba2 100644
--- a/src/gdpx/worker/interface.py
+++ b/src/gdpx/worker/interface.py
@@ -3,35 +3,23 @@
 
 import copy
 import itertools
-import logging
 import pathlib
 from typing import NoReturn, Optional, List, Callable
 
-import numpy as np
-
 import omegaconf
 
-from ase import Atoms
-from ase.io import read, write
-from ase.geometry import find_mic
-
 from ..core.operation import Operation
 from ..core.variable import Variable
 from ..core.register import registers
 from ..utils.command import parse_input_file
-from ..computation.driver import AbstractDriver
-from ..data.array import AtomsNDArray
 from ..potential.manager import AbstractPotentialManager
-from ..reactor.reactor import AbstractReactor
 from ..scheduler.scheduler import AbstractScheduler
 
 from .worker import AbstractWorker
 from .drive import DriverBasedWorker, CommandDriverBasedWorker, QueueDriverBasedWorker
 from .react import ReactorBasedWorker
 from .single import SingleWorker
 
-DEFAULT_MAIN_DIRNAME = "MyWorker"
-
 
 def convert_config_to_potter(config):
     """Convert a configuration file or a dict to a potter/reactor.
@@ -369,63 +357,5 @@ def _create_workers(
     return workers
 
 
-def run_worker(
-    structure: List[str],
-    directory=pathlib.Path.cwd() / DEFAULT_MAIN_DIRNAME,
-    worker: DriverBasedWorker = None,
-    output: str = None,
-    batch: int = None,
-    spawn: bool = False,
-    archive: bool = False,
-):
-    """"""
-    # - some imported packages change `logging.basicConfig`
-    # and accidently add a StreamHandler to logging.root
-    # so remove it...
-    for h in logging.root.handlers:
-        if isinstance(h, logging.StreamHandler) and not isinstance(
-            h, logging.FileHandler
-        ):
-            logging.root.removeHandler(h)
-
-    directory = pathlib.Path(directory)
-    if not directory.exists():
-        directory.mkdir()
-
-    # - read structures
-    from gdpx.builder import create_builder
-
-    frames = []
-    for i, s in enumerate(structure):
-        builder = create_builder(s)
-        builder.directory = directory / "init" / f"s{i}"
-        frames.extend(builder.run())
-
-    # - find input frames
-    worker.directory = directory
-
-    _ = worker.run(frames, batch=batch)
-    worker.inspect(resubmit=True)
-    if not spawn and worker.get_number_of_running_jobs() == 0:
-        # BUG: bacthes may conflict to save results
-        # - report
-        res_dir = directory / "results"
-        if not res_dir.exists():
-            res_dir.mkdir(exist_ok=True)
-
-        ret = worker.retrieve(include_retrieved=True, use_archive=archive)
-        if not isinstance(worker.driver, AbstractReactor):
-            end_frames = [traj[-1] for traj in ret]
-            write(res_dir / "end_frames.xyz", end_frames)
-        else:
-            ...
-
-        # AtomsNDArray(ret).save_file(res_dir/"trajs.h5")
-    else:
-        print("Results have already been retrieved.")
-
-    return
-
-
 if __name__ == "__main__":
     ...
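
Note (not part of the patch): both the removed run_worker() in src/gdpx/worker/interface.py and its relocated copy in src/gdpx/cli/compute.py begin by stripping stray StreamHandlers from the root logger, because some imported packages call logging.basicConfig() at import time. The self-contained sketch below illustrates that cleanup pattern in isolation; the basicConfig() call merely simulates such a side effect, and iterating over a copy of the handler list is an addition made here so removals cannot skip entries.

#!/usr/bin/env python3
import logging

logging.basicConfig()  # simulate an import-time side effect that attaches a StreamHandler to root

for h in list(logging.root.handlers):
    # FileHandler subclasses StreamHandler, so keep file logs and drop only console handlers
    if isinstance(h, logging.StreamHandler) and not isinstance(h, logging.FileHandler):
        logging.root.removeHandler(h)

assert not logging.root.handlers  # the stray StreamHandler is gone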