diff --git a/src/autoplex/auto/rss/flows.py b/src/autoplex/auto/rss/flows.py index f55491a54..fb9df8e84 100644 --- a/src/autoplex/auto/rss/flows.py +++ b/src/autoplex/auto/rss/flows.py @@ -107,7 +107,7 @@ def make(self, **kwargs): - 'test_error': float, The test error from the last completed training step. - 'pre_database_dir': str, Path to the directory containing the pre-existing database for resuming. - - 'mlip_path': str, Path to the file of a previous MLIP model. + - 'mlip_path': str | Path, Path to the file of a previous MLIP model. - 'isolated_atom_energies': dict, A dictionary with isolated atom energy values mapped to atomic numbers. generated_struct_numbers: list[int] diff --git a/src/autoplex/auto/rss/jobs.py b/src/autoplex/auto/rss/jobs.py index 489fc5caf..78156e9b9 100644 --- a/src/autoplex/auto/rss/jobs.py +++ b/src/autoplex/auto/rss/jobs.py @@ -257,8 +257,8 @@ def initial_rss( do_dft_static = DFTStaticLabelling( e0_spin=e0_spin, isolatedatom_box=isolatedatom_box, - isolated_atom=include_isolated_atom, - dimer=include_dimer, + include_isolated_atom=include_isolated_atom, + include_dimer=include_dimer, dimer_box=dimer_box, dimer_range=dimer_range, dimer_num=dimer_num, @@ -266,8 +266,9 @@ def initial_rss( custom_potcar=custom_potcar, static_energy_maker=static_energy_maker, static_energy_maker_isolated_atoms=static_energy_maker_isolated_atoms, + config_type=config_type, ).make( - structures=do_randomized_structure_generation.output, config_type=config_type + structures=do_randomized_structure_generation.output, ) do_data_collection = collect_dft_data( dft_ref_file=dft_ref_file, rss_group=rss_group, dft_dirs=do_dft_static.output @@ -650,6 +651,7 @@ def do_rss_iterations( num_of_selection=num_of_rss_selected_structs, bcur_params=bcur_params, traj_path=do_rss.output, + traj_type="rss", random_seed=random_seed, isolated_atom_energies=input["isolated_atom_energies"], remove_traj_files=remove_traj_files, @@ -657,8 +659,8 @@ def do_rss_iterations( do_dft_static = DFTStaticLabelling( e0_spin=e0_spin, isolatedatom_box=isolatedatom_box, - isolated_atom=include_isolated_atom, - dimer=include_dimer, + include_isolated_atom=include_isolated_atom, + include_dimer=include_dimer, dimer_box=dimer_box, dimer_range=dimer_range, dimer_num=dimer_num, @@ -666,7 +668,10 @@ def do_rss_iterations( custom_potcar=custom_potcar, static_energy_maker=static_energy_maker, static_energy_maker_isolated_atoms=static_energy_maker_isolated_atoms, - ).make(structures=do_data_sampling.output, config_type=config_type) + config_type=config_type, + ).make( + structures=do_data_sampling.output, + ) do_data_collection = collect_dft_data( dft_ref_file=dft_ref_file, rss_group=rss_group, diff --git a/src/autoplex/data/common/flows.py b/src/autoplex/data/common/flows.py index aa95f5df8..a38c433ad 100644 --- a/src/autoplex/data/common/flows.py +++ b/src/autoplex/data/common/flows.py @@ -3,15 +3,26 @@ import logging import traceback from dataclasses import dataclass, field +from typing import TYPE_CHECKING +if TYPE_CHECKING: + from emmet.core.math import Matrix3D + +from atomate2 import SETTINGS +from atomate2.common.jobs.phonons import get_supercell_size +from atomate2.common.jobs.utils import ( + structure_to_conventional, + structure_to_primitive, +) from atomate2.forcefields.jobs import ( ForceFieldRelaxMaker, ForceFieldStaticMaker, ) +from atomate2.vasp.flows.core import DoubleRelaxMaker from atomate2.vasp.jobs.base import BaseVaspMaker -from atomate2.vasp.jobs.core import StaticMaker +from atomate2.vasp.jobs.core import StaticMaker, TightRelaxMaker from atomate2.vasp.powerups import update_user_incar_settings -from atomate2.vasp.sets.core import StaticSetGenerator +from atomate2.vasp.sets.core import StaticSetGenerator, TightRelaxSetGenerator from emmet.core.math import Matrix3D from jobflow import Flow, Maker, Response, job from pymatgen.core import Lattice @@ -19,6 +30,7 @@ from pymatgen.io.ase import AseAtomsAdaptor from autoplex.data.common.jobs import ( + collect_dft_data, convert_to_extxyz, generate_randomized_structures, get_supercell_job, @@ -28,202 +40,73 @@ ElementCollection, flatten, ) -from autoplex.misc.castep.jobs import CastepStaticMaker - -__all__ = ["DFTStaticLabelling", "GenerateTrainingDataForTesting"] - - -logging.basicConfig(level=logging.DEBUG, format="[%(levelname)s] %(message)s") - - -@dataclass -class GenerateTrainingDataForTesting(Maker): - """Maker for generating training data to test it and check the forces. - - This Maker will first generate training data based on the chosen ML model (default is GAP) - by randomizing (ase rattle) atomic displacements in supercells of the provided input structures. - Then it will proceed with MLIP-based Phonon calculations (based on atomate2 PhononMaker), collect - all structure data in extended xyz files and plot the forces in histograms (per rescaling cell_factor - and total). - - Parameters - ---------- - name: str - Name of the flow. - bulk_relax_maker: ForceFieldRelaxMaker | None - Maker for the relax jobs. - static_energy_maker: ForceFieldStaticMaker | ForceFieldRelaxMaker | None - Maker for the static jobs. - - """ - - name: str = "generate_training_data_for_testing" - bulk_relax_maker: ForceFieldRelaxMaker | None = None - static_energy_maker: ForceFieldStaticMaker | ForceFieldRelaxMaker | None = None - - def make( - self, - train_structure_list: list[Structure], - cell_factor_sequence: list[float] | None = None, - potential_filename: str = "gap.xml", - n_structures: int = 50, - rattle_std: float = 0.01, - relax_cell: bool = True, - steps: int = 1000, - supercell_matrix: Matrix3D | None = None, - config_type: str = "train", - x_min: int = 0, - x_max: int = 5, - bin_width: float = 0.125, - **relax_kwargs, - ): - """ - Generate ase.rattled structures from the training data and returns histogram plots of the forces. - - Parameters - ---------- - train_structure_list: list[Structure]. - List of pymatgen structures object. - cell_factor_sequence: list[float] - List of factor to resize cell parameters. - potential_filename: str - The param_file_name for :obj:`quippy.potential.Potential()'`. - n_structures : int. - Total number of randomly displaced structures to be generated. - rattle_std: float. - Rattle amplitude (standard deviation in normal distribution). - Default=0.01. - relax_cell : bool - Whether to allow the cell shape/volume to change during relaxation. - steps : int - Maximum number of ionic steps allowed during relaxation. - supercell_matrix: Matrix3D | None - The matrix to generate the supercell. - config_type: str - Configuration type of the data. - x_min: int - Minimum value for the plot x-axis. - x_max: int - Maximum value for the plot x-axis. - bin_width: float - Width of the plot bins. - relax_kwargs : dict - Keyword arguments that will get passed to :obj:`Relaxer.relax`. - - Returns - ------- - Matplotlib plots "count vs. forces". - """ - jobs = [] - if cell_factor_sequence is None: - cell_factor_sequence = [0.975, 1.0, 1.025, 1.05] - for structure in train_structure_list: - if self.bulk_relax_maker is None: - self.bulk_relax_maker = ForceFieldRelaxMaker( - calculator_kwargs={ - "args_str": "IP GAP", - "param_filename": str(potential_filename), - }, - force_field_name="GAP", - relax_cell=relax_cell, - steps=steps, - ) - if supercell_matrix is None: - supercell_matrix = [[3, 0, 0], [0, 3, 0], [0, 0, 3]] - - bulk_relax = self.bulk_relax_maker.make(structure=structure) - jobs.append(bulk_relax) - supercell = get_supercell_job( - structure=bulk_relax.output.structure, - supercell_matrix=supercell_matrix, - ) - jobs.append(supercell) - - for cell_factor in cell_factor_sequence: - rattled_job = generate_randomized_structures( - structure=supercell.output, - n_structures=n_structures, - volume_custom_scale_factors=[cell_factor], - rattle_std=rattle_std, - ) - jobs.append(rattled_job) - static_conv_jobs = self.static_run_and_convert( - rattled_job.output, - cell_factor, - config_type, - potential_filename, - **relax_kwargs, - ) - jobs.append(static_conv_jobs) - plots = plot_force_distribution( - cell_factor, static_conv_jobs.output, x_min, x_max, bin_width - ) - jobs.append(plots) - - return Flow(jobs=jobs, name=self.name) # , plots.output) - - @job - def static_run_and_convert( - self, - structure_list: list[Structure], - cell_factor: float, - config_type, - potential_filename, - **relax_kwargs, - ): - """ - Job for the static runs and the data conversion to the extxyz format. +from autoplex.misc.castep.jobs import BaseCastepMaker + +__all__ = [ + "DFTStaticLabelling", + "GenerateTrainingDataForTesting", + "RattledTrainingDataMaker", +] + + +logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s") + + +_DEFAULT_STATIC_ENERGY_MAKER = StaticMaker( + input_set_generator=StaticSetGenerator( + user_incar_settings={ + "ADDGRID": "True", + "ENCUT": 520, + "EDIFF": 1e-06, + "ISMEAR": 0, + "SIGMA": 0.01, + "PREC": "Accurate", + "ISYM": None, + "KSPACING": 0.2, + "NPAR": 8, + "LWAVE": "False", + "LCHARG": "False", + "ENAUG": None, + "GGA": None, + "ISPIN": None, + "LAECHG": None, + "LELF": None, + "LORBIT": None, + "LVTOT": None, + "NSW": None, + "SYMPREC": None, + "NELM": 100, + "LMAXMIX": None, + "LASPH": None, + "AMIN": None, + } + ), + run_vasp_kwargs={"handlers": ()}, +) - Parameters - ---------- - structure_list: list[Structure]. - List of pymatgen structures object. - cell_factor: float - Factor to resize cell parameters. - config_type: str - Configuration type of the data. - potential_filename: str - The param_file_name for :obj:`quippy.potential.Potential()'`. - relax_kwargs : dict - Keyword arguments that will get passed to :obj:`Relaxer.relax`. - """ - jobs = [] - for rattled in structure_list: - if relax_kwargs == {}: - relax_kwargs = { - "interval": 50000, - "fmax": 0.5, - "traj_file": rattled.reduced_formula - + "_" - + f"{cell_factor}".replace(".", "") - + ".pkl", - } - if self.static_energy_maker is None: - self.static_energy_maker = ForceFieldRelaxMaker( - calculator_kwargs={ - "args_str": "IP GAP", - "param_filename": str(potential_filename), - }, - force_field_name="GAP", - relax_cell=False, - relax_kwargs=relax_kwargs, - steps=1, - ) - static_run = self.static_energy_maker.make(structure=rattled) - jobs.append(static_run) - conv_job = convert_to_extxyz( - static_run.output, - rattled.reduced_formula - + "_" - + f"{cell_factor}".replace(".", "") - + ".pkl", - config_type, - f"{cell_factor}".replace(".", ""), - ) - jobs.append(conv_job) - - return Response(replace=Flow(jobs), output=conv_job.output) +_DEFAULT_RELAXATION_MAKER = DoubleRelaxMaker.from_relax_maker( + TightRelaxMaker( + input_set_generator=TightRelaxSetGenerator( + user_incar_settings={ + "ALGO": "Normal", + "ISPIN": 1, + "LAECHG": False, + "ISMEAR": 0, + "ENCUT": 700, + "ISYM": 0, + "SIGMA": 0.05, + "LCHARG": False, # Do not write the CHGCAR file + "LWAVE": False, # Do not write the WAVECAR file + "LVTOT": False, # Do not write LOCPOT file + "LORBIT": None, # No output of projected or partial DOS in EIGENVAL, PROCAR and DOSCAR + "LOPTICS": False, # No PCDAT file + "NCORE": 4, + } + ), + run_vasp_kwargs={"handlers": {}}, + ) +) @dataclass @@ -231,13 +114,11 @@ class DFTStaticLabelling(Maker): """ Maker to set up and run VASP static calculations for input structures, including bulk, isolated atoms, and dimers. - It supports custom VASP input parameters and error handlers. - Parameters ---------- name: str Name of the flow. - isolated_atom: bool + include_isolated_atom: bool If true, perform single-point calculations for isolated atoms. Default is False. isolated_species: list[str] | None List of species for which to perform isolated atom calculations. If None, @@ -247,7 +128,7 @@ class DFTStaticLabelling(Maker): Default is False. isolatedatom_box: list[float] List of the lattice constants for a isolated_atom configuration. - dimer: bool + include_dimer: bool If true, perform single-point calculations for dimers. Default is False. dimer_box: list[float] The lattice constants of a dimer box. @@ -264,8 +145,8 @@ class DFTStaticLabelling(Maker): custom_potcar: dict | None Dictionary of POTCAR settings to update. Keys are element symbols, values are the desired POTCAR labels. Default is None. - static_energy_maker: BaseVaspMaker | CastepStaticMaker | ForceFieldStaticMaker - Maker for static energy jobs: either BaseVaspMaker (VASP-based) or CastepStaticMaker (CASTEP-based) or + static_energy_maker: BaseVaspMaker | BaseCastepMaker | ForceFieldStaticMaker + Maker for static energy jobs: either BaseVaspMaker (VASP-based) or BaseCastepMaker (CASTEP-based) or ForceFieldStaticMaker (force field-based). Defaults to StaticMaker (VASP-based). static_energy_maker_isolated_atoms: BaseVaspMaker | ForceFieldStaticMaker | None Maker for static energy jobs of isolated atoms: either BaseVaspMaker (VASP-based) or @@ -275,6 +156,9 @@ class DFTStaticLabelling(Maker): except that `kspacing` will be automatically set to 100 to enforce a Gamma-point-only calculation. This is typically suitable for single-atom systems. Default is None. If a non-`StaticMaker` maker is used here, its output must include a `dir_name` field to ensure compatibility with downstream workflows. + config_type : str + Configuration types corresponding to the structures. If None, defaults + to 'bulk'. Default is None. Returns ------- @@ -285,61 +169,29 @@ class DFTStaticLabelling(Maker): """ name: str = "do_dft_labelling" - isolated_atom: bool = False + include_isolated_atom: bool = False isolated_species: list[str] | None = None e0_spin: bool = False isolatedatom_box: list[float] = field(default_factory=lambda: [20, 20, 20]) - dimer: bool = False + include_dimer: bool = False dimer_box: list[float] = field(default_factory=lambda: [20, 20, 20]) dimer_species: list[str] | None = None dimer_range: list[float] | None = None dimer_num: int = 21 custom_incar: dict | None = None custom_potcar: dict | None = None - static_energy_maker: BaseVaspMaker | CastepStaticMaker | ForceFieldStaticMaker = ( - field( - default_factory=lambda: StaticMaker( - input_set_generator=StaticSetGenerator( - user_incar_settings={ - "ADDGRID": "True", - "ENCUT": 520, - "EDIFF": 1e-06, - "ISMEAR": 0, - "SIGMA": 0.01, - "PREC": "Accurate", - "ISYM": None, - "KSPACING": 0.2, - "NPAR": 8, - "LWAVE": "False", - "LCHARG": "False", - "ENAUG": None, - "GGA": None, - "ISPIN": None, - "LAECHG": None, - "LELF": None, - "LORBIT": None, - "LVTOT": None, - "NSW": None, - "SYMPREC": None, - "NELM": 100, - "LMAXMIX": None, - "LASPH": None, - "AMIN": None, - } - ), - run_vasp_kwargs={"handlers": ()}, - ) - ) + static_energy_maker: BaseVaspMaker | BaseCastepMaker | ForceFieldStaticMaker = ( + field(default_factory=lambda: _DEFAULT_STATIC_ENERGY_MAKER) ) static_energy_maker_isolated_atoms: ( - BaseVaspMaker | CastepStaticMaker | ForceFieldStaticMaker | None + BaseVaspMaker | BaseCastepMaker | ForceFieldStaticMaker | None ) = None + config_type: str | None = None @job def make( self, structures: list, - config_type: str | None = None, ): """ Maker to set up and run VASP static calculations. @@ -349,9 +201,6 @@ def make( structures : list[Structure] | list[list[Structure]] List of structures for which to run the VASP static calculations. If None, no bulk calculations will be performed. Default is None. - config_type : str - Configuration types corresponding to the structures. If None, defaults - to 'bulk'. Default is None. """ job_list = [] @@ -379,13 +228,13 @@ def make( static_job = st_m.make(structure=struct) static_job.name = f"static_bulk_{idx}" dirs["dirs_of_dft"].append(static_job.output.dir_name) - if config_type: - dirs["config_type"].append(config_type) + if self.config_type: + dirs["config_type"].append(self.config_type) else: dirs["config_type"].append("bulk") job_list.append(static_job) - if self.isolated_atom: + if self.include_isolated_atom: try: if self.isolated_species is not None: syms = self.isolated_species @@ -429,7 +278,7 @@ def make( logging.error(f"Unknown species of isolated atoms! Exception: {e}") traceback.print_exc() - if self.dimer: + if self.include_dimer: try: atoms = [AseAtomsAdaptor().get_atoms(at) for at in structures] if self.dimer_species is not None: @@ -489,3 +338,423 @@ def make( traceback.print_exc() return Response(replace=Flow(job_list), output=dirs) + + +@dataclass +class RattledTrainingDataMaker(DFTStaticLabelling): + """ + Build a DFT-labeled dataset of rattled or distorted atomic structures. + + Starting from a relaxed bulk structure generated by a relaxation maker, this class applies controlled + perturbations such as atomic position rattling, volume scaling, and geometric distortions. The resulting + structures are then labeled with DFT, producing a dataset suitable for training or benchmarking atomistic models. + + Parameters + ---------- + name: str + Name of the flow. + bulk_relax_maker: BaseVaspMaker | BaseCastepMaker | ForceFieldRelaxMaker + Maker used to produce the relaxed structure that will be + perturbed. Defaults to _DEFAULT_RELAXATION_MAKER. + uc: bool + If True, will generate randomly distorted structures (unitcells) + and add static computation jobs to the flow. + distort_type: int + 0- volume distortion, 1- angle distortion, 2- volume and angle distortion. Default=0. + n_structures: int. + Target total number of structures to generate (after rattling). Default=10. + - If `volume_custom_scale_factors` is None: + The code generates `n_structures` different volume or angle distortions. + Each is rattled once. + - If `volume_custom_scale_factors` is defined: + Given that the list length equals m, the total `n_structures` is distributed + over these m scale factors: + base = n_structures // m + rem = n_structures % m + Note that the last `rem` factors get one extra rattled structure. + Example: volume_custom_scale_factors=[0.95,0.97,0.99], n_structures=10 -> counts=[3,3,4]. + volume_scale_factor_range: list[float] + [min, max] of volume scale factors. + e.g. [0.90, 1.10] will distort volume -+10%. + volume_custom_scale_factors: list[float] + Specify explicit scale factors (if range is not specified). + If None, will default to [0.90, 0.95, 0.98, 0.99, 1.01, 1.02, 1.05, 1.10]. + rattle_type: int. + 0- standard rattling, 1- Monte-Carlo rattling. Default=0. + rattle_std: float. + Rattle amplitude (standard deviation in normal distribution). + Default=0.01. + Note that for MC rattling, displacements generated will roughly be + rattle_mc_n_iter**0.5 * rattle_std for small values of n_iter. + rattle_seed: int. + Seed for setting up NumPy random state from which random numbers are generated. + Default=42. + rattle_mc_n_iter: int. + Number of Monte Carlo iterations. + Larger number of iterations will generate larger displacements. + Default=10. + min_distance: float + Minimum separation allowed between any two atoms. + Default= 1.5A. + angle_percentage_scale: float + Angle scaling factor. + Default= 10 will randomly distort angles by +-10% of original value. + angle_max_attempts: int. + Maximum number of attempts to distort structure before aborting. + Default=1000. + w_angle: list[float] + List of angle indices to be changed i.e. 0=alpha, 1=beta, 2=gamma. + Default= [0, 1, 2]. + symprec: float + Precision to determine symmetry. + use_symmetrized_structure: str or None + Primitive, conventional or None + supercell_matrix : Matrix3D | None + Explicit supercell matrix to use when expanding the reference cell. + Default None. + supercell_settings : dict + Settings used to construct a supercell when supercell_matrix is not + provided. Expected keys include min_length, max_length, + prefer_90_degrees, and allow_orthorhombic. + dft_ref_file : str + Output filename to store generated structures in extxyz format. + Default "dft_ref.extxyz". + config_type : str + Type identifier that will be attached to generated structures. + Default "rattled_structures". + """ + + name: str = "do_rattling" + bulk_relax_maker: BaseVaspMaker | BaseCastepMaker | ForceFieldRelaxMaker = field( + default_factory=lambda: _DEFAULT_RELAXATION_MAKER + ) + uc: bool = False + distort_type: int = 0 + n_structures: int = 10 + volume_custom_scale_factors: list[float] | None = None + volume_scale_factor_range: list[float] | None = None + rattle_type: int = 0 + rattle_std: float = 0.01 + rattle_seed: int = 42 + rattle_mc_n_iter: int = 10 + min_distance: float = 1.5 + angle_percentage_scale: float = 10 + angle_max_attempts: int = 1000 + w_angle: list[int] | None = None + symprec: float = SETTINGS.SYMPREC + use_symmetrized_structure: str | None = None + supercell_matrix: Matrix3D | None = None + supercell_settings: dict = field( + default_factory=lambda: { + "min_length": 15, + "max_length": 20, + "prefer_90_degrees": False, + "allow_orthorhombic": False, + } + ) + dft_ref_file: str = "dft_ref.extxyz" + config_type: str = "rattled_structures" + + @job + def make( + self, + structure: Structure, + ): + """ + Generate and label a set of rattled or distorted structures based on a reference structure. + + Parameters + ---------- + structure : Structure + Input crystal structure to serve as the starting point. Typically + the primitive or conventional cell of a bulk material. + + Returns + ------- + dict + A dictionary containing: + - `pre_database_dir`: Path to the directory with collected DFT data. + - `isolated_atom_energies`: Mapping of element symbols to their isolated-atom reference + energies. + """ + job_list = [] + final_structures = [] + + if self.bulk_relax_maker is not None: + relaxed = self.bulk_relax_maker.make(structure) + job_list.append(relaxed) + structure = relaxed.output.structure + + if self.use_symmetrized_structure == "primitive": + prim_job = structure_to_primitive(structure, self.symprec) + job_list.append(prim_job) + structure = prim_job.output + elif self.use_symmetrized_structure == "conventional": + conv_job = structure_to_conventional(structure, self.symprec) + job_list.append(conv_job) + structure = conv_job.output + + if self.supercell_matrix is None: + supercell_job = get_supercell_size( + structure=structure, + min_length=self.supercell_settings.get("min_length", 12), + max_length=self.supercell_settings.get("max_length", 20), + prefer_90_degrees=self.supercell_settings.get( + "prefer_90_degrees", False + ), + allow_orthorhombic=self.supercell_settings.get( + "allow_orthorhombic", False + ), + ) + job_list.append(supercell_job) + supercell_matrix = supercell_job.output + else: + supercell_matrix = self.supercell_matrix + + rattle_job = generate_randomized_structures( + structure=structure, + supercell_matrix=supercell_matrix, + distort_type=self.distort_type, + n_structures=self.n_structures, + volume_custom_scale_factors=self.volume_custom_scale_factors, + volume_scale_factor_range=self.volume_scale_factor_range, + rattle_std=self.rattle_std, + min_distance=self.min_distance, + angle_percentage_scale=self.angle_percentage_scale, + angle_max_attempts=self.angle_max_attempts, + rattle_type=self.rattle_type, + rattle_seed=self.rattle_seed, + rattle_mc_n_iter=self.rattle_mc_n_iter, + w_angle=self.w_angle, + ) + job_list.append(rattle_job) + final_structures.append(rattle_job.output) + + if self.uc: + rattle_uc_job = generate_randomized_structures( + structure=structure, + supercell_matrix=((1, 0, 0), (0, 1, 0), (0, 0, 1)), + distort_type=self.distort_type, + n_structures=self.n_structures, + volume_custom_scale_factors=self.volume_custom_scale_factors, + volume_scale_factor_range=self.volume_scale_factor_range, + rattle_std=self.rattle_std, + min_distance=self.min_distance, + angle_percentage_scale=self.angle_percentage_scale, + angle_max_attempts=self.angle_max_attempts, + rattle_type=self.rattle_type, + rattle_seed=self.rattle_seed, + rattle_mc_n_iter=self.rattle_mc_n_iter, + w_angle=self.w_angle, + ) + job_list.append(rattle_uc_job) + final_structures.append(rattle_uc_job.output) + + do_dft_static = DFTStaticLabelling.make(self, structures=final_structures) + job_list.append(do_dft_static) + + do_data_collection = collect_dft_data( + dft_ref_file=self.dft_ref_file, + dft_dirs=do_dft_static.output, + ) + job_list.append(do_data_collection) + + return Response( + replace=Flow(job_list), + output={ + "pre_database_dir": do_data_collection.output, + "isolated_atom_energies": do_data_collection.output[ + "isolated_atom_energies" + ], + }, + ) + + +@dataclass +class GenerateTrainingDataForTesting(Maker): + """Maker for generating training data to test it and check the forces. + + This Maker will first generate training data based on the chosen ML model (default is GAP) + by randomizing (ase rattle) atomic displacements in supercells of the provided input structures. + Then it will proceed with MLIP-based Phonon calculations (based on atomate2 PhononMaker), collect + all structure data in extended xyz files and plot the forces in histograms (per rescaling cell_factor + and total). + + Parameters + ---------- + name: str + Name of the flow. + bulk_relax_maker: ForceFieldRelaxMaker | None + Maker for the relax jobs. + static_energy_maker: ForceFieldStaticMaker | ForceFieldRelaxMaker | None + Maker for the static jobs. + + """ + + name: str = "generate_training_data_for_testing" + bulk_relax_maker: ForceFieldRelaxMaker | None = None + static_energy_maker: ForceFieldStaticMaker | ForceFieldRelaxMaker | None = None + + def make( + self, + train_structure_list: list[Structure], + cell_factor_sequence: list[float] | None = None, + potential_filename: str = "gap.xml", + n_structures: int = 50, + rattle_std: float = 0.01, + relax_cell: bool = True, + steps: int = 1000, + supercell_matrix: Matrix3D | None = None, + config_type: str = "train", + x_min: int = 0, + x_max: int = 5, + bin_width: float = 0.125, + **relax_kwargs, + ): + """ + Generate ase.rattled structures from the training data and returns histogram plots of the forces. + + Parameters + ---------- + train_structure_list: list[Structure]. + List of pymatgen structures object. + cell_factor_sequence: list[float] + List of factor to resize cell parameters. + potential_filename: str + The param_file_name for :obj:`quippy.potential.Potential()'`. + n_structures : int. + Total number of randomly displaced structures to be generated. + rattle_std: float. + Rattle amplitude (standard deviation in normal distribution). + Default=0.01. + relax_cell : bool + Whether to allow the cell shape/volume to change during relaxation. + steps : int + Maximum number of ionic steps allowed during relaxation. + supercell_matrix: Matrix3D | None + The matrix to generate the supercell. + config_type: str + Configuration type of the data. + x_min: int + Minimum value for the plot x-axis. + x_max: int + Maximum value for the plot x-axis. + bin_width: float + Width of the plot bins. + relax_kwargs : dict + Keyword arguments that will get passed to :obj:`Relaxer.relax`. + + Returns + ------- + Matplotlib plots "count vs. forces". + """ + jobs = [] + if cell_factor_sequence is None: + cell_factor_sequence = [0.975, 1.0, 1.025, 1.05] + for structure in train_structure_list: + if self.bulk_relax_maker is None: + self.bulk_relax_maker = ForceFieldRelaxMaker( + calculator_kwargs={ + "args_str": "IP GAP", + "param_filename": str(potential_filename), + }, + force_field_name="GAP", + relax_cell=relax_cell, + steps=steps, + ) + if supercell_matrix is None: + supercell_matrix = [[3, 0, 0], [0, 3, 0], [0, 0, 3]] + + bulk_relax = self.bulk_relax_maker.make(structure=structure) + jobs.append(bulk_relax) + supercell = get_supercell_job( + structure=bulk_relax.output.structure, + supercell_matrix=supercell_matrix, + ) + jobs.append(supercell) + + for cell_factor in cell_factor_sequence: + rattled_job = generate_randomized_structures( + structure=supercell.output, + n_structures=n_structures, + volume_custom_scale_factors=[cell_factor], + rattle_std=rattle_std, + ) + jobs.append(rattled_job) + static_conv_jobs = self.static_run_and_convert( + rattled_job.output, + cell_factor, + config_type, + potential_filename, + **relax_kwargs, + ) + jobs.append(static_conv_jobs) + plots = plot_force_distribution( + cell_factor, static_conv_jobs.output, x_min, x_max, bin_width + ) + jobs.append(plots) + + return Flow(jobs=jobs, name=self.name) # , plots.output) + + @job + def static_run_and_convert( + self, + structure_list: list[Structure], + cell_factor: float, + config_type, + potential_filename, + **relax_kwargs, + ): + """ + Job for the static runs and the data conversion to the extxyz format. + + Parameters + ---------- + structure_list: list[Structure]. + List of pymatgen structures object. + cell_factor: float + Factor to resize cell parameters. + config_type: str + Configuration type of the data. + potential_filename: str + The param_file_name for :obj:`quippy.potential.Potential()'`. + relax_kwargs : dict + Keyword arguments that will get passed to :obj:`Relaxer.relax`. + + """ + jobs = [] + for rattled in structure_list: + if relax_kwargs == {}: + relax_kwargs = { + "interval": 50000, + "fmax": 0.5, + "traj_file": rattled.reduced_formula + + "_" + + f"{cell_factor}".replace(".", "") + + ".pkl", + } + if self.static_energy_maker is None: + self.static_energy_maker = ForceFieldRelaxMaker( + calculator_kwargs={ + "args_str": "IP GAP", + "param_filename": str(potential_filename), + }, + force_field_name="GAP", + relax_cell=False, + relax_kwargs=relax_kwargs, + steps=1, + ) + static_run = self.static_energy_maker.make(structure=rattled) + jobs.append(static_run) + conv_job = convert_to_extxyz( + static_run.output, + rattled.reduced_formula + + "_" + + f"{cell_factor}".replace(".", "") + + ".pkl", + config_type, + f"{cell_factor}".replace(".", ""), + ) + jobs.append(conv_job) + + return Response(replace=Flow(jobs), output=conv_job.output) diff --git a/src/autoplex/data/common/jobs.py b/src/autoplex/data/common/jobs.py index 57a47e260..d9bcebb20 100644 --- a/src/autoplex/data/common/jobs.py +++ b/src/autoplex/data/common/jobs.py @@ -31,7 +31,6 @@ cur_select, data_distillation, flatten, - handle_rss_trajectory, mc_rattle, random_vary_angle, scale_cell, @@ -39,6 +38,8 @@ stratified_dataset_split, to_ase_trajectory, ) +from autoplex.data.md.utils import handle_md_trajectory +from autoplex.data.rss.utils import handle_rss_trajectory from autoplex.fitting.common.regularization import set_custom_sigma if TYPE_CHECKING: @@ -195,12 +196,20 @@ def generate_randomized_structures( distort_type: int. 0- volume distortion, 1- angle distortion, 2- volume and angle distortion. Default=0. n_structures: int. - Total number of distorted structures to be generated. - Must be provided if distorting volume without specifying a range, or if distorting angles. - Default=10. + Target total number of structures to generate (after rattling). Default=10. + - If `volume_custom_scale_factors` is None: + The code generates `n_structures` different volume or angle distortions. + Each is rattled once. + - If `volume_custom_scale_factors` is defined: + Given that the list length equals m, the total `n_structures` is distributed + over these m scale factors: + base = n_structures // m + rem = n_structures % m + Note that the last `rem` factors get one extra rattled structure. + Example: volume_custom_scale_factors=[0.95,0.97,0.99], n_structures=10 -> counts=[3,3,4]. volume_scale_factor_range: list[float] [min, max] of volume scale factors. - e.g. [0.90, 1.10] will distort volume +-10%. + e.g. [0.90, 1.10] will distort volume -+10%. volume_custom_scale_factors: list[float] Specify explicit scale factors (if range is not specified). If None, will default to [0.90, 0.95, 0.98, 0.99, 1.01, 1.02, 1.05, 1.10]. @@ -286,35 +295,41 @@ def generate_randomized_structures( # distorted_cells=list(chain.from_iterable(distorted_cells)) # rattle cells by standard or mc - rattled_cells = ( - [ - std_rattle( + m = len(distorted_cells) + if volume_custom_scale_factors is not None: + base = n_structures // m + rem = n_structures % m + counts = [base + (1 if i >= m - rem else 0) for i in range(m)] + else: + counts = [1] * m + + rattled_cells = [] + if rattle_type == 0: + for icell, (cell, count) in enumerate(zip(distorted_cells, counts)): + seed_for_cell = int(rattle_seed) + icell + rattled = std_rattle( structure=cell, - n_structures=1, + n_structures=count, rattle_std=rattle_std, - rattle_seed=rattle_seed + icell, + rattle_seed=seed_for_cell, ) - for icell, cell in enumerate(distorted_cells) - ] - if rattle_type == 0 - else ( - [ - mc_rattle( - structure=cell, - n_structures=1, - rattle_std=rattle_std, - min_distance=min_distance, - rattle_seed=rattle_seed + icell, - rattle_mc_n_iter=rattle_mc_n_iter, - ) - for icell, cell in enumerate(distorted_cells) - ] - if rattle_type == 1 - else None - ) - ) - - if rattled_cells is None: + rattled_cells.append(rattled) + elif rattle_type == 1: + for icell, (cell, count) in enumerate(zip(distorted_cells, counts)): + if count <= 0: + continue + seed_for_cell = int(rattle_seed) + icell + rattled = mc_rattle( + structure=cell, + n_structures=count, + rattle_std=rattle_std, + min_distance=min_distance, + rattle_seed=seed_for_cell, + rattle_mc_n_iter=rattle_mc_n_iter, + ) + rattled_cells.append(rattled) + else: + rattled_cells = None raise TypeError("rattle_type is not recognized") return list(chain.from_iterable(rattled_cells)) @@ -330,6 +345,7 @@ def sample_data( dir: list[str] | str | None = None, structure: list[Structure] | list[list[Structure]] | None = None, traj_path: list | None = None, + traj_type: Literal["rss", "md"] = "rss", isolated_atom_energies: dict | None = None, random_seed: int = None, remove_traj_files: bool = False, @@ -380,6 +396,8 @@ def sample_data( List of structures for sampling. Default is None. traj_path: list[list[str]] List of lists containing trajectory paths. Default is None. + traj_type: Literal["rss", "md"] + Specifies the type of trajectory to process, where 'rss' and 'md' represent different trajectory formats. isolated_atom_energies: dict Dictionary of isolated energy values for species. Required for 'boltzhist_cur' selection method. Default is None. @@ -430,7 +448,11 @@ def sample_data( atoms = [AseAtomsAdaptor().get_atoms(at) for at in structure] else: - atoms, pressures = handle_rss_trajectory(traj_path, remove_traj_files) + if traj_type == "rss": + atoms, pressures = handle_rss_trajectory(traj_path, remove_traj_files) + elif traj_type == "md": + atoms = handle_md_trajectory(traj_path, remove_traj_files) + atoms = flatten(atoms, recursive=True) if selection_method in {"cur", "bcur1s", "bcur2i"}: n_species = ElementCollection( @@ -541,13 +563,17 @@ def sample_data( if selected_atoms is None: raise ValueError("Unable to sample correctly. Please recheck the parameters!") + ase_atoms_list = [AseAtomsAdaptor().get_atoms(struct) for struct in selected_atoms] + + write("selected_structures.extxyz", ase_atoms_list) + return selected_atoms @job def collect_dft_data( dft_ref_file: str = "dft_ref.extxyz", - rss_group: str = "RSS", + rss_group: str | None = None, dft_dirs: dict | None = None, ) -> dict: """ @@ -653,7 +679,8 @@ def collect_dft_data( and at_i.info["config_type"] != "IsolatedAtom" ): at_i.pbc = True - at_i.info["rss_group"] = rss_group + if rss_group: + at_i.info["rss_group"] = rss_group else: at_i.info["rss_nonperiodic"] = "T" diff --git a/src/autoplex/data/common/utils.py b/src/autoplex/data/common/utils.py index 46e66ee19..216671f54 100644 --- a/src/autoplex/data/common/utils.py +++ b/src/autoplex/data/common/utils.py @@ -165,7 +165,6 @@ def scale_cell( """ atoms = AseAtomsAdaptor.get_atoms(structure) distorted_cells = [] - if volume_custom_scale_factors is not None: scale_factors_defined = volume_custom_scale_factors warnings.warn("Using your custom lattice scale factors", stacklevel=2) @@ -182,10 +181,6 @@ def scale_cell( / (n_structures - 1), ) - if not np.isclose(scale_factors_defined, 1.0).any(): - scale_factors_defined = np.append(scale_factors_defined, 1) - scale_factors_defined = np.sort(scale_factors_defined) - warnings.warn( f"Generated lattice scale factors {scale_factors_defined} within your range", stacklevel=2, @@ -366,7 +361,7 @@ def std_rattle( copy.rattle(stdev=rattle_std, seed=rattle_seed) rattled_xtals.append(AseAtomsAdaptor.get_structure(copy)) if i > 0: - rattle_seed = rattle_seed + 1 + rattle_seed += 1000 copy = atoms.copy() copy.rattle(stdev=rattle_std, seed=rattle_seed) rattled_xtals.append(AseAtomsAdaptor.get_structure(copy)) @@ -1596,57 +1591,3 @@ def flatten_list(input_list: list | list[list]) -> list: return list(chain.from_iterable(input_list)) return input_list - - -def handle_rss_trajectory( - traj_path, remove_traj_files -) -> tuple[list[list], list[list]]: - """ - Handle trajectory and associated information. - - Parameters - ---------- - traj_path: list | None - List of dictionaries containing trajectory information. - Each dictionary should have keys 'traj_path' and 'pressure'. - If None, an empty list will be used. - remove_traj_files: bool - Whether to remove the directories containing trajectory files - after processing them. Default is False. - - Returns - ------- - tuple: - atoms: list - List of ASE Atoms objects read from the trajectory files. - pressures: list - List of pressure values corresponding to the atoms. - """ - atoms = [] - pressures = [] - traj_path = [] if traj_path is None else flatten_list(traj_path) - traj_dirs = [] - - if all(i is None for i in traj_path): - raise ValueError("No valid trajectory path was obtained!") - - for traj in traj_path: - if traj is not None and Path(traj).exists(): - print("Processing trajectory:", traj) - at = ase.io.read(traj, index=":") - atoms.append(at) - pressure = [i.info["RSS_applied_pressure"] for i in at] - pressures.append(pressure) - traj_dirs.append(os.path.dirname(traj)) - - if remove_traj_files and traj_dirs: - traj_dirs = list(set(traj_dirs)) - for dir_path in traj_dirs: - if os.path.exists(dir_path) and os.path.isdir(dir_path): - for root, _, files in os.walk(dir_path): - for name in files: - if "RSS_relax_results" in name: - file_path = os.path.join(root, name) - os.remove(file_path) - - return atoms, pressures diff --git a/src/autoplex/data/md/__init__.py b/src/autoplex/data/md/__init__.py new file mode 100644 index 000000000..12d97db3d --- /dev/null +++ b/src/autoplex/data/md/__init__.py @@ -0,0 +1 @@ +"""Module containing flows and jobs for reference data generation for MD workflows.""" diff --git a/src/autoplex/data/md/flows.py b/src/autoplex/data/md/flows.py new file mode 100644 index 000000000..deb7919ef --- /dev/null +++ b/src/autoplex/data/md/flows.py @@ -0,0 +1,98 @@ +"""Flows for running MD.""" + +import logging +from dataclasses import dataclass +from typing import Literal + +from jobflow import Flow, Response, job +from pymatgen.core.structure import Structure + +from autoplex.data.common.flows import DFTStaticLabelling +from autoplex.data.common.jobs import collect_dft_data, sample_data +from autoplex.data.md.jobs import MDAseMaker + +logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s") + + +@dataclass +class MDMaker(DFTStaticLabelling): + """ + Run molecular dynamics (MD) simulations and label selected configurations with DFT. + + Parameters + ---------- + name: str + Name of the flow. + md_maker: MDAseMaker | None + A maker responsible for performing the actual MD simulation. + dft_ref_file : str + Output filename for the generated and labeled configurations. + Default "dft_md_ref.extxyz". + config_type : str + Tag attached to generated structures. Default "md". + selection_method : Literal['random', 'uniform'] + Method for selecting samples. Options include: + - 'random': Random selection. + - 'uniform': Uniform selection. + random_seed: int, optional + Seed for random number generation, ensuring reproducibility of sampling. + num_of_selection: int + Number of structures to be sampled. + remove_traj_files: bool + Remove all trajectory files raised by MD. + """ + + name: str = "do_md" + md_maker: MDAseMaker | None = None + dft_ref_file: str = "dft_md_ref.extxyz" + config_type: str = "md" + selection_method: Literal["cur", "random", "uniform"] = "uniform" + random_seed: int = 42 + num_of_selection: int = 5 + remove_traj_files: bool = False + + @job + def make(self, structure: Structure): + """ + Generate and label a set of MD-rattled structures. + + Parameters + ---------- + structure : Structure + Input structure to serve as the starting point. + + Returns + ------- + dict + A dictionary containing: + - `pre_database_dir`: Path to the directory with collected DFT data. + - `isolated_atom_energies`: Mapping of element symbols to their isolated-atom reference + energies. + """ + md_job = self.md_maker.make(structure=structure) + do_data_sampling = sample_data( + selection_method=self.selection_method, + num_of_selection=self.num_of_selection, + traj_path=md_job.output, + traj_type="md", + random_seed=self.random_seed, + remove_traj_files=self.remove_traj_files, + ) + do_dft_static = DFTStaticLabelling.make( + self, structures=do_data_sampling.output + ) + do_data_collection = collect_dft_data( + dft_ref_file=self.dft_ref_file, + dft_dirs=do_dft_static.output, + ) + job_list = [md_job, do_data_sampling, do_dft_static, do_data_collection] + + return Response( + replace=Flow(job_list), + output={ + "pre_database_dir": do_data_collection.output, + "isolated_atom_energies": do_data_collection.output[ + "isolated_atom_energies" + ], + }, + ) diff --git a/src/autoplex/data/md/jobs.py b/src/autoplex/data/md/jobs.py new file mode 100644 index 000000000..6bce9c719 --- /dev/null +++ b/src/autoplex/data/md/jobs.py @@ -0,0 +1,136 @@ +"""Jobs for running MD.""" + +import logging +import os +from dataclasses import dataclass, field +from pathlib import Path + +from atomate2.forcefields.md import ForceFieldMDMaker +from emmet.core.math import Matrix3D +from jobflow import Flow, Response, job +from pymatgen.core.structure import Structure + +from autoplex.data.common.utils import scale_cell +from autoplex.data.md.utils import generate_temperature_profile + +logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s") + + +@dataclass +class MDAseMaker(ForceFieldMDMaker): + """ + Maker to perform molecular dynamics (MD) simulations using ASE. + + This class extends the `ForceFieldMDMaker` from atomate2, with all its + standard keywords preserved. For reference, see: + https://github.com/materialsproject/atomate2/blob/main/src/atomate2/forcefields/md.py + + In addition, this MD maker supports running **any combination of quenching + (cooling)** and **heating** stages. By specifying multiple temperature points, + the maker automatically builds a stepwise temperature path, enabling complex + thermal protocols such as: + • isothermal equilibration + • multi-stage quench (e.g., 3000 -> 1500 -> 300 K) + • annealing or cyclic heating-cooling loops + + This maker provides a flexible way to design and execute + temperature-controlled MD workflows directly within atomate2. + + Parameters + ---------- + temperature_list : list[float] + A list of values defines a multi-stage quench or anneal profile. + eqm_step_list : list[int] | None, optional + Number of MD steps to hold each temperature value. + Length must match `temperatures`. If None, defaults to 10,000. + rate_list : list[float] | None, optional + Relative cooling/heating rates between stages (len = len(temperatures) - 1). + If None, linear interpolation is used. A larger `rate` value produces more + intermediate temperatures (slower quench), while a smaller value gives fewer steps + (faster quench). For example, rate=10 -> 10^14 K/s; rate=100 -> 10^13 K/s. + name : str + Job name. + volume_scale_factor_range: list[float] + [min, max] of volume scale factors. + e.g. [0.90, 1.10] will distort volume -+10%. + supercell_matrix: Matrix3D. + Matrix for obtaining the supercell. + + Returns + ------- + list[Path] + A list of absolute file paths to the "MD.traj" files from each job output. + """ + + temperature_list: list[float] = field(default_factory=list) + eqm_step_list: list[int] | None = None + rate_list: list[int] | None = None + name: str = "ML-driven_MD_with_ASE" + volume_custom_scale_factors: list[float] | None = None + supercell_matrix: Matrix3D | None = None + + @job + def make(self, structure: Structure): + """Maker to run MD simulations. + + Parameters + ---------- + structure: Structure + Pymatgen structure. + """ + if self.supercell_matrix is not None: + logging.info(f"Applying supercell_matrix:\n{self.supercell_matrix}") + structure = structure * self.supercell_matrix + logging.info(f"Supercell generated: {structure}.") + + if self.volume_custom_scale_factors is not None: + n_structures = len(self.volume_custom_scale_factors) + logging.info("Applying custom volume scaling:") + logging.info(f" Scale factors: {self.volume_custom_scale_factors}") + logging.info(f" Number of scaled structures to generate: {n_structures}") + structure = scale_cell( + structure=structure, + n_structures=n_structures, + volume_custom_scale_factors=self.volume_custom_scale_factors, + ) + + temps, n_steps = generate_temperature_profile( + temperature_list=self.temperature_list, + eqm_step_list=self.eqm_step_list, + rate_list=self.rate_list, + ) + self.temperature = temps + self.n_steps = int(n_steps) + + job_list = [] + job_output = {} + structures = [structure] if isinstance(structure, Structure) else structure + for idx, struct in enumerate(structures): + md_job = ForceFieldMDMaker.make(self, structure=struct) + job_list.append(md_job) + job_output[f"md_job_{idx}"] = md_job.output + + collec_trajs = collect_md_trajs(job_output) + job_list.append(collec_trajs) + + return Response(replace=Flow(job_list), output=collec_trajs.output) + + +@job +def collect_md_trajs(md_outputs: dict) -> list[Path]: + """ + Collect molecular dynamics (MD) trajectory file paths from multiple job outputs. + + Parameters + ---------- + md_outputs: dict + A dictionary mapping job identifiers (e.g., "md_job_0", "md_job_1", ...) + to their output objects. Each output object must have a `dir_name` attribute + that points to the directory containing the MD results. + + Returns + ------- + list[Path] + A list of absolute file paths to the "MD.traj" files from each job output. + """ + return [os.path.join(out.dir_name, "MD.traj") for out in md_outputs.values()] diff --git a/src/autoplex/data/md/utils.py b/src/autoplex/data/md/utils.py new file mode 100644 index 000000000..327f723ee --- /dev/null +++ b/src/autoplex/data/md/utils.py @@ -0,0 +1,125 @@ +"""Utility functions for MD.""" + +import logging +import os +from pathlib import Path + +import ase.io +import numpy as np + +from autoplex.data.common.utils import flatten_list + +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" +) + + +def generate_temperature_profile( + temperature_list: list[float], + eqm_step_list: list[int] | None = None, + rate_list: list[int] | None = None, +): + """ + Generate a temperature array for molecular-dynamics runs. + + Each listed temperature corresponds to one MD step. + This handles both single-temperature (isothermal) and + multi-temperature (e.g., multi-stage quench) runs. + + Parameters + ---------- + temperature_list : list[float] + A list of values defines a multi-stage quench or anneal profile. + eqm_step_list : list[int] | None, optional + Number of MD steps to hold each temperature value. + Length must match `temperatures`. If None, defaults to 10,000. + rate_list : list[float] | None, optional + Relative cooling/heating rates between stages (len = len(temperatures) - 1). + If None, linear interpolation is used. A larger `rate` value produces more + intermediate temperatures (slower quench), while a smaller value gives fewer steps + (faster quench). For example, rate=10 -> 10^14 K/s; rate=100 -> 10^13 K/s. + + Returns + ------- + np.ndarray + Array of temperatures for each MD step (length = n_steps + 1), + ready to pass to ``ForceFieldMDMaker.temperature``. + """ + n_seg = len(temperature_list) + if eqm_step_list is not None and len(eqm_step_list) != n_seg: + raise ValueError( + f"Length mismatch: eqm_steps_list ({len(eqm_step_list)}) " + f"must match temperatures ({n_seg})." + ) + + if n_seg == 1: + n_hold = 10000 if eqm_step_list is None else eqm_step_list[0] + return temperature_list, n_hold + + T_list = [] + eqm_step_list = eqm_step_list or [10000] * n_seg + rate_list = rate_list or [1.0] * (n_seg - 1) + + for i in range(n_seg): + # hold current temperature + T_list.extend([temperature_list[i]] * int(eqm_step_list[i])) + + if i < n_seg - 1: + T0, T1 = float(temperature_list[i]), float(temperature_list[i + 1]) + md_steps = int(-(T0 - T1) * rate_list[i]) + tem_interval = max(abs(md_steps), 1) + + # linear interpolation between T0 and T1, one temperature per step + if T0 > T1: + tem_list = list(np.linspace(T0, T1, tem_interval + 1))[1:] + else: + tem_list = list(np.linspace(T1, T0, tem_interval + 1))[1:] + tem_list.reverse() + + T_list.extend(tem_list) + + T_list.append(temperature_list[-1]) + T_array = np.array(T_list, dtype=float) + n_steps = len(T_array) - 1 + return T_array, n_steps + + +def handle_md_trajectory( + traj_path: list | None = None, + remove_traj_files: bool = False, +) -> tuple[list[list], list[list]]: + """ + Handle trajectory and associated information. + + Parameters + ---------- + traj_path: list + List of paths pointing to trajectory files to be processed. + Default is None. + remove_traj_files: bool + Whether to remove the directories containing trajectory files + after processing them. Default is False. + + Returns + ------- + tuple: + atoms: list + List of ASE Atoms objects read from the trajectory files. + """ + atoms = [] + traj_path = [] if traj_path is None else flatten_list(traj_path) + + if all(i is None for i in traj_path): + raise ValueError("No valid MD trajectory path was obtained!") + + for traj in traj_path: + if traj is not None and Path(traj).exists(): + logging.info(f"Processing MD trajectory:, {traj}") + at = ase.io.read(traj, index=":") + atoms.append(at) + + if remove_traj_files: + logging.warning(f"The MD trajectory file is deleted: {traj}") + os.remove(traj) + + return atoms diff --git a/src/autoplex/data/phonons/flows.py b/src/autoplex/data/phonons/flows.py index 8b21aee86..cde5f41eb 100644 --- a/src/autoplex/data/phonons/flows.py +++ b/src/autoplex/data/phonons/flows.py @@ -268,7 +268,7 @@ class RandomStructuresDataGenerator(Maker): angle_max_attempts: int. Maximum number of attempts to distort structure before aborting. Default=1000. - w_angle: list[float] + w_angle: list[int] List of angle indices to be changed i.e. 0=alpha, 1=beta, 2=gamma. Default= [0, 1, 2]. rattle_type: int. @@ -326,7 +326,7 @@ class RandomStructuresDataGenerator(Maker): rattle_std: float = 0.01 rattle_seed: int = 42 rattle_mc_n_iter: int = 10 - w_angle: list[float] | None = None + w_angle: list[int] | None = None supercell_settings: dict | None = field( default_factory=lambda: {"min_length": 15, "max_length": 20} ) diff --git a/src/autoplex/data/rss/utils.py b/src/autoplex/data/rss/utils.py index d0366a52c..c9e81de16 100644 --- a/src/autoplex/data/rss/utils.py +++ b/src/autoplex/data/rss/utils.py @@ -2,6 +2,7 @@ import ast import json +import logging import os from functools import partial from multiprocessing import Pool @@ -29,11 +30,16 @@ from pymatgen.io.ase import AseAtomsAdaptor from threadpoolctl import threadpool_limits +from autoplex.data.common.utils import flatten_list from autoplex.fitting.common.utils import ( CustomPotential, extract_gap_label, ) +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" +) + def extract_pairstyle( ace_label: str, ace_json: str, ace_table: str @@ -225,10 +231,9 @@ def adjust_forces(self, atoms, forces): bondlength = np.linalg.norm(displace) if bondlength < self.threshold: - print( - "Hookean adjusting forces, bondlength: ", + logging.info( + "Hookean adjusting forces, bondlength: %.6f < threshold: %.6f", bondlength, - " < ", self.threshold, ) self.used = True @@ -475,7 +480,9 @@ def process_rss( ) if keep_symmetry: - print("Creating FixSymmetry calculator and maintaining initial symmetry!") + logging.info( + "Creating FixSymmetry calculator and maintaining initial symmetry!" + ) constraint_list.append(FixSymmetry(atom, symprec=1.0e-4)) if constraint_list: @@ -534,7 +541,7 @@ def build_traj(): return None except RuntimeError: - print("RuntimeError occurred during optimization! Return none!") + logging.info("RuntimeError occurred during optimization! Return none!") return None @@ -623,7 +630,7 @@ def minimize_structures( del at.info["virial"] if hookean_repul: - print("Hookean repulsion is used!") + logging.info("Hookean repulsion is used!") for i, atom in enumerate(atoms): atom.info["unique_starting_index"] = iteration_index + f"{i+struct_start_index}" @@ -682,3 +689,58 @@ def split_structure_into_groups(structures: list, num_groups: int) -> list[list] start_index += group_size return structure_groups + + +def handle_rss_trajectory( + traj_path: list, + remove_traj_files: bool = False, +) -> tuple[list[list[ase.Atoms]], list[list[float]]]: + """ + Handle trajectory and associated information. + + Parameters + ---------- + traj_path: list + List of paths pointing to trajectory files to be processed. + remove_traj_files: bool + Whether to remove the directories containing trajectory files + after processing them. Default is False. + + Returns + ------- + tuple: + atoms: list[list[ase.Atoms]] + List of ASE Atoms objects read from the trajectory files. + pressures: list[list[float]] + List of pressure values corresponding to the atoms. + """ + atoms = [] + pressures = [] + traj_path = [] if traj_path is None else flatten_list(traj_path) + traj_dirs = [] + + if all(i is None for i in traj_path): + raise ValueError("No valid RSS trajectory path was obtained!") + + for traj in traj_path: + if traj is not None and Path(traj).exists(): + logging.info(f"Processing RSS trajectory:, {traj}") + at = ase.io.read(traj, index=":") + atoms.append(at) + pressure = [i.info["RSS_applied_pressure"] for i in at] + pressures.append(pressure) + traj_dirs.append(os.path.dirname(traj)) + + if remove_traj_files and traj_dirs: + traj_dirs = list(set(traj_dirs)) + for dir_path in traj_dirs: + if os.path.exists(dir_path) and os.path.isdir(dir_path): + for root, _, files in os.walk(dir_path): + for name in files: + if "RSS_relax_results" in name: + file_path = os.path.join(root, name) + os.remove(file_path) + + logging.warning(f"All RSS trajectory files have been deleted: {traj}") + + return atoms, pressures diff --git a/src/autoplex/misc/castep/jobs.py b/src/autoplex/misc/castep/jobs.py index e9a77b0b4..5ce533ed5 100644 --- a/src/autoplex/misc/castep/jobs.py +++ b/src/autoplex/misc/castep/jobs.py @@ -18,7 +18,7 @@ from ase.stress import voigt_6_to_full_3x3_stress from ase.units import GPa from atomate2.common.files import gzip_files -from jobflow import Maker, job +from jobflow import Flow, Maker, job from pymatgen.io.ase import AseAtomsAdaptor from autoplex.misc.castep.run import run_castep @@ -27,6 +27,7 @@ CASTEP_INPUT_FILES, CASTEP_OUTPUT_FILES, CastepInputGenerator, + CastepRelaxSetGenerator, CastepStaticSetGenerator, ) from autoplex.settings import SETTINGS @@ -106,7 +107,7 @@ def make(self, structure: Structure): Parameters ---------- - structure : Structure + structure: Structure A pymatgen structure object. Returns @@ -223,3 +224,62 @@ class CastepStaticMaker(BaseCastepMaker): input_set_generator: CastepInputGenerator = field( default_factory=CastepStaticSetGenerator ) + + +@dataclass +class CastepRelaxMaker(BaseCastepMaker): + """ + Maker to run a single CASTEP geometry optimization. + + Parameters + ---------- + name : str + Job name. + input_set_generator : CastepRelaxSetGenerator + Generator for CASTEP input settings. + """ + + name: str = "castep_relax" + input_set_generator: CastepInputGenerator = field( + default_factory=CastepRelaxSetGenerator + ) + + +@dataclass +class CastepDoubleRelaxMaker(CastepRelaxMaker): + """ + Maker to run two consecutive CASTEP relaxations. + + The first relaxation runs on the initial structure, + and the second uses the first output structure as input + to refine the result. + + Parameters + ---------- + name : str + Job name. + """ + + name: str = "castep_double_relax" + + def make(self, structure: Structure): + """ + Run two consecutive CASTEP relaxations. + + Parameters + ---------- + structure: Structure + A pymatgen Structure object. + + Returns + ------- + output: dict + Output of the second relaxation step. + """ + jobs = [] + first_job = CastepRelaxMaker.make(self, structure) + first_job.name = "relax_1" + second_job = CastepRelaxMaker.make(self, first_job.output.structure) + second_job.name = "relax_2" + jobs = [first_job, second_job] + return Flow(jobs=jobs, output=second_job.output, name=self.name) diff --git a/src/autoplex/misc/castep/utils.py b/src/autoplex/misc/castep/utils.py index f6eb32f66..7325a1782 100644 --- a/src/autoplex/misc/castep/utils.py +++ b/src/autoplex/misc/castep/utils.py @@ -193,6 +193,8 @@ def param_updates(self) -> dict: "elec_energy_tol": 1e-06, "max_scf_cycles": 1000, "smearing_width": 0.05, + "write_checkpoint": "none", + "num_dump_cycles": 0, "finite_basis_corr": "automatic", "mixing_scheme": "Pulay", "mix_charge_amp": 0.6, @@ -220,3 +222,78 @@ def cell_updates(self) -> dict: return { "kpoints_mp_spacing": "0.03", } + + +@dataclass +class CastepRelaxSetGenerator(CastepInputGenerator): + """ + Class to generate CASTEP structure relaxation (geometry optimization) input sets. + + This class creates input parameters for CASTEP relax calculations, + similar to VASP RelaxSetGenerator in atomate2. + + Parameters + ---------- + tight : bool + Whether to use tighter convergence settings (for high-precision relaxation). + variable_cell : bool + Whether to relax both atomic positions and cell parameters (cell optimization). + stress_tol : float + Target stress tolerance in GPa. + **kwargs + Other keyword arguments passed to CastepInputGenerator. + """ + + CONFIG: dict = field( + default_factory=lambda: { + "PARAM": { + "task": "GeometryOptimization", + "calculate_stress": "True", + } + } + ) + tight: bool = False + variable_cell: bool = True + + @property + def param_updates(self) -> dict: + """Return a dictionary of CASTEP input parameter updates.""" + updates = { + "task": "GeometryOptimization", + "cut_off_energy": 520.0, + "xc_functional": "PBE", + "elec_energy_tol": 1e-05, + "geom_energy_tol": 1e-04, + "geom_force_tol": 0.05, + "geom_stress_tol": 0.1, + "max_scf_cycles": 100, + "smearing_width": 0.1, + "write_checkpoint": None, + "num_dump_cycles": None, + } + if self.tight: + updates.update( + { + "elec_energy_tol": 1e-06, + "geom_energy_tol": 1e-05, + "geom_force_tol": 0.01, + "geom_stress_tol": 0.005, + "cut_off_energy": 600.0, + "smearing_width": 0.05, + } + ) + return updates + + @property + def cell_updates(self) -> dict: + """Return a dictionary of CASTEP cell parameter updates.""" + updates = { + "kpoints_mp_spacing": 0.04, + "symmetry_generate": True, + "symmetry_tol": 1.0e-5, + } + if self.variable_cell: + updates.update({"fix_all_cell": None}) + else: + updates.update({"fix_all_cell": True}) + return updates diff --git a/tests/auto/phonons/test_flows.py b/tests/auto/phonons/test_flows.py index 7d74de0dc..c432ea1d8 100644 --- a/tests/auto/phonons/test_flows.py +++ b/tests/auto/phonons/test_flows.py @@ -668,12 +668,14 @@ def test_iterative_complete_dft_vs_ml_benchmark_workflow_gap(vasp_test_dir, mock 1.0, 1.025, 1.05], + n_structures=4, supercell_settings={"min_length": 8, "min_atoms": 20}, apply_data_preprocessing=True), complete_dft_vs_ml_benchmark_workflow_1=CompleteDFTvsMLBenchmarkWorkflow(symprec=1e-2, displacements=[0.01], split_ratio=0.33, volume_custom_scale_factors=[0.975], + n_structures=1, supercell_settings={"min_length": 8, "min_atoms": 20}, apply_data_preprocessing=True, @@ -718,11 +720,13 @@ def test_iterative_complete_dft_vs_ml_benchmark_workflow_gap_add_phonon_false(va split_ratio=0.33, add_dft_phonon_struct=False, volume_custom_scale_factors=[0.975, 1.0, 1.025, 1.05], + n_structures=4, supercell_settings={"min_length": 8, "min_atoms": 20}, apply_data_preprocessing=True), complete_dft_vs_ml_benchmark_workflow_1=CompleteDFTvsMLBenchmarkWorkflow(symprec=1e-2, displacements=[0.01], split_ratio=0.33, volume_custom_scale_factors=[0.975], + n_structures=1, supercell_settings={"min_length": 8, "min_atoms": 20}, apply_data_preprocessing=True, @@ -763,6 +767,7 @@ def test_complete_dft_vs_ml_benchmark_workflow_gap( complete_workflow = CompleteDFTvsMLBenchmarkWorkflow( symprec=1e-2, displacements=[0.01], volume_custom_scale_factors=[0.975, 1.0, 1.025, 1.05], + n_structures=4, supercell_settings={"min_length": 8, "min_atoms": 20}, apply_data_preprocessing=True, ).make( @@ -813,6 +818,7 @@ def test_complete_dft_vs_ml_benchmark_workflow_gap_ml_potential_for_data( complete_workflow = CompleteDFTvsMLBenchmarkWorkflow( symprec=1e-2, displacements=[0.01], volume_custom_scale_factors=[0.975, 1.0, 1.025, 1.05], + n_structures=4, supercell_settings={"min_length": 8, "min_atoms": 20}, apply_data_preprocessing=True, displacement_maker=ForceFieldStaticMaker(force_field_name="MACE_MP_0B3"), @@ -863,6 +869,7 @@ def test_complete_dft_vs_gap_benchmark_workflow_database( complete_workflow = CompleteDFTvsMLBenchmarkWorkflow( symprec=1e-2, displacements=[0.01], volume_custom_scale_factors=[0.975, 1.0, 1.025, 1.05], + n_structures=4, supercell_settings={"min_length": 8, "min_atoms": 20}, apply_data_preprocessing=True, run_fits_on_different_cluster=True, @@ -911,6 +918,7 @@ def test_complete_dft_vs_ml_benchmark_workflow_m3gnet( ml_models=["M3GNET"], symprec=1e-2, supercell_settings={"min_length": 8, "min_atoms": 20}, displacements=[0.01], volume_custom_scale_factors=[0.975, 1.0, 1.025, 1.05], + n_structures=4, apply_data_preprocessing=True, ).make( structure_list=[structure], @@ -962,6 +970,7 @@ def test_complete_dft_vs_ml_benchmark_workflow_m3gnet_finetuning( ml_models=["M3GNET"], symprec=1e-2, supercell_settings={"min_length": 8, "min_atoms": 20}, displacements=[0.01], volume_custom_scale_factors=[0.975, 1.0, 1.025, 1.05], + n_structures=4, apply_data_preprocessing=True, ).make( structure_list=[structure], @@ -1009,6 +1018,7 @@ def test_complete_dft_vs_ml_benchmark_workflow_nep( ml_models=["NEP"], symprec=1e-2, supercell_settings={"min_length": 8, "min_atoms": 20}, displacements=[0.01], volume_custom_scale_factors=[0.975, 1.0, 1.025, 1.05], + n_structures=4, apply_data_preprocessing=True, ).make( structure_list=[structure], @@ -1056,6 +1066,7 @@ def test_complete_dft_vs_ml_benchmark_workflow_mace( ml_models=["MACE"], symprec=1e-2, supercell_settings={"min_length": 8, "min_atoms": 20}, displacements=[0.01], volume_custom_scale_factors=[0.975, 1.0, 1.025, 1.05], + n_structures=4, benchmark_kwargs={"calculator_kwargs": {"device": "cpu"}}, apply_data_preprocessing=True, ).make( @@ -1111,6 +1122,7 @@ def test_complete_dft_vs_ml_benchmark_workflow_mace_finetuning( ml_models=["MACE"], symprec=1e-2, supercell_settings={"min_length": 8, "min_atoms": 20}, displacements=[0.01], volume_custom_scale_factors=[0.975, 1.0, 1.025, 1.05], + n_structures=4, benchmark_kwargs={"calculator_kwargs": {"device": "cpu"}}, apply_data_preprocessing=True, ).make( @@ -1176,7 +1188,7 @@ def test_complete_dft_vs_ml_benchmark_workflow_mace_finetuning_mp_settings( complete_workflow_mace = CompleteDFTvsMLBenchmarkWorkflowMPSettings( ml_models=["MACE"], - volume_custom_scale_factors=[0.95, 1.00, 1.05], rattle_type=0, distort_type=0, + volume_custom_scale_factors=[0.95, 1.00, 1.05], n_structures=3, rattle_type=0, distort_type=0, symprec=1e-3, supercell_settings={"min_length": 6, "max_length": 10, "min_atoms": 10, "max_atoms": 300, }, displacements=[0.01], benchmark_kwargs={"calculator_kwargs": {"device": "cpu"}}, @@ -1245,6 +1257,7 @@ def test_complete_dft_vs_ml_benchmark_workflow_nequip( ml_models=["NEQUIP"], symprec=1e-2, supercell_settings={"min_length": 8, "min_atoms": 20}, displacements=[0.01], volume_custom_scale_factors=[0.975, 1.0, 1.025, 1.05], + n_structures=4, benchmark_kwargs={"calculator_kwargs": {"device": "cpu"}}, apply_data_preprocessing=True, ).make( @@ -1300,6 +1313,7 @@ def test_complete_dft_vs_ml_benchmark_workflow_two_mpids( displacements=[0.01], volume_custom_scale_factors=[0.975, 1.0, 1.025, 1.05], + n_structures=4, apply_data_preprocessing=True, ).make( structure_list=[structure, structure], @@ -1336,6 +1350,7 @@ def test_complete_dft_vs_ml_benchmark_workflow_with_hploop( supercell_settings={"min_length": 8, "min_atoms": 20}, displacements=[0.01], volume_custom_scale_factors=[0.975, 1.0, 1.025, 1.05], + n_structures=4, hyper_para_loop=True, atomwise_regularization_list=[0.01], n_sparse_list=[3000, 5000], @@ -1379,6 +1394,7 @@ def test_complete_dft_vs_ml_benchmark_workflow_with_sigma_regularization_hploop( displacements=[0.01], volume_custom_scale_factors=[0.975, 1.0, 1.025, 1.05], + n_structures=4, hyper_para_loop=True, atomwise_regularization_list=[0.01], n_sparse_list=[3000, 5000], @@ -1426,6 +1442,7 @@ def test_complete_dft_vs_ml_benchmark_workflow_with_sigma_regularization( supercell_settings={"min_length": 8, "min_atoms": 20}, displacements=[0.01], volume_custom_scale_factors=[0.975, 1.0, 1.025, 1.05], + n_structures=4, summary_filename_prefix="test_results_", apply_data_preprocessing=True, regularization=True, @@ -1480,6 +1497,7 @@ def test_complete_dft_vs_ml_benchmark_workflow_separated( displacements=[0.01], volume_custom_scale_factors=[0.975, 1.0, 1.025, 1.05], + n_structures=4, apply_data_preprocessing=True, separated=True, ).make( @@ -1522,6 +1540,7 @@ def test_complete_dft_vs_ml_benchmark_workflow_separated_sigma_reg_hploop_three_ supercell_settings={"min_length": 8, "min_atoms": 20}, displacements=[0.01], volume_custom_scale_factors=[0.975, 1.0, 1.025, 1.05], + n_structures=4, hyper_para_loop=True, atomwise_regularization_list=[0.01], n_sparse_list=[3000, 5000], @@ -1572,6 +1591,7 @@ def test_complete_dft_vs_ml_benchmark_workflow_separated_sigma_reg_hploop( supercell_settings={"min_length": 8, "min_atoms": 20}, displacements=[0.01], volume_custom_scale_factors=[0.975, 1.0, 1.025, 1.05], + n_structures=4, hyper_para_loop=True, atomwise_regularization_list=[0.01], n_sparse_list=[3000, 5000], soap_delta_list=[1.0], apply_data_preprocessing=True, @@ -1626,7 +1646,7 @@ def test_add_data_to_dataset_workflow( structure = Structure.from_file(path_to_struct) add_data_workflow = CompleteDFTvsMLBenchmarkWorkflow( - n_structures=3, + n_structures=12, symprec=1e-2, supercell_settings={"min_length": 8, "min_atoms": 20}, displacements=[0.01], @@ -1678,7 +1698,7 @@ def test_add_data_workflow_with_dft_reference( dft_reference: PhononBSDOSDoc = dft_data["output"] add_data_workflow_with_dft_reference = CompleteDFTvsMLBenchmarkWorkflow( - n_structures=3, + n_structures=12, symprec=1e-2, supercell_settings={"min_length": 8, "min_atoms": 20}, displacements=[0.01], @@ -1728,7 +1748,7 @@ def test_add_data_workflow_add_phonon_false( structure = Structure.from_file(path_to_struct) add_data_workflow_add_phonon_false = CompleteDFTvsMLBenchmarkWorkflow( - n_structures=3, + n_structures=12, symprec=1e-2, supercell_settings={"min_length": 8, "min_atoms": 20}, displacements=[0.01], @@ -1765,7 +1785,7 @@ def test_add_data_workflow_add_random_false( structure = Structure.from_file(path_to_struct) add_data_workflow_add_random_false = CompleteDFTvsMLBenchmarkWorkflow( - n_structures=3, + n_structures=12, symprec=1e-2, supercell_settings={"min_length": 8, "min_atoms": 20}, displacements=[0.01], @@ -1802,7 +1822,7 @@ def test_add_data_workflow_with_same_mpid( structure = Structure.from_file(path_to_struct) add_data_workflow_with_same_mpid = CompleteDFTvsMLBenchmarkWorkflow( - n_structures=3, + n_structures=12, symprec=1e-2, supercell_settings={"min_length": 8, "min_atoms": 20}, displacements=[0.01], @@ -1910,7 +1930,7 @@ def test_workflow_with_different_makers( input_set_generator=test_iso_atom_static_input_set, ) test_different_makers_wf = CompleteDFTvsMLBenchmarkWorkflow( - n_structures=3, + n_structures=12, symprec=1e-2, supercell_settings={"min_length": 8, "min_atoms": 20}, displacements=[0.01], diff --git a/tests/auto/phonons/test_jobs.py b/tests/auto/phonons/test_jobs.py index 82a64d269..00bf237ff 100644 --- a/tests/auto/phonons/test_jobs.py +++ b/tests/auto/phonons/test_jobs.py @@ -386,6 +386,7 @@ def test_dft_random_gen_data_manual_supercell_matrix( dft_rattled_workflow = dft_random_gen_data(structure=structure, mp_id="test", volume_custom_scale_factors=[0.95, 1.0, 1.05], + n_structures=3, displacement_maker=TightDFTStaticMaker(), rattled_bulk_relax_maker=relax_maker, supercell_settings=supercell_settings) diff --git a/tests/conftest.py b/tests/conftest.py index 9d818b4fd..8db78d543 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -392,8 +392,8 @@ def mock_rss(input_dir: str = None, dir=input_dir, random_seed=random_seed) job3 = DFTStaticLabelling(e0_spin=e0_spin, - isolated_atom=isolated_atom, - dimer=dimer, + include_isolated_atom=isolated_atom, + include_dimer=dimer, dimer_range=dimer_range, dimer_num=dimer_num, custom_incar=custom_incar, diff --git a/tests/data/common/test_flows.py b/tests/data/common/test_flows.py index fe8c42a1a..41396c145 100644 --- a/tests/data/common/test_flows.py +++ b/tests/data/common/test_flows.py @@ -36,10 +36,10 @@ def test_vasp_static(test_dir, mock_vasp, memory_jobstore, clean_dir): mock_vasp(ref_paths, fake_run_vasp_kwargs) - job_dft = DFTStaticLabelling(isolated_atom=True, + job_dft = DFTStaticLabelling(include_isolated_atom=True, e0_spin=True, isolatedatom_box=[20.0, 20.5, 21.0], - dimer=True, + include_dimer=True, dimer_box=[15.0, 15.5, 16.0], dimer_range=[1.5, 2.0], dimer_num=3, @@ -69,6 +69,7 @@ def test_vasp_static(test_dir, mock_vasp, memory_jobstore, clean_dir): "LASPH": None, "AMIN": None, }, + config_type="bulk", ).make(structures=test_structures) job_collect_data = collect_dft_data(dft_dirs=job_dft.output) @@ -105,7 +106,7 @@ def test_generate_training_data_for_testing( train_structure_list=[structure], cell_factor_sequence=[0.95, 1.0, 1.05], potential_filename=potential_file_dir, - n_structures=1, + n_structures=3, steps=1, ) diff --git a/tests/data/common/test_jobs.py b/tests/data/common/test_jobs.py index 5d3c9bbaf..7d9de2e46 100644 --- a/tests/data/common/test_jobs.py +++ b/tests/data/common/test_jobs.py @@ -37,7 +37,7 @@ def test_generate_randomized_structures_distort_type_0(memory_jobstore): for uuid, response_collection in responses.items(): for k, response in response_collection.items(): # check if correct number of structures are generated - assert 11 == len(response.output) + assert 10 == len(response.output) for struct in response.output: # check if all outputs are Structure objects assert isinstance(struct, Structure) @@ -102,7 +102,7 @@ def test_generate_randomized_structures_distort_type_2(memory_jobstore): for uuid, response_collection in responses.items(): for k, response in response_collection.items(): # check if correct number of structures are generated - assert 11 == len(response.output) + assert 10 == len(response.output) for struct in response.output: # check if all outputs are Structure objects assert isinstance(struct, Structure) diff --git a/tests/data/common/test_utils.py b/tests/data/common/test_utils.py index 16386245e..bd0bd68e0 100644 --- a/tests/data/common/test_utils.py +++ b/tests/data/common/test_utils.py @@ -175,10 +175,10 @@ def test_scale_cell(vasp_test_dir): scaled_cell = scale_cell(structure=structure, volume_scale_factor_range=[0.95, 1.05]) scaled_cell_1 = scale_cell(structure=structure, volume_scale_factor_range=[0.95, 1.0]) scaled_cell_1_2 = scale_cell(structure=structure, volume_scale_factor_range=[0.95, 1]) - scaled_cell_2 = scale_cell(structure=structure, volume_custom_scale_factors=[0.95, 1.05, 1.10]) - scaled_cell_2_2 = scale_cell(structure=structure, volume_custom_scale_factors=[0.95, 1.0, 1.05, 1.10]) + scaled_cell_2 = scale_cell(structure=structure, volume_custom_scale_factors=[0.95, 1.05, 1.10], n_structures=3) + scaled_cell_2_2 = scale_cell(structure=structure, volume_custom_scale_factors=[0.95, 1.0, 1.05, 1.10], n_structures=4) - assert len(scaled_cell) == 11 + assert len(scaled_cell) == 10 assert len(scaled_cell_1) == 10 assert len(scaled_cell_1_2) == 10 assert len(scaled_cell_2) == 3 diff --git a/tests/data/phonons/test_flows.py b/tests/data/phonons/test_flows.py index 5b8273e74..e47066c7c 100644 --- a/tests/data/phonons/test_flows.py +++ b/tests/data/phonons/test_flows.py @@ -151,7 +151,7 @@ def test_data_generation_distort_type_2(vasp_test_dir, mock_vasp, relax_maker, c "check_inputs": ["incar", "potcar"], }, } - data_gen_dt_2 = RandomStructuresDataGenerator(distort_type=2, bulk_relax_maker=relax_maker).make( + data_gen_dt_2 = RandomStructuresDataGenerator(n_structures=2, distort_type=2, bulk_relax_maker=relax_maker).make( structure=structure, mp_id=test_mpid, volume_custom_scale_factors=[ @@ -211,7 +211,7 @@ def test_data_generation_volume_range(vasp_test_dir, mock_vasp, relax_maker, cle "check_inputs": ["incar", "potcar"], }, } - data_gen_vol = RandomStructuresDataGenerator(distort_type=0, bulk_relax_maker=relax_maker).make( + data_gen_vol = RandomStructuresDataGenerator(n_structures=4, distort_type=0, bulk_relax_maker=relax_maker).make( structure=structure, mp_id=test_mpid, volume_custom_scale_factors=[0.975, 1.0, 1.025, 1.05], @@ -312,7 +312,7 @@ def test_data_generation_distort_type_0(vasp_test_dir, mock_vasp, relax_maker, c "check_inputs": ["incar", "kpoints", "potcar"], }, } - data_gen_dt_0 = RandomStructuresDataGenerator(distort_type=0, bulk_relax_maker=relax_maker).make( + data_gen_dt_0 = RandomStructuresDataGenerator(n_structures=1, distort_type=0, bulk_relax_maker=relax_maker).make( structure=structure, mp_id=test_mpid, volume_custom_scale_factors=[1.0], diff --git a/tests/misc/castep/test_flows.py b/tests/misc/castep/test_flows.py index f2bdc27e7..60ee9b163 100644 --- a/tests/misc/castep/test_flows.py +++ b/tests/misc/castep/test_flows.py @@ -41,8 +41,8 @@ def test_DFTStaticLabelling_with_castep(memory_jobstore, mock_castep, clean_dir) ) job_dft = DFTStaticLabelling( - isolated_atom=False, - dimer=False, + include_isolated_atom=False, + include_dimer=False, static_energy_maker=castep_maker, ).make(structures=structures) diff --git a/tutorials/tutorial_phonon.ipynb b/tutorials/tutorial_phonon.ipynb index c22df2bbc..7475395fa 100644 --- a/tutorials/tutorial_phonon.ipynb +++ b/tutorials/tutorial_phonon.ipynb @@ -1,10 +1,12 @@ { "cells": [ { - "metadata": {}, "cell_type": "markdown", - "source": "# Iterative DFT vs MLIP benchmark workflow for phonons", - "id": "87deae719cc4aaa4" + "id": "87deae719cc4aaa4", + "metadata": {}, + "source": [ + "# Iterative DFT vs MLIP benchmark workflow for phonons" + ] }, { "cell_type": "markdown", @@ -16,8 +18,11 @@ ] }, { - "metadata": {}, "cell_type": "code", + "execution_count": null, + "id": "8ae0edc572eeeb02", + "metadata": {}, + "outputs": [], "source": [ "# Please note that I am reusing the same relaxations here in several steps.\n", "# This is only to save storage on our repo. It has influence on the result.\n", @@ -64,14 +69,14 @@ " \"dft phonon static 1/1_mp-117_1.0_0\": \"tutorial_data/dft_phonon_static_1_1_mp-117_1.0_0_189\",\n", " \"dft phonon static 1/1_mp-117_1.06_0\": \"tutorial_data/dft_phonon_static_1_1_mp-117_1.06_0_191\",\n", "}" - ], - "id": "8ae0edc572eeeb02", - "outputs": [], - "execution_count": null + ] }, { - "metadata": {}, "cell_type": "code", + "execution_count": null, + "id": "cf432f68df1c608a", + "metadata": {}, + "outputs": [], "source": [ "import warnings\n", "\n", @@ -89,24 +94,24 @@ ")\n", "\n", "warnings.filterwarnings(\"ignore\")" - ], - "id": "cf432f68df1c608a", - "outputs": [], - "execution_count": null + ] }, { - "metadata": {}, "cell_type": "markdown", + "id": "53be906dcff1b224", + "metadata": {}, "source": [ "First, we define all relevant Makers for the workflow, used to train and finetune ML potentials for phonons. We will pre-relax the structures before starting the workflow.\n", "We will now define the relax maker, a displacement maker (static maker), static energy maker\n", "and a static energy maker for isolated atoms." - ], - "id": "53be906dcff1b224" + ] }, { - "metadata": {}, "cell_type": "code", + "execution_count": null, + "id": "6e15101da57d5ffc", + "metadata": {}, + "outputs": [], "source": [ "phonon_bulk_relax_maker = DoubleRelaxMaker.from_relax_maker(\n", " TightRelaxMaker(\n", @@ -135,14 +140,14 @@ " ),\n", " )\n", ")" - ], - "id": "6e15101da57d5ffc", - "outputs": [], - "execution_count": null + ] }, { - "metadata": {}, "cell_type": "code", + "execution_count": null, + "id": "bb57b38855f6db67", + "metadata": {}, + "outputs": [], "source": [ "phonon_displacement_maker = PhononDisplacementMaker(\n", " name=\"dft phonon static\",\n", @@ -173,15 +178,14 @@ " auto_ispin=False,\n", " ),\n", ")" - ], - "id": "bb57b38855f6db67", - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "78005d1a28749166", "metadata": {}, + "outputs": [], "source": [ "phonon_static_energy_maker = phonon_displacement_maker\n", "\n", @@ -204,9 +208,7 @@ " },\n", " ),\n", ")" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -218,8 +220,10 @@ }, { "cell_type": "code", + "execution_count": null, "id": "33f5c619d2247ca5", "metadata": {}, + "outputs": [], "source": [ "job_list = []\n", "\n", @@ -244,14 +248,14 @@ "\n", "mpbenchmark = mpids\n", "benchmark_structure_list = structure_list" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "267b672052f3ac63", "metadata": {}, + "outputs": [], "source": [ "iteration_flow = IterativeCompleteDFTvsMLBenchmarkWorkflow(\n", " max_iterations=2, # with the current test data, you can switch between 1 and 2\n", @@ -262,6 +266,7 @@ " add_dft_rattled_struct=True,\n", " add_dft_phonon_struct=True,\n", " volume_custom_scale_factors=[1.0],\n", + " n_structures=1,\n", " rattle_type=0,\n", " distort_type=0,\n", " rattle_std=0.1, #\n", @@ -290,6 +295,7 @@ " add_dft_phonon_struct=False,\n", " add_dft_rattled_struct=True,\n", " volume_custom_scale_factors=[1.0],\n", + " n_structures=1,\n", " rattle_type=0,\n", " distort_type=0,\n", " rattle_std=0.1, # maybe 0.1\n", @@ -347,9 +353,7 @@ "\n", "job_list.append(iteration_flow)\n", "autoplex_flow = Flow(jobs=job_list, output=iteration_flow.output)" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -361,8 +365,10 @@ }, { "cell_type": "code", + "execution_count": null, "id": "f261e7380ecfa2d5", "metadata": {}, + "outputs": [], "source": [ "with mock_vasp(ref_paths=ref_paths, clean_folders=False) as mf:\n", " run_locally(\n", @@ -371,9 +377,7 @@ " ensure_success=True,\n", " raise_immediately=True,\n", " )" - ], - "outputs": [], - "execution_count": null + ] } ], "metadata": {