feat: add cli scripts to quickly run a calculation (#69)

* add a script to quickly make single-point prediction * add a script to quickly make single-point prediction * add cli script for structure relaxation * remove fix encoding hook as it is not necessary in python 3 * add comments * mark the class method relax_structures as deprecated * add examples structures for testing * add init * add cli script to compute phonons * fixed results * remove scripts in cli folder * add CLI entrypoint to mattersim applications * wrap up function * rename * add relax subcommand * removed unused arguments * fixed the return type of relax * update phonon * reorganize the cli codes * add command line for molecular dynamics --------- Co-authored-by: yanghan-microsoft <[email protected]>
microsoft · Dec 29, 2024 · 79e48a1 · 79e48a1
1 parent 8b7ffb6
commit 79e48a1
Show file tree

Hide file tree

Showing 12 changed files with 837 additions and 3 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -3,7 +3,6 @@ repos:
     rev: v4.2.0
     hooks:
       - id: end-of-file-fixer
-      - id: fix-encoding-pragma
       - id: mixed-line-ending
       - id: trailing-whitespace
       - id: check-json
@@ -24,4 +23,4 @@ repos:
     rev: 6.0.0
     hooks:
       - id: flake8
-        args: ["--max-line-length=88", "--ignore=E203,W503"]
+        args: ["--max-line-length=88", "--ignore=E203,W503"]
diff --git a/src/mattersim/applications/relax.py b/src/mattersim/applications/relax.py
@@ -8,6 +8,7 @@
 from ase.optimize import BFGS, FIRE
 from ase.optimize.optimize import Optimizer
 from ase.units import GPa
+from deprecated import deprecated
 
 
 class Relaxer(object):
@@ -55,7 +56,7 @@ def relax(
         fmax: float = 0.01,
         params_filter: dict = {},
         **kwargs,
-    ) -> Atoms:
+    ) -> Tuple[bool, Atoms]:
         """
         Relax the atoms object.
 
@@ -108,6 +109,7 @@ def relax(
         return converged, atoms
 
     @classmethod
+    @deprecated(reason="Use cli/applications/relax_structure.py instead.")
     def relax_structures(
         cls,
         atoms: Union[Atoms, Iterable[Atoms]],

diff --git a/src/mattersim/cli/__init__.py b/src/mattersim/cli/__init__.py
diff --git a/src/mattersim/cli/applications/__init__.py b/src/mattersim/cli/applications/__init__.py
diff --git a/src/mattersim/cli/applications/moldyn.py b/src/mattersim/cli/applications/moldyn.py
@@ -0,0 +1,114 @@
+import os
+import re
+import uuid
+from collections import defaultdict
+from typing import List
+
+import pandas as pd
+from ase import Atoms
+from ase.io import read
+from loguru import logger
+from pymatgen.io.ase import AseAtomsAdaptor
+
+from mattersim.applications.moldyn import MolecularDynamics
+
+
+def moldyn(
+    atoms_list: List[Atoms],
+    *,
+    temperature: float = 300,
+    timestep: float = 1,
+    steps: int = 1000,
+    ensemble: str = "nvt_nose_hoover",
+    logfile: str = "-",
+    loginterval: int = 10,
+    trajectory: str = None,
+    taut: float = None,
+    work_dir: str = str(uuid.uuid4()),
+    save_csv: str = "results.csv.gz",
+    **kwargs,
+) -> dict:
+    """
+    Run molecular dynamics for each structure and tabulate the logged results.
+
+    Args:
+        atoms_list (List[Atoms]): Structures to simulate.
+        temperature (float): Forwarded unchanged to ``MolecularDynamics``.
+        timestep (float): Forwarded unchanged to ``MolecularDynamics``.
+        steps (int): Number of steps passed to ``md.run``. Default is 1000.
+        ensemble (str): Forwarded unchanged to ``MolecularDynamics``.
+        logfile (str): File name of the MD log, joined onto ``work_dir``.
+            Note that the default "-" therefore results in a file literally
+            named "-" inside ``work_dir``.
+        loginterval (int): Forwarded unchanged to ``MolecularDynamics``.
+        trajectory (str): File name of the trajectory, joined onto
+            ``work_dir``. If None, "md.traj" is used, because the trajectory
+            is read back to build the results table.
+        taut (float): Forwarded unchanged to ``MolecularDynamics``.
+        work_dir (str): Directory for all output files; created if missing.
+            The default UUID is generated once, when this module is imported,
+            so every call relying on the default shares one directory.
+        save_csv (str): File name of the CSV written into ``work_dir``.
+        **kwargs: Accepted for call compatibility; not used.
+
+    Returns:
+        dict: ``DataFrame.to_dict()`` of the parsed log (plus a "structure"
+            column of pymatgen JSON strings) for the LAST structure in
+            ``atoms_list``, or an empty ``defaultdict`` for an empty list.
+    """
+    moldyn_results = defaultdict(list)
+
+    # os.path.join(work_dir, None) raises TypeError, so the documented default
+    # trajectory=None needs a concrete file name.
+    if trajectory is None:
+        trajectory = "md.traj"
+
+    logfile_path = os.path.join(work_dir, logfile)
+    trajectory_path = os.path.join(work_dir, trajectory)
+
+    # NOTE(review): every structure writes to the same logfile, trajectory and
+    # CSV paths, and only the last structure's table is returned. Confirm
+    # whether more than one structure per call is meant to be supported.
+    for atoms in atoms_list:
+        # check if the atoms object has non-zero values in the lower triangle
+        # of the cell. If so, the cell will be rotated and permuted to upper
+        # triangular form. This is to avoid numerical issues in the MD simulation.
+        logger.debug(f"Cell: {atoms.cell.array}")
+        if any(atoms.cell.array[2, 0:2]) or atoms.cell.array[1, 0] != 0:
+            logger.warning(
+                "The lower triangle of the cell is not zero. "
+                "The cell will be rotated and permuted to upper triangular form."
+            )
+            atoms = _to_upper_triangular_cell(atoms)
+
+        # exist_ok avoids the check-then-create race of os.path.exists().
+        os.makedirs(work_dir, exist_ok=True)
+
+        md = MolecularDynamics(
+            atoms,
+            ensemble=ensemble,
+            temperature=temperature,
+            timestep=timestep,
+            loginterval=loginterval,
+            logfile=logfile_path,
+            trajectory=trajectory_path,
+            taut=taut,
+        )
+        md.run(steps)
+
+        # Parse the whitespace-separated MD log into a DataFrame.
+        # skipfooter is only supported by the python engine; naming it
+        # explicitly avoids pandas' fallback ParserWarning.
+        # NOTE(review): the column names are imposed here rather than read
+        # from the log header - confirm they match what MolecularDynamics
+        # actually writes.
+        df = pd.read_csv(
+            logfile_path,
+            sep="\\s+",
+            names=["time", "temperature", "energy", "pressure"],
+            skipfooter=1,
+            engine="python",
+        )
+        # Strips "[unit]" suffixes and normalises case; a no-op for the fixed
+        # names given above.
+        df.columns = list(
+            map(lambda x: re.sub(r"\[.*?\]", "", x).strip().lower(), df.columns)
+        )
+        traj = read(trajectory_path, index=":")
+        logger.debug(f"Log table shape: {df.shape}; trajectory frames: {len(traj)}")
+        structure_list = [AseAtomsAdaptor.get_structure(frame) for frame in traj]
+
+        # Add the structure list to the DataFrame
+        df["structure"] = [structure.to_json() for structure in structure_list]
+
+        # Print the DataFrame
+        print(df)
+
+        # Save the DataFrame to a CSV file
+        df.to_csv(os.path.join(work_dir, save_csv))
+
+        moldyn_results = df.to_dict()
+
+    return moldyn_results
+
+
+def _to_upper_triangular_cell(atoms: Atoms) -> Atoms:
+    """
+    Rotate and permute the cell of ``atoms`` into upper triangular form.
+
+    The input object's cell is modified in place before a new ``Atoms``
+    object is built; the calculator of the input is attached to the result.
+
+    Args:
+        atoms (Atoms): Structure whose cell has non-zero lower-triangle entries.
+
+    Returns:
+        Atoms: The re-oriented structure with an exactly zero lower triangle.
+    """
+    # The following code is from the PR
+    # https://gitlab.com/ase/ase/-/merge_requests/3277.
+    # It will be removed once the PR is merged.
+    # This part of the codes rotates the cell and permutes the axes
+    # such that the cell will be in upper triangular form.
+    from ase.build import make_supercell
+
+    calc = atoms.calc
+    logger.info(f"Initial cell: {atoms.cell.array}")
+
+    atoms.set_cell(atoms.cell.standard_form()[0], scale_atoms=True)
+
+    # Permute a and c axes
+    atoms = make_supercell(atoms, [[0, 0, 1], [0, 1, 0], [1, 0, 0]])
+
+    atoms.rotate(90, "y", rotate_cell=True)
+
+    # set the lower triangle of the cell to be exactly zero
+    # to avoid numerical issues
+    atoms.cell.array[1, 0] = 0
+    atoms.cell.array[2, 0] = 0
+    atoms.cell.array[2, 1] = 0
+
+    logger.info(f"Cell after rotation/permutation: {atoms.cell.array}")
+    # make_supercell returned a new object, so re-attach the calculator.
+    atoms.calc = calc
+    return atoms
diff --git a/src/mattersim/cli/applications/phonon.py b/src/mattersim/cli/applications/phonon.py
@@ -0,0 +1,142 @@
+import os
+import uuid
+from collections import defaultdict
+from typing import List
+
+import numpy as np
+import pandas as pd
+import yaml
+from ase import Atoms
+from loguru import logger
+from pymatgen.core.structure import Structure
+from pymatgen.io.ase import AseAtomsAdaptor
+from tqdm import tqdm
+
+from mattersim.applications.phonon import PhononWorkflow
+from mattersim.cli.applications.relax import relax
+
+
+def phonon(
+    atoms_list: List[Atoms],
+    *,
+    find_prim: bool = False,
+    work_dir: str = str(uuid.uuid4()),
+    save_csv: str = "results.csv.gz",
+    amplitude: float = 0.01,
+    supercell_matrix: np.ndarray = None,
+    qpoints_mesh: np.ndarray = None,
+    max_atoms: int = None,
+    enable_relax: bool = False,
+    **kwargs,
+) -> dict:
+    """
+    Predict phonon properties for a list of atoms.
+
+    Args:
+        atoms_list (List[Atoms]): List of ASE Atoms objects.
+        find_prim (bool, optional): If find the primitive cell and use it
+            to calculate phonon. Default to False.
+        work_dir (str, optional): Directory holding the phonon outputs and
+            the results CSV. Defaults to a UUID string that is generated
+            once, when this module is imported.
+        save_csv (str, optional): Base name of the results CSV. ".csv" is
+            replaced by "_phonon.csv" for the phonon table and by
+            "_relax.csv" for the optional relaxation table. Defaults to
+            "results.csv.gz".
+        amplitude (float, optional): Magnitude of the finite difference to
+            displace in force constant calculation, in Angstrom. Defaults
+            to 0.01 Angstrom.
+        supercell_matrix (nd.array, optional): Supercell matrix used to
+            construct the supercell; takes priority over max_atoms.
+            Defaults to None.
+        qpoints_mesh (nd.array, optional): Qpoint mesh for IBZ integral;
+            takes priority over max_atoms. Defaults to None.
+        max_atoms (int, optional): Maximum number of atoms allowed in the
+            generated supercell. If not set, the supercell is generated
+            automatically based on symmetry. Defaults to None.
+        enable_relax (bool, optional): Whether to relax the structure before
+            predicting phonon properties. Defaults to False.
+        **kwargs: Accepted for call compatibility; not used.
+
+    Returns:
+        dict: Mapping of column name to a list with one entry per structure:
+            "has_imaginary", "phonon_band_plot", "phonon_dos_plot",
+            "phonon_band", "phonopy_params" and "total_dos".
+    """
+    phonon_results = defaultdict(list)
+
+    for atoms in tqdm(
+        atoms_list, total=len(atoms_list), desc="Predicting phonon properties"
+    ):
+        if enable_relax:
+            relaxed_results = relax(
+                [atoms],
+                constrain_symmetry=True,
+                work_dir=work_dir,
+                save_csv=save_csv.replace(".csv", "_relax.csv"),
+            )
+            # The relaxed structure comes back as pymatgen JSON; rebuild an
+            # ASE object from it and carry over the original calculator.
+            structure = Structure.from_str(relaxed_results["structure"][0], fmt="json")
+            _atoms = AseAtomsAdaptor.get_atoms(structure)
+            _atoms.calc = atoms.calc
+            atoms = _atoms
+        ph = PhononWorkflow(
+            atoms=atoms,
+            find_prim=find_prim,
+            work_dir=work_dir,
+            amplitude=amplitude,
+            supercell_matrix=supercell_matrix,
+            qpoints_mesh=qpoints_mesh,
+            max_atoms=max_atoms,
+        )
+        # Only the imaginary-frequency flag is recorded; the second return
+        # value is not stored in the results.
+        has_imaginary, _ = ph.run()
+        phonon_results["has_imaginary"].append(has_imaginary)
+
+        # Resolved after ph.run(), exactly where the paths are needed.
+        out_dir = os.path.abspath(work_dir)
+        prefix = f"{atoms.symbols}"
+
+        phonon_results["phonon_band_plot"].append(
+            os.path.join(out_dir, f"{prefix}_phonon_band.png")
+        )
+        phonon_results["phonon_dos_plot"].append(
+            os.path.join(out_dir, f"{prefix}_phonon_dos.png")
+        )
+
+        # Give the generically named output files a per-structure prefix.
+        # NOTE(review): structures with identical symbols map to the same
+        # file names and overwrite each other - confirm this is acceptable.
+        band_path = os.path.join(out_dir, f"{prefix}_band.yaml")
+        params_path = os.path.join(out_dir, f"{prefix}_phonopy_params.yaml")
+        dos_path = os.path.join(out_dir, f"{prefix}_total_dos.dat")
+        os.rename(os.path.join(out_dir, "band.yaml"), band_path)
+        os.rename(os.path.join(out_dir, "phonopy_params.yaml"), params_path)
+        os.rename(os.path.join(out_dir, "total_dos.dat"), dos_path)
+
+        # Context managers make sure the YAML file handles are closed.
+        with open(band_path, "r") as band_file:
+            phonon_results["phonon_band"].append(yaml.safe_load(band_file))
+        with open(params_path, "r") as params_file:
+            phonon_results["phonopy_params"].append(yaml.safe_load(params_file))
+        phonon_results["total_dos"].append(np.loadtxt(dos_path, comments="#"))
+
+    # exist_ok avoids the check-then-create race of os.path.exists().
+    os.makedirs(work_dir, exist_ok=True)
+
+    # Log the path that is actually written (with the "_phonon" suffix).
+    csv_path = os.path.join(work_dir, save_csv.replace(".csv", "_phonon.csv"))
+    logger.info(f"Saving the results to {csv_path}")
+    df = pd.DataFrame(phonon_results)
+    df.to_csv(
+        csv_path,
+        index=False,
+        mode="a",
+    )
+    return phonon_results
diff --git a/src/mattersim/cli/applications/relax.py b/src/mattersim/cli/applications/relax.py
@@ -0,0 +1,98 @@
+import os
+import uuid
+from collections import defaultdict
+from typing import List, Union
+
+import pandas as pd
+from ase import Atoms
+from ase.constraints import Filter
+from ase.optimize.optimize import Optimizer
+from ase.units import GPa
+from loguru import logger
+from pymatgen.io.ase import AseAtomsAdaptor
+from tqdm import tqdm
+
+from mattersim.applications.relax import Relaxer
+
+
+def relax(
+    atoms_list: List[Atoms],
+    *,
+    optimizer: Union[str, Optimizer] = "FIRE",
+    filter: Union[str, Filter, None] = None,
+    constrain_symmetry: bool = False,
+    fix_axis: Union[bool, List[bool]] = False,
+    pressure_in_GPa: float = None,
+    fmax: float = 0.01,
+    steps: int = 500,
+    work_dir: str = str(uuid.uuid4()),
+    save_csv: str = "results.csv.gz",
+    **kwargs,
+) -> dict:
+    """
+    Relax a list of atoms structures.
+
+    Args:
+        atoms_list (List[Atoms]): List of ASE Atoms objects.
+        optimizer (Union[str, Optimizer]): The optimizer to use. Default is "FIRE".
+        filter (Union[str, Filter, None]): The filter to use. When a non-zero
+            pressure is given and no filter is set, "ExpCellFilter" is used.
+        constrain_symmetry (bool): Whether to constrain symmetry. Default is False.
+        fix_axis (Union[bool, List[bool]]): Whether to fix the axis. Default is False.
+        pressure_in_GPa (float): Pressure in GPa to use for relaxation. A value
+            of 0.0 is treated the same as None.
+        fmax (float): Maximum force tolerance for relaxation. Default is 0.01.
+        steps (int): Maximum number of steps for relaxation. Default is 500.
+        work_dir (str): Working directory for the calculations; created if
+            missing. Default is a UUID string that is generated once, when
+            this module is imported.
+        save_csv (str): Save the results to a CSV file. Default is `results.csv.gz`.
+        **kwargs: Accepted for call compatibility; not used.
+
+    Returns:
+        dict: Mapping of column name to a list with one entry per structure:
+            "converged", "structure" (pymatgen JSON string), "energy",
+            "energy_per_atom", "forces", "stress" and "stress_GPa".
+    """
+    params_filter = {}
+
+    # Truthiness check on purpose kept as-is: pressure 0.0 falls through to
+    # the `elif`, so it only has an effect when a filter was requested.
+    if pressure_in_GPa:
+        params_filter["scalar_pressure"] = (
+            pressure_in_GPa * GPa
+        )  # convert GPa to eV/Angstrom^3
+        filter = "ExpCellFilter" if filter is None else filter
+    elif filter:
+        params_filter["scalar_pressure"] = 0.0
+
+    relaxer = Relaxer(
+        optimizer=optimizer,
+        filter=filter,
+        constrain_symmetry=constrain_symmetry,
+        fix_axis=fix_axis,
+    )
+
+    relaxed_results = defaultdict(list)
+    for atoms in tqdm(atoms_list, total=len(atoms_list), desc="Relaxing structures"):
+        converged, relaxed_atoms = relaxer.relax(
+            atoms,
+            params_filter=params_filter,
+            fmax=fmax,
+            steps=steps,
+        )
+        relaxed_results["converged"].append(converged)
+        relaxed_results["structure"].append(
+            AseAtomsAdaptor.get_structure(relaxed_atoms).to_json()
+        )
+
+        # Query each property once and reuse it for the derived columns.
+        energy = relaxed_atoms.get_potential_energy()
+        relaxed_results["energy"].append(energy)
+        relaxed_results["energy_per_atom"].append(energy / len(relaxed_atoms))
+        relaxed_results["forces"].append(relaxed_atoms.get_forces())
+
+        stress = relaxed_atoms.get_stress(voigt=False)
+        relaxed_results["stress"].append(stress)
+        relaxed_results["stress_GPa"].append(stress / GPa)
+
+        logger.info(f"Relaxed structure: {relaxed_atoms}")
+
+    # exist_ok avoids the check-then-create race of os.path.exists().
+    os.makedirs(work_dir, exist_ok=True)
+
+    csv_path = os.path.join(work_dir, save_csv)
+    logger.info(f"Saving the results to {csv_path}")
+    df = pd.DataFrame(relaxed_results)
+    # mode="a" appends to an existing file from an earlier call.
+    df.to_csv(csv_path, index=False, mode="a")
+    return relaxed_results