From 737590c45c0825a07437d996d832bd9ef95131c9 Mon Sep 17 00:00:00 2001 From: mkphuthi Date: Fri, 12 Jan 2024 09:37:46 -0500 Subject: [PATCH] updating docs --- README.md | 3 +- asimtools/asimmodules/benchmarking/parity.py | 2 +- asimtools/utils.py | 157 ++++++++++++++++--- paper.md | 36 ++++- 4 files changed, 169 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index f1ef495..f531d05 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,4 @@ -drawing - + diff --git a/asimtools/asimmodules/benchmarking/parity.py b/asimtools/asimmodules/benchmarking/parity.py index edf699c..2e40047 100644 --- a/asimtools/asimmodules/benchmarking/parity.py +++ b/asimtools/asimmodules/benchmarking/parity.py @@ -136,7 +136,7 @@ def parity( """Generates a parity plot and collects evaluation statistics comparing energy and/or forces and/or stress to existing values in the provided dataset - :param images: Image config, see :func:`asimtools.utils.get_atoms` + :param images: Image config, see :func:`asimtools.utils.get_images` :type images: Dict :param calc_id: ID of calculator provided in calc_input or global file :type calc_id: str diff --git a/asimtools/utils.py b/asimtools/utils.py index 75deb00..fd317aa 100644 --- a/asimtools/utils.py +++ b/asimtools/utils.py @@ -125,18 +125,15 @@ def join_names(substrs: Sequence[str]) -> str: return name def get_atoms( - image_file: str = None, - builder: str = 'bulk', - atoms: Atoms = None, - repeat: Tuple[int, int, int] = None, - rattle_stdev: float = None, + image_file: Optional[str] = None, + builder: Optional[str] = 'bulk', + atoms: Optional[Atoms] = None, + repeat: Optional[Tuple[int, int, int]] = None, + rattle_stdev: Optional[float] = None, **kwargs ) -> Atoms: """Return an atoms object based on specified config. This is the - recommended way to load atoms objects. There are three options to specify: - #. image_file. - #. builder, kwargs. - #. atoms. + recommended way to load atoms objects. 
:param image_file: Path to an ASE-readable image file, defaults to None :type image_file: str, optional @@ -152,7 +149,67 @@ def get_atoms( defaults to None :type rattle_stdev: float, optional :return: One :class:`ase.Atoms` instance - :rtype: Atoms + :rtype: Atoms, optional + + There are three options one could use to specify an image or an atoms + object: + + #. image_file + \*\*kwargs + #. builder + \*\*kwargs. + #. atoms + + Examples + -------- + + Some examples using builders from ASE. All ``**kwargs`` are passed to + :func:`ase.build`: + + >>> get_atoms(builder='molecule', name='H2O') + Atoms(symbols='OH2', pbc=False) + >>> get_atoms(builder='bulk', name='Cu') + Atoms(symbols='Cu', pbc=True, cell=[[0.0, 1.805, 1.805], [1.805, 0.0, 1.805], [1.805, 1.805, 0.0]]) + >>> get_atoms(builder='bulk', name='Ar', crystalstructure='fcc', a=3.4, cubic=True) + Atoms(symbols='Ar4', pbc=True, cell=[3.4, 3.4, 3.4]) + >>> get_atoms(builder='fcc100', symbol='Cu', vacuum=8, size=[4,4, 5]) + Atoms(symbols='Cu80', pbc=[True, True, False], cell=[10.210621920333747, 10.210621920333747, 23.22], tags=...) + + Some examples for reading an image from a file using :func:`ase.io.read` + are given below. 
All ``**kwargs`` are passed to :func:`ase.io.read` + + >>> h2o = get_atoms(builder='molecule', name='H2O') + >>> h2o.write('h2o.cif') + >>> get_atoms(image_file='h2o.cif') + Atoms(symbols='OH2', pbc=False) + >>> get_atoms(image_file='h2o.cif', format='cif') + Atoms(symbols='OH2', pbc=False) + >>> from ase.io import write + >>> molecules = [get_atoms(builder='molecule', name='H2O'), get_atoms(builder='molecule', name='H2')] + >>> write('molecules.xyz', molecules, format='extxyz') + >>> get_atoms(image_file='molecules.xyz', index=0) # Pick out one structure using indexing + Atoms(symbols='OH2', pbc=False) + + You can also make supercells and rattle the atoms + + >>> li_bulk = get_atoms(name='Li') + >>> li_bulk.write('POSCAR', format='vasp') + >>> get_atoms(image_file='POSCAR', repeat=[3,3,3]) + Atoms(symbols='Li27', pbc=True, cell=[[-5.235, 5.235, 5.235], [5.235, -5.235, 5.235], [5.235, 5.235, -5.235]]) + >>> get_atoms(builder='bulk', name='Li', repeat=[2,2,2], rattle_stdev=0.01) + Atoms(symbols='Li8', pbc=True, cell=[[-3.49, 3.49, 3.49], [3.49, -3.49, 3.49], [3.49, 3.49, -3.49]]) + + Mostly for internal use and use in asimmodules, one can specify atoms + directly + + >>> li_bulk = get_atoms(name='Li') + >>> get_atoms(atoms=li_bulk) + Atoms(symbols='Li', pbc=True, cell=[[-1.745, 1.745, 1.745], [1.745, -1.745, 1.745], [1.745, 1.745, -1.745]]) + + In an asimmodule, the ``image`` argument is always given as a dictionary, + you therefore have to expand it before passing it to ``get_atoms`` + + >>> image = {'name': 'Pt'} + >>> get_atoms(**image) + Atoms(symbols='Pt', pbc=True, cell=[[0.0, 1.96, 1.96], [1.96, 0.0, 1.96], [1.96, 1.96, 0.0]]) """ assert image_file is not None or \ len(kwargs) > 0 or \ @@ -210,17 +267,14 @@ def get_images( **kwargs ) -> List[Atoms]: """Return a list of atoms objects based on the input arguments. Options \ - to specify are: - #. image_file - #. pattern - #. images + to specify are: #. image_file #. pattern #. 
images :param image_file: Path to ASE-readable file with one or more images, \ defaults to None :type image_file: str, optional :param pattern: String pattern of paths from which to search for images, \ - defaults to None. This only gets one image from each file as in \ - :func:`ase.io.read` without specifying an index + defaults to None. This only gets the last image from each file as in \ + :func:`ase.io.read` if an index is not specified. :type pattern: str, optional :param patterns: Sequence of string patterns/paths from which to search \ for images, defaults to None. This only gets one image from each file \ @@ -228,11 +282,76 @@ def get_images( :type pattern: str, optional :param images: A list of atoms objects, defaults to None :type images: Iterable[Atoms], optional - :param index: Index to specify when using :func:`ase.io.read`, \ - defaults to ':' + :param index: Index to specify when using :func:`ase.io.read`, defaults + to ':' :type index: Union[str, int], optional - :return: List of :class:`ase.Atoms` + :param skip_failed: Whether to raise an IO error if it fails to read any of + the specified images or ignore errors, defaults to False + :type skip_failed: bool, optional + :raises IOError: Failed to read one of the specified images + :return: List of :class:`ase.Atoms` for all images found :rtype: List[Atoms] + + There are four options one could use to specify images or atoms + objects: + + #. image_file + \*\*kwargs for specifying one image file + #. pattern + \*\*kwargs for specifying multiple image files with a wildcard + character + #. patterns + \*\*kwargs for specifying a list of patterns to match, + captures the above two cases + #. images + + Examples + -------- + + Some examples for reading images selectively from an image_file. 
All + ``**kwargs`` are passed to :func:`ase.io.read`: + + >>> from asimtools.utils import get_atoms + >>> molecules = [] + >>> molecules.append(get_atoms(builder='molecule', name='H2O')) + >>> molecules.append(get_atoms(builder='molecule', name='H2')) + >>> molecules.append(get_atoms(builder='molecule', name='N2')) + >>> write('molecules.xyz', molecules, format='extxyz') + >>> get_images(image_file='molecules.xyz') + [Atoms(symbols='OH2', pbc=False), Atoms(symbols='H2', pbc=False), Atoms(symbols='N2', pbc=False)] + >>> get_images(image_file='molecules.xyz', index=':2') + [Atoms(symbols='OH2', pbc=False), Atoms(symbols='H2', pbc=False)] + + You can also use a wildcard (\*) by specifying the pattern argument. Notice + that the files don't have to be the same format if ASE can guess all the + file formats, otherwise you can specify the format argument which should + apply to all the images. + + >>> cu = get_atoms(name='Cu') + >>> cu.write('bulk_cu.cfg') + >>> fe = get_atoms(name='Fe') + >>> fe.write('bulk_fe.cif') + >>> pt = get_atoms(name='Pt') + >>> pt.write('bulk_pt.cfg') + >>> get_images(pattern='bulk*') + [Atoms(symbols='Cu', pbc=True, cell=[[0.0, 1.805, 1.805], [1.805, 0.0, 1.805], [1.805, 1.805, 0.0]], masses=..., momenta=...), Atoms(symbols='Fe', pbc=True, cell=[[2.48549, 0.0, 0.0], [-0.8284876429214074, 2.3433456351179887, 0.0], [-0.8284876429214074, -1.171653675382785, 2.0294079014797743]], spacegroup_kinds=...), Atoms(symbols='Pt', pbc=True, cell=[[0.0, 1.96, 1.96], [1.96, 0.0, 1.96], [1.96, 1.96, 0.0]], masses=..., momenta=...)] + + >>> get_images(pattern='bulk*.cfg', format='cfg') + [Atoms(symbols='Cu', pbc=True, cell=[[0.0, 1.805, 1.805], [1.805, 0.0, 1.805], [1.805, 1.805, 0.0]], masses=..., momenta=...), Atoms(symbols='Pt', pbc=True, cell=[[0.0, 1.96, 1.96], [1.96, 0.0, 1.96], [1.96, 1.96, 0.0]], masses=..., momenta=...)] + + You can also specify multiple patterns + + >>> get_images(patterns=['bulk*.cfg', 'bulk*.cif']) 
+ [Atoms(symbols='Cu', pbc=True, cell=[[0.0, 1.805, 1.805], [1.805, 0.0, 1.805], [1.805, 1.805, 0.0]], masses=..., momenta=...), Atoms(symbols='Pt', pbc=True, cell=[[0.0, 1.96, 1.96], [1.96, 0.0, 1.96], [1.96, 1.96, 0.0]], masses=..., momenta=...), Atoms(symbols='Fe', pbc=True, cell=[[2.48549, 0.0, 0.0], [-0.8284876429214074, 2.3433456351179887, 0.0], [-0.8284876429214074, -1.171653675382785, 2.0294079014797743]], spacegroup_kinds=...)] + + Or you can directly pass a list of Atoms, mostly for internal use + + >>> get_images(images=molecules) + [Atoms(symbols='OH2', pbc=False), Atoms(symbols='H2', pbc=False), Atoms(symbols='N2', pbc=False)] + + In an asimmodule, the ``images`` argument is always given as a dictionary, + you therefore have to expand it before passing it to ``get_images`` + + >>> images = {'pattern': 'bulk*'} + >>> get_images(**images) + [Atoms(symbols='Cu', pbc=True, cell=[[0.0, 1.805, 1.805], [1.805, 0.0, 1.805], [1.805, 1.805, 0.0]], masses=..., momenta=...), Atoms(symbols='Fe', pbc=True, cell=[[2.48549, 0.0, 0.0], [-0.8284876429214074, 2.3433456351179887, 0.0], [-0.8284876429214074, -1.171653675382785, 2.0294079014797743]], spacegroup_kinds=...), Atoms(symbols='Pt', pbc=True, cell=[[0.0, 1.96, 1.96], [1.96, 0.0, 1.96], [1.96, 1.96, 0.0]], masses=..., momenta=...)] """ assert (image_file is not None) or \ (pattern is not None) or \ diff --git a/paper.md b/paper.md index 0764909..4d40293 100644 --- a/paper.md +++ b/paper.md @@ -4,13 +4,12 @@ tags: - Python - atomic simulation - density functional theory - - molecular dynamics - workflow authors: - name: Mgcini Keith Phuthi orcid: 0000-0002-0982-8635 equal-contrib: false - affiliation: "1, 2" # (Multiple affiliations must be quoted) + affiliation: "1" # (Multiple affiliations must be quoted) - name: Emil Annevelink orcid: 0000-0001-5035-7807 equal-contrib: false @@ -31,11 +30,12 @@ bibliography: paper.bib # Summary -Atomic simulations are a key component of modern day materials science in both 
academia and industry. However, simulation protocols and workflows used by researchers are typically difficult to transfer to systems using different inputs, packages and environments. This leads to poor reproducability and inefficient transfer of code from one researcher to the next. In addition, there exists a zoo of tools and packages for atomic simulation with more being developed every day. There is however no unifying framework that can encompass all these tools without significant software development or using bulky workflow managers. +Atomic SIMulation Tools (`ASIMTools`) is a lightweight workflow and simulation manager for reproducible atomistic simulations that can be transferred across environments, DFT codes, interatomic potentials and structures implemented in Python. By using in-built or user-defined asimmodules and utilities, users can run simulation recipes and automatically scale them on slurm based clusters or locally on their console. The core idea is to separate the dependence of the atomistic potential/calculator, the simulation environment and the simulation protocol thereby allowing the same simulation to be run with different calculators, structures or on different computers with just a change of parameter. This is extremely useful at a time when benchmarking Machine Learning Interaction Potentials has become a core part of computational materials science. Input and output yaml files follow a simple standard format, providing a simple interface that also acts as a record of the parameters used in a simulation without having to edit python scripts. The minimal set of requirements means any materials science codes can be incorporated into an ASIMTools workflow. # Statement of need -Atomic SIMulation Tools (`ASIMTools`) is a lightweight workflow and simulation manager for reproducible atomistic simulations that can be transferred across environments, calculators and structures implemented in Python. 
By using in-built or user-defined asimmodules and utilities, users can run/build their own simulation recipes and automatically scale them on slurm based clusters or locally on their console. The core idea is to separate the dependence of the atomistic potential/calculator, the simulation environment and the simulation protocol thereby allowing the same simulation to be run with different calculators, structures or on different computers with just a change of parameter.Input and output yaml files follow a standard format based providing a simple interface that also acts as a record of the parameters used in a simulation without having to edit python asimmodules. +Atomic simulations are a key component of modern day materials science in both academia and industry. However, simulation protocols and workflows used by researchers are typically difficult to transfer to systems using different inputs, codes and environments. It often involves rewriting entire scripts in different languages to change from one type of atomistic potential or atomic structure to another. This leads to poor reproducibility and inefficient transfer of code from one researcher to the next. In addition, there exists a zoo of tools and packages for atomic simulation with more being developed every day `[@Walsh:2024]`. There is however no unifying framework that can encompass all these tools without significant software development effort. Significant effort should not be necessary because while the source of the fundamental outputs of atomistic potentials such as energy, forces etc. may differ, simulations built on these outputs should converge towards the most accurate and computationally efficient protocols. ASIMTools focuses on this last aspect by +introducing asimmodules which are simply Python functions that act as simulation protocols which have no dependence on a specific atomistic potential. 
Through iteration and community input, these simulation protocols will hopefully converge towards best practice and ensure reproducibility of simulation results. `ASIMTools` is for users interested in performing atomistic calculations on UNIX-like operating systems and/or on slurm based High Performance Computing @@ -44,12 +44,12 @@ simulation protocols can be easily added to the library of provided asimmodules iterated on. This will allow the community to develop a robust set of shareable simulation protocols. The flexibility of ASIMTools allows integration of any kind of simulation tools such as pymatgen, LAMMPS etc. with examples provided. -With the asimmodules defined, users only need to provide a set of inputs in the form of yaml files that define the parameters used for each simulation and are therefore a record. +With the asimmodules defined, users only need to provide a set of inputs in the form of yaml files that define the parameters used for each simulation and are therefore a record of these parameters. # State of the Field There exist a number of popular workflow tools for atomistic simulations such as Aiida `[@author:2001]`, Fireworks `[@author:2001]` and many more. These tools provide frameworks for constructing complex workflows with different underlying principles. Some managers enforce strict rules that ensure that data obeys FAIR principles and emphasize data provenance and reproducibility. These methods however tend to be fairly large packages with steep learning curves. ASIMTools provides a simple interface as a starting point that can transform any code into ASIMTools compatible code by simply wrapping it in a function that returns a Python dictionary. Any such code can work in ASIMTools and with a few extra steps, the protocol can be made to support an arbitrary calculator and input structure. -In some workflow managters, such as Atomic Simulation Recipes `[@author:2001]`. 
Once workflows are built, it can often be difficult to quickly change and iterate over key parameters such as the choice of atomistic calculator or structure as they are intrinsically built into the code. This is particularly challening in an age where machine learning models are becoming more popular. Workflows involving machine learning interaction potentials tend to require the ability to repeat the same calculations on different examples, using different calculators on different hardware iteratively. This is where the value of ASIMTools lies in contrast to more established workflows. ASIMTools is not designed to replace the more powerful workflow managers but rather to supplement them. This is achieved by providing unified inputs that can be easily integrated into, for example, Aiida as Python functions/asimmodules while also being a stand-alone lightweight workflow manager. +In some workflow managers, such as Atomic Simulation Recipes `[@author:2001]`, once workflows are built, it can often be difficult to quickly change and iterate over key parameters such as the choice of atomistic calculator or structure as they are intrinsically built into the code. This is particularly challenging in an age where machine learning models are becoming more popular. Workflows involving machine learning interaction potentials tend to require the ability to repeat the same calculations on different examples, using different calculators on different hardware iteratively. This is where the value of ASIMTools lies in contrast to more established workflows. ASIMTools is not designed to replace the more powerful workflow managers but rather to supplement them. This is achieved by providing unified inputs that can be easily integrated into, for example, Aiida as Python functions/asimmodules while also being a stand-alone lightweight workflow manager. 
# Example We present two examples of simulation protocols, more can be found in the @@ -73,7 +73,7 @@ Citations to entries in paper.bib should be in [rMarkdown](http://rmarkdown.rstudio.com/authoring_bibliographies_and_citations.html) format. -If you want to cite a software repository URL (e.g. something on GitHub without a preferred +If you want to cite a software repository URL (e.g. something on GitHub without a preferred citation) then you can do it with the example BibTeX entry below for @fidgit. For a quick reference, the following citation commands can be used: @@ -117,3 +117,25 @@ Example paper.bib file: Title = {{Galactic Dynamics: Second Edition}}, Year = 2008 } + +@article{Walsh:2024, + Title = {Open computational materials science}, + Volume = {23}, + copyright = {2024 Springer Nature Limited}, + issn = {1476-4660}, + url = {https://www.nature.com/articles/s41563-023-01699-7}, + doi = {10.1038/s41563-023-01699-7}, + Abstract = {The materials modelling community is emerging as a champion for reproducible and reusable science. Aron Walsh discusses how FAIR databases, collaborative codes and transparent workflows are advancing this movement.}, + Language = {en}, + Number = {1}, + urldate = {2024-01-11}, + Journal = {Nature Materials}, + Author = {Walsh, Aron}, + Month = jan, + Year = {2024}, + Note = {Number: 1 + Publisher: Nature Publishing Group}, + Keywords = {Research data, Theory and computation}, + Pages = {16--17}, +} +