From 97870cd6948ca7ebc50d1e3f1bf4e247730c2ffa Mon Sep 17 00:00:00 2001
From: mkphuthi <mkphuthi@gmail.com>
Date: Sun, 17 Nov 2024 10:10:55 -0800
Subject: [PATCH] started mlip additions

---
 asimtools/asimmodules/ase_md/ase_md.py        | 236 ++++++++++++++++++
 asimtools/asimmodules/lammps/lammps.py        |   2 +-
 asimtools/asimmodules/mlips/__init__.py       |   0
 .../asimmodules/mlips/active_learning.py      |   8 +
 .../asimmodules/mlips/compute_deviation.py    | 136 ++++++++++
 asimtools/asimmodules/mlips/select_images.py  |  44 ++++
 asimtools/asimmodules/mlips/train_mace.py     |  69 +++++
 asimtools/asimmodules/mlips/train_mtp.py      |   0
 asimtools/asimmodules/mlips/train_nequip.py   |   0
 asimtools/calculators.py                      |  10 +-
 .../external/MACE/train_mace_sim_input.yaml   |  23 ++
 .../active_learning/Ar_unlabeled_data.xyz     | 100 ++++++++
 .../external/active_learning/calc_input.yaml  |  18 ++
 .../compute_deviation_sim_imput.yaml          |   9 +
 .../select_images_sim_input.yaml              |   7 +
 .../training_data/Ar_unlabeled_data.xyz       | 100 ++++++++
 pyproject.toml                                |   2 +-
 17 files changed, 758 insertions(+), 6 deletions(-)
 create mode 100644 asimtools/asimmodules/ase_md/ase_md.py
 create mode 100644 asimtools/asimmodules/mlips/__init__.py
 create mode 100644 asimtools/asimmodules/mlips/active_learning.py
 create mode 100644 asimtools/asimmodules/mlips/compute_deviation.py
 create mode 100644 asimtools/asimmodules/mlips/select_images.py
 create mode 100644 asimtools/asimmodules/mlips/train_mace.py
 create mode 100644 asimtools/asimmodules/mlips/train_mtp.py
 create mode 100644 asimtools/asimmodules/mlips/train_nequip.py
 create mode 100644 examples/external/MACE/train_mace_sim_input.yaml
 create mode 100644 examples/external/active_learning/Ar_unlabeled_data.xyz
 create mode 100644 examples/external/active_learning/calc_input.yaml
 create mode 100644 examples/external/active_learning/compute_deviation_sim_imput.yaml
 create mode 100644 examples/external/active_learning/select_images_sim_input.yaml
 create mode 100644 examples/external/active_learning/training_data/Ar_unlabeled_data.xyz

diff --git a/asimtools/asimmodules/ase_md/ase_md.py b/asimtools/asimmodules/ase_md/ase_md.py
new file mode 100644
index 0000000..0916024
--- /dev/null
+++ b/asimtools/asimmodules/ase_md/ase_md.py
@@ -0,0 +1,236 @@
+#!/usr/bin/env python
+'''
+Runs molecular dynamics with ASE using Langevin (NVT) or NPT dynamics and
+optionally plots thermodynamic properties from the trajectory that is written
+
+Only recommended for small systems and testing, use LAMMPS for larger systems
+
+Author: mkphuthi@github.com
+'''
+
+from __future__ import annotations
+from typing import Dict, Optional, Sequence
+import numpy as np
+import matplotlib.pyplot as plt
+import ase
+from ase.io.trajectory import Trajectory
+from ase.units import GPa, fs
+from ase.md.velocitydistribution import MaxwellBoltzmannDistribution
+from ase.md.langevin import Langevin
+from ase.md.npt import NPT
+from asimtools.calculators import load_calc
+from asimtools.utils import (
+    get_atoms,
+    get_images,
+)
+
+def langevin_nvt(
+    atoms: ase.Atoms,
+    temp: float,
+    nsteps: int,
+    traj_file: Optional[str] = None,
+    friction: float = 1e-2,
+    timestep: float = 1*fs,
+):
+    """Does Langevin dynamics starting from Maxwell-Boltzmann velocities
+
+    :param atoms: atoms object
+    :type atoms: ase.Atoms
+    :param temp: Temperature in Kelvin
+    :type temp: float
+    :param nsteps: Number of steps to run
+    :type nsteps: int
+    :param traj_file: trajectory file name, nothing is written if None,
+        defaults to None
+    :type traj_file: str, optional
+    :param friction: friction parameter, defaults to 1e-2
+    :type friction: float, optional
+    :param timestep: Timestep in ASE time units, defaults to 1*fs
+    :type timestep: float, optional
+    :return: final atoms object and the written trajectory reopened for
+        reading, the trajectory is None if traj_file is None
+    :rtype: ase.Atoms, Trajectory
+    """
+    MaxwellBoltzmannDistribution(atoms, temperature_K=temp)
+    dyn = Langevin(
+        atoms, timestep=timestep, temperature_K=temp, friction=friction,
+    )
+    if traj_file is None:
+        # No file requested: previously `traj` was unbound at the return
+        dyn.run(nsteps)
+        return atoms, None
+
+    props = ['energy', 'forces', 'stress']
+    writer = Trajectory(traj_file, atoms=atoms, mode='w', properties=props)
+    with writer as traj:
+        dyn.attach(traj.write)
+        dyn.run(nsteps)
+    # The writer is closed on exit, hand back a reader as npt() does
+    return atoms, Trajectory(traj_file, 'r')
+
+def npt(
+    atoms: ase.Atoms,
+    temp: float,
+    nsteps: int,
+    timestep: float,
+    externalstress: float = 0,
+    traj_file: Optional[str] = None,
+    ttime: float = 25*fs,
+    pfactor: float = (75*fs)**2 * 14*GPa, #Replace 14 with bulk modulus of material
+):
+    """Does NPT dynamics starting from Maxwell-Boltzmann velocities
+
+    :param atoms: input atoms object
+    :type atoms: ase.Atoms
+    :param temp: Temperature in Kelvin
+    :type temp: float
+    :param nsteps: Number of steps to run
+    :type nsteps: int
+    :param timestep: Timestep in ASE time units
+    :type timestep: float
+    :param externalstress: External pressure to apply, defaults to 0
+    :type externalstress: float, optional
+    :param traj_file: trajectory file name, nothing is written if None,
+        defaults to None
+    :type traj_file: str, optional
+    :param ttime: thermostat time constant, defaults to 25*fs
+    :type ttime: float, optional
+    :param pfactor: barostat constant of NPT, defaults to (75*fs)**2 * 14*GPa
+    :type pfactor: float, optional
+    :return: final atoms object and the written trajectory reopened for
+        reading, the trajectory is None if traj_file is None
+    :rtype: ase.Atoms, Trajectory
+    """
+    MaxwellBoltzmannDistribution(atoms, temperature_K=temp)
+    dyn = NPT(
+        atoms, timestep=timestep, temperature_K=temp, ttime=ttime,
+        pfactor=pfactor, externalstress=externalstress,
+    )
+    if traj_file is None:
+        # Without a file there is nothing to reopen for reading
+        dyn.run(nsteps)
+        return atoms, None
+
+    props = ['energy', 'forces', 'stress']
+    writer = Trajectory(traj_file, atoms=atoms, mode='w', properties=props)
+    with writer as traj:
+        dyn.attach(traj.write)
+        dyn.run(nsteps)
+    # Close the writer first so the reader below sees every frame
+    return atoms, Trajectory(traj_file, 'r')
+
+def plot_thermo(
+    images: Dict,
+    props: Sequence = ('epot', 'temp', 'ekin', 'etot', 'press'),
+    prefix: str = 'ase_md',
+):
+    """Plots thermodynamic properties of a trajectory, one png per property
+
+    :param images: Images specification, see :func:`asimtools.utils.get_images`
+    :type images: Dict
+    :param props: Properties to plot out of 'epot', 'ekin', 'etot', 'temp'
+        and 'press', defaults to all of them
+    :type props: Sequence, optional
+    :param prefix: Prefix for output file names, defaults to 'ase_md'
+    :type prefix: str, optional
+    :raises ValueError: If an unknown property is requested
+    """
+    getters = {
+        'epot': lambda a: a.get_potential_energy(),
+        'ekin': lambda a: a.get_kinetic_energy(),
+        'etot': lambda a: a.get_potential_energy() + a.get_kinetic_energy(),
+        'temp': lambda a: a.get_temperature(),
+        # Pressure is minus the mean normal stress in the ASE sign convention
+        'press': lambda a: -np.mean(a.get_stress(include_ideal_gas=True)[:3]),
+    }
+    unknown = [prop for prop in props if prop not in getters]
+    if unknown:
+        raise ValueError(f'Unknown properties {unknown}, use {list(getters)}')
+
+    # Loop over the loaded images, not over the keys of the specification
+    frames = get_images(**images)
+    prop_dict = {p: [getters[p](atoms) for atoms in frames] for p in props}
+    # x axis is the position in the loaded images, 'index' strides are ignored
+    steps = np.arange(len(frames))
+    for prop in props:
+        _, ax = plt.subplots()
+        ax.plot(steps, prop_dict[prop])
+        ax.set_xlabel('Step')
+        ax.set_ylabel(prop)
+        plt.savefig(f'{prefix}_{prop}.png')
+
+    plt.close(fig='all')
+
+def ase_md(
+    calc_id: str,
+    image: Dict,
+    timestep: float,
+    temp: float,
+    dynamics: str = 'npt',
+    friction: float = 1e-2,
+    ttime: float = 25*fs,
+    pfactor: Optional[float] = None, # e.g. (75*fs)**2 * 14*GPa, 14 is bulk modulus of Li
+    nsteps: int = 100,
+    externalstress: float = 0,
+    plot: bool = True,
+) -> Dict:
+    """Runs ASE MD simulations. This is only recommended for small systems and
+    for testing. For larger systems, use LAMMPS or more purpose-built code
+
+    :param calc_id: calc_id specification
+    :type calc_id: str
+    :param image: Image specification, see :func:`asimtools.utils.get_atoms`
+    :type image: Dict
+    :param timestep: Timestep in ASE time units
+    :type timestep: float
+    :param temp: Temperature in Kelvin
+    :type temp: float
+    :param dynamics: Type of dynamics to run from 'npt' and 'langevin',
+        defaults to 'npt'
+    :type dynamics: str, optional
+    :param friction: Friction parameter, defaults to 1e-2
+    :type friction: float, optional
+    :param ttime: Thermostat time constant, defaults to 25*fs
+    :type ttime: float, optional
+    :param pfactor: Pressure factor, defaults to None
+    :type pfactor: float, optional
+    :param nsteps: Number of steps to run, defaults to 100
+    :type nsteps: int, optional
+    :param externalstress: External stress to apply, defaults to 0
+    :type externalstress: float, optional
+    :param plot: Whether to plot thermodynamic properties, defaults to True
+    :type plot: bool, optional
+    :raises ValueError: If dynamics is not one of 'npt' or 'langevin'
+    :return: Results dictionary
+    :rtype: Dict
+    """
+    if dynamics not in ('langevin', 'npt'):
+        raise ValueError(f"dynamics must be 'npt' or 'langevin': {dynamics}")
+
+    calc = load_calc(calc_id)
+    atoms = get_atoms(**image)
+    atoms.calc = calc  # set_calculator() is deprecated in ASE
+    if dynamics == 'langevin':
+        atoms, _ = langevin_nvt(
+            atoms,
+            temp,
+            nsteps,
+            traj_file='output.traj',
+            timestep=timestep,
+            friction=friction,
+        )
+    else:
+        atoms, _ = npt(
+            atoms,
+            temp,
+            nsteps,
+            traj_file='output.traj',
+            timestep=timestep,
+            pfactor=pfactor,
+            externalstress=externalstress,
+            ttime=ttime,
+        )
+    if plot:
+        # get_images is called with image_file elsewhere, not input_file
+        plot_thermo({'image_file': 'output.traj'})
+    return {}
diff --git a/asimtools/asimmodules/lammps/lammps.py b/asimtools/asimmodules/lammps/lammps.py
index 0d484b9..21f4584 100755
--- a/asimtools/asimmodules/lammps/lammps.py
+++ b/asimtools/asimmodules/lammps/lammps.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 '''
-Runs a user defined lammps script or template 
+Runs a user defined lammps script or template. LAMMPS must be installed 
 
 Author: mkphuthi@github.com
 '''
diff --git a/asimtools/asimmodules/mlips/__init__.py b/asimtools/asimmodules/mlips/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/asimtools/asimmodules/mlips/active_learning.py b/asimtools/asimmodules/mlips/active_learning.py
new file mode 100644
index 0000000..eec72f1
--- /dev/null
+++ b/asimtools/asimmodules/mlips/active_learning.py
@@ -0,0 +1,8 @@
+from pathlib import Path
+from typing import Dict, Optional, Sequence
+from asimtools.job import UnitJob
+
+def active_learning(image: Dict) -> Dict:
+    """Placeholder for the active learning workflow, not implemented yet"""
+    # A def without a body is a SyntaxError, fail loudly when called instead
+    raise NotImplementedError('active_learning is not implemented yet')
diff --git a/asimtools/asimmodules/mlips/compute_deviation.py b/asimtools/asimmodules/mlips/compute_deviation.py
new file mode 100644
index 0000000..60f8873
--- /dev/null
+++ b/asimtools/asimmodules/mlips/compute_deviation.py
@@ -0,0 +1,136 @@
+from typing import Dict, List, Sequence, Optional
+import os
+from glob import glob
+from natsort import natsorted
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+from asimtools.calculators import load_calc
+from asimtools.utils import (
+    change_dict_value,
+    get_images,
+)
+
+def compute_deviation(
+    images: Dict,
+    template_calc_input: Optional[Dict] = None,
+    model_weights_key_sequence: Optional[Sequence] = None,
+    model_weights_pattern: Optional[os.PathLike] = None,
+    calc_ids: Optional[Sequence] = None,
+) -> Dict:
+    """Computes the deviation of properties between calculators for images
+
+    Every image is evaluated with every calculator and the standard deviation
+    across calculators of the energy, force magnitudes and pressure is written
+    to deviations.csv, plots are saved as png files
+
+    :param images: Images specification, see :func:`asimtools.utils.get_images`
+    :type images: Dict
+    :param template_calc_input: Template calc_input, defaults to None
+    :type template_calc_input: Optional[Dict]
+    :param model_weights_key_sequence: Sequence of keys to change in the
+        template calc_input
+    :type model_weights_key_sequence: Optional[Sequence]
+    :param model_weights_pattern: Glob pattern of model weights files
+    :type model_weights_pattern: Optional[os.PathLike]
+    :param calc_ids: List of calc_ids to use, if provided, the template and
+        model weights arguments are ignored, defaults to None
+    :type calc_ids: Optional[Sequence]
+    :raises ValueError: If no calculators or no images are found
+    :return: Empty results dictionary
+    :rtype: Dict
+    """
+    properties = ['energy', 'forces', 'stress']
+    if calc_ids is not None:
+        calc_dict = {calc_id: calc_id for calc_id in calc_ids}
+    elif model_weights_pattern is None or template_calc_input is None:
+        raise ValueError(
+            'Provide calc_ids or template_calc_input and model_weights_pattern'
+        )
+    else:
+        model_weights_files = natsorted(glob(os.fspath(model_weights_pattern)))
+        calc_dict = {
+            f'calc-{i}': change_dict_value(
+                template_calc_input,
+                model_weights_file,
+                key_sequence=model_weights_key_sequence,
+                return_copy=True,
+            )
+            for i, model_weights_file in enumerate(model_weights_files)
+        }
+    if not calc_dict:
+        raise ValueError('No calculators found to compute deviations with')
+
+    images = get_images(**images)
+    if len(images) == 0:
+        raise ValueError('No images found')
+
+    prop_dict = {
+        prop: {calc_id: [] for calc_id in calc_dict} for prop in properties
+    }
+    prop_dict['energy']['std'] = []
+    prop_dict['forces']['mean_std'] = []
+    prop_dict['forces']['max_std'] = []
+    prop_dict['stress']['std'] = []
+
+    for atoms in images:
+        atom_results = {prop: [] for prop in properties}
+        for calc_id, calc_spec in calc_dict.items():
+            # Some calculators behave badly if not reloaded unfortunately
+            if isinstance(calc_spec, str):
+                calc = load_calc(calc_id=calc_spec)
+            else:
+                calc = load_calc(calc_input=calc_spec)
+            atoms.calc = calc
+            # No positional argument here, it would be read as force_consistent
+            energy = atoms.get_potential_energy()
+            forces = np.linalg.norm(atoms.get_forces(), axis=1)
+            stress = -np.sum(
+                atoms.get_stress(voigt=True, include_ideal_gas=False)[:3]
+            ) / 3
+            for prop, value in zip(properties, (energy, forces, stress)):
+                prop_dict[prop][calc_id].append(value)
+                atom_results[prop].append(value)
+
+        # atom_results['forces'] has one row per calculator, so axis=0 gives
+        # the spread between calculators for each atom
+        force_std = np.std(atom_results['forces'], axis=0)
+        prop_dict['energy']['std'].append(np.std(atom_results['energy']))
+        prop_dict['forces']['mean_std'].append(np.mean(force_std))
+        prop_dict['forces']['max_std'].append(np.max(force_std))
+        prop_dict['stress']['std'].append(np.std(atom_results['stress']))
+
+    df = pd.DataFrame({
+        'energy_std': prop_dict['energy']['std'],
+        'force_mean_std': prop_dict['forces']['mean_std'],
+        'force_max_std': prop_dict['forces']['max_std'],
+        'stress_std': prop_dict['stress']['std'],
+    })
+    df.to_csv('deviations.csv')
+
+    for prop in properties:
+        if prop != 'forces':
+            # Per-calculator values, forces are skipped as they are per-atom
+            _, ax = plt.subplots()
+            for calc_id in calc_dict:
+                ax.plot(prop_dict[prop][calc_id], label=calc_id)
+            ax.set_xlabel('Image index')
+            ax.set_ylabel(f'{prop} [ASE units]')
+            ax.legend()
+            plt.savefig(f'{prop}.png')
+            plt.close()
+
+        _, ax = plt.subplots()
+        if prop == 'forces':
+            ax.plot(prop_dict[prop]['mean_std'], label='mean_std')
+            ax.plot(prop_dict[prop]['max_std'], label='max_std')
+            ax.legend()
+        else:
+            ax.plot(prop_dict[prop]['std'])
+        ax.set_xlabel('Image index')
+        ax.set_ylabel(f'{prop} std [ASE units]')
+        plt.savefig(f'{prop}_std.png')
+        plt.close()
+
+    return {}
+    
\ No newline at end of file
diff --git a/asimtools/asimmodules/mlips/select_images.py b/asimtools/asimmodules/mlips/select_images.py
new file mode 100644
index 0000000..0e2caeb
--- /dev/null
+++ b/asimtools/asimmodules/mlips/select_images.py
@@ -0,0 +1,44 @@
+from typing import Dict, Sequence
+import os
+import pandas as pd
+from ase.io import write
+from asimtools.utils import (
+    get_images,
+)
+
+def select_images(
+    image_file: os.PathLike,
+    deviation_csv: os.PathLike,
+    thresholds: Sequence[float],
+    column: str = 'force_mean_std',
+) -> Dict:
+    """Select images based on deviation values
+
+    :param image_file: Path to image_file with images to select from
+    :type image_file: os.PathLike
+    :param deviation_csv: Path to csv file from compute_deviation, row indices
+        should match indices in image_file
+    :type deviation_csv: os.PathLike
+    :param thresholds: Thresholds for selection as exclusive (min,max) bounds
+    :type thresholds: Sequence[float]
+    :param column: Column of csv to use, defaults to 'force_mean_std'
+    :type column: str, optional
+    :return: Dictionary with stats on selected images
+    :rtype: Dict
+    """
+    lower, upper = thresholds[0], thresholds[1]
+    dev_df = pd.read_csv(deviation_csv, sep=',')
+    in_window = (dev_df[column] > lower) & (dev_df[column] < upper)
+    # tolist() gives builtin ints so the results stay serializable
+    selected_idxs = dev_df.index[in_window].tolist()
+    images = get_images(image_file=image_file)
+    selected_images = [images[i] for i in selected_idxs]
+    write('selected_images.xyz', selected_images)
+    # Guard the fraction against an image file without any images
+    fraction = len(selected_images) / len(images) if len(images) else 0.0
+    return {
+        'num_selected': len(selected_images),
+        'fraction_selected': fraction,
+        'selected_idxs': selected_idxs,
+    }
+    
\ No newline at end of file
diff --git a/asimtools/asimmodules/mlips/train_mace.py b/asimtools/asimmodules/mlips/train_mace.py
new file mode 100644
index 0000000..84f3f5b
--- /dev/null
+++ b/asimtools/asimmodules/mlips/train_mace.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+'''
+Trains a MACE model from a config dictionary or file. MACE must be installed
+
+Author: mkphuthi@github.com
+'''
+from typing import Dict, Optional, Union
+import sys
+from pathlib import Path
+import logging
+import warnings
+warnings.filterwarnings("ignore")
+import yaml
+from numpy.random import randint
+from mace.cli.run_train import main as mace_run_train_main
+
+def train_mace(config: Union[Dict, str], randomize_seed: bool = False) -> Dict:
+    """Runs MACE training through the mace run_train command line entry point
+
+    :param config: MACE config dictionary or path to a yaml config file
+    :type config: Union[Dict,str]
+    :param randomize_seed: Whether to randomize the seed, defaults to False
+    :type randomize_seed: bool
+    :return: Dictionary of results
+    :rtype: Dict
+    """
+    if isinstance(config, str):
+        with open(config, 'r') as fp:
+            config = yaml.safe_load(fp)
+    if randomize_seed:
+        # Copy instead of mutating the input, yaml needs a builtin int seed
+        config = {**config, 'seed': int(randint(0, 1000000))}
+    config_file_path = str(Path("mace_config.yaml").resolve())
+    with open(config_file_path, "w") as f:
+        # f.write() only accepts str, the dictionary has to be serialized
+        yaml.safe_dump(config, f)
+    logging.getLogger().handlers.clear()
+    # The options are handed over through sys.argv, restore it afterwards
+    argv, sys.argv = sys.argv, ["program", "--config", config_file_path]
+    try:
+        mace_run_train_main()
+    finally:
+        sys.argv = argv
+    return {}
+
+
+if __name__ == "__main__":
+    # Guarded so that importing this asimmodule does not start a training run
+    train_mace(config={
+        "model": "MACE",
+        "num_channels": 32,
+        "max_L": 0,
+        "r_max": 4.0,
+        "name": "mace02_com1_gen1",
+        "model_dir": "MACE_models",
+        "log_dir": "MACE_models",
+        "checkpoints_dir": "MACE_models",
+        "results_dir": "MACE_models",
+        "train_file": "data/solvent_xtb_train_23_gen1.xyz",
+        "valid_file": "data/solvent_xtb_train_50.xyz",
+        "test_file": "data/solvent_xtb_test.xyz",
+        "energy_key": "energy_xtb",
+        "forces_key": "forces_xtb",
+        "device": "cuda",
+        "batch_size": 10,
+        "max_num_epochs": 500,
+        "swa": True,
+        "seed": 1234,
+    })
diff --git a/asimtools/asimmodules/mlips/train_mtp.py b/asimtools/asimmodules/mlips/train_mtp.py
new file mode 100644
index 0000000..e69de29
diff --git a/asimtools/asimmodules/mlips/train_nequip.py b/asimtools/asimmodules/mlips/train_nequip.py
new file mode 100644
index 0000000..e69de29
diff --git a/asimtools/calculators.py b/asimtools/calculators.py
index 5a0c789..8e80af1 100644
--- a/asimtools/calculators.py
+++ b/asimtools/calculators.py
@@ -10,15 +10,17 @@
 # pylint: disable=import-error
 
 def load_calc(
-    calc_id: str = None,
+    calc_id: Optional[str] = None,
     calc_input: Optional[Dict] = None
 ):
     """Loads a calculator using given calc_id or calc_input
 
-    :param calc_id: ID/key to use to load calculator from the supplied/global\
-        calc_input file, defaults to None
+    :param calc_id: ID/key to use to load calculator from the supplied or \
+        global calc_input file, provide only one of calc_id or calc_input, \
+        defaults to None
     :type calc_id: str, optional
-    :param calc_input: calc_input dictionary for a single calculator\
+    :param calc_input: calc_input dictionary for a single calculator, \
+        provide only one of calc_id or calc_input\
         , defaults to None
     :type calc_input: Optional[Dict], optional
     :return: ASE calculator instance
diff --git a/examples/external/MACE/train_mace_sim_input.yaml b/examples/external/MACE/train_mace_sim_input.yaml
new file mode 100644
index 0000000..99f43c1
--- /dev/null
+++ b/examples/external/MACE/train_mace_sim_input.yaml
@@ -0,0 +1,23 @@
+asimmodule: mlips.train_mace
+workdir: train_results
+args:
+  config:
+    model: "MACE"
+    num_channels: 32
+    max_L: 0
+    r_max: 4.0
+    name: "mace02_com1_gen1"
+    model_dir: "MACE_models"
+    log_dir: "MACE_models"
+    checkpoints_dir: "MACE_models"
+    results_dir: "MACE_models"
+    train_file: "data/solvent_xtb_train_23_gen1.xyz"
+    valid_file: "data/solvent_xtb_train_50.xyz"
+    test_file: "data/solvent_xtb_test.xyz"
+    energy_key: "energy_xtb"
+    forces_key: "forces_xtb"
+    device: cuda
+    batch_size: 10
+    max_num_epochs: 500
+    swa: True
+    seed: 1234
\ No newline at end of file
diff --git a/examples/external/active_learning/Ar_unlabeled_data.xyz b/examples/external/active_learning/Ar_unlabeled_data.xyz
new file mode 100644
index 0000000..e9a518e
--- /dev/null
+++ b/examples/external/active_learning/Ar_unlabeled_data.xyz
@@ -0,0 +1,100 @@
+8
+Lattice="0.0 5.26 5.26 5.26 0.0 5.26 5.26 5.26 0.0" Properties=species:S:1:pos:R:3 pbc="T T T"
+Ar       0.00462964      -0.09226040       0.04695847
+Ar       2.47324817       2.68025648       0.10554395
+Ar       2.75415631       0.05849513       2.61938741
+Ar       5.27144325       2.52971925       2.49149306
+Ar      -0.21182464       2.58068233       2.57142148
+Ar       2.63359161       5.16296212       2.84025139
+Ar       2.68470167       2.74070101       5.16118135
+Ar       5.04805016       5.33507958       5.36608106
+8
+Lattice="0.0 5.26 5.26 5.26 0.0 5.26 5.26 5.26 0.0" Properties=species:S:1:pos:R:3 pbc="T T T"
+Ar      -0.23650223       0.04782552       0.03863883
+Ar       2.71136214       2.46317054      -0.03238926
+Ar       2.65637321      -0.07235410       2.67018562
+Ar       5.43566149       2.77002496       2.56032613
+Ar      -0.10493872       2.62550011       2.54996654
+Ar       2.59310875       5.15667921       2.66451553
+Ar       2.64456917       2.65466279       5.18992199
+Ar       5.39568301       5.13716759       5.14043630
+8
+Lattice="0.0 5.26 5.26 5.26 0.0 5.26 5.26 5.26 0.0" Properties=species:S:1:pos:R:3 pbc="T T T"
+Ar      -0.11850162      -0.07448449      -0.06423130
+Ar       2.69278426       2.49272792      -0.08870735
+Ar       2.66757600      -0.05770337       2.78553048
+Ar       5.23563841       2.68294600       2.72418804
+Ar       0.07070927       2.61359509       2.63604965
+Ar       2.44190057       5.28480129       2.67402492
+Ar       2.54272907       2.59354269       5.18455391
+Ar       5.43184955       5.38845282       5.33081980
+8
+Lattice="0.0 5.26 5.26 5.26 0.0 5.26 5.26 5.26 0.0" Properties=species:S:1:pos:R:3 pbc="T T T"
+Ar       0.03255134      -0.06828410      -0.04537061
+Ar       2.71825535       2.59104123      -0.00766290
+Ar       2.68244333      -0.18601312       2.53289227
+Ar       5.11521126       2.66714454       2.73185114
+Ar      -0.09382444       2.48452015       2.60713811
+Ar       2.74684109       5.13087203       2.56430643
+Ar       2.76442920       2.64013382       5.27624961
+Ar       5.30394961       5.21984033       5.17026880
+8
+Lattice="0.0 5.26 5.26 5.26 0.0 5.26 5.26 5.26 0.0" Properties=species:S:1:pos:R:3 pbc="T T T"
+Ar       0.11239723       0.09751320       0.04876586
+Ar       2.62085924       2.78386042      -0.01997055
+Ar       2.61166314       0.07106806       2.48794379
+Ar       5.31346950       2.52653839       2.70613207
+Ar      -0.09254101       2.64380490       2.62112123
+Ar       2.46829225       5.27521538       2.66087230
+Ar       2.75545069       2.65286988       5.26817028
+Ar       5.30862628       5.15703143       5.33653645
+8
+Lattice="0.0 5.26 5.26 5.26 0.0 5.26 5.26 5.26 0.0" Properties=species:S:1:pos:R:3 pbc="T T T"
+Ar       0.01735047       0.03618800      -0.02931878
+Ar       2.74158381       2.50187237       0.06319529
+Ar       2.69178762       0.04769081       2.40760661
+Ar       5.24391796       2.64584317       2.68127807
+Ar      -0.07171837       2.61146428       2.68509519
+Ar       2.60394452       5.17911184       2.67935530
+Ar       2.63982819       2.70957509       5.27607143
+Ar       5.38986334       5.21939804       5.09127323
+8
+Lattice="0.0 5.26 5.26 5.26 0.0 5.26 5.26 5.26 0.0" Properties=species:S:1:pos:R:3 pbc="T T T"
+Ar       0.01576750       0.02106763       0.08763310
+Ar       2.69012197       2.74527467      -0.10234706
+Ar       2.66568894       0.08898679       2.67019135
+Ar       5.19490405       2.59859269       2.67143736
+Ar       0.01589783       2.49671301       2.58481518
+Ar       2.58891160       5.27207806       2.51019353
+Ar       2.51823414       2.38832802       5.41893456
+Ar       5.20623418       5.28150733       5.28914131
+8
+Lattice="0.0 5.26 5.26 5.26 0.0 5.26 5.26 5.26 0.0" Properties=species:S:1:pos:R:3 pbc="T T T"
+Ar       0.23088184      -0.10957529       0.02576660
+Ar       2.64321252       2.40903239       0.07462241
+Ar       2.59540175      -0.00218275       2.53401779
+Ar       5.28764227       2.73716515       2.56333426
+Ar       0.09842428       2.60219012       2.77119452
+Ar       2.55916224       5.32029612       2.66997623
+Ar       2.78491591       2.62257993       5.17708658
+Ar       5.12920260       5.20982169       5.44648588
+8
+Lattice="0.0 5.26 5.26 5.26 0.0 5.26 5.26 5.26 0.0" Properties=species:S:1:pos:R:3 pbc="T T T"
+Ar       0.16856664       0.04481991       0.03474697
+Ar       2.65989544       2.82114757      -0.05296433
+Ar       2.57180337       0.04507377       2.82009965
+Ar       5.22036423       2.72757370       2.73869744
+Ar      -0.15854701       2.73287175       2.75956315
+Ar       2.67675944       5.21854976       2.59652101
+Ar       2.73128570       2.72366806       5.15909367
+Ar       5.34509342       5.26335190       5.24872631
+8
+Lattice="0.0 5.26 5.26 5.26 0.0 5.26 5.26 5.26 0.0" Properties=species:S:1:pos:R:3 pbc="T T T"
+Ar       0.08204380       0.12297169      -0.07630832
+Ar       2.76113233       2.57487942       0.20156581
+Ar       2.72397558      -0.01601679       2.54435111
+Ar       5.31690468       2.62780965       2.69346375
+Ar       0.03931346       2.67780599       2.85455249
+Ar       2.51593437       5.44528947       2.62845880
+Ar       2.49914606       2.60818151       5.27128093
+Ar       5.13509074       5.28249139       5.31034184
diff --git a/examples/external/active_learning/calc_input.yaml b/examples/external/active_learning/calc_input.yaml
new file mode 100644
index 0000000..a5c829c
--- /dev/null
+++ b/examples/external/active_learning/calc_input.yaml
@@ -0,0 +1,18 @@
+lj-0: 
+  name: LennardJones
+  module: ase.calculators.lj
+  args:
+    sigma: 3.0
+    epsilon: 0.003
+lj-1: 
+  name: LennardJones
+  module: ase.calculators.lj
+  args:
+    sigma: 3.6
+    epsilon: 0.009
+lj-2: 
+  name: LennardJones
+  module: ase.calculators.lj
+  args:
+    sigma: 4.2
+    epsilon: 0.013
diff --git a/examples/external/active_learning/compute_deviation_sim_imput.yaml b/examples/external/active_learning/compute_deviation_sim_imput.yaml
new file mode 100644
index 0000000..a386b36
--- /dev/null
+++ b/examples/external/active_learning/compute_deviation_sim_imput.yaml
@@ -0,0 +1,9 @@
+asimmodule: mlips.compute_deviation
+workdir: compute_deviation_results
+args:
+  images: 
+    image_file: /Users/keith/dev/asimtools/examples/external/active_learning/training_data/Ar_unlabeled_data.xyz
+  template_calc_input: null
+  model_weights_key_sequence: null
+  model_weights_pattern: null
+  calc_ids: [lj-0, lj-1, lj-2]
\ No newline at end of file
diff --git a/examples/external/active_learning/select_images_sim_input.yaml b/examples/external/active_learning/select_images_sim_input.yaml
new file mode 100644
index 0000000..d1f820a
--- /dev/null
+++ b/examples/external/active_learning/select_images_sim_input.yaml
@@ -0,0 +1,7 @@
+asimmodule: mlips.select_images
+workdir: select_images_results
+args:
+  image_file: /Users/keith/dev/asimtools/examples/external/active_learning/compute_deviation_results/images_input.xyz
+  deviation_csv: /Users/keith/dev/asimtools/examples/external/active_learning/compute_deviation_results/deviations.csv
+  column: force_max_std
+  thresholds: [0.0046, 0.0048]
\ No newline at end of file
diff --git a/examples/external/active_learning/training_data/Ar_unlabeled_data.xyz b/examples/external/active_learning/training_data/Ar_unlabeled_data.xyz
new file mode 100644
index 0000000..4568a19
--- /dev/null
+++ b/examples/external/active_learning/training_data/Ar_unlabeled_data.xyz
@@ -0,0 +1,100 @@
+8
+Lattice="0.0 5.26 5.26 5.26 0.0 5.26 5.26 5.26 0.0" Properties=species:S:1:pos:R:3 pbc="T T T"
+Ar       0.00063175      -0.00134364      -0.00009798
+Ar       2.62875230       2.63083308      -0.00058141
+Ar       2.63146690      -0.00017092       2.63013702
+Ar       5.25956283       2.62918880       2.63123446
+Ar       0.00012708       2.63117043       2.63125070
+Ar       2.63115244       5.26059905       2.62816784
+Ar       2.63016133       2.62886352       5.26088197
+Ar       5.26197529       5.26072910       5.26042818
+8
+Lattice="0.0 5.26 5.26 5.26 0.0 5.26 5.26 5.26 0.0" Properties=species:S:1:pos:R:3 pbc="T T T"
+Ar      -0.00064802      -0.00133520       0.00023648
+Ar       2.63035167       2.63063775       0.00026729
+Ar       2.62867021      -0.00101167       2.63195487
+Ar       5.25940318       2.63091628       2.62913458
+Ar      -0.00035514       2.62988294       2.63030931
+Ar       2.62815466       5.26029283       2.62935783
+Ar       2.63037190       2.63015708       5.26110149
+Ar       5.25972610       5.26100808       5.26005655
+8
+Lattice="0.0 5.26 5.26 5.26 0.0 5.26 5.26 5.26 0.0" Properties=species:S:1:pos:R:3 pbc="T T T"
+Ar       0.00064720      -0.00123455      -0.00038071
+Ar       2.63105752       2.63016171      -0.00140896
+Ar       2.62958203       0.00062349       2.63072573
+Ar       5.26045810       2.63084458       2.62993569
+Ar       0.00003879       2.62822595       2.62917982
+Ar       2.62811598       5.25947588       2.62900229
+Ar       2.63021872       2.63002858       5.25927368
+Ar       5.25994853       5.26034648       5.26028018
+8
+Lattice="0.0 5.26 5.26 5.26 0.0 5.26 5.26 5.26 0.0" Properties=species:S:1:pos:R:3 pbc="T T T"
+Ar      -0.00064050      -0.00080407       0.00031793
+Ar       2.63065247       2.62920006       0.00001101
+Ar       2.62893777       0.00053970       2.63103402
+Ar       5.26058623       2.62896625       2.63122650
+Ar      -0.00082325       2.62863829       2.63143430
+Ar       2.63067522       5.26068558       2.62827546
+Ar       2.63100014       2.63142976       5.25911378
+Ar       5.26028775       5.25947408       5.26149853
+8
+Lattice="0.0 5.26 5.26 5.26 0.0 5.26 5.26 5.26 0.0" Properties=species:S:1:pos:R:3 pbc="T T T"
+Ar       0.00028399       0.00026002      -0.00104085
+Ar       2.62981652       2.63006441       0.00024196
+Ar       2.63034750       0.00120659       2.62868536
+Ar       5.25984940       2.63232099       2.62865888
+Ar      -0.00082745       2.63034623       2.62952122
+Ar       2.63093593       5.26021406       2.63027430
+Ar       2.63022126       2.63151084       5.26017373
+Ar       5.25880045       5.25983549       5.26199651
+8
+Lattice="0.0 5.26 5.26 5.26 0.0 5.26 5.26 5.26 0.0" Properties=species:S:1:pos:R:3 pbc="T T T"
+Ar      -0.00071048      -0.00076662       0.00216996
+Ar       2.63107272       2.63117706       0.00127599
+Ar       2.62926535      -0.00095817       2.63013911
+Ar       5.26015624       2.62913605       2.63079195
+Ar      -0.00048236       2.63059186       2.62921367
+Ar       2.63026416       5.25928451       2.62992490
+Ar       2.62891662       2.63007480       5.25761921
+Ar       5.26072018       5.25931538       5.25860089
+8
+Lattice="0.0 5.26 5.26 5.26 0.0 5.26 5.26 5.26 0.0" Properties=species:S:1:pos:R:3 pbc="T T T"
+Ar      -0.00000088       0.00044584      -0.00070827
+Ar       2.62898896       2.63015959      -0.00045434
+Ar       2.62967458       0.00161998       2.63050445
+Ar       5.26088612       2.62733685       2.63011542
+Ar       0.00046293       2.62864285       2.62941117
+Ar       2.63025169       5.25873326       2.62886324
+Ar       2.63039616       2.62893415       5.26114168
+Ar       5.26007213       5.26033645       5.25952491
+8
+Lattice="0.0 5.26 5.26 5.26 0.0 5.26 5.26 5.26 0.0" Properties=species:S:1:pos:R:3 pbc="T T T"
+Ar       0.00091685      -0.00226119       0.00110630
+Ar       2.63082194       2.63024638       0.00015000
+Ar       2.62959876       0.00081359       2.63256820
+Ar       5.25915980       2.63085306       2.63253808
+Ar       0.00006294       2.63037192       2.63065369
+Ar       2.63020079       5.25979578       2.62910765
+Ar       2.63125648       2.63064656       5.26086612
+Ar       5.25961629       5.26157119       5.26026478
+8
+Lattice="0.0 5.26 5.26 5.26 0.0 5.26 5.26 5.26 0.0" Properties=species:S:1:pos:R:3 pbc="T T T"
+Ar      -0.00132998       0.00116114      -0.00024581
+Ar       2.62936543       2.63047710      -0.00087171
+Ar       2.62839359      -0.00114677       2.63151217
+Ar       5.25840574       2.62956249       2.62908786
+Ar       0.00191451       2.63014685       2.63113512
+Ar       2.62974394       5.25997759       2.62929390
+Ar       2.62920471       2.62903313       5.25956370
+Ar       5.26122226       5.26165839       5.26023007
+8
+Lattice="0.0 5.26 5.26 5.26 0.0 5.26 5.26 5.26 0.0" Properties=species:S:1:pos:R:3 pbc="T T T"
+Ar       0.00030151       0.00142681      -0.00031480
+Ar       2.63011684       2.63003927       0.00009790
+Ar       2.62960775       0.00097788       2.63136609
+Ar       5.26122373       2.63103349       2.62864099
+Ar       0.00013608       2.62928128       2.62935493
+Ar       2.63023817       5.26064366       2.63054259
+Ar       2.62871616       2.63018278       5.26158698
+Ar       5.25940722       5.26127503       5.26120151
diff --git a/pyproject.toml b/pyproject.toml
index e1ded66..9344421 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "asimtools"
 description = "A lightweight python package for managing and running atomic simulation workflows"
-version = "0.0.2"
+version = "0.1.0"
 readme = "README.md"
 license = { text = "MIT" }
 authors = [