From 38cd2001b47c282c7623f86418a03edbe06b4224 Mon Sep 17 00:00:00 2001 From: Albert Zhu Date: Mon, 14 Jun 2021 17:04:58 -0400 Subject: [PATCH 01/79] write version 1 of to_ase and unit tests --- configs/example.yaml | 4 +- nequip/data/AtomicData.py | 27 +++++++++++ nequip/models/_eng.py | 5 +- nequip/scripts/ase_tutorial.py | 76 ++++++++++++++++++++++++++++++ nequip/scripts/test_saved_model.py | 0 tests/data/test_AtomicData.py | 13 +++++ 6 files changed, 122 insertions(+), 3 deletions(-) create mode 100644 nequip/scripts/ase_tutorial.py create mode 100644 nequip/scripts/test_saved_model.py diff --git a/configs/example.yaml b/configs/example.yaml index 0096269e..2ac74bfe 100644 --- a/configs/example.yaml +++ b/configs/example.yaml @@ -61,7 +61,7 @@ npz_fixed_field_keys: # format: extxyz # logging -wandb: true # we recommend using wandb for logging, we'll turn it off here as it's optional +wandb: false # we recommend using wandb for logging, we'll turn it off here as it's optional wandb_project: aspirin # project name used in wandb wandb_resume: true # if true and restart is true, wandb run data will be restarted and updated. # if false, a new wandb run will be generated @@ -74,7 +74,7 @@ n_train: 100 n_val: 50 # number of validation data learning_rate: 0.01 # learning rate, we found values between 0.01 and 0.005 to work best - this is often one of the most important hyperparameters to tune batch_size: 5 # batch size, we found it important to keep this small for most applications (1-5) -max_epochs: 1000000 # stop training after _ number of epochs +max_epochs: 1 # stop training after _ number of epochs metrics_key: loss # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse use_ema: false # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors ema_decay: 0.999 # ema weight, commonly set to 0.999 diff --git a/nequip/data/AtomicData.py b/nequip/data/AtomicData.py index e4c679e8..a5a07ea8 100644 --- a/nequip/data/AtomicData.py +++ b/nequip/data/AtomicData.py @@ -11,6 +11,7 @@ import numpy as np import ase.neighborlist from ase.calculators.singlepoint import SinglePointCalculator, SinglePointDFTCalculator +from ase import Atoms, Atom import torch from torch_geometric.data import Data @@ -249,6 +250,32 @@ def from_ase(cls, atoms, r_max, **kwargs): **add_fields, ) + def to_ase(self): + """Build a list of ase.Atoms objects from an AtomicData object""" + positions = self.pos.tolist() + batches = self.batch.tolist() if "batch" in self and self.batch is not None else None + atomic_nums = self.atomic_numbers.tolist() \ + if AtomicDataDict.ATOMIC_NUMBERS_KEY in self and self.atomic_numbers is not None else None + pbc = self.pbc if "pbc" in self and self.pbc is not None else None + cell = self.cell if "cell" in self and self.cell is not None else None + + unique_batches = [0] + if batches is not None: + unique_batches = list(set(batches)) + + num_atoms = int(len(positions) / len(unique_batches)) + batch_atoms = [] + + for batch in unique_batches: + atoms = [] + for i in range(num_atoms): + atom_index = batch * num_atoms + i + atoms.append(Atom(atomic_nums[atom_index], position=positions[atom_index])) + mol = Atoms(atoms, cell=cell[batch], pbc=pbc[batch]) + batch_atoms.append(mol) + + return batch_atoms + def get_edge_vectors(data: Data) -> torch.Tensor: data = 
AtomicDataDict.with_edge_vectors(AtomicData.to_AtomicDataDict(data)) return data[AtomicDataDict.EDGE_VECTORS_KEY] diff --git a/nequip/models/_eng.py b/nequip/models/_eng.py index ec17a7b6..0496af47 100644 --- a/nequip/models/_eng.py +++ b/nequip/models/_eng.py @@ -46,7 +46,10 @@ def EnergyModel(**shared_params) -> SequentialGraphNetwork: { # TODO: the next linear throws out all L > 0, don't create them in the last layer of convnet # -- output block -- - "conv_to_output_hidden": AtomwiseLinear, + "conv_to_output_hidden": ( + AtomwiseLinear, + dict(out_field="out_block_hidden") + ), "output_hidden_to_scalar": ( AtomwiseLinear, dict(irreps_out="1x0e", out_field=AtomicDataDict.PER_ATOM_ENERGY_KEY), diff --git a/nequip/scripts/ase_tutorial.py b/nequip/scripts/ase_tutorial.py new file mode 100644 index 00000000..d271440d --- /dev/null +++ b/nequip/scripts/ase_tutorial.py @@ -0,0 +1,76 @@ +from ase import Atoms, Atom +from ase.visualize import view + +import warnings +from copy import deepcopy +from typing import Union, Tuple, Dict, Optional +from collections.abc import Mapping + +import numpy as np +import ase.neighborlist +from ase.calculators.singlepoint import SinglePointCalculator, SinglePointDFTCalculator + +import torch +from torch_geometric.data import Data +import e3nn.o3 + +# --- Tutorial --- # +# d = 1.208 +# o2 = Atoms([Atom("O", [0, 0, 0]), +# Atom("C", [0, 0, d])], +# cell=[(3, 0, 0), +# (0, 7.5, 0), +# (0, 0, 8)]) +# o2.set_pbc((True, True, True)) +# view(o2) + +# --- PyTorch testing --- # +# a = torch.randn(2, 3) +# a_list = a.tolist() +# print(a_list) +# print(a_list[0]) + +aspirin_atoms = [6, 6, 6, 6, 6, 6, 6, 8, 8, 8, 6, 6, 8, 1, 1, 1, 1, 1, 1, 1, 1] + +pos = np.array([[1.9351, -1.3223, -0.2434], + [1.1050, 1.1607, -1.2501], + [2.7351, -0.6791, -1.2130], + [2.3617, 0.5501, -1.6917], + [-3.2277, 1.4643, 0.4005], + [0.7966, -0.7842, 0.2737], + [0.3291, 0.4374, -0.3489], + [-1.1688, -1.4345, 1.3998], + [-1.8708, 1.9584, -1.4831], + [0.9033, -2.2319, 2.1187], + [0.0525, -1.4987, 1.3657], + [-1.9753, 1.5393, -0.3593], + [-0.9103, 0.9451, 0.2944], + [0.4981, -2.5795, 2.9364], + [2.3691, -2.2767, 0.1016], + [0.7304, 2.1011, -1.6379], + [3.7474, -1.1822, -1.3683], + [2.8886, 1.2051, -2.3682], + [-4.1139, 1.2818, -0.3308], + [-3.2100, 0.7930, 1.2202], + [-3.5192, 2.4355, 0.9832]]) + +positions = torch.tensor(pos).tolist() +batches = torch.tensor([0], dtype=torch.int32).tolist() +atomic_nums = torch.tensor(aspirin_atoms).tolist() +pbc = torch.tensor([[False, False, False]]).numpy() +cell = torch.tensor([[[0, 0, 0], [0, 0, 0], [0, 0, 0]]]).numpy() + +unique_batches = list(set(batches)) if batches is not None else [0] + +num_atoms = int(len(positions) / len(unique_batches)) +batch_atoms = [] + +for batch in unique_batches: + atoms = [] + for i in range(num_atoms): + atom_index = batch * num_atoms + i + atoms.append(Atom(atomic_nums[atom_index], position=positions[atom_index])) + mol = Atoms(atoms, cell=cell[batch], pbc=pbc[batch]) + batch_atoms.append(mol) + +view(batch_atoms[0]) diff --git a/nequip/scripts/test_saved_model.py b/nequip/scripts/test_saved_model.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/data/test_AtomicData.py b/tests/data/test_AtomicData.py index 845a2931..cbae92ff 100644 --- a/tests/data/test_AtomicData.py +++ b/tests/data/test_AtomicData.py @@ -19,6 +19,19 @@ def test_from_ase(CuFcc): assert data[key].shape == (len(atoms), 3) # 4 species in this atoms +def test_to_ase(CH3CHO): + atoms, data = CH3CHO + to_ase = data.to_ase() + to_ase_atoms = 
to_ase[0] + assert np.allclose(atoms.get_positions(), to_ase_atoms.get_positions()) + assert np.array_equal(atoms.get_atomic_numbers(), to_ase_atoms.get_atomic_numbers()) + assert np.array_equal(atoms.get_pbc(), to_ase_atoms.get_pbc()) + assert np.array_equal(atoms.get_cell(), to_ase_atoms.get_cell()) + + +# def test_to_ase_batches(): +# + def test_non_periodic_edge(CH3CHO): atoms, data = CH3CHO # check edges From e067a6836f3b1b5804e225973ef83a7c135c8ca8 Mon Sep 17 00:00:00 2001 From: Albert Zhu Date: Mon, 14 Jun 2021 18:04:38 -0400 Subject: [PATCH 02/79] complete first version of to_ase code and unit testing --- nequip/train/trainer.py | 8 ++++++++ tests/data/test_AtomicData.py | 18 ++++++++++++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/nequip/train/trainer.py b/nequip/train/trainer.py index 049efcdd..ee3500b1 100644 --- a/nequip/train/trainer.py +++ b/nequip/train/trainer.py @@ -708,6 +708,14 @@ def batch_step(self, data, validation=False): # We make a shallow copy of the input dict in case the model modifies it input_data = data_unscaled.copy() out = self.model(input_data) + if self.ibatch == 0: + np.savez('aspirin_atomic_numbers', out[AtomicDataDict.ATOMIC_NUMBERS_KEY].detach().numpy()) + np.savez('aspirin_batch', out[AtomicDataDict.BATCH_KEY].detach().numpy()) + np.savez('aspirin_positions', out[AtomicDataDict.POSITIONS_KEY].detach().numpy()) + np.savez('aspirin_cell', out[AtomicDataDict.CELL_KEY].detach().numpy()) + np.savez('aspirin_pbc', out[AtomicDataDict.PBC_KEY].detach().numpy()) + np.savez('aspirin_edge_index', out[AtomicDataDict.EDGE_INDEX_KEY].detach().numpy()) + del input_data # If we're in evaluation mode (i.e. validation), then diff --git a/tests/data/test_AtomicData.py b/tests/data/test_AtomicData.py index cbae92ff..7040a8ab 100644 --- a/tests/data/test_AtomicData.py +++ b/tests/data/test_AtomicData.py @@ -29,8 +29,22 @@ def test_to_ase(CH3CHO): assert np.array_equal(atoms.get_cell(), to_ase_atoms.get_cell()) -# def test_to_ase_batches(): -# +def test_to_ase_batches(): + atomic_numbers = np.load("C:/Users/alber/nequip/nequip/scripts/aspirin_atomic_numbers.npz")['arr_0'] + pos = np.load("C:/Users/alber/nequip/nequip/scripts/aspirin_positions.npz")['arr_0'] + batch = np.load("C:/Users/alber/nequip/nequip/scripts/aspirin_batch.npz")['arr_0'] + cell = np.load("C:/Users/alber/nequip/nequip/scripts/aspirin_cell.npz")['arr_0'] + pbc = np.load("C:/Users/alber/nequip/nequip/scripts/aspirin_pbc.npz")['arr_0'] + edge_index = np.load("C:/Users/alber/nequip/nequip/scripts/aspirin_edge_index.npz")['arr_0'] + data = AtomicData(pos=pos, atomic_numbers=atomic_numbers, batch=batch, cell=cell, pbc=pbc, edge_index=edge_index) + + to_ase_atoms_batches = data.to_ase() + for atoms_batch in to_ase_atoms_batches: + assert np.shape(atoms_batch.get_positions()) == (len(atoms_batch), 3) + assert np.shape(atoms_batch.get_atomic_numbers()) == (len(atoms_batch),) + assert np.array_equal(atoms_batch.get_cell(), np.zeros((3, 3))) + assert np.array_equal(atoms_batch.get_pbc(), np.zeros(3, dtype=bool)) + def test_non_periodic_edge(CH3CHO): atoms, data = CH3CHO From 0539bf3b235d086d5f04cce43493b6f9b285cf94 Mon Sep 17 00:00:00 2001 From: Albert Zhu Date: Tue, 15 Jun 2021 14:33:01 -0400 Subject: [PATCH 03/79] commit before switching branches --- nequip/train/trainer.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/nequip/train/trainer.py b/nequip/train/trainer.py index ee3500b1..e667ecc6 100644 --- a/nequip/train/trainer.py +++ 
b/nequip/train/trainer.py @@ -708,13 +708,13 @@ def batch_step(self, data, validation=False): # We make a shallow copy of the input dict in case the model modifies it input_data = data_unscaled.copy() out = self.model(input_data) - if self.ibatch == 0: - np.savez('aspirin_atomic_numbers', out[AtomicDataDict.ATOMIC_NUMBERS_KEY].detach().numpy()) - np.savez('aspirin_batch', out[AtomicDataDict.BATCH_KEY].detach().numpy()) - np.savez('aspirin_positions', out[AtomicDataDict.POSITIONS_KEY].detach().numpy()) - np.savez('aspirin_cell', out[AtomicDataDict.CELL_KEY].detach().numpy()) - np.savez('aspirin_pbc', out[AtomicDataDict.PBC_KEY].detach().numpy()) - np.savez('aspirin_edge_index', out[AtomicDataDict.EDGE_INDEX_KEY].detach().numpy()) + # if self.ibatch == 0: + # np.savez('aspirin_atomic_numbers', out[AtomicDataDict.ATOMIC_NUMBERS_KEY].detach().numpy()) + # np.savez('aspirin_batch', out[AtomicDataDict.BATCH_KEY].detach().numpy()) + # np.savez('aspirin_positions', out[AtomicDataDict.POSITIONS_KEY].detach().numpy()) + # np.savez('aspirin_cell', out[AtomicDataDict.CELL_KEY].detach().numpy()) + # np.savez('aspirin_pbc', out[AtomicDataDict.PBC_KEY].detach().numpy()) + # np.savez('aspirin_edge_index', out[AtomicDataDict.EDGE_INDEX_KEY].detach().numpy()) del input_data From 799d897a8a68dc2dd1dcb3930736e834e8df08d1 Mon Sep 17 00:00:00 2001 From: Albert Zhu Date: Tue, 15 Jun 2021 14:48:40 -0400 Subject: [PATCH 04/79] update model out blocks --- nequip/models/_eng.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nequip/models/_eng.py b/nequip/models/_eng.py index 0496af47..25baaebd 100644 --- a/nequip/models/_eng.py +++ b/nequip/models/_eng.py @@ -52,7 +52,7 @@ def EnergyModel(**shared_params) -> SequentialGraphNetwork: ), "output_hidden_to_scalar": ( AtomwiseLinear, - dict(irreps_out="1x0e", out_field=AtomicDataDict.PER_ATOM_ENERGY_KEY), + dict(field="out_block_hidden", irreps_out="1x0e", out_field=AtomicDataDict.PER_ATOM_ENERGY_KEY), ), } ) From 4fa53497ee39f1005861d326de3aa3a4aaedb0c1 Mon Sep 17 00:00:00 2001 From: nw13slx Date: Thu, 17 Jun 2021 12:00:50 -0400 Subject: [PATCH 05/79] update data interface --- nequip/data/dataset.py | 6 +++--- nequip/datasets/aspirin.py | 2 +- nequip/train/trainer.py | 2 -- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/nequip/data/dataset.py b/nequip/data/dataset.py index bdc4f737..7c092b1d 100644 --- a/nequip/data/dataset.py +++ b/nequip/data/dataset.py @@ -295,8 +295,8 @@ def statistics( unbiased: bool = True, modes: Optional[List[Union[str]]] = None, ) -> List[tuple]: - if self.__indices__ is not None: - selector = torch.as_tensor(self.__indices__)[::stride] + if self._indices is not None: + selector = torch.as_tensor(self._indices)[::stride] else: selector = torch.arange(0, self.len(), stride) @@ -318,7 +318,7 @@ def statistics( obj = self.fixed_fields else: obj = self.data - + if callable(field): arr = field(obj) else: diff --git a/nequip/datasets/aspirin.py b/nequip/datasets/aspirin.py index d6096f40..d5c42ebc 100644 --- a/nequip/datasets/aspirin.py +++ b/nequip/datasets/aspirin.py @@ -32,6 +32,6 @@ def get_data(self): fixed_fields = { AtomicDataDict.ATOMIC_NUMBERS_KEY: np.asarray(data["z"], dtype=np.int), AtomicDataDict.PBC_KEY: np.array([False, False, False]), - AtomicDataDict.CELL_KEY: None, + # AtomicDataDict.CELL_KEY: None, } return arrays, fixed_fields diff --git a/nequip/train/trainer.py b/nequip/train/trainer.py index 049efcdd..b183b802 100644 --- a/nequip/train/trainer.py +++ b/nequip/train/trainer.py @@ -312,8 +312,6 @@ 
def __init__( # initialize the optimizer and scheduler, the params will be updated in the function self.init() - self.statistics = {} - if not (restart and append): d = self.as_dict() for key in list(d.keys()): From 71a198d99a9bb58c47749892e5b98e520ba1c974 Mon Sep 17 00:00:00 2001 From: nw13slx Date: Thu, 17 Jun 2021 14:06:31 -0400 Subject: [PATCH 06/79] resolve num_node conflicts --- nequip/data/AtomicData.py | 11 ++++++++++- nequip/data/dataset.py | 2 +- nequip/datasets/aspirin.py | 2 +- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/nequip/data/AtomicData.py b/nequip/data/AtomicData.py index e4c679e8..94c4fbea 100644 --- a/nequip/data/AtomicData.py +++ b/nequip/data/AtomicData.py @@ -263,8 +263,15 @@ def to_AtomicDataDict( keys = data.keys() else: raise ValueError(f"Invalid data `{repr(data)}`") + return { - k: data[k] for k in keys if (k not in exclude_keys and data[k] is not None) + k: data[k] + for k in keys + if ( + k not in exclude_keys + and data[k] is not None + and isinstance(data[k], torch.Tensor) + ) } @classmethod @@ -321,6 +328,8 @@ def without_nodes(self, which_nodes): ] elif k == AtomicDataDict.CELL_KEY: new_dict[k] = self[k] + elif k == "num_nodes": + pass else: if len(self[k]) == self.num_nodes: new_dict[k] = self[k][mask] diff --git a/nequip/data/dataset.py b/nequip/data/dataset.py index 7c092b1d..927da4f8 100644 --- a/nequip/data/dataset.py +++ b/nequip/data/dataset.py @@ -318,7 +318,7 @@ def statistics( obj = self.fixed_fields else: obj = self.data - + if callable(field): arr = field(obj) else: diff --git a/nequip/datasets/aspirin.py b/nequip/datasets/aspirin.py index d5c42ebc..a99688c5 100644 --- a/nequip/datasets/aspirin.py +++ b/nequip/datasets/aspirin.py @@ -32,6 +32,6 @@ def get_data(self): fixed_fields = { AtomicDataDict.ATOMIC_NUMBERS_KEY: np.asarray(data["z"], dtype=np.int), AtomicDataDict.PBC_KEY: np.array([False, False, False]), - # AtomicDataDict.CELL_KEY: None, + # AtomicDataDict.CELL_KEY: None, } return arrays, fixed_fields From 365124899d912cad7bb81db21dc1cba2b1f715df Mon Sep 17 00:00:00 2001 From: nw13slx Date: Thu, 17 Jun 2021 18:07:54 -0400 Subject: [PATCH 07/79] convert none python intrinsic type to repr while printing --- nequip/train/trainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nequip/train/trainer.py b/nequip/train/trainer.py index b183b802..e5aa954a 100644 --- a/nequip/train/trainer.py +++ b/nequip/train/trainer.py @@ -316,7 +316,7 @@ def __init__( d = self.as_dict() for key in list(d.keys()): if not isinstance(d[key], (float, int, str, list, tuple)): - d[key] = type(d[key]) + d[key] = repr(d[key]) self.log_dictionary(d, name="Initialization") logging.debug("! 
Done Initialize Trainer") From c2519fe33d4344cd2bd494a2652ef3713e0fb03f Mon Sep 17 00:00:00 2001 From: nw13slx Date: Thu, 17 Jun 2021 18:08:08 -0400 Subject: [PATCH 08/79] solve initialization bug --- nequip/utils/initialization.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/nequip/utils/initialization.py b/nequip/utils/initialization.py index 2e8c0e02..1cde1ea9 100644 --- a/nequip/utils/initialization.py +++ b/nequip/utils/initialization.py @@ -15,8 +15,9 @@ def unit_uniform_init_(t: torch.Tensor): def uniform_initialize_fcs(mod: torch.nn.Module): """Initialize ``e3nn.nn.FullyConnectedNet``s with ``unit_uniform_init_``""" if isinstance(mod, e3nn.nn.FullyConnectedNet): - for w in mod.weights: - unit_uniform_init_(w) + for ilayer, layer in mod._modules.items(): + for w in layer.weight: + unit_uniform_init_(w) # no need to do torch.nn.Linear, which is uniform by default From 0501ea5f76b91561b6575ce5a7354979ad443c3d Mon Sep 17 00:00:00 2001 From: nw13slx Date: Thu, 17 Jun 2021 19:23:19 -0400 Subject: [PATCH 09/79] change markdowns and setup --- CHANGELOG.md | 1 + setup.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 900f00b5..094fd6df 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 Most recent change on the bottom. ## [Unreleased] +- update interfaces with torch_geometric 1.7 and e3nn 0.3.2 ## [0.3.2] - 2021-06-09 ### Added diff --git a/setup.py b/setup.py index 9dea66e7..6f289274 100644 --- a/setup.py +++ b/setup.py @@ -29,8 +29,8 @@ "numpy", "ase", "torch>=1.8", - "torch_geometric", - "e3nn>=0.3", + "torch_geometric>=1.7", + "e3nn>=0.3.2", "pyyaml", "contextlib2;python_version<'3.7'", # backport of nullcontext "typing_extensions;python_version<'3.8'", From 54dd30366dad788352d619416f6a2416fb058f22 Mon Sep 17 00:00:00 2001 From: Albert Zhu Date: Fri, 18 Jun 2021 20:14:32 -0400 Subject: [PATCH 10/79] fix bugs in to_ase --- nequip/data/AtomicData.py | 23 +++++++++++++---------- nequip/scripts/ase_tutorial.py | 14 +++++++------- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/nequip/data/AtomicData.py b/nequip/data/AtomicData.py index a5a07ea8..6bee073c 100644 --- a/nequip/data/AtomicData.py +++ b/nequip/data/AtomicData.py @@ -252,26 +252,29 @@ def from_ase(cls, atoms, r_max, **kwargs): def to_ase(self): """Build a list of ase.Atoms objects from an AtomicData object""" - positions = self.pos.tolist() - batches = self.batch.tolist() if "batch" in self and self.batch is not None else None - atomic_nums = self.atomic_numbers.tolist() \ + positions = self.pos + batches = self.batch if AtomicDataDict.BATCH_KEY in self and self.batch is not None else None + atomic_nums = self.atomic_numbers \ if AtomicDataDict.ATOMIC_NUMBERS_KEY in self and self.atomic_numbers is not None else None - pbc = self.pbc if "pbc" in self and self.pbc is not None else None - cell = self.cell if "cell" in self and self.cell is not None else None + pbc = self.pbc if AtomicDataDict.PBC_KEY in self and self.pbc is not None else None + cell = self.cell if AtomicDataDict.CELL_KEY in self and self.cell is not None else None unique_batches = [0] if batches is not None: - unique_batches = list(set(batches)) + unique_batches = list(set(batches.tolist())) - num_atoms = int(len(positions) / len(unique_batches)) - batch_atoms = [] + # Number of atoms per batch + num_atoms = int(list(positions.shape)[0] / len(unique_batches)) + batch_atoms = [] 
for batch in unique_batches: atoms = [] for i in range(num_atoms): atom_index = batch * num_atoms + i - atoms.append(Atom(atomic_nums[atom_index], position=positions[atom_index])) - mol = Atoms(atoms, cell=cell[batch], pbc=pbc[batch]) + atoms.append(Atom(atomic_nums[atom_index], position=positions[atom_index, :])) + mol = Atoms(atoms, + cell=cell[batch] if cell is not None else None, + pbc=pbc[batch] if pbc is not None else None) batch_atoms.append(mol) return batch_atoms diff --git a/nequip/scripts/ase_tutorial.py b/nequip/scripts/ase_tutorial.py index d271440d..8cbe23c9 100644 --- a/nequip/scripts/ase_tutorial.py +++ b/nequip/scripts/ase_tutorial.py @@ -54,23 +54,23 @@ [-3.2100, 0.7930, 1.2202], [-3.5192, 2.4355, 0.9832]]) -positions = torch.tensor(pos).tolist() -batches = torch.tensor([0], dtype=torch.int32).tolist() -atomic_nums = torch.tensor(aspirin_atoms).tolist() +positions = torch.tensor(pos) +batches = torch.tensor([0], dtype=torch.int32) +atomic_nums = torch.tensor(aspirin_atoms) pbc = torch.tensor([[False, False, False]]).numpy() cell = torch.tensor([[[0, 0, 0], [0, 0, 0], [0, 0, 0]]]).numpy() -unique_batches = list(set(batches)) if batches is not None else [0] +unique_batches = list(set(batches.tolist())) if batches is not None else [0] -num_atoms = int(len(positions) / len(unique_batches)) +num_atoms = int(list(positions.shape)[0] / len(unique_batches)) batch_atoms = [] for batch in unique_batches: atoms = [] for i in range(num_atoms): atom_index = batch * num_atoms + i - atoms.append(Atom(atomic_nums[atom_index], position=positions[atom_index])) - mol = Atoms(atoms, cell=cell[batch], pbc=pbc[batch]) + atoms.append(Atom(atomic_nums[atom_index], position=positions[atom_index, :])) + mol = Atoms(atoms, cell=cell[batch] if cell is not None else None, pbc=pbc[batch] if pbc is not None else None) batch_atoms.append(mol) view(batch_atoms[0]) From 0be34bb8e9ed555bccdfff5dc51c5f83737c353d Mon Sep 17 00:00:00 2001 From: nw13slx Date: Mon, 21 Jun 2021 13:22:10 -0400 Subject: [PATCH 11/79] change version in README and setup.py --- README.md | 8 ++++---- setup.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 90629bdf..e4d7a20c 100644 --- a/README.md +++ b/README.md @@ -16,15 +16,15 @@ NequIP requires: To install: -* Install [PyTorch Geometric](https://github.com/rusty1s/pytorch_geometric), make sure to install this with your correct version of CUDA/CPU and to use PyTorch Geometric version 1.7.0: +* Install [PyTorch Geometric](https://github.com/rusty1s/pytorch_geometric), make sure to install this with your correct version of CUDA/CPU and to use PyTorch Geometric version 1.7.1: ``` pip install torch-scatter -f https://pytorch-geometric.com/whl/torch-1.8.0+${CUDA}.html pip install torch-sparse -f https://pytorch-geometric.com/whl/torch-1.8.0+${CUDA}.html pip install torch-cluster -f https://pytorch-geometric.com/whl/torch-1.8.0+${CUDA}.html pip install torch-spline-conv -f https://pytorch-geometric.com/whl/torch-1.8.0+${CUDA}.html -pip install torch-geometric==1.7.0 -pip install e3nn==0.2.9 +pip install torch-geometric==1.7.1 +pip install e3nn==0.3.2 ``` where ```${CUDA}``` should be replaced by either ```cpu```, ```cu101```, ```cu102```, or ```cu111``` depending on your PyTorch installation, for details see [here](https://github.com/rusty1s/pytorch_geometric). 
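To confirm an environment actually matches these pins, a minimal check — a sketch only, assuming nothing beyond the standard `__version__` attributes that `torch`, `torch_geometric`, and `e3nn` all expose:

```
# Quick sanity check that installed versions match the README pins.
import torch
import torch_geometric
import e3nn

print("torch:", torch.__version__)                       # expect >= 1.8
print("torch_geometric:", torch_geometric.__version__)   # expect 1.7.x
print("e3nn:", e3nn.__version__)                         # expect 0.3.x
```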
@@ -32,7 +32,7 @@ where ```${CUDA}``` should be replaced by either ```cpu```, ```cu101```, ```cu10 * Install [e3nn](https://github.com/e3nn/e3nn), version 0.2.9: ``` -pip install e3nn==0.2.9 +pip install e3nn==0.3.2 ``` * Install our fork of [`pytorch_ema`](https://github.com/Linux-cpp-lisp/pytorch_ema) for using an Exponential Moving Average on the weights: diff --git a/setup.py b/setup.py index 6f289274..82c93441 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ "numpy", "ase", "torch>=1.8", - "torch_geometric>=1.7", + "torch_geometric>=1.7.1", "e3nn>=0.3.2", "pyyaml", "contextlib2;python_version<'3.7'", # backport of nullcontext From 0f90409e88be81101d32eef545025d6d6aedf991 Mon Sep 17 00:00:00 2001 From: nw13slx Date: Mon, 21 Jun 2021 13:23:31 -0400 Subject: [PATCH 12/79] fix e3nn version --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e4d7a20c..9d922b3e 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ pip install e3nn==0.3.2 where ```${CUDA}``` should be replaced by either ```cpu```, ```cu101```, ```cu102```, or ```cu111``` depending on your PyTorch installation, for details see [here](https://github.com/rusty1s/pytorch_geometric). -* Install [e3nn](https://github.com/e3nn/e3nn), version 0.2.9: +* Install [e3nn](https://github.com/e3nn/e3nn), version 0.3.2: ``` pip install e3nn==0.3.2 From 95ae236eac2b3ebdcf2107d30674f0baf8dfd49f Mon Sep 17 00:00:00 2001 From: Albert Zhu Date: Thu, 24 Jun 2021 14:55:20 -0400 Subject: [PATCH 13/79] incorporate PR feedback except atomic_batch test --- configs/example.yaml | 2 +- nequip/data/AtomicData.py | 65 +++++++++++++++++------------- nequip/models/_eng.py | 7 +--- nequip/scripts/ase_tutorial.py | 37 +++++++++-------- nequip/scripts/test_saved_model.py | 0 nequip/train/trainer.py | 8 ---- tests/data/test_AtomicData.py | 47 +++++++++++++-------- 7 files changed, 91 insertions(+), 75 deletions(-) delete mode 100644 nequip/scripts/test_saved_model.py diff --git a/configs/example.yaml b/configs/example.yaml index 2ac74bfe..21fea50a 100644 --- a/configs/example.yaml +++ b/configs/example.yaml @@ -61,7 +61,7 @@ npz_fixed_field_keys: # format: extxyz # logging -wandb: false # we recommend using wandb for logging, we'll turn it off here as it's optional +wandb: true # we recommend using wandb for logging, we'll turn it off here as it's optional wandb_project: aspirin # project name used in wandb wandb_resume: true # if true and restart is true, wandb run data will be restarted and updated. # if false, a new wandb run will be generated diff --git a/nequip/data/AtomicData.py b/nequip/data/AtomicData.py index 6bee073c..2cfa67f4 100644 --- a/nequip/data/AtomicData.py +++ b/nequip/data/AtomicData.py @@ -11,7 +11,7 @@ import numpy as np import ase.neighborlist from ase.calculators.singlepoint import SinglePointCalculator, SinglePointDFTCalculator -from ase import Atoms, Atom +from ase import Atoms import torch from torch_geometric.data import Data @@ -250,34 +250,43 @@ def from_ase(cls, atoms, r_max, **kwargs): **add_fields, ) - def to_ase(self): - """Build a list of ase.Atoms objects from an AtomicData object""" + def to_ase(self) -> Union[list, ase.Atoms]: + """Build a (list of) ``ase.Atoms`` object(s) from an ``AtomicData`` object. + + For each unique batch number associated with AtomicDataDict.BATCH_KEY, + an ``ase.Atoms`` object is created. If AtomicDataDict.BATCH_KEY does not + exist in self, a single ``ase.Atoms`` object is created. 
+ + Returns: + A list of ``ase.Atoms`` objects if AtomicDataDict.BATCH_KEY is in self + and is not None. Otherwise, a single ``ase.Atoms`` object is returned. + """ positions = self.pos - batches = self.batch if AtomicDataDict.BATCH_KEY in self and self.batch is not None else None - atomic_nums = self.atomic_numbers \ - if AtomicDataDict.ATOMIC_NUMBERS_KEY in self and self.atomic_numbers is not None else None - pbc = self.pbc if AtomicDataDict.PBC_KEY in self and self.pbc is not None else None - cell = self.cell if AtomicDataDict.CELL_KEY in self and self.cell is not None else None - - unique_batches = [0] - if batches is not None: - unique_batches = list(set(batches.tolist())) - - # Number of atoms per batch - num_atoms = int(list(positions.shape)[0] / len(unique_batches)) - - batch_atoms = [] - for batch in unique_batches: - atoms = [] - for i in range(num_atoms): - atom_index = batch * num_atoms + i - atoms.append(Atom(atomic_nums[atom_index], position=positions[atom_index, :])) - mol = Atoms(atoms, - cell=cell[batch] if cell is not None else None, - pbc=pbc[batch] if pbc is not None else None) - batch_atoms.append(mol) - - return batch_atoms + atomic_nums = self.atomic_numbers + pbc = self.pbc if AtomicDataDict.PBC_KEY in self else None + cell = self.cell if AtomicDataDict.CELL_KEY in self else None + batch = self.batch if AtomicDataDict.BATCH_KEY in self else None + + if batch is not None: + unique_batches = range(batch.max() + 1) + batch_atoms = [] + for batch_idx in unique_batches: + mask = batch == batch_idx + mol = Atoms( + numbers=atomic_nums[mask], + positions=positions[mask], + cell=cell[batch_idx] if cell is not None else None, + pbc=pbc[batch_idx] if pbc is not None else None, + ) + batch_atoms.append(mol) + return batch_atoms + else: + return Atoms( + numbers=atomic_nums, + positions=positions, + cell=cell[0] if cell is not None else None, + pbc=pbc[0] if pbc is not None else None, + ) def get_edge_vectors(data: Data) -> torch.Tensor: data = AtomicDataDict.with_edge_vectors(AtomicData.to_AtomicDataDict(data)) diff --git a/nequip/models/_eng.py b/nequip/models/_eng.py index 25baaebd..ec17a7b6 100644 --- a/nequip/models/_eng.py +++ b/nequip/models/_eng.py @@ -46,13 +46,10 @@ def EnergyModel(**shared_params) -> SequentialGraphNetwork: { # TODO: the next linear throws out all L > 0, don't create them in the last layer of convnet # -- output block -- - "conv_to_output_hidden": ( - AtomwiseLinear, - dict(out_field="out_block_hidden") - ), + "conv_to_output_hidden": AtomwiseLinear, "output_hidden_to_scalar": ( AtomwiseLinear, - dict(field="out_block_hidden", irreps_out="1x0e", out_field=AtomicDataDict.PER_ATOM_ENERGY_KEY), + dict(irreps_out="1x0e", out_field=AtomicDataDict.PER_ATOM_ENERGY_KEY), ), } ) diff --git a/nequip/scripts/ase_tutorial.py b/nequip/scripts/ase_tutorial.py index 8cbe23c9..a9bd7806 100644 --- a/nequip/scripts/ase_tutorial.py +++ b/nequip/scripts/ase_tutorial.py @@ -55,22 +55,27 @@ [-3.5192, 2.4355, 0.9832]]) positions = torch.tensor(pos) -batches = torch.tensor([0], dtype=torch.int32) +batch = None +# batch = torch.tensor(np.zeros(21), dtype=torch.int32) atomic_nums = torch.tensor(aspirin_atoms) -pbc = torch.tensor([[False, False, False]]).numpy() -cell = torch.tensor([[[0, 0, 0], [0, 0, 0], [0, 0, 0]]]).numpy() +pbc = torch.zeros(1, 3, dtype=bool) +cell = torch.zeros(1, 3, 3) -unique_batches = list(set(batches.tolist())) if batches is not None else [0] +if batch is not None: + unique_batches = range(batch.max() + 1) + batch_atoms = [] + for batch_idx in 
unique_batches: + mask = batch == batch_idx + print(mask) + mol = Atoms(numbers=atomic_nums[mask], + positions=positions[mask], + cell=cell[batch_idx] if cell is not None else None, + pbc=pbc[batch_idx] if pbc is not None else None) + batch_atoms.append(mol) +else: + atoms = Atoms(numbers=atomic_nums, + positions=positions, + cell=cell[0] if cell is not None else None, + pbc=pbc[0] if pbc is not None else None) -num_atoms = int(list(positions.shape)[0] / len(unique_batches)) -batch_atoms = [] - -for batch in unique_batches: - atoms = [] - for i in range(num_atoms): - atom_index = batch * num_atoms + i - atoms.append(Atom(atomic_nums[atom_index], position=positions[atom_index, :])) - mol = Atoms(atoms, cell=cell[batch] if cell is not None else None, pbc=pbc[batch] if pbc is not None else None) - batch_atoms.append(mol) - -view(batch_atoms[0]) +view(atoms) diff --git a/nequip/scripts/test_saved_model.py b/nequip/scripts/test_saved_model.py deleted file mode 100644 index e69de29b..00000000 diff --git a/nequip/train/trainer.py b/nequip/train/trainer.py index e667ecc6..049efcdd 100644 --- a/nequip/train/trainer.py +++ b/nequip/train/trainer.py @@ -708,14 +708,6 @@ def batch_step(self, data, validation=False): # We make a shallow copy of the input dict in case the model modifies it input_data = data_unscaled.copy() out = self.model(input_data) - # if self.ibatch == 0: - # np.savez('aspirin_atomic_numbers', out[AtomicDataDict.ATOMIC_NUMBERS_KEY].detach().numpy()) - # np.savez('aspirin_batch', out[AtomicDataDict.BATCH_KEY].detach().numpy()) - # np.savez('aspirin_positions', out[AtomicDataDict.POSITIONS_KEY].detach().numpy()) - # np.savez('aspirin_cell', out[AtomicDataDict.CELL_KEY].detach().numpy()) - # np.savez('aspirin_pbc', out[AtomicDataDict.PBC_KEY].detach().numpy()) - # np.savez('aspirin_edge_index', out[AtomicDataDict.EDGE_INDEX_KEY].detach().numpy()) - del input_data # If we're in evaluation mode (i.e. 
validation), then diff --git a/tests/data/test_AtomicData.py b/tests/data/test_AtomicData.py index 7040a8ab..0a9e39ad 100644 --- a/tests/data/test_AtomicData.py +++ b/tests/data/test_AtomicData.py @@ -21,29 +21,42 @@ def test_from_ase(CuFcc): def test_to_ase(CH3CHO): atoms, data = CH3CHO - to_ase = data.to_ase() - to_ase_atoms = to_ase[0] + to_ase_atoms = data.to_ase() assert np.allclose(atoms.get_positions(), to_ase_atoms.get_positions()) assert np.array_equal(atoms.get_atomic_numbers(), to_ase_atoms.get_atomic_numbers()) assert np.array_equal(atoms.get_pbc(), to_ase_atoms.get_pbc()) assert np.array_equal(atoms.get_cell(), to_ase_atoms.get_cell()) -def test_to_ase_batches(): - atomic_numbers = np.load("C:/Users/alber/nequip/nequip/scripts/aspirin_atomic_numbers.npz")['arr_0'] - pos = np.load("C:/Users/alber/nequip/nequip/scripts/aspirin_positions.npz")['arr_0'] - batch = np.load("C:/Users/alber/nequip/nequip/scripts/aspirin_batch.npz")['arr_0'] - cell = np.load("C:/Users/alber/nequip/nequip/scripts/aspirin_cell.npz")['arr_0'] - pbc = np.load("C:/Users/alber/nequip/nequip/scripts/aspirin_pbc.npz")['arr_0'] - edge_index = np.load("C:/Users/alber/nequip/nequip/scripts/aspirin_edge_index.npz")['arr_0'] - data = AtomicData(pos=pos, atomic_numbers=atomic_numbers, batch=batch, cell=cell, pbc=pbc, edge_index=edge_index) - - to_ase_atoms_batches = data.to_ase() - for atoms_batch in to_ase_atoms_batches: - assert np.shape(atoms_batch.get_positions()) == (len(atoms_batch), 3) - assert np.shape(atoms_batch.get_atomic_numbers()) == (len(atoms_batch),) - assert np.array_equal(atoms_batch.get_cell(), np.zeros((3, 3))) - assert np.array_equal(atoms_batch.get_pbc(), np.zeros(3, dtype=bool)) +def test_to_ase_batches(atomic_batch): + x = atomic_batch + # atomic_numbers = np.load( + # "C:/Users/alber/nequip/nequip/scripts/aspirin_atomic_numbers.npz" + # )["arr_0"] + # pos = np.load("C:/Users/alber/nequip/nequip/scripts/aspirin_positions.npz")["arr_0"] + # batch = np.load("C:/Users/alber/nequip/nequip/scripts/aspirin_batch.npz")["arr_0"] + # cell = np.load("C:/Users/alber/nequip/nequip/scripts/aspirin_cell.npz")["arr_0"] + # pbc = np.load("C:/Users/alber/nequip/nequip/scripts/aspirin_pbc.npz")["arr_0"] + # edge_index = np.load("C:/Users/alber/nequip/nequip/scripts/aspirin_edge_index.npz")[ + # "arr_0" + # ] + # data = AtomicData( + # pos=pos, + # atomic_numbers=atomic_numbers, + # batch=batch, + # cell=cell, + # pbc=pbc, + # edge_index=edge_index, + # ) + + # atomic_batch fixture + assert True + # to_ase_atoms_batches = data.to_ase() + # for atoms_batch in to_ase_atoms_batches: + # assert atoms_batch.get_positions().shape == (len(atoms_batch), 3) + # assert atoms_batch.get_atomic_numbers().shape == (len(atoms_batch),) + # assert np.array_equal(atoms_batch.get_cell(), np.zeros((3, 3))) + # assert np.array_equal(atoms_batch.get_pbc(), np.zeros(3, dtype=bool)) def test_non_periodic_edge(CH3CHO): From d97dce93557da01768713f1a2dc62e035b0ac35a Mon Sep 17 00:00:00 2001 From: Albert Zhu Date: Thu, 24 Jun 2021 17:14:10 -0400 Subject: [PATCH 14/79] finish test_to_ase_batches --- tests/data/test_AtomicData.py | 38 +++++++++-------------------------- 1 file changed, 10 insertions(+), 28 deletions(-) diff --git a/tests/data/test_AtomicData.py b/tests/data/test_AtomicData.py index 0a9e39ad..a13ecfa0 100644 --- a/tests/data/test_AtomicData.py +++ b/tests/data/test_AtomicData.py @@ -29,34 +29,16 @@ def test_to_ase(CH3CHO): def test_to_ase_batches(atomic_batch): - x = atomic_batch - # atomic_numbers = np.load( - # 
"C:/Users/alber/nequip/nequip/scripts/aspirin_atomic_numbers.npz" - # )["arr_0"] - # pos = np.load("C:/Users/alber/nequip/nequip/scripts/aspirin_positions.npz")["arr_0"] - # batch = np.load("C:/Users/alber/nequip/nequip/scripts/aspirin_batch.npz")["arr_0"] - # cell = np.load("C:/Users/alber/nequip/nequip/scripts/aspirin_cell.npz")["arr_0"] - # pbc = np.load("C:/Users/alber/nequip/nequip/scripts/aspirin_pbc.npz")["arr_0"] - # edge_index = np.load("C:/Users/alber/nequip/nequip/scripts/aspirin_edge_index.npz")[ - # "arr_0" - # ] - # data = AtomicData( - # pos=pos, - # atomic_numbers=atomic_numbers, - # batch=batch, - # cell=cell, - # pbc=pbc, - # edge_index=edge_index, - # ) - - # atomic_batch fixture - assert True - # to_ase_atoms_batches = data.to_ase() - # for atoms_batch in to_ase_atoms_batches: - # assert atoms_batch.get_positions().shape == (len(atoms_batch), 3) - # assert atoms_batch.get_atomic_numbers().shape == (len(atoms_batch),) - # assert np.array_equal(atoms_batch.get_cell(), np.zeros((3, 3))) - # assert np.array_equal(atoms_batch.get_pbc(), np.zeros(3, dtype=bool)) + atomic_data = AtomicData.from_dict(vars(atomic_batch)) + to_ase_atoms_batch = atomic_data.to_ase() + for batch_idx, atoms in enumerate(to_ase_atoms_batch): + mask = atomic_data.batch == batch_idx + assert atoms.get_positions().shape == (len(atoms), 3) + assert np.allclose(atoms.get_positions(), atomic_data.pos[mask]) + assert atoms.get_atomic_numbers().shape == (len(atoms),) + assert np.array_equal(atoms.get_atomic_numbers(), atomic_data.atomic_numbers[mask]) + assert np.array_equal(atoms.get_cell(), atomic_data.cell[batch_idx]) + assert np.array_equal(atoms.get_pbc(), atomic_data.pbc[batch_idx]) def test_non_periodic_edge(CH3CHO): From 114b2e292c9b933ed66265538b0f1cf6d2be3b2b Mon Sep 17 00:00:00 2001 From: Albert Zhu Date: Thu, 24 Jun 2021 17:26:47 -0400 Subject: [PATCH 15/79] remove ase_tutorial.py --- nequip/data/AtomicData.py | 4 +- nequip/scripts/ase_tutorial.py | 81 ---------------------------------- tests/data/test_AtomicData.py | 4 +- 3 files changed, 5 insertions(+), 84 deletions(-) delete mode 100644 nequip/scripts/ase_tutorial.py diff --git a/nequip/data/AtomicData.py b/nequip/data/AtomicData.py index 2cfa67f4..64a59f86 100644 --- a/nequip/data/AtomicData.py +++ b/nequip/data/AtomicData.py @@ -5,7 +5,7 @@ import warnings from copy import deepcopy -from typing import Union, Tuple, Dict, Optional +from typing import Union, Tuple, Dict, Optional, List from collections.abc import Mapping import numpy as np @@ -250,7 +250,7 @@ def from_ase(cls, atoms, r_max, **kwargs): **add_fields, ) - def to_ase(self) -> Union[list, ase.Atoms]: + def to_ase(self) -> Union[List[ase.Atoms], ase.Atoms]: """Build a (list of) ``ase.Atoms`` object(s) from an ``AtomicData`` object. 
For each unique batch number associated with AtomicDataDict.BATCH_KEY, diff --git a/nequip/scripts/ase_tutorial.py b/nequip/scripts/ase_tutorial.py deleted file mode 100644 index a9bd7806..00000000 --- a/nequip/scripts/ase_tutorial.py +++ /dev/null @@ -1,81 +0,0 @@ -from ase import Atoms, Atom -from ase.visualize import view - -import warnings -from copy import deepcopy -from typing import Union, Tuple, Dict, Optional -from collections.abc import Mapping - -import numpy as np -import ase.neighborlist -from ase.calculators.singlepoint import SinglePointCalculator, SinglePointDFTCalculator - -import torch -from torch_geometric.data import Data -import e3nn.o3 - -# --- Tutorial --- # -# d = 1.208 -# o2 = Atoms([Atom("O", [0, 0, 0]), -# Atom("C", [0, 0, d])], -# cell=[(3, 0, 0), -# (0, 7.5, 0), -# (0, 0, 8)]) -# o2.set_pbc((True, True, True)) -# view(o2) - -# --- PyTorch testing --- # -# a = torch.randn(2, 3) -# a_list = a.tolist() -# print(a_list) -# print(a_list[0]) - -aspirin_atoms = [6, 6, 6, 6, 6, 6, 6, 8, 8, 8, 6, 6, 8, 1, 1, 1, 1, 1, 1, 1, 1] - -pos = np.array([[1.9351, -1.3223, -0.2434], - [1.1050, 1.1607, -1.2501], - [2.7351, -0.6791, -1.2130], - [2.3617, 0.5501, -1.6917], - [-3.2277, 1.4643, 0.4005], - [0.7966, -0.7842, 0.2737], - [0.3291, 0.4374, -0.3489], - [-1.1688, -1.4345, 1.3998], - [-1.8708, 1.9584, -1.4831], - [0.9033, -2.2319, 2.1187], - [0.0525, -1.4987, 1.3657], - [-1.9753, 1.5393, -0.3593], - [-0.9103, 0.9451, 0.2944], - [0.4981, -2.5795, 2.9364], - [2.3691, -2.2767, 0.1016], - [0.7304, 2.1011, -1.6379], - [3.7474, -1.1822, -1.3683], - [2.8886, 1.2051, -2.3682], - [-4.1139, 1.2818, -0.3308], - [-3.2100, 0.7930, 1.2202], - [-3.5192, 2.4355, 0.9832]]) - -positions = torch.tensor(pos) -batch = None -# batch = torch.tensor(np.zeros(21), dtype=torch.int32) -atomic_nums = torch.tensor(aspirin_atoms) -pbc = torch.zeros(1, 3, dtype=bool) -cell = torch.zeros(1, 3, 3) - -if batch is not None: - unique_batches = range(batch.max() + 1) - batch_atoms = [] - for batch_idx in unique_batches: - mask = batch == batch_idx - print(mask) - mol = Atoms(numbers=atomic_nums[mask], - positions=positions[mask], - cell=cell[batch_idx] if cell is not None else None, - pbc=pbc[batch_idx] if pbc is not None else None) - batch_atoms.append(mol) -else: - atoms = Atoms(numbers=atomic_nums, - positions=positions, - cell=cell[0] if cell is not None else None, - pbc=pbc[0] if pbc is not None else None) - -view(atoms) diff --git a/tests/data/test_AtomicData.py b/tests/data/test_AtomicData.py index a13ecfa0..8c765d2a 100644 --- a/tests/data/test_AtomicData.py +++ b/tests/data/test_AtomicData.py @@ -36,7 +36,9 @@ def test_to_ase_batches(atomic_batch): assert atoms.get_positions().shape == (len(atoms), 3) assert np.allclose(atoms.get_positions(), atomic_data.pos[mask]) assert atoms.get_atomic_numbers().shape == (len(atoms),) - assert np.array_equal(atoms.get_atomic_numbers(), atomic_data.atomic_numbers[mask]) + assert np.array_equal( + atoms.get_atomic_numbers(), atomic_data.atomic_numbers[mask] + ) assert np.array_equal(atoms.get_cell(), atomic_data.cell[batch_idx]) assert np.array_equal(atoms.get_pbc(), atomic_data.pbc[batch_idx]) From 7c07c01958b70c727848dce9e22b45635ea1d8e9 Mon Sep 17 00:00:00 2001 From: Albert Zhu Date: Thu, 24 Jun 2021 17:29:48 -0400 Subject: [PATCH 16/79] change max_epochs back to 1000000 in example.yaml --- configs/example.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/example.yaml b/configs/example.yaml index 21fea50a..0096269e 100644 --- 
a/configs/example.yaml +++ b/configs/example.yaml @@ -74,7 +74,7 @@ n_train: 100 n_val: 50 # number of validation data learning_rate: 0.01 # learning rate, we found values between 0.01 and 0.005 to work best - this is often one of the most important hyperparameters to tune batch_size: 5 # batch size, we found it important to keep this small for most applications (1-5) -max_epochs: 1 # stop training after _ number of epochs +max_epochs: 1000000 # stop training after _ number of epochs metrics_key: loss # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse use_ema: false # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors ema_decay: 0.999 # ema weight, commonly set to 0.999 From 1320599091cc98b2dee52e42dcfee019a9ef581e Mon Sep 17 00:00:00 2001 From: Albert Zhu Date: Thu, 24 Jun 2021 17:35:54 -0400 Subject: [PATCH 17/79] update CHANGELOG.md --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 900f00b5..fd6c0086 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,11 @@ Most recent change on the bottom. ## [Unreleased] +## [0.3.3] - 2021-06-24 +### Added +- `to_ase` method in `AtomicData.py` to convert `AtomicData` object to (list of) `ase.Atoms` object(s) +- Two unit tests in `test_AtomicData.py` to test the above + ## [0.3.2] - 2021-06-09 ### Added - Option for which nonlinearities to use From c3b28fe30005730f65ea780ecd9533888b5b691d Mon Sep 17 00:00:00 2001 From: Alby M <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Mon, 28 Jun 2021 13:01:32 -0400 Subject: [PATCH 18/79] add batch test --- tests/data/test_AtomicData.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/data/test_AtomicData.py b/tests/data/test_AtomicData.py index 845a2931..b62debc1 100644 --- a/tests/data/test_AtomicData.py +++ b/tests/data/test_AtomicData.py @@ -1,7 +1,9 @@ import pytest +import copy import numpy as np import torch +from torch_geometric.data import Batch import ase.build import ase.geometry @@ -157,6 +159,21 @@ def test_silicon_neighbors(Si): assert edge_index_set_equiv(data.edge_index, edge_index_true) +def test_batching(Si): + _, orig = Si + N = 4 + datas = [] + for _ in range(N): + new = copy.deepcopy(orig) + new.pos += torch.randn_like(new.pos) + datas.append(new) + batch = Batch.from_data_list(datas) + for i, orig in enumerate(datas): + new = batch.get_example(i) + for k, v in orig: + assert torch.equal(v, new[k]) + + def edge_index_set_equiv(a, b): """Compare edge_index arrays in an unordered way.""" # [[0, 1], [1, 0]] -> {(0, 1), (1, 0)} From 1fccbaeb191c5774c1f143a3fd9f396d5978313a Mon Sep 17 00:00:00 2001 From: Alby M <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Mon, 28 Jun 2021 13:39:24 -0400 Subject: [PATCH 19/79] Fix `Final` backport --- CHANGELOG.md | 1 + nequip/scripts/deploy.py | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 86b058cc..9eda3c24 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ Most recent change on the bottom. 
### Fixed - Fix specifying nonlinearities when wandb enabled +- `Final` backport for <3.8 compatability ## [0.3.2] - 2021-06-09 ### Added diff --git a/nequip/scripts/deploy.py b/nequip/scripts/deploy.py index 32ec7582..6c7a4e66 100644 --- a/nequip/scripts/deploy.py +++ b/nequip/scripts/deploy.py @@ -1,4 +1,10 @@ -from typing import Final, Tuple, Dict, Union +import sys + +if sys.version_info[1] >= 8: + from typing import Final +else: + from typing_extensions import Final +from typing import Tuple, Dict, Union import argparse import pathlib import logging From b423530303be0fc210bede84b3f2fecb47dd07c9 Mon Sep 17 00:00:00 2001 From: Alby M <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Mon, 28 Jun 2021 14:14:25 -0400 Subject: [PATCH 20/79] cleanup --- nequip/datasets/aspirin.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/nequip/datasets/aspirin.py b/nequip/datasets/aspirin.py index a99688c5..a78ab71f 100644 --- a/nequip/datasets/aspirin.py +++ b/nequip/datasets/aspirin.py @@ -2,9 +2,6 @@ from os.path import dirname, basename, abspath -from ase import units -from ase.io import read - from nequip.data import AtomicDataDict, AtomicInMemoryDataset @@ -32,6 +29,5 @@ def get_data(self): fixed_fields = { AtomicDataDict.ATOMIC_NUMBERS_KEY: np.asarray(data["z"], dtype=np.int), AtomicDataDict.PBC_KEY: np.array([False, False, False]), - # AtomicDataDict.CELL_KEY: None, } return arrays, fixed_fields From 5c29a33d68de17d4c202da956772515917257ed8 Mon Sep 17 00:00:00 2001 From: Alby M <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Mon, 28 Jun 2021 14:16:32 -0400 Subject: [PATCH 21/79] e3nn 0.3.3 --- README.md | 6 +++--- nequip/utils/initialization.py | 19 ++++++++++--------- setup.py | 2 +- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 9d922b3e..3e189f11 100644 --- a/README.md +++ b/README.md @@ -24,15 +24,15 @@ pip install torch-sparse -f https://pytorch-geometric.com/whl/torch-1.8.0+${CUDA pip install torch-cluster -f https://pytorch-geometric.com/whl/torch-1.8.0+${CUDA}.html pip install torch-spline-conv -f https://pytorch-geometric.com/whl/torch-1.8.0+${CUDA}.html pip install torch-geometric==1.7.1 -pip install e3nn==0.3.2 +pip install e3nn==0.3.3 ``` where ```${CUDA}``` should be replaced by either ```cpu```, ```cu101```, ```cu102```, or ```cu111``` depending on your PyTorch installation, for details see [here](https://github.com/rusty1s/pytorch_geometric). 
-* Install [e3nn](https://github.com/e3nn/e3nn), version 0.3.2: +* Install [e3nn](https://github.com/e3nn/e3nn), version 0.3.3: ``` -pip install e3nn==0.3.2 +pip install e3nn==0.3.3 ``` * Install our fork of [`pytorch_ema`](https://github.com/Linux-cpp-lisp/pytorch_ema) for using an Exponential Moving Average on the weights: diff --git a/nequip/utils/initialization.py b/nequip/utils/initialization.py index 1cde1ea9..4f4b6634 100644 --- a/nequip/utils/initialization.py +++ b/nequip/utils/initialization.py @@ -15,9 +15,8 @@ def unit_uniform_init_(t: torch.Tensor): def uniform_initialize_fcs(mod: torch.nn.Module): """Initialize ``e3nn.nn.FullyConnectedNet``s with ``unit_uniform_init_``""" if isinstance(mod, e3nn.nn.FullyConnectedNet): - for ilayer, layer in mod._modules.items(): - for w in layer.weight: - unit_uniform_init_(w) + for layer in mod: + unit_uniform_init_(layer.weight) # no need to do torch.nn.Linear, which is uniform by default @@ -37,11 +36,13 @@ def uniform_initialize_tps(mod: torch.nn.Module): def xavier_initialize_fcs(mod: torch.nn.Module): """Initialize ``e3nn.nn.FullyConnectedNet``s and ``torch.nn.Linear``s with Xavier uniform initialization""" if isinstance(mod, e3nn.nn.FullyConnectedNet): - for w in mod.weights: + for layer in mod: # in FC: # h_in, _h_out = W.shape # W = W / h_in**0.5 - torch.nn.init.xavier_uniform_(w, gain=w.shape[0] ** 0.5) + torch.nn.init.xavier_uniform_( + layer.weight, gain=layer.weight.shape[0] ** 0.5 + ) elif isinstance(mod, torch.nn.Linear): torch.nn.init.xavier_uniform_(mod.weight) @@ -63,8 +64,8 @@ def orthogonal_initialize_linears(mod: torch.nn.Module): def orthogonal_initialize_fcs(mod: torch.nn.Module): """Initialize ``e3nn.nn.FullyConnectedNet``s and ``torch.nn.Linear``s with orthogonal initialization""" if isinstance(mod, e3nn.nn.FullyConnectedNet): - for w in mod.weights: - torch.nn.init.orthogonal_(w) + for layer in mod: + torch.nn.init.orthogonal_(layer.weight) elif isinstance(mod, torch.nn.Linear): torch.nn.init.orthogonal_(mod.weight) @@ -72,5 +73,5 @@ def orthogonal_initialize_fcs(mod: torch.nn.Module): def unit_orthogonal_initialize_e3nn_fcs(mod: torch.nn.Module): """Initialize only ``e3nn.nn.FullyConnectedNet``s with ``unit_orthogonal_init_``""" if isinstance(mod, e3nn.nn.FullyConnectedNet): - for w in mod.weights: - unit_orthogonal_init_(w) + for layer in mod: + unit_orthogonal_init_(layer.weight) diff --git a/setup.py b/setup.py index 82c93441..ae001bb0 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ "ase", "torch>=1.8", "torch_geometric>=1.7.1", - "e3nn>=0.3.2", + "e3nn>=0.3.3", "pyyaml", "contextlib2;python_version<'3.7'", # backport of nullcontext "typing_extensions;python_version<'3.8'", From 02fa125096ed3291abbfc24918b3f708edbe8e50 Mon Sep 17 00:00:00 2001 From: Alby M <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Mon, 28 Jun 2021 14:33:37 -0400 Subject: [PATCH 22/79] correct fallback --- nequip/data/AtomicData.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/nequip/data/AtomicData.py b/nequip/data/AtomicData.py index 94c4fbea..168e74ec 100644 --- a/nequip/data/AtomicData.py +++ b/nequip/data/AtomicData.py @@ -328,10 +328,8 @@ def without_nodes(self, which_nodes): ] elif k == AtomicDataDict.CELL_KEY: new_dict[k] = self[k] - elif k == "num_nodes": - pass else: - if len(self[k]) == self.num_nodes: + if isinstance(self[k], torch.Tensor) and len(self[k]) == self.num_nodes: new_dict[k] = self[k][mask] else: new_dict[k] = self[k] From 806ad92790f8eb62c20dc681f358f9d4779b056a Mon Sep 17 
00:00:00 2001 From: Albert Zhu Date: Mon, 28 Jun 2021 17:32:16 -0400 Subject: [PATCH 23/79] incorporate 2nd PR feedback --- CHANGELOG.md | 1 - nequip/data/AtomicData.py | 23 ++++++++++++++--------- tests/data/test_AtomicData.py | 2 +- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fd6c0086..fc287077 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,6 @@ Most recent change on the bottom. ## [0.3.3] - 2021-06-24 ### Added - `to_ase` method in `AtomicData.py` to convert `AtomicData` object to (list of) `ase.Atoms` object(s) -- Two unit tests in `test_AtomicData.py` to test the above ## [0.3.2] - 2021-06-09 ### Added diff --git a/nequip/data/AtomicData.py b/nequip/data/AtomicData.py index 64a59f86..24c1ae7a 100644 --- a/nequip/data/AtomicData.py +++ b/nequip/data/AtomicData.py @@ -10,8 +10,8 @@ import numpy as np import ase.neighborlist +import ase from ase.calculators.singlepoint import SinglePointCalculator, SinglePointDFTCalculator -from ase import Atoms import torch from torch_geometric.data import Data @@ -253,26 +253,31 @@ def from_ase(cls, atoms, r_max, **kwargs): def to_ase(self) -> Union[List[ase.Atoms], ase.Atoms]: """Build a (list of) ``ase.Atoms`` object(s) from an ``AtomicData`` object. - For each unique batch number associated with AtomicDataDict.BATCH_KEY, - an ``ase.Atoms`` object is created. If AtomicDataDict.BATCH_KEY does not + For each unique batch number provided in ``AtomicDataDict.BATCH_KEY``, + an ``ase.Atoms`` object is created. If ``AtomicDataDict.BATCH_KEY`` does not exist in self, a single ``ase.Atoms`` object is created. Returns: - A list of ``ase.Atoms`` objects if AtomicDataDict.BATCH_KEY is in self + A list of ``ase.Atoms`` objects if ``AtomicDataDict.BATCH_KEY`` is in self and is not None. Otherwise, a single ``ase.Atoms`` object is returned. 
""" positions = self.pos atomic_nums = self.atomic_numbers - pbc = self.pbc if AtomicDataDict.PBC_KEY in self else None - cell = self.cell if AtomicDataDict.CELL_KEY in self else None - batch = self.batch if AtomicDataDict.BATCH_KEY in self else None + pbc = getattr(self, AtomicDataDict.PBC_KEY, None) + cell = getattr(self, AtomicDataDict.CELL_KEY, None) + batch = getattr(self, AtomicDataDict.BATCH_KEY, None) + + if cell is not None: + cell = cell.view(-1, 3, 3) + if pbc is not None: + pbc = pbc.view(-1, 3) if batch is not None: unique_batches = range(batch.max() + 1) batch_atoms = [] for batch_idx in unique_batches: mask = batch == batch_idx - mol = Atoms( + mol = ase.Atoms( numbers=atomic_nums[mask], positions=positions[mask], cell=cell[batch_idx] if cell is not None else None, @@ -281,7 +286,7 @@ def to_ase(self) -> Union[List[ase.Atoms], ase.Atoms]: batch_atoms.append(mol) return batch_atoms else: - return Atoms( + return ase.Atoms( numbers=atomic_nums, positions=positions, cell=cell[0] if cell is not None else None, diff --git a/tests/data/test_AtomicData.py b/tests/data/test_AtomicData.py index 8c765d2a..01a87e20 100644 --- a/tests/data/test_AtomicData.py +++ b/tests/data/test_AtomicData.py @@ -29,7 +29,7 @@ def test_to_ase(CH3CHO): def test_to_ase_batches(atomic_batch): - atomic_data = AtomicData.from_dict(vars(atomic_batch)) + atomic_data = AtomicData.from_dict(atomic_batch.to_dict()) to_ase_atoms_batch = atomic_data.to_ase() for batch_idx, atoms in enumerate(to_ase_atoms_batch): mask = atomic_data.batch == batch_idx From 8156d90b9c28d18061c1462b59f2360f688b4471 Mon Sep 17 00:00:00 2001 From: Alby M <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Tue, 29 Jun 2021 13:32:05 -0400 Subject: [PATCH 24/79] Add docs badge --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 3e189f11..1f2c9825 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,7 @@ NequIP is an open-source code for building E(3)-equivariant interatomic potentials. 
+[![Documentation Status](https://readthedocs.org/projects/nequip/badge/?version=latest)](https://nequip.readthedocs.io/en/latest/?badge=latest) ![nequip](./nequip.png) From a4d015320cb5e6e00a5bdf74f6755fc0e2c90eea Mon Sep 17 00:00:00 2001 From: Albert Zhu Date: Tue, 29 Jun 2021 15:22:40 -0400 Subject: [PATCH 25/79] expand cell and pbc shared between all batches --- nequip/data/AtomicData.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/nequip/data/AtomicData.py b/nequip/data/AtomicData.py index 24c1ae7a..b97105c4 100644 --- a/nequip/data/AtomicData.py +++ b/nequip/data/AtomicData.py @@ -273,9 +273,11 @@ def to_ase(self) -> Union[List[ase.Atoms], ase.Atoms]: pbc = pbc.view(-1, 3) if batch is not None: - unique_batches = range(batch.max() + 1) + n_batches = batch.max() + 1 + cell = cell.expand(n_batches, 3, 3) if cell is not None else None + pbc = pbc.expand(n_batches, 3) if pbc is not None else None batch_atoms = [] - for batch_idx in unique_batches: + for batch_idx in range(n_batches): mask = batch == batch_idx mol = ase.Atoms( numbers=atomic_nums[mask], From 7d4460369f2037e0a43337b358d58840b2cb90ae Mon Sep 17 00:00:00 2001 From: Alby M <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Tue, 29 Jun 2021 15:43:04 -0400 Subject: [PATCH 26/79] insert into sequentialgraphnet --- CHANGELOG.md | 3 +++ nequip/nn/_graph_mixin.py | 46 +++++++++++++++++++++++++++++++++++++ tests/nn/test_sequential.py | 27 ++++++++++++++++++++++ 3 files changed, 76 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7375551f..1ee87fce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 Most recent change on the bottom. ## [Unreleased] +### Added +- `SequentialGraphNetwork` now has insertion methods + ### Changed - Nonlinearities now specified with `e` and `o` instead of `1` and `-1` - Update interfaces for `torch_geometric` 1.7 and `e3nn` 0.3.3 diff --git a/nequip/nn/_graph_mixin.py b/nequip/nn/_graph_mixin.py index 8499ef7f..cacd633b 100644 --- a/nequip/nn/_graph_mixin.py +++ b/nequip/nn/_graph_mixin.py @@ -229,6 +229,52 @@ def append_from_parameters( self.append(name, instance) return + def insert(self, after: str, name: str, module: GraphModuleMixin) -> None: + """Insert a module after the module with name ``after``. + + Args: + after: the module to insert after + name: the name of the module to insert + module: the moldule to insert + """ + # This checks names, etc. + self.add_module(name, module) + # Now insert in the right place by overwriting + names = list(self._modules.keys()) + modules = list(self._modules.values()) + idx = names.index(after) + names.insert(idx + 1, name) + modules.insert(idx + 1, module) + self._modules = OrderedDict(zip(names, modules)) + return + + def insert_from_parameters( + self, + after: str, + shared_params: Mapping, + name: str, + builder: Callable, + params: Dict[str, Any] = {}, + ) -> None: + r"""Build a module from parameters and insert it after ``after``. 
+ + Args: + after: the name of the module to insert after + shared_params (dict-like): shared parameters from which to pull when instantiating the module + name (str): the name for the module + builder (callable): a class or function to build a module + params (dict, optional): extra specific parameters for this module that take priority over those in ``shared_params`` + """ + instance, _ = instantiate( + builder=builder, + prefix=name, + positional_args=(dict(irreps_in=self[-1].irreps_out)), + optional_args=params, + all_args=shared_params, + ) + self.insert(after, name, instance) + return + # Copied from https://pytorch.org/docs/stable/_modules/torch/nn/modules/container.html#Sequential # with type annotations added def forward(self, input: AtomicDataDict.Type) -> AtomicDataDict.Type: diff --git a/tests/nn/test_sequential.py b/tests/nn/test_sequential.py index 43f3890a..a081ff34 100644 --- a/tests/nn/test_sequential.py +++ b/tests/nn/test_sequential.py @@ -38,3 +38,30 @@ def test_append(): } ) assert out["thing"].shape == out[AtomicDataDict.NODE_FEATURES_KEY].shape + + +def test_insert(): + sgn = SequentialGraphNetwork.from_parameters( + shared_params={"num_species": 3}, + layers={"one_hot": OneHotAtomEncoding, "lin2": AtomwiseLinear}, + ) + sgn.insert_from_parameters( + after="one_hot", + shared_params={"out_field": "thing"}, + name="lin1", + builder=AtomwiseLinear, + params={"out_field": AtomicDataDict.NODE_FEATURES_KEY}, + ) + assert isinstance(sgn.lin1, AtomwiseLinear) + assert len(sgn) == 3 + assert sgn[0] is sgn.one_hot + assert sgn[1] is sgn.lin1 + assert sgn[2] is sgn.lin2 + out = sgn( + { + AtomicDataDict.POSITIONS_KEY: torch.randn(5, 3), + AtomicDataDict.EDGE_INDEX_KEY: torch.LongTensor([[0, 1], [1, 0]]), + AtomicDataDict.SPECIES_INDEX_KEY: torch.LongTensor([0, 0, 1, 2, 0]), + } + ) + assert AtomicDataDict.NODE_FEATURES_KEY in out From c314433405204c190b898751d5dfb9ccfccfeeeb Mon Sep 17 00:00:00 2001 From: Alby M <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Tue, 29 Jun 2021 15:45:03 -0400 Subject: [PATCH 27/79] per-atom scale --- CHANGELOG.md | 1 + nequip/scripts/train.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1ee87fce..635ba110 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ Most recent change on the bottom. 
 - Fix specifying nonlinearities when wandb enabled
 - `Final` backport for <3.8 compatibility
 - Fixed `nequip-*` commands when using `pip install`
+- Default models rescale per-atom energies, and not just total

 ## [0.3.2] - 2021-06-09
 ### Added

diff --git a/nequip/scripts/train.py b/nequip/scripts/train.py
index e487d991..ee38913e 100644
--- a/nequip/scripts/train.py
+++ b/nequip/scripts/train.py
@@ -212,7 +212,7 @@ def fresh_start(config):
     # == Build the model ==
     final_model = RescaleOutput(
         model=core_model,
-        scale_keys=[AtomicDataDict.TOTAL_ENERGY_KEY]
+        scale_keys=[AtomicDataDict.TOTAL_ENERGY_KEY, AtomicDataDict.PER_ATOM_ENERGY_KEY]
         + (
             [AtomicDataDict.FORCE_KEY]
             if AtomicDataDict.FORCE_KEY in core_model.irreps_out

From 860ca6fd90dea6f7a571f43af3ed1a36e0d1b90c Mon Sep 17 00:00:00 2001
From: Alby M <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Tue, 29 Jun 2021 15:57:05 -0400
Subject: [PATCH 28/79] SaveForOutput

---
 CHANGELOG.md           |  1 +
 nequip/nn/__init__.py  |  1 +
 nequip/nn/_util.py     | 29 +++++++++++++++++++++++++++++
 tests/nn/test_utils.py | 29 +++++++++++++++++++++++++++++
 4 files changed, 60 insertions(+)
 create mode 100644 nequip/nn/_util.py
 create mode 100644 tests/nn/test_utils.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 635ba110..3d33edab 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@ Most recent change on the bottom.
 ## [Unreleased]
 ### Added
 - `SequentialGraphNetwork` now has insertion methods
+- `nn.SaveForOutput`

 ### Changed
 - Nonlinearities now specified with `e` and `o` instead of `1` and `-1`

diff --git a/nequip/nn/__init__.py b/nequip/nn/__init__.py
index 2a58ea2d..f86102b8 100644
--- a/nequip/nn/__init__.py
+++ b/nequip/nn/__init__.py
@@ -9,3 +9,4 @@
 from ._grad_output import GradientOutput, ForceOutput  # noqa: F401
 from ._rescale import RescaleOutput  # noqa: F401
 from ._convnetlayer import ConvNetLayer  # noqa: F401
+from ._util import SaveForOutput  # noqa: F401

diff --git a/nequip/nn/_util.py b/nequip/nn/_util.py
new file mode 100644
index 00000000..95c3f969
--- /dev/null
+++ b/nequip/nn/_util.py
@@ -0,0 +1,29 @@
+import torch
+
+from nequip.data import AtomicDataDict
+from nequip.nn import GraphModuleMixin
+
+
+class SaveForOutput(torch.nn.Module, GraphModuleMixin):
+    """Copy a field and disconnect it from the autograd graph.
+
+    Copy a field and disconnect it from the autograd graph, storing it under another key for inspection as part of the model's output.
+ + Args: + field: the field to save + out_field: the key to put the saved copy in + """ + + field: str + out_field: str + + def __init__(self, field: str, out_field: str, irreps_in=None): + super().__init__() + self._init_irreps(irreps_in=irreps_in) + self.irreps_out[out_field] = self.irreps_in[field] + self.field = field + self.out_field = out_field + + def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: + data[self.out_field] = data[self.field].detach().clone() + return data diff --git a/tests/nn/test_utils.py b/tests/nn/test_utils.py new file mode 100644 index 00000000..ee645924 --- /dev/null +++ b/tests/nn/test_utils.py @@ -0,0 +1,29 @@ +import torch + +from nequip.data import AtomicDataDict +from nequip.nn.embedding import OneHotAtomEncoding +from nequip.nn import SequentialGraphNetwork, SaveForOutput, AtomwiseLinear + + +def test_basic(): + sgn = SequentialGraphNetwork.from_parameters( + shared_params={"num_species": 4}, + layers={ + "one_hot": OneHotAtomEncoding, + "save": ( + SaveForOutput, + dict(field=AtomicDataDict.NODE_FEATURES_KEY, out_field="saved"), + ), + "linear": AtomwiseLinear, + }, + ) + out = sgn( + { + AtomicDataDict.POSITIONS_KEY: torch.randn(5, 3), + AtomicDataDict.EDGE_INDEX_KEY: torch.LongTensor([[0, 1], [1, 0]]), + AtomicDataDict.SPECIES_INDEX_KEY: torch.LongTensor([0, 0, 1, 2, 0]), + } + ) + saved = out["saved"] + assert saved.shape == (5, 4) + assert torch.all(saved[0] == torch.as_tensor([1.0, 0.0, 0.0, 0.0])) From df16cf7c6fa706e2db56d171e15ee33989bd3dff Mon Sep 17 00:00:00 2001 From: Alby M <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Tue, 29 Jun 2021 16:27:08 -0400 Subject: [PATCH 29/79] fix irreps bug --- nequip/nn/_graph_mixin.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nequip/nn/_graph_mixin.py b/nequip/nn/_graph_mixin.py index cacd633b..9498201b 100644 --- a/nequip/nn/_graph_mixin.py +++ b/nequip/nn/_graph_mixin.py @@ -265,10 +265,11 @@ def insert_from_parameters( builder (callable): a class or function to build a module params (dict, optional): extra specific parameters for this module that take priority over those in ``shared_params`` """ + idx = list(self._modules.keys()).index(after) instance, _ = instantiate( builder=builder, prefix=name, - positional_args=(dict(irreps_in=self[-1].irreps_out)), + positional_args=(dict(irreps_in=self[idx].irreps_out)), optional_args=params, all_args=shared_params, ) From 86a10bc12b69724a1d7fd59a2da6653539f6c650 Mon Sep 17 00:00:00 2001 From: Alby M <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Tue, 29 Jun 2021 18:22:13 -0400 Subject: [PATCH 30/79] config nonlin for FC --- CHANGELOG.md | 1 + nequip/nn/_interaction_block.py | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3d33edab..c699c626 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ Most recent change on the bottom. 
### Changed - Nonlinearities now specified with `e` and `o` instead of `1` and `-1` - Update interfaces for `torch_geometric` 1.7 and `e3nn` 0.3.3 +- `nonlinearity_scalars` now also affects the nonlinearity used in the radial net of `InteractionBlock` ### Fixed - Fix specifying nonlinearities when wandb enabled diff --git a/nequip/nn/_interaction_block.py b/nequip/nn/_interaction_block.py index f0b01b32..8370cf85 100644 --- a/nequip/nn/_interaction_block.py +++ b/nequip/nn/_interaction_block.py @@ -1,5 +1,5 @@ """ Interaction Block """ -from typing import Optional +from typing import Optional, Dict, Callable import torch @@ -26,6 +26,7 @@ def __init__( invariant_neurons=8, avg_num_neighbors=None, use_sc=False, + nonlinearity_scalars: Dict[int, Callable] = {"e": "ssp"}, ) -> None: """ InteractionBlock. @@ -114,7 +115,10 @@ def __init__( [self.irreps_in[AtomicDataDict.EDGE_EMBEDDING_KEY].num_irreps] + invariant_layers * [invariant_neurons] + [tp.weight_numel], - ShiftedSoftPlus, + { + "ssp": ShiftedSoftPlus, + "silu": torch.nn.functional.silu, + }[nonlinearity_scalars["e"]], ) self.tp = tp From f64f2e1e34997e192c55e7be570a450174af0c5a Mon Sep 17 00:00:00 2001 From: Alby M <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Thu, 1 Jul 2021 13:45:33 -0400 Subject: [PATCH 31/79] fix default --- nequip/data/dataloader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nequip/data/dataloader.py b/nequip/data/dataloader.py index 728a3c2f..6d1debb7 100644 --- a/nequip/data/dataloader.py +++ b/nequip/data/dataloader.py @@ -12,7 +12,7 @@ def __init__(self, fixed_fields=[], exclude_keys=[]): self._exclude_keys = set(exclude_keys) @classmethod - def for_dataset(cls, dataset, exclude_keys=None): + def for_dataset(cls, dataset, exclude_keys=[]): return cls( fixed_fields=list(getattr(dataset, "fixed_fields", {}).keys()), exclude_keys=exclude_keys, From e4be92f6381a1a3bef1178fbe0b85c9b1d1a47ae Mon Sep 17 00:00:00 2001 From: nw13slx Date: Thu, 1 Jul 2021 14:38:04 -0400 Subject: [PATCH 32/79] add to the init dictionary and yaml files --- nequip/train/trainer.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/nequip/train/trainer.py b/nequip/train/trainer.py index e5aa954a..915c7d83 100644 --- a/nequip/train/trainer.py +++ b/nequip/train/trainer.py @@ -13,9 +13,10 @@ import yaml from copy import deepcopy from os.path import isfile -from time import perf_counter +from time import perf_counter, gmtime, strftime from typing import Optional, Union + if sys.version_info[1] >= 7: import contextlib else: @@ -23,9 +24,12 @@ import contextlib2 as contextlib import numpy as np +import e3nn +import torch_geometric import torch from torch_ema import ExponentialMovingAverage +import nequip from nequip.data import DataLoader, AtomicData, AtomicDataDict from nequip.utils import ( Output, @@ -313,10 +317,14 @@ def __init__( self.init() if not (restart and append): + d = self.as_dict() for key in list(d.keys()): if not isinstance(d[key], (float, int, str, list, tuple)): d[key] = repr(d[key]) + + d["start_time"] = strftime("%a, %d %b %Y %H:%M:%S", gmtime()) + self.log_dictionary(d, name="Initialization") logging.debug("! 
Done Initialize Trainer") @@ -393,6 +401,9 @@ def as_dict(self, state_dict: bool = False, training_progress: bool = False): dictionary["progress"]["last_model_path"] = self.last_model_path dictionary["progress"]["trainer_save_path"] = self.trainer_save_path + for code in [e3nn, nequip, torch, torch_geometric]: + dictionary[f"{code.__name__}_version"] = code.__version__ + return dictionary def save(self, filename, format=None): @@ -458,6 +469,15 @@ def from_dict(cls, dictionary, append: Optional[bool] = None): d = deepcopy(dictionary) + for code in [e3nn, nequip, torch, torch_geometric]: + version = d.get(f"{code.__name__}_version", None) + if version is not None and version != code.__version__: + raise NotImplementedError( + "Parsing model from a different library version is not supported." + f"current {code.__name__} verion: {code.__version} " + f"vs version nedded {version}" + ) + # update the restart and append option d["restart"] = True if append is not None: From b68d29446c95350b21df2494ee736f94947d28f5 Mon Sep 17 00:00:00 2001 From: Alby M <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Thu, 1 Jul 2021 16:14:24 -0400 Subject: [PATCH 33/79] nequip-test-error --- nequip/scripts/test_error.py | 146 +++++++++++++++++++++++++++++++++++ setup.py | 1 + 2 files changed, 147 insertions(+) create mode 100644 nequip/scripts/test_error.py diff --git a/nequip/scripts/test_error.py b/nequip/scripts/test_error.py new file mode 100644 index 00000000..5d7b46e7 --- /dev/null +++ b/nequip/scripts/test_error.py @@ -0,0 +1,146 @@ +import sys +import argparse +from pathlib import Path + +import torch + +from nequip.utils import Config, dataset_from_config +from nequip.data import AtomicDataDict, AtomicData, Collater +from nequip.scripts.deploy import load_deployed_model +from nequip.utils import load_file + +from torch_runstats import RunningStats + + +def main(args=None): + # in results dir, do: nequip-deploy build . deployed.pth + parser = argparse.ArgumentParser() + parser.add_argument( + "--train-dir", + help="Path to a working directory from a training session.", + type=Path, + default=None, + ) + parser.add_argument( + "--model", + help="A deployed or pickled NequIP model to load. If omitted, defaults to `best_model.pth` in `train_dir`.", + type=Path, + default=None, + ) + parser.add_argument( + "--dataset-config", + help="A YAML config file specifying the dataset to load test data from. If omitted, `config_final.yaml` in `train_dir` will be used", + type=Path, + default=None, + ) + parser.add_argument( + "--test-indexes", + help="Path to a file containing the indexes in the dataset that make up the test set. If omitted, all data frames *not* used as training or validation data in the training session `train_dir` will be used.", + type=Path, + default=None, + ) + parser.add_argument( + "--batch-size", + help="Batch size to use. 
Larger is usually faster on GPU.", + type=int, + default=5, + ) + parser.add_argument( + "--log-every", + help="Log approximately every n datapoints.", + type=int, + default=10, + ) + args = parser.parse_args(args=args) + + # Do the defaults: + if args.train_dir: + if args.dataset_config is None: + args.dataset_config = args.train_dir / "config_final.yaml" + if args.model is None: + args.model = args.train_dir / "best_model.pth" + if args.test_indexes is None: + # Find the remaining indexes that arent train or val + trainer = torch.load( + str(args.train_dir / "trainer.pth"), map_location="cpu" + ) + train_idcs = set(trainer["train_idcs"].tolist()) + val_idcs = set(trainer["val_idcs"].tolist()) + else: + train_idcs = val_idcs = None + # validate + if args.dataset_config is None: + raise ValueError("--dataset-config or --train-dir must be provided") + if args.model is None: + raise ValueError("--model or --train-dir must be provided") + + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + # Load model: + try: + model, _ = load_deployed_model(args.model, device=device) + except ValueError: # its not a deployed model + model = torch.load(args.model, map_location=device) + + # Load a config file + config = Config.from_file(str(args.dataset_config)) + dataset = dataset_from_config(config) + c = Collater.for_dataset(dataset, exclude_keys=[]) + + # Determine the test set + if train_idcs is not None: + # we know the train and val, get the rest + all_idcs = set(range(len(dataset))) + # set operations + test_idcs = list(all_idcs - train_idcs - val_idcs) + assert set(test_idcs).isdisjoint(train_idcs) + assert set(test_idcs).isdisjoint(val_idcs) + else: + # load from file + test_idcs = load_file(args.test_indexes) + + # Do the stats + e_stats = RunningStats() + e_stats.to(device=device, dtype=torch.get_default_dtype()) + f_stats = RunningStats(dim=(3,), reduce_dims=(0,)) + f_stats.to(device=device, dtype=torch.get_default_dtype()) + + batch_i: int = 0 + batch_size: int = args.batch_size + since_last_log: int = 0 + + while True: + datas = [ + dataset.get(int(idex)) + for idex in test_idcs[batch_i * batch_size : (batch_i + 1) * batch_size] + ] + since_last_log += len(datas) + if len(datas) == 0: + break + batch = c.collate(datas) + batch = batch.to(device) + out = model(AtomicData.to_AtomicDataDict(batch)) + e = out[AtomicDataDict.TOTAL_ENERGY_KEY].detach() + f = out[AtomicDataDict.FORCE_KEY].detach() + e_stats.accumulate_batch((e - batch[AtomicDataDict.TOTAL_ENERGY_KEY]).abs()) + f_stats.accumulate_batch((f - batch[AtomicDataDict.FORCE_KEY]).abs()) + + if since_last_log >= args.log_every: + print( + "Progress: {:.2f}%, cumulative MAE-F: {}, cumulative MAE-E: {}".format( + (e_stats.n.cpu().item() * 100) / len(test_idcs), + e_stats.current_result().cpu().item(), + f_stats.current_result().cpu().item(), + ), + file=sys.stderr, + ) + since_last_log = 0 + + batch_i += 1 + + print("Force MAE: {}".format(f_stats.current_result().cpu().item())) + print("Energy MAE: {}".format(e_stats.current_result().cpu().item())) + + +if __name__ == "__main__": + main() diff --git a/setup.py b/setup.py index ae001bb0..c5101171 100644 --- a/setup.py +++ b/setup.py @@ -22,6 +22,7 @@ "nequip-train = nequip.scripts.train:main", "nequip-restart = nequip.scripts.restart:main", "nequip-requeue = nequip.scripts.requeue:main", + "nequip-test-error = nequip.scripts.test_error:main", "nequip-deploy = nequip.scripts.deploy:main", ] }, From 86a308c92c37425213f5bc410eb5bd6b8089824d Mon Sep 17 00:00:00 2001 From: 
nw13slx
Date: Wed, 7 Jul 2021 18:09:11 -0400
Subject: [PATCH 34/79] change it to warning

---
 nequip/train/trainer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nequip/train/trainer.py b/nequip/train/trainer.py
index 915c7d83..16e48b80 100644
--- a/nequip/train/trainer.py
+++ b/nequip/train/trainer.py
@@ -472,7 +472,7 @@ def from_dict(cls, dictionary, append: Optional[bool] = None):
         for code in [e3nn, nequip, torch, torch_geometric]:
             version = d.get(f"{code.__name__}_version", None)
             if version is not None and version != code.__version__:
-                raise NotImplementedError(
+                logging.warning(
                     "Parsing model from a different library version is not supported."
                     f"current {code.__name__} verion: {code.__version} "
                     f"vs version nedded {version}"

From 7ecb9e9a0403cb8ebabbf50718f4074d45c6ca65 Mon Sep 17 00:00:00 2001
From: Alby M <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Fri, 9 Jul 2021 16:21:12 -0400
Subject: [PATCH 35/79] Update library version messages

---
 nequip/train/trainer.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/nequip/train/trainer.py b/nequip/train/trainer.py
index 16e48b80..cf1e2b0d 100644
--- a/nequip/train/trainer.py
+++ b/nequip/train/trainer.py
@@ -473,9 +473,9 @@ def from_dict(cls, dictionary, append: Optional[bool] = None):
         version = d.get(f"{code.__name__}_version", None)
         if version is not None and version != code.__version__:
             logging.warning(
-                "Parsing model from a different library version is not supported."
-                f"current {code.__name__} verion: {code.__version} "
-                f"vs version nedded {version}"
+                "Loading a pickled model created with different library version(s) may cause issues. "
+                f"current {code.__name__} version: {code.__version__} "
+                f"vs original version: {version}"
             )

From ee799d25e9cac8f5a769bd0ce1273dfeb434f118 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Mon, 2 Aug 2021 15:49:41 -0400
Subject: [PATCH 36/79] black

---
 nequip/scripts/train.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/nequip/scripts/train.py b/nequip/scripts/train.py
index ee38913e..30282f80 100644
--- a/nequip/scripts/train.py
+++ b/nequip/scripts/train.py
@@ -139,10 +139,7 @@ def fresh_start(config):
     stats = trainer.dataset_train.statistics(
         fields=stats_fields, modes=stats_modes, stride=config.dataset_statistics_stride
     )
-    (
-        (energies_mean, energies_std),
-        (allowed_species, Z_count),
-    ) = stats[:2]
+    ((energies_mean, energies_std), (allowed_species, Z_count),) = stats[:2]
     if force_training:
         # Scale by the force std instead
         force_rms = stats[2][0]

From bf3bc5c270224d1381d3a7ea7aa2fb793eb8ef49 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Mon, 2 Aug 2021 16:02:45 -0400
Subject: [PATCH 37/79] submodule rescale behaviour

---
 nequip/nn/_rescale.py | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/nequip/nn/_rescale.py b/nequip/nn/_rescale.py
index 10ccc7ff..d2e5c6bd 100644
--- a/nequip/nn/_rescale.py
+++ b/nequip/nn/_rescale.py
@@ -29,6 +29,9 @@ class RescaleOutput(GraphModuleMixin, torch.nn.Module):
     scale_keys: List[str]
     shift_keys: List[str]
+    trainable_global_rescale_scale: bool
+    trainable_global_rescale_shift: bool
+
     _has_scale: bool
     _has_shift: bool

@@ -104,6 +107,19 @@ def __init__(
         # register dummy for TorchScript
         self.register_buffer("shift_by", torch.Tensor())

+        # Finally, we tell all the modules in the model
that there is rescaling + # This allows them to update parameters, like physical constants with units, + # that need to be scaled + # + # Note that .modules() walks the full tree, including self + for mod in self.model.modules(): + if isinstance(mod, GraphModuleMixin): + callback = getattr(mod, "update_for_rescale", None) + if callable(callback): + # It gets the `RescaleOutput` as an argument, + # since that contains all relevant information + callback(self) + def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: data = self.model(data) if self.training: @@ -120,9 +136,7 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: @torch.jit.export def scale( - self, - data: AtomicDataDict.Type, - force_process: bool = False, + self, data: AtomicDataDict.Type, force_process: bool = False, ) -> AtomicDataDict.Type: """Apply rescaling to ``data``, in place. @@ -150,9 +164,7 @@ def scale( @torch.jit.export def unscale( - self, - data: AtomicDataDict.Type, - force_process: bool = False, + self, data: AtomicDataDict.Type, force_process: bool = False, ) -> AtomicDataDict.Type: """Apply the inverse of the rescaling operation to ``data``, in place. From 6eb4f70c55b1ef4aea09d3f4227ebb79e70ed66e Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Tue, 3 Aug 2021 16:25:58 -0400 Subject: [PATCH 38/79] Add TF32 option --- configs/full.yaml | 1 + nequip/scripts/train.py | 14 +++++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/configs/full.yaml b/configs/full.yaml index 2fce3bf3..7ebfe702 100644 --- a/configs/full.yaml +++ b/configs/full.yaml @@ -12,6 +12,7 @@ seed: 0 restart: false # set True for a restarted run append: false # set True if a restarted run should append to the previous log file default_dtype: float32 # type of float to use, e.g. 
float32 and float64 +allow_tf32: True # whether to use TensorFloat32 if it is available # network r_max: 4.0 # cutoff radius in length units diff --git a/nequip/scripts/train.py b/nequip/scripts/train.py index 30282f80..d882d182 100644 --- a/nequip/scripts/train.py +++ b/nequip/scripts/train.py @@ -28,6 +28,7 @@ model_initializers=[], dataset_statistics_stride=1, default_dtype="float32", + allow_tf32=True, verbose="INFO", model_debug_mode=False, equivariance_test=False, @@ -79,6 +80,14 @@ def _load_callable(obj: Union[str, Callable]) -> Callable: def fresh_start(config): # = Set global state = + # Set TF32 support + # See https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices + if torch.cuda.is_available(): + if torch.torch.backends.cuda.matmul.allow_tf32 and not config.allow_tf32: + # it is enabled, and we dont want it to, so disable: + torch.backends.cuda.matmul.allow_tf32 = False + torch.backends.cudnn.allow_tf32 = False + if config.model_debug_mode: set_irreps_debug(enabled=True) torch.set_default_dtype( @@ -139,7 +148,10 @@ def fresh_start(config): stats = trainer.dataset_train.statistics( fields=stats_fields, modes=stats_modes, stride=config.dataset_statistics_stride ) - ((energies_mean, energies_std), (allowed_species, Z_count),) = stats[:2] + ( + (energies_mean, energies_std), + (allowed_species, Z_count), + ) = stats[:2] if force_training: # Scale by the force std instead force_rms = stats[2][0] From 46a966b8a592b284dfca0b5ae18a4d19b0909260 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Thu, 5 Aug 2021 17:16:06 -0400 Subject: [PATCH 39/79] test error using metrics --- nequip/scripts/test_error.py | 132 ++++++++++++++++++++++------------- 1 file changed, 85 insertions(+), 47 deletions(-) diff --git a/nequip/scripts/test_error.py b/nequip/scripts/test_error.py index 5d7b46e7..ad58695d 100644 --- a/nequip/scripts/test_error.py +++ b/nequip/scripts/test_error.py @@ -1,15 +1,17 @@ import sys import argparse from pathlib import Path +from numpy import disp +from tqdm.auto import tqdm import torch from nequip.utils import Config, dataset_from_config -from nequip.data import AtomicDataDict, AtomicData, Collater +from nequip.data import AtomicData, Collater from nequip.scripts.deploy import load_deployed_model -from nequip.utils import load_file - -from torch_runstats import RunningStats +from nequip.utils import load_file, instantiate +from nequip.train.loss import Loss +from nequip.train.metrics import Metrics def main(args=None): @@ -33,6 +35,12 @@ def main(args=None): type=Path, default=None, ) + parser.add_argument( + "--metrics-config", + help="A YAML config file specifying the metrics to compute. If omitted, `config_final.yaml` in `train_dir` will be used. If the config does not specify `metrics_components`, the default is to print MAEs and RMSEs for all fields given in the loss function.", + type=Path, + default=None, + ) parser.add_argument( "--test-indexes", help="Path to a file containing the indexes in the dataset that make up the test set. 
If omitted, all data frames *not* used as training or validation data in the training session `train_dir` will be used.", @@ -45,18 +53,14 @@ def main(args=None): type=int, default=5, ) - parser.add_argument( - "--log-every", - help="Log approximately every n datapoints.", - type=int, - default=10, - ) args = parser.parse_args(args=args) # Do the defaults: if args.train_dir: if args.dataset_config is None: args.dataset_config = args.train_dir / "config_final.yaml" + if args.metrics_config is None: + args.metrics_config = args.train_dir / "config_final.yaml" if args.model is None: args.model = args.train_dir / "best_model.pth" if args.test_indexes is None: @@ -71,18 +75,25 @@ def main(args=None): # validate if args.dataset_config is None: raise ValueError("--dataset-config or --train-dir must be provided") + if args.metrics_config is None: + raise ValueError("--metrics-config or --train-dir must be provided") if args.model is None: raise ValueError("--model or --train-dir must be provided") device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + print(f"Using device: {device}", file=sys.stderr) # Load model: + print("Loading model... ", file=sys.stderr, end="") try: model, _ = load_deployed_model(args.model, device=device) + print("loaded deployed model.", file=sys.stderr) except ValueError: # its not a deployed model model = torch.load(args.model, map_location=device) + print("loaded pickled Python model.", file=sys.stderr) # Load a config file + print("Loading dataset...", file=sys.stderr) config = Config.from_file(str(args.dataset_config)) dataset = dataset_from_config(config) c = Collater.for_dataset(dataset, exclude_keys=[]) @@ -99,47 +110,74 @@ def main(args=None): # load from file test_idcs = load_file(args.test_indexes) - # Do the stats - e_stats = RunningStats() - e_stats.to(device=device, dtype=torch.get_default_dtype()) - f_stats = RunningStats(dim=(3,), reduce_dims=(0,)) - f_stats.to(device=device, dtype=torch.get_default_dtype()) + # Figure out what metrics we're actually computing + metrics_config = Config.from_file(str(args.metrics_config)) + metrics_components = metrics_config.get("metrics_components", None) + # See trainer.py: init() and init_metrics() + # Default to loss functions if no metrics specified: + if metrics_components is None: + loss, _ = instantiate( + builder=Loss, + prefix="loss", + positional_args=dict(coeffs=metrics_config.loss_coeffs), + all_args=metrics_config, + ) + metrics_components = [] + for key, func in loss.funcs.items(): + params = { + "PerSpecies": type(func).__name__.startswith("PerSpecies"), + } + metrics_components.append((key, "mae", params)) + metrics_components.append((key, "rmse", params)) + + metrics, _ = instantiate( + builder=Metrics, + prefix="metrics", + positional_args=dict(components=metrics_components), + all_args=metrics_config, + ) + metrics.to(device=device) batch_i: int = 0 batch_size: int = args.batch_size - since_last_log: int = 0 - - while True: - datas = [ - dataset.get(int(idex)) - for idex in test_idcs[batch_i * batch_size : (batch_i + 1) * batch_size] - ] - since_last_log += len(datas) - if len(datas) == 0: - break - batch = c.collate(datas) - batch = batch.to(device) - out = model(AtomicData.to_AtomicDataDict(batch)) - e = out[AtomicDataDict.TOTAL_ENERGY_KEY].detach() - f = out[AtomicDataDict.FORCE_KEY].detach() - e_stats.accumulate_batch((e - batch[AtomicDataDict.TOTAL_ENERGY_KEY]).abs()) - f_stats.accumulate_batch((f - batch[AtomicDataDict.FORCE_KEY]).abs()) - - if since_last_log >= args.log_every: - 
print( - "Progress: {:.2f}%, cumulative MAE-F: {}, cumulative MAE-E: {}".format( - (e_stats.n.cpu().item() * 100) / len(test_idcs), - e_stats.current_result().cpu().item(), - f_stats.current_result().cpu().item(), - ), - file=sys.stderr, - ) - since_last_log = 0 - - batch_i += 1 - print("Force MAE: {}".format(f_stats.current_result().cpu().item())) - print("Energy MAE: {}".format(e_stats.current_result().cpu().item())) + print("Starting...", file=sys.stderr) + with tqdm(bar_format="{desc}") as display_bar: + with tqdm(total=len(test_idcs)) as prog: + while True: + datas = [ + dataset.get(int(idex)) + for idex in test_idcs[ + batch_i * batch_size : (batch_i + 1) * batch_size + ] + ] + if len(datas) == 0: + break + batch = c.collate(datas) + batch = batch.to(device) + out = model(AtomicData.to_AtomicDataDict(batch)) + # Accumulate metrics + with torch.no_grad(): + metrics(out, batch) + + batch_i += 1 + display_bar.set_description_str( + " | ".join( + f"{k[0]}_{k[1]} = {v.cpu().item(): 4.2f}" + for k, v in metrics.current_result().items() + ) + ) + prog.update(batch.num_graphs) + display_bar.close() + prog.close() + + print("--- Final result: ---") + print( + "\n".join( + f"{k[0]}_{k[1]} = {v.cpu().item():f}" + for k, v in metrics.current_result().items() + ) + ) if __name__ == "__main__": From 1fd09ce5bca5b67b8bc5e23d393da28ec4059797 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Thu, 5 Aug 2021 17:27:21 -0400 Subject: [PATCH 40/79] only rescale per-atom energy if model provides it --- nequip/scripts/train.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/nequip/scripts/train.py b/nequip/scripts/train.py index d882d182..455185c7 100644 --- a/nequip/scripts/train.py +++ b/nequip/scripts/train.py @@ -221,11 +221,16 @@ def fresh_start(config): # == Build the model == final_model = RescaleOutput( model=core_model, - scale_keys=[AtomicDataDict.TOTAL_ENERGY_KEY, AtomicDataDict.PER_ATOM_ENERGY_KEY] + scale_keys=[AtomicDataDict.TOTAL_ENERGY_KEY] + ( [AtomicDataDict.FORCE_KEY] if AtomicDataDict.FORCE_KEY in core_model.irreps_out else [] + ) + + ( + [AtomicDataDict.PER_ATOM_ENERGY_KEY] + if AtomicDataDict.PER_ATOM_ENERGY_KEY in core_model.irreps_out + else [] ), scale_by=global_scale, shift_keys=AtomicDataDict.TOTAL_ENERGY_KEY, From 8df1d15611e3ccc937bfee8c92b8b453506ac989 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Thu, 5 Aug 2021 17:27:31 -0400 Subject: [PATCH 41/79] black --- nequip/nn/_rescale.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/nequip/nn/_rescale.py b/nequip/nn/_rescale.py index d2e5c6bd..a946eb80 100644 --- a/nequip/nn/_rescale.py +++ b/nequip/nn/_rescale.py @@ -136,7 +136,9 @@ def forward(self, data: AtomicDataDict.Type) -> AtomicDataDict.Type: @torch.jit.export def scale( - self, data: AtomicDataDict.Type, force_process: bool = False, + self, + data: AtomicDataDict.Type, + force_process: bool = False, ) -> AtomicDataDict.Type: """Apply rescaling to ``data``, in place. @@ -164,7 +166,9 @@ def scale( @torch.jit.export def unscale( - self, data: AtomicDataDict.Type, force_process: bool = False, + self, + data: AtomicDataDict.Type, + force_process: bool = False, ) -> AtomicDataDict.Type: """Apply the inverse of the rescaling operation to ``data``, in place. 
From 92e46301a0190300a1bc3a1b8a8a846422a3ec02 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Thu, 5 Aug 2021 17:42:47 -0400 Subject: [PATCH 42/79] format --- nequip/scripts/test_error.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nequip/scripts/test_error.py b/nequip/scripts/test_error.py index ad58695d..d31f4714 100644 --- a/nequip/scripts/test_error.py +++ b/nequip/scripts/test_error.py @@ -174,7 +174,7 @@ def main(args=None): print("--- Final result: ---") print( "\n".join( - f"{k[0]}_{k[1]} = {v.cpu().item():f}" + f"{k[0] + '_' + k[1]:>20s} = {v.cpu().item():< 20f}" for k, v in metrics.current_result().items() ) ) From 85dc9303e768c53142f02851456c03dcefc3c0ba Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Thu, 5 Aug 2021 18:27:21 -0400 Subject: [PATCH 43/79] deterministic warning & device --- nequip/scripts/test_error.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/nequip/scripts/test_error.py b/nequip/scripts/test_error.py index d31f4714..d0912c12 100644 --- a/nequip/scripts/test_error.py +++ b/nequip/scripts/test_error.py @@ -51,7 +51,13 @@ def main(args=None): "--batch-size", help="Batch size to use. Larger is usually faster on GPU.", type=int, - default=5, + default=50, + ) + parser.add_argument( + "--device", + help="Device to run the model on. If not provided, defaults to CUDA if available and CPU otherwise. Please note that results of CUDA models are rarely exactly reproducible, and that even CPU models can be nondeterministic.", + type=str, + default=None, ) args = parser.parse_args(args=args) @@ -80,8 +86,16 @@ def main(args=None): if args.model is None: raise ValueError("--model or --train-dir must be provided") - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + if args.device is None: + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + else: + device = torch.device(args.device) print(f"Using device: {device}", file=sys.stderr) + if device.type == "cuda": + print( + "WARNING: please note that models running on CUDA are usually nondeterministc and that this manifests in the final test errors; for a _more_ deterministic result, please use `--device cpu`", + file=sys.stderr, + ) # Load model: print("Loading model... ", file=sys.stderr, end="") @@ -90,6 +104,7 @@ def main(args=None): print("loaded deployed model.", file=sys.stderr) except ValueError: # its not a deployed model model = torch.load(args.model, map_location=device) + model = model.to(device) print("loaded pickled Python model.", file=sys.stderr) # Load a config file From 714d23b88e5100d94fba84118f1e44b323fed41e Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Thu, 5 Aug 2021 18:33:39 -0400 Subject: [PATCH 44/79] messages --- nequip/scripts/test_error.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/nequip/scripts/test_error.py b/nequip/scripts/test_error.py index d0912c12..6551129f 100644 --- a/nequip/scripts/test_error.py +++ b/nequip/scripts/test_error.py @@ -1,7 +1,7 @@ import sys import argparse +import textwrap from pathlib import Path -from numpy import disp from tqdm.auto import tqdm import torch @@ -16,7 +16,16 @@ def main(args=None): # in results dir, do: nequip-deploy build . 
deployed.pth - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser( + description=textwrap.dedent( + """Compute the error of a model on a test set using various metrics. + + The model, metrics, dataset, etc. can specified individually, or a training session can be indicated with `--train-dir`. + + WARNING: Please note that results of CUDA models are rarely exactly reproducible, and that even CPU models can be nondeterministic. + """ + ) + ) parser.add_argument( "--train-dir", help="Path to a working directory from a training session.", @@ -55,10 +64,16 @@ def main(args=None): ) parser.add_argument( "--device", - help="Device to run the model on. If not provided, defaults to CUDA if available and CPU otherwise. Please note that results of CUDA models are rarely exactly reproducible, and that even CPU models can be nondeterministic.", + help="Device to run the model on. If not provided, defaults to CUDA if available and CPU otherwise.", type=str, default=None, ) + # Something has to be provided + # See https://stackoverflow.com/questions/22368458/how-to-make-argparse-print-usage-when-no-option-is-given-to-the-code + if len(sys.argv) == 1: + parser.print_help() + parser.exit() + # Parse the args args = parser.parse_args(args=args) # Do the defaults: From 062bcc6f24f97eb0520a3ca2d1d4b80438d612b9 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Fri, 6 Aug 2021 11:57:02 -0400 Subject: [PATCH 45/79] messages and logic fix --- nequip/scripts/test_error.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/nequip/scripts/test_error.py b/nequip/scripts/test_error.py index 6551129f..921d511b 100644 --- a/nequip/scripts/test_error.py +++ b/nequip/scripts/test_error.py @@ -22,6 +22,8 @@ def main(args=None): The model, metrics, dataset, etc. can specified individually, or a training session can be indicated with `--train-dir`. + Prints only the final result in `name = num` format to stdout; all other information is printed to stderr. + WARNING: Please note that results of CUDA models are rarely exactly reproducible, and that even CPU models can be nondeterministic. 
""" ) @@ -77,9 +79,11 @@ def main(args=None): args = parser.parse_args(args=args) # Do the defaults: + dataset_is_from_training: bool = False if args.train_dir: if args.dataset_config is None: args.dataset_config = args.train_dir / "config_final.yaml" + dataset_is_from_training = True if args.metrics_config is None: args.metrics_config = args.train_dir / "config_final.yaml" if args.model is None: @@ -123,22 +127,34 @@ def main(args=None): print("loaded pickled Python model.", file=sys.stderr) # Load a config file - print("Loading dataset...", file=sys.stderr) + print( + f"Loading {'original training ' if dataset_is_from_training else ''}dataset...", + file=sys.stderr, + ) config = Config.from_file(str(args.dataset_config)) dataset = dataset_from_config(config) c = Collater.for_dataset(dataset, exclude_keys=[]) # Determine the test set - if train_idcs is not None: + # this makes no sense if a dataset is given seperately + if train_idcs is not None and dataset_is_from_training: # we know the train and val, get the rest all_idcs = set(range(len(dataset))) # set operations test_idcs = list(all_idcs - train_idcs - val_idcs) assert set(test_idcs).isdisjoint(train_idcs) assert set(test_idcs).isdisjoint(val_idcs) + print( + f"Using training dataset minus training and validation frames, yielding a test set size of {len(test_idcs)} frames.", + file=sys.stderr, + ) else: # load from file test_idcs = load_file(args.test_indexes) + print( + f"Using provided test set indexes, yielding a test set size of {len(test_idcs)} frames.", + file=sys.stderr, + ) # Figure out what metrics we're actually computing metrics_config = Config.from_file(str(args.metrics_config)) @@ -201,7 +217,8 @@ def main(args=None): display_bar.close() prog.close() - print("--- Final result: ---") + print(file=sys.stderr) + print(" " * 12 + "--- Final result: ---", file=sys.stderr) print( "\n".join( f"{k[0] + '_' + k[1]:>20s} = {v.cpu().item():< 20f}" From d35e089b28eaffd2fb98bbb64a75afef7c865a95 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Fri, 6 Aug 2021 13:24:24 -0400 Subject: [PATCH 46/79] Add tqdm dependency --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index c5101171..e03b344f 100644 --- a/setup.py +++ b/setup.py @@ -29,6 +29,7 @@ install_requires=[ "numpy", "ase", + "tqdm", "torch>=1.8", "torch_geometric>=1.7.1", "e3nn>=0.3.3", From 5912d8acf0a14b3cde171313869eb7c817f2c470 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Fri, 6 Aug 2021 13:24:39 -0400 Subject: [PATCH 47/79] pytorch_geometric stdout fix --- nequip/scripts/test_error.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/nequip/scripts/test_error.py b/nequip/scripts/test_error.py index 921d511b..e069eb49 100644 --- a/nequip/scripts/test_error.py +++ b/nequip/scripts/test_error.py @@ -2,6 +2,7 @@ import argparse import textwrap from pathlib import Path +import contextlib from tqdm.auto import tqdm import torch @@ -132,7 +133,13 @@ def main(args=None): file=sys.stderr, ) config = Config.from_file(str(args.dataset_config)) - dataset = dataset_from_config(config) + + # Currently, pytorch_geometric prints some status messages to stdout while loading the dataset + # TODO: fix may come soon: https://github.com/rusty1s/pytorch_geometric/pull/2950 + # Until it does, just redirect them. 
+ with contextlib.redirect_stdout(sys.stderr): + dataset = dataset_from_config(config) + c = Collater.for_dataset(dataset, exclude_keys=[]) # Determine the test set From eda241536c5a07b72238bf61fa6f639a7f906e6f Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Fri, 6 Aug 2021 13:24:56 -0400 Subject: [PATCH 48/79] test test_error --- tests/conftest.py | 2 +- tests/scripts/test_test_error.py | 103 +++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+), 1 deletion(-) create mode 100644 tests/scripts/test_test_error.py diff --git a/tests/conftest.py b/tests/conftest.py index 8786d675..7bf777d6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -26,7 +26,7 @@ float_tolerance = float_tolerance -@pytest.fixture() +@pytest.fixture(scope="session") def BENCHMARK_ROOT(): return pathlib.Path(__file__).parent / "../benchmark_data/" diff --git a/tests/scripts/test_test_error.py b/tests/scripts/test_test_error.py new file mode 100644 index 00000000..8de279d1 --- /dev/null +++ b/tests/scripts/test_test_error.py @@ -0,0 +1,103 @@ +import pytest +import tempfile +import pathlib +import yaml +import subprocess +import os + +import numpy as np +import torch + +from nequip.data import AtomicDataDict + +from test_train import ConstFactorModel, IdentityModel # noqa + + +@pytest.fixture( + scope="module", + params=[ + ("minimal.yaml", AtomicDataDict.FORCE_KEY), + ], +) +def conffile(request): + return request.param + + +@pytest.fixture(scope="module", params=[ConstFactorModel, IdentityModel]) +def training_session(request, BENCHMARK_ROOT, conffile): + conffile, _ = conffile + builder = request.param + dtype = str(torch.get_default_dtype())[len("torch.") :] + + # if torch.cuda.is_available(): + # # TODO: is this true? 
+ # pytest.skip("CUDA and subprocesses have issues") + + path_to_this_file = pathlib.Path(__file__) + config_path = path_to_this_file.parents[2] / f"configs/{conffile}" + true_config = yaml.load(config_path.read_text(), Loader=yaml.Loader) + with tempfile.TemporaryDirectory() as tmpdir: + # == Run training == + # Save time + run_name = "test_train_" + dtype + true_config["run_name"] = run_name + true_config["root"] = tmpdir + true_config["dataset_file_name"] = str( + BENCHMARK_ROOT / "aspirin_ccsd-train.npz" + ) + true_config["default_dtype"] = dtype + true_config["max_epochs"] = 2 + true_config["model_builder"] = builder + + # to be a true identity, we can't have rescaling + true_config["global_rescale_shift"] = None + true_config["global_rescale_scale"] = None + + config_path = tmpdir + "/conf.yaml" + with open(config_path, "w+") as fp: + yaml.dump(true_config, fp) + # == Train model == + env = dict(os.environ) + # make this script available so model builders can be loaded + env["PYTHONPATH"] = ":".join( + [str(path_to_this_file.parent)] + env.get("PYTHONPATH", "").split(":") + ) + retcode = subprocess.run( + ["nequip-train", str(config_path)], cwd=tmpdir, env=env + ) + retcode.check_returncode() + + yield builder, true_config, tmpdir, env + + +def test_metrics(training_session): + builder, true_config, tmpdir, env = training_session + # == Run test error == + outdir = f"{true_config['root']}/{true_config['run_name']}/" + + retcode = subprocess.run( + ["nequip-test-error", "--train-dir", outdir], + cwd=tmpdir, + env=env, + stdout=subprocess.PIPE, + ) + retcode.check_returncode() + + # Check the output + metrics = dict( + [ + tuple(e.strip() for e in line.split("=")) + for line in retcode.stdout.decode().splitlines() + ] + ) + metrics = {tuple(k.split("_")): float(v) for k, v in metrics.items()} + + # Regardless of builder, with minimal.yaml, we should have RMSE and MAE + assert set(metrics.keys()) == {("forces", "mae"), ("forces", "rmse")} + + if builder == IdentityModel: + for metric, err in metrics.items(): + assert np.allclose(err, 0.0), f"Metric `{metric}` wasn't zero!" 
+ elif builder == ConstFactorModel: + pass + # TODO: check against naive numpy metrics From ae3c45448e5862bd8977952e17e23ea8b28db519 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Fri, 6 Aug 2021 13:45:35 -0400 Subject: [PATCH 49/79] fix test idcs load bug --- nequip/scripts/test_error.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/nequip/scripts/test_error.py b/nequip/scripts/test_error.py index e069eb49..448c150e 100644 --- a/nequip/scripts/test_error.py +++ b/nequip/scripts/test_error.py @@ -157,7 +157,12 @@ def main(args=None): ) else: # load from file - test_idcs = load_file(args.test_indexes) + test_idcs = load_file( + supported_formats=dict( + torch=["pt", "pth"], yaml=["yaml", "yml"], json=["json"] + ), + filename=str(args.test_indexes), + ) print( f"Using provided test set indexes, yielding a test set size of {len(test_idcs)} frames.", file=sys.stderr, From 342c888b4531878d26c41bdfbbde7a3131f05c80 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Fri, 6 Aug 2021 13:45:46 -0400 Subject: [PATCH 50/79] Test idcs, GPU, batch size --- tests/scripts/test_test_error.py | 80 +++++++++++++++++++++++++------- 1 file changed, 62 insertions(+), 18 deletions(-) diff --git a/tests/scripts/test_test_error.py b/tests/scripts/test_test_error.py index 8de279d1..660244ac 100644 --- a/tests/scripts/test_test_error.py +++ b/tests/scripts/test_test_error.py @@ -1,3 +1,5 @@ +from _pytest.mark import param +from py import test import pytest import tempfile import pathlib @@ -9,6 +11,7 @@ import torch from nequip.data import AtomicDataDict +from nequip.utils import Config from test_train import ConstFactorModel, IdentityModel # noqa @@ -70,27 +73,54 @@ def training_session(request, BENCHMARK_ROOT, conffile): yield builder, true_config, tmpdir, env -def test_metrics(training_session): +@pytest.mark.parametrize("do_test_idcs", [True, False]) +def test_metrics(training_session, do_test_idcs): builder, true_config, tmpdir, env = training_session # == Run test error == outdir = f"{true_config['root']}/{true_config['run_name']}/" - retcode = subprocess.run( - ["nequip-test-error", "--train-dir", outdir], - cwd=tmpdir, - env=env, - stdout=subprocess.PIPE, - ) - retcode.check_returncode() - - # Check the output - metrics = dict( - [ - tuple(e.strip() for e in line.split("=")) - for line in retcode.stdout.decode().splitlines() - ] - ) - metrics = {tuple(k.split("_")): float(v) for k, v in metrics.items()} + default_params = {"train-dir": outdir} + + def runit(params: dict): + tmp = default_params.copy() + tmp.update(params) + params = tmp + del tmp + retcode = subprocess.run( + ["nequip-test-error"] + + sum( + (["--" + k, str(v)] for k, v in params.items() if v is not None), + start=[], + ), + cwd=tmpdir, + env=env, + stdout=subprocess.PIPE, + ) + retcode.check_returncode() + + # Check the output + metrics = dict( + [ + tuple(e.strip() for e in line.split("=")) + for line in retcode.stdout.decode().splitlines() + ] + ) + metrics = {tuple(k.split("_")): float(v) for k, v in metrics.items()} + return metrics + + # Test idcs + if do_test_idcs: + # The Aspirin dataset is 1000 frames long + # Pick some arbitrary number of frames + test_idcs_arr = torch.randperm(1000)[:257] + test_idcs = tmpdir + "/some-test-idcs.pth" + torch.save(test_idcs_arr, test_idcs) + else: + test_idcs = None # ignore and use default + default_params["test-indexes"] = test_idcs + + # First run + metrics = 
runit({"train-dir": outdir, "batch-size": 200, "device": "cpu"}) # Regardless of builder, with minimal.yaml, we should have RMSE and MAE assert set(metrics.keys()) == {("forces", "mae"), ("forces", "rmse")} @@ -99,5 +129,19 @@ def test_metrics(training_session): for metric, err in metrics.items(): assert np.allclose(err, 0.0), f"Metric `{metric}` wasn't zero!" elif builder == ConstFactorModel: + # TODO: check comperable to naive numpy compute pass - # TODO: check against naive numpy metrics + + # Check insensitive to batch size + for batch_size in (13, 1000): + metrics2 = runit( + {"train-dir": outdir, "batch-size": batch_size, "device": "cpu"} + ) + for k, v in metrics.items(): + assert abs(v - metrics2[k]) < 1e-5 + + # Check GPU + if torch.cuda.is_available(): + metrics_gpu = runit({"train-dir": outdir, "batch-size": 17, "device": "cuda"}) + for k, v in metrics.items(): + assert abs(v - metrics_gpu[k]) < 1e-3 # GPU nondeterminism From ab8c4127ad1060fc25160ed1b79a0a3826ca1eee Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Fri, 6 Aug 2021 13:46:35 -0400 Subject: [PATCH 51/79] cleanup --- tests/conftest.py | 1 - tests/scripts/test_test_error.py | 3 --- 2 files changed, 4 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 7bf777d6..26ec3e80 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,7 +3,6 @@ import pathlib import pytest import tempfile -import torch from ase.atoms import Atoms from ase.build import molecule diff --git a/tests/scripts/test_test_error.py b/tests/scripts/test_test_error.py index 660244ac..832536bb 100644 --- a/tests/scripts/test_test_error.py +++ b/tests/scripts/test_test_error.py @@ -1,5 +1,3 @@ -from _pytest.mark import param -from py import test import pytest import tempfile import pathlib @@ -11,7 +9,6 @@ import torch from nequip.data import AtomicDataDict -from nequip.utils import Config from test_train import ConstFactorModel, IdentityModel # noqa From 24be86e76ea5e1fd3e3314a9818afe5dd10ad7b2 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Fri, 6 Aug 2021 14:25:32 -0400 Subject: [PATCH 52/79] arbitrary metrics support --- nequip/scripts/test_error.py | 19 +++++++++++-- tests/scripts/test_test_error.py | 48 +++++++++++++++++++++++++++----- 2 files changed, 57 insertions(+), 10 deletions(-) diff --git a/nequip/scripts/test_error.py b/nequip/scripts/test_error.py index 448c150e..09beb437 100644 --- a/nequip/scripts/test_error.py +++ b/nequip/scripts/test_error.py @@ -199,6 +199,19 @@ def main(args=None): batch_i: int = 0 batch_size: int = args.batch_size + def _format_err(err: torch.Tensor, specifier: str): + specifier = "{:" + specifier + "}" + if err.nelement() == 1: + return specifier.format(err.cpu().item()) + elif err.nelement() == 3: + return (f"(x={specifier}, y={specifier}, z={specifier})").format( + *err.cpu().squeeze().tolist() + ) + else: + raise AssertionError( + "Somehow this metric configuration is unsupported, please file an issue!" 
+ ) + print("Starting...", file=sys.stderr) with tqdm(bar_format="{desc}") as display_bar: with tqdm(total=len(test_idcs)) as prog: @@ -221,7 +234,7 @@ def main(args=None): batch_i += 1 display_bar.set_description_str( " | ".join( - f"{k[0]}_{k[1]} = {v.cpu().item(): 4.2f}" + f"{k[0]}_{k[1]} = {_format_err(v, '4.2f')}" for k, v in metrics.current_result().items() ) ) @@ -230,10 +243,10 @@ def main(args=None): prog.close() print(file=sys.stderr) - print(" " * 12 + "--- Final result: ---", file=sys.stderr) + print("--- Final result: ---", file=sys.stderr) print( "\n".join( - f"{k[0] + '_' + k[1]:>20s} = {v.cpu().item():< 20f}" + f"{k[0] + '_' + k[1]:>20s} = {_format_err(v, 'f'):<20s}" for k, v in metrics.current_result().items() ) ) diff --git a/tests/scripts/test_test_error.py b/tests/scripts/test_test_error.py index 832536bb..39f80a5c 100644 --- a/tests/scripts/test_test_error.py +++ b/tests/scripts/test_test_error.py @@ -4,6 +4,7 @@ import yaml import subprocess import os +import textwrap import numpy as np import torch @@ -71,7 +72,8 @@ def training_session(request, BENCHMARK_ROOT, conffile): @pytest.mark.parametrize("do_test_idcs", [True, False]) -def test_metrics(training_session, do_test_idcs): +@pytest.mark.parametrize("do_metrics", [True, False]) +def test_metrics(training_session, do_test_idcs, do_metrics): builder, true_config, tmpdir, env = training_session # == Run test error == outdir = f"{true_config['root']}/{true_config['run_name']}/" @@ -98,11 +100,19 @@ def runit(params: dict): # Check the output metrics = dict( [ - tuple(e.strip() for e in line.split("=")) + tuple(e.strip() for e in line.split("=", 1)) for line in retcode.stdout.decode().splitlines() ] ) - metrics = {tuple(k.split("_")): float(v) for k, v in metrics.items()} + metrics = { + tuple(k.split("_")): ( + float(v) # normal case + if "x" not in v + # per component case + else np.array([float(e.split("=")[-1]) for e in v[1:-1].split(", ")]) + ) + for k, v in metrics.items() + } return metrics # Test idcs @@ -116,11 +126,35 @@ def runit(params: dict): test_idcs = None # ignore and use default default_params["test-indexes"] = test_idcs + # Metrics + if do_metrics: + # Write an explicit metrics file + metrics_yaml = tmpdir + "my-metrics.yaml" + with open(metrics_yaml, "w") as f: + # Write out a fancier metrics file + # We don't use PerSpecies here since the simple models don't fill SPECIES_INDEX right now + # ^ TODO! 
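
For reference, the two output shapes the `_format_err` helper above can emit, illustrated standalone (a sketch; `format_err` here is a copy for demonstration, not an import):

    import torch

    def format_err(err: torch.Tensor, specifier: str) -> str:
        # scalar metrics render as one number, per-component ones as an (x, y, z) triple
        spec = "{:" + specifier + "}"
        if err.nelement() == 1:
            return spec.format(err.cpu().item())
        return ("(x=" + spec + ", y=" + spec + ", z=" + spec + ")").format(
            *err.cpu().squeeze().tolist()
        )

    print(format_err(torch.tensor([0.1234]), "4.2f"))         # 0.12
    print(format_err(torch.tensor([0.1, 0.2, 0.3]), "4.2f"))  # (x=0.10, y=0.20, z=0.30)
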
+            f.write(
+                textwrap.dedent(
+                    """
+                    metrics_components:
+                      - - forces
+                        - rmse
+                        - report_per_component: True
+                    """
+                )
+            )
+        expect_metrics = {("forces", "rmse")}
+    else:
+        metrics_yaml = None
+        # Regardless of builder, with minimal.yaml, we should have RMSE and MAE
+        expect_metrics = {("forces", "mae"), ("forces", "rmse")}
+    default_params["metrics-config"] = metrics_yaml

     # First run
     metrics = runit({"train-dir": outdir, "batch-size": 200, "device": "cpu"})

-    # Regardless of builder, with minimal.yaml, we should have RMSE and MAE
-    assert set(metrics.keys()) == {("forces", "mae"), ("forces", "rmse")}
+    assert set(metrics.keys()) == expect_metrics

     if builder == IdentityModel:
         for metric, err in metrics.items():
             assert np.allclose(err, 0.0), f"Metric `{metric}` wasn't zero!"
     elif builder == ConstFactorModel:
         # TODO: check comperable to naive numpy compute
         pass

     # Check insensitive to batch size
     for batch_size in (13, 1000):
         metrics2 = runit(
             {"train-dir": outdir, "batch-size": batch_size, "device": "cpu"}
         )
         for k, v in metrics.items():
-            assert abs(v - metrics2[k]) < 1e-5
+            assert np.all(np.abs(v - metrics2[k]) < 1e-5)

     # Check GPU
     if torch.cuda.is_available():
         metrics_gpu = runit({"train-dir": outdir, "batch-size": 17, "device": "cuda"})
         for k, v in metrics.items():
-            assert abs(v - metrics_gpu[k]) < 1e-3  # GPU nondeterminism
+            assert np.all(np.abs(v - metrics_gpu[k]) < 1e-3)  # GPU nondeterminism

From 55d1319c86a62e7f973e1cc768c86c65c026c71c Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Fri, 6 Aug 2021 14:41:09 -0400
Subject: [PATCH 53/79] CHANGELOG

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ffc69d20..e323df9f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,7 @@ Most recent change on the bottom.
 ### Added
 - `SequentialGraphNetwork` now has insertion methods
 - `nn.SaveForOutput`
+- `nequip-test-error` command for evaluating metrics on trained models

 ### Changed
 - Nonlinearities now specified with `e` and `o` instead of `1` and `-1`

From 7ab1d038212271a442eab804745878db8aaf18c0 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Fri, 6 Aug 2021 14:42:59 -0400
Subject: [PATCH 54/79] fix numpy warnings

---
 nequip/datasets/aspirin.py    | 4 ++--
 tests/data/test_AtomicData.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/nequip/datasets/aspirin.py b/nequip/datasets/aspirin.py
index a78ab71f..01557e0d 100644
--- a/nequip/datasets/aspirin.py
+++ b/nequip/datasets/aspirin.py
@@ -6,7 +6,7 @@


 class AspirinDataset(AtomicInMemoryDataset):
-    """Aspirin DFT/CCSD(T) data """
+    """Aspirin DFT/CCSD(T) data"""

     URL = "http://quantum-machine.org/gdml/data/npz/aspirin_ccsd.zip"
     FILE_NAME = "benchmark_data/aspirin_ccsd-train.npz"
@@ -27,7 +27,7 @@ def get_data(self):
             AtomicDataDict.TOTAL_ENERGY_KEY: data["E"].reshape([-1, 1]),
         }
         fixed_fields = {
-            AtomicDataDict.ATOMIC_NUMBERS_KEY: np.asarray(data["z"], dtype=np.int),
+            AtomicDataDict.ATOMIC_NUMBERS_KEY: np.asarray(data["z"], dtype=int),
             AtomicDataDict.PBC_KEY: np.array([False, False, False]),
         }
         return arrays, fixed_fields

diff --git a/tests/data/test_AtomicData.py b/tests/data/test_AtomicData.py
index 007d333c..f6f07132 100644
--- a/tests/data/test_AtomicData.py
+++ b/tests/data/test_AtomicData.py
@@ -88,7 +88,7 @@ def test_without_nodes(CH3CHO):
     assert new_data.edge_index.min() >= 0
     assert new_data.edge_index.max() == new_data.num_nodes - 1

-    which_nodes_mask = np.zeros(len(atoms), dtype=np.bool)
+    which_nodes_mask = np.zeros(len(atoms), dtype=bool)
     which_nodes_mask[[0, 1, 2, 4]] = True
     new_data = data.without_nodes(which_nodes=which_nodes_mask)
assert new_data.num_nodes == len(atoms) - np.sum(which_nodes_mask) From 69f423606dff36e4fc8002c7c499732bf838caaf Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Fri, 6 Aug 2021 14:54:41 -0400 Subject: [PATCH 55/79] Disable TF32 --- tests/conftest.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index 26ec3e80..4a03bcb3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -9,6 +9,7 @@ from ase.calculators.singlepoint import SinglePointCalculator from ase.io import write +import torch from torch_geometric.data import Batch from nequip.utils.test import set_irreps_debug @@ -24,6 +25,22 @@ # Suppress linter errors float_tolerance = float_tolerance +# - Ampere and TF32 - +# Many of the tests for NequIP involve numerically checking +# algebraic properties— normalization, equivariance, +# continuity, etc. +# With the added numerical noise of TF32, some of those tests fail +# with the current (and usually generous) thresholds. +# +# Thus we go on the assumption that PyTorch + NVIDIA got everything +# right, that this setting DOES NOT AFFECT the model outputs except +# for increased numerical noise, and only test without it. +# +# TODO: consider running tests with and without +# TODO: check how much thresholds have to be changed to accommodate TF32 +torch.backends.cuda.matmul.allow_tf32 = False +torch.backends.cudnn.allow_tf32 = False

@pytest.fixture(scope="session") def BENCHMARK_ROOT(): From 2e19580456de315ed2ee1861c96a332cb4fc6123 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Fri, 6 Aug 2021 15:18:14 -0400 Subject: [PATCH 56/79] Energy arrays from ASE --- CHANGELOG.md | 1 + nequip/data/AtomicData.py | 22 ++++++++++++++++------ 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ffc69d20..17d0c452 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ Most recent change on the bottom. ### Added - `SequentialGraphNetwork` now has insertion methods - `nn.SaveForOutput` +- `AtomicData.from_ase` now catches `energy`/`energies` arrays ### Changed - Nonlinearities now specified with `e` and `o` instead of `1` and `-1` diff --git a/nequip/data/AtomicData.py b/nequip/data/AtomicData.py index 18038b56..e898deb2 100644 --- a/nequip/data/AtomicData.py +++ b/nequip/data/AtomicData.py @@ -201,9 +201,10 @@ def from_ase(cls, atoms, r_max, **kwargs): Respects ``atoms``'s ``pbc`` and ``cell``. - Automatically recognize force, energy (overridden by free energy tag) get_atomic_numbers() will be stored as the atomic_numbers attributes + First tries to extract energies and forces from a single-point calculator associated with the ``Atoms`` if one is present and has those fields. + If either is not found, the method will look for ``energy``/``energies`` and ``force``/``forces`` in ``atoms.arrays``. + `get_atomic_numbers()` will be stored as the atomic_numbers attribute. Args: atoms (ase.Atoms): the input. 
@@ -234,10 +235,19 @@ def from_ase(cls, atoms, r_max, **kwargs): "energy" ) - elif "forces" in atoms.arrays: - add_fields[AtomicDataDict.FORCE_KEY] = atoms.arrays["forces"] - elif "force" in atoms.arrays: - add_fields[AtomicDataDict.FORCE_KEY] = atoms.arrays["force"] + if AtomicDataDict.FORCE_KEY not in add_fields: + # Get it from arrays + for k in ("force", "forces"): + if k in atoms.arrays: + add_fields[AtomicDataDict.FORCE_KEY] = atoms.arrays[k] + break + + if AtomicDataDict.TOTAL_ENERGY_KEY not in add_fields: + # Get it from arrays + for k in ("energy", "energies"): + if k in atoms.arrays: + add_fields[AtomicDataDict.TOTAL_ENERGY_KEY] = atoms.arrays[k] + break add_fields[AtomicDataDict.ATOMIC_NUMBERS_KEY] = atoms.get_atomic_numbers() From beac07c7d6af4d8208b0565a5d9d40da200f3701 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Fri, 6 Aug 2021 16:58:30 -0400 Subject: [PATCH 57/79] fix tmpfile path bugs --- tests/data/test_dataloader.py | 4 ++-- tests/scripts/test_test_error.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/data/test_dataloader.py b/tests/data/test_dataloader.py index 7131d225..871ecf96 100644 --- a/tests/data/test_dataloader.py +++ b/tests/data/test_dataloader.py @@ -67,9 +67,9 @@ def npz_dataset(): Z=np.random.randint(1, 108, size=(nframes, natoms)), ) with tempfile.TemporaryDirectory() as folder: - np.savez(folder + "npzdata.npz", **npz) + np.savez(folder + "/npzdata.npz", **npz) a = NpzDataset( - file_name=folder + "npzdata.npz", + file_name=folder + "/npzdata.npz", root=folder, extra_fixed_fields={"r_max": 3}, ) diff --git a/tests/scripts/test_test_error.py b/tests/scripts/test_test_error.py index 39f80a5c..a58ff227 100644 --- a/tests/scripts/test_test_error.py +++ b/tests/scripts/test_test_error.py @@ -129,7 +129,7 @@ def runit(params: dict): # Metrics if do_metrics: # Write an explicit metrics file - metrics_yaml = tmpdir + "my-metrics.yaml" + metrics_yaml = tmpdir + "/my-metrics.yaml" with open(metrics_yaml, "w") as f: # Write out a fancier metrics file # We don't use PerSpecies here since the simple models don't fill SPECIES_INDEX right now From 436a7d8b3be3e87426b1d357a637ead503a3c84b Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Fri, 6 Aug 2021 17:04:40 -0400 Subject: [PATCH 58/79] Removed broken methods --- nequip/data/dataset.py | 34 +++------------------------------- 1 file changed, 3 insertions(+), 31 deletions(-) diff --git a/nequip/data/dataset.py b/nequip/data/dataset.py index 927da4f8..e684ce35 100644 --- a/nequip/data/dataset.py +++ b/nequip/data/dataset.py @@ -54,6 +54,7 @@ def statistics( class AtomicInMemoryDataset(AtomicDataset): r"""Base class for all datasets that fit in memory. + Please note that, as a ``pytorch_geometric`` dataset, it must be backed by some kind of disk storage. By default, the raw file will be stored at root/raw and the processed torch file will be at root/process. @@ -64,10 +65,10 @@ class AtomicInMemoryDataset(AtomicDataset): Subclasses may implement: - ``download()`` or ``self.url`` or ``ClassName.URL`` - Args: + Args: + root (str, optional): Root directory where the dataset should be saved. Defaults to current working directory. file_name (str, optional): file name of data source. only used in children class url (str, optional): url to download data source - root (str, optional): Root directory where the dataset should be saved. Defaults to current working directory. 
force_fixed_keys (list, optional): keys to move from AtomicData to fixed_fields dictionary extra_fixed_fields (dict, optional): extra key that are not stored in data but needed for AtomicData initialization include_frames (list, optional): the frames to process with the constructor. @@ -123,20 +124,6 @@ def __init__( f"please delete the processed folder and rerun {self.processed_paths[0]}" ) - @classmethod - def from_data_list(cls, data_list: List[AtomicData], **kwargs): - """Make an ``AtomicInMemoryDataset`` from a list of ``AtomicData`` objects. - - Args: - data_list (List[AtomicData]) - **kwargs: passed through to the constructor - Returns: - The constructed ``AtomicInMemoryDataset``. - """ - obj = cls(**kwargs) - obj.get_data = lambda: (data_list,) - return obj - def len(self): if self.data is None: return 0 @@ -481,21 +468,6 @@ def __init__( include_frames=include_frames, ) - @classmethod - def from_atoms(cls, atoms: list, **kwargs): - """Make an ``ASEDataset`` from a list of ``ase.Atoms`` objects. - - Args: - atoms (List[ase.Atoms]) - **kwargs: passed through to the constructor - Returns: - The constructed ``ASEDataset``. - """ - # TO DO, this funciton fails. It also needs to be unit tested - obj = cls(**kwargs) - obj.get_atoms = lambda: atoms - return obj - @property def raw_file_names(self): return [basename(self.file_name)] From 8edcb95907e7a0848ff77386b979943e524c1f31 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Fri, 6 Aug 2021 17:38:48 -0400 Subject: [PATCH 59/79] clean up initializers --- configs/full.yaml | 10 +++++-- configs/minimal.yaml | 5 ---- nequip/utils/initialization.py | 52 +++++++++++++++++++++++++--------- 3 files changed, 46 insertions(+), 21 deletions(-) diff --git a/configs/full.yaml b/configs/full.yaml index 7ebfe702..7ae8d632 100644 --- a/configs/full.yaml +++ b/configs/full.yaml @@ -171,11 +171,15 @@ optimizer_weight_decay: 0 # weight initialization # this can be the importable name of any function that can be `model.apply`ed to initialize some weights in the model. 
NequIP provides a number of useful initializers: +# For more details please see the docstrings of the individual initializers #model_initializers: # - nequip.utils.initialization.uniform_initialize_fcs -# - nequip.utils.initialization.uniform_initialize_tps -# - nequip.utils.initialization.orthogonal_initialize_linears -# - nequip.utils.initialization.uniform_initialize_linears +# - nequip.utils.initialization.uniform_initialize_equivariant_linears +# - nequip.utils.initialization.uniform_initialize_tp_internal_weights +# - nequip.utils.initialization.xavier_initialize_fcs +# - nequip.utils.initialization.(unit_)orthogonal_initialize_equivariant_linears +# - nequip.utils.initialization.(unit_)orthogonal_initialize_fcs +# - nequip.utils.initialization.(unit_)orthogonal_initialize_e3nn_fcs # lr scheduler, currently only supports the two options listed below, if you need more please file an issue # first: on-plateau, reduce lr by factory of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epoch diff --git a/configs/minimal.yaml b/configs/minimal.yaml index e8cb2917..a45810f9 100644 --- a/configs/minimal.yaml +++ b/configs/minimal.yaml @@ -11,11 +11,6 @@ conv_to_output_hidden_irreps_out: 16x0e feature_irreps_hidden: 16x0o + 16x0e + 16x1o + 16x1e + 16x2o + 16x2e model_uniform_init: false -model_initializers: - - nequip.utils.initialization.uniform_initialize_fcs - - nequip.utils.initialization.uniform_initialize_tps - - nequip.utils.initialization.orthogonal_initialize_linears - # data dataset: aspirin dataset_file_name: benchmark_data/aspirin_ccsd-train.npz diff --git a/nequip/utils/initialization.py b/nequip/utils/initialization.py index 4f4b6634..70ef1fe9 100644 --- a/nequip/utils/initialization.py +++ b/nequip/utils/initialization.py @@ -13,21 +13,23 @@ def unit_uniform_init_(t: torch.Tensor): def uniform_initialize_fcs(mod: torch.nn.Module): - """Initialize ``e3nn.nn.FullyConnectedNet``s with ``unit_uniform_init_``""" + """Initialize ``e3nn.nn.FullyConnectedNet``s with ``unit_uniform_init_`` + + No need to do torch.nn.Linear, which is uniform by default. 
+ """ if isinstance(mod, e3nn.nn.FullyConnectedNet): for layer in mod: unit_uniform_init_(layer.weight) - # no need to do torch.nn.Linear, which is uniform by default -def uniform_initialize_linears(mod: torch.nn.Module): - """Initialize ``e3nn.o3.Linear``s with ``unit_uniform_init_``""" +def uniform_initialize_equivariant_linears(mod: torch.nn.Module): + """Initialize ``e3nn.o3.Linear``s that have internal weights with ``unit_uniform_init_``""" if isinstance(mod, e3nn.o3.Linear) and mod.internal_weights: unit_uniform_init_(mod.weight) -def uniform_initialize_tps(mod: torch.nn.Module): - """Initialize ``e3nn.o3.TensorProduct``s with ``unit_uniform_init_``""" +def uniform_initialize_tp_internal_weights(mod: torch.nn.Module): + """Initialize ``e3nn.o3.TensorProduct``s that have internal weights with ``unit_uniform_init_``""" if isinstance(mod, e3nn.o3.TensorProduct) and mod.internal_weights: unit_uniform_init_(mod.weight) @@ -48,26 +50,27 @@ def xavier_initialize_fcs(mod: torch.nn.Module): # == Orthogonal == +# TODO: does this normalization make any sense def unit_orthogonal_init_(t: torch.Tensor): """Orthogonal init with = 1""" assert t.ndim == 2 torch.nn.init.orthogonal_(t, gain=math.sqrt(max(t.shape))) -def orthogonal_initialize_linears(mod: torch.nn.Module): - """Initialize ``e3nn.o3.Linear``s with ``unit_orthogonal_init_``""" +def unit_orthogonal_initialize_equivariant_linears(mod: torch.nn.Module): + """Initialize ``e3nn.o3.Linear``s that have internal weights with ``unit_orthogonal_init_``""" if isinstance(mod, e3nn.o3.Linear) and mod.internal_weights: for w in mod.weight_views(): - unit_uniform_init_(w) + unit_orthogonal_init_(w) -def orthogonal_initialize_fcs(mod: torch.nn.Module): - """Initialize ``e3nn.nn.FullyConnectedNet``s and ``torch.nn.Linear``s with orthogonal initialization""" +def unit_orthogonal_initialize_fcs(mod: torch.nn.Module): + """Initialize ``e3nn.nn.FullyConnectedNet``s and ``torch.nn.Linear``s with ``unit_orthogonal_init_``""" if isinstance(mod, e3nn.nn.FullyConnectedNet): for layer in mod: - torch.nn.init.orthogonal_(layer.weight) + unit_orthogonal_init_(layer.weight) elif isinstance(mod, torch.nn.Linear): - torch.nn.init.orthogonal_(mod.weight) + unit_orthogonal_init_(mod.weight) def unit_orthogonal_initialize_e3nn_fcs(mod: torch.nn.Module): @@ -75,3 +78,26 @@ def unit_orthogonal_initialize_e3nn_fcs(mod: torch.nn.Module): if isinstance(mod, e3nn.nn.FullyConnectedNet): for layer in mod: unit_orthogonal_init_(layer.weight) + + +def orthogonal_initialize_equivariant_linears(mod: torch.nn.Module): + """Initialize ``e3nn.o3.Linear``s that have internal weights with ``torch.nn.init.orthogonal_``""" + if isinstance(mod, e3nn.o3.Linear) and mod.internal_weights: + for w in mod.weight_views(): + torch.nn.init.orthogonal_(w) + + +def orthogonal_initialize_fcs(mod: torch.nn.Module): + """Initialize ``e3nn.nn.FullyConnectedNet``s and ``torch.nn.Linear``s with ``torch.nn.init.orthogonal_``""" + if isinstance(mod, e3nn.nn.FullyConnectedNet): + for layer in mod: + torch.nn.init.orthogonal_(layer.weight) + elif isinstance(mod, torch.nn.Linear): + torch.nn.init.orthogonal_(mod.weight) + + +def orthogonal_initialize_e3nn_fcs(mod: torch.nn.Module): + """Initialize only ``e3nn.nn.FullyConnectedNet``s with ``torch.nn.init.orthogonal_``""" + if isinstance(mod, e3nn.nn.FullyConnectedNet): + for layer in mod: + torch.nn.init.orthogonal_(layer.weight) From a7e03a3717ca6c899925c7ea922a04f1d756a482 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp 
<1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Fri, 6 Aug 2021 17:39:08 -0400 Subject: [PATCH 60/79] changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 17d0c452..775d7841 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ Most recent change on the bottom. - Nonlinearities now specified with `e` and `o` instead of `1` and `-1` - Update interfaces for `torch_geometric` 1.7 and `e3nn` 0.3.3 - `nonlinearity_scalars` now also affects the nonlinearity used in the radial net of `InteractionBlock` +- Cleaned up naming of initializers ### Fixed - Fix specifying nonlinearities when wandb enabled From 0ce459a12f6ae287637e74d587b0ace8146cfc6a Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Fri, 6 Aug 2021 18:22:40 -0400 Subject: [PATCH 61/79] use flatten_metrics() --- nequip/scripts/test_error.py | 23 ++++++----------------- tests/scripts/test_test_error.py | 14 +++----------- 2 files changed, 9 insertions(+), 28 deletions(-) diff --git a/nequip/scripts/test_error.py b/nequip/scripts/test_error.py index 09beb437..fed101af 100644 --- a/nequip/scripts/test_error.py +++ b/nequip/scripts/test_error.py @@ -199,19 +199,6 @@ def main(args=None): batch_i: int = 0 batch_size: int = args.batch_size - def _format_err(err: torch.Tensor, specifier: str): - specifier = "{:" + specifier + "}" - if err.nelement() == 1: - return specifier.format(err.cpu().item()) - elif err.nelement() == 3: - return (f"(x={specifier}, y={specifier}, z={specifier})").format( - *err.cpu().squeeze().tolist() - ) - else: - raise AssertionError( - "Somehow this metric configuration is unsupported, please file an issue!" - ) - print("Starting...", file=sys.stderr) with tqdm(bar_format="{desc}") as display_bar: with tqdm(total=len(test_idcs)) as prog: @@ -234,8 +221,10 @@ def _format_err(err: torch.Tensor, specifier: str): batch_i += 1 display_bar.set_description_str( " | ".join( - f"{k[0]}_{k[1]} = {_format_err(v, '4.2f')}" - for k, v in metrics.current_result().items() + f"{k} = {v:4.2f}" + for k, v in metrics.flatten_metrics(metrics.current_result())[ + 0 + ].items() ) ) prog.update(batch.num_graphs) @@ -246,8 +235,8 @@ def _format_err(err: torch.Tensor, specifier: str): print("--- Final result: ---", file=sys.stderr) print( "\n".join( - f"{k[0] + '_' + k[1]:>20s} = {_format_err(v, 'f'):<20s}" - for k, v in metrics.current_result().items() + f"{k:>20s} = {v:< 20f}" + for k, v in metrics.flatten_metrics(metrics.current_result())[0].items() ) ) diff --git a/tests/scripts/test_test_error.py b/tests/scripts/test_test_error.py index a58ff227..bd7d54ca 100644 --- a/tests/scripts/test_test_error.py +++ b/tests/scripts/test_test_error.py @@ -104,15 +104,7 @@ def runit(params: dict): for line in retcode.stdout.decode().splitlines() ] ) - metrics = { - tuple(k.split("_")): ( - float(v) # normal case - if "x" not in v - # per component case - else np.array([float(e.split("=")[-1]) for e in v[1:-1].split(", ")]) - ) - for k, v in metrics.items() - } + metrics = {k: float(v) for k, v in metrics.items()} return metrics # Test idcs @@ -144,11 +136,11 @@ def runit(params: dict): """ ) ) - expect_metrics = {("forces", "rmse")} + expect_metrics = {"f_rmse_0", "f_rmse_1", "f_rmse_2"} else: metrics_yaml = None # Regardless of builder, with minimal.yaml, we should have RMSE and MAE - expect_metrics = {("forces", "mae"), ("forces", "rmse")} + expect_metrics = {"f_mae", "f_rmse"} default_params["metrics-config"] = metrics_yaml # 
First run From 095a73eb2444a5af13a796f9830c84948a5ffe58 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Mon, 9 Aug 2021 12:24:24 -0400 Subject: [PATCH 62/79] unify `to_ase()` --- nequip/data/AtomicData.py | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/nequip/data/AtomicData.py b/nequip/data/AtomicData.py index 18038b56..415ab3d0 100644 --- a/nequip/data/AtomicData.py +++ b/nequip/data/AtomicData.py @@ -262,6 +262,10 @@ def to_ase(self) -> Union[List[ase.Atoms], ase.Atoms]: and is not None. Otherwise, a single ``ase.Atoms`` object is returned. """ positions = self.pos + if positions.device != torch.device("cpu"): + raise TypeError( + "Explicitly move this `AtomicData` to CPU using `.to()` before calling `to_ase()`." + ) atomic_nums = self.atomic_numbers pbc = getattr(self, AtomicDataDict.PBC_KEY, None) cell = getattr(self, AtomicDataDict.CELL_KEY, None) @@ -276,24 +280,28 @@ def to_ase(self) -> Union[List[ase.Atoms], ase.Atoms]: n_batches = batch.max() + 1 cell = cell.expand(n_batches, 3, 3) if cell is not None else None pbc = pbc.expand(n_batches, 3) if pbc is not None else None - batch_atoms = [] - for batch_idx in range(n_batches): + else: + n_batches = 1 + + batch_atoms = [] + for batch_idx in range(n_batches): + if batch is not None: mask = batch == batch_idx - mol = ase.Atoms( - numbers=atomic_nums[mask], - positions=positions[mask], - cell=cell[batch_idx] if cell is not None else None, - pbc=pbc[batch_idx] if pbc is not None else None, - ) - batch_atoms.append(mol) + else: + mask = slice(None) + mol = ase.Atoms( + numbers=atomic_nums[mask], + positions=positions[mask], + cell=cell[batch_idx] if cell is not None else None, + pbc=pbc[batch_idx] if pbc is not None else None, + ) + batch_atoms.append(mol) + + if batch is not None: return batch_atoms else: - return ase.Atoms( - numbers=atomic_nums, - positions=positions, - cell=cell[0] if cell is not None else None, - pbc=pbc[0] if pbc is not None else None, - ) + assert len(batch_atoms) == 1 + return batch_atoms[0] def get_edge_vectors(data: Data) -> torch.Tensor: data = AtomicDataDict.with_edge_vectors(AtomicData.to_AtomicDataDict(data)) From 92d529b91b479f6a903e567da18a9c1e4c085677 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Mon, 9 Aug 2021 12:34:00 -0400 Subject: [PATCH 63/79] `to_ase()` force + eng --- nequip/data/AtomicData.py | 13 +++++++++++++ tests/data/test_AtomicData.py | 10 ++++++++++ 2 files changed, 23 insertions(+) diff --git a/nequip/data/AtomicData.py b/nequip/data/AtomicData.py index 415ab3d0..3634c17d 100644 --- a/nequip/data/AtomicData.py +++ b/nequip/data/AtomicData.py @@ -270,6 +270,9 @@ def to_ase(self) -> Union[List[ase.Atoms], ase.Atoms]: pbc = getattr(self, AtomicDataDict.PBC_KEY, None) cell = getattr(self, AtomicDataDict.CELL_KEY, None) batch = getattr(self, AtomicDataDict.BATCH_KEY, None) + energy = getattr(self, AtomicDataDict.TOTAL_ENERGY_KEY, None) + force = getattr(self, AtomicDataDict.FORCE_KEY, None) + do_calc = energy is not None or force is not None if cell is not None: cell = cell.view(-1, 3, 3) @@ -289,12 +292,22 @@ def to_ase(self) -> Union[List[ase.Atoms], ase.Atoms]: mask = batch == batch_idx else: mask = slice(None) + mol = ase.Atoms( numbers=atomic_nums[mask], positions=positions[mask], cell=cell[batch_idx] if cell is not None else None, pbc=pbc[batch_idx] if pbc is not None else None, ) + + if do_calc: + fields = {} + if 
energy is not None: + fields["energy"] = energy[batch_idx].cpu().numpy() + if force is not None: + fields["forces"] = force[mask].cpu().numpy() + mol.calc = SinglePointCalculator(mol, **fields) + batch_atoms.append(mol) if batch is not None: diff --git a/tests/data/test_AtomicData.py b/tests/data/test_AtomicData.py index f6f07132..4af9cd27 100644 --- a/tests/data/test_AtomicData.py +++ b/tests/data/test_AtomicData.py @@ -45,6 +45,16 @@ def test_to_ase_batches(atomic_batch): assert np.array_equal(atoms.get_pbc(), atomic_data.pbc[batch_idx]) +def test_ase_roundtrip(CuFcc): + atoms, data = CuFcc + atoms2 = data.to_ase() + assert np.allclose(atoms.get_positions(), atoms2.get_positions()) + assert np.array_equal(atoms.get_atomic_numbers(), atoms2.get_atomic_numbers()) + assert np.array_equal(atoms.get_pbc(), atoms2.get_pbc()) + assert np.allclose(atoms.get_cell(), atoms2.get_cell()) + assert np.allclose(atoms.calc.results["forces"], atoms2.calc.results["forces"]) + + def test_non_periodic_edge(CH3CHO): atoms, data = CH3CHO # check edges From 4051f1df95d12de4f23cc49c923f9b8662bb428e Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Mon, 9 Aug 2021 12:34:58 -0400 Subject: [PATCH 64/79] comment old verification --- nequip/data/AtomicDataDict.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/nequip/data/AtomicDataDict.py b/nequip/data/AtomicDataDict.py index 196c04ee..598b3ff1 100644 --- a/nequip/data/AtomicDataDict.py +++ b/nequip/data/AtomicDataDict.py @@ -30,10 +30,11 @@ def validate_keys(keys, graph_required=True): raise KeyError("At least pos and edge_index must be supplied") if _keys.EDGE_CELL_SHIFT_KEY in keys and "cell" not in keys: raise ValueError("If `edge_cell_shift` given, `cell` must be given.") - if _keys.ATOMIC_NUMBERS_KEY in keys and _keys.SPECIES_INDEX_KEY in keys: - raise ValueError( - "'atomic_numbers' and 'species_index' cannot be simultaneously provided" - ) + # This is in flux; TODO + # if _keys.ATOMIC_NUMBERS_KEY in keys and _keys.SPECIES_INDEX_KEY in keys: + # raise ValueError( + # "'atomic_numbers' and 'species_index' cannot be simultaneously provided" + # ) _SPECIAL_IRREPS = [None] From 45512bcdc3944d0e2f435eaaed0943b5655dbbc5 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Mon, 9 Aug 2021 13:24:48 -0400 Subject: [PATCH 65/79] extxyz output --- nequip/scripts/test_error.py | 81 +++++++++++++++++++++----------- tests/scripts/test_test_error.py | 20 +++++++- 2 files changed, 73 insertions(+), 28 deletions(-) diff --git a/nequip/scripts/test_error.py b/nequip/scripts/test_error.py index fed101af..9517aef5 100644 --- a/nequip/scripts/test_error.py +++ b/nequip/scripts/test_error.py @@ -5,6 +5,8 @@ import contextlib from tqdm.auto import tqdm +import ase.io + import torch from nequip.utils import Config, dataset_from_config @@ -71,6 +73,12 @@ def main(args=None): type=str, default=None, ) + parser.add_argument( + "--output", + help="XYZ file to write out the test set and model predicted forces, energies, etc. 
to.", + type=Path, + default=None, + ) # Something has to be provided # See https://stackoverflow.com/questions/22368458/how-to-make-argparse-print-usage-when-no-option-is-given-to-the-code if len(sys.argv) == 1: @@ -105,6 +113,9 @@ def main(args=None): raise ValueError("--metrics-config or --train-dir must be provided") if args.model is None: raise ValueError("--model or --train-dir must be provided") + if args.output is not None: + if args.output.suffix != ".xyz": + raise ValueError("Only extxyz format for `--output` is supported.") if args.device is None: device = torch.device("cuda" if torch.cuda.is_available() else "cpu") @@ -200,35 +211,51 @@ def main(args=None): batch_size: int = args.batch_size print("Starting...", file=sys.stderr) - with tqdm(bar_format="{desc}") as display_bar: - with tqdm(total=len(test_idcs)) as prog: - while True: - datas = [ - dataset.get(int(idex)) - for idex in test_idcs[ - batch_i * batch_size : (batch_i + 1) * batch_size - ] - ] - if len(datas) == 0: - break - batch = c.collate(datas) - batch = batch.to(device) - out = model(AtomicData.to_AtomicDataDict(batch)) - # Accumulate metrics - with torch.no_grad(): - metrics(out, batch) - - batch_i += 1 - display_bar.set_description_str( - " | ".join( - f"{k} = {v:4.2f}" - for k, v in metrics.flatten_metrics(metrics.current_result())[ - 0 - ].items() + context_stack = contextlib.ExitStack() + with contextlib.ExitStack() as context_stack: + display_bar = context_stack.enter_context(tqdm(bar_format="{desc}")) + prog = context_stack.enter_context(tqdm(total=len(test_idcs))) + + if args.output is not None: + output = context_stack.enter_context(open(args.output, "w")) + else: + output = None + + while True: + datas = [ + dataset.get(int(idex)) + for idex in test_idcs[batch_i * batch_size : (batch_i + 1) * batch_size] + ] + if len(datas) == 0: + break + batch = c.collate(datas) + batch = batch.to(device) + out = model(AtomicData.to_AtomicDataDict(batch)) + + with torch.no_grad(): + # Write output + if output is not None: + ase.io.write( + output, + AtomicData.from_AtomicDataDict(out).to(device="cpu").to_ase(), + format="extxyz", + append=True, ) + # Accumulate metrics + metrics(out, batch) + + batch_i += 1 + display_bar.set_description_str( + " | ".join( + f"{k} = {v:4.2f}" + for k, v in metrics.flatten_metrics(metrics.current_result())[ + 0 + ].items() ) - prog.update(batch.num_graphs) - display_bar.close() + ) + prog.update(batch.num_graphs) + + display_bar.close() prog.close() print(file=sys.stderr) diff --git a/tests/scripts/test_test_error.py b/tests/scripts/test_test_error.py index bd7d54ca..06ee9c56 100644 --- a/tests/scripts/test_test_error.py +++ b/tests/scripts/test_test_error.py @@ -5,6 +5,7 @@ import subprocess import os import textwrap +import shutil import numpy as np import torch @@ -78,7 +79,7 @@ def test_metrics(training_session, do_test_idcs, do_metrics): # == Run test error == outdir = f"{true_config['root']}/{true_config['run_name']}/" - default_params = {"train-dir": outdir} + default_params = {"train-dir": outdir, "output": tmpdir + "/out.xyz"} def runit(params: dict): tmp = default_params.copy() @@ -145,6 +146,8 @@ def runit(params: dict): # First run metrics = runit({"train-dir": outdir, "batch-size": 200, "device": "cpu"}) + # move out.xyz to out-orig.xyz + shutil.move(tmpdir + "/out.xyz", tmpdir + "/out-orig.xyz") assert set(metrics.keys()) == expect_metrics @@ -162,6 +165,21 @@ def runit(params: dict): ) for k, v in metrics.items(): assert np.all(np.abs(v - metrics2[k]) < 1e-5) + # Diff the 
output XYZ, which shouldn't change at all + # Use `cmp`, which is UNIX standard, to make efficient + # See https://stackoverflow.com/questions/12900538/fastest-way-to-tell-if-two-files-have-the-same-contents-in-unix-linux + cmp_retval = subprocess.run( + ["cmp", "--silent", tmpdir + "/out-orig.xyz", tmpdir + "/out.xyz"] + ) + if cmp_retval.returncode == 0: + # same + pass + if cmp_retval.returncode == 1: + raise AssertionError( + f"Changing batch size to {batch_size} changed out.xyz!" + ) + else: + cmp_retval.check_returncode() # error out for subprocess problem # Check GPU if torch.cuda.is_available(): From 313a225edb2bd30e8668d4e0b4fa2090e93e4fe6 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Mon, 9 Aug 2021 13:40:32 -0400 Subject: [PATCH 66/79] `test-error` -> `evaluate` --- CHANGELOG.md | 2 +- nequip/scripts/{test_error.py => evaluate.py} | 0 setup.py | 2 +- tests/scripts/{test_test_error.py => test_evaluate.py} | 2 +- 4 files changed, 3 insertions(+), 3 deletions(-) rename nequip/scripts/{test_error.py => evaluate.py} (100%) rename tests/scripts/{test_test_error.py => test_evaluate.py} (99%) diff --git a/CHANGELOG.md b/CHANGELOG.md index e323df9f..2e0dc135 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ Most recent change on the bottom. ### Added - `SequentialGraphNetwork` now has insertion methods - `nn.SaveForOutput` -- `nequip-test-error` command for evaluating metrics on trained models +- `nequip-evaluate` command for evaluating (metrics on) trained models ### Changed - Nonlinearities now specified with `e` and `o` instead of `1` and `-1` diff --git a/nequip/scripts/test_error.py b/nequip/scripts/evaluate.py similarity index 100% rename from nequip/scripts/test_error.py rename to nequip/scripts/evaluate.py diff --git a/setup.py b/setup.py index e03b344f..db624516 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ "nequip-train = nequip.scripts.train:main", "nequip-restart = nequip.scripts.restart:main", "nequip-requeue = nequip.scripts.requeue:main", - "nequip-test-error = nequip.scripts.test_error:main", + "nequip-evaluate = nequip.scripts.evaluate:main", "nequip-deploy = nequip.scripts.deploy:main", ] }, diff --git a/tests/scripts/test_test_error.py b/tests/scripts/test_evaluate.py similarity index 99% rename from tests/scripts/test_test_error.py rename to tests/scripts/test_evaluate.py index 06ee9c56..472de3aa 100644 --- a/tests/scripts/test_test_error.py +++ b/tests/scripts/test_evaluate.py @@ -87,7 +87,7 @@ def runit(params: dict): params = tmp del tmp retcode = subprocess.run( - ["nequip-test-error"] + ["nequip-evaluate"] + sum( (["--" + k, str(v)] for k, v in params.items() if v is not None), start=[], From 22dc84cb56af8a21a043796132cd1c01167a96fa Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Mon, 9 Aug 2021 13:49:24 -0400 Subject: [PATCH 67/79] allow not computing metrics --- nequip/scripts/evaluate.py | 105 +++++++++++++++++++++---------------- 1 file changed, 59 insertions(+), 46 deletions(-) diff --git a/nequip/scripts/evaluate.py b/nequip/scripts/evaluate.py index 9517aef5..d90e3bb8 100644 --- a/nequip/scripts/evaluate.py +++ b/nequip/scripts/evaluate.py @@ -51,8 +51,8 @@ def main(args=None): ) parser.add_argument( "--metrics-config", - help="A YAML config file specifying the metrics to compute. If omitted, `config_final.yaml` in `train_dir` will be used. 
If the config does not specify `metrics_components`, the default is to print MAEs and RMSEs for all fields given in the loss function.", - type=Path, + help="A YAML config file specifying the metrics to compute. If omitted, `config_final.yaml` in `train_dir` will be used. If the config does not specify `metrics_components`, the default is to print MAEs and RMSEs for all fields given in the loss function. If the literal string `None`, no metrics will be computed.", + type=str, default=None, ) parser.add_argument( @@ -106,11 +106,18 @@ def main(args=None): val_idcs = set(trainer["val_idcs"].tolist()) else: train_idcs = val_idcs = None + # update + if args.metrics_config == "None": + args.metrics_config = None + else: + args.metrics_config = Path(args.metrics_config) # validate if args.dataset_config is None: raise ValueError("--dataset-config or --train-dir must be provided") - if args.metrics_config is None: - raise ValueError("--metrics-config or --train-dir must be provided") + if args.metrics_config is None and args.output is None: + raise ValueError( + "Nothing to do! Must provide at least one of --metrics-config, --train-dir (to use training config for metrics), or --output" + ) if args.model is None: raise ValueError("--model or --train-dir must be provided") if args.output is not None: @@ -180,32 +187,34 @@ def main(args=None): ) # Figure out what metrics we're actually computing - metrics_config = Config.from_file(str(args.metrics_config)) - metrics_components = metrics_config.get("metrics_components", None) - # See trainer.py: init() and init_metrics() - # Default to loss functions if no metrics specified: - if metrics_components is None: - loss, _ = instantiate( - builder=Loss, - prefix="loss", - positional_args=dict(coeffs=metrics_config.loss_coeffs), + do_metrics = args.metrics_config is not None + if do_metrics: + metrics_config = Config.from_file(str(args.metrics_config)) + metrics_components = metrics_config.get("metrics_components", None) + # See trainer.py: init() and init_metrics() + # Default to loss functions if no metrics specified: + if metrics_components is None: + loss, _ = instantiate( + builder=Loss, + prefix="loss", + positional_args=dict(coeffs=metrics_config.loss_coeffs), + all_args=metrics_config, + ) + metrics_components = [] + for key, func in loss.funcs.items(): + params = { + "PerSpecies": type(func).__name__.startswith("PerSpecies"), + } + metrics_components.append((key, "mae", params)) + metrics_components.append((key, "rmse", params)) + + metrics, _ = instantiate( + builder=Metrics, + prefix="metrics", + positional_args=dict(components=metrics_components), all_args=metrics_config, ) - metrics_components = [] - for key, func in loss.funcs.items(): - params = { - "PerSpecies": type(func).__name__.startswith("PerSpecies"), - } - metrics_components.append((key, "mae", params)) - metrics_components.append((key, "rmse", params)) - - metrics, _ = instantiate( - builder=Metrics, - prefix="metrics", - positional_args=dict(components=metrics_components), - all_args=metrics_config, - ) - metrics.to(device=device) + metrics.to(device=device) batch_i: int = 0 batch_size: int = args.batch_size @@ -213,7 +222,8 @@ def main(args=None): print("Starting...", file=sys.stderr) context_stack = contextlib.ExitStack() with contextlib.ExitStack() as context_stack: - display_bar = context_stack.enter_context(tqdm(bar_format="{desc}")) + if do_metrics: + display_bar = context_stack.enter_context(tqdm(bar_format="{desc}")) prog = 
context_stack.enter_context(tqdm(total=len(test_idcs))) if args.output is not None: output = context_stack.enter_context(open(args.output, "w")) else: output = None @@ -242,30 +252,33 @@ def main(args=None): append=True, ) # Accumulate metrics - metrics(out, batch) + if do_metrics: + metrics(out, batch) + display_bar.set_description_str( + " | ".join( + f"{k} = {v:4.2f}" + for k, v in metrics.flatten_metrics( + metrics.current_result() + )[0].items() + ) + ) batch_i += 1 - display_bar.set_description_str( - " | ".join( - f"{k} = {v:4.2f}" - for k, v in metrics.flatten_metrics(metrics.current_result())[ - 0 - ].items() - ) - ) prog.update(batch.num_graphs) - display_bar.close() + if do_metrics: + display_bar.close() prog.close() - print(file=sys.stderr) - print("--- Final result: ---", file=sys.stderr) - print( - "\n".join( - f"{k:>20s} = {v:< 20f}" - for k, v in metrics.flatten_metrics(metrics.current_result())[0].items() + if do_metrics: + print(file=sys.stderr) + print("--- Final result: ---", file=sys.stderr) + print( + "\n".join( + f"{k:>20s} = {v:< 20f}" + for k, v in metrics.flatten_metrics(metrics.current_result())[0].items() + ) ) - ) if __name__ == "__main__": From 46781247fd597492e292fd621cd88b1866a62f28 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Mon, 9 Aug 2021 16:04:57 -0400 Subject: [PATCH 68/79] warning --- nequip/scripts/evaluate.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/nequip/scripts/evaluate.py b/nequip/scripts/evaluate.py index d90e3bb8..3a58ff66 100644 --- a/nequip/scripts/evaluate.py +++ b/nequip/scripts/evaluate.py @@ -109,8 +109,9 @@ def main(args=None): # update if args.metrics_config == "None": args.metrics_config = None - else: + elif args.metrics_config is not None: args.metrics_config = Path(args.metrics_config) + do_metrics = args.metrics_config is not None # validate if args.dataset_config is None: raise ValueError("--dataset-config or --train-dir must be provided") @@ -173,6 +174,11 @@ def main(args=None): f"Using training dataset minus training and validation frames, yielding a test set size of {len(test_idcs)} frames.", file=sys.stderr, ) + if do_metrics: + print( + "WARNING: using the automatic test set ^^^ but not computing metrics, is this really what you wanted to do?", + file=sys.stderr, + ) else: # load from file test_idcs = load_file( @@ -187,7 +193,6 @@ def main(args=None): ) # Figure out what metrics we're actually computing - do_metrics = args.metrics_config is not None From d0101ade51b95abb0c65609da02ecb9e52afb6ee Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Mon, 9 Aug 2021 16:27:25 -0400 Subject: [PATCH 69/79] `from_atoms_list` --- nequip/data/dataset.py | 41 ++++++++++++++++++++++++++++++++++++-- tests/data/test_dataset.py | 12 +++++++++++ 2 files changed, 51 insertions(+), 2 deletions(-) diff --git a/nequip/data/dataset.py b/nequip/data/dataset.py index 927da4f8..e684ce35 100644 --- a/nequip/data/dataset.py +++ b/nequip/data/dataset.py @@ -6,9 +6,11 @@ """ import numpy as np import logging - +import tempfile from os.path import dirname, basename, abspath -from typing import Tuple, Dict, Any, List, Callable, Union, Optional +from typing import Tuple, Dict, Any, List, Callable, Union, Optional, Sequence + +import ase.io import torch from torch_geometric.data import Batch, Dataset, download_url, extract_zip @@ -468,6 +470,41 @@ def 
__init__( include_frames=include_frames, ) + @classmethod + def from_atoms_list(cls, atoms: Sequence[ase.Atoms], **kwargs): + """Make an ``ASEDataset`` from a list of ``ase.Atoms`` objects. + + If `root` is not provided, a temporary directory will be used. + + Please note that this is a convenience method that does NOT avoid a round-trip to disk; the provided ``atoms`` will be written out to a file. + + Ignores ``kwargs["file_name"]`` if it is provided. + + Args: + atoms + **kwargs: passed through to the constructor + Returns: + The constructed ``ASEDataset``. + """ + if "root" not in kwargs: + tmpdir = tempfile.TemporaryDirectory() + kwargs["root"] = tmpdir.name + else: + tmpdir = None + kwargs["file_name"] = kwargs["root"] + "/atoms.xyz" + atoms = list(atoms) + # Write them out + ase.io.write(kwargs["file_name"], atoms, format="extxyz") + # Read them in + obj = cls(**kwargs) + if tmpdir is not None: + # Make it keep a reference to the tmpdir to keep it alive + # When the dataset is garbage collected, the tmpdir will + # be too, and will (hopefully) get deleted eventually. + # Or at least by end of program... + obj._tmpdir_ref = tmpdir + return obj + @property def raw_file_names(self): return [basename(self.file_name)] diff --git a/tests/data/test_dataset.py b/tests/data/test_dataset.py index 3fb63cb5..c75b5d82 100644 --- a/tests/data/test_dataset.py +++ b/tests/data/test_dataset.py @@ -180,3 +180,15 @@ def test_ase(self, ase_file, root): a = dataset_from_config(config) assert isdir(a.root) assert isdir(f"{a.root}/processed") + + +class TestFromList: + def test_from_atoms(self, molecules): + dataset = ASEDataset.from_atoms_list( + molecules, extra_fixed_fields={"r_max": 4.5} + ) + assert len(dataset) == len(molecules) + for i, mol in enumerate(molecules): + assert np.array_equal( + mol.get_atomic_numbers(), dataset.get(i).to_ase().get_atomic_numbers() + ) From f5764a654afdefd603f1f71e93b0beb0a0b18637 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Mon, 9 Aug 2021 16:27:33 -0400 Subject: [PATCH 70/79] tmp name bug --- tests/data/test_dataloader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/data/test_dataloader.py b/tests/data/test_dataloader.py index 7131d225..871ecf96 100644 --- a/tests/data/test_dataloader.py +++ b/tests/data/test_dataloader.py @@ -67,9 +67,9 @@ def npz_dataset(): Z=np.random.randint(1, 108, size=(nframes, natoms)), ) with tempfile.TemporaryDirectory() as folder: - np.savez(folder + "npzdata.npz", **npz) + np.savez(folder + "/npzdata.npz", **npz) a = NpzDataset( - file_name=folder + "npzdata.npz", + file_name=folder + "/npzdata.npz", root=folder, extra_fixed_fields={"r_max": 3}, ) From c26a5b5331774d6a5382cae338fb56e39bb27f37 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Mon, 9 Aug 2021 16:28:13 -0400 Subject: [PATCH 71/79] update to new name --- tests/model/test_eng_force.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/model/test_eng_force.py b/tests/model/test_eng_force.py index b1bb8406..a8db666e 100644 --- a/tests/model/test_eng_force.py +++ b/tests/model/test_eng_force.py @@ -13,7 +13,7 @@ from nequip.data import AtomicDataDict, AtomicData from nequip.models import EnergyModel, ForceModel from nequip.nn import GraphModuleMixin, AtomwiseLinear -from nequip.utils.initialization import uniform_initialize_linears +from nequip.utils.initialization import uniform_initialize_equivariant_linears from 
nequip.utils.test import assert_AtomicData_equivariant @@ -117,7 +117,7 @@ def test_weight_init(self, model, atomic_batch, device): out_orig = instance(data)[out_field] with torch.no_grad(): - instance.apply(uniform_initialize_linears) + instance.apply(uniform_initialize_equivariant_linears) out_unif = instance(data)[out_field] assert not torch.allclose(out_orig, out_unif) From b4a339ca23f1632a6ace735d0471cd1ba2d57a3b Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Mon, 9 Aug 2021 17:19:17 -0400 Subject: [PATCH 72/79] python <3.8 fix atomic_save --- CHANGELOG.md | 1 + nequip/utils/savenload.py | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 775d7841..25db8a93 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ Most recent change on the bottom. - `Final` backport for <3.8 compatability - Fixed `nequip-*` commands when using `pip install` - Default models rescale per-atom energies, and not just total +- Fixed Python <3.8 backward compatability with `atomic_save` ## [0.3.3] - 2021-06-24 ### Added diff --git a/nequip/utils/savenload.py b/nequip/utils/savenload.py index 202b6bf5..8c87a853 100644 --- a/nequip/utils/savenload.py +++ b/nequip/utils/savenload.py @@ -2,6 +2,7 @@ utilities that involve file searching and operations (i.e. save/load) """ from typing import Union +import sys import logging import contextlib from pathlib import Path @@ -22,7 +23,13 @@ def atomic_write(filename: Union[Path, str]): tmp_path.rename(filename) finally: # clean up - tmp_path.unlink(missing_ok=True) + # `missing_ok` is only available on Python >= 3.8 + if sys.version_info[1] >= 8: + tmp_path.unlink(missing_ok=True) + else: + # race condition? + if tmp_path.exists(): + tmp_path.unlink() def save_file( From 77dfb73d0da726ba2ba0e956f5c81f11e10c491d Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Tue, 10 Aug 2021 14:14:54 -0400 Subject: [PATCH 73/79] testing with CUDA --- tests/scripts/test_deploy.py | 18 +++++++++++------- tests/scripts/test_train.py | 10 ++++++---- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/tests/scripts/test_deploy.py b/tests/scripts/test_deploy.py index 9bdc66dc..c0b32124 100644 --- a/tests/scripts/test_deploy.py +++ b/tests/scripts/test_deploy.py @@ -16,9 +16,9 @@ def test_deploy(nequip_dataset, BENCHMARK_ROOT): dtype = str(torch.get_default_dtype())[len("torch.") :] - if torch.cuda.is_available(): - # TODO: is this true? - pytest.skip("CUDA and subprocesses have issues") + # if torch.cuda.is_available(): + # # TODO: is this true? 
+ # pytest.skip("CUDA and subprocesses have issues") config_path = pathlib.Path(__file__).parents[2] / "configs/minimal.yaml" true_config = yaml.load(config_path.read_text(), Loader=yaml.Loader) @@ -50,12 +50,13 @@ def test_deploy(nequip_dataset, BENCHMARK_ROOT): assert deployed_path.is_file(), "Deploy didn't create file" # now test predictions the same - data = AtomicData.to_AtomicDataDict(nequip_dataset.get(0)) + best_mod = torch.load(f"{tmpdir}/{run_name}/best_model.pth") + device = next(best_mod.parameters()).device + data = AtomicData.to_AtomicDataDict(nequip_dataset.get(0).to(device)) # Needed because of debug mode: data[AtomicDataDict.TOTAL_ENERGY_KEY] = data[ AtomicDataDict.TOTAL_ENERGY_KEY ].unsqueeze(0) - best_mod = torch.load(f"{tmpdir}/{run_name}/best_model.pth") train_pred = best_mod(data)[AtomicDataDict.TOTAL_ENERGY_KEY] # load model and check that metadata saved @@ -63,14 +64,17 @@ def test_deploy(nequip_dataset, BENCHMARK_ROOT): deploy.NEQUIP_VERSION_KEY: "", deploy.R_MAX_KEY: "", } - deploy_mod = torch.jit.load(deployed_path, _extra_files=metadata) + deploy_mod = torch.jit.load( + deployed_path, _extra_files=metadata, map_location="cpu" + ) # Everything we store right now is ASCII, so decode for printing metadata = {k: v.decode("ascii") for k, v in metadata.items()} assert metadata[deploy.NEQUIP_VERSION_KEY] == nequip.__version__ assert np.allclose(float(metadata[deploy.R_MAX_KEY]), true_config["r_max"]) + data = AtomicData.to_AtomicDataDict(nequip_dataset.get(0).to("cpu")) deploy_pred = deploy_mod(data)[AtomicDataDict.TOTAL_ENERGY_KEY] - assert torch.allclose(train_pred, deploy_pred, atol=1e-7) + assert torch.allclose(train_pred.to("cpu"), deploy_pred, atol=1e-7) # now test info retcode = subprocess.run( diff --git a/tests/scripts/test_train.py b/tests/scripts/test_train.py index 06179ba6..c6d798b1 100644 --- a/tests/scripts/test_train.py +++ b/tests/scripts/test_train.py @@ -90,9 +90,9 @@ def test_metrics(nequip_dataset, BENCHMARK_ROOT, conffile, field, builder): dtype = str(torch.get_default_dtype())[len("torch.") :] - if torch.cuda.is_available(): - # TODO: is this true? - pytest.skip("CUDA and subprocesses have issues") + # if torch.cuda.is_available(): + # # TODO: is this true? 
+ # pytest.skip("CUDA and subprocesses have issues") path_to_this_file = pathlib.Path(__file__) config_path = path_to_this_file.parents[2] / f"configs/{conffile}" @@ -187,4 +187,6 @@ def test_metrics(nequip_dataset, BENCHMARK_ROOT, conffile, field, builder): one = model.model.one # Since the loss is always zero, even though the constant # 1 was trainable, it shouldn't have changed - assert torch.allclose(one, torch.ones(1)) + assert torch.allclose( + one, torch.ones(1, device=one.device, dtype=one.dtype) + ) From f40ddfa3bdfaa432424f101799769e1bb92e3daa Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Tue, 10 Aug 2021 14:24:10 -0400 Subject: [PATCH 74/79] bugfix --- nequip/scripts/evaluate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nequip/scripts/evaluate.py b/nequip/scripts/evaluate.py index 3a58ff66..5e12a1d0 100644 --- a/nequip/scripts/evaluate.py +++ b/nequip/scripts/evaluate.py @@ -174,7 +174,7 @@ def main(args=None): f"Using training dataset minus training and validation frames, yielding a test set size of {len(test_idcs)} frames.", file=sys.stderr, ) - if do_metrics: + if not do_metrics: print( "WARNING: using the automatic test set ^^^ but not computing metrics, is this really what you wanted to do?", file=sys.stderr, From cc189c033ddf0b922fb97ae4c000c26e00cd0b43 Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Tue, 10 Aug 2021 14:46:06 -0400 Subject: [PATCH 75/79] truncate --- nequip/scripts/evaluate.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/nequip/scripts/evaluate.py b/nequip/scripts/evaluate.py index 5e12a1d0..a6b55279 100644 --- a/nequip/scripts/evaluate.py +++ b/nequip/scripts/evaluate.py @@ -227,9 +227,11 @@ def main(args=None): print("Starting...", file=sys.stderr) context_stack = contextlib.ExitStack() with contextlib.ExitStack() as context_stack: - if do_metrics: - display_bar = context_stack.enter_context(tqdm(bar_format="{desc}")) prog = context_stack.enter_context(tqdm(total=len(test_idcs))) + if do_metrics: + display_bar = context_stack.enter_context( + tqdm(bar_format="{desc:." + str(prog.ncols) + "}") + ) if args.output is not None: output = context_stack.enter_context(open(args.output, "w")) @@ -271,9 +273,9 @@ def main(args=None): batch_i += 1 prog.update(batch.num_graphs) + prog.close() if do_metrics: display_bar.close() - prog.close() if do_metrics: print(file=sys.stderr) From 9b06bfcee1498284248e8f4a8af5f26990714f9b Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Wed, 11 Aug 2021 13:41:25 -0400 Subject: [PATCH 76/79] fix when run as subprocess --- nequip/scripts/evaluate.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/nequip/scripts/evaluate.py b/nequip/scripts/evaluate.py index a6b55279..420bcc9e 100644 --- a/nequip/scripts/evaluate.py +++ b/nequip/scripts/evaluate.py @@ -227,10 +227,16 @@ def main(args=None): print("Starting...", file=sys.stderr) context_stack = contextlib.ExitStack() with contextlib.ExitStack() as context_stack: - prog = context_stack.enter_context(tqdm(total=len(test_idcs))) + # "None" checks if in a TTY and disables if not + prog = context_stack.enter_context(tqdm(total=len(test_idcs), disable=None)) if do_metrics: display_bar = context_stack.enter_context( - tqdm(bar_format="{desc:." 
+ str(prog.ncols) + "}") + tqdm( + bar_format="" + if prog.disable # prog.ncols doesn't exist if disabled + else ("{desc:." + str(prog.ncols) + "}"), + disable=None, + ) ) if args.output is not None: From 84a2d90e7faaac24946c4b8cd9a6c896e4f3ccba Mon Sep 17 00:00:00 2001 From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com> Date: Wed, 11 Aug 2021 17:06:20 -0400 Subject: [PATCH 77/79] README update --- README.md | 101 +++++++++++++++++++++++++++++------------------------- 1 file changed, 54 insertions(+), 47 deletions(-) diff --git a/README.md b/README.md index ba169d51..59228e60 100644 --- a/README.md +++ b/README.md @@ -13,35 +13,18 @@ NequIP is an open-source code for building E(3)-equivariant interatomic potentia NequIP requires: * Python >= 3.6 -* PyTorch = 1.8 +* PyTorch >= 1.8 To install: -* Install [PyTorch Geometric](https://github.com/rusty1s/pytorch_geometric), make sure to install this with your correct version of CUDA/CPU and to use PyTorch Geometric version 1.7.1: - -``` -pip install torch-scatter -f https://pytorch-geometric.com/whl/torch-1.8.0+${CUDA}.html -pip install torch-sparse -f https://pytorch-geometric.com/whl/torch-1.8.0+${CUDA}.html -pip install torch-cluster -f https://pytorch-geometric.com/whl/torch-1.8.0+${CUDA}.html -pip install torch-spline-conv -f https://pytorch-geometric.com/whl/torch-1.8.0+${CUDA}.html -pip install torch-geometric==1.7.1 -pip install e3nn==0.3.3 -``` - -where ```${CUDA}``` should be replaced by either ```cpu```, ```cu101```, ```cu102```, or ```cu111``` depending on your PyTorch installation, for details see [here](https://github.com/rusty1s/pytorch_geometric). - -* Install [e3nn](https://github.com/e3nn/e3nn), version 0.3.3: - -``` -pip install e3nn==0.3.3 -``` +* Install [PyTorch Geometric](https://github.com/rusty1s/pytorch_geometric), following [their installation instructions](https://pytorch-geometric.readthedocs.io/en/latest/notes/installation.html) and making sure to install with the correct version of CUDA. (Please note that `torch_geometric>=1.7.1)` is required.) * Install our fork of [`pytorch_ema`](https://github.com/Linux-cpp-lisp/pytorch_ema) for using an Exponential Moving Average on the weights: ```bash -$ pip install git+https://github.com/Linux-cpp-lisp/pytorch_ema@context_manager#egg=torch_ema +$ pip install "git+https://github.com/Linux-cpp-lisp/pytorch_ema@context_manager#egg=torch_ema" ``` -* We use [Weights&Biases](https://wandb.ai) to keep track of experiments. This is not a strict requirement, you can use our package without this, but it may make your life easier. If you want to use it, create an account [here](https://wandb.ai) and install it: +* We use [Weights&Biases](https://wandb.ai) to keep track of experiments. This is not a strict requirement — you can use our package without it — but it may make your life easier. If you want to use it, create an account [here](https://wandb.ai) and install the Python package: ``` pip install wandb @@ -57,13 +40,15 @@ pip install . ### Installation Issues -We recommend running the tests using ```pytest``` on a CPU: +We recommend running the tests using ```pytest```: ``` pip install pytest pytest ./tests/ ``` +While the tests are somewhat compute intensive, we've known them to hang on certain systems that have GPUs. If this happens to you, please report it along with information on your software environment in the [Issues](https://github.com/mir-group/nequip/issues)! + ## Usage **! 
PLEASE NOTE:** the first few training epochs/calls to a NequIP model can be painfully slow. This is expected behaviour as the [profile-guided optimization of TorchScript models](https://program-transformations.github.io/slides/pytorch_neurips.pdf) takes a number of calls to warm up before optimizing the model. This occurs regardless of whether the entire model is compiled because many core components from e3nn are compiled and optimized through TorchScript. @@ -84,9 +69,33 @@ A number of example configuration files are provided: Training runs can be restarted using `nequip-restart`; training that starts fresh or restarts depending on the existance of the working directory can be launched using `nequip-requeue`. All `nequip-*` commands accept the `--help` option to show their call signatures and options. -### In-depth tutorial +### Evaluating trained models (and their error) + +The `nequip-evaluate` command can be used to evaluate a trained model on a specified dataset, optionally computing error metrics or writing the results to an XYZ file for further processing. -A more in-depth introduction to the internals of NequIP can be found in the [tutorial notebook](https://deepnote.com/project/2412ca93-7ad1-4458-972c-5d5add5a667e). +The simplest command is: +```bash +$ nequip-evaluate --train-dir /path/to/training/session/ +``` +which will evaluate the original training error metrics over any part of the original dataset not used in the training or validation sets. + +For more details on this command, please run `nequip-evaluate --help`. + +### Deploying models + +The `nequip-deploy` command is used to deploy the result of a training session into a model that can be stored and used for inference. +It compiles a NequIP model trained in Python to [TorchScript](https://pytorch.org/docs/stable/jit.html). +The result is an optimized model file that has no dependency on the `nequip` Python library, or even on Python itself: +```bash +nequip-deploy build path/to/training/session/ path/to/deployed.pth +``` +For more details on this command, please run `nequip-deploy --help`. + +### Using models in Python + +Both deployed and undeployed models can be used in Python code; for details, see the end of the [Developer's tutorial](https://deepnote.com/project/2412ca93-7ad1-4458-972c-5d5add5a667e) mentioned again below. + +An ASE calculator is also provided in `nequip.dynamics`. ### LAMMPS Integration @@ -94,12 +103,11 @@ NequIP is integrated with the popular Molecular Dynamics code [LAMMPS](https://w The interface is implemented as `pair_style nequip`. Using it requires two simple steps: -1. Deploy a trained NequIP model. This step compiles a NequIP model trained in Python to [TorchScript](https://pytorch.org/docs/stable/jit.html). -The result is an optimized model file that has no Python dependency and can be used by standalone C++ programs such as LAMMPS: - +1. Deploy a trained NequIP model, as discussed above. ``` nequip-deploy build path/to/training/session/ path/to/deployed.pth ``` +The result is an optimized model file that has no Python dependency and can be used by standalone C++ programs such as LAMMPS. 2. Change the LAMMPS input file to the nequip `pair_style` and point it to the deployed NequIP model: @@ -108,29 +116,35 @@ pair_style nequip pair_coeff * * deployed.pth ``` -For installation instructions, please see the NequIP `pair_style` repo at https://github.com/mir-group/pair_nequip. 
+
+For more details on this command, please run `nequip-evaluate --help`.
+
+### Deploying models
+
+The `nequip-deploy` command is used to deploy the result of a training session into a model that can be stored and used for inference.
+It compiles a NequIP model trained in Python to [TorchScript](https://pytorch.org/docs/stable/jit.html).
+The result is an optimized model file that has no dependency on the `nequip` Python library, or even on Python itself:
+```bash
+nequip-deploy build path/to/training/session/ path/to/deployed.pth
+```
+For more details on this command, please run `nequip-deploy --help`.
+
+### Using models in Python
+
+Both deployed and undeployed models can be used in Python code; for details, see the end of the [Developer's tutorial](https://deepnote.com/project/2412ca93-7ad1-4458-972c-5d5add5a667e) mentioned again below.
+
+An ASE calculator is also provided in `nequip.dynamics`.
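+
+Because a deployed model is a self-contained TorchScript archive, it can be loaded with nothing but PyTorch. A minimal sketch (see the Developer's tutorial for the model's expected inputs and outputs):
+
+```python
+import torch
+
+# A deployed model carries no dependency on the `nequip` package:
+model = torch.jit.load("path/to/deployed.pth", map_location="cpu")
+model.eval()
+```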

 ### LAMMPS Integration

 NequIP is integrated with the popular Molecular Dynamics code [LAMMPS](https://www.lammps.org/).
 The interface is implemented as `pair_style nequip`. Using it requires two simple steps:

-1. Deploy a trained NequIP model. This step compiles a NequIP model trained in Python to [TorchScript](https://pytorch.org/docs/stable/jit.html).
-The result is an optimized model file that has no Python dependency and can be used by standalone C++ programs such as LAMMPS:
-
+1. Deploy a trained NequIP model, as discussed above.
 ```
 nequip-deploy build path/to/training/session/ path/to/deployed.pth
 ```
+The result is an optimized model file that has no Python dependency and can be used by standalone C++ programs such as LAMMPS.

 2. Change the LAMMPS input file to the nequip `pair_style` and point it to the deployed NequIP model:

 ```
 pair_style nequip
 pair_coeff * * deployed.pth
 ```

-For installation instructions, please see the NequIP `pair_style` repo at https://github.com/mir-group/pair_nequip.
+For installation instructions, please see the [`pair_nequip` repository](https://github.com/mir-group/pair_nequip).

-## References
+## Developer's tutorial

-The theory behind NequIP is described in our preprint [1]. NequIP's backend builds on e3nn, a general framework for building E(3)-equivariant neural networks [2].
+A more in-depth introduction to the internals of NequIP can be found in the [tutorial notebook](https://deepnote.com/project/2412ca93-7ad1-4458-972c-5d5add5a667e). This notebook discusses theoretical background as well as the Python interfaces that can be used to train and call models.

- [1] https://arxiv.org/abs/2101.03164
- [2] https://github.com/e3nn/e3nn
+Please note that for most common use cases, including customized models, the `nequip-*` commands should be preferred for training models.
+
+## References & citing
+
+The theory behind NequIP is described in our preprint (1). NequIP's backend builds on e3nn, a general framework for building E(3)-equivariant neural networks (2). If you use this repository in your work, please consider citing NequIP (1) and e3nn (3):
+
+ 1. https://arxiv.org/abs/2101.03164
+ 2. https://e3nn.org
+ 3. https://doi.org/10.5281/zenodo.3724963

 ## Authors

 NequIP is being developed by:

- - Simon Batzner
- - Albert Musaelian
- - Lixin Sun
- - Anders Johansson
- - Mario Geiger
- - Tess Smidt
-
-under the guidance of Boris Kozinsky at Harvard.
+ - Simon Batzner
+ - Albert Musaelian
+ - Lixin Sun
+ - Anders Johansson
+ - Mario Geiger
+ - Tess Smidt
+
+under the guidance of [Boris Kozinsky at Harvard](https://bkoz.seas.harvard.edu/).

 ## Contact & questions

 If you have questions, please don't hesitate to reach out at batzner[at]g[dot]harvard[dot]edu.

 If you find a bug or have a proposal for a feature, please post it in the [Issues](https://github.com/mir-group/nequip/issues).
 If you have a question, topic, or issue that isn't obviously one of those, try our [GitHub Discussions](https://github.com/mir-group/nequip/discussions).
-
-## Citing
-
-If you use this repository in your work, please consider citing NequIP (1) and e3nn (2):
-
- [1] https://arxiv.org/abs/2101.03164
- [2] https://doi.org/10.5281/zenodo.3724963

From fc18671187ddbb5489793515bbc025cb38f4cc20 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Wed, 11 Aug 2021 17:16:16 -0400
Subject: [PATCH 78/79] fix changelog

---
 CHANGELOG.md | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 232c3336..2d13b151 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@ Most recent change on the bottom.

 ## [Unreleased]
 ### Added
+- `to_ase` method in `AtomicData.py` to convert `AtomicData` object to (list of) `ase.Atoms` object(s)
 - `SequentialGraphNetwork` now has insertion methods
 - `nn.SaveForOutput`
 - `nequip-evaluate` command for evaluating (metrics on) trained models

 ### Changed
 - Nonlinearities now specified with `e` and `o` instead of `1` and `-1`
-- Update interfaces for `torch_geometric` 1.7 and `e3nn` 0.3.3
+- Update interfaces for `torch_geometric` 1.7.1 and `e3nn` 0.3.3
 - `nonlinearity_scalars` now also affects the nonlinearity used in the radial net of `InteractionBlock`
 - Cleaned up naming of initializers

@@ -24,11 +25,7 @@
 ### Fixed
 - `Final` backport for <3.8 compatibility
 - Fixed `nequip-*` commands when using `pip install`
 - Default models rescale per-atom energies, and not just total
-- Fixed Python <3.8 backward compatability with `atomic_save`
-
-## [0.3.3] - 2021-06-24
-### Added
-- `to_ase` method in `AtomicData.py` to convert `AtomicData` object to (list of) `ase.Atoms` object(s)
+- Fixed Python <3.8 backward compatibility with `atomic_save`

 ## [0.3.2] - 2021-06-09
 ### Added

From e8422ce1719a7d269c2e940db8bb620089581679 Mon Sep 17 00:00:00 2001
From: Linux-cpp-lisp <1473644+Linux-cpp-lisp@users.noreply.github.com>
Date: Wed, 11 Aug 2021 17:17:16 -0400
Subject: [PATCH 79/79] bump version

---
 CHANGELOG.md       | 2 ++
 nequip/_version.py | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2d13b151..b4859880 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 Most recent change on the bottom.

 ## [Unreleased]
+
+## [0.3.3] - 2021-08-11
 ### Added
 - `to_ase` method in `AtomicData.py` to convert `AtomicData` object to (list of) `ase.Atoms` object(s)
 - `SequentialGraphNetwork` now has insertion methods

diff --git a/nequip/_version.py b/nequip/_version.py
index f021bdb0..93a83158 100644
--- a/nequip/_version.py
+++ b/nequip/_version.py
@@ -2,4 +2,4 @@
 # See Python packaging guide
 # https://packaging.python.org/guides/single-sourcing-package-version/

-__version__ = "0.3.2"
+__version__ = "0.3.3"
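
With the version single-sourced in `nequip/_version.py` as above, a quick post-install sanity check is possible. This assumes, as is conventional for this single-sourcing pattern, that `nequip/__init__.py` re-exports `__version__`:

```bash
python -c "import nequip; print(nequip.__version__)"
```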