Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pymatgen update, adsorption vector, dockerfile update #92

Merged
merged 4 commits into from
Apr 2, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
167 changes: 167 additions & 0 deletions gaspy/atoms_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import re
import pickle
import numpy as np
import scipy
from scipy.spatial.qhull import QhullError
from ase import Atoms
from ase.build import rotate
Expand Down Expand Up @@ -251,6 +252,172 @@ def find_adsorption_sites(atoms):
sites = sites_dict['all']
return sites

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We try to keep two lines between functions

def find_bulk_cn_dict(bulk_atoms):
'''
Get a dictionary of coordination numbers
for each distinct site in the bulk structure.

Taken from pymatgen.core.surface Class Slab
`get_surface_sites`.
https://pymatgen.org/pymatgen.core.surface.html
'''
struct = AseAtomsAdaptor.get_structure(bulk_atoms)
sga = SpacegroupAnalyzer(struct)
sym_struct = sga.get_symmetrized_structure()
unique_indices = [equ[0] for equ in sym_struct.equivalent_indices]
# Get a dictionary of unique coordination numbers
# for atoms in each structure.
# for example, Pt[1,1,1] would have cn=3 and cn=12
# depends on the Pt atom.
v = VoronoiNN()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Single-letter object names are not a good practice. Expand it out to be more descriptive.

cn_dict = {}
for idx in unique_indices:
elem = sym_struct[idx].species_string
if elem not in cn_dict.keys():
cn_dict[elem] = []
cn = v.get_cn(sym_struct, idx, use_weights=True)
cn = float('%.5f' %(round(cn, 5)))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

flake8 flag E225: missing whitespace around operator (add space after %)

if cn not in cn_dict[elem]:
cn_dict[elem].append(cn)
return cn_dict


def find_surface_atoms_indices(bulk_cn_dict, atoms):
'''
A helper function referencing codes from pymatgen to
get a list of surface atoms indices of a slab's
top surface. Due to how our workflow is setup, the
pymatgen method cannot be directly applied.

Taken from pymatgen.core.surface Class Slab,
`get_surface_sites`.
https://pymatgen.org/pymatgen.core.surface.html

Arg:
bulk_cn_dict A dictionary of coordination numbers
for each distinct site in the respective bulk structure
atoms The slab where you are trying to find surface sites in
`ase.Atoms` format
Output:
indices_list A list that contains the indices of
the surface atoms
'''
struct = AseAtomsAdaptor.get_structure(atoms)
v = VoronoiNN()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Single-letter object names are not a good practice. Expand it out to be more descriptive.

# Identify index of the surface atoms
indices_list = []
weights = [s.species.weight for s in struct]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not a good practice to have 1-letter variables. In this case, s is probably a pymatgen.Site object, right? So call it site.

center_of_mass = np.average(struct.frac_coords,
weights=weights, axis=0)

for idx, site in enumerate(struct):
if site.frac_coords[2] > center_of_mass[2]:
try:
cn = v.get_cn(struct, idx, use_weights=True)
cn = float('%.5f' %(round(cn, 5)))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

flake8 flag E225: missing whitespace around operator (add space after %)

# surface atoms are undercoordinated
if cn < min(bulk_cn_dict[site.species_string]):
indices_list.append(idx)
except RuntimeError:
# or if pathological error is returned,
# indicating a surface site
indices_list.append(idx)
return indices_list


def _plane_normal(coords):
"""
Return the surface normal vector to a plane of best fit
by performing planar regression.
See https://gist.github.com/amroamroamro/1db8d69b4b65e8bc66a6
for the method.

Arg:
coords A `numpy.ndarray` (n,3),
coordinates of atoms on the slab surface.

Output:
vector numpy.ndarray. Adsorption vector for an adsorption site.
"""
A = np.c_[coords[:, 0], coords[:, 1], np.ones(coords.shape[0])]
vector, _, _, _ = scipy.linalg.lstsq(A, coords[:, 2])
vector[2] = -1.0
vector /= -np.linalg.norm(vector)
return vector

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

2 lines between functions

def _ang_between_vectors(v1, v2):

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need or extra lines between function definition & docstring

"""
Returns the angle in degree
between 3D vectors 'v1' and 'v2'

Arg:
v1 3D vector in np.array(x1,y1,z1) form,
the origin is (0,0,0).
v1 3D vector in np.array(x2,y2,z2) form,
the origin is (0,0,0).

Output:
angle angle in degrees.
"""
cosang = np.dot(v1, v2)
sinang = np.linalg.norm(np.cross(v1, v2))
# np.arctan2(sinang, cosang) is angle in radian
radian = np.arctan2(sinang, cosang)
angle = radian * 57.2958
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's probably some function in scipy to do this more accurately. Not a big deal though and should probably keep it since we have a lot of the catalog built.

return angle


def find_adsorption_vector(bulk_cn_dict, slab_atoms, surface_indices, adsorption_site):
"""
Returns the vector of an adsorption site representing the
furthest distance from the neighboring atoms.
The vector is a (1,3) numpy array.
The idea comes from CatKit.
https://catkit.readthedocs.io/en/latest/?badge=latest

Arg:
bulk_cn_dict A dictionary of coordination numbers
for each distinct site in the respective bulk structure
slab_atoms The `ase.Atoms` format of a supercell slab.
surface_indices The index of the surface atoms in a list.
adsorption_site A `numpy.ndarray` object that contains the x-y-z coordinates
of the adsorptions sites.

Output:
vector numpy.ndarray. Adsorption vector for an adsorption site.
"""
vnn = VoronoiNN(allow_pathological=True)

slab_atoms += Atoms('U', [adsorption_site])
U_index = slab_atoms.get_chemical_symbols().index('U')
struct_with_U = AseAtomsAdaptor.get_structure(slab_atoms)
nn_info = vnn.get_nn_info(struct_with_U, n=U_index)
nn_indices = [neighbor['site_index'] for neighbor in nn_info]
surface_nn_indices = [idx for idx in nn_indices if idx in surface_indices]

# get the index of the closest 4 atom to the site to form a plane
# chose 4 because it will gaurantee a more accurate plane for edge cases
nn_dists_from_U = {idx: np.linalg.norm(slab_atoms[idx].position-slab_atoms[U_index].position)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

trailing whitespace

for idx in surface_nn_indices}
sorted_dists = {k: v for k, v in sorted(nn_dists_from_U .items(), key=lambda item: item[1])}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use more explicit variable names. I know k is short for key and v is short for value, but what are the keys? What are the values? What are the items? I think they keys are indices for atoms and values are distance. But the reader should not have to try to figure that out. It should be obvious.

closest_4_nn_indices = np.array(list(sorted_dists.keys())[:4], dtype=int)
plane_coords = struct_with_U.cart_coords[closest_4_nn_indices]
vector = _plane_normal(plane_coords)

# Check to see if the vector is reasonable.
# set an arbitay threshold where the vector and [0,0,1]
# should be less than 60 degrees.
# If someone has a better way to detect in the future, go for it
if _ang_between_vectors(np.array([0., 0., 1.]), vector) > 60.:
message = ('Warning: this might be an edge case where the '
'adsorption vector is not appropriate.'
' We will place adsorbates using default [0,0,1] vector.')
warnings.warn(message)
vector = np.array([0., 0., 1.])

del slab_atoms[[U_index]]
return vector

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

2 lines between functions

def add_adsorbate_onto_slab(adsorbate, slab, site):
'''
Expand Down
73 changes: 50 additions & 23 deletions gaspy/tasks/atoms_generators.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import pickle
import luigi
import ase
import numpy as np
from ase.collections import g2
from pymatgen.io.ase import AseAtomsAdaptor
from pymatgen.ext.matproj import MPRester
Expand All @@ -26,6 +27,9 @@
flip_atoms,
tile_atoms,
find_adsorption_sites,
find_bulk_cn_dict,
find_surface_atoms_indices,
find_adsorption_vector,
add_adsorbate_onto_slab)
from .. import utils, defaults

Expand Down Expand Up @@ -282,7 +286,6 @@ def run(self):
adsorbate.translate(site)
adslab_atoms = slab_atoms_tiled.copy() + adsorbate
adslab_atoms[-1].tag = 1

# Turn the atoms into a document, then save it
doc = make_doc_from_atoms(adslab_atoms)
doc['fwids'] = slab_doc['fwids']
Expand Down Expand Up @@ -339,19 +342,20 @@ class GenerateAdslabs(luigi.Task):
into `ase.Atoms` objects. These objects have a the adsorbate
tagged with a `1`. These documents also contain the following
fields:
fwids A subdictionary containing the FWIDs of the
prerequisite calculations
shift Float indicating the shift/termination of
the slab
top Boolean indicating whether or not the slab
is oriented upwards with respect to the way
it was enumerated originally by pymatgen
slab_repeat 2-tuple of integers indicating the number
of times the unit slab was repeated in the
x and y directions before site enumeration
adsorption_site `np.ndarray` of length 3 containing the
containing the cartesian coordinates of the
adsorption site.
fwids A subdictionary containing the FWIDs of the
prerequisite calculations
shift Float indicating the shift/termination of
the slab
top Boolean indicating whether or not the slab
is oriented upwards with respect to the way
it was enumerated originally by pymatgen
slab_repeat 2-tuple of integers indicating the number
of times the unit slab was repeated in the
x and y directions before site enumeration
adsorption_site `np.ndarray` of length 3 containing the
containing the cartesian coordinates of the
adsorption site.
adsorption_vector: TODO
'''
adsorbate_name = luigi.Parameter()
rotation = luigi.DictParameter(ADSLAB_SETTINGS['rotation'])
Expand All @@ -363,18 +367,31 @@ class GenerateAdslabs(luigi.Task):
bulk_vasp_settings = luigi.DictParameter(BULK_SETTINGS['vasp'])

def requires(self):
return GenerateAdsorptionSites(mpid=self.mpid,
miller_indices=self.miller_indices,
min_xy=self.min_xy,
slab_generator_settings=self.slab_generator_settings,
get_slab_settings=self.get_slab_settings,
bulk_vasp_settings=self.bulk_vasp_settings)
from .calculation_finders import FindBulk # local import to avoid import errors
return {'bulk': FindBulk(mpid=self.mpid, vasp_settings=self.bulk_vasp_settings),
'adsorption_sites': GenerateAdsorptionSites(mpid=self.mpid,
miller_indices=self.miller_indices,
min_xy=self.min_xy,
slab_generator_settings=self.slab_generator_settings,
get_slab_settings=self.get_slab_settings,
bulk_vasp_settings=self.bulk_vasp_settings)}

def run(self):
with open(self.input().path, 'rb') as file_handle:
with open(self.input()['bulk'].path, 'rb') as bulk_file_handle:
bulk_doc = pickle.load(bulk_file_handle)
bulk_atoms = make_atoms_from_doc(bulk_doc)
bulk_cn_dict = find_bulk_cn_dict(bulk_atoms)

with open(self.input()['adsorption_sites'].path, 'rb') as file_handle:
site_docs = pickle.load(file_handle)

# Get and rotate the adsorbate
# prepare the supercell slab for adsorption vector
slab_atoms = make_atoms_from_doc(site_docs[0])
del slab_atoms[-1]
supercell_slab_atoms = slab_atoms.repeat((2, 2, 1))
surface_atoms_list = find_surface_atoms_indices(bulk_cn_dict, supercell_slab_atoms)

# Get and (euler) rotate the adsorbate
adsorbate = ADSORBATES[self.adsorbate_name].copy()
adsorbate.euler_rotate(**self.rotation)

Expand All @@ -384,7 +401,16 @@ def run(self):
for site_doc in site_docs:
slab = make_atoms_from_doc(site_doc)
del slab[-1]
adslab = add_adsorbate_onto_slab(adsorbate=adsorbate,

# Find the adsorption vector.
adsorption_vector = find_adsorption_vector(bulk_cn_dict, supercell_slab_atoms,
surface_atoms_list, site_doc['adsorption_site'])

# make a copy here so the original adsorbate is not further rotated
# to align to the adsorption vector at each iteration
aligned_adsorbate = adsorbate.copy()
aligned_adsorbate.rotate(np.array([0., 0., 1.]), adsorption_vector)
adslab = add_adsorbate_onto_slab(adsorbate=aligned_adsorbate,
slab=slab,
site=site_doc['adsorption_site'])

Expand All @@ -395,6 +421,7 @@ def run(self):
doc['top'] = site_doc['top']
doc['slab_repeat'] = site_doc['slab_repeat']
doc['adsorption_site'] = site_doc['adsorption_site']
doc['adsorption_vector'] = adsorption_vector
docs_adslabs.append(doc)
save_task_output(self, docs_adslabs)

Expand Down
1 change: 1 addition & 0 deletions gaspy/tasks/db_managers/adsorption.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,7 @@ def __create_adsorption_doc(energy_doc):
adsorption_doc['adsorbate'] = adslab_doc['fwname']['adsorbate']
adsorption_doc['adsorbate_rotation'] = adslab_doc['fwname']['adsorbate_rotation']
adsorption_doc['initial_adsorption_site'] = adslab_doc['fwname']['adsorption_site']
adsorption_doc['initial_adsorption_vector'] = adslab_doc['fwname']['adsorption_vector']
adsorption_doc['mpid'] = adslab_doc['fwname']['mpid']
adsorption_doc['miller'] = adslab_doc['fwname']['miller']
adsorption_doc['shift'] = adslab_doc['fwname']['shift']
Expand Down
1 change: 1 addition & 0 deletions gaspy/tasks/make_fireworks.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ def run(self, _testing=False):
'adsorbate': self.adsorbate_name,
'adsorbate_rotation': dict(self.rotation),
'adsorption_site': self.adsorption_site,
'adsorption_vector': tuple(doc['adsorption_vector']),
'mpid': self.mpid,
'miller': self.miller_indices,
'shift': self.shift,
Expand Down
Loading