Skip to content

Commit

Permalink
Merge pull request #73 from deepmodeling/devel
Browse files Browse the repository at this point in the history
Merge new features to master
  • Loading branch information
amcadmus authored Jan 3, 2020
2 parents 5785483 + 410ad7f commit ec284a2
Show file tree
Hide file tree
Showing 54 changed files with 3,454 additions and 59 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@ build
dist
dpdata.egg-info
_version.py
!tests/cp2k/aimd/cp2k.log
22 changes: 20 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,10 @@ xyz_multi_systems.to_deepmd_raw('./my_deepmd_data/')
| deepmd | npy | True | True | LabeledSystem | 'deepmd/npy' |
| gaussian| log | False | True | LabeledSystem | 'gaussian/log'|
| gaussian| log | True | True | LabeledSystem | 'gaussian/md' |
| siesta| output | False | True | LabeledSystem | 'siesta/output'|
| siesta| aimd_output | True | True | LabeledSystem | 'siesta/aimd_output' |
| siesta | output | False | True | LabeledSystem | 'siesta/output'|
| siesta | aimd_output | True | True | LabeledSystem | 'siesta/aimd_output' |
| cp2k | output | False | True | LabeledSystem | 'cp2k/output' |
| cp2k | aimd_output | True | True | LabeledSystem | 'cp2k/aimd_output' |
| QE | log | False | True | LabeledSystem | 'qe/pw/scf' |
| QE | log | True | False | System | 'qe/cp/traj' |
| QE | log | True | True | LabeledSystem | 'qe/cp/traj' |
Expand Down Expand Up @@ -152,3 +153,20 @@ Frame selection can be implemented by
dpdata.LabeledSystem('OUTCAR').sub_system([0,-1]).to_deepmd_raw('dpmd_raw')
```
by which only the first and last frames are dumped to `dpmd_raw`.

## replicate
dpdata will create a super cell of the current atom configuration.
```python
dpdata.System('./POSCAR').replicate((1,2,3,) )
```
tuple(1,2,3) means don't copy atom configuration in x direction, make 2 copys in y direction, make 3 copys in z direction.

## perturb
By the following example, each frame of the original system (`dpdata.System('./POSCAR')`) is perturbed to generate three new frames. For each frame, the cell is perturbed by 5% and the atom positions are perturbed by 0.6 Angstrom. `atom_pert_style` indicates that the perturbation to the atom positions is subject to normal distribution. Other available options to `atom_pert_style` are`uniform` (uniform in a ball), and `const` (uniform on a sphere).
```python
perturbed_system = dpdata.System('./POSCAR').perturb(pert_num=3,
cell_pert_fraction=0.05,
atom_pert_distance=0.6,
atom_pert_style='normal')
print(perturbed_system.data)
```
Empty file added dpdata/cp2k/__init__.py
Empty file.
61 changes: 61 additions & 0 deletions dpdata/cp2k/cell.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@

#%%
import numpy as np
from collections import OrderedDict
import re

def cell_to_low_triangle(A,B,C,alpha,beta,gamma):
"""
Convert cell to low triangle matrix.
Parameters
----------
A : float
cell length A
B : float
cell length B
C : float
cell length C
alpha : float
radian. The angle between vector B and vector C.
beta : float
radian. The angle between vector A and vector C.
gamma : float
radian. The angle between vector B and vector C.
Returns
-------
cell : list
The cell matrix used by dpdata in low triangle form.
"""
if not np.pi*5/180<alpha< np.pi*175/180:
raise RuntimeError("alpha=={}: must be a radian, and \
must be in np.pi*5/180 < alpha < np.pi*175/180".format(alpha))
if not np.pi*5/180<beta< np.pi*175/180:
raise RuntimeError("beta=={}: must be a radian, and \
must be in np.pi*5/180 < beta < np.pi*175/180".format(beta))
if not np.pi*5/180<gamma< np.pi*175/180:
raise RuntimeError("gamma=={}: must be a radian, and \
must be in np.pi*5/180 < gamma < np.pi*175/180".format(gamma))
if not A > 0.2:
raise RuntimeError("A=={}, must be greater than 0.2".format(A))
if not B > 0.2:
raise RuntimeError("B=={}, must be greater than 0.2".format(B))
if not C > 0.2:
raise RuntimeError("C=={}, must be greater than 0.2".format(C))

lx = A
xy = B * np.cos(gamma)
xz = C * np.cos(beta)
ly = B* np.sin(gamma)
if not ly > 0.1:
raise RuntimeError("ly:=B* np.sin(gamma)=={}, must be greater than 0.1",format(ly))
yz = (B*C*np.cos(alpha)-xy*xz)/ly
if not C**2-xz**2-yz**2 > 0.01:
raise RuntimeError("lz^2:=C**2-xz**2-yz**2=={}, must be greater than 0.01",format(C**2-xz**2-yz**2))
lz = np.sqrt(C**2-xz**2-yz**2)
cell = np.asarray([[lx, 0 , 0],
[xy, ly, 0 ],
[xz, yz, lz]]).astype('float32')
return cell

213 changes: 211 additions & 2 deletions dpdata/cp2k/output.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,217 @@
#%%
import numpy as np
import re
from collections import OrderedDict
from .cell import cell_to_low_triangle

#%%
AU_TO_ANG = 5.29177208590000E-01
AU_TO_EV = 2.72113838565563E+01
AU_TO_EV_EVERY_ANG = AU_TO_EV/AU_TO_ANG
delimiter_patterns=[]
delimiter_p1 = re.compile(r'^ \* GO CP2K GO! \*+')
delimiter_p2 = re.compile(r'^ \*+')
delimiter_patterns.append(delimiter_p1)
delimiter_patterns.append(delimiter_p2)
avail_patterns = []

avail_patterns.append(re.compile(r'^ INITIAL POTENTIAL ENERGY'))
avail_patterns.append(re.compile(r'^ ENSEMBLE TYPE'))

class Cp2kSystems(object):
"""
deal with cp2k outputfile
"""
def __init__(self, log_file_name, xyz_file_name):
self.log_file_object = open(log_file_name, 'r')
self.xyz_file_object = open(xyz_file_name, 'r')
self.log_block_generator = self.get_log_block_generator()
self.xyz_block_generator = self.get_xyz_block_generator()
self.cell=None
def __del__(self):
self.log_file_object.close()
self.xyz_file_object.close()
def __iter__(self):
return self
def __next__(self):
info_dict = {}
log_info_dict = self.handle_single_log_frame(next(self.log_block_generator))
xyz_info_dict = self.handle_single_xyz_frame(next(self.xyz_block_generator))
eq1 = [v1==v2 for v1,v2 in zip(log_info_dict['atom_numbs'], xyz_info_dict['atom_numbs'])]
eq2 = [v1==v2 for v1,v2 in zip(log_info_dict['atom_names'], xyz_info_dict['atom_names'])]
eq3 = [v1==v2 for v1,v2 in zip(log_info_dict['atom_types'], xyz_info_dict['atom_types'])]
assert all(eq1), (log_info_dict,xyz_info_dict,'There may be errors in the file')
assert all(eq2), (log_info_dict,xyz_info_dict,'There may be errors in the file')
assert all(eq3), (log_info_dict,xyz_info_dict,'There may be errors in the file')
assert log_info_dict['energies']==xyz_info_dict['energies'], (log_info_dict['energies'],xyz_info_dict['energies'],'There may be errors in the file')
info_dict.update(log_info_dict)
info_dict.update(xyz_info_dict)
return info_dict

def get_log_block_generator(self):
lines = []
delimiter_flag = False
while True:
line = self.log_file_object.readline()
if line:
lines.append(line)
if any(p.match(line) for p in delimiter_patterns):
if delimiter_flag is True:
yield lines
lines = []
delimiter_flag = False
else:
line = self.log_file_object.readline()
lines.append(line)
if any(p.match(line) for p in avail_patterns):
delimiter_flag = True
else:
break
if delimiter_flag is True:
raise RuntimeError('This file lacks some content, please check')

def get_xyz_block_generator(self):
p3 = re.compile(r'^\s*(\d+)\s*')
while True:
line = self.xyz_file_object.readline()
if not line:
break
if p3.match(line):
atom_num = int(p3.match(line).group(1))
lines = []
lines.append(line)
for ii in range(atom_num+1):
lines.append(self.xyz_file_object.readline())
if not lines[-1]:
raise RuntimeError("this xyz file may lack of lines, should be {};lines:{}".format(atom_num+2, lines))
yield lines

def handle_single_log_frame(self, lines):
info_dict={}
energy_pattern_1 = re.compile(r' INITIAL POTENTIAL ENERGY\[hartree\]\s+=\s+(?P<number>\S+)')
# CONSERVED QUANTITY [hartree] = -0.279168013085E+04
energy_pattern_2 = re.compile(r' POTENTIAL ENERGY\[hartree\]\s+=\s+(?P<number>\S+)')
energy=None
cell_length_pattern = re.compile(r' INITIAL CELL LNTHS\[bohr\]\s+=\s+(?P<A>\S+)\s+(?P<B>\S+)\s+(?P<C>\S+)')
cell_angle_pattern = re.compile(r' INITIAL CELL ANGLS\[deg\]\s+=\s+(?P<alpha>\S+)\s+(?P<beta>\S+)\s+(?P<gamma>\S+)')
cell_A, cell_B, cell_C = (0,0,0,)
cell_alpha, cell_beta, cell_gamma=(0,0,0,)
force_start_pattern = re.compile(r' ATOMIC FORCES in')
force_flag=False
force_end_pattern = re.compile(r' SUM OF ATOMIC FORCES')
force_lines= []
cell_flag=0
for line in lines:
if force_start_pattern.match(line):
force_flag=True
if force_end_pattern.match(line):
assert force_flag is True, (force_flag,'there may be errors in this file ')
force_flag=False
if force_flag is True:
force_lines.append(line)
if energy_pattern_1.match(line):
energy = float(energy_pattern_1.match(line).groupdict()['number']) * AU_TO_EV
if energy_pattern_2.match(line):
energy = float(energy_pattern_2.match(line).groupdict()['number']) * AU_TO_EV
if cell_length_pattern.match(line):
cell_A = float(cell_length_pattern.match(line).groupdict()['A']) * AU_TO_ANG
cell_B = float(cell_length_pattern.match(line).groupdict()['B']) * AU_TO_ANG
cell_C = float(cell_length_pattern.match(line).groupdict()['C']) * AU_TO_ANG
cell_flag+=1
if cell_angle_pattern.match(line):
cell_alpha = np.deg2rad(float(cell_angle_pattern.match(line).groupdict()['alpha']))
cell_beta = np.deg2rad(float(cell_angle_pattern.match(line).groupdict()['beta']))
cell_gamma = np.deg2rad(float(cell_angle_pattern.match(line).groupdict()['gamma']))
cell_flag+=1
if cell_flag == 2:
self.cell = cell_to_low_triangle(cell_A,cell_B,cell_C,
cell_alpha,cell_beta,cell_gamma)
# lx = cell_A
# xy = cell_B * np.cos(cell_gamma)
# xz = cell_C * np.cos(cell_beta)
# ly = cell_B* np.sin(cell_gamma)
# yz = (cell_B*cell_C*np.cos(cell_alpha)-xy*xz)/ly
# lz = np.sqrt(cell_C**2-xz**2-yz**2)
# self.cell = [[lx, 0 , 0],
# [xy, ly, 0 ],
# [xz, yz, lz]]

element_index = -1
element_dict = OrderedDict()
atom_types_list = []
forces_list = []
for line in force_lines[3:]:
line_list = line.split()
if element_dict.get(line_list[2]):
element_dict[line_list[2]][1]+=1
else:
element_index +=1
element_dict[line_list[2]]=[element_index,1]
atom_types_list.append(element_dict[line_list[2]][0])
forces_list.append([float(line_list[3])*AU_TO_EV_EVERY_ANG,
float(line_list[4])*AU_TO_EV_EVERY_ANG,
float(line_list[5])*AU_TO_EV_EVERY_ANG])

atom_names=list(element_dict.keys())
atom_numbs=[]
for ii in atom_names:
atom_numbs.append(element_dict[ii][1])
info_dict['atom_names'] = atom_names
info_dict['atom_numbs'] = atom_numbs
info_dict['atom_types'] = np.asarray(atom_types_list)
info_dict['cells'] = np.asarray([self.cell]).astype('float32')
info_dict['energies'] = np.asarray([energy]).astype('float32')
info_dict['forces'] = np.asarray([forces_list]).astype('float32')
return info_dict

def handle_single_xyz_frame(self, lines):
info_dict = {}
atom_num = int(lines[0].strip('\n').strip())
if len(lines) != atom_num + 2:
raise RuntimeError("format error, atom_num=={}, {}!=atom_num+2".format(atom_num, len(lines)))
data_format_line = lines[1].strip('\n').strip()+str(' ')
prop_pattern = re.compile(r'(?P<prop>\w+)\s*=\s*(?P<number>.*?)[, ]')
prop_dict = dict(prop_pattern.findall(data_format_line))

energy=0
if prop_dict.get('E'):
energy = float(prop_dict.get('E')) * AU_TO_EV
# info_dict['energies'] = np.array([prop_dict['E']]).astype('float32')

element_index = -1
element_dict = OrderedDict()
atom_types_list = []
coords_list = []
for line in lines[2:]:
line_list = line.split()
if element_dict.get(line_list[0]):
element_dict[line_list[0]][1]+=1
else:
element_index +=1
element_dict[line_list[0]]=[element_index,1]
atom_types_list.append(element_dict[line_list[0]][0])
coords_list.append([float(line_list[1])*AU_TO_ANG,
float(line_list[2])*AU_TO_ANG,
float(line_list[3])*AU_TO_ANG])
atom_names=list(element_dict.keys())
atom_numbs=[]
for ii in atom_names:
atom_numbs.append(element_dict[ii][1])
info_dict['atom_names'] = atom_names
info_dict['atom_numbs'] = atom_numbs
info_dict['atom_types'] = np.asarray(atom_types_list)
info_dict['coords'] = np.asarray([coords_list]).astype('float32')
info_dict['energies'] = np.array([energy]).astype('float32')
info_dict['orig']=[0,0,0]
return info_dict

#%%

def get_frames (fname) :
coord_flag = False
force_flag = False
eV = 2.72113838565563E+01
angstrom = 5.29177208590000E-01
eV = 2.72113838565563E+01 # hatree to eV
angstrom = 5.29177208590000E-01 # Bohrto Angstrom
fp = open(fname)
atom_symbol_list = []
cell = []
Expand Down Expand Up @@ -74,3 +280,6 @@ def get_frames (fname) :
return list(atom_names), atom_numbs, atom_types, cell, coord, energy, force




# %%
Empty file added dpdata/deepmd/__init__.py
Empty file.
9 changes: 8 additions & 1 deletion dpdata/deepmd/comp.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ def to_system_data(folder,
data['forces'] = np.concatenate(all_forces, axis = 0)
if len(all_virs) > 0:
data['virials'] = np.concatenate(all_virs, axis = 0)
if os.path.isfile(os.path.join(folder, "nopbc")):
data['nopbc'] = True
return data


Expand Down Expand Up @@ -101,5 +103,10 @@ def dump(folder,
np.save(os.path.join(set_folder, 'virial'), virials[set_stt:set_end])
if 'atom_pref' in data:
np.save(os.path.join(set_folder, "atom_pref"), atom_pref[set_stt:set_end])

try:
os.remove(os.path.join(folder, "nopbc"))
except OSError:
pass
if data.get("nopbc", False):
os.mknod(os.path.join(folder, "nopbc"))

9 changes: 8 additions & 1 deletion dpdata/deepmd/raw.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ def to_system_data(folder, type_map = None, labels = True) :
if os.path.exists(os.path.join(folder, 'virial.raw')) :
data['virials'] = np.loadtxt(os.path.join(folder, 'virial.raw'))
data['virials'] = np.reshape(data['virials'], [nframes, 3, 3])
if os.path.isfile(os.path.join(folder, "nopbc")):
data['nopbc'] = True
return data
else :
raise RuntimeError('not dir ' + folder)
Expand All @@ -67,5 +69,10 @@ def dump (folder, data) :
np.savetxt(os.path.join(folder, 'force.raw'), np.reshape(data['forces'], [nframes, -1]))
if 'virials' in data :
np.savetxt(os.path.join(folder, 'virial.raw'), np.reshape(data['virials'], [nframes, 9]))

try:
os.remove(os.path.join(folder, "nopbc"))
except OSError:
pass
if data.get("nopbc", False):
os.mknod(os.path.join(folder, "nopbc"))

Empty file added dpdata/gaussian/__init__.py
Empty file.
1 change: 1 addition & 0 deletions dpdata/gaussian/log.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,4 +69,5 @@ def to_system_data(file_name, md=False):
data['coords'] = np.array(coords_t)
data['orig'] = np.array([0, 0, 0])
data['cells'] = np.array([[[100., 0., 0.], [0., 100., 0.], [0., 0., 100.]] for _ in energy_t])
data['nopbc'] = True
return data
Empty file added dpdata/lammps/__init__.py
Empty file.
Empty file added dpdata/md/__init__.py
Empty file.
Loading

0 comments on commit ec284a2

Please sign in to comment.