Skip to content

Commit

Permalink
Merge pull request #74 from wehs7661/bug-fix
Browse files Browse the repository at this point in the history
Increase flexibility of default coordinate swap function and fix minor bugs
  • Loading branch information
ajfriedman22 authored Feb 4, 2025
2 parents b39d7e1 + 9ba891b commit 5b7c6cd
Show file tree
Hide file tree
Showing 10 changed files with 498 additions and 597 deletions.
6 changes: 3 additions & 3 deletions ensemble_md/cli/run_REXEE.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def main():
os.mkdir(f'{REXEE.working_dir}/sim_{i}')
os.mkdir(f'{REXEE.working_dir}/sim_{i}/iteration_0')
MDP = REXEE.initialize_MDP(i)
MDP.write(f"{REXEE.working_dir}/sim_{i}/iteration_0/expanded.mdp", skipempty=True)
MDP.write(f"{REXEE.working_dir}/sim_{i}/iteration_0/{REXEE.mdp}", skipempty=True)
if REXEE.modify_coords == 'default' and (not os.path.exists('residue_connect.csv') or not os.path.exists('residue_swap_map.csv')): # noqa: E501
REXEE.process_top()

Expand Down Expand Up @@ -262,8 +262,8 @@ def main():
os.mkdir(f'{REXEE.working_dir}/sim_{j}/iteration_{i}')
if REXEE.fixed_weights is True:
counts = None # So that this should work also for GROMACS version < 2022.5
MDP = REXEE.update_MDP(f"sim_{j}/iteration_{i - 1}/expanded.mdp", j, i, states, wl_delta, weights, counts) # modify with a new template # noqa: E501
MDP.write(f"{REXEE.working_dir}/sim_{j}/iteration_{i}/expanded.mdp", skipempty=True)
MDP = REXEE.update_MDP(f"sim_{j}/iteration_{i - 1}/{REXEE.mdp}", j, i, states, wl_delta, weights, counts) # modify with a new template # noqa: E501
MDP.write(f"{REXEE.working_dir}/sim_{j}/iteration_{i}/{REXEE.mdp}", skipempty=True)
# In run_REXEE(i, swap_pattern), where the tpr files will be generated, we use the top file at the
# level of the simulation (the file that will be shared by all simulations). For the gro file, we
# pass swap_pattern to the function to figure it out internally.
Expand Down
59 changes: 44 additions & 15 deletions ensemble_md/replica_exchange_EE.py
Original file line number Diff line number Diff line change
Expand Up @@ -1544,6 +1544,7 @@ def default_coords_fn(self, molA_file_name, molB_file_name):
# Load the coordinate swapping map
connection_map = pd.read_csv('residue_connect.csv')
swap_map = pd.read_csv('residue_swap_map.csv')
atom_mapping = pd.read_csv('atom_name_mapping.csv')

# Step 1: Read the GRO input coordinate files and open temporary Output files
molA_file = open(molA_file_name, 'r').readlines() # open input file
Expand All @@ -1557,7 +1558,7 @@ def default_coords_fn(self, molA_file_name, molB_file_name):
residue_options = swap_map['Swap A'].to_list() + swap_map['Swap B'].to_list()
nameA = coordinate_swap.identify_res(molA.topology, residue_options)
nameB = coordinate_swap.identify_res(molB.topology, residue_options)
df_atom_swap = coordinate_swap.find_common(molA_file, molB_file, nameA, nameB)
df_atom_swap = coordinate_swap.extract_missing(nameA, nameB, swap_map)

# Step 3: Fix break if present for solvated systems only
if len(molA.topology.select('water')) != 0:
Expand All @@ -1567,30 +1568,46 @@ def default_coords_fn(self, molA_file_name, molB_file_name):
molB = coordinate_swap.fix_break(molB, nameB, B_dimensions, connection_map[connection_map['Resname'] == nameB]) # noqa: E501

# Step 4: Determine coordinates of atoms which need to be reconstructed as we swap coordinates between molecules # noqa: E501
miss_B = df_atom_swap[(df_atom_swap['Swap'] == 'B2A') & (df_atom_swap['Direction'] == 'miss')]['Name'].to_list() # noqa: E501
miss_A = df_atom_swap[(df_atom_swap['Swap'] == 'A2B') & (df_atom_swap['Direction'] == 'miss')]['Name'].to_list() # noqa: E501
if len(miss_B) != 0:
df_atom_swap = coordinate_swap.get_miss_coord(molB, molA, nameB, nameA, df_atom_swap, 'B2A', swap_map[(swap_map['Swap A'] == nameB) & (swap_map['Swap B'] == nameA)]) # noqa: E501
if len(miss_A) != 0:
df_atom_swap = coordinate_swap.get_miss_coord(molA, molB, nameA, nameB, df_atom_swap, 'A2B', swap_map[(swap_map['Swap A'] == nameA) & (swap_map['Swap B'] == nameB)]) # noqa: E501
df_atom_swap = coordinate_swap.get_miss_coord(molB, molA, nameB, nameA, df_atom_swap, 'A2B', swap_map[(swap_map['Swap A'] == nameB) & (swap_map['Swap B'] == nameA)]) # noqa: E501
df_atom_swap = coordinate_swap.get_miss_coord(molA, molB, nameA, nameB, df_atom_swap, 'B2A', swap_map[(swap_map['Swap A'] == nameA) & (swap_map['Swap B'] == nameB)]) # noqa: E501

# Step 5: Parse Current file to ensure atoms are added in the correct order
atom_order_A = gmx_parser.deter_atom_order(molA_file, nameA)
atom_order_B = gmx_parser.deter_atom_order(molB_file, nameB)

# Step 6: Write the new file
# Reprint preamble text
line_start = coordinate_swap.print_preamble(molA_file, molB_new, len(miss_B), len(miss_A))
line_start = coordinate_swap.print_preamble(molA_file, molB_new, len(df_atom_swap[df_atom_swap['Swap'] == 'A2B']), len(df_atom_swap[df_atom_swap['Swap'] == 'B2A'])) # noqa: E501

# Print up until we reach the residue to modify
line_restart, atom_num_restart = coordinate_swap.write_unmodified(line_start, molA_file, molB_new, nameA, 1, line_start, copy.deepcopy(molA.xyz[0])) # noqa: E501

# Print new coordinates to file for molB
coordinate_swap.write_new_file(df_atom_swap, 'A2B', 'B2A', line_start, molA_file, molB_new, nameA, nameB, copy.deepcopy(molA.xyz[0]), miss_A, atom_order_B) # noqa: E501
line_restart, atom_num_restart = coordinate_swap.write_modified(df_atom_swap, 'A2B', line_start, molA_file, molB_new, atom_num_restart, nameA, nameB, copy.deepcopy(molA.xyz[0]), atom_mapping, atom_order_B, atom_order_A) # noqa: E501

if line_restart is not None:
# Print rest of file after modified residue
coordinate_swap.write_unmodified(line_restart, molA_file, molB_new, nameA, atom_num_restart, line_start, copy.deepcopy(molA.xyz[0])) # noqa: E501

# Print box size
molB_new.write(molA_file[-1])

# Print new coordinates to file
# Reprint preamble text
line_start = coordinate_swap.print_preamble(molB_file, molA_new, len(miss_A), len(miss_B))
line_start = coordinate_swap.print_preamble(molB_file, molA_new, len(df_atom_swap[df_atom_swap['Swap'] == 'B2A']), len(df_atom_swap[df_atom_swap['Swap'] == 'A2B'])) # noqa: E501

# Print up until we reach the residue to modify
line_restart, atom_num_restart = coordinate_swap.write_unmodified(line_start, molB_file, molA_new, nameB, 1, line_start, copy.deepcopy(molB.xyz[0])) # noqa: E501

# Print new coordinates to file for molA
line_restart, atom_num_restart = coordinate_swap.write_modified(df_atom_swap, 'B2A', line_start, molB_file, molA_new, atom_num_restart, nameB, nameA, copy.deepcopy(molB.xyz[0]), atom_mapping, atom_order_A, atom_order_B) # noqa: E501

# Print new coordinates for molA
coordinate_swap.write_new_file(df_atom_swap, 'B2A', 'A2B', line_start, molB_file, molA_new, nameB, nameA, copy.deepcopy(molB.xyz[0]), miss_B, atom_order_A) # noqa: E501
if line_restart is not None:
# Print rest of file after modified residue
coordinate_swap.write_unmodified(line_restart, molB_file, molA_new, nameB, atom_num_restart, line_start, copy.deepcopy(molB.xyz[0])) # noqa: E501

# Print box size
molA_new.write(molB_file[-1])

# Rename temp files
os.rename('A_hybrid_swap.gro', molB_dir + '/confout.gro')
Expand All @@ -1601,6 +1618,12 @@ def process_top(self):
Processes the input topologies in order to determine the atoms for alignment in the default GRO swapping
function. Output as csv files to prevent needing to re-run this step.
"""
if not os.path.exists('atom_name_mapping.csv'):
coordinate_swap.create_atom_map(self.gro, self.resname_list, self.swap_rep_pattern)
atom_name_mapping = pd.read_csv('atom_name_mapping.csv')
else:
atom_name_mapping = pd.read_csv('atom_name_mapping.csv')

if not os.path.exists('residue_connect.csv'):
df_top = pd.DataFrame()
for f, file_name in enumerate(self.top):
Expand Down Expand Up @@ -1640,17 +1663,23 @@ def process_top(self):
# Determine atoms not present in both molecules
X, Y = [int(swap[0][0]), int(swap[1][0])]
lam = {X: int(swap[0][1]), Y: int(swap[1][1])}
swap_name_match = atom_name_mapping[(atom_name_mapping['resname A'].isin([self.resname_list[X], self.resname_list[Y]])) & (atom_name_mapping['resname B'].isin([self.resname_list[X], self.resname_list[Y]]))] # noqa: E501
for A, B in zip([X, Y], [Y, X]):
# Swapping coordinates from B to A
input_A = gmx_parser.read_top(self.top[A], self.resname_list[A])
start_line, A_name, A_num, state = coordinate_swap.get_names(input_A, self.resname_list[A])
input_B = gmx_parser.read_top(self.top[B], self.resname_list[B])
start_line, B_name, B_num, state = coordinate_swap.get_names(input_B, self.resname_list[B])

A_only = [x for x in A_name if x not in B_name]
B_only = [x for x in B_name if x not in A_name]
# Determine shared atom names
if len(swap_name_match[swap_name_match['resname A'] == self.resname_list[A]]) != 0:
common_atoms_A = list(swap_name_match['atom name A'].values)
else:
common_atoms_A = list(swap_name_match['atom name B'].values)
A_only = [x for x in A_name if x not in common_atoms_A]

# Seperate real to dummy switches
df = coordinate_swap.determine_connection(A_only, B_only, self.resname_list[A], self.resname_list[B], df_top, lam[A]) # noqa: E501
df = coordinate_swap.determine_connection(A_only, swap_name_match, self.resname_list[A], self.resname_list[B], df_top, lam[A]) # noqa: E501

df_map = pd.concat([df_map, df])

Expand Down
125 changes: 125 additions & 0 deletions ensemble_md/tests/data/coord_swap/atom_name_mapping.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
,resname A,resname B,atomid A,atom name A,atomid B,atom name B
0,A2B,B2C,1,S1,1,S1
1,A2B,B2C,1,S1,1,S1
2,A2B,B2C,2,C2,2,C2
3,A2B,B2C,2,C2,2,C2
4,A2B,B2C,3,N3,3,N3
5,A2B,B2C,3,N3,3,N3
6,A2B,B2C,4,C4,4,C4
7,A2B,B2C,4,C4,4,C4
8,A2B,B2C,5,C5,5,C5
9,A2B,B2C,5,C5,5,C5
10,A2B,B2C,6,C6,6,C6
11,A2B,B2C,6,C6,6,C6
12,A2B,B2C,7,H1,8,H1
13,A2B,B2C,7,H1,8,H1
14,A2B,B2C,8,H2,9,H2
15,A2B,B2C,8,H2,9,H2
16,A2B,B2C,9,H3,10,H3
17,A2B,B2C,9,H3,10,H3
18,A2B,B2C,10,H4,11,H4
19,A2B,B2C,10,H4,11,H4
20,A2B,B2C,12,DC7,7,C7
21,A2B,B2C,13,HV5,12,H5
22,A2B,B2C,14,HV6,13,H6
23,A2B,B2C,15,HV7,14,H7
0,B2C,C2D,1,S1,1,S1
1,B2C,C2D,1,S1,1,S1
2,B2C,C2D,2,C2,2,C2
3,B2C,C2D,2,C2,2,C2
4,B2C,C2D,3,N3,3,N3
5,B2C,C2D,3,N3,3,N3
6,B2C,C2D,4,C4,4,C4
7,B2C,C2D,4,C4,4,C4
8,B2C,C2D,5,C5,5,C5
9,B2C,C2D,5,C5,5,C5
10,B2C,C2D,6,C6,6,C6
11,B2C,C2D,6,C6,6,C6
12,B2C,C2D,7,C7,7,C7
13,B2C,C2D,7,C7,7,C7
14,B2C,C2D,8,H1,9,H1
15,B2C,C2D,8,H1,9,H1
16,B2C,C2D,9,H2,10,H2
17,B2C,C2D,9,H2,10,H2
18,B2C,C2D,10,H3,11,H3
19,B2C,C2D,10,H3,11,H3
20,B2C,C2D,11,H4,12,H4
21,B2C,C2D,11,H4,12,H4
22,B2C,C2D,12,H5,19,HV5
23,B2C,C2D,13,H6,13,H6
24,B2C,C2D,13,H6,13,H6
25,B2C,C2D,14,H7,14,H7
26,B2C,C2D,14,H7,14,H7
27,B2C,C2D,15,DC8,8,C8
28,B2C,C2D,16,HV8,15,H8
29,B2C,C2D,17,HV9,16,H9
30,B2C,C2D,18,HV10,17,H10
0,C2D,D2E,1,S1,1,S1
1,C2D,D2E,1,S1,1,S1
2,C2D,D2E,2,C2,2,C2
3,C2D,D2E,2,C2,2,C2
4,C2D,D2E,3,N3,3,N3
5,C2D,D2E,3,N3,3,N3
6,C2D,D2E,4,C4,4,C4
7,C2D,D2E,4,C4,4,C4
8,C2D,D2E,5,C5,5,C5
9,C2D,D2E,5,C5,5,C5
10,C2D,D2E,6,C6,6,C6
11,C2D,D2E,6,C6,6,C6
12,C2D,D2E,7,C7,7,C7
13,C2D,D2E,7,C7,7,C7
14,C2D,D2E,8,C8,18,DC8
15,C2D,D2E,9,H1,9,H1
16,C2D,D2E,9,H1,9,H1
17,C2D,D2E,10,H2,10,H2
18,C2D,D2E,10,H2,10,H2
19,C2D,D2E,11,H3,11,H3
20,C2D,D2E,11,H3,11,H3
21,C2D,D2E,13,H6,13,H6
22,C2D,D2E,13,H6,13,H6
23,C2D,D2E,14,H7,14,H7
24,C2D,D2E,14,H7,14,H7
25,C2D,D2E,15,H8,19,HV8
26,C2D,D2E,16,H9,20,HV9
27,C2D,D2E,17,H10,21,HV10
28,C2D,D2E,18,DC9,8,C9
29,C2D,D2E,19,HV5,12,H5
30,C2D,D2E,20,HV11,15,H11
31,C2D,D2E,21,HV12,16,H12
32,C2D,D2E,22,HV13,17,H13
0,D2E,E2F,1,S1,1,S1
1,D2E,E2F,1,S1,1,S1
2,D2E,E2F,2,C2,2,C2
3,D2E,E2F,2,C2,2,C2
4,D2E,E2F,3,N3,3,N3
5,D2E,E2F,3,N3,3,N3
6,D2E,E2F,4,C4,4,C4
7,D2E,E2F,4,C4,4,C4
8,D2E,E2F,5,C5,5,C5
9,D2E,E2F,5,C5,5,C5
10,D2E,E2F,6,C6,6,C6
11,D2E,E2F,6,C6,6,C6
12,D2E,E2F,7,C7,7,C7
13,D2E,E2F,7,C7,7,C7
14,D2E,E2F,8,C9,9,C9
15,D2E,E2F,8,C9,9,C9
16,D2E,E2F,9,H1,10,H1
17,D2E,E2F,9,H1,10,H1
18,D2E,E2F,10,H2,11,H2
19,D2E,E2F,10,H2,11,H2
20,D2E,E2F,11,H3,12,H3
21,D2E,E2F,11,H3,12,H3
22,D2E,E2F,13,H6,13,H6
23,D2E,E2F,13,H6,13,H6
24,D2E,E2F,14,H7,14,H7
25,D2E,E2F,14,H7,14,H7
26,D2E,E2F,15,H11,18,H11
27,D2E,E2F,15,H11,18,H11
28,D2E,E2F,16,H12,19,H12
29,D2E,E2F,16,H12,19,H12
30,D2E,E2F,17,H13,20,H13
31,D2E,E2F,17,H13,20,H13
32,D2E,E2F,18,DC8,8,C8
33,D2E,E2F,19,HV8,15,H8
34,D2E,E2F,20,HV9,16,H9
35,D2E,E2F,21,HV10,17,H10
20 changes: 20 additions & 0 deletions ensemble_md/tests/data/coord_swap/atom_name_mapping_alt.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
,resname A,resname B,atomid A,atom name A,atomid B,atom name B
0,I2J,J2K,1,O,1,O
1,I2J,J2K,2,C1,2,C1
2,I2J,J2K,3,C2,3,C2
3,I2J,J2K,4,C3,4,C3
4,I2J,J2K,5,C4,5,C4
5,I2J,J2K,6,CV5,6,C5
6,I2J,J2K,7,CV6,7,C6
7,I2J,J2K,8,H1,10,H1
8,I2J,J2K,9,H2,11,H2
9,I2J,J2K,10,H3,12,H3
10,I2J,J2K,11,H4,13,H4
11,I2J,J2K,12,H5,14,H5
12,I2J,J2K,13,H6,15,H6
13,I2J,J2K,14,H7,16,H7
14,I2J,J2K,16,HV9,17,H9
15,I2J,J2K,17,HV10,18,H10
16,I2J,J2K,18,HV11,19,H11
17,I2J,J2K,19,HV12,20,H12
18,I2J,J2K,20,HV13,21,H13
22 changes: 7 additions & 15 deletions ensemble_md/tests/data/coord_swap/df_atom_swap.csv
Original file line number Diff line number Diff line change
@@ -1,15 +1,7 @@
,index,Name,Atom Name Number,Element,Direction,Swap,File line,Final Type,X Coordinates,Y Coordinates,Z Coordinates
0,0,H5,5,H,miss,A2B,13,real,0.09260503947734833,1.6340194940567017,0.3355029225349426
1,1,DC8,8,C,D2R,A2B,19,real,,,
2,2,HV8,8,H,D2R,A2B,20,real,,,
3,3,HV9,9,H,D2R,A2B,21,real,,,
4,4,HV10,10,H,D2R,A2B,22,real,,,
5,0,C8,8,C,R2D,B2A,9,dummy,,,
6,1,H8,8,H,R2D,B2A,16,dummy,,,
7,2,H9,9,H,R2D,B2A,17,dummy,,,
8,3,H10,10,H,R2D,B2A,18,dummy,,,
9,4,DC10,10,C,miss,B2A,22,dummy,2.620028495788574,1.403925895690918,2.7885396480560303
10,5,HV4,4,H,miss,B2A,23,dummy,2.3651702404022217,1.4678032398223877,2.8239073753356934
11,6,HV14,14,H,miss,B2A,24,dummy,2.678273916244507,1.464888095855713,2.8585944175720215
12,7,HV15,15,H,miss,B2A,25,dummy,2.6814169883728027,1.340580940246582,2.7234597206115723
13,8,HV16,16,H,miss,B2A,26,dummy,2.5583090782165527,1.3370134830474854,2.8496134281158447
,Name,Swap,X Coordinates,Y Coordinates,Z Coordinates
0,DC10,A2B,2.620028018951416,1.4039266109466553,2.7885406017303467
1,HV14,A2B,2.6782684326171875,1.4648889303207397,2.8585996627807617
2,HV15,A2B,2.6814215183258057,1.3405863046646118,2.723461389541626
3,HV16,A2B,2.5583088397979736,1.3370097875595093,2.849609851837158
4,HV4,A2B,2.3651702404022217,1.4678034782409668,2.8239071369171143
5,H5,B2A,0.09260530769824982,1.6340194940567017,0.33550316095352173
7 changes: 7 additions & 0 deletions ensemble_md/tests/data/coord_swap/extract_missing.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
,Name,Swap
0,DC10,A2B
1,HV14,A2B
2,HV15,A2B
3,HV16,A2B
4,HV4,A2B
5,H5,B2A
15 changes: 0 additions & 15 deletions ensemble_md/tests/data/coord_swap/find_common.csv

This file was deleted.

Loading

0 comments on commit 5b7c6cd

Please sign in to comment.