Merge pull request #74 from wehs7661/bug-fix

Increase flexibility of default coordinate swap function and fix minor bugs
wehs7661 · Feb 4, 2025 · 5b7c6cd · 5b7c6cd
2 parents b39d7e1 + 9ba891b
commit 5b7c6cd
Show file tree

Hide file tree

Showing 10 changed files with 498 additions and 597 deletions.
diff --git a/ensemble_md/cli/run_REXEE.py b/ensemble_md/cli/run_REXEE.py
@@ -104,7 +104,7 @@ def main():
                 os.mkdir(f'{REXEE.working_dir}/sim_{i}')
                 os.mkdir(f'{REXEE.working_dir}/sim_{i}/iteration_0')
                 MDP = REXEE.initialize_MDP(i)
-                MDP.write(f"{REXEE.working_dir}/sim_{i}/iteration_0/expanded.mdp", skipempty=True)
+                MDP.write(f"{REXEE.working_dir}/sim_{i}/iteration_0/{REXEE.mdp}", skipempty=True)
             if REXEE.modify_coords == 'default' and (not os.path.exists('residue_connect.csv') or not os.path.exists('residue_swap_map.csv')):  # noqa: E501
                 REXEE.process_top()
 
@@ -262,8 +262,8 @@ def main():
                     os.mkdir(f'{REXEE.working_dir}/sim_{j}/iteration_{i}')
                     if REXEE.fixed_weights is True:
                         counts = None    # So that this should work also for GROMACS version < 2022.5
-                    MDP = REXEE.update_MDP(f"sim_{j}/iteration_{i - 1}/expanded.mdp", j, i, states, wl_delta, weights, counts)   # modify with a new template  # noqa: E501
-                    MDP.write(f"{REXEE.working_dir}/sim_{j}/iteration_{i}/expanded.mdp", skipempty=True)
+                    MDP = REXEE.update_MDP(f"sim_{j}/iteration_{i - 1}/{REXEE.mdp}", j, i, states, wl_delta, weights, counts)   # modify with a new template  # noqa: E501
+                    MDP.write(f"{REXEE.working_dir}/sim_{j}/iteration_{i}/{REXEE.mdp}", skipempty=True)
                     # In run_REXEE(i, swap_pattern), where the tpr files will be generated, we use the top file at the
                     # level of the simulation (the file that will be shared by all simulations). For the gro file, we
                     # pass swap_pattern to the function to figure it out internally.

diff --git a/ensemble_md/replica_exchange_EE.py b/ensemble_md/replica_exchange_EE.py
@@ -1544,6 +1544,7 @@ def default_coords_fn(self, molA_file_name, molB_file_name):
         # Load the coordinate swapping map
         connection_map = pd.read_csv('residue_connect.csv')
         swap_map = pd.read_csv('residue_swap_map.csv')
+        atom_mapping = pd.read_csv('atom_name_mapping.csv')
 
         # Step 1: Read the GRO input coordinate files and open temporary Output files
         molA_file = open(molA_file_name, 'r').readlines()  # open input file
@@ -1557,7 +1558,7 @@ def default_coords_fn(self, molA_file_name, molB_file_name):
         residue_options = swap_map['Swap A'].to_list() + swap_map['Swap B'].to_list()
         nameA = coordinate_swap.identify_res(molA.topology, residue_options)
         nameB = coordinate_swap.identify_res(molB.topology, residue_options)
-        df_atom_swap = coordinate_swap.find_common(molA_file, molB_file, nameA, nameB)
+        df_atom_swap = coordinate_swap.extract_missing(nameA, nameB, swap_map)
 
         # Step 3: Fix break if present for solvated systems only
         if len(molA.topology.select('water')) != 0:
@@ -1567,30 +1568,46 @@ def default_coords_fn(self, molA_file_name, molB_file_name):
             molB = coordinate_swap.fix_break(molB, nameB, B_dimensions, connection_map[connection_map['Resname'] == nameB])  # noqa: E501
 
         # Step 4: Determine coordinates of atoms which need to be reconstructed as we swap coordinates between molecules  # noqa: E501
-        miss_B = df_atom_swap[(df_atom_swap['Swap'] == 'B2A') & (df_atom_swap['Direction'] == 'miss')]['Name'].to_list()  # noqa: E501
-        miss_A = df_atom_swap[(df_atom_swap['Swap'] == 'A2B') & (df_atom_swap['Direction'] == 'miss')]['Name'].to_list()  # noqa: E501
-        if len(miss_B) != 0:
-            df_atom_swap = coordinate_swap.get_miss_coord(molB, molA, nameB, nameA, df_atom_swap, 'B2A', swap_map[(swap_map['Swap A'] == nameB) & (swap_map['Swap B'] == nameA)])  # noqa: E501
-        if len(miss_A) != 0:
-            df_atom_swap = coordinate_swap.get_miss_coord(molA, molB, nameA, nameB, df_atom_swap, 'A2B', swap_map[(swap_map['Swap A'] == nameA) & (swap_map['Swap B'] == nameB)])  # noqa: E501
+        df_atom_swap = coordinate_swap.get_miss_coord(molB, molA, nameB, nameA, df_atom_swap, 'A2B', swap_map[(swap_map['Swap A'] == nameB) & (swap_map['Swap B'] == nameA)])  # noqa: E501
+        df_atom_swap = coordinate_swap.get_miss_coord(molA, molB, nameA, nameB, df_atom_swap, 'B2A', swap_map[(swap_map['Swap A'] == nameA) & (swap_map['Swap B'] == nameB)])  # noqa: E501
 
         # Step 5: Parse Current file to ensure atoms are added in the correct order
         atom_order_A = gmx_parser.deter_atom_order(molA_file, nameA)
         atom_order_B = gmx_parser.deter_atom_order(molB_file, nameB)
 
         # Step 6: Write the new file
         # Reprint preamble text
-        line_start = coordinate_swap.print_preamble(molA_file, molB_new, len(miss_B), len(miss_A))
+        line_start = coordinate_swap.print_preamble(molA_file, molB_new, len(df_atom_swap[df_atom_swap['Swap'] == 'A2B']), len(df_atom_swap[df_atom_swap['Swap'] == 'B2A']))  # noqa: E501
+
+        # Print up until we reach the residue to modify
+        line_restart, atom_num_restart = coordinate_swap.write_unmodified(line_start, molA_file, molB_new, nameA, 1, line_start, copy.deepcopy(molA.xyz[0]))  # noqa: E501
 
         # Print new coordinates to file for molB
-        coordinate_swap.write_new_file(df_atom_swap, 'A2B', 'B2A', line_start, molA_file, molB_new, nameA, nameB, copy.deepcopy(molA.xyz[0]), miss_A, atom_order_B)  # noqa: E501
+        line_restart, atom_num_restart = coordinate_swap.write_modified(df_atom_swap, 'A2B', line_start, molA_file, molB_new, atom_num_restart, nameA, nameB, copy.deepcopy(molA.xyz[0]), atom_mapping, atom_order_B, atom_order_A)  # noqa: E501
+
+        if line_restart is not None:
+            # Print rest of file after modified residue
+            coordinate_swap.write_unmodified(line_restart, molA_file, molB_new, nameA, atom_num_restart, line_start, copy.deepcopy(molA.xyz[0]))  # noqa: E501
+
+        # Print box size
+        molB_new.write(molA_file[-1])
 
         # Print new coordinates to file
         # Reprint preamble text
-        line_start = coordinate_swap.print_preamble(molB_file, molA_new, len(miss_A), len(miss_B))
+        line_start = coordinate_swap.print_preamble(molB_file, molA_new, len(df_atom_swap[df_atom_swap['Swap'] == 'B2A']), len(df_atom_swap[df_atom_swap['Swap'] == 'A2B']))  # noqa: E501
+
+        # Print up until we reach the residue to modify
+        line_restart, atom_num_restart = coordinate_swap.write_unmodified(line_start, molB_file, molA_new, nameB, 1, line_start, copy.deepcopy(molB.xyz[0]))  # noqa: E501
+
+        # Print new coordinates to file for molA
+        line_restart, atom_num_restart = coordinate_swap.write_modified(df_atom_swap, 'B2A', line_start, molB_file, molA_new, atom_num_restart, nameB, nameA, copy.deepcopy(molB.xyz[0]), atom_mapping, atom_order_A, atom_order_B)  # noqa: E501
 
-        # Print new coordinates for molA
-        coordinate_swap.write_new_file(df_atom_swap, 'B2A', 'A2B', line_start, molB_file, molA_new, nameB, nameA, copy.deepcopy(molB.xyz[0]), miss_B, atom_order_A)  # noqa: E501
+        if line_restart is not None:
+            # Print rest of file after modified residue
+            coordinate_swap.write_unmodified(line_restart, molB_file, molA_new, nameB, atom_num_restart, line_start, copy.deepcopy(molB.xyz[0]))  # noqa: E501
+
+        # Print box size
+        molA_new.write(molB_file[-1])
 
         # Rename temp files
         os.rename('A_hybrid_swap.gro', molB_dir + '/confout.gro')
@@ -1601,6 +1618,12 @@ def process_top(self):
         Processes the input topologies in order to determine the atoms for alignment in the default GRO swapping
         function. Output as csv files to prevent needing to re-run this step.
         """
+        if not os.path.exists('atom_name_mapping.csv'):
+            coordinate_swap.create_atom_map(self.gro, self.resname_list, self.swap_rep_pattern)
+            atom_name_mapping = pd.read_csv('atom_name_mapping.csv')
+        else:
+            atom_name_mapping = pd.read_csv('atom_name_mapping.csv')
+
         if not os.path.exists('residue_connect.csv'):
             df_top = pd.DataFrame()
             for f, file_name in enumerate(self.top):
@@ -1640,17 +1663,23 @@ def process_top(self):
                 # Determine atoms not present in both molecules
                 X, Y = [int(swap[0][0]), int(swap[1][0])]
                 lam = {X: int(swap[0][1]), Y: int(swap[1][1])}
+                swap_name_match = atom_name_mapping[(atom_name_mapping['resname A'].isin([self.resname_list[X], self.resname_list[Y]])) & (atom_name_mapping['resname B'].isin([self.resname_list[X], self.resname_list[Y]]))]  # noqa: E501
                 for A, B in zip([X, Y], [Y, X]):
+                    # Swapping coordinates from B to A
                     input_A = gmx_parser.read_top(self.top[A], self.resname_list[A])
                     start_line, A_name, A_num, state = coordinate_swap.get_names(input_A, self.resname_list[A])
                     input_B = gmx_parser.read_top(self.top[B], self.resname_list[B])
                     start_line, B_name, B_num, state = coordinate_swap.get_names(input_B, self.resname_list[B])
 
-                    A_only = [x for x in A_name if x not in B_name]
-                    B_only = [x for x in B_name if x not in A_name]
+                    # Determine shared atom names
+                    if len(swap_name_match[swap_name_match['resname A'] == self.resname_list[A]]) != 0:
+                        common_atoms_A = list(swap_name_match['atom name A'].values)
+                    else:
+                        common_atoms_A = list(swap_name_match['atom name B'].values)
+                    A_only = [x for x in A_name if x not in common_atoms_A]
 
                     # Seperate real to dummy switches
-                    df = coordinate_swap.determine_connection(A_only, B_only, self.resname_list[A], self.resname_list[B], df_top, lam[A])  # noqa: E501
+                    df = coordinate_swap.determine_connection(A_only, swap_name_match, self.resname_list[A], self.resname_list[B], df_top, lam[A])  # noqa: E501
 
                     df_map = pd.concat([df_map, df])
 

diff --git a/ensemble_md/tests/data/coord_swap/atom_name_mapping.csv b/ensemble_md/tests/data/coord_swap/atom_name_mapping.csv
@@ -0,0 +1,125 @@
+,resname A,resname B,atomid A,atom name A,atomid B,atom name B
+0,A2B,B2C,1,S1,1,S1
+1,A2B,B2C,1,S1,1,S1
+2,A2B,B2C,2,C2,2,C2
+3,A2B,B2C,2,C2,2,C2
+4,A2B,B2C,3,N3,3,N3
+5,A2B,B2C,3,N3,3,N3
+6,A2B,B2C,4,C4,4,C4
+7,A2B,B2C,4,C4,4,C4
+8,A2B,B2C,5,C5,5,C5
+9,A2B,B2C,5,C5,5,C5
+10,A2B,B2C,6,C6,6,C6
+11,A2B,B2C,6,C6,6,C6
+12,A2B,B2C,7,H1,8,H1
+13,A2B,B2C,7,H1,8,H1
+14,A2B,B2C,8,H2,9,H2
+15,A2B,B2C,8,H2,9,H2
+16,A2B,B2C,9,H3,10,H3
+17,A2B,B2C,9,H3,10,H3
+18,A2B,B2C,10,H4,11,H4
+19,A2B,B2C,10,H4,11,H4
+20,A2B,B2C,12,DC7,7,C7
+21,A2B,B2C,13,HV5,12,H5
+22,A2B,B2C,14,HV6,13,H6
+23,A2B,B2C,15,HV7,14,H7
+0,B2C,C2D,1,S1,1,S1
+1,B2C,C2D,1,S1,1,S1
+2,B2C,C2D,2,C2,2,C2
+3,B2C,C2D,2,C2,2,C2
+4,B2C,C2D,3,N3,3,N3
+5,B2C,C2D,3,N3,3,N3
+6,B2C,C2D,4,C4,4,C4
+7,B2C,C2D,4,C4,4,C4
+8,B2C,C2D,5,C5,5,C5
+9,B2C,C2D,5,C5,5,C5
+10,B2C,C2D,6,C6,6,C6
+11,B2C,C2D,6,C6,6,C6
+12,B2C,C2D,7,C7,7,C7
+13,B2C,C2D,7,C7,7,C7
+14,B2C,C2D,8,H1,9,H1
+15,B2C,C2D,8,H1,9,H1
+16,B2C,C2D,9,H2,10,H2
+17,B2C,C2D,9,H2,10,H2
+18,B2C,C2D,10,H3,11,H3
+19,B2C,C2D,10,H3,11,H3
+20,B2C,C2D,11,H4,12,H4
+21,B2C,C2D,11,H4,12,H4
+22,B2C,C2D,12,H5,19,HV5
+23,B2C,C2D,13,H6,13,H6
+24,B2C,C2D,13,H6,13,H6
+25,B2C,C2D,14,H7,14,H7
+26,B2C,C2D,14,H7,14,H7
+27,B2C,C2D,15,DC8,8,C8
+28,B2C,C2D,16,HV8,15,H8
+29,B2C,C2D,17,HV9,16,H9
+30,B2C,C2D,18,HV10,17,H10
+0,C2D,D2E,1,S1,1,S1
+1,C2D,D2E,1,S1,1,S1
+2,C2D,D2E,2,C2,2,C2
+3,C2D,D2E,2,C2,2,C2
+4,C2D,D2E,3,N3,3,N3
+5,C2D,D2E,3,N3,3,N3
+6,C2D,D2E,4,C4,4,C4
+7,C2D,D2E,4,C4,4,C4
+8,C2D,D2E,5,C5,5,C5
+9,C2D,D2E,5,C5,5,C5
+10,C2D,D2E,6,C6,6,C6
+11,C2D,D2E,6,C6,6,C6
+12,C2D,D2E,7,C7,7,C7
+13,C2D,D2E,7,C7,7,C7
+14,C2D,D2E,8,C8,18,DC8
+15,C2D,D2E,9,H1,9,H1
+16,C2D,D2E,9,H1,9,H1
+17,C2D,D2E,10,H2,10,H2
+18,C2D,D2E,10,H2,10,H2
+19,C2D,D2E,11,H3,11,H3
+20,C2D,D2E,11,H3,11,H3
+21,C2D,D2E,13,H6,13,H6
+22,C2D,D2E,13,H6,13,H6
+23,C2D,D2E,14,H7,14,H7
+24,C2D,D2E,14,H7,14,H7
+25,C2D,D2E,15,H8,19,HV8
+26,C2D,D2E,16,H9,20,HV9
+27,C2D,D2E,17,H10,21,HV10
+28,C2D,D2E,18,DC9,8,C9
+29,C2D,D2E,19,HV5,12,H5
+30,C2D,D2E,20,HV11,15,H11
+31,C2D,D2E,21,HV12,16,H12
+32,C2D,D2E,22,HV13,17,H13
+0,D2E,E2F,1,S1,1,S1
+1,D2E,E2F,1,S1,1,S1
+2,D2E,E2F,2,C2,2,C2
+3,D2E,E2F,2,C2,2,C2
+4,D2E,E2F,3,N3,3,N3
+5,D2E,E2F,3,N3,3,N3
+6,D2E,E2F,4,C4,4,C4
+7,D2E,E2F,4,C4,4,C4
+8,D2E,E2F,5,C5,5,C5
+9,D2E,E2F,5,C5,5,C5
+10,D2E,E2F,6,C6,6,C6
+11,D2E,E2F,6,C6,6,C6
+12,D2E,E2F,7,C7,7,C7
+13,D2E,E2F,7,C7,7,C7
+14,D2E,E2F,8,C9,9,C9
+15,D2E,E2F,8,C9,9,C9
+16,D2E,E2F,9,H1,10,H1
+17,D2E,E2F,9,H1,10,H1
+18,D2E,E2F,10,H2,11,H2
+19,D2E,E2F,10,H2,11,H2
+20,D2E,E2F,11,H3,12,H3
+21,D2E,E2F,11,H3,12,H3
+22,D2E,E2F,13,H6,13,H6
+23,D2E,E2F,13,H6,13,H6
+24,D2E,E2F,14,H7,14,H7
+25,D2E,E2F,14,H7,14,H7
+26,D2E,E2F,15,H11,18,H11
+27,D2E,E2F,15,H11,18,H11
+28,D2E,E2F,16,H12,19,H12
+29,D2E,E2F,16,H12,19,H12
+30,D2E,E2F,17,H13,20,H13
+31,D2E,E2F,17,H13,20,H13
+32,D2E,E2F,18,DC8,8,C8
+33,D2E,E2F,19,HV8,15,H8
+34,D2E,E2F,20,HV9,16,H9
+35,D2E,E2F,21,HV10,17,H10
diff --git a/ensemble_md/tests/data/coord_swap/atom_name_mapping_alt.csv b/ensemble_md/tests/data/coord_swap/atom_name_mapping_alt.csv
@@ -0,0 +1,20 @@
+,resname A,resname B,atomid A,atom name A,atomid B,atom name B
+0,I2J,J2K,1,O,1,O
+1,I2J,J2K,2,C1,2,C1
+2,I2J,J2K,3,C2,3,C2
+3,I2J,J2K,4,C3,4,C3
+4,I2J,J2K,5,C4,5,C4
+5,I2J,J2K,6,CV5,6,C5
+6,I2J,J2K,7,CV6,7,C6
+7,I2J,J2K,8,H1,10,H1
+8,I2J,J2K,9,H2,11,H2
+9,I2J,J2K,10,H3,12,H3
+10,I2J,J2K,11,H4,13,H4
+11,I2J,J2K,12,H5,14,H5
+12,I2J,J2K,13,H6,15,H6
+13,I2J,J2K,14,H7,16,H7
+14,I2J,J2K,16,HV9,17,H9
+15,I2J,J2K,17,HV10,18,H10
+16,I2J,J2K,18,HV11,19,H11
+17,I2J,J2K,19,HV12,20,H12
+18,I2J,J2K,20,HV13,21,H13
diff --git a/ensemble_md/tests/data/coord_swap/df_atom_swap.csv b/ensemble_md/tests/data/coord_swap/df_atom_swap.csv
@@ -1,15 +1,7 @@
-,index,Name,Atom Name Number,Element,Direction,Swap,File line,Final Type,X Coordinates,Y Coordinates,Z Coordinates
-0,0,H5,5,H,miss,A2B,13,real,0.09260503947734833,1.6340194940567017,0.3355029225349426
-1,1,DC8,8,C,D2R,A2B,19,real,,,
-2,2,HV8,8,H,D2R,A2B,20,real,,,
-3,3,HV9,9,H,D2R,A2B,21,real,,,
-4,4,HV10,10,H,D2R,A2B,22,real,,,
-5,0,C8,8,C,R2D,B2A,9,dummy,,,
-6,1,H8,8,H,R2D,B2A,16,dummy,,,
-7,2,H9,9,H,R2D,B2A,17,dummy,,,
-8,3,H10,10,H,R2D,B2A,18,dummy,,,
-9,4,DC10,10,C,miss,B2A,22,dummy,2.620028495788574,1.403925895690918,2.7885396480560303
-10,5,HV4,4,H,miss,B2A,23,dummy,2.3651702404022217,1.4678032398223877,2.8239073753356934
-11,6,HV14,14,H,miss,B2A,24,dummy,2.678273916244507,1.464888095855713,2.8585944175720215
-12,7,HV15,15,H,miss,B2A,25,dummy,2.6814169883728027,1.340580940246582,2.7234597206115723
-13,8,HV16,16,H,miss,B2A,26,dummy,2.5583090782165527,1.3370134830474854,2.8496134281158447
+,Name,Swap,X Coordinates,Y Coordinates,Z Coordinates
+0,DC10,A2B,2.620028018951416,1.4039266109466553,2.7885406017303467
+1,HV14,A2B,2.6782684326171875,1.4648889303207397,2.8585996627807617
+2,HV15,A2B,2.6814215183258057,1.3405863046646118,2.723461389541626
+3,HV16,A2B,2.5583088397979736,1.3370097875595093,2.849609851837158
+4,HV4,A2B,2.3651702404022217,1.4678034782409668,2.8239071369171143
+5,H5,B2A,0.09260530769824982,1.6340194940567017,0.33550316095352173
diff --git a/ensemble_md/tests/data/coord_swap/extract_missing.csv b/ensemble_md/tests/data/coord_swap/extract_missing.csv
@@ -0,0 +1,7 @@
+,Name,Swap
+0,DC10,A2B
+1,HV14,A2B
+2,HV15,A2B
+3,HV16,A2B
+4,HV4,A2B
+5,H5,B2A
diff --git a/ensemble_md/tests/data/coord_swap/find_common.csv b/ensemble_md/tests/data/coord_swap/find_common.csv