hamruu · Fel1-x · Mar 27, 2025 · Mar 25, 2025
diff --git a/repsample_acetylcall.py b/repsample_acetylcall.py
@@ -32,9 +32,15 @@
 excitation_energies_cm_array = np.array(excitation_energies_cm)
 excitation_energies_ev_array = excitation_energies_cm_array * 0.0001239841984
 
-MYGEOM = rstd.GeomReduction(2000, 3, 5, 1000, 8, 16, weighted=True, pdfcomp = "KLdiv", intweights=False, verbose=False, dim1=False) #create an instance of GeomReduction with specified parameters: 500 samples, 3 states, 20 representative molecules, 100 cycles, 1 core, 1 job, without weighting, using KL divergence for PDF comparison, no integer weights, and verbose off
+subset_n_count = 10 #passed as the 3rd positional argument of GeomReduction below and embedded in the output filename; presumably the number of representative geometries to keep -- TODO confirm
+
+MYGEOM = rstd.GeomReduction(2000, 3, subset_n_count, 1000, 8, 16, weighted=True, pdfcomp = "KLdiv", intweights=False, verbose=False, dim1=False) #create an instance of GeomReduction; assuming the positional order samples, states, representative molecules, cycles, cores, jobs (TODO confirm against GeomReduction.__init__): 2000 samples, 3 states, subset_n_count representative molecules, 1000 cycles, 8 cores, 16 jobs; keywords: weighting on, "KLdiv" (KL divergence) for PDF comparison, no integer weights, verbose off, dim1 off
 
 np.random.seed(42) #set the seed for reproducibility
 
 MYGEOM.read_data_direct_osc(excitation_energies_ev_array, oscillator_strengths_array) #directly feed the generated data into the GeomReduction instance
 MYGEOM.reduce_geoms() #start the geometry reduction process to select representative geometries
+
+input_filename = "acetaldehyde/harmonic_samples.xyz" #geometry file that select_geoms reads
+output_filename = f"acetaldehyde/reduced_samples_{subset_n_count}.xyz" #file that select_geoms writes; the name carries subset_n_count so runs with different subset sizes do not overwrite each other
+MYGEOM.select_geoms(input_filename, output_filename, 7) #copy the geometries picked by reduce_geoms into output_filename; 7 is the atom_count argument (atoms per geometry)
diff --git a/src/pyneapples/rep_sampler_2d.py b/src/pyneapples/rep_sampler_2d.py
@@ -712,7 +712,36 @@ def writegeoms(self, index=None):
                     f.write('%s\n' % (self.subsamples[i]+1))   #writes the sample index (adding 1 for one-based indexing)
                 else:
                     f.write('%s %s\n' % (self.subsamples[i]+1, self.sweights[i]))   #writes the sample index and its corresponding integer weight
-
+
+    def select_geoms(self, input_file, output_file, atom_count):
+        """
+        Copy the geometries listed in self.subsamples from an XYZ file into a new file.
+
+        A geometry starts at a line whose first token equals atom_count; that line and
+        every line up to the next such header are copied verbatim when selected.
+
+        Parameters:
+          input_file (str): XYZ file holding all geometries (e.g. "full_geometries.xyz")
+          output_file (str): File that receives only the selected geometries.
+          atom_count (str or int): The number of atoms in the target molecule.
+        """
+        header = str(atom_count).strip()
+        # self.subsamples holds zero-based indices (writegeoms adds 1 when writing them
+        # out), so geometries are counted from 0 here to match.
+        wanted = {int(i) for i in self.subsamples}
+
+        with open(input_file, "r") as fin:
+            lines = fin.readlines()   #read fully first so output_file may equal input_file
+
+        with open(output_file, "w") as fout:
+            geom_index = -1   #becomes 0 at the first header; lines before it are skipped
+            for line in lines:
+                # First-token match tolerates padded counts ("    7") and rejects "70".
+                if line.split()[:1] == [header]:
+                    geom_index += 1
+                if geom_index in wanted:
+                    fout.write(line)
+
 if __name__ == "__main__":   #main programme entry point
     random.seed(0)   #seed the random number generator for reproducibility
     start_time = time.time()   #records the start time for the overall execution