diff --git a/repsample_acetylcall.py b/repsample_acetylcall.py index fdc7722..40b35c1 100644 --- a/repsample_acetylcall.py +++ b/repsample_acetylcall.py @@ -32,9 +32,15 @@ excitation_energies_cm_array = np.array(excitation_energies_cm) excitation_energies_ev_array = excitation_energies_cm_array * 0.0001239841984 -MYGEOM = rstd.GeomReduction(2000, 3, 5, 1000, 8, 16, weighted=True, pdfcomp = "KLdiv", intweights=False, verbose=False, dim1=False) #create an instance of GeomReduction with specified parameters: 500 samples, 3 states, 20 representative molecules, 100 cycles, 1 core, 1 job, without weighting, using KL divergence for PDF comparison, no integer weights, and verbose off +subset_n_count = 10 + +MYGEOM = rstd.GeomReduction(2000, 3, subset_n_count, 1000, 8, 16, weighted=True, pdfcomp = "KLdiv", intweights=False, verbose=False, dim1=False) #create an instance of GeomReduction: presumably 2000 samples, 3 states, subset_n_count representative molecules, 1000 cycles, 8 cores, 16 jobs (positional meanings TODO confirm against GeomReduction's signature), with weighting, using KL divergence for PDF comparison, no integer weights, verbose off, and dim1 off np.random.seed(42) #set the seed for reproducibility MYGEOM.read_data_direct_osc(excitation_energies_ev_array, oscillator_strengths_array) #directly feed the generated data into the GeomReduction instance MYGEOM.reduce_geoms() #start the geometry reduction process to select representative geometries + +input_filename = "acetaldehyde/harmonic_samples.xyz" +output_filename = f"acetaldehyde/reduced_samples_{subset_n_count}.xyz" +MYGEOM.select_geoms(input_filename, output_filename, 7) \ No newline at end of file diff --git a/src/pyneapples/rep_sampler_2d.py b/src/pyneapples/rep_sampler_2d.py index 08f2209..47d48cb 100644 --- a/src/pyneapples/rep_sampler_2d.py +++ b/src/pyneapples/rep_sampler_2d.py @@ -712,7 +712,36 @@ def writegeoms(self, index=None): f.write('%s\n' % (self.subsamples[i]+1)) #writes the sample index (adding 1 for one-based indexing) else: f.write('%s %s\n' % (self.subsamples[i]+1, 
# (unchanged diff context, tail of writegeoms) self.sweights[i])) #writes the sample index and its corresponding integer weight
def select_geoms(self, input_file, output_file, atom_count):
    """
    Copy the geometries selected in ``self.subsamples`` from a multi-frame
    XYZ file into a new XYZ file.

    A new geometry is recognised by a line whose only content is the atom
    count. Every line from that header up to (not including) the next header
    is treated as part of the same geometry and is written unchanged when the
    geometry is selected.

    Geometries are numbered from zero in file order, because
    ``self.subsamples`` holds zero-based indices (``writegeoms`` adds 1 when
    it writes them out as one-based indices).

    Parameters:
        input_file (str): Path of the XYZ file holding all geometries.
        output_file (str): Path of the XYZ file that receives the selected
            geometries; it is created (or truncated) even if nothing matches.
        atom_count (str or int): The number of atoms in the target molecule,
            i.e. the content of every geometry's header line.
    """
    header_token = str(atom_count).strip()
    # Build the lookup once; int() also normalises numpy integer scalars.
    wanted = {int(idx) for idx in self.subsamples}

    # Read everything before opening the output, so the input is fully
    # consumed before the output file is created or truncated.
    with open(input_file, "r") as fin:
        lines = fin.readlines()

    selected_lines = []
    keep_frame = False
    frame_index = -1  # becomes 0 at the first header line
    for line in lines:
        # Compare the whole stripped line rather than a prefix: a prefix test
        # also fires on comment/coordinate lines that merely begin with the
        # same digits and misses headers padded with leading spaces.
        # NOTE(review): a comment line consisting solely of the atom count
        # would still be taken for a header.
        if line.strip() == header_token:
            frame_index += 1
            keep_frame = frame_index in wanted
        if keep_frame:
            selected_lines.append(line)

    with open(output_file, "w") as fout:
        fout.writelines(selected_lines)
# (unchanged diff context, start of the main block) if __name__ == "__main__": #main programme entry point random.seed(0) #seed the random number generator for reproducibility start_time = time.time() #records the start time for the overall execution