Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion repsample_acetylcall.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,15 @@
# Convert the excitation energies to a NumPy array and rescale them from
# wavenumbers (cm^-1) to electronvolts; 1 cm^-1 = 1.239841984e-4 eV.
excitation_energies_cm_array = np.array(excitation_energies_cm)
excitation_energies_ev_array = excitation_energies_cm_array * 0.0001239841984

# NOTE(review): this instance is rebound by the second GeomReduction(...) call
# below before it is ever used, so only the later construction takes effect.
MYGEOM = rstd.GeomReduction(2000, 3, 5, 1000, 8, 16, weighted=True, pdfcomp = "KLdiv", intweights=False, verbose=False, dim1=False)
# Size of the reduced subset; also embedded in the output file name below.
subset_n_count = 10

# Create the GeomReduction instance that is actually used.
# NOTE(review): the positional arguments are presumably (samples, states,
# representative geometries, cycles, cores, jobs) -- confirm against the
# GeomReduction signature. The values passed here are 2000, 3, subset_n_count,
# 1000, 8 and 16, with weighting enabled, "KLdiv" PDF comparison, no integer
# weights, verbose off and dim1 off.
MYGEOM = rstd.GeomReduction(2000, 3, subset_n_count, 1000, 8, 16, weighted=True, pdfcomp = "KLdiv", intweights=False, verbose=False, dim1=False)

np.random.seed(42) #set the NumPy seed for reproducibility

MYGEOM.read_data_direct_osc(excitation_energies_ev_array, oscillator_strengths_array) #directly feed the generated data into the GeomReduction instance
MYGEOM.reduce_geoms() #start the geometry reduction process to select representative geometries

# Copy the selected geometries from the full sample file into a new file
# whose name records the subset size.
input_filename = "acetaldehyde/harmonic_samples.xyz"
output_filename = f"acetaldehyde/reduced_samples_{subset_n_count}.xyz"
# The literal 7 is passed as select_geoms's atom_count argument.
MYGEOM.select_geoms(input_filename, output_filename, 7)
31 changes: 30 additions & 1 deletion src/pyneapples/rep_sampler_2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -712,7 +712,36 @@ def writegeoms(self, index=None):
f.write('%s\n' % (self.subsamples[i]+1)) #writes the sample index (adding 1 for one-based indexing)
else:
f.write('%s %s\n' % (self.subsamples[i]+1, self.sweights[i])) #writes the sample index and its corresponding integer weight


def select_geoms(self, input_file, output_file, atom_count):
    """
    Copy the selected geometries from a multi-geometry XYZ file to a new file.

    The input is scanned line by line. Every line that consists solely of
    ``atom_count`` (ignoring surrounding whitespace) is treated as the header
    of a new geometry. Geometries are numbered from zero in file order, and a
    geometry is copied verbatim (header, comment line and atom lines) when
    its number appears in ``self.subsamples``.

    Parameters:
        input_file (str): Path of the XYZ file holding all geometries.
        output_file (str): Path of the file that receives the selected
            geometries; it is created or overwritten.
        atom_count (str or int): The number of atoms in the target molecule,
            i.e. the text of each geometry's header line.

    Note:
        A comment line that consists of exactly ``atom_count`` would still be
        mistaken for a header; lines that merely start with the same digits
        (e.g. "70" or "7.31 eV") are not.
    """
    header = str(atom_count).strip()
    # self.subsamples stores zero-based sample indices (writegeoms adds 1 when
    # it needs one-based output), so geometries are counted from zero here.
    # Building the set once avoids a linear scan for every header line.
    selected = {int(idx) for idx in self.subsamples}

    # Read everything before opening the output so that input_file and
    # output_file may refer to the same path.
    with open(input_file, "r") as fin:
        lines = fin.readlines()

    with open(output_file, "w") as fout:
        geom_index = -1  # becomes 0 at the first header line
        write_block = False

        for line in lines:
            # A line holding only the atom count starts a new geometry.
            if line.strip() == header:
                geom_index += 1
                write_block = geom_index in selected

            if write_block:
                fout.write(line)

if __name__ == "__main__": #main programme entry point
random.seed(0) #seed the random number generator for reproducibility
start_time = time.time() #records the start time for the overall execution
Expand Down