-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy path5_output_metrics.py
More file actions
28 lines (26 loc) · 1.37 KB
/
5_output_metrics.py
File metadata and controls
28 lines (26 loc) · 1.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import os
import pandas as pd
from rdkit import Chem
import subprocess
# Build one evaluation table per node from the generated linkers and score it.
#
# For every entry of `pred_base_dir` whose name does not contain 'smiles', the
# script pairs, purely by position, the first N rows of
# `<true_base_dir>/<node>/hMOF_table.csv` (positional column 1 = true SMILES,
# positional column 2 = fragment SMILES) with the first N lines of
# `<pred_base_dir>/smiles_<node>.csv` (predicted SMILES), where N is the number
# of entries inside `<pred_base_dir>/<node>`. The table is written to
# `metrics/<node>.csv` and then passed to `python -m evaluation.linkers`.
true_base_dir = 'data/fragments_all/'
pred_base_dir = 'output/'
# WIP: match true/pred/frag smiles - specifically pred does not match true/frag
# (rows are paired by position only; nothing here checks that they correspond).
os.makedirs('metrics', exist_ok=True)
for node in os.listdir(pred_base_dir):
    # Entries with 'smiles' in their name are the prediction files themselves,
    # not per-node output directories.
    if 'smiles' in node:
        continue

    linker_count = len(os.listdir(os.path.join(pred_base_dir, node)))
    true_smiles_all = []
    frag_smiles_all = []
    pred_smiles_all = []
    # Only touch the input files when there is at least one linker, so an
    # empty node directory does not require them to exist.
    if linker_count:
        # Both inputs are loop-invariant: parse/read them once per node
        # instead of once per linker.
        table = pd.read_csv(os.path.join(true_base_dir, node, 'hMOF_table.csv'))
        with open(os.path.join(pred_base_dir, 'smiles_' + node + '.csv')) as smiles_file:
            pred_lines = smiles_file.readlines()
        for i in range(linker_count):
            # Explicit positional access; integer keys on a label-indexed
            # Series are deprecated in pandas.
            true_smiles_all.append(table.iloc[i, 1].strip())
            frag_smiles_all.append(table.iloc[i, 2].strip())
            pred_smiles_all.append(pred_lines[i].strip())

    df = pd.DataFrame({
        'index': range(len(true_smiles_all)),
        'true_molecules': true_smiles_all,
        'pred_molecules': pred_smiles_all,
        'frag_molecules': frag_smiles_all,
    })
    df = df[~df["pred_molecules"].str.contains('@')]  # remove bad entries
    df.to_csv(f'metrics/{node}.csv', index=False)
    print(f'Generating metrics for linkers that correspond to node {node} ...')
    # Argument list instead of a shell string, so node names containing spaces
    # or shell metacharacters are passed through verbatim.
    subprocess.run([
        'python', '-m', 'evaluation.linkers',
        '--save_result', '--filename', f'metrics/{node}.csv',
    ])