-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmaldi.py
69 lines (58 loc) · 2.78 KB
/
maldi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import random
def plot_average_bar():
    """Show a bar chart of the mean normalized product area per condition.

    Reads the MALDI bromide CSV from GitHub, divides the
    ``'EIC(+)[M+H] Product Area'`` column by its maximum, averages the
    result for each value of the ``condition`` column, and draws one
    labelled bar per condition.

    Returns:
        None. The figure is displayed via ``plt.show()``.
    """
    column = 'EIC(+)[M+H] Product Area'
    df = pd.read_csv('https://raw.githubusercontent.com/beef-broccoli/ochem-data/main/deebo/maldi-bromide.csv')

    # Scale so that the largest observed area becomes 1.
    df[column] = df[column] / df[column].max()
    means = df.groupby(by='condition')[column].mean()
    vals = means.values
    labels = means.index.values

    # Derive the bar positions from the data instead of assuming exactly
    # four conditions; a different number of groups would otherwise fail
    # with a shape mismatch in plt.bar / plt.xticks.
    positions = list(range(len(vals)))
    plt.bar(positions, vals)
    plt.xticks(positions, labels)

    # Print each bar's value (3 decimals) just above the bar top.
    for pos, val in zip(positions, vals):
        plt.text(pos, val + 0.005, str(round(val, 3)), ha='center', va='center')

    plt.title('Average UPLC-MS ion counts (normalized) for four different catalytic methods')
    plt.ylabel('Average UPLC-MS ion counts (normalized)')
    plt.xlabel('Catalytic methods')
    plt.show()
def simulate_etc(max_sample=50, n_simulations=10000):
    """Simulate picking the best condition from equal-sized random samples.

    For every per-condition sample size from 1 to ``max_sample``, the
    following is repeated ``n_simulations`` times: draw that many rows from
    each ``condition`` group of the MALDI amine dataset, add the drawn
    normalized ``'EIC(+)[M+H] Product Area'`` values to a running reward,
    and check whether the condition with the highest sample mean is
    ``'Pd'``.

    Two lists with one entry per sample size are printed: the fraction of
    simulations in which ``'Pd'`` was selected, and the average summed
    reward per simulation.

    Args:
        max_sample: Largest number of rows drawn from each condition.
        n_simulations: Number of repetitions per sample size. Must be
            non-zero, since the per-size results are divided by it.

    Returns:
        None. Results are only printed.

    Note:
        ``GroupBy.sample`` draws without replacement by default, so pandas
        is expected to raise if a condition has fewer rows than the
        requested sample size.
    """
    column = 'EIC(+)[M+H] Product Area'

    # fetch ground truth data
    df = pd.read_csv(
        'https://raw.githubusercontent.com/beef-broccoli/ochem-data/main/deebo/maldi-amine.csv', index_col=0)

    # Scale so that the largest observed area becomes 1.
    df[column] = df[column] / df[column].max()

    percentages = []
    avg_cumu_rewards = []
    by_condition = df.groupby(by=['condition'])

    for n_sample in tqdm(range(max_sample), desc='1st loop'):
        count = 0
        reward = 0
        for _ in tqdm(range(n_simulations), desc='2nd loop', leave=False):
            # n_sample is zero-based, so draw n_sample + 1 rows per condition.
            drawn = by_condition.sample(n_sample + 1)

            # Sum the normalized column by name rather than taking whichever
            # numeric column happens to come first in the frame.
            reward = reward + drawn[column].sum()

            # Conditions tied for the highest sample mean are broken
            # uniformly at random.
            means = drawn.groupby('condition')[column].mean()
            best = means.index[means == means.max()]
            if random.choice(list(best)) == 'Pd':
                count = count + 1

        percentages.append(count / n_simulations)
        avg_cumu_rewards.append(reward / n_simulations)

    print(percentages)
    print(avg_cumu_rewards)
    return None
if __name__ == '__main__':
    # numpy is referenced only by the commented-out export snippet below.
    import numpy as np

    # Default action when run as a script: show the per-condition bar chart.
    plot_average_bar()

    # Uncomment to run the sampling simulation instead (with the default
    # arguments this is 50 sample sizes x 10000 repetitions).
    #simulate_etc()

    # NOTE(review): the 50 values below look like the `percentages` output of
    # an earlier simulate_etc() run (one per sample size) - confirm before
    # reusing. The snippet repeats each value 4 times and saves the array.
    # accuracy = [0.5004, 0.5595, 0.6095, 0.6582, 0.6867, 0.732, 0.75, 0.7705, 0.7833, 0.8047, 0.8279, 0.8377, 0.8496, 0.8664,
    #             0.8713, 0.8794, 0.8935, 0.8981, 0.9081, 0.9162, 0.9226, 0.931, 0.9285, 0.9368, 0.9407, 0.9465, 0.9507, 0.9505,
    #             0.9547, 0.9586, 0.9619, 0.9627, 0.9694, 0.972, 0.9755, 0.975, 0.9756, 0.978, 0.98, 0.9788, 0.9816, 0.9852, 0.9873,
    #             0.9874, 0.9884, 0.9903, 0.9907, 0.9923, 0.9918, 0.9921]
    #
    # a = np.array(accuracy).repeat(4)
    # np.save('etc.npy', a)