# gaussianDistribution.py
# Fits and plots Gaussian curves for the max- and middle-frequency columns of optimization.csv.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as stats
import math
from sklearn.preprocessing import StandardScaler
## Load data
df = pd.read_csv('/home/nel227/afrl_fall_2020/data/optimization.csv')
n = len(df.index)  # row count before any outlier rows are dropped

## Remove rows where max frequency deviates from the median by 2.0 MADs or more
# Positional column 6 is used as the max-frequency series; it is kept as a
# pandas Series here so the outlier mask lines up with df.index.
maxFreq = df.iloc[:, 6]
# stats.median_absolute_deviation was deprecated in SciPy 1.5 and removed in
# SciPy 1.9. Its replacement, median_abs_deviation, *divides* by `scale`
# (default 1.0), whereas the old function *multiplied* by 1.4826, so
# scale=1/1.4826 reproduces the original threshold.
maxFreq_mad = stats.median_abs_deviation(maxFreq, scale=1 / 1.4826)
maxFreq_median = np.median(maxFreq)
# Index labels of the rows whose scaled distance from the median is >= 2.0.
maxFreq_outlier = df.index[(abs(maxFreq.values - maxFreq_median) / maxFreq_mad) >= 2.0]
df = df.drop(maxFreq_outlier)

# Re-extract the working arrays from the filtered frame: positional columns
# 0-5 go to X, column 6 to maxFreq and column 7 to middleFreq.
X = df.iloc[:, 0:6].to_numpy()
maxFreq = df.iloc[:, 6].to_numpy()
middleFreq = df.iloc[:, 7].to_numpy()
## Plot Gaussian distribution for max frequency
# Fit a normal curve to the max-frequency sample (mean and population
# standard deviation) and evaluate its density over mu +/- 3 sigma.
mu = np.mean(maxFreq)
sigma = np.std(maxFreq)
x = np.linspace(mu - 3*sigma, mu + 3*sigma, 100)
y = stats.norm.pdf(x, mu, sigma)

fig, axs = plt.subplots()
axs.plot(x, y, color='#b22222')

# Shade the area under the curve band by band. Each entry gives the band's
# start and stop as multiples of sigma from the mean, plus its fill colour;
# the central band comes first, then the 1-2 sigma and 2-3 sigma tails.
for start, stop, shade in (
    (-1, 1, '#dc1c13'),
    (-1, -2, '#ea4c46'),
    (1, 2, '#ea4c46'),
    (-2, -3, '#f07470'),
    (2, 3, '#f07470'),
):
    px = np.linspace(mu + start*sigma, mu + stop*sigma, 10)
    py = stats.norm.pdf(px, mu, sigma)
    axs.fill_between(px, py, color=shade, alpha=0.5)
## Plot Gaussian distribution for middle frequency
# Fit a normal curve to the middle-frequency sample and overlay it on the
# existing Axes object `axs`. Drawing through `axs` rather than the pyplot
# state machine (plt.plot / plt.fill_between) keeps the overlay on the
# intended axes even if another figure becomes current.
mu = np.mean(middleFreq)
sigma = np.std(middleFreq)
x = np.linspace(mu - 3*sigma, mu + 3*sigma, 100)
y = stats.norm.pdf(x, mu, sigma)
axs.plot(x, y, color='#00008b')

# Shade the area under the curve band by band. Each entry gives the band's
# start and stop as multiples of sigma from the mean, plus its fill colour;
# the central band comes first, then the 1-2 sigma and 2-3 sigma tails.
for start, stop, shade in (
    (-1, 1, '#0b559f'),
    (-1, -2, '#2b7bba'),
    (1, 2, '#2b7bba'),
    (-2, -3, '#539ecd'),
    (2, 3, '#539ecd'),
):
    px = np.linspace(mu + start*sigma, mu + stop*sigma, 10)
    py = stats.norm.pdf(px, mu, sigma)
    axs.fill_between(px, py, color=shade, alpha=0.5)
# Label both axes in one call, then write the figure to disk with a tight
# bounding box.
axs.set(xlabel='Observation', ylabel='Probability Density')
figure_path = '/home/nel227/afrl_fall_2020/figures/maxFreq_gaussian_plot.png'
fig.savefig(figure_path, bbox_inches='tight')