-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPredictive Posterior Distribution LinReg.py
130 lines (106 loc) · 4.73 KB
/
Predictive Posterior Distribution LinReg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# -*- coding: utf-8 -*-
"""
Created on Sun Aug 4 20:34:48 2024
Posterior Predictive Distribution to verify linear model
@author: SROTOSHI GHOSH
"""
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import my_linreg as mlr
import scipy.stats as sc
# Load the data for regression
cl = 3 * 10**8
df = pd.read_csv('data.csv')
ri = df['NASA dist'].values # Distance of galaxies from Earth
zi = df['NASA z'].values
vi = zi * cl
u_ri = df['sigma dist'].values
ri = np.array(ri)
vi = np.array(vi)
u_ri = np.array(u_ri)
N = len(vi)
m, er_slope, c, er_inter, xi2, xiyi, ei, ei2, exi, cov_val = mlr.lin_reg(vi, ri)
# Define the likelihood function
def likelihood(ri, vi, slope, intercept, sigma):
model = slope * vi + intercept
return -0.5 * np.sum(((ri - model) / sigma)**2)
# Define the prior distributions
def prior(slope, intercept, mu_slope, tau_slope, mu_intercept, tau_intercept):
prior_slope = -0.5 * ((slope - mu_slope) / tau_slope)**2
prior_intercept = -0.5 * ((intercept - mu_intercept) / tau_intercept)**2
return prior_slope + prior_intercept
# Define the posterior distribution
def posterior(ri, vi, slope, intercept, sigma, mu_slope, tau_slope, mu_intercept, tau_intercept):
return likelihood(ri, vi, slope, intercept, sigma) + prior(slope, intercept, mu_slope, tau_slope, mu_intercept, tau_intercept)
# Metropolis-Hastings sampling parameters
num_samples = 100000
sigma = np.sqrt(np.sum(ei2)/(N-2)) # Use mean of uncertainties as an estimate for sigma
proposal_width_slope = 0.000001
proposal_width_intercept = 1.0
mu_slope, tau_slope = m, er_slope # Prior mean and standard deviation for slope
mu_intercept, tau_intercept = c, er_inter # Prior mean and standard deviation for intercept
# Initialize the chain with random values
samples = np.zeros((num_samples, 2))
samples[0, :] = [np.random.normal(mu_slope, tau_slope), np.random.normal(mu_intercept, tau_intercept)]
# Metropolis-Hastings sampling
acceptance_count = 0
for i in range(1, num_samples):
current_slope, current_intercept = samples[i-1, :]
# Propose new slope and intercept
proposed_slope = np.random.normal(current_slope, proposal_width_slope)
proposed_intercept = np.random.normal(current_intercept, proposal_width_intercept)
# Calculate acceptance probability
posterior_current = posterior(ri, vi, current_slope, current_intercept, sigma, mu_slope, tau_slope, mu_intercept, tau_intercept)
posterior_proposed = posterior(ri, vi, proposed_slope, proposed_intercept, sigma, mu_slope, tau_slope, mu_intercept, tau_intercept)
acceptance_prob = min(1, np.exp(posterior_proposed - posterior_current))
# Accept or reject the proposed parameters
if acceptance_prob > np.random.rand():
samples[i, :] = [proposed_slope, proposed_intercept]
acceptance_count += 1
else:
samples[i, :] = [current_slope, current_intercept]
# Analyze the results
burn_in = 1000
slope_samples = samples[burn_in:, 0]
intercept_samples = samples[burn_in:, 1]
slope_mean = np.mean(slope_samples)
intercept_mean = np.mean(intercept_samples)
print(f"Estimated slope: {slope_mean}")
print(f"Estimated intercept: {intercept_mean:.2f}")
print(f"Acceptance count: {acceptance_count}")
# Posterior predictive distribution at one particular value of x_p
xp = (0.028762*cl) # Example value for y_p #NGC 7603
# Computing the posterior predictive distribution at x_p
posterior_predictive_values = []
for j in range(len(slope_samples)):
slope = slope_samples[j]
intercept = intercept_samples[j]
mean = slope * xp + intercept
sampled_yp = np.random.normal(mean, sigma)
posterior_predictive_values.append(sampled_yp)
m_pdf=np.mean(posterior_predictive_values)
sd_pdf=np.std(posterior_predictive_values)
print ("Posterior predictive mean for NGC 7603 (y_p) : ",m_pdf)
print ('Posterior predictive std for NGC 7603 (sigma y_p): ',sd_pdf)
x=np.linspace(m_pdf-4*sd_pdf,m_pdf+4*sd_pdf,1000)
pdf=sc.norm.pdf(x,m_pdf,sd_pdf)
# Plot the posterior predictive distribution at yp
plt.figure(figsize=(8, 6))
plt.hist(posterior_predictive_values, bins=50, density=True, alpha=0.4, color='purple')
plt.plot(x,pdf, color='purple')
plt.axvline(x=m_pdf, color='red', linestyle='-', label='Predicted y_p')
plt.axvline(x=121.74, color='black', linestyle='--', label='Observed y_p')
plt.xlabel('Posterior predictive values')
plt.ylabel('Density')
plt.title(f'Posterior Predictive Distribution at x_p={xp}')
plt.grid(True)
plt.legend()
plt.show()
plt.figure(figsize=(8, 6))
plt.plot(posterior_predictive_values)
plt.xlabel('Iteration Number')
plt.ylabel('Sample Value')
plt.title('Trace Plot of Distribution Samples')
plt.grid(True)
plt.show()