# b_bnn_Vtot_R.py

# %%
import numpy as np
import torch
import torch.nn as nn
import pyro
import pyro.distributions as dist
from pyro.nn import PyroModule, PyroSample
from pyro.infer.autoguide import AutoDiagonalNormal
from pyro.infer import SVI, Trace_ELBO, Predictive
from pyro.optim import ClippedAdam
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import random

# Seed every random number generator the script touches (PyTorch, NumPy,
# Python's `random`, and Pyro) with one shared value so runs are repeatable.
seed = 42
for _seed_rng in (torch.manual_seed, np.random.seed, random.seed, pyro.set_rng_seed):
    _seed_rng(seed)

# %%
# Load the data
saturation_maps = np.load("saturation_maps.npy")
resistivity_maps = np.load("resistivity_maps.npy")
total_water_contents = np.load("total_water_contents.npy")
data_dd_rhoa_array = np.load("data_dd.npy")
data_slm_rhoa_array = np.load("data_slm.npy")
# Join the two apparent-resistivity arrays column-wise into one feature matrix.
apparent_resistivity = np.concatenate([data_dd_rhoa_array, data_slm_rhoa_array], axis=1)

print(f"Total Water Contents Shape: {total_water_contents.shape}")
print(f"All Rhoa Array Shape: {apparent_resistivity.shape}")

# Prepare the target variable
y = total_water_contents  # For predicting total water content

# Split the data into training and testing sets BEFORE fitting any
# preprocessing: fitting the scaler/PCA on the full dataset would let test-set
# statistics leak into the training features and bias the test metrics.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    apparent_resistivity, y, test_size=0.2, random_state=seed
)

# Normalize the input features using statistics of the training split only
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_raw)
X_test_scaled = scaler.transform(X_test_raw)

# Optional: Dimensionality Reduction using PCA (fitted on the training split)
pca = PCA(n_components=0.95)  # Retain 95% of the variance
X_train = pca.fit_transform(X_train_scaled)
X_test = pca.transform(X_test_scaled)

# Full-dataset views, kept under their original names for any code that still
# refers to them; they reuse the transforms fitted on the training split.
X = scaler.transform(apparent_resistivity)
X_reduced = pca.transform(X)
print(f"Reduced feature count: {X_reduced.shape[1]}")

# Convert to PyTorch tensors; targets get a trailing axis of size 1
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)

# %%
class BayesianRegression(PyroModule):
    """Bayesian neural network regressor with one ReLU hidden layer.

    Every weight and bias of both linear layers has an independent
    Normal(0, 1) prior. The observation model is Gaussian around the network
    output, with a noise scale ``sigma`` drawn from a Uniform(0, 10) prior.
    """

    def __init__(self, input_dim, hidden_dim):
        """Create the two linear layers and attach priors to their parameters.

        Args:
            input_dim: Number of input features.
            hidden_dim: Number of units in the hidden layer.
        """
        super().__init__()
        self.linear1 = PyroModule[nn.Linear](input_dim, hidden_dim)
        self.linear1.weight = PyroSample(dist.Normal(0., 1.).expand([hidden_dim, input_dim]).to_event(2))
        self.linear1.bias = PyroSample(dist.Normal(0., 1.).expand([hidden_dim]).to_event(1))

        self.linear2 = PyroModule[nn.Linear](hidden_dim, 1)
        self.linear2.weight = PyroSample(dist.Normal(0., 1.).expand([1, hidden_dim]).to_event(2))
        self.linear2.bias = PyroSample(dist.Normal(0., 1.).expand([1]).to_event(1))

        self.relu = nn.ReLU()

    def forward(self, x, y=None):
        """Run the network and register the likelihood at the ``"obs"`` site.

        Args:
            x: Input tensor whose first dimension indexes the data points
                (it sizes the ``"data"`` plate).
            y: Optional targets, one value per data point. When given they
                condition the ``"obs"`` site; when ``None`` (e.g. during
                posterior-predictive sampling) ``"obs"`` is sampled instead.

        Returns:
            The network output with its trailing size-1 axis removed.
        """
        hidden = self.relu(self.linear1(x))
        mean = self.linear2(hidden).squeeze(-1)
        sigma = pyro.sample("sigma", dist.Uniform(0., 10.))
        # Only flatten targets that were actually supplied: calling a tensor
        # method on the default ``None`` would raise at prediction time.
        # ``reshape(-1)`` (rather than a bare ``squeeze()``) keeps a batch of
        # one as a length-1 vector matching ``mean``.
        observed = None if y is None else y.reshape(-1)
        with pyro.plate("data", x.shape[0]):
            pyro.sample("obs", dist.Normal(mean, sigma), obs=observed)
        return mean

# %%
# Start from an empty parameter store so that re-running this cell does not
# pick up variational parameters left over from a previous run.
pyro.clear_param_store()

# Build the model once and wrap that very instance in the guide, so the guide
# and SVI operate on the same model object rather than on two separately
# constructed networks that merely share site names.
bayes_model = BayesianRegression(X_train.shape[1], hidden_dim=50)
guide = AutoDiagonalNormal(bayes_model)

optimizer = ClippedAdam({"lr": 0.01})
svi = SVI(bayes_model, guide, optimizer, loss=Trace_ELBO())

# %%
# Optimise the variational parameters on the full training set, recording the
# loss returned by every SVI step so convergence can be inspected afterwards.
num_iterations = 5000
losses = []

for iteration in range(1, num_iterations + 1):
    loss = svi.step(X_train, y_train)
    losses.append(loss)
    # Report progress every 500 iterations.
    if iteration % 500 == 0:
        print(f"[Iteration {iteration}] Loss: {loss:.4f}")

# Plot the recorded loss against the iteration number.
plt.plot(losses)
plt.xlabel("Iteration")
plt.ylabel("Loss")
plt.title("ELBO Loss")
plt.show()

# %%
# Posterior predictive sampling: draw 1000 samples through the fitted guide
# and collect both the "obs" site and the model's return value.
predictive = Predictive(
    bayes_model,
    guide=guide,
    num_samples=1000,
    return_sites=("obs", "_RETURN"),
)

samples = predictive(X_test)
predictions = samples["obs"]  # Shape: (num_samples, batch_size)
# Summarise each test point across the sample axis (dim 0).
mean_predictions = torch.mean(predictions, dim=0)
std_predictions = torch.std(predictions, dim=0)

# %%
from sklearn.metrics import mean_squared_error

# Root-mean-squared error between the test targets and the predictive mean.
test_targets = y_test.detach().numpy()
test_predictions = mean_predictions.detach().numpy()
rmse = np.sqrt(mean_squared_error(test_targets, test_predictions))
print(f"Test RMSE: {rmse:.4f}")

# %%
# Move the targets and the predictive summaries to NumPy for plotting.
y_test_np = y_test.squeeze().detach().numpy()
mean_preds_np = mean_predictions.detach().numpy()
std_preds_np = std_predictions.detach().numpy()

# Reorder all three arrays by ascending true value so the true curve is
# monotone and deviations of the predictions are easy to see.
sorted_indices = np.argsort(y_test_np)
y_test_sorted, mean_preds_sorted, std_preds_sorted = (
    values[sorted_indices] for values in (y_test_np, mean_preds_np, std_preds_np)
)

plt.figure(figsize=(12, 6))
plt.plot(y_test_sorted, label='True Values', color='red')
# Error bars show one predictive standard deviation around the mean.
plt.errorbar(
    range(len(y_test_sorted)),
    mean_preds_sorted,
    yerr=std_preds_sorted,
    fmt='o',
    label='Predictions with Uncertainty',
    ecolor='gray',
    alpha=0.5,
)
plt.title('Bayesian Neural Network Predictions with Uncertainty')
plt.xlabel('Sample Index (sorted by true value)')
plt.ylabel('Total Water Content')
plt.legend()
plt.show()