# b_bNN_wmap_R.py

# %%
import numpy as np
import torch
import torch.nn as nn
import pyro
import pyro.distributions as dist
from pyro.nn import PyroModule, PyroSample
from pyro.infer.autoguide import AutoDiagonalNormal
from pyro.infer import SVI, Trace_ELBO, Predictive
from pyro.optim import ClippedAdam
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import random

# Seed every random number generator this script relies on (PyTorch, NumPy,
# the stdlib `random` module, and Pyro) so repeated runs are reproducible.
seed = 42
for _seed_fn in (torch.manual_seed, np.random.seed, random.seed, pyro.set_rng_seed):
    _seed_fn(seed)

# %%
# Load the dataset arrays from the current working directory
saturation_maps = np.load("saturation_maps.npy")
resistivity_maps = np.load("resistivity_maps.npy")
total_water_contents = np.load("total_water_contents.npy")
data_dd_rhoa_array = np.load("data_dd.npy")
data_slm_rhoa_array = np.load("data_slm.npy")
# Join the two apparent-resistivity arrays along the feature axis (axis 1)
apparent_resistivity = np.concatenate([data_dd_rhoa_array, data_slm_rhoa_array], axis=1)

print(f"Total Water Contents Shape: {total_water_contents.shape}")
print(f"All Rhoa Array Shape: {apparent_resistivity.shape}")

# Flatten each saturation map into one target vector per sample
saturation_maps_flat = saturation_maps.reshape(len(saturation_maps), -1)
print(f"Flattened Saturation Maps Shape: {saturation_maps_flat.shape}")

# Prepare the target variable
y = saturation_maps_flat  # For predicting saturation maps

# Split BEFORE fitting any preprocessing: the scaler and PCA must learn their
# statistics from the training rows only, otherwise information about the
# test set leaks into the features the model is trained on.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    apparent_resistivity, y, test_size=0.2, random_state=seed
)

# Fraction of the training-set variance that the PCA projection must retain.
# NOTE(review): the component count used by the earlier pipeline is not
# visible in this file -- confirm this value against it.
pca_variance_retained = 0.99

# `scaler` and `pca` were previously referenced without being created here.
scaler = StandardScaler()
pca = PCA(n_components=pca_variance_retained)

# Fit on the training split, then apply the fitted transforms to the test split
X_train = pca.fit_transform(scaler.fit_transform(X_train_raw))
X_test = pca.transform(scaler.transform(X_test_raw))

# Whole-dataset features, kept under their original names for any later cell
# that still refers to them; produced with the training-fitted transforms.
X = scaler.transform(apparent_resistivity)
X_reduced = pca.transform(X)

# Convert to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

# %%
class BayesianSaturationNet(PyroModule):
    """Bayesian multilayer perceptron with two ReLU hidden layers.

    Every weight and bias of the three linear layers is a latent variable
    with an independent Normal(0, 1) prior. The observation noise is a
    single scalar ``sigma`` with a Uniform(0, 1) prior.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        """Build the layers and attach priors to their parameters.

        Args:
            input_dim: Number of input features.
            hidden_dim: Width of both hidden layers.
            output_dim: Number of output values per sample.
        """
        super().__init__()
        prior = self._standard_normal_prior

        self.linear1 = PyroModule[nn.Linear](input_dim, hidden_dim)
        self.linear1.weight = prior(hidden_dim, input_dim)
        self.linear1.bias = prior(hidden_dim)

        self.linear2 = PyroModule[nn.Linear](hidden_dim, hidden_dim)
        self.linear2.weight = prior(hidden_dim, hidden_dim)
        self.linear2.bias = prior(hidden_dim)

        self.linear_out = PyroModule[nn.Linear](hidden_dim, output_dim)
        self.linear_out.weight = prior(output_dim, hidden_dim)
        self.linear_out.bias = prior(output_dim)

        self.relu = nn.ReLU()

    @staticmethod
    def _standard_normal_prior(*shape):
        """Return a PyroSample holding a Normal(0, 1) prior over a tensor of ``shape``."""
        # All dimensions are declared event dimensions, so the whole tensor
        # is treated as one multivariate sample site.
        return PyroSample(dist.Normal(0., 1.).expand(list(shape)).to_event(len(shape)))

    def forward(self, x, y=None):
        """Run the network and register the likelihood of ``y``.

        Args:
            x: Input tensor.
            y: Optional observed targets; when ``None`` the ``"obs"`` site is
                sampled instead of conditioned on.

        Returns:
            The output of the final linear layer (the likelihood mean).
        """
        hidden = self.relu(self.linear1(x))
        hidden = self.relu(self.linear2(hidden))
        mean = self.linear_out(hidden)

        # One shared noise scale for all outputs
        sigma = pyro.sample("sigma", dist.Uniform(0., 1.))

        # Leading dimension is declared conditionally independent; the
        # trailing output dimension forms a single event.
        with pyro.plate("data", hidden.shape[0]):
            pyro.sample("obs", dist.Normal(mean, sigma).to_event(1), obs=y)
        return mean

# %%
# Pyro keeps variational parameters in a global store keyed by name; clear it
# so re-running this cell starts from a fresh guide instead of silently
# picking up parameters left over from an earlier run.
pyro.clear_param_store()

# Width of both hidden layers
hidden_dim = 100

# Build the model once and wrap that same instance in the guide, so the guide
# is derived from the exact model being trained rather than from a separate
# throwaway copy constructed with duplicated hyperparameters.
bayes_model = BayesianSaturationNet(
    X_train.shape[1], hidden_dim=hidden_dim, output_dim=y_train.shape[1]
)
guide = AutoDiagonalNormal(bayes_model)

optimizer = ClippedAdam({"lr": 0.005})
svi = SVI(bayes_model, guide, optimizer, loss=Trace_ELBO())

# %%
# Run SVI on the full training tensors, recording the loss reported by every
# step and printing it at a fixed interval.
num_iterations = 2000
report_interval = 200
losses = []

for epoch in range(num_iterations):
    loss = svi.step(X_train, y_train)
    losses.append(loss)
    # Skip logging unless a whole reporting interval has just completed
    if (epoch + 1) % report_interval:
        continue
    print(f"[Iteration {epoch + 1}] Loss: {loss:.4f}")

# Plot the recorded loss curve
loss_fig, loss_ax = plt.subplots()
loss_ax.plot(losses)
loss_ax.set_title("ELBO Loss")
loss_ax.set_xlabel("Iteration")
loss_ax.set_ylabel("Loss")
plt.show()


# %%
# Draw posterior predictive samples for the held-out inputs
predictive = Predictive(
    bayes_model,
    guide=guide,
    num_samples=100,
    return_sites=("obs", "_RETURN"),
)
samples = predictive(X_test)

# Draws of the "obs" site; shape: (num_samples, batch_size, output_dim)
predictions = samples["obs"]
# Summarise across the leading (sample) axis
mean_predictions = torch.mean(predictions, dim=0)
std_predictions = torch.std(predictions, dim=0)


# Kept in place; not referenced by the code in this section.
from sklearn.metrics import mean_squared_error

# RMSE of the predictive mean for each test sample, then averaged
residuals = mean_predictions.detach().numpy() - y_test.numpy()
rmse_per_sample = np.sqrt(np.mean(np.square(residuals), axis=1))
overall_rmse = np.mean(rmse_per_sample)
print(f"Test RMSE (averaged over samples): {overall_rmse:.4f}")

# %%
# Reshape the flat vectors back to the per-sample layout of the loaded
# saturation maps. The shape is taken from the data instead of being
# hard-coded, so a dataset with a different grid size needs no edit here.
map_shape = saturation_maps.shape[1:]
mean_preds_maps = mean_predictions.detach().numpy().reshape(-1, *map_shape)
std_preds_maps = std_predictions.detach().numpy().reshape(-1, *map_shape)
y_test_maps = y_test.numpy().reshape(-1, *map_shape)

# Visualize a few randomly chosen test samples. The count is capped at the
# number available because sampling without replacement raises otherwise.
num_samples_to_plot = min(3, len(mean_preds_maps))
indices = np.random.choice(len(mean_preds_maps), num_samples_to_plot, replace=False)

# (title, maps) for each panel, in left-to-right order
panels = (
    ('True Saturation Map', y_test_maps),
    ('Predicted Saturation Map', mean_preds_maps),
    ('Prediction Uncertainty', std_preds_maps),
)

for idx in indices:
    plt.figure(figsize=(18, 5))

    for position, (title, maps) in enumerate(panels, start=1):
        plt.subplot(1, len(panels), position)
        plt.imshow(maps[idx], cmap='viridis')
        plt.title(title)
        plt.colorbar()

    plt.show()