Create testingp1.py

mhjensen · mhjensen · commit 4d0e0aa87480 · 2025-10-01T15:04:45.000+02:00
diff --git a/doc/src/week36/programs/testingp1.py b/doc/src/week36/programs/testingp1.py
@@ -0,0 +1,99 @@
+import os
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+from sklearn.model_selection import train_test_split
+from sklearn import linear_model
+
+def R2(y_data, y_model):
+    return 1 - np.sum((y_data - y_model) ** 2) / np.sum((y_data - np.mean(y_data)) ** 2)
+def MSE(y_data,y_model):
+    n = np.size(y_model)
+    return np.sum((y_data-y_model)**2)/n
+
+
+# A seed just to ensure that the random numbers are the same for every run.
+# Useful for eventual debugging.
+np.random.seed(0)
+
+#x = np.random.rand(100)
+x = np.linspace(-1,1,200)
+y = 1.0/(1.0+25*x*x)
+plt.plot(x, y, label = 'Runge')
+# number of features p (here degree of polynomial
+p = 9
+#  The design matrix now as function of a given polynomial
+X = np.zeros((len(x),p))
+X[:,0] = x
+X[:,1] = x*x
+X[:,2] = x*x*x
+X[:,3] = x*x*x*x
+X[:,4] = x*x*x*x*x
+X[:,5] = x*x*x*x*x*x
+X[:,6] = x**7
+X[:,7] = x**8
+X[:,8] = x**9
+# We split the data in test and training data
+#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
+
+# matrix inversion to find beta
+#OLSbeta = np.linalg.inv(X_train.T @ X_train) @ X_train.T @ y_train
+OLSbeta = np.linalg.inv(X.T @ X) @ X.T @ y
+ypredict = X @ OLSbeta
+plt.plot(x, y, label = 'Runge')
+plt.plot(x, ypredict, label = 'Runge')
+plt.show()
+"""
+print(OLSbeta)
+# and then make the prediction
+ytildeOLS = X_train @ OLSbeta
+print("Training MSE for OLS")
+print(MSE(y_train,ytildeOLS))
+ypredictOLS = X_test @ OLSbeta
+print("Test MSE OLS")
+print(np.abs(y_test-ypredictOLS))
+print(MSE(y_test,ypredictOLS))
+
+# Repeat now for Lasso and Ridge regression and various values of the regularization parameter
+I = np.eye(p,p)
+# Decide which values of lambda to use
+nlambdas = 100
+MSEPredict = np.zeros(nlambdas)
+MSETrain = np.zeros(nlambdas)
+MSELassoPredict = np.zeros(nlambdas)
+MSELassoTrain = np.zeros(nlambdas)
+lambdas = np.logspace(-4, 4, nlambdas)
+for i in range(nlambdas):
+    lmb = lambdas[i]
+    Ridgebeta = np.linalg.inv(X_train.T @ X_train+lmb*I) @ X_train.T @ y_train
+    # include lasso using Scikit-Learn
+    RegLasso = linear_model.Lasso(lmb,fit_intercept=True)
+    RegLasso.fit(X_train,y_train)
+    # and then make the prediction
+    ytildeRidge = X_train @ Ridgebeta
+    ypredictRidge = X_test @ Ridgebeta
+    ytildeLasso = RegLasso.predict(X_train)
+    ypredictLasso = RegLasso.predict(X_test)
+    MSEPredict[i] = MSE(y_test,ypredictRidge)
+    MSETrain[i] = MSE(y_train,ytildeRidge)
+    MSELassoPredict[i] = MSE(y_test,ypredictLasso)
+    MSELassoTrain[i] = MSE(y_train,ytildeLasso)
+
+# Now plot the results
+
+plt.figure()
+plt.plot(np.log10(lambdas), MSETrain, label = 'MSE Ridge train')
+plt.plot(np.log10(lambdas), MSEPredict, 'r--', label = 'MSE Ridge Test')
+plt.plot(np.log10(lambdas), MSELassoTrain, label = 'MSE Lasso train')
+plt.plot(np.log10(lambdas), MSELassoPredict, 'r--', label = 'MSE Lasso Test')
+
+plt.xlabel('log10(lambda)')
+plt.ylabel('MSE')
+plt.legend()
+plt.show()
+"""
+
+
+
+
+