From 85017634b4a18ebe23e7887a3fe5e11e90a50239 Mon Sep 17 00:00:00 2001
From: Nicholas Clark
Notice how wiggly the function becomes in the last plot when \(\lambda\) is very small, indicating that
the function is overfitting to the in-sample training data.
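To isolate this effect, here is a minimal sketch using mgcv's built-in gamSim() simulator rather than the tutorial's data; the sp argument fixes \(\lambda\) directly:
library(mgcv)
set.seed(1)
dat <- gamSim(1, n = 200, scale = 2)
# sp fixes the smoothing penalty; compare a heavy penalty with a
# near-zero one
m_smooth <- gam(y ~ s(x2, k = 30), data = dat, sp = 10)
m_wiggly <- gam(y ~ s(x2, k = 30), data = dat, sp = 1e-8)
par(mfrow = c(1, 2))
plot(m_smooth, main = "large lambda")
plot(m_wiggly, main = "near-zero lambda")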
Incidentally, this behaviour mirrors what mgcv’s
@@ -1816,7 +1816,8 @@
m_mvgam$model_file
## [1] "JAGS model code generated by package mvgam"
## [2] ""
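Since the model file is stored as a character vector with one element per line, a quick way to print it readably (a small usage sketch):
cat(m_mvgam$model_file, sep = "\n")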
@@ -2172,8 +2173,8 @@
head(mod_skeleton$pregam)
## $m
## [1] 1
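head() shows only the first components; to survey everything the skeleton carries, a quick sketch (component names depend on the fitted model):
str(mod_skeleton$pregam, max.level = 1)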
@@ -2239,13 +2240,13 @@
Generally the uncertainty blows out when forecasting further ahead for the mgcv model (why might this be?); if we only look 2 timesteps ahead, it is not quite as terrible, but it becomes more clear
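One plausible answer to the parenthetical question, sketched on simulated data (all names below are illustrative): a spline basis carries almost no information beyond the training range, so prediction standard errors inflate rapidly under extrapolation.
library(mgcv)
set.seed(2)
train <- data.frame(time = 1:50)
train$y <- sin(train$time / 5) + rnorm(50, sd = 0.3)
m <- gam(y ~ s(time), data = train)
# predict over an out-of-sample horizon and inspect the standard errors
p <- predict(m, newdata = data.frame(time = 51:60), se.fit = TRUE)
round(p$se.fit, 2)  # uncertainty grows the further we extrapolate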
@@ -2462,15 +2463,15 @@
Why does the autocorrelated observation model perform so badly? Q-Q plots give some insight: they clearly indicate that the dynamic latent trend model fits the observation process to the data better than the autocorrelated observation model does
plot(m_mvgam, "residuals")
-qq.gam(m_mgcv, pch = 16, lwd = 3, cex = 0.8)
In fact the autocorrelated observation model is overestimating
@@ -2490,7 +2491,7 @@
quantile(MCMCvis::MCMCchains(m_mvgam$model_output,
"r"), c(0.1, 0.5, 0.9))
## 10% 50% 90%
-## 3.062211 7.095187 34.194210
+## 2.975743 6.680284 32.699677
Looking at estimates for the full trend and forecast distributions shows how the mvgam model is clearly doing a decent job of tracking the latent state up to the end of the training period while
@@ -2500,13 +2501,13 @@
layout(1)
plot(m_mvgam, "forecast", data_test = data_test)
## Out of sample DRPS:
-## [1] 63.10529
+## [1] 64.69394
##
-We can get a better idea of what is happening for the autoregressive observation model by plotting posterior realisations on the log scale. Here each line is a realisation, and we show these lines as a spaghetti
@@ -2538,7 +2539,7 @@
layout(1)
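A minimal sketch of such a spaghetti plot, assuming trend_draws is a draws x time matrix of posterior predictions extracted from the model (a hypothetical name):
# plot the first 50 posterior realisations as semi-transparent lines;
# + 1 guards against log(0) for zero counts
matplot(t(log(trend_draws[1:50, ] + 1)), type = "l", lty = 1,
        col = adjustcolor("darkred", alpha.f = 0.2),
        xlab = "Time", ylab = "log(prediction + 1)")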
From the plots above it is apparent that although some of the realisations are sensible, as soon as a path climbs upward it tends to
@@ -2580,9 +2581,9 @@
We should also re-check the autoregressive model’s realisations, which look much better now
plot(1, type = "n", bty = "L", xlab = "Time",
@@ -2727,10 +2728,10 @@
plot(m_mvgam, "smooths")
-Inspect posterior realisations of the smooth function
plot(m_mvgam, "smooths", realisations = TRUE)
-The plot_mvgam_smooth function allows more flexibility for plotting smooth functions, including an ability to supply newdata for plotting posterior marginal simulations.
@@ -2742,29 +2743,29 @@
plot_mvgam_smooth(m_mvgam, series = 1, smooth = "p_whiting",
residuals = TRUE)
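The newdata argument mentioned above can supply a custom covariate grid; a hedged sketch (the covariate range is an assumption, and any other covariates in the model would also need columns in newdata):
newdat <- data.frame(p_whiting = seq(0, 1, length.out = 100))
plot_mvgam_smooth(m_mvgam, series = 1, smooth = "p_whiting",
                  newdata = newdat)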
-We can also perform a series of posterior predictive checks (using the ppc() function) to see if the model is able to simulate data for the training period that looks realistic and unbiased. First, examine simulated histograms for posterior predictions (yhat) and compare to the observations (y)
ppc(m_mvgam, series = 1, type = "hist", legend_position = "bottomright")
-Now plot the distribution of predicted means compared to the observed mean
ppc(m_mvgam, series = 1, type = "mean")
-Next examine simulated empirical Cumulative Distribution Functions (CDFs) for posterior predictions (yhat) and compare to the CDF of the observations (y)
ppc(m_mvgam, series = 1, type = "cdf")
-Finally, look for any biases in predictions by examining a Probability Integral Transform (PIT) histogram. If our predictions are not biased one way or another (i.e. not consistently under- or over-predicting), this histogram should look roughly uniform
ppc(m_mvgam, series = 1, type = "pit")
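For intuition, each PIT value is simply where an observation falls within its predictive CDF; a minimal sketch of the idea, assuming yhat_draws is a draws x observation matrix of posterior predictions and y the observed vector (hypothetical names; for discrete counts a randomised PIT is usually preferred):
pit <- vapply(seq_along(y), function(i) {
  mean(yhat_draws[, i] <= y[i])  # predictive CDF evaluated at y[i]
}, numeric(1))
hist(pit, breaks = 10, xlab = "PIT value", main = "PIT histogram")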
-All of these plots indicate the model is well calibrated against the training data, with no apparent pathological behaviours exhibited. Now for some investigation of the estimated relationships and forecasts. We
@@ -2773,7 +2774,7 @@
plot(m_mvgam, series = 1, type = "residuals")
-Ok so the model is doing well when fitting against the training data, but how are its forecasts? We can now evaluate the forecasts of the two competing models just as we did above for the simulated data
@@ -2786,9 +2787,9 @@
Both models fail to anticipate the upward swing in landings following
the end of the training data, but as with the simulations above, the
autocorrelated observation model fitted by mgcv
is
@@ -2826,9 +2827,9 @@
This model shows an improvement in forecasting ability, as the uncertainty in the temporal smooth function grows more realistically into the future. But again, the dynamic GAM is superior in
@@ -2857,9 +2858,9 @@
The dynamic GAM is difficult to beat for this example. But what other utilities does the mvgam package make available? We can also re-do the posterior predictive checks, but this time focusing only
@@ -2868,13 +2869,13 @@
ppc(m_mvgam, series = 1, type = "hist", data_test = data_test,
legend_position = "topright", n_bins = 200)
-ppc(m_mvgam, series = 1, type = "mean", data_test = data_test)
-ppc(m_mvgam, series = 1, type = "cdf", data_test = data_test)
-ppc(m_mvgam, series = 1, type = "pit", data_test = data_test)
-There are some problems with the way this model is generating future predictions, so we would need a more rigorous and principled model development strategy to improve its predictive abilities.
@@ -2904,9 +2905,9 @@
plot(m_mvgam, series = 1, type = "forecast",
data_test = data_test)
## Out of sample DRPS:
-## [1] 1861.594
+## [1] 1789.736
##
-Now we will showcase how different dynamic models can be compared using rolling probabilistic forecast evaluation, which is especially useful if we don’t already have out of sample observations for comparing
@@ -2930,13 +2931,13 @@
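A hedged sketch of such a call (the second fitted model m_mvgam2 and the argument names fc_horizon and n_evaluations are assumptions; see ?compare_mvgams for the exact interface):
compare_mvgams(m_mvgam, m_mvgam2, fc_horizon = 3, n_evaluations = 10)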
The series of plots generated by compare_mvgams clearly show that the first dynamic model generates better predictions. In each plot, DRPS for the forecast horizon is lower for the first
@@ -2965,7 +2966,7 @@
Here it is apparent that the distribution has shifted upward in light of the 3 observations that have been assimilated. This is an advantageous way of allowing a model to slowly adapt to new
@@ -3381,7 +3382,7 @@
Calculate DRPS over the 10-year horizon for the mvgam model
lynx_mvgam_drps <- drps_mcmc_object(truth = lynx_test$population[1:10],
                                    fc = fits)
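For intuition, the DRPS for one observation is the squared distance between the forecast CDF and the step-function CDF of the realised value, summed over the count support; a minimal illustrative implementation (not mvgam's internal code; names are hypothetical):
drps_single <- function(truth, fc_draws) {
  # common support of counts on which to evaluate both CDFs
  ks <- 0:(max(c(fc_draws, truth)) + 50)
  Fk <- vapply(ks, function(k) mean(fc_draws <= k), numeric(1))
  # squared distance to the degenerate CDF that jumps at the truth
  sum((Fk - as.numeric(ks >= truth))^2)
}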
@@ -3401,7 +3402,7 @@
sum(lynx_mvgam_drps[, 1])
-## [1] 808.8933
+## [1] 805.5717
mean(lynx_mvgam_drps[, 2])
## [1] 1
The mvgam has much more realistic uncertainty than the
@@ -3436,42 +3437,42 @@
plot(lynx_mgcv, select = 1, shade = TRUE)
plot_mvgam_smooth(lynx_mvgam, 1, "season")
-We can also view the mvgam’s posterior predictions for the entire series (testing and training)
plot(lynx_mvgam, type = "forecast", data_test = lynx_test)
## Out of sample DRPS:
-## [1] 808.8933
+## [1] 805.5717
##
-And the estimated trend
plot(lynx_mvgam, type = "trend", data_test = lynx_test)
-A key aspect of ecological forecasting is to understand how
different components of a model contribute to forecast uncertainty.
We can estimate contributions to forecast uncertainty for the GAM smooth
functions and the latent trend using mvgam
plot(lynx_mvgam, type = "uncertainty", data_test = lynx_test)
-Both components contribute to forecast uncertainty, with the trend component contributing more over time (as it should, since this is the stochastic forecast component). This suggests we would still need some
@@ -3518,7 +3519,15 @@
plot_mvgam_resids(lynx_mvgam, n_bins = 25)
+This same plot can also be produced for out of sample residuals if a
+set of test data is supplied (note that the function will assume that
+the test data come immediately sequential to the training data; there
+are no formal checks of this, so it is up to the user to ensure this
+holds)
+plot_mvgam_resids(lynx_mvgam, n_bins = 25,
+ data_test = lynx_test)