move all variable declarations to top of blocks in stan; add plot_ser…

…ies function
nicholasjclark · Jul 20, 2022 · 1274087 · 1274087
1 parent ae02ca2
commit 1274087
Show file tree

Hide file tree

Showing 20 changed files with 402 additions and 262 deletions.
diff --git a/.DS_Store b/.DS_Store
diff --git a/NAMESPACE b/NAMESPACE
@@ -29,6 +29,7 @@ export(plot_mvgam_fc)
 export(plot_mvgam_pterms)
 export(plot_mvgam_randomeffects)
 export(plot_mvgam_resids)
+export(plot_mvgam_series)
 export(plot_mvgam_smooth)
 export(plot_mvgam_trace)
 export(plot_mvgam_trend)

diff --git a/NEON_manuscript/.DS_Store b/NEON_manuscript/.DS_Store
diff --git a/NEON_manuscript/Case studies/.DS_Store b/NEON_manuscript/Case studies/.DS_Store
diff --git a/NEON_manuscript/Case studies/mvgam_case_study2_files/.DS_Store b/NEON_manuscript/Case studies/mvgam_case_study2_files/.DS_Store
diff --git a/NEON_manuscript/Case studies/rsconnect/.DS_Store b/NEON_manuscript/Case studies/rsconnect/.DS_Store
diff --git a/NEON_manuscript/Case studies/rsconnect/documents/.DS_Store b/NEON_manuscript/Case studies/rsconnect/documents/.DS_Store
diff --git a/NEON_manuscript/Case studies/rsconnect/documents/mvgam_case_study1.Rmd/.DS_Store b/NEON_manuscript/Case studies/rsconnect/documents/mvgam_case_study1.Rmd/.DS_Store
diff --git a/NEON_manuscript/Case studies/rsconnect/documents/mvgam_case_study1.Rmd/rpubs.com/.DS_Store b/NEON_manuscript/Case studies/rsconnect/documents/mvgam_case_study1.Rmd/rpubs.com/.DS_Store
diff --git a/NEON_manuscript/Case studies/rsconnect/documents/mvgam_case_study2.Rmd/.DS_Store b/NEON_manuscript/Case studies/rsconnect/documents/mvgam_case_study2.Rmd/.DS_Store
diff --git a/NEON_manuscript/Case studies/rsconnect/documents/mvgam_case_study2.Rmd/rpubs.com/.DS_Store b/NEON_manuscript/Case studies/rsconnect/documents/mvgam_case_study2.Rmd/rpubs.com/.DS_Store
diff --git a/NEON_manuscript/Figures/.DS_Store b/NEON_manuscript/Figures/.DS_Store
diff --git a/NEON_manuscript/Manuscript/.DS_Store b/NEON_manuscript/Manuscript/.DS_Store
diff --git a/NEON_manuscript/next_todo.R b/NEON_manuscript/next_todo.R
@@ -1,19 +1,20 @@
 library(mvgam)
-dat <- sim_mvgam(T = 100, n_series=4, n_lv = 1)
-dat$true_corrs
+dat <- sim_mvgam(T = 100, n_series=3, prop_missing = .4)
+plot_mvgam_series(data_train = dat$data_train, series = 1)
 
 
-mod1 <- mvgam(formula = y ~ s(season, bs = 'cc') +
-                s(series, bs = 're'),
+mod1 <- mvgam(formula = y ~ s(series, bs = 're'),
               data_train = dat$data_train,
               trend_model = 'AR1',
               family = 'poisson',
               use_lv = TRUE,
-              n_lv = 2,
               use_stan = TRUE,
-              run_model = T,
+              run_model = TRUE,
               burnin = 10)
 mod1$model_file
+mod1$model_data
+
+
 summary(mod1)
 plot_mvgam_factors(mod1)
 plot(mod1, type = 'residuals')
@@ -23,238 +24,6 @@ plot(mod1, 'trend', data_test = fake)
 
 
 
-plot_mvgam_series = function(data_train, data_test, series, n_bins,
-                             log_scale = FALSE){
-
-  if(series == 'all'){
-    n_plots <- length(levels(data_train$series))
-    pages <- 1
-    .pardefault <- par(no.readonly=T)
-    par(.pardefault)
-
-    if (n_plots > 4) pages <- 2
-    if (n_plots > 8) pages <- 3
-    if (n_plots > 12) pages <- 4
-    if (pages != 0)  {
-      ppp <- n_plots %/% pages
-
-      if (n_plots %% pages != 0) {
-        ppp <- ppp + 1
-        while (ppp * (pages - 1) >= n_plots) pages <- pages - 1
-      }
-
-      # Configure layout matrix
-      c <- r <- trunc(sqrt(ppp))
-      if (c<1) r <- c <- 1
-      if (c*r < ppp) c <- c + 1
-      if (c*r < ppp) r <- r + 1
-      oldpar<-par(mfrow=c(r,c))
-
-    } else { ppp <- 1; oldpar <- par()}
-
-    all_ys <- lapply(seq_len(n_plots), function(x){
-      if(log_scale){
-        log(      data.frame(y = data_train$y,
-                             series = data_train$series,
-                             time = data_train$time) %>%
-                    dplyr::filter(series == levels(data_train$series)[x]) %>%
-                    dplyr::pull(y) + 1)
-      } else {
-        data.frame(y = data_train$y,
-                   series = data_train$series,
-                   time = data_train$time) %>%
-          dplyr::filter(series == levels(data_train$series)[x]) %>%
-          dplyr::pull(y)
-      }
-    })
-
-    for(i in 1:n_plots){
-      s_name <- levels(data_train$series)[i]
-
-      truth <- data.frame(y = data_train$y,
-                          series = data_train$series,
-                          time = data_train$time) %>%
-        dplyr::filter(series == s_name) %>%
-        dplyr::select(time, y) %>%
-        dplyr::distinct() %>%
-        dplyr::arrange(time) %>%
-        dplyr::pull(y)
-
-      if(log_scale){
-        truth <- log(truth + 1)
-        ylab <- 'log(Y + 1)'
-      } else {
-        ylab <- 'Y'
-      }
-
-      plot(1, type = "n", bty = 'L',
-           xlab = 'Time',
-           ylab = ylab,
-           ylim = range(unlist(all_ys)),
-           xlim = c(0, length(c(truth))))
-      title(s_name, line = 0)
-
-      if(n_plots > 1){
-        for(x in 1:n_plots){
-          lines(all_ys[[x]], lwd = 1.85, col = 'grey85')
-        }
-      }
-
-      lines(x = 1:length(truth), y = truth, lwd = 3, col = "white")
-      lines(x = 1:length(truth), y = truth, lwd = 2.5, col = "#8F2727")
-      box(bty = 'L', lwd = 2)
-
-    }
-
-  } else {
-    s_name <- levels(data_train$series)[series]
-    truth <- data.frame(y = data_train$y,
-                        series = data_train$series,
-                        time = data_train$time) %>%
-      dplyr::filter(series == s_name) %>%
-      dplyr::select(time, y) %>%
-      dplyr::distinct() %>%
-      dplyr::arrange(time) %>%
-      dplyr::pull(y)
-
-    layout(matrix(1:4, nrow = 2, byrow = TRUE))
-    if(!missing(data_test)){
-      test <- data.frame(y = data_test$y,
-                         series = data_test$series,
-                         time = data_test$time) %>%
-        dplyr::filter(series == s_name) %>%
-        dplyr::select(time, y) %>%
-        dplyr::distinct() %>%
-        dplyr::arrange(time) %>%
-        dplyr::pull(y)
-
-      plot(1, type = "n", bty = 'L',
-           xlab = 'Time',
-           ylab = 'Y',
-           ylim = range(c(truth, test)),
-           xlim = c(0, length(c(truth, test))))
-      title('Time series', line = 0)
-
-      lines(x = 1:length(truth), y = truth, lwd = 2, col = "#8F2727")
-      lines(x = (length(truth)+1):length(c(truth, test)), y = test, lwd = 2, col = "black")
-      abline(v = length(truth)+1, col = '#FFFFFF60', lwd = 2.85)
-      abline(v = length(truth)+1, col = 'black', lwd = 2.5, lty = 'dashed')
-      box(bty = 'L', lwd = 2)
-
-      if(missing(n_bins)){
-        n_bins <- max(c(length(hist(c(truth, test), plot = F)$breaks),
-                        20))
-      }
-
-      hist(c(truth, test), border = "#8F2727",
-           lwd = 2,
-           freq = FALSE,
-           breaks = n_bins,
-           col = "#C79999",
-           ylab = 'Density',
-           xlab = 'Count', main = '')
-      title('Histogram', line = 0)
-
-      acf(c(truth, test),
-          na.action = na.pass, bty = 'L',
-          lwd = 2.5, ci.col = 'black', col = "#8F2727",
-          main = '', ylab = 'Autocorrelation')
-      acf1 <- acf(c(truth, test), plot = F,
-                  na.action = na.pass)
-      clim <- qnorm((1 + .95)/2)/sqrt(acf1$n.used)
-      abline(h = clim,  col = '#FFFFFF', lwd = 2.85)
-      abline(h = clim,  col = 'black', lwd = 2.5, lty = 'dashed')
-      abline(h = -clim,  col = '#FFFFFF', lwd = 2.85)
-      abline(h = -clim,  col = 'black', lwd = 2.5, lty = 'dashed')
-      box(bty = 'L', lwd = 2)
-      title('ACF', line = 0)
-
-      ecdf_plotdat = function(vals, x){
-        func <- ecdf(vals)
-        func(x)
-      }
-
-      plot_x <- seq(min(c(truth, test), na.rm = T),
-                    max(c(truth, test), na.rm = T))
-      plot(1, type = "n", bty = 'L',
-           xlab = 'Count',
-           ylab = 'Empirical CDF',
-           xlim = c(min(plot_x), max(plot_x)),
-           ylim = c(0, 1))
-      title('CDF', line = 0)
-      lines(x = plot_x,
-            y = ecdf_plotdat(c(truth, test),
-                             plot_x),
-            col = "#8F2727",
-            lwd = 2.5)
-      box(bty = 'L', lwd = 2)
-
-    } else {
-      plot(1, type = "n", bty = 'L',
-           xlab = 'Time',
-           ylab = 'Observations',
-           ylim = range(c(truth)),
-           xlim = c(0, length(c(truth))))
-      title('Time series', line = 0)
-
-      lines(x = 1:length(truth), y = truth, lwd = 2, col = "#8F2727")
-      box(bty = 'L', lwd = 2)
-
-      if(missing(n_bins)){
-        n_bins <- max(c(length(hist(c(truth), plot = F)$breaks),
-                        20))
-      }
-
-      hist(c(truth), border = "#8F2727",
-           lwd = 2,
-           freq = FALSE,
-           breaks = n_bins,
-           col = "#C79999",
-           ylab = 'Density',
-           xlab = 'Count', main = '')
-      title('Histogram', line = 0)
-
-
-      acf(c(truth),
-          na.action = na.pass, bty = 'L',
-          lwd = 2.5, ci.col = 'black', col = "#8F2727",
-          main = '', ylab = 'Autocorrelation')
-      acf1 <- acf(c(truth), plot = F,
-                  na.action = na.pass)
-      clim <- qnorm((1 + .95)/2)/sqrt(acf1$n.used)
-      abline(h = clim,  col = '#FFFFFF', lwd = 2.85)
-      abline(h = clim,  col = 'black', lwd = 2.5, lty = 'dashed')
-      abline(h = -clim,  col = '#FFFFFF', lwd = 2.85)
-      abline(h = -clim,  col = 'black', lwd = 2.5, lty = 'dashed')
-      box(bty = 'L', lwd = 2)
-      title('ACF', line = 0)
-
-
-      ecdf_plotdat = function(vals, x){
-        func <- ecdf(vals)
-        func(x)
-      }
-
-      plot_x <- seq(min(truth, na.rm = T),
-                    max(truth, na.rm = T))
-      plot(1, type = "n", bty = 'L',
-           xlab = 'Count',
-           ylab = 'Empirical CDF',
-           xlim = c(min(plot_x), max(plot_x)),
-           ylim = c(0, 1))
-      title('CDF', line = 0)
-      lines(x = plot_x,
-            y = ecdf_plotdat(truth,
-                             plot_x),
-            col = "#8F2727",
-            lwd = 2.5)
-      box(bty = 'L', lwd = 2)
-    }
-
-    layout(1)
-  }
-
-}
 
 dat <- sim_mvgam(T = 100, n_series = 4, n_lv = 2,
                  mu_obs = c(4, 6, 10, 14), trend_rel = 0.3,

diff --git a/R/add_base_dgam_lines.R b/R/add_base_dgam_lines.R
@@ -33,12 +33,16 @@ add_base_dgam_lines = function(use_lv, stan = FALSE){
     }
 
     transformed parameters {
-    // basis coefficients
-    row_vector[num_basis] b;
+    // GAM contribution to expectations (log scale)
+    vector[total_obs] eta;
 
-    // dynamic factor loading matrix
+    // trends and dynamic factor loading matrix
+    matrix[n, n_series] trend;
     matrix[n_series, n_lv] lv_coefs;
 
+    // basis coefficients
+    row_vector[num_basis] b;
+
     // constraints allow identifiability of loadings
     for (i in 1:(n_lv - 1)) {
     for (j in (i + 1):(n_lv)){
@@ -57,15 +61,12 @@ add_base_dgam_lines = function(use_lv, stan = FALSE){
     }
 
     // derived latent trends
-    matrix[n, n_series] trend;
     for (i in 1:n){;
     for (s in 1:n_series){
     trend[i, s] = dot_product(lv_coefs[s,], LV[i,]);
     }
     }
 
-    // GAM contribution to expectations (log scale)
-    vector[total_obs] eta;
     eta = to_vector(b * X);
     }
 
@@ -98,12 +99,12 @@ add_base_dgam_lines = function(use_lv, stan = FALSE){
 
     generated quantities {
     vector[n_sp] rho;
-    rho = log(lambda);
     vector[n_lv] penalty;
+    matrix[n, n_series] ypred;
+    rho = log(lambda);
     penalty = rep_vector(1.0, n_lv);
 
     // posterior predictions
-    matrix[n, n_series] ypred;
     for(i in 1:n){
     for(s in 1:n_series){
     ypred[i, s] = poisson_log_rng(eta[ytimes[i, s]] + trend[i, s]);
@@ -130,11 +131,12 @@ add_base_dgam_lines = function(use_lv, stan = FALSE){
     }
 
     transformed parameters {
+    // GAM contribution to expectations (log scale)
+    vector[total_obs] eta;
+
     // basis coefficients
     row_vector[num_basis] b;
 
-    // GAM contribution to expectations (log scale)
-    vector[total_obs] eta;
     eta = to_vector(b * X);
     }
 
@@ -167,12 +169,12 @@ add_base_dgam_lines = function(use_lv, stan = FALSE){
 
     generated quantities {
     vector[n_sp] rho;
-    rho = log(lambda);
     vector[n_series] tau;
+    matrix[n, n_series] ypred;
+    rho = log(lambda);
     tau = sigma ^ -2;
 
     // posterior predictions
-    matrix[n, n_series] ypred;
     for(i in 1:n){
     for(s in 1:n_series){
     ypred[i, s] = poisson_log_rng(eta[ytimes[i, s]] + trend[i, s]);