diff --git a/DESCRIPTION b/DESCRIPTION index c3b59949..c5274d65 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -68,4 +68,4 @@ Encoding: UTF-8 Language: en-GB Roxygen: list(markdown = TRUE, roclets=c('rd', 'collate', 'namespace')) -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 diff --git a/NAMESPACE b/NAMESPACE index 9b74f649..0191f8e6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -54,6 +54,7 @@ S3method(glance,model_mean) S3method(hfitted,ARIMA) S3method(hfitted,ETS) S3method(interpolate,ARIMA) +S3method(interpolate,ETS) S3method(interpolate,TSLM) S3method(interpolate,model_mean) S3method(model_sum,AR) diff --git a/NEWS.md b/NEWS.md index d1f29537..45a557c0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,7 @@ ## Improvements +* `ETS()` now supports missing values. * Documentation improvements. ## Bug fixes diff --git a/R/ets.R b/R/ets.R index a3a5dd8a..1583dde9 100644 --- a/R/ets.R +++ b/R/ets.R @@ -17,10 +17,6 @@ train_ets <- function(.data, specials, opt_crit, y <- unclass(.data)[[measured_vars(.data)]] idx <- unclass(.data)[[index_var(.data)]] - if (any(is.na(y))) { - abort("ETS does not support missing values.") - } - # Build possible models model_opts <- expand.grid( errortype = ets_spec$error$method, @@ -33,7 +29,7 @@ train_ets <- function(.data, specials, opt_crit, # Remove bad models if (NROW(model_opts) > 1) { - if (min(y) <= 0) { + if (min(y, na.rm = TRUE) <= 0) { model_opts <- model_opts[model_opts$errortype != "M", ] } if (restrict) { @@ -720,4 +716,26 @@ initial_ets_states <- function(object) { ) colnames(states_init) <- unsplit(states_names, states_type) states_init -} \ No newline at end of file +} + + +#' @inherit interpolate.model_mean +#' +#' @examples +#' library(tsibbledata) +#' +#' olympic_running |> +#' model(mean = ETS(Time)) %>% +#' interpolate(olympic_running) +#' @export +interpolate.ETS <- function(object, new_data, specials, ...) { + # Get missing values + y <- unclass(new_data)[[measured_vars(new_data)]] + miss_val <- which(is.na(y)) + # Forward fitted values + forward_fits <- object$est[[".fitted"]][miss_val] + # Ideally, we would also apply the model to the reversed time series + # and get the backward fitted values, then combine the two. + new_data[[measured_vars(new_data)]][miss_val] <- forward_fits + new_data +} diff --git a/R/etsmodel.R b/R/etsmodel.R index fea5b251..4612637e 100644 --- a/R/etsmodel.R +++ b/R/etsmodel.R @@ -140,16 +140,11 @@ estimate_ets <- function(y, m, init.state, errortype, trendtype, seasontype, alpha = unname(alpha), beta = unname(beta), gamma = unname(gamma), phi = unname(phi), init.state ) - if (errortype == "A") { - fits <- y - e$e - } else { - fits <- y / (1 + e$e) - } return(list( loglik = -0.5 * e$lik, aic = aic, bic = bic, aicc = aicc, mse = mse, amse = amse, mae = mae, - residuals = e$e, fitted = fits, + residuals = e$e, fitted = e$fits, states = states, par = fit.par )) } @@ -429,7 +424,7 @@ pegelsresid.C <- function(y, m, init.state, errortype, trendtype, seasontype, da p <- length(init.state) x <- numeric(p * (n + 1)) x[1:p] <- init.state - e <- numeric(n) + e <- fits <- numeric(n) lik <- 0 if (!damped) { phi <- 1 @@ -457,18 +452,15 @@ pegelsresid.C <- function(y, m, init.state, errortype, trendtype, seasontype, da as.double(gamma), as.double(phi), as.double(e), + as.double(fits), as.double(lik), as.double(amse), as.integer(nmse), + NAOK = TRUE, PACKAGE = "fable" ) - if (!is.na(Cout[[13]])) { - if (abs(Cout[[13]] + 99999) < 1e-7) { - Cout[[13]] <- NA - } - } - return(list(lik = Cout[[13]], amse = Cout[[14]], e = Cout[[12]], states = matrix(Cout[[3]], nrow = n + 1, ncol = p, byrow = TRUE))) + return(list(lik = Cout[[14]], amse = Cout[[15]], e = Cout[[12]], fits = Cout[[13]], states = matrix(Cout[[3]], nrow = n + 1, ncol = p, byrow = TRUE))) } admissible <- function(alpha, beta, gamma, phi, m) { diff --git a/man/interpolate.ETS.Rd b/man/interpolate.ETS.Rd new file mode 100644 index 00000000..861a75b8 --- /dev/null +++ b/man/interpolate.ETS.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ets.R +\name{interpolate.ETS} +\alias{interpolate.ETS} +\title{Interpolate missing values from a fable model} +\usage{ +\method{interpolate}{ETS}(object, new_data, specials, ...) +} +\arguments{ +\item{object}{A model for which forecasts are required.} + +\item{new_data}{A tsibble containing the time points and exogenous regressors to produce forecasts for.} + +\item{specials}{(passed by \code{\link[fabletools:forecast]{fabletools::forecast.mdl_df()}}).} + +\item{...}{Other arguments passed to methods} +} +\value{ +A tibble of the same dimension of \code{new_data} with missing values interpolated. +} +\description{ +Applies a model-specific estimation technique to predict the values of missing values in a \code{tsibble}, and replace them. +} +\examples{ +library(tsibbledata) + +olympic_running |> + model(mean = ETS(Time)) \%>\% + interpolate(olympic_running) +} diff --git a/src/etsTargetFunction.cpp b/src/etsTargetFunction.cpp index bdcb1fc1..82085c3d 100644 --- a/src/etsTargetFunction.cpp +++ b/src/etsTargetFunction.cpp @@ -69,6 +69,7 @@ void EtsTargetFunction::init(std::vector & p_y, int p_nstate, int p_erro // for(int i=0; i < n; i++) this->e.push_back(0); this->amse.resize(30, 0); this->e.resize(n, 0); + this->fits.resize(n, 0); } @@ -165,8 +166,7 @@ void EtsTargetFunction::eval(const double* p_par, int p_par_length) { for(int i=0; i <= p*this->y.size(); i++) state.push_back(0); etscalc(&this->y[0], &this->n, &this->state[0], &this->m, &this->errortype, &this->trendtype, &this->seasontype, - &this->alpha, &this->beta, &this->gamma, &this->phi, &this->e[0], &this->lik, &this->amse[0], &this->nmse); - + &this->alpha, &this->beta, &this->gamma, &this->phi, &this->e[0], &this->fits[0], &this->lik, &this->amse[0], &this->nmse); // Avoid perfect fits if (this->lik < -1e10) this->lik = -1e10; diff --git a/src/etsTargetFunction.h b/src/etsTargetFunction.h index 8c5aad70..dbcbddae 100644 --- a/src/etsTargetFunction.h +++ b/src/etsTargetFunction.h @@ -5,12 +5,9 @@ extern "C" { void etscalc(double *, int *, double *, int *, int *, int *, int *, - double *, double *, double *, double *, double *, double *, double *, int *); + double *, double *, double *, double *, double *, double *, double *, double *, int *); -void cpolyroot(double *opr, double *opi, int *degree, - double *zeror, double *zeroi, Rboolean *fail); } - class EtsTargetFunction { public: @@ -53,6 +50,7 @@ class EtsTargetFunction { double alpha, beta, gamma, phi; std::vector e; + std::vector fits; std::vector amse; double lik, objval; diff --git a/src/etscalc.c b/src/etscalc.c index 12b0783c..7a26cffa 100644 --- a/src/etscalc.c +++ b/src/etscalc.c @@ -1,4 +1,6 @@ #include +#include +#include #define NONE 0 #define ADD 1 @@ -6,164 +8,169 @@ #define DAMPED 1 #define TOL 1.0e-10 #define HUGEN 1.0e10 -#define NA -99999.0 // Functions called by R void etscalc(double *, int *, double *, int *, int *, int *, int *, - double *, double *, double *, double *, double *, double *, double *, int*); + double *, double *, double *, double *, double *, double *, double *, double *, int*); void etssimulate(double *, int *, int *, int *, int *, - double *, double *, double *, double *, int *, double *, double *); + double *, double *, double *, double *, int *, double *, double *); void etsforecast(double *, int *, int *, int *, double *, int *, double *); // Internal functions void forecast(double, double, double *, int, int, int, double, double *, int); void update(double *, double *, double *, double *, double *, double *, int, int, int, - double, double, double, double, double); + double, double, double, double, double); // ****************************************************************** void etscalc(double *y, int *n, double *x, int *m, int *error, int *trend, int *season, - double *alpha, double *beta, double *gamma, double *phi, double *e, double *lik, double *amse, int *nmse) + double *alpha, double *beta, double *gamma, double *phi, double *e, + double *fits, + double *lik, double *amse, int *nmse) { - int i, j, nstates; - double oldl, l, oldb, b, olds[24], s[24], f[30], lik2, tmp, denom[30]; - - if((*m > 24) & (*season > NONE)) - return; - else if(*m < 1) - *m = 1; - - if(*nmse > 30) - *nmse = 30; - - nstates = (*m)*(*season>NONE) + 1 + (*trend>NONE); - - // Copy initial state components - l = x[0]; + int i, j, nstates; + double oldl, l, oldb, b, olds[24], s[24], f[30], lik2, tmp, denom[30]; + + if((*m > 24) & (*season > NONE)) + return; + else if(*m < 1) + *m = 1; + + if(*nmse > 30) + *nmse = 30; + + nstates = (*m)*(*season>NONE) + 1 + (*trend>NONE); + + // Copy initial state components + l = x[0]; + if(*trend > NONE) + b = x[1]; + if(*season > NONE) + { + for(j=0; j<(*m); j++) + s[j] = x[(*trend>NONE)+j+1]; + } + + *lik = 0.0; + lik2 = 0.0; + for(j=0; j<(*nmse); j++) + { + amse[j] = 0.0; + denom[j] = 0.0; + } + + for (i=0; i<(*n); i++) + { + // COPY PREVIOUS STATE + oldl = l; if(*trend > NONE) - b = x[1]; + oldb = b; if(*season > NONE) { - for(j=0; j<(*m); j++) - s[j] = x[(*trend>NONE)+j+1]; + for(j=0; j<(*m); j++) + olds[j] = s[j]; } - - *lik = 0.0; - lik2 = 0.0; + // ONE STEP FORECAST + forecast(oldl, oldb, olds, *m, *trend, *season, *phi, f, *nmse); + fits[i] = f[0]; + if(R_IsNA(fits[i])) + { + *lik = NA_REAL; + return; + } + if(R_IsNA(y[i])) + e[i] = NA_REAL; + else if(*error == ADD) + e[i] = y[i] - fits[i]; + else + e[i] = (y[i] - fits[i])/fits[i]; for(j=0; j<(*nmse); j++) { - amse[j] = 0.0; - denom[j] = 0.0; + if(i+j<(*n)) + { + denom[j] += 1.0; + if(R_IsNA(y[i+j])) + tmp = 0.0; + else + tmp = y[i+j]-f[j]; + amse[j] = (amse[j] * (denom[j]-1.0) + (tmp*tmp)) / denom[j]; + } } - for (i=0; i<(*n); i++) + // UPDATE STATE + update(&oldl, &l, &oldb, &b, olds, s, *m, *trend, *season, *alpha, *beta, *gamma, *phi, y[i]); + + // STORE NEW STATE + x[nstates*(i+1)] = l; + if(*trend > NONE) + x[nstates*(i+1)+1] = b; + if(*season > NONE) { - // COPY PREVIOUS STATE - oldl = l; - if(*trend > NONE) - oldb = b; - if(*season > NONE) - { - for(j=0; j<(*m); j++) - olds[j] = s[j]; - } - - // ONE STEP FORECAST - forecast(oldl, oldb, olds, *m, *trend, *season, *phi, f, *nmse); - if(fabs(f[0]-NA) < TOL) - { - *lik = NA; - return; - } - - if(*error == ADD) - e[i] = y[i] - f[0]; - else - e[i] = (y[i] - f[0])/f[0]; - for(j=0; j<(*nmse); j++) - { - if(i+j<(*n)) - { - denom[j] += 1.0; - tmp = y[i+j]-f[j]; - amse[j] = (amse[j] * (denom[j]-1.0) + (tmp*tmp)) / denom[j]; - } - } - - // UPDATE STATE - update(&oldl, &l, &oldb, &b, olds, s, *m, *trend, *season, *alpha, *beta, *gamma, *phi, y[i]); - - // STORE NEW STATE - x[nstates*(i+1)] = l; - if(*trend > NONE) - x[nstates*(i+1)+1] = b; - if(*season > NONE) - { - for(j=0; j<(*m); j++) - x[(*trend>NONE)+nstates*(i+1)+j+1] = s[j]; - } - *lik = *lik + e[i]*e[i]; - lik2 += log(fabs(f[0])); + for(j=0; j<(*m); j++) + x[(*trend>NONE)+nstates*(i+1)+j+1] = s[j]; } - *lik = (*n) * log(*lik); - if(*error == MULT) - *lik += 2*lik2; + if(!R_IsNA(e[i])) + *lik = *lik + e[i]*e[i]; + lik2 += log(fabs(f[0])); + } + *lik = (*n) * log(*lik); + if(*error == MULT) + *lik += 2*lik2; } // ********************************************************************************* void etssimulate(double *x, int *m, int *error, int *trend, int *season, - double *alpha, double *beta, double *gamma, double *phi, int *h, double *y, double *e) + double *alpha, double *beta, double *gamma, double *phi, int *h, double *y, double *e) { - int i, j, nstates; - double oldl, l, oldb, b, olds[24], s[24], f[10]; + int i, j, nstates; + double oldl, l, oldb, b, olds[24], s[24], f[10]; - if((*m > 24) & (*season > NONE)) - return; + if((*m > 24) & (*season > NONE)) + return; else if(*m < 1) *m = 1; - nstates = (*m)*(*season>NONE) + 1 + (*trend>NONE); - - // Copy initial state components - l = x[0]; + nstates = (*m)*(*season>NONE) + 1 + (*trend>NONE); + + // Copy initial state components + l = x[0]; + if(*trend > NONE) + b = x[1]; + if(*season > NONE) + { + for(j=0; j<(*m); j++) + s[j] = x[(*trend>NONE)+j+1]; + } + + for (i=0; i<(*h); i++) + { + // COPY PREVIOUS STATE + oldl = l; if(*trend > NONE) - b = x[1]; + oldb = b; if(*season > NONE) { - for(j=0; j<(*m); j++) - s[j] = x[(*trend>NONE)+j+1]; + for(j=0; j<(*m); j++) + olds[j] = s[j]; } - for (i=0; i<(*h); i++) + // ONE STEP FORECAST + forecast(oldl, oldb, olds, *m, *trend, *season, *phi, f, 1); + if(R_IsNA(f[0])) { - // COPY PREVIOUS STATE - oldl = l; - if(*trend > NONE) - oldb = b; - if(*season > NONE) - { - for(j=0; j<(*m); j++) - olds[j] = s[j]; - } - - // ONE STEP FORECAST - forecast(oldl, oldb, olds, *m, *trend, *season, *phi, f, 1); - if(fabs(f[0]-NA) < TOL) - { - y[0]=NA; - return; - } - - if(*error == ADD) - y[i] = f[0] + e[i]; - else - y[i] = f[0]*(1.0+e[i]); - - // UPDATE STATE - update(&oldl, &l, &oldb, &b, olds, s, *m, *trend, *season, *alpha, *beta, *gamma, *phi, y[i]); + y[0] = NA_REAL; + return; } + if(*error == ADD) + y[i] = f[0] + e[i]; + else + y[i] = f[0]*(1.0+e[i]); + + // UPDATE STATE + update(&oldl, &l, &oldb, &b, olds, s, *m, *trend, *season, *alpha, *beta, *gamma, *phi, y[i]); + } } // ********************************************************************************* @@ -171,138 +178,142 @@ void etssimulate(double *x, int *m, int *error, int *trend, int *season, void etsforecast(double *x, int *m, int *trend, int *season, double *phi, int *h, double *f) { - int j; - double l, b, s[24]; - - if((*m > 24) & (*season > NONE)) - return; - else if(*m < 1) - *m = 1; - - // Copy initial state components - l = x[0]; - b = 0.0; - if(*trend > NONE) - b = x[1]; - if(*season > NONE) - { - for(j=0; j<(*m); j++) - s[j] = x[(*trend>NONE)+j+1]; - } - - // Compute forecasts - forecast(l, b, s, *m, *trend, *season, *phi, f, *h); + int j; + double l, b, s[24]; + + if((*m > 24) & (*season > NONE)) + return; + else if(*m < 1) + *m = 1; + + // Copy initial state components + l = x[0]; + b = 0.0; + if(*trend > NONE) + b = x[1]; + if(*season > NONE) + { + for(j=0; j<(*m); j++) + s[j] = x[(*trend>NONE)+j+1]; + } + + // Compute forecasts + forecast(l, b, s, *m, *trend, *season, *phi, f, *h); } // ***************************************************************** void forecast(double l, double b, double *s, int m, int trend, int season, double phi, double *f, int h) { - int i,j; - double phistar; - - phistar = phi; - - // FORECASTS - for(i=0; i NONE) + { + if(trend==ADD) + r = (*l) - (*oldl); // l[t]-l[t-1] + else //if(trend==MULT) { - phib = pow(*oldb,phi); - q = (*oldl) * phib; // l(t-1)*b(t-1)^phi + if(fabs(*oldl) < TOL) + r = HUGEN; + else + r = (*l)/(*oldl); // l[t]/l[t-1] } - if(season==NONE) - p = y; + *b = phib + (beta/alpha)*(r - phib); // b[t] = phi*b[t-1] + beta*(r - phi*b[t-1]) + // b[t] = b[t-1]^phi + beta*(r - b[t-1]^phi) + } + + // NEW SEASON + if(season > NONE) + { + if(R_IsNA(y)) + t = olds[m-1]; else if(season==ADD) - p = y - olds[m-1]; // y[t] - s[t-m] - else - { - if(fabs(olds[m-1]) < TOL) - p = HUGEN; - else - p = y / olds[m-1]; // y[t]/s[t-m] - } - *l = q + alpha*(p-q); - - // NEW GROWTH - if(trend > NONE) - { - if(trend==ADD) - r = (*l) - (*oldl); // l[t]-l[t-1] - else //if(trend==MULT) - { - if(fabs(*oldl) < TOL) - r = HUGEN; - else - r = (*l)/(*oldl); // l[t]/l[t-1] - } - *b = phib + (beta/alpha)*(r - phib); // b[t] = phi*b[t-1] + beta*(r - phi*b[t-1]) - // b[t] = b[t-1]^phi + beta*(r - b[t-1]^phi) - } - - // NEW SEASON - if(season > NONE) + t = y - q; + else //if(season==MULT) { - if(season==ADD) - t = y - q; - else //if(season==MULT) - { - if(fabs(q) < TOL) - t = HUGEN; - else - t = y / q; - } - s[0] = olds[m-1] + gamma*(t - olds[m-1]); // s[t] = s[t-m] + gamma*(t - s[t-m]) - for(j=1; j% - model(ETS(mdeaths)), - "ETS does not support missing values" - ) - expect_warning( UKLungDeaths %>% model(ETS(mdeaths ~ trend("M") + season("A"))), @@ -174,4 +166,24 @@ test_that("Automatic ETS selection bug (#425)", { model_sum(model(train, ets=ETS(value))$ets[[1]]), "ETS(A,N,N)" ) -}) \ No newline at end of file +}) + +test_that("ETS with missing values", { + UK_missing <- UKLungDeaths + UK_missing[["mdeaths"]][3:5] <- NA + expect_no_error(UK_missing |> model(ETS(mdeaths))) + + USAccDeaths_miss <- USAccDeaths_tbl + USAccDeaths_miss$value[c(10, 14, 15)] <- NA + fable_fit <- USAccDeaths_miss |> model(ets = ETS(value)) + + USAccDeaths_miss <- fable_fit |> + interpolate(USAccDeaths_miss) + expect_false( + any(is.na(USAccDeaths_miss$value)) + ) + expect_equal( + USAccDeaths_tbl$value[-c(10, 14, 15)], + USAccDeaths_miss$value[-c(10, 14, 15)] + ) +})