diff --git a/DESCRIPTION b/DESCRIPTION index 3cb0647..5bd2916 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -10,6 +10,7 @@ Depends: PCICt (>= 0.5-4) Encoding: UTF-8 Imports: + circular, methods, Rcpp (>= 0.11.4), stats, @@ -31,7 +32,12 @@ RoxygenNote: 7.3.2 Collate: 'input_utils.R' 'climdexInput_class.R' + 'GenericVariable_utils.R' + 'climdexGenericVariable_class.R' + 'climdexGenericScalar.R' + 'climdexGenericVector.R' 'climdex.pcic-package.R' + 'generic_stats.R' 'constants.R' 'date_utils.R' 'precipitation_indices.R' diff --git a/NAMESPACE b/NAMESPACE index db828e9..68eec17 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,9 +7,6 @@ export(climdex.cwd) export(climdex.dtr) export(climdex.fd) export(climdex.get.available.indices) -export(climdex.min.max.idx.list) -export(climdex.mean.idx.list) -export(climdex.bootstrap.idx.list) export(climdex.gsl) export(climdex.id) export(climdex.mean.idx.list) @@ -35,8 +32,22 @@ export(climdex.tx90p) export(climdex.txn) export(climdex.txx) export(climdex.wsdi) +export(climdexGenericScalar.csv) +export(climdexGenericScalar.raw) +export(climdexGenericVector.csv) +export(climdexGenericVector.raw) export(climdexInput.csv) export(climdexInput.raw) +export(compute.gen.stat) +export(compute.stat.scalar) +export(compute.stat.vector) +export(compute_circular_mean) +export(compute_circular_sd) +export(convert_cardinal_to_degrees) +export(convert_cartesian_to_polar) +export(convert_degrees_to_cardinal) +export(convert_polar_to_cartesian) +export(filter_by_direction_range) export(get.last.monthday.of.year) export(get.outofbase.quantiles) export(get.series.lengths.at.ends) @@ -49,10 +60,15 @@ export(simple.precipitation.intensity.index) export(spell.length.max) export(threshold.exceedance.duration.index) export(total.precip.op.threshold) +exportClasses(climdexGenericScalar) +exportClasses(climdexGenericVector) exportClasses(climdexInput) import(PCICt) import(Rcpp) import(methods) +importFrom(circular,circular) +importFrom(circular,mean.circular) +importFrom(circular,sd.circular) importFrom(stats,quantile) importFrom(utils,head) importFrom(utils,read.csv) diff --git a/R/GenericVariable_utils.R b/R/GenericVariable_utils.R new file mode 100644 index 0000000..70e89c6 --- /dev/null +++ b/R/GenericVariable_utils.R @@ -0,0 +1,170 @@ +# Utility function to validate arguments for scalar and vector data. +check.generic.argument.validity <- function( data, dates, max.missing.days, calendar) { + + if (length(max.missing.days) != 3 || !all(c("annual", "monthly", "seasonal") %in% names(max.missing.days))) { + stop("max.missing.days must be a named vector with 'annual', 'monthly', and 'seasonal' elements.") + } + + + # Check that required arguments are provided + if (missing(data)) { + stop("Primary data argument is missing.") + } + + if (missing(dates)) { + stop("Argument 'dates' is missing.") + } + + + if (!is.numeric(data)) { + stop("Primary Data must be numeric.") + } + + if (length(data) != length(dates)) { + stop("Primary data and dates must have the same length.") + } + + if(!is.null(dates) && !inherits(dates, "PCICt")) + stop(paste("Dates must be of class PCICt.")) + + # Calendar check: verify it matches one of the recognized types + valid_calendars <- c("360_day", "360", "365_day", "365", "noleap", "gregorian", "proleptic_gregorian") + if (!calendar %in% valid_calendars) { + stop(paste("Invalid calendar type:", calendar, + ". Accepted types are '360_day', '360', '365_day', '365', 'noleap', 'gregorian', 'proleptic_gregorian'.")) + } +} + +# Utility function to handle date ranges and generate date factors. +date_info <- function(dates) { + cal <- attr(dates, "cal") + + last.day.of.year <- get.last.monthday.of.year(dates) + + date.range <- as.PCICt(paste(as.numeric(format(range(dates), "%Y", tz = "GMT")), c("01-01", last.day.of.year), sep = "-"), cal = cal) + date.series <- seq(date.range[1], date.range[2], by = "day") + + jdays <- get.jdays.replaced.feb29(get.jdays(date.series)) + + season_with_year <- classify_meteorological_season_with_year(date.series) + + date.factors <- list( + annual = factor(format(date.series, format = "%Y", tz = "GMT")), + monthly = factor(format(date.series, format = "%Y-%m", tz = "GMT")), + seasonal = factor(season_with_year, levels = unique(season_with_year)) + ) + + return(list( + cal = cal, + date.series = date.series, + date.factors = date.factors, + jdays = jdays + )) +} + +# Generates NA masks based on filled data and date factors +generate_namasks <- function(filled.list, date.factors, max.missing.days) { + namasks <- list( + annual = lapply(filled.list, get.na.mask, date.factors$annual, max.missing.days["annual"]), + monthly = lapply(filled.list, get.na.mask, date.factors$monthly, max.missing.days["monthly"]), + seasonal = lapply(filled.list, get.na.mask, date.factors$seasonal, max.missing.days["seasonal"])) + # Vectors: Combine the masks for magnitude and direction + if ("primary" %in% names(filled.list) && "secondary" %in% names(filled.list)) { + # Synchronize annual masks + namasks$annual$primary <- namasks$annual$primary * namasks$annual$secondary + namasks$annual$secondary <- namasks$annual$primary + + # Synchronize monthly masks + namasks$monthly$primary <- namasks$monthly$primary * namasks$monthly$secondary + namasks$monthly$secondary <- namasks$monthly$primary + + # Synchronize seasonal masks + namasks$seasonal$primary <- namasks$seasonal$primary * namasks$seasonal$secondary + namasks$seasonal$secondary <- namasks$seasonal$primary + } + namasks$annual <- lapply(names(namasks$annual), function(v) { + d <- namasks$annual[[v]] * as.numeric(tapply(namasks$monthly[[v]], rep(seq_along(namasks$annual[[v]]), each = 12), prod)) + dimnames(d) <- dim(d) <- NULL + d + }) + names(namasks$annual) <- names(namasks$seasonal) <- names(namasks$monthly) + + + season_month_counts <- sapply(unique(date.factors$seasonal), function(season) { + length(unique(date.factors$monthly[date.factors$seasonal == season])) + }) + data.vars <- names(filled.list) + + for (var in data.vars) { + seasonal_namasks <- namasks$seasonal[[var]] + na_months <- unique(date.factors$monthly)[is.na(namasks$monthly[[var]])] + seasons_of_na_months <- unique(date.factors$seasonal[date.factors$monthly %in% na_months]) + seasonal_namasks[unique(date.factors$seasonal) %in% seasons_of_na_months] <- NA + # Identify and set NA for seasons with less than 3 months + for (season in seq_along(season_month_counts) ) { + if (!is.na(season_month_counts[season]) && season_month_counts[season] < 3) { + seasonal_namasks[season] <- NA + } + } + namasks$seasonal[[var]] <- seasonal_namasks + } + return(namasks) +} + +generate_filled_list <- function(data, dates, date.series) { + if (is.vector(data)) { + return(list(create.filled.series(data, trunc(dates), date.series))) + } else { + filled.list <- sapply(data, function(x) { + return(create.filled.series(x, trunc(dates), date.series)) + }, simplify = FALSE) + return(filled.list) + } +} + + +# Reads data from a CSV file, validates it, and converts date columns to PCICt dates. +read_csv_data <- function( + file, + data.columns, + date.columns, + date.format, + na.strings, + calendar +) { + + calling_func <- as.character(sys.call(-1)[[1]]) + + # Ensure that the number of data columns matches the type of the calling function + if (grepl("Scalar", calling_func, ignore.case = TRUE) && length(data.columns) != 1) { + stop("For scalar data, 'data.columns' should contain exactly 1 column.") + } else if (grepl("Vector", calling_func, ignore.case = TRUE) && length(data.columns) != 2) { + stop("For vector data, 'data.columns' should contain exactly 2 columns.") + } + + # Read the CSV file + GV.csv <- read.csv(file, na.strings = na.strings) + + # Check that data columns exist + for (col in data.columns) { + if (!(col %in% names(GV.csv))) { + stop(paste("Data column", col, "not found in data.")) + } + } + + # Check that date columns exist + if (!all(date.columns %in% names(GV.csv))) { + stop(paste("Date columns", paste(date.columns, collapse = ", "), "not found in data.")) + } + + # Extract data cols + data_values <- lapply(data.columns, function(col) GV.csv[[col]]) + + # Extract the date fields and create date strings + date_strings <- apply(GV.csv[date.columns], 1, function(row) paste(row, collapse = " ")) + + # Convert date strings to PCICt dates + dates <- as.PCICt(strptime(date_strings, format = date.format, tz = "UTC"), cal = calendar) + + return(list(data = data_values, dates = dates)) +} diff --git a/R/climdexGenericScalar.R b/R/climdexGenericScalar.R new file mode 100644 index 0000000..f9783f0 --- /dev/null +++ b/R/climdexGenericScalar.R @@ -0,0 +1,129 @@ +#' @title climdexGenericScalar.raw +#' +#' @description +#' Creates a `ClimdexGenericScalar` object from raw scalar climate data. +#' +#' @details +#' This function processes scalar climate data (e.g., humidity, snow-depth) +#' and creates a `ClimdexGenericScalar` object. The function fills missing +#' values and applies NA masks based on the `max.missing.days` argument. +#' The `ClimdexGenericScalar` class is used to compute +#' basic climate indices from scalar data. +#' +#' @param data A numeric vector containing the scalar climate data. +#' @param dates A `PCICt` vector corresponding to the data dates. +#' @param max.missing.days A named vector indicating the maximum allowed missing days for `annual`, `monthly`, and `seasonal` time periods. +#' @param northern.hemisphere Whether this point is in the northern hemisphere. +#' @param calendar String representing the calendar type, e.g., "gregorian". +#' @return A `ClimdexGenericScalar` object containing the processed data. +#' +#' @seealso \code{\link{climdexGenericVector.raw}}, \code{\link{climdexGenericScalar.csv}} +#' +#' @examples +#' data <- c(10.5, 12.3, 11.2) +#' dates <- as.PCICt(c("2024-01-01", "2024-01-02", "2024-01-03"), +#' format = "%Y-%m-%d", cal = "gregorian") +#' climdexGenericScalar.raw(data, +#' dates, +#' max.missing.days = c(annual = 15, monthly = 3, seasonal = 6)) +#' +#' @export + +climdexGenericScalar.raw <- function( + data, + dates, + max.missing.days = c(annual = 15, monthly = 3, seasonal = 6), + northern.hemisphere = TRUE, + calendar = "gregorian" +) { + + check.generic.argument.validity(data,dates,max.missing.days,calendar) + + date.info <- date_info(dates) + jdays = date.info$jdays + date.series = date.info$date.series + date.factors = date.info$date.factors + + filled.list <- generate_filled_list(data, dates, date.series) + names(filled.list) <- "data" + namasks <- generate_namasks(filled.list, date.factors, max.missing.days) + obj <- new("climdexGenericScalar", + data = filled.list[["data"]], + dates = date.series, + date.factors = date.factors, + jdays = jdays, + namasks = namasks, + northern.hemisphere = northern.hemisphere, + max.missing.days = max.missing.days) + + return(obj) +} + +#' @title climdexGenericScalar.csv +#' +#' @description +#' Reads scalar climate data from a CSV file and creates a `ClimdexGenericScalar` object. +#' +#' @details +#' This function reads scalar climate data (e.g., humidity, snow-depth) from a CSV file +#' and generates a `ClimdexGenericScalar` object. +#' +#' The CSV file should contain the climate data in a specified column, and the date fields should be provided in separate columns. +#' +#' @param file The file path to the CSV containing the scalar climate data. +#' @param data.column The name of the column containing the scalar data in the CSV file. +#' @param date.columns A vector of column names corresponding to the date fields in the CSV file. +#' @param date.format A string representing the format of the date fields. +#' @param na.strings A character vector of strings to interpret as `NA`. +#' @param northern.hemisphere Logical indicating whether the data is from the northern hemisphere. +#' @param max.missing.days A named vector specifying the maximum number of missing days allowed for annual, monthly, and seasonal periods. +#' @param calendar A string representing the calendar type (e.g., "gregorian"). +#' +#' @return A `ClimdexGenericScalar` object containing the processed scalar climate data. +#' +#' @seealso \code{\link{climdexGenericScalar.raw}}, \code{\link{climdexGenericVector.csv}} +#' +#' @examples +#' # Example usage for scalar data: +#' +#' # Simulating CSV data for humidity +#' csv_data <- " +#' year,month,day,humidity +#' 2024,01,01,80 +#' 2024,01,02,82 +#' 2024,01,03,85 +#' " +#' +#' # Write the CSV to a temporary file +#' temp_file <- tempfile(fileext = ".csv") +#' writeLines(csv_data, temp_file) +#' +#' # Call the climdexGenericScalar.csv function +#' climdexGenericScalar.csv(temp_file, data.column = "humidity", +#' date.columns = c("year", "month", "day"), +#' date.format = "%Y %m %d", calendar = "gregorian") + +#' @export + +climdexGenericScalar.csv <- function( + file, + data.column, + date.columns, + date.format, + na.strings = NULL, + northern.hemisphere = TRUE, + max.missing.days = c(annual = 15, monthly = 3, seasonal = 6), + calendar = "gregorian" +) { + + GS.csv <- read_csv_data(file, data.column, date.columns, date.format, na.strings, calendar) + obj <- climdexGenericScalar.raw( + data = GS.csv$data[[1]], + dates = GS.csv$dates, + northern.hemisphere = northern.hemisphere, + max.missing.days = max.missing.days, + calendar = calendar + ) + + return(obj) +} diff --git a/R/climdexGenericVariable_class.R b/R/climdexGenericVariable_class.R new file mode 100644 index 0000000..7550d58 --- /dev/null +++ b/R/climdexGenericVariable_class.R @@ -0,0 +1,111 @@ +#' Class definition for climdexGenericVariable +#' @title climdexGenericVariable +#' +#' @description +#' This class represents the base for handling both scalar and vector generic climate data. +#' It is used internally to construct single-variable, climdexInput-like objects for caclulating basic climate indices. +#' +#' @section Slots: +#' \describe{ +#' \item{dates}{A `PCICt` type vector of dates.} +#' \item{date.factors}{Factors used for creation of annual, seasonal and monthly indices.} +#' \item{jdays}{Julian days for the date sequence.} +#' \item{namasks}{Data quality masks for annual, seasonal and monthly data.} +#' \item{northern.hemisphere}{Logical indicating whether data is from the northern hemisphere.} +#' \item{max.missing.days}{A named vector specifying the maximum number of missing days of data for annual, seasonal +#' and monthly data.} +#' } +#' +#' @keywords internal + +setClass( + "climdexGenericVariable", + slots = list( + dates = "PCICt", + date.factors = "list", + jdays="numeric", + namasks = "list", + northern.hemisphere = "logical", + max.missing.days = "numeric" + ) +) + +## Class definition for climdexGenericScalar +#' @title climdexGenericScalar +#' +#' @description +#' The `climdexGenericScalar` class contains scalar climate data (e.g., humidity, snow-depth +#' ) and other data required for days for calculating basic climate indices. +#' +#' @section Slots: +#' \describe{ +#' \item{data}{A numeric vector of climate data.} +#' \item{dates}{A `PCICt` type vector of dates.} +#' \item{date.factors}{Factors used for creation of annual, seasonal and monthly indices.} +#' \item{jdays}{Julian days for the date sequence.} +#' \item{namasks}{Data quality masks for annual, seasonal and monthly data.} +#' \item{northern.hemisphere}{Logical indicating whether data is from the northern hemisphere.} +#' \item{max.missing.days}{A named vector specifying the maximum number of missing days of data for annual, seasonal +#' and monthly data.} +#' } +#' +#' @name climdexGenericScalar +#' @aliases climdexGenericScalar-class +#' @exportClass climdexGenericScalar + +setClass( + "climdexGenericScalar", + contains = "climdexGenericVariable", + slots = list( + data = "numeric" + ) +) + +## Class definition for climdexGenericVector +#' @title climdexGenericVector +#' +#' @description +#' The `climdexGenericVector` class contains vector climate data (e.g., wind speed and direction), consisting of +#' both primary and secondary components, and other data required for days for calculating basic climate indices. +#' +#' @details +#' The `primary` slot contains the main numeric climate data, such as wind speed or another magnitude-based variable. +#' The `secondary` slot provides complementary information, such as direction, and its type and meaning depend on the +#' `format`: +#' \itemize{ +#' \item \strong{polar}: When the format is `"polar"`, the `primary` slot represents magnitude (e.g., wind speed), +#' and the `secondary` slot must be numeric and represents direction in degrees (e.g., wind direction, where 0° is North). +#' \item \strong{cartesian}: In `"cartesian"` format, `primary` and `secondary` both represent the numeric x and y components of a vector (e.g., velocity components in the x and y directions). +#' \item \strong{cardinal}: When the format is `"cardinal"`, the `primary` slot still represents a numeric value (e.g., wind speed), +#' but the `secondary` slot is a character vector representing cardinal directions (e.g., "N", "NE", "E", etc.). +#' } +#' This class is designed to handle both magnitude and direction data in various forms, allowing flexibility in representing +#' vector climate data depending on the source. +#' +#' @section Slots: +#' \describe{ +#' \item{primary}{A numeric vector representing the primary climate data (e.g., wind speed).} +#' \item{secondary}{A vector representing the secondary climate data (e.g., wind direction).} +#' \item{format}{A string indicating the format of the data: "polar", "cartesian", or "cardinal".} +#' \item{dates}{A `PCICt` type vector of dates.} +#' \item{date.factors}{Factors used for creation of annual, seasonal and monthly indices.} +#' \item{jdays}{Julian days for the date sequence.} +#' \item{namasks}{Data quality masks for annual, seasonal and monthly data.} +#' \item{northern.hemisphere}{Logical indicating whether data is from the northern hemisphere.} +#' \item{max.missing.days}{A named vector specifying the maximum number of missing days of data for annual, seasonal +#' and monthly data.} +#' } +#' +#' @name climdexGenericVector +#' @aliases climdexGenericVector-class +#' @exportClass climdexGenericVector + +setClass( + "climdexGenericVector", + contains = "climdexGenericVariable", + slots = list( + primary = "numeric", + secondary = "ANY", # Could be numeric or character, depending on format. + format = "character" # 'polar', 'cartesian', or 'cardinal' + ) +) diff --git a/R/climdexGenericVector.R b/R/climdexGenericVector.R new file mode 100644 index 0000000..8858c66 --- /dev/null +++ b/R/climdexGenericVector.R @@ -0,0 +1,174 @@ +#' @title climdexGenericVector.raw +#' +#' @description +#' Creates a `ClimdexGenericVector` object from raw vector climate data, including +#' both a primary (e.g., magnitude) and secondary (e.g., direction) component. +#' +#' @details +#' This function processes vector climate data and creates a `ClimdexGenericVector` +#' object. The function generates NA masks based on the provided `max.missing.days` +#' and validates the `primary` and `secondary` components based on the specified +#' format (`polar`, `cartesian`, or `cardinal`). +#' +#' @param primary A numeric vector representing the primary data (e.g., wind speed). +#' @param secondary A numeric or character vector representing the secondary data (e.g., wind direction). +#' @param dates A `PCICt` vector corresponding to the data dates. +#' @param format A string specifying the format of the vector data ("polar", "cartesian", "cardinal"). +#' @param max.missing.days A named vector indicating the maximum allowed missing days for `annual`, `monthly`, and `seasonal` time periods. +#' @param northern.hemisphere Whether this point is in the northern hemisphere. +#' @param calendar String representing the calendar type, e.g., "gregorian". +#' +#' @return A `ClimdexGenericVector` object containing the processed vector data. +#' +#' @seealso \code{\link{climdexGenericScalar.raw}}, \code{\link{climdexGenericVector.csv}} +#' +#' @examples +#' +#' \dontrun{primary <- c(5.5, 6.2, 4.8) +#' secondary <- c("N", "NE", "E") +#' dates <- as.PCICt(c("2000-01-01", "2000-01-02", "2000-01-03"), +#' format = "%Y-%m-%d", cal = "gregorian") +#' climdexGenericVector.raw(primary, secondary, dates, format = "cardinal")} +#' @export + +climdexGenericVector.raw <- function( + primary, + secondary, + dates, + format = "polar", + max.missing.days = c(annual = 15, monthly = 3, seasonal = 6), + northern.hemisphere = TRUE, + calendar = "gregorian" +) { + + check.generic.argument.validity(primary, dates, max.missing.days, calendar) + if (missing(secondary)) { + stop("Secondary data argument is missing.") + } + # Check that primary, secondary, and dates have the same length + if (length(primary) != length(secondary) || length(primary) != length(dates)) { + stop("Lengths of 'primary', 'secondary', and 'dates' must be equal.") + } + # Convert the format to lowercase to allow case-insensitive input + format <- tolower(format) + + # Additional validation for format + if (format %in% c("polar", "cartesian")) { + if (!is.numeric(secondary)) { + stop("For 'polar' or 'cartesian' formats, 'secondary' must be numeric.") + } + } else if (format == "cardinal") { + if (!is.character(secondary)) { + stop("For 'cardinal' format, 'secondary' must be character.") + } + } else { + stop("Invalid 'format'. Use 'polar', 'cartesian', or 'cardinal'.") + } + + date.info <- date_info(dates) + jdays = date.info$jdays + date.series = date.info$date.series + date.factors = date.info$date.factors + + + filled.primary <- generate_filled_list(primary, dates, date.series)[[1]] + filled.secondary <- generate_filled_list(secondary, dates, date.series)[[1]] + filled.secondary[is.na(filled.primary)] <- NA + filled.primary[is.na(filled.secondary)] <- NA + namasks <- generate_namasks(list(primary = filled.primary, secondary = filled.secondary), date.factors, max.missing.days) + + obj <- new("climdexGenericVector", + primary = filled.primary, + secondary = filled.secondary, + dates = date.series, + format = format, + date.factors = date.factors, + jdays = jdays, + namasks = namasks, + max.missing.days = max.missing.days, + northern.hemisphere = northern.hemisphere) + + return(obj) +} + +#' @title climdexGenericVector.csv +#' +#' @description +#' Reads vector climate data (e.g., wind speed and direction) from a CSV file and creates a `ClimdexGenericVector` object. +#' +#' @details +#' This function reads vector climate data, consisting of primary and secondary components (e.g., magnitude and direction), +#' from a CSV file. It validates the data and constructs a `ClimdexGenericVector` object that can be used to compute +#' climate indices. The format of the vector data (e.g., `polar`, `cartesian`, or `cardinal`) +#' must be specified. +#' +#' @param file The file path to the CSV containing the vector climate data. +#' @param primary.column The name of the column containing the primary data (e.g., magnitude) in the CSV file. +#' @param secondary.column The name of the column containing the secondary data (e.g., direction) in the CSV file. +#' @param date.columns A vector of column names or indices corresponding to the date fields in the CSV file. +#' @param date.format A string representing the format of the date fields. +#' @param format A string specifying the format of the vector data. Must be one of `"polar"`, `"cartesian"`, or `"cardinal"`. +#' @param na.strings A character vector of strings to interpret as `NA`. +#' @param max.missing.days A named vector specifying the maximum number of missing days allowed for annual, monthly, and seasonal periods. +#' @param northern.hemisphere Whether this point is in the northern hemisphere. +#' @param calendar A string representing the calendar type (e.g., "gregorian"). +#' +#' @return A `ClimdexGenericVector` object containing the processed vector climate data. +#' +#' @seealso \code{\link{climdexGenericVector.raw}}, \code{\link{climdexGenericScalar.csv}} +#' +#' @examples +#' # Example usage for vector data (e.g., wind speed and direction): +#'\dontrun{ +#' csv_data <- " +#' year,month,day,wind_speed,wind_direction +#' 2024,01,01,10,N +#' 2024,01,02,12,NE +#' 2024,01,03,8,E +#' " +#' +#' # Write the CSV to a temporary file +#' temp_file <- tempfile(fileext = ".csv") +#' writeLines(csv_data, temp_file) +#' +#' # Call the climdexGenericVector.csv function +#' climdexGenericVector.csv(temp_file, primary.column = "wind_speed", +#' secondary.column = "wind_direction", +#' date.columns = c("year", "month", "day"), +#' date.format = "%Y %m %d", format = "cardinal", +#' calendar = "gregorian") +#' } + +#' @export + +climdexGenericVector.csv <- function( + file, + primary.column, + secondary.column, + date.columns, + date.format, + format = "polar", + na.strings = NULL, + max.missing.days = c(annual = 15, monthly = 3, seasonal = 6), + northern.hemisphere = TRUE, + calendar = "gregorian" +) { + + GV.csv <- read_csv_data(file, data.columns = c(primary.column, secondary.column), date.columns, date.format, na.strings, calendar) + + primary_values <- GV.csv$data[[1]] + secondary_values <- GV.csv$data[[2]] + dates <- GV.csv$dates + + obj <- climdexGenericVector.raw( + primary = primary_values, + secondary = secondary_values, + dates = dates, + format = format, + max.missing.days = max.missing.days, + northern.hemisphere = northern.hemisphere, + calendar = calendar + ) + + return(obj) +} \ No newline at end of file diff --git a/R/climdexInput_class.R b/R/climdexInput_class.R index 0d9a5e6..1249c6a 100644 --- a/R/climdexInput_class.R +++ b/R/climdexInput_class.R @@ -251,31 +251,6 @@ climdexInput.raw <- function(tmax=NULL, tmin=NULL, prec=NULL, tmax.dates=NULL, t new.date.range <- as.PCICt(paste(as.numeric(format(range(all.dates), "%Y", tz="GMT")), c("01-01", last.day.of.year), sep="-"), cal=cal) date.series <- seq(new.date.range[1], new.date.range[2], by="day") jdays <- get.jdays.replaced.feb29(get.jdays(date.series)) - - # Classify meteorological seasons with associated years. This includes - # handling for the winter season, where data in the months of January and - # February are assigned to the winter season of the previous year. - # - # Seasons are defined as the meteorological seasons: - # - 'Winter': December, January, February - # - 'Spring': March, April, May - # - 'Summer': June, July, August - # - 'Fall': September, October, November - classify_meteorological_season_with_year <- function(date.series) { - month <- as.integer(format(date.series, "%m")) - year <- as.integer(format(date.series, format = "%Y")) - year_for_season <- ifelse(month %in% c(1, 2), year - 1, year) - - # Vector combining season and year - season_with_year <- ifelse(month %in% c(12, 1, 2), paste("Winter", year_for_season), - ifelse(month %in% 3:5, paste("Spring", year_for_season), - ifelse(month %in% 6:8, paste("Summer", year_for_season), - ifelse(month %in% 9:11, paste("Fall", year_for_season), NA) - ) - ) - ) - return(season_with_year) - } season_with_year <- classify_meteorological_season_with_year(date.series) ## Factors for dividing data up diff --git a/R/date_utils.R b/R/date_utils.R index c729f7e..81ff5bb 100644 --- a/R/date_utils.R +++ b/R/date_utils.R @@ -78,4 +78,27 @@ exact.date <- function(stat, data, date.factor, freq, cal, mask) { } ) return(df) +} + +# Classify meteorological seasons with associated years. This includes +# handling for the winter season, where data in the months of January and +# February are assigned to the winter season of the previous year. +# +# Seasons are defined as the meteorological seasons: +# - 'Winter': December, January, February +# - 'Spring': March, April, May +# - 'Summer': June, July, August +# - 'Fall': September, October, November +classify_meteorological_season_with_year <- function(dates) { + month <- as.integer(format(dates, "%m")) + year <- as.integer(format(dates, "%Y")) + year_for_season <- ifelse(month %in% c(1, 2), year - 1, year) + + season <- ifelse(month %in% c(12, 1, 2), "Winter", + ifelse(month %in% 3:5, "Spring", + ifelse(month %in% 6:8, "Summer", + ifelse(month %in% 9:11, "Fall", NA)))) + + season_with_year <- paste(season, year_for_season) + return(season_with_year) } \ No newline at end of file diff --git a/R/generic_stats.R b/R/generic_stats.R new file mode 100644 index 0000000..5ae5d53 --- /dev/null +++ b/R/generic_stats.R @@ -0,0 +1,473 @@ +library(circular) + +#' @title compute.gen.stat +#' +#' @description +#' Computes a specified statistic (e.g., max, min, mean, sum, sd, var) for a generic climate object +#' (either scalar or vector) based on the selected frequency (monthly, annual, or seasonal). +#' The function handles NA masks and exact dates for the max/min statistics. +#' +#' @param gen.var A generic climate object that contains the data, date factors, and NA masks. +#' @param stat The statistic to compute. Must be one of `"max"`, `"min"`, `"mean"`, `"sum"`, `"sd"`, or `"var"`. +#' @param data The numeric data on which to compute the statistic. +#' @param freq The frequency for which to compute the statistic. Options are `"monthly"`, `"annual"`, or `"seasonal"`. +#' @param include.exact.dates Logical. If `TRUE`, computes and returns the exact dates for max/min statistics. +#' This is not applicable for other statistics like mean, sum, sd, or var, and will automatically switch to `FALSE`. +#' +#' @details +#' The function checks the validity of the provided data and ensures that NA masks are applied to the results. +#' If exact dates are requested for statistics that do not support it (e.g., mean, sum, sd, var), the function will print a message +#' and proceed without exact dates. +#' +#' @return A numeric vector of the computed statistic for each date factor, multiplied by the corresponding NA mask. +#' If `include.exact.dates = TRUE`, it returns a data frame that includes exact dates for max/min statistics. +#' +#' @note +#' This function is internal and not intended to be called directly by users. It serves as a shared utility +#' for computing statistics of generic scalar and vector climate data. +#' +#' @seealso \code{\link{compute.stat.scalar}}, \code{\link{compute.stat.vector}} +#' +#' @examples +#' # Assuming `scalar_obj` is a valid climdexGenericScalar object: +#' \dontrun{compute.gen.stat(scalar_obj, "max", scalar_obj@data, "monthly", FALSE)} +#' +#' @export +#' @keywords internal +compute.gen.stat <- function(gen.var, stat, data, freq = c("monthly", "annual", "seasonal"), include.exact.dates = FALSE) { + stopifnot(!is.null(data)) + freq <- match.arg(freq) + exact_date_stats <- c("max", "min") + + if (include.exact.dates && !(stat %in% exact_date_stats)) { + message(paste("Warning: Exact dates are not applicable for the", stat, "statistic. Proceeding without exact dates.")) + include.exact.dates <- FALSE + } + + date.factors <- gen.var@date.factors[[freq]] + mask <- gen.var@namasks[[freq]][[1]] + cal <- attr(gen.var@dates, "cal") + + result <- suppressWarnings(tapply.fast(data, date.factors, stat, na.rm = TRUE)) + # When you compute stats on all NA values with na.rm = TRUE, R returns NaN (Not a Number) instead of NA. + result[!is.finite(result)] <- NA + if (include.exact.dates) { + return(exact.date(stat, data, date.factors, freq, cal, mask)) + } + + return(result * mask) +} + +#' @title compute.stat.scalar +#' +#' @description +#' Computes a specified statistic (e.g., max, min, mean, sum, sd, var) for scalar climate data +#' based on a given frequency (monthly, annual, seasonal). It can also compute exact dates for +#' specific statistics when requested. +#' +#' @param scalar_obj A `ClimdexGenericScalar` object containing the scalar climate data. +#' @param stat The statistic to compute. Options are `"max"`, `"min"`, `"mean"`, `"sum"`, `"sd"`, or `"var"`. +#' @param freq The frequency of the statistic. Options are `"monthly"`, `"annual"`, or `"seasonal"`. +#' @param include.exact.dates Logical. If `TRUE`, returns the exact dates of the max/min values. Not applicable for `mean`, `sd`, or `var`. +#' +#' @return The computed statistic for the given frequency and statistic type. +#' +#' @seealso \code{\link{compute.gen.stat}} +#' +#' @examples +#' # Example usage for scalar data: +#' \dontrun{compute.stat.scalar(scalar_obj, "max", "monthly", TRUE)} +#' +#' @export +compute.stat.scalar <- function(scalar_obj, + stat = c("max", "min", "mean", "sum", "sd", "var"), + freq = c("monthly", "annual", "seasonal"), include.exact.dates = FALSE +) { + stat <- match.arg(stat) + stopifnot(!is.null(scalar_obj@data)) # Ensure the data key exists + return(compute.gen.stat(scalar_obj, stat, scalar_obj@data, freq, include.exact.dates)) +} + +#' @title Convert Cartesian to Polar Coordinates +#' @description Converts Cartesian coordinates (u, v) to polar coordinates (speed, direction). +#' +#' @details +#' The formulas used for the conversion are as follows: +#' - **Speed** (magnitude): \eqn{\text{speed} = \sqrt{u^2 + v^2}} +#' - **Direction** (angle in degrees): \eqn{\text{direction} = \left( \frac{\text{atan2}(v, u) \times 180}{\pi} + 360 \right) \mod 360} +#' +#' The function ensures the direction is normalized to the range [0, 360) degrees. +#' +#' @param u A numeric value representing the x-component (u) of the Cartesian coordinate. +#' @param v A numeric value representing the y-component (v) of the Cartesian coordinate. +#' @return A list with two elements: +#' \item{speed}{The magnitude of the vector (i.e., the speed).} +#' \item{direction}{The direction of the vector in degrees, normalized to the range [0, 360).} +#' @export +convert_cartesian_to_polar <- function(u, v) { + if (!is.numeric(u) || !is.numeric(v)) { + stop("u and v must be numeric.") + } + + speed <- sqrt(u^2 + v^2) + direction <- ifelse(speed == 0, NA, (atan2(v, u) * 180 / pi + 360) %% 360) + + return(list(speed = speed, direction = direction)) +} + +#' @title Convert Polar to Cartesian Coordinates +#' @description Converts polar coordinates (speed, direction) to Cartesian coordinates (u, v). +#' +#' @details +#' The formulas used for the conversion are as follows: +#' - **x-component (u)**: \eqn{u = \text{speed} \times \cos(\text{direction} \times \frac{\pi}{180})} +#' - **y-component (v)**: \eqn{v = \text{speed} \times \sin(\text{direction} \times \frac{\pi}{180})} +#' +#' The direction is in degrees and is converted to radians for the trigonometric calculations. +#' +#' @param speed A numeric value representing the magnitude of the vector. +#' @param direction A numeric value representing the direction of the vector in degrees. +#' @return A list with two elements: +#' \item{u}{The x-component (u) of the Cartesian coordinate.} +#' \item{v}{The y-component (v) of the Cartesian coordinate.} +#' @export +convert_polar_to_cartesian <- function(speed, direction) { + if (!is.numeric(speed) || !is.numeric(direction)) { + stop("speed and direction must be numeric.") + } + radians <- direction * pi / 180 + u <- speed * cos(radians) + v <- speed * sin(radians) + return(list(u = u, v = v)) +} + +#' @title Convert Degrees to Cardinal Direction +#' @description Converts a numeric degree value into the corresponding cardinal direction (e.g., N, NE, E). +#' +#' @details +#' The conversion is based on dividing the 360-degree circle into 16 equal sectors. The degrees corresponding to cardinal directions are as follows: +#' - **0° or 360°**: N +#' - **22.5°**: NNE +#' - **45°**: NE +#' - **...** and so on, up to **337.5°** for NNW. +#' +#' The degree input is normalized to the range [0, 360) before conversion. +#' +#' @param degrees A numeric vector of degrees (0-360) representing the direction. +#' @return A character vector of cardinal directions corresponding to the degree values. +#' @export +convert_degrees_to_cardinal <- function(degrees) { + + if (!is.numeric(degrees)) { + stop("degrees must be a numeric vector.") + } + + # Normalize degrees to [0, 360) + degrees_normalized <- (degrees + 360) %% 360 + + directions <- c('N', 'NNE', 'NE', 'ENE', 'E', 'ESE', 'SE', 'SSE', + 'S', 'SSW', 'SW', 'WSW', 'W', 'WNW', 'NW', 'NNW', 'N') + index <- round(degrees_normalized / 22.5) %% 16 + 1 + return(directions[index]) +} + +#' @title Compute Directions and Magnitudes Based on Format +#' @description Internal helper function that computes directions and magnitudes for +#' climate data, depending on the specified format ("cartesian", "polar", or "cardinal"). +#' @param format A character string specifying the format of the input data. +#' Must be one of "cartesian", "polar", or "cardinal". +#' @param primary A numeric vector representing the primary data (e.g., speed for polar format). +#' @param secondary A numeric or character vector representing the secondary data +#' (e.g., direction in degrees for polar, or cardinal direction for cardinal format). +#' @return A list with two elements: +#' \item{magnitude}{A numeric vector representing the magnitude of the data.} +#' \item{direction_degrees}{A numeric vector representing the direction in degrees (0-360).} +#' NA values are preserved in the result for entries that do not have valid data. +#' @note This is an internal function and should not be called directly by users. +#' @keywords internal +compute_directions_and_magnitudes <- function(format, primary, secondary) { + valid_idx <- !is.na(secondary) + direction_degrees <- rep(NA, length(secondary)) + magnitude <- rep(NA, length(primary)) + + switch( + format, + "cartesian" = { + valid_idx <- valid_idx & !is.na(primary) + polar_data <- convert_cartesian_to_polar(primary[valid_idx], secondary[valid_idx]) + magnitude[valid_idx] <- polar_data$speed + direction_degrees[valid_idx] <- polar_data$direction + warning(simpleWarning("Input data is in cartesian format; output will be in polar format.")) + }, + "cardinal" = { + direction_degrees[valid_idx] <- sapply(secondary[valid_idx], convert_cardinal_to_degrees) + magnitude <- primary + }, + "polar" = { + direction_degrees[valid_idx] <- as.numeric(secondary[valid_idx]) + magnitude <- primary + } + ) + + return(list(magnitude = magnitude, direction_degrees = direction_degrees)) +} + +#' @title Convert Cardinal Direction to Degrees +#' @description Converts a cardinal direction (e.g., N, NE, E) into the corresponding degree value. +#' +#' @details +#' The conversion is based on dividing the 360-degree circle into 16 equal sectors. The cardinal directions corresponding to degrees are as follows: +#' - **N**: 0° or 360° +#' - **NNE**: 22.5° +#' - **NE**: 45° +#' - **...** and so on, up to **NNW** at 337.5°. +#' +#' @param direction A character vector of cardinal directions. +#' @return A numeric vector of degrees corresponding to the cardinal directions. +#' @export +convert_cardinal_to_degrees <- function(direction) { + cardinal_map <- c(N = 0, NNE = 22.5, NE = 45, ENE = 67.5, E = 90, + ESE = 112.5, SE = 135, SSE = 157.5, S = 180, + SSW = 202.5, SW = 225, WSW = 247.5, W = 270, + WNW = 292.5, NW = 315, NNW = 337.5) + + direction <- toupper(direction) + if (any(!direction %in% names(cardinal_map))) { + stop("Invalid cardinal direction provided.") + } + return(unname(cardinal_map[direction])) +} + +#' @title Filter Data by Direction Range +#' @description Filters primary data, degrees, and date factors based on a specified range of directions. +#' @param primary_data A numeric vector of the primary data to be filtered. +#' @param degrees A numeric vector of direction degrees. +#' @param date_factors A vector of date factors corresponding to the data. +#' @param direction.range A numeric vector of length 2 specifying the minimum and maximum degrees for filtering. +#' @return A list containing: +#' \item{primary_data}{The filtered primary data, setting values outside the range to NA.} +#' \item{degrees}{The filtered degrees, setting values outside the range to NA.} +#' \item{date_factors}{The filtered date factors, keeping all values intact.} +#' @export +filter_by_direction_range <- function(primary_data, degrees, date_factors, direction.range) { + if (!is.numeric(direction.range) || length(direction.range) != 2) { + stop("direction.range must be a numeric vector of length 2 specifying min and max degrees.") + } + + # Normalize degrees to [0, 360) + degrees_normalized <- (degrees + 360) %% 360 + min_dir <- direction.range[1] %% 360 + max_dir <- direction.range[2] %% 360 + + # Handle ranges that cross the 0-degree line + if (min_dir > max_dir) { + within_range <- degrees_normalized >= min_dir | degrees_normalized <= max_dir + } else { + within_range <- degrees_normalized >= min_dir & degrees_normalized <= max_dir + } + + # Apply the filter and set values outside the range to NA + primary_data_filtered <- ifelse(within_range, primary_data, NA) + degrees_filtered <- ifelse(within_range, degrees, NA) + + return(list( + primary_data = primary_data_filtered, + degrees = degrees_filtered, + date_factors = date_factors # Date factors are not filtered, so remain intact + )) +} + + +#' @title Compute Circular Mean +#' @description Computes the circular mean of directional data based on date factors. +#' @param direction_degrees A numeric vector of direction values in degrees. +#' @param date.factors A vector of date factors used to group the data. +#' @param format The format of the result, either "degrees" (default) or "cardinal". +#' @return A numeric vector of circular means for each date factor group, or a character vector of cardinal directions if `format` is set to "cardinal". +#' @importFrom circular circular mean.circular sd.circular +#' @export +compute_circular_mean <- function(direction_degrees, date.factors, format) { + + # Check for empty or NULL inputs + if (is.null(direction_degrees) || length(direction_degrees) == 0) { + stop("direction_degrees cannot be empty or NULL.") + } + if (is.null(date.factors) || length(date.factors) == 0) { + stop("date.factors cannot be empty or NULL.") + } + + # Check if inputs are numeric + if (!is.numeric(direction_degrees)) { + stop("direction_degrees must be a numeric vector.") + } + + + # Convert directions to 'circular' objects + directions_circular <- circular::circular(direction_degrees, units = "degrees", modulo = "2pi") + + # Compute circular mean + circular_mean <- tapply(directions_circular, date.factors, function(x) { + if (all(is.na(x))) { + return(NA) # Return NA if the entire group is NA + } else { + return(circular::mean.circular(x, na.rm = TRUE)) + } + }) + + + # Convert back to degrees + circular_mean_degrees <- as.numeric(circular_mean) + circular_mean_degrees <- (circular_mean_degrees + 360) %% 360 # Normalize to [0, 360) + circular_mean_degrees[is.nan(circular_mean_degrees)] <- NA + # Convert to cardinal if format is 'cardinal' + if (format == "cardinal") { + circular_mean_result <- sapply(circular_mean_degrees, convert_degrees_to_cardinal) + } else { + circular_mean_result <- circular_mean_degrees + } + names(circular_mean_result) <- levels(date.factors) + return(circular_mean_result) +} + +#' @title Compute Circular Standard Deviation +#' @description Computes the circular standard deviation of directional data based on date factors. +#' @param direction_degrees A numeric vector of direction values in degrees. +#' @param date.factors A vector of date factors used to group the data. +#' @return A numeric vector of circular standard deviations for each date factor group, in degrees. +#' @export +compute_circular_sd <- function(direction_degrees, date.factors) { + + # Check for empty or NULL inputs + if (is.null(direction_degrees) || length(direction_degrees) == 0) { + stop("direction_degrees cannot be empty or NULL.") + } + if (is.null(date.factors) || length(date.factors) == 0) { + stop("date.factors cannot be empty or NULL.") + } + + # Check if inputs are numeric + if (!is.numeric(direction_degrees)) { + stop("direction_degrees must be a numeric vector.") + } + + # Convert directions to 'circular' objects + directions_circular <- circular::circular(direction_degrees, units = "degrees", modulo = "2pi") + + # Compute circular standard deviation + circular_sd <- tapply(directions_circular, date.factors, function(x) { + if (all(is.na(x))) { + return(NA) # Return NA if the entire group is NA + } else { + return(circular::sd.circular(x, na.rm = TRUE)) + } + }) + + circular_sd_degrees <- as.numeric(circular_sd) * (180 / pi) # Convert from radians to degrees + circular_sd_degrees[is.nan(circular_sd_degrees)] <- NA + names(circular_sd_degrees) <- levels(date.factors) + return(circular_sd_degrees) +} + +#' @title compute.stat.vector +#' +#' @description +#' Computes a specified statistic (e.g., max, min, mean, sd, circular_mean, etc.) for vector climate data. +#' The data can be in polar, cartesian, or cardinal format, and the function handles both magnitude and direction. +#' Supports additional vector-specific statistics like circular mean and circular standard deviation. +#' +#' @param climate_obj A `ClimdexGenericVector` object containing vector climate data (e.g., wind speed and direction). +#' @param stat The statistic to compute. Must be one of `"max"`, `"min"`, `"mean"`, `"sum"`, `"circular_mean"`, `"sd"`, `"var"`, or `"circular_sd"`. +#' @param freq The frequency for which to compute the statistic. Options are `"monthly"`, `"annual"`, or `"seasonal"`. +#' @param format The format of the vector data. Must be one of `"polar"`, `"cartesian"`, or `"cardinal"`. +#' This determines how the primary and secondary components (e.g., magnitude and direction) are interpreted. +#' @param include.exact.dates Logical. If `TRUE`, computes and returns the exact dates for max/min statistics. Not applicable for other statistics. +#' @param direction.range A numeric vector of length 2 specifying the minimum and maximum degrees for filtering based on direction. +#' Only data with directions within this range will be included in the calculations. If `NULL`, no filtering is applied. +#' +#' @details +#' The function computes the specified statistic for the magnitude (primary component) or direction (secondary component) of the vector data. +#' It supports additional statistics like `circular_mean` and `circular_sd` for directional data. +#' If `include.exact.dates = TRUE`, exact dates are returned for max/min statistics. +#' The function can also filter the data based on a specified degree range of directions (using `direction.range`). +#' +#' @return A list containing the computed statistic for magnitude and, if applicable, the computed statistic for direction. +#' For circular statistics (e.g., `circular_mean`, `circular_sd`), the result is returned for directions in degrees. +#' If `include.exact.dates = TRUE`, the function returns exact dates for the max/min statistics. +#' +#' @note +#' This function is designed for vector climate data, where the data includes both a magnitude and direction component. +#' For scalar data, use \code{\link{compute.stat.scalar}} instead. +#' +#' @seealso \code{\link{compute.stat.scalar}}, \code{\link{compute.gen.stat}} +#' +#' @examples +#' \dontrun{ +#' # Assuming `vector_obj` is a valid ClimdexGenericVector object: +#' compute.stat.vector(vector_obj, "circular_mean", "monthly", format = "polar") +#'} +#' +#' @export +compute.stat.vector <- function( + climate_obj, + stat = c("max", "min", "mean", "sum", "circular_mean", "sd", "var", "circular_sd"), + freq = c("monthly", "annual", "seasonal"), + format = c("polar", "cartesian", "cardinal"), + include.exact.dates = FALSE, + direction.range = NULL +) { + stat <- match.arg(stat) + freq <- match.arg(freq) + format <- match.arg(format) + + date.factors <- climate_obj@date.factors[[freq]] + + # Convert all data to polar coordinates (magnitude and direction in degrees) + directions_magnitudes <- compute_directions_and_magnitudes( + format, + climate_obj@primary, + climate_obj@secondary + ) + magnitude <- directions_magnitudes$magnitude + direction_degrees <- directions_magnitudes$direction_degrees + + + # Filter data based on direction range if provided + if (!is.null(direction.range)) { + filtered <- filter_by_direction_range(magnitude, direction_degrees, date.factors, direction.range) + magnitude <- filtered$primary_data + direction_degrees <- filtered$degrees + date.factors <- filtered$date_factors + } + + result <- switch( + stat, + "circular_mean" = { + direction_result <- compute_circular_mean(direction_degrees, date.factors, format) + dir_mask <- climate_obj@namasks[[freq]][["secondary"]] + list(direction = direction_result * dir_mask) + }, + "circular_sd" = { + circular_sd_degrees <- compute_circular_sd(direction_degrees, date.factors) + dir_mask <- climate_obj@namasks[[freq]][["secondary"]] + + list(circular_sd = circular_sd_degrees * dir_mask) + }, + { + # For other statistics, compute on magnitude + magnitude_stat <- compute.gen.stat( + gen.var = climate_obj, + stat = stat, + data = magnitude, + freq = freq, + include.exact.dates = include.exact.dates + ) + + + list(magnitude = magnitude_stat) + } + ) + + return(result) +} + + diff --git a/tests/test_generic_scalar_stats.R b/tests/test_generic_scalar_stats.R new file mode 100644 index 0000000..9898dba --- /dev/null +++ b/tests/test_generic_scalar_stats.R @@ -0,0 +1,146 @@ +library(climdex.pcic) +library(RUnit) + +# Setup +wd <- "./" + +## compute.stat.scalar Tests + +climdex.pcic.test.compute.stat.scalar.mean <- function() { + # Test data and dates + data <- seq(0, 30) + dates <- seq(as.PCICt("2020-01-01", cal="gregorian"), + by = "day", length.out = 31) + + # Use the climdexGenericScalar.raw function to create scalar_obj + scalar_obj <- climdexGenericScalar.raw( + data = data, + dates = dates, + max.missing.days = c(annual = 15, monthly = 3, seasonal = 6), + northern.hemisphere = TRUE, + calendar = "gregorian" + ) + # Monthly mean + result <- compute.stat.scalar(scalar_obj, stat = "mean", freq = "monthly", include.exact.dates = FALSE) + expected_mean <- mean(data) + + checkEqualsNumeric(as.numeric(result[1]), expected_mean) +} + + +climdex.pcic.test.compute.stat.scalar.max <- function() { + # Test data and dates + data <- c(1, 3, 5, 2, 4, 6) + dates <- seq(as.PCICt("2020-01-01", cal="gregorian"), by = "day", length.out = 6) + + # Use the climdexGenericScalar.raw function to create scalar_obj with higher max.missing.days for monthly + scalar_obj <- climdexGenericScalar.raw( + data = data, + dates = dates, + max.missing.days = c(annual = 15, monthly = 28, seasonal = 6), # Allow up to 28 missing days in a month + northern.hemisphere = TRUE, + calendar = "gregorian" + ) + + # Monthly max + result <- compute.stat.scalar(scalar_obj, stat = "max", freq = "monthly", include.exact.dates = FALSE) + expected_max <- max(data) + + checkEqualsNumeric(as.numeric(result[1]), expected_max) +} + + +climdex.pcic.test.scalar.exact.dates <- function() { + set.seed(123) + + data <- runif(365, 0, 20) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "day", length.out = 365) + + scalar_obj <- climdexGenericScalar.raw( + data = data, + dates = dates, + max.missing.days = c(annual = 15, monthly = 31, seasonal = 6), + northern.hemisphere = TRUE, + calendar = "gregorian" + ) + # Annual max + result <- compute.stat.scalar(scalar_obj, stat = "max", freq = "annual", include.exact.dates = TRUE) + expected_max <- max(data) + max_index <- which.max(data) + expected_max_date <- dates[max_index] + computed_exact_date <- as.PCICt(result$ymd[1], cal = "gregorian") + + checkEqualsNumeric(as.numeric(result$val[1]), expected_max, + msg = paste("Computed max statistic:", result$val[1], + "Expected max statistic:", expected_max)) + + checkEquals(computed_exact_date, expected_max_date, + msg = paste("Computed exact date:", computed_exact_date, + "Expected exact date:", expected_max_date)) +} + + +climdex.pcic.test.compute.stat.scalar.sum <- function() { + + data <- c(1, 2, 3, 4, 5) + dates <- seq(as.PCICt("2020-01-01", cal="gregorian"), by = "day", length.out = 5) + + scalar_obj <- climdexGenericScalar.raw( + data = data, + dates = dates, + max.missing.days = c(annual = 15, monthly = 31, seasonal = 6), + northern.hemisphere = TRUE, + calendar = "gregorian" + ) + + # Monthly sum + result <- compute.stat.scalar(scalar_obj, stat = "sum", freq = "monthly", include.exact.dates = FALSE) + expected_sum <- sum(data) + + checkEqualsNumeric(as.numeric(result[1]), expected_sum, + msg = paste("Computed sum:", result[1], "Expected sum:", expected_sum)) +} + + +climdex.pcic.test.compute.stat.scalar.sd <- function() { + + data <- c(2, 4, 6, 8, 10) + dates <- seq(as.PCICt("2020-01-01", cal="gregorian"), by = "day", length.out = 5) + + scalar_obj <- climdexGenericScalar.raw( + data = data, + dates = dates, + max.missing.days = c(annual = 15, monthly = 31, seasonal = 6), + northern.hemisphere = TRUE, + calendar = "gregorian" + ) + + # Monthly SD + result <- compute.stat.scalar(scalar_obj, stat = "sd", freq = "monthly", include.exact.dates = FALSE) + expected_sd <- sd(data) + checkEqualsNumeric(as.numeric(result[1]), expected_sd, tolerance = 1e-6, + msg = paste("Computed SD:", result[1], "Expected SD:", expected_sd)) +} + + +climdex.pcic.test.compute.stat.scalar.var <- function() { + # Test data and dates + data <- c(3, 6, 9, 12, 15) + dates <- seq(as.PCICt("2020-01-01", cal="gregorian"), by = "day", length.out = 5) + + # Use the climdexGenericScalar.raw function to create scalar_obj + scalar_obj <- climdexGenericScalar.raw( + data = data, + dates = dates, + max.missing.days = c(annual = 15, monthly = 31, seasonal = 6), + northern.hemisphere = TRUE, + calendar = "gregorian" + ) + + # Monthly variance + result <- compute.stat.scalar(scalar_obj, stat = "var", freq = "monthly", include.exact.dates = FALSE) + expected_var <- var(data) + + checkEqualsNumeric(as.numeric(result[1]), expected_var, tolerance = 1e-6, + msg = paste("Computed variance:", result[1], "Expected variance:", expected_var)) +} diff --git a/tests/test_generic_stat_helpers.R b/tests/test_generic_stat_helpers.R new file mode 100644 index 0000000..02ced16 --- /dev/null +++ b/tests/test_generic_stat_helpers.R @@ -0,0 +1,168 @@ +library(climdex.pcic) +library(RUnit) + +# Setup +wd <- "./" + +# Helper Functions Tests +climdex.pcic.test.convert.cartesian.to.polar <- function() { + u <- 1 + v <- 1 + result <- convert_cartesian_to_polar(u, v) + + checkEqualsNumeric(result$speed, sqrt(2)) + checkEqualsNumeric(result$direction, 45) + + checkEqualsNumeric(convert_cartesian_to_polar(0, 1)$direction, 90) + checkEqualsNumeric(convert_cartesian_to_polar(-1, 0)$direction, 180) + checkEqualsNumeric(convert_cartesian_to_polar(0, -1)$direction, 270) + + result_zero <- convert_cartesian_to_polar(0, 0) + checkEqualsNumeric(result_zero$speed, 0, "Expected speed to be 0 when both u and v are 0") + checkTrue(is.na(result_zero$direction), "Expected direction to be NA when both u and v are 0") +} + + +climdex.pcic.test.convert.polar.to.cartesian <- function() { + speed <- sqrt(2) + direction <- 45 + result <- convert_polar_to_cartesian(speed, direction) + + checkEqualsNumeric(result$u, 1, tolerance = 1e-6) + checkEqualsNumeric(result$v, 1, tolerance = 1e-6) + + # No speed + checkEqualsNumeric(convert_polar_to_cartesian(0, 45)$u, 0) + checkEqualsNumeric(convert_polar_to_cartesian(0, 45)$v, 0) + + # Test for negative speed + result_negative <- convert_polar_to_cartesian(-1, 45) + checkEqualsNumeric(result_negative$u, -cos(45 * pi / 180), "Expected correct u for negative speed") + checkEqualsNumeric(result_negative$v, -sin(45 * pi / 180), "Expected correct v for negative speed") + +} + + +climdex.pcic.test.convert.degrees.to.cardinal <- function() { + # Test all 16 cardinal directions and boundary cases + degrees <- c(0, 22.5, 45, 67.5, 90, 112.5, 135, 157.5, 180, + 202.5, 225, 247.5, 270, 292.5, 315, 337.5, 348.75) + expected <- c('N', 'NNE', 'NE', 'ENE', 'E', 'ESE', 'SE', 'SSE', 'S', + 'SSW', 'SW', 'WSW', 'W', 'WNW', 'NW', 'NNW', 'N') + result <- convert_degrees_to_cardinal(degrees) + + checkEquals(result, expected) + + # 360-0 boundary (e.g., 360, should return 'N') + checkEquals(convert_degrees_to_cardinal(360), 'N') + + # Out-of-bound degrees + result_out_of_bound <- convert_degrees_to_cardinal(c(-10, 370)) + checkEquals(result_out_of_bound, c('N', 'N'), "Expected 'N' and 'N' for out-of-bound degrees") +} + + +climdex.pcic.test.convert.cardinal.to.degrees <- function() { + directions <- c('N', 'NE', 'E', 'SE', 'S', 'SW', 'W', 'NW') + expected <- c(0, 45, 90, 135, 180, 225, 270, 315) + result <- convert_cardinal_to_degrees(directions) + checkEqualsNumeric(as.numeric(result), expected) + + # Invalid direction should trigger an error + checkException(convert_cardinal_to_degrees(c('XYZ')), "Invalid cardinal direction provided.") + + # Mixed-case input + mixed_case_result <- convert_cardinal_to_degrees(c('n', 'Ne', 'e', 'SE', 'S', 'sW', 'W', 'nw')) + checkEqualsNumeric(as.numeric(mixed_case_result), expected) +} + + +climdex.pcic.test.filter.by.direction.range <- function() { + primary_data <- 1:10 + degrees <- seq(0, 360, length.out = 10) + date_factors <- rep(1, 10) + direction.range <- c(90, 270) + + filtered <- filter_by_direction_range(primary_data, degrees, date_factors, direction.range) + + expected_data <- ifelse(degrees >= 90 & degrees <= 270, primary_data, NA) + expected_degrees <- ifelse(degrees >= 90 & degrees <= 270, degrees, NA) + + checkEquals(filtered$primary_data, expected_data) + checkEquals(filtered$degrees, expected_degrees) + + # Test range that crosses 0 + direction.range <- c(350, 10) # Only include data associated with degrees 0 & 360 + filtered <- filter_by_direction_range(primary_data, degrees, date_factors, direction.range) + + # Expect NA for all values except for those near 0 and 360 + expected_data_cross_0 <- ifelse(degrees >= 350 | degrees <= 10, primary_data, NA) + expected_degrees_cross_0 <- ifelse(degrees >= 350 | degrees <= 10, degrees, NA) + + checkEquals(filtered$primary_data, expected_data_cross_0, "Expected filtered primary data for range crossing 0 to have NA") + checkEquals(filtered$degrees, expected_degrees_cross_0, "Expected filtered degrees for range crossing 0 to have NA") +} + + +climdex.pcic.test.filter.by.direction.range.full.na <- function() { + primary_data <- 1:10 + degrees <- seq(0, 360, length.out = 10) # 0 40 80 120 160 200 240 280 320 360 + date_factors <- rep(1, 10) + direction.range <- c(300, 310) # Filter everything out + + filtered <- filter_by_direction_range(primary_data, degrees, date_factors, direction.range) + + expected_data <- rep(NA, 10) + expected_degrees <- rep(NA, 10) + + checkEquals(filtered$primary_data, expected_data, "Expected all primary data to be NA") + checkEquals(filtered$degrees, expected_degrees, "Expected all degrees to be NA") +} + + +climdex.pcic.test.compute.circular.mean <- function() { + direction_degrees <- c(350, 10) # Should average to 0 degrees + date.factors <- 1 + format <- "polar" + result <- compute_circular_mean(direction_degrees, date.factors, format) + expected <- 0 + + checkEqualsNumeric(result, expected, tolerance = 1e-6) + # Single Direction + checkEqualsNumeric(compute_circular_mean(c(90, 90), c(1, 1), "polar"), 90) + # No directions + checkException(compute_circular_mean(c(), c(), "polar"), + msg = "direction_degrees cannot be empty or NULL.") +} + + +climdex.pcic.test.compute.circular.mean.with.na <- function() { + direction_degrees <- c(350, 10, NA) # Should average to 0 degrees + date.factors <- 1 + format <- "polar" + result <- compute_circular_mean(direction_degrees, date.factors, format) + expected <- 0 + checkEqualsNumeric(result, expected, tolerance = 1e-6) +} + + +climdex.pcic.test.compute.circular.sd <- function() { + direction_degrees <- c(350, 10) + date.factors <- 1 + result <- compute_circular_sd(direction_degrees, date.factors) + expected <- circular::sd.circular(circular::circular(c(350, 10), units = "degrees", modulo = "2pi")) * 180 / pi + checkEqualsNumeric(result, expected, tolerance = 1e-4) + + # Check for empty inputs (expect an error) + checkException(compute_circular_sd(c(), c()), + msg = "direction_degrees cannot be empty or NULL.") +} + + +climdex.pcic.test.compute.circular.sd.with.na <- function() { + direction_degrees <- c(350, 10, NA) + date.factors <- 1 + result <- compute_circular_sd(direction_degrees, date.factors) + expected_sd <- circular::sd.circular(circular::circular(c(350, 10), units = "degrees", modulo = "2pi")) * 180 / pi + checkEqualsNumeric(result, expected_sd, tolerance = 1e-4, "Expected circular SD ignoring NA values") +} diff --git a/tests/test_generic_variable_inputs.R b/tests/test_generic_variable_inputs.R new file mode 100644 index 0000000..4e47514 --- /dev/null +++ b/tests/test_generic_variable_inputs.R @@ -0,0 +1,439 @@ +climdex.pcic.test.na_masks_with_missing_data_thresholds <- function() { + set.seed(123) + speed <- runif(366, 0, 20) + direction <- runif(366, 0, 360) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "day", length.out = 366) + + # Introduce missing data exceeding monthly and seasonal thresholds + speed[1:10] <- NA # First 10 days of January missing + speed[61:81] <- NA # 21 days missing in March + speed[183:203] <- NA # 21 days missing in July + + # Define max missing days thresholds + max_missing_days <- c(annual = 15, monthly = 5, seasonal = 15) + + vector_obj <- climdexGenericVector.raw( + primary = speed, + secondary = direction, + dates = dates, + max.missing.days = max_missing_days, + format = "polar", + northern.hemisphere = TRUE, + calendar = "gregorian" + ) + # Function to calculate number of missing days per period + calculate_missing_days <- function(data, date.factor) { + tapply(is.na(data), date.factor, sum) + } + + # Calculate missing days for primary data + monthly_missing_days_primary <- calculate_missing_days(vector_obj@primary, vector_obj@date.factors$monthly) + seasonal_missing_days_primary <- calculate_missing_days(vector_obj@primary, vector_obj@date.factors$seasonal) + annual_missing_days_primary <- sum(is.na(vector_obj@primary)) + + # Expected months and seasons to be NA due to missing data + months_expected_na <- names(monthly_missing_days_primary[monthly_missing_days_primary > max_missing_days["monthly"]]) + seasons_expected_na <- names(seasonal_missing_days_primary[seasonal_missing_days_primary > max_missing_days["seasonal"]]) + + # Adjust for the extra season (Winter-2019) that is automatically marked as NA + all_seasons <- levels(vector_obj@date.factors$seasonal) + first_season <- all_seasons[1] + last_season <- all_seasons[length(all_seasons)] + + # Combine expected NA seasons from missing data and incomplete seasons + total_expected_na_seasons <- unique(c(first_season,seasons_expected_na, last_season)) + + # Check that the total number of NA seasons matches the expected count + seasonal_namasks_primary <- vector_obj@namasks$seasonal$primary + actual_na_seasons <- all_seasons[is.na(seasonal_namasks_primary)] + + checkEquals(length(actual_na_seasons), length(total_expected_na_seasons), + msg = paste("Total number of NA seasons does not match expected (expected ", length(total_expected_na_seasons), ").", sep="")) + + # Check that actual and expected NA seasons are identical + checkTrue(identical(actual_na_seasons,total_expected_na_seasons), msg = "Actual NA seasons do not match expected") + + # Check that the specific seasons are marked NA + checkTrue(all(is.na(seasonal_namasks_primary[total_expected_na_seasons])), + msg = "Seasons expected to be NA are not marked NA.") + + # Check that the annual namask is NA + annual_namask_primary <- vector_obj@namasks$annual$primary + checkTrue(is.na(annual_namask_primary), + msg = "Annual namask is not NA when missing data exceeds annual threshold.") + + # Verify total counts of NA months + monthly_namasks_primary <- vector_obj@namasks$monthly$primary + + checkEquals(sum(is.na(monthly_namasks_primary)), length(months_expected_na), + msg = paste("Total number of NA months does not match expected (", length(months_expected_na), " months).", sep="")) + + # Check that the specific months are marked NA + checkTrue(all(is.na(monthly_namasks_primary[months_expected_na])), + msg = "Months expected to be NA are not marked NA.") +} + + +climdex.pcic.test.scalar.raw.and.csv.construction <- function() { + set.seed(123) + + # Raw + data <- runif(366, 0, 20) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "day", length.out = 366) + + scalar_obj_raw <- climdexGenericScalar.raw( + data = data, + dates =dates, + max.missing.days = c(annual = 15, monthly = 31, seasonal = 6), + northern.hemisphere = TRUE, + calendar = "gregorian" + ) + + checkEquals(length(scalar_obj_raw@data), length(scalar_obj_raw@dates), "Raw scalar construction data length does not match dates.") + + # CSV + csv_data <- data.frame(date = as.character(dates), data = data) + temp_csv <- tempfile() + write.csv(csv_data, temp_csv, row.names = FALSE) + + scalar_obj_csv <- climdexGenericScalar.csv( + file = temp_csv, + data.column = "data", + date.columns = "date", + date.format = "%Y-%m-%d", + max.missing.days = c(annual = 15, monthly = 31, seasonal = 6), + northern.hemisphere = TRUE, + calendar = "gregorian" + ) + + checkEquals(length(scalar_obj_csv@data), length(scalar_obj_csv@dates), "CSV scalar construction data length does not match dates.") + checkEquals(scalar_obj_raw@dates, scalar_obj_csv@dates, "Date mismatch between raw and CSV scalar objects.") + checkTrue(all.equal(scalar_obj_csv, scalar_obj_raw), msg = "Scalar_obj built from csv is not identical to raw") +} + + +climdex.pcic.test.calendar.handling <- function() { + set.seed(123) + + # Create data for two different calendar types + data_gregorian <- runif(366, 0, 20) + dates_gregorian <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "day", length.out = 366) + + data_noleap <- runif(365, 0, 20) + dates_noleap <- seq(as.PCICt("2020-01-01", cal = "noleap"), by = "day", length.out = 365) + + # Gregorian calendar + scalar_obj_gregorian <- climdexGenericScalar.raw( + data = data_gregorian, + dates = dates_gregorian, + max.missing.days = c(annual = 15, monthly = 31, seasonal = 6), + northern.hemisphere = TRUE, + calendar = "gregorian" + ) + + # No-leap calendar + scalar_obj_noleap <- climdexGenericScalar.raw( + data = data_noleap, + dates = dates_noleap, + max.missing.days = c(annual = 15, monthly = 31, seasonal = 6), + northern.hemisphere = TRUE, + calendar = "noleap" # uses a 365 calendar + ) + + checkEquals(attr(scalar_obj_gregorian@dates, "cal"), "proleptic_gregorian", msg = "Gregorian calendar mismatch.") + checkEquals(attr(scalar_obj_noleap@dates, "cal"), "365", msg = "No-leap calendar mismatch.") + # Verify that February 29th is included in the Gregorian dates + checkTrue(any(format(scalar_obj_gregorian@dates, "%m-%d") == "02-29"), "Leap day missing in Gregorian calendar dates.") + # Verify that February 29th is not included in the no-leap dates + checkTrue(!any(format(scalar_obj_noleap@dates, "%m-%d") == "02-29"), "Leap day present in no-leap calendar dates.") +} + + +climdex.pcic.test.vector.raw.and.csv.construction <- function() { + set.seed(123) + + # Raw vector data construction + primary <- runif(366, 0, 20) # Primary component (e.g., magnitude) + secondary <- runif(366, 0, 360) # Secondary component (e.g., direction) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "day", length.out = 366) + + vector_obj_raw <- climdexGenericVector.raw( + primary = primary, + secondary = secondary, + dates = dates, + format = "polar", + max.missing.days = c(annual = 15, monthly = 31, seasonal = 6), + northern.hemisphere = TRUE, + calendar = "gregorian" + ) + + checkEquals(length(vector_obj_raw@primary), length(vector_obj_raw@dates), "Raw vector construction primary data length does not match dates.") + checkEquals(length(vector_obj_raw@secondary), length(vector_obj_raw@dates), "Raw vector construction secondary data length does not match dates.") + + # CSV vector data construction + csv_data <- data.frame(date = as.character(dates), primary = primary, secondary = secondary) + temp_csv <- tempfile() + write.csv(csv_data, temp_csv, row.names = FALSE) + + vector_obj_csv <- climdexGenericVector.csv( + file = temp_csv, + primary.column = "primary", + secondary.column = "secondary", + date.columns = "date", + date.format = "%Y-%m-%d", + format = "polar", + max.missing.days = c(annual = 15, monthly = 31, seasonal = 6), + northern.hemisphere = TRUE, + calendar = "gregorian" + ) + + checkEquals(length(vector_obj_csv@primary), length(vector_obj_csv@dates), "CSV vector construction primary data length does not match dates.") + checkEquals(length(vector_obj_csv@secondary), length(vector_obj_csv@dates), "CSV vector construction secondary data length does not match dates.") + checkEquals(vector_obj_raw@dates, vector_obj_csv@dates, "Date mismatch between raw and CSV vector objects.") + checkTrue(all.equal(vector_obj_csv, vector_obj_raw), msg = "Vector_obj built from csv is not identical to raw") +} + + +climdex.pcic.test.vector.construction.validity <- function() { + set.seed(123) + + # Valid data for 'polar' format + speed <- c(5, 10, 15) + direction <- c(0, 90, 180) + dates <- as.PCICt(c("2020-01-01", "2020-01-05", "2020-01-10"), cal = "gregorian") + + vector_obj <- climdexGenericVector.raw( + primary = speed, + secondary = direction, + dates = dates, + format = "polar", + northern.hemisphere = TRUE, + calendar = "gregorian" + ) + + # Extract the indices of the dates in the filled date series + date_series <- vector_obj@dates + indices <- match(dates, date_series) + + # Check that data is correctly assigned at the correct positions + checkEquals(vector_obj@primary[indices], speed, "Primary data not correctly assigned.") + checkEquals(vector_obj@secondary[indices], direction, "Secondary data not correctly assigned.") + checkEquals(vector_obj@dates[indices], dates, "Dates not correctly assigned.") + + # Check that other positions are NA + other_indices <- setdiff(seq_along(date_series), indices) + checkTrue(all(is.na(vector_obj@primary[other_indices])), "Other primary data should be NA.") + checkTrue(all(is.na(vector_obj@secondary[other_indices])), "Other secondary data should be NA.") + + # Check that format is correctly set + checkEquals(vector_obj@format, "polar", "Format not correctly set.") + + # For 'cartesian' format, verify conversion to polar coordinates + u <- c(5, 0, -5) + v <- c(0, 5, 0) + cartesian_dates <- as.PCICt(c("2020-01-02", "2020-01-06", "2020-01-11"), cal = "gregorian") + + vector_obj_cartesian <- climdexGenericVector.raw( + primary = u, + secondary = v, + dates = cartesian_dates, + format = "cartesian", + northern.hemisphere = TRUE, + calendar = "gregorian" + ) + + # Convert cartesian to polar manually + expected_polar <- convert_cartesian_to_polar(u, v) + + # Extract indices corresponding to the cartesian dates + indices_cartesian <- match(cartesian_dates, vector_obj_cartesian@dates) + + # Extract the primary and secondary data at those indices + primary_cartesian <- vector_obj_cartesian@primary[indices_cartesian] + secondary_cartesian <- vector_obj_cartesian@secondary[indices_cartesian] + + # Convert the object's data to polar coordinates + polar_data <- convert_cartesian_to_polar(primary_cartesian, secondary_cartesian) + + # Check that internal conversion matches expected values + checkEqualsNumeric(polar_data$speed, expected_polar$speed, tolerance = 1e-6, "Magnitude conversion from cartesian to polar incorrect.") + checkEqualsNumeric(polar_data$direction, expected_polar$direction, tolerance = 1e-6, "Direction conversion from cartesian to polar incorrect.") +} + + +climdex.pcic.test.scalar_construction_with_malformed_inputs <- function() { + # Valid data + data <- c(10.5, 12.3, 11.2) + dates <- as.PCICt(c("2020-01-01", "2020-01-02", "2020-01-03"), cal = "gregorian") + + # Test 1: Non-numeric data + invalid_data <- c("high", "medium", "low") + checkException( + climdexGenericScalar.raw( + data = invalid_data, + dates = dates + ), + msg = "Error not raised for non-numeric 'data'." + ) + + # Test 2: Missing required arguments + # Missing 'data' argument + checkException( + climdexGenericScalar.raw( + dates = dates + ), + msg = "Error not raised when 'data' argument is missing." + ) + + # Missing 'dates' argument + checkException( + climdexGenericScalar.raw( + data = data + ), + msg = "Error not raised when 'dates' argument is missing." + ) + + # Test 3: Mismatched lengths between data and dates + data_short <- data[1:2] + checkException( + climdexGenericScalar.raw( + data = data_short, + dates = dates + ), + msg = "Error not raised for mismatched lengths between 'data' and 'dates'." + ) + + # Test 4: Invalid dates (non-PCICt objects) + invalid_dates <- as.Date(c("2020-01-01", "2020-01-02", "2020-01-03")) + checkException( + climdexGenericScalar.raw( + data = data, + dates = invalid_dates + ), + msg = "Error not raised for invalid 'dates' data type." + ) +} + + +climdex.pcic.test.vector_construction_with_malformed_inputs <- function() { + # Setup valid data for comparison + valid_speed <- c(5, 10, 15) + valid_direction_numeric <- c(0, 90, 180) + valid_direction_cardinal <- c("N", "E", "S") + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "day", length.out = 3) + + # Test 1: Mismatched lengths between primary and secondary + speed_short <- c(5, 10) + checkException( + climdexGenericVector.raw( + primary = speed_short, + secondary = valid_direction_numeric, + dates = dates, + format = "polar" + ), + msg = "Error not raised for mismatched lengths between primary and secondary." + ) + + # Test 2: Invalid format value + checkException( + climdexGenericVector.raw( + primary = valid_speed, + secondary = valid_direction_numeric, + dates = dates, + format = "invalid_format" + ), + msg = "Error not raised for invalid format value." + ) + + # Test 3: Non-numeric secondary data for 'polar' format + checkException( + climdexGenericVector.raw( + primary = valid_speed, + secondary = valid_direction_cardinal, + dates = dates, + format = "polar" + ), + msg = "Error not raised for non-numeric secondary data in 'polar' format." + ) + + # Test 4: Non-character secondary data for 'cardinal' format + checkException( + climdexGenericVector.raw( + primary = valid_speed, + secondary = valid_direction_numeric, + dates = dates, + format = "cardinal" + ), + msg = "Error not raised for non-character secondary data in 'cardinal' format." + ) + + # Test 5: Non-numeric primary data + invalid_speed <- c("fast", "medium", "slow") + checkException( + climdexGenericVector.raw( + primary = invalid_speed, + secondary = valid_direction_numeric, + dates = dates, + format = "polar" + ), + msg = "Error not raised for non-numeric primary data." + ) + + # Test 6: Missing required arguments + # Missing 'secondary' argument + checkException( + climdexGenericVector.raw( + primary = valid_speed, + dates = dates, + format = "polar" + ), + msg = "Error not raised when 'secondary' argument is missing." + ) + + # Missing 'primary' argument + checkException( + climdexGenericVector.raw( + secondary = valid_direction_numeric, + dates = dates, + format = "polar" + ), + msg = "Error not raised when 'primary' argument is missing." + ) + + # Test 7: Mismatched lengths between data and dates + dates_short <- dates[1:2] + checkException( + climdexGenericVector.raw( + primary = valid_speed, + secondary = valid_direction_numeric, + dates = dates_short, + format = "polar" + ), + msg = "Error not raised for mismatched lengths between data and dates." + ) + + # Test 8: Invalid dates (non-PCICt objects) + invalid_dates <- as.Date(c("2020-01-01", "2020-01-02", "2020-01-03")) + checkException( + climdexGenericVector.raw( + primary = valid_speed, + secondary = valid_direction_numeric, + dates = invalid_dates, + format = "polar" + ), + msg = "Error not raised for invalid 'dates' data type." + ) + + # Test 9: Invalid calendar type + checkException( + climdexGenericVector.raw( + primary = valid_speed, + secondary = valid_direction_cardinal, + dates = dates, + format = "cardinal", + calendar = "greg" + ), + msg = "Error not raised for invalid calendar type." + ) +} + diff --git a/tests/test_generic_vector_stats.R b/tests/test_generic_vector_stats.R new file mode 100644 index 0000000..01d7038 --- /dev/null +++ b/tests/test_generic_vector_stats.R @@ -0,0 +1,452 @@ +library(climdex.pcic) +library(RUnit) +library(circular) +# Setup +wd <- "./" + +## compute.stat.vector Tests +# Helper function to update primary and secondary values in a vector object. Dates need to be readded to avoid data-date length mismatch. +update_vector_obj <- function(base_vector_obj, primary, secondary, dates) { + return(climdexGenericVector.raw( + primary = primary, + secondary = secondary, + dates = dates, + max.missing.days = base_vector_obj@max.missing.days, + format = base_vector_obj@format, + northern.hemisphere = base_vector_obj@northern.hemisphere, + calendar = attr(base_vector_obj@dates, "cal") + )) +} + + +climdex.pcic.test.compute.stat.vector.circular.mean <- function() { + # Common setup for the vector object + speed <- c(5, 5) + direction <- c(350, 10) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "day", length.out = 2) + + # Create a base vector object + base_vector_obj <- climdexGenericVector.raw( + primary = speed, + secondary = direction, + dates = dates, + max.missing.days = c(annual = 15, monthly = 31, seasonal = 6), + format = "polar", + northern.hemisphere = TRUE, + calendar = "gregorian" + ) + + # Test case 1: Compute circular mean for the default setup + result <- compute.stat.vector( + base_vector_obj, + stat = "circular_mean", + freq = "monthly", + format = "polar", + include.exact.dates = FALSE + ) + expected_direction <- 0 # Circular mean of 350 and 10 degrees is 0 degrees + checkEqualsNumeric(as.numeric(result$direction[1]), expected_direction, tolerance = 1e-6) + + # Test case 2: Same direction (90, 90), update the secondary component + base_vector_obj <- update_vector_obj(base_vector_obj, primary = c(5, 5), secondary = c(90, 90), dates = dates) + result_same_direction <- compute.stat.vector( + base_vector_obj, + stat = "circular_mean", + freq = "monthly", + format = "polar", + include.exact.dates = FALSE + ) + checkEqualsNumeric(as.numeric(result_same_direction$direction[1]), 90, tolerance = 1e-6) + + # Test case 3: Opposite directions (0, 180), update the secondary component + base_vector_obj <- update_vector_obj(base_vector_obj, primary = c(5, 5), secondary = c(0, 180), dates = dates) + result_opposite_direction <- compute.stat.vector( + base_vector_obj, + stat = "circular_mean", + freq = "monthly", + format = "polar", + include.exact.dates = FALSE + ) + checkTrue(is.na(result_opposite_direction$direction[1]), "Circular mean of opposite directions should be NA") + + # Test case 4: One value is NA (5, NA), update the primary and secondary components + base_vector_obj <- update_vector_obj(base_vector_obj, primary = c(5, NA), secondary = c(350, 10), dates = dates) + result_with_na <- compute.stat.vector( + base_vector_obj, + stat = "circular_mean", + freq = "monthly", + format = "polar", + include.exact.dates = FALSE + ) + # Should omit NA and just take mean of 350 + checkEqualsNumeric(as.numeric(result_with_na$direction[1]), 350, tolerance = 1e-6, + msg = paste("Result with NA differs from expected:", result_with_na$direction[1])) +} + + +climdex.pcic.test.compute.stat.vector.circular.sd <- function() { + + speed <- c(5, 5) + direction <- c(350, 10) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "day", length.out = 2) + + base_vector_obj <- climdexGenericVector.raw( + primary = speed, + secondary = direction, + dates = dates, + max.missing.days = c(annual = 15, monthly = 31, seasonal = 6), + format = "polar", + northern.hemisphere = TRUE, + calendar = "gregorian" + ) + + # Test case 1: Compute circular SD for the default setup + result <- compute.stat.vector( + base_vector_obj, + stat = "circular_sd", + freq = "monthly", + format = "polar", + include.exact.dates = FALSE + ) + + expected_sd <-circular::sd.circular(circular::circular(c(350, 10), units = "degrees", modulo = "2pi")) * 180 / pi + checkEqualsNumeric(as.numeric(result$circular_sd[1]), expected_sd, tolerance = 1e-4) + + # Test case 2: Same direction (90, 90), update the secondary component + base_vector_obj <- update_vector_obj(base_vector_obj, primary = c(5, 5), secondary = c(90, 90), dates = dates) + result_same_direction <- compute.stat.vector( + base_vector_obj, + stat = "circular_sd", + freq = "monthly", + format = "polar", + include.exact.dates = FALSE + ) + expected_sd_same <- 0 # SD should be 0 as both directions are the same + checkEqualsNumeric(as.numeric(result_same_direction$circular_sd[1]), expected_sd_same, tolerance = 1e-4) + + # Test case 3: Opposite directions (0, 180), update the secondary component + base_vector_obj <- update_vector_obj(base_vector_obj, primary = c(5, 5), secondary = c(0, 180), dates = dates) + result_opposite_direction <- compute.stat.vector( + base_vector_obj, + stat = "circular_sd", + freq = "monthly", + format = "polar", + include.exact.dates = FALSE + ) + expected_sd_opp <- circular::sd.circular(circular::circular(c(0, 180), units = "degrees", modulo = "2pi")) * 180 / pi + checkEqualsNumeric(as.numeric(result_opposite_direction$circular_sd[1]), expected_sd_opp, tolerance = 1e-6, + msg = paste("Result differs from expected:", result_opposite_direction$circular_sd[1])) + + # Test case 4: One value is NA (5, NA), update the primary and secondary components + base_vector_obj <- update_vector_obj(base_vector_obj, primary = c(5, NA), secondary = c(350, 10), dates = dates) + result_with_na <- compute.stat.vector( + base_vector_obj, + stat = "circular_sd", + freq = "monthly", + format = "polar", + include.exact.dates = FALSE + ) + # Should omit NA and just compute SD for the remaining value, which should be 0 + expected_sd_na <- 0 + checkEqualsNumeric(as.numeric(result_with_na$circular_sd[1]), expected_sd_na, tolerance = 1e-4) +} + + +climdex.pcic.test.compute.stat.vector.mean.magnitude <- function() { + + speed <- c(5, 10, 15) + direction <- c(0, 90, 180) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "day", length.out = 3) + + vector_obj <- climdexGenericVector.raw( + primary = speed, + secondary = direction, + dates = dates, # + max.missing.days = c(annual = 15, monthly = 31, seasonal = 6), + format = "polar", + northern.hemisphere = TRUE, + calendar = "gregorian" + ) + + # Monthly mean + result <- compute.stat.vector( + vector_obj, + stat = "mean", + freq = "monthly", + format = "polar", + include.exact.dates = FALSE + ) + + expected_mean_magnitude <- mean(speed) + + checkEqualsNumeric(as.numeric(result$magnitude[1]), expected_mean_magnitude, + msg = paste("Result magnitude differs from expected:", result$magnitude)) +} + + +climdex.pcic.test.compute.stat.vector.filtered.direction.range <- function() { + + speed <- c(5, 10, 15, 20) + direction <- c(45, 90, 135, 180) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "day", length.out = 4) + + vector_obj <- climdexGenericVector.raw( + primary = speed, + secondary = direction, + dates = dates, + max.missing.days = c(annual = 15, monthly = 31, seasonal = 6), + format = "polar", + northern.hemisphere = TRUE, + calendar = "gregorian" + ) + + # Monthly max, filter by 90 to 135 degrees + result <- compute.stat.vector( + vector_obj, + stat = "max", + freq = "monthly", + format = "polar", + include.exact.dates = FALSE, + direction.range = c(90, 135) + ) + + filtered_speeds <- speed[direction >= 90 & direction <= 135] + expected_max_magnitude <- 15 + + checkEqualsNumeric(as.numeric(result$magnitude[1]), expected_max_magnitude) +} + + +climdex.pcic.test.compute.stat.vector.missing.values <- function() { + speed <- c(5, 10, NA, 20) # One NA speed + direction <- c(45, 90, 135, NA) # One NA direction + + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "day", length.out = 4) + + vector_obj <- climdexGenericVector.raw( + primary = speed, + secondary = direction, + dates = dates, + max.missing.days = c(annual = 15, monthly = 31, seasonal = 6), + format = "polar", + northern.hemisphere = TRUE, + calendar = "gregorian" + ) + # Monthly mean + result <- compute.stat.vector( + vector_obj, + stat = "mean", + freq = "monthly", + format = "polar", + include.exact.dates = FALSE + ) + valid_entries <- !is.na(speed) & !is.na(direction) + expected_mean_magnitude <- mean(speed[valid_entries], na.rm = TRUE) # Mean of 5 and 10 (only valid entries) + checkEqualsNumeric(as.numeric(result$magnitude[1]), expected_mean_magnitude, + msg = paste("Result magnitude differs from expected. Result:", result$magnitude)) +} + + +climdex.pcic.test.compute.stat.vector.overlapping.years <- function() { + # Full winter season (Dec-Feb) + set.seed(123) + speed <- runif(90, min = 5, max = 20) + direction <- runif(90, min = 0, max = 360) + + dates <- seq(as.PCICt("2019-12-01", cal = "gregorian"), by = "day", length.out = 90) + + vector_obj <- climdexGenericVector.raw( + primary = speed, + secondary = direction, + dates = dates, + max.missing.days = c(annual = 15, monthly = 3, seasonal = 6), + format = "polar", + northern.hemisphere = TRUE, + calendar = "gregorian" + ) + + # Seasonal mean (expect it to compute correctly across Dec-Feb) + result <- compute.stat.vector( + vector_obj, + stat = "mean", + freq = "seasonal", + format = "polar", + include.exact.dates = FALSE + ) + expected_mean_speed <- mean(speed) + + checkEqualsNumeric(as.numeric(result$magnitude["Winter 2019"]), expected_mean_speed, + msg = paste("Computed seasonal mean:", result$magnitude[1], + "Expected seasonal mean:", expected_mean_speed)) +} + + +climdex.pcic.test.compute.stat.vector.filtered.direction.crossing.360 <- function() { + speed <- c(5, 10, 15, 20) + direction <- c(350, 10, 135, 180) # Directions crossing the 360-degree boundary + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "day", length.out = 4) + + vector_obj <- climdexGenericVector.raw( + primary = speed, + secondary = direction, + dates = dates, + max.missing.days = c(annual = 15, monthly = 31, seasonal = 6), + format = "polar", + northern.hemisphere = TRUE, + calendar = "gregorian" + ) + + # Monthly max, filter by 350 to 10 degrees + result <- compute.stat.vector( + vector_obj, + stat = "max", + freq = "monthly", + format = "polar", + include.exact.dates = FALSE, + direction.range = c(350, 10) + ) + filtered_speeds <- speed[direction >= 350 | direction <= 10] + expected_max_magnitude <- max(filtered_speeds) + checkEqualsNumeric(as.numeric(result$magnitude[1]), expected_max_magnitude) +} + + +climdex.pcic.test.compute.stat.vector.no.data.in.direction.range <- function() { + speed <- c(5, 10, 15, 20) + direction <- c(45, 90, 135, 180) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "day", length.out = 4) + + vector_obj <- climdexGenericVector.raw( + primary = speed, + secondary = direction, + dates = dates, + max.missing.days = c(annual = 15, monthly = 31, seasonal = 6), + format = "polar", + northern.hemisphere = TRUE, + calendar = "gregorian" + ) + + # Monthly max, filter by 270 to 360 degrees + result <- compute.stat.vector( + vector_obj, + stat = "max", + freq = "monthly", + format = "polar", + include.exact.dates = FALSE, + direction.range = c(270, 360) + ) + + result$magnitude[1] + checkTrue(is.na(result$magnitude[1]), "Expected NA result when no data is in the specified direction range.") +} + + +climdex.pcic.test.compute.stat.vector.cartesian.format <- function() { + u <- c(5, 10, 15) + v <- c(5, 0, -5) # Cartesian components + + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "day", length.out = 3) + + vector_obj <- climdexGenericVector.raw( + primary = u, + secondary = v, + dates = dates, + max.missing.days = c(annual = 15, monthly = 31, seasonal = 6), + format = "cartesian", + northern.hemisphere = TRUE, + calendar = "gregorian" + ) + + # Monthly mean + result <- compute.stat.vector( + vector_obj, + stat = "mean", + freq = "monthly", + format = "cartesian", + include.exact.dates = FALSE + ) + + # Convert u and v to magnitude and direction + polar_data <- convert_cartesian_to_polar(u, v) + expected_mean_magnitude <- mean(polar_data$speed) + + checkEqualsNumeric(as.numeric(result$magnitude[1]), expected_mean_magnitude, tolerance = 1e-6, + msg = paste("Computed mean magnitude:", result$magnitude[1], + "Expected mean magnitude:", expected_mean_magnitude)) + # Test 2: Monthly circular mean + result_circular_mean <- compute.stat.vector( + vector_obj, + stat = "circular_mean", + freq = "monthly", + format = "cartesian", + include.exact.dates = FALSE + ) + + expected_circular_mean <- compute_circular_mean(polar_data$direction, rep(1, length(polar_data$direction)), "polar") + + checkEqualsNumeric(as.numeric(result_circular_mean$direction[1]), expected_circular_mean, tolerance = 1e-6, + msg = paste("Computed circular mean direction:", result_circular_mean$direction[1], + "Expected circular mean direction:", expected_circular_mean)) +} + + +climdex.pcic.test.compute.stat.vector.cardinal.format <- function() { + speed <- c(5, 10, 15) + direction <- c('N', 'E', 'S') # Cardinal directions + + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "day", length.out = 3) + + vector_obj <- climdexGenericVector.raw( + primary = speed, + secondary = direction, + dates = dates, + max.missing.days = c(annual = 15, monthly = 31, seasonal = 6), + format = "cardinal", + northern.hemisphere = TRUE, + calendar = "gregorian" + ) + + result <- compute.stat.vector( + vector_obj, + stat = "mean", + freq = "monthly", + format = "cardinal", + include.exact.dates = FALSE + ) + + expected_mean_speed <- mean(speed) + + checkEqualsNumeric(as.numeric(result$magnitude[1]), expected_mean_speed, tolerance = 1e-6) +} + + +climdex.pcic.test.compute.stat.vector.inverted.direction.range <- function() { + speed <- c(5, 10, 15, 20) + direction <- c(45, 90, 135, 180) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "day", length.out = 4) + + vector_obj <- climdexGenericVector.raw( + primary = speed, + secondary = direction, + dates = dates, + max.missing.days = c(annual = 15, monthly = 31, seasonal = 6), + format = "polar", + northern.hemisphere = TRUE, + calendar = "gregorian" + ) + + # Monthly mean, filter from 270 to 90 degrees + result <- compute.stat.vector( + vector_obj, + stat = "mean", + freq = "monthly", + format = "polar", + include.exact.dates = FALSE, + direction.range = c(270, 90) + ) + + filtered_speeds <- speed[direction <= 90 | direction >= 270] # Filtering with inverted bounds + expected_mean_magnitude <- mean(filtered_speeds) + + checkEqualsNumeric(as.numeric(result$magnitude[1]), expected_mean_magnitude) +}