diff --git a/NAMESPACE b/NAMESPACE index 68eec17..4f8a660 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -38,6 +38,10 @@ export(climdexGenericVector.csv) export(climdexGenericVector.raw) export(climdexInput.csv) export(climdexInput.raw) +export(climdexSingleMonthlyScalar.csv) +export(climdexSingleMonthlyScalar.raw) +export(climdexSingleMonthlyVector.csv) +export(climdexSingleMonthlyVector.raw) export(compute.gen.stat) export(compute.stat.scalar) export(compute.stat.vector) @@ -69,6 +73,7 @@ import(methods) importFrom(circular,circular) importFrom(circular,mean.circular) importFrom(circular,sd.circular) +importFrom(stats,na.omit) importFrom(stats,quantile) importFrom(utils,head) importFrom(utils,read.csv) diff --git a/R/GenericVariable_utils.R b/R/GenericVariable_utils.R index 70e89c6..cd33f09 100644 --- a/R/GenericVariable_utils.R +++ b/R/GenericVariable_utils.R @@ -1,32 +1,74 @@ # Utility function to validate arguments for scalar and vector data. -check.generic.argument.validity <- function( data, dates, max.missing.days, calendar) { +check.generic.argument.validity <- function( + data, + dates, + max.missing.days, + calendar, + is.vector = FALSE, + secondary = NULL, + format = NULL +) { + # Internal function to validate data and date arguments + validate_data_dates <- function(data, dates, name) { + if (missing(data) || is.null(data)) { + stop(paste(name, "argument is missing.")) + } + if (missing(dates)) { + stop("Argument 'dates' is missing.") + } + if (length(data) == 0 || length(dates) == 0) { + stop(paste(name, "and dates must not be empty vectors.")) + } + if (!is.numeric(data[!is.na(data)]) && (name != "Secondary data")) { + stop(paste(name, "must be numeric.")) + } + if (length(data) != length(dates)) { + stop(paste(name, "and dates must have the same length.")) + } + if (any(is.na(dates))) { + stop(paste("Argument 'dates' has NA values.")) + } + } + # Check max.missing.days if (length(max.missing.days) != 3 || !all(c("annual", "monthly", "seasonal") %in% names(max.missing.days))) { stop("max.missing.days must be a named vector with 'annual', 'monthly', and 'seasonal' elements.") } + # Validate primary data and dates + validate_data_dates(data, dates, "Primary data") - # Check that required arguments are provided - if (missing(data)) { - stop("Primary data argument is missing.") - } - - if (missing(dates)) { - stop("Argument 'dates' is missing.") + # Check if dates are PCICt + if (!inherits(dates, "PCICt")) { + stop("Dates must be of class PCICt.") } - - if (!is.numeric(data)) { - stop("Primary Data must be numeric.") - } - - if (length(data) != length(dates)) { - stop("Primary data and dates must have the same length.") + # Vector-specific checks + if (is.vector) { + + validate_data_dates(secondary, dates, "Secondary data") + # Check that 'format' is provided + if (missing(format) || is.null(format)) { + stop("Argument 'format' is missing.") + } + + # Convert the format to lowercase to allow case-insensitive input + format <- tolower(format) + + # Additional validation for format + if (format %in% c("polar", "cartesian")) { + if (!is.numeric(secondary)) { + stop("For 'polar' or 'cartesian' formats, 'secondary' must be numeric.") + } + } else if (format == "cardinal") { + if (!is.character(secondary)) { + stop("For 'cardinal' format, 'secondary' must be character.") + } + } else { + stop("Invalid 'format'. Use 'polar', 'cartesian', or 'cardinal'.") + } } - if(!is.null(dates) && !inherits(dates, "PCICt")) - stop(paste("Dates must be of class PCICt.")) - # Calendar check: verify it matches one of the recognized types valid_calendars <- c("360_day", "360", "365_day", "365", "noleap", "gregorian", "proleptic_gregorian") if (!calendar %in% valid_calendars) { @@ -35,6 +77,25 @@ check.generic.argument.validity <- function( data, dates, max.missing.days, cale } } +# For single-value-per-month data. Check one day per month and that the day is always the first. +check.single.month.dates <- function(dates) { + valid_dates <- dates[!is.na(dates)] + # Check if there is exactly one value per month on the 1st day + unique_months <- unique(format(valid_dates, "%Y-%m")) + day_of_month <- as.integer(format(valid_dates, "%d")) + + # Check that the length of unique months matches the number of dates, ensuring only one value per month + if (length(unique_months) != length(valid_dates)) { + stop("Data must have exactly one value per month.") + } + + # Check that all dates correspond to the 1st day of each month + if (!all(day_of_month == 1)) { + stop("Data must be on the 1st day of each month.") + } +} + + # Utility function to handle date ranges and generate date factors. date_info <- function(dates) { cal <- attr(dates, "cal") diff --git a/R/climdexGenericScalar.R b/R/climdexGenericScalar.R index f9783f0..fe0601a 100644 --- a/R/climdexGenericScalar.R +++ b/R/climdexGenericScalar.R @@ -47,16 +47,16 @@ climdexGenericScalar.raw <- function( filled.list <- generate_filled_list(data, dates, date.series) names(filled.list) <- "data" namasks <- generate_namasks(filled.list, date.factors, max.missing.days) - obj <- new("climdexGenericScalar", + + return(new("climdexGenericScalar", data = filled.list[["data"]], dates = date.series, date.factors = date.factors, jdays = jdays, namasks = namasks, northern.hemisphere = northern.hemisphere, - max.missing.days = max.missing.days) - - return(obj) + max.missing.days = max.missing.days + )) } #' @title climdexGenericScalar.csv @@ -117,13 +117,107 @@ climdexGenericScalar.csv <- function( ) { GS.csv <- read_csv_data(file, data.column, date.columns, date.format, na.strings, calendar) - obj <- climdexGenericScalar.raw( - data = GS.csv$data[[1]], - dates = GS.csv$dates, - northern.hemisphere = northern.hemisphere, - max.missing.days = max.missing.days, - calendar = calendar - ) - return(obj) + return(climdexGenericScalar.raw( + data = GS.csv$data[[1]], + dates = GS.csv$dates, + northern.hemisphere = northern.hemisphere, + max.missing.days = max.missing.days, + calendar = calendar +)) +} +#' @title climdexSingleMonthlyScalar.raw +#' +#' @description +#' Creates a `ClimdexGenericScalar` object from raw scalar climate data with a single value per month constraint. +#' +#' @details +#' This function is a wrapper for creating `ClimdexGenericScalar` objects where temporal resolution is a single value per month. +#' It automatically sets the `max.missing.days` to `+Inf`. To ensure consistency, each data point must correspond +#' to the 1st day of each month. The function will raise an error if there is more than one value per month or if any date is not on the 1st. +#' +#' @param data A numeric vector containing the scalar climate data. +#' @param dates A `PCICt` vector corresponding to the data dates. Each date must correspond to the 1st day of each month. +#' @param northern.hemisphere Logical. Indicates whether this point is in the northern hemisphere. +#' @param calendar A string representing the calendar type, e.g., "gregorian". +#' @return A `ClimdexGenericScalar` object containing the processed data. +#' +#' @seealso [climdexGenericScalar.raw()], [climdexSingleMonthlyScalar.csv()] +#' +#' @examples +#' \dontrun{ +#' data <- runif(12, 0, 20) +#' dates <- as.PCICt(seq(as.Date("2020-01-01"), by = "month", length.out = 12), cal = "gregorian") +#' scalar_obj <- climdexSingleMonthlyScalar.raw(data, dates) +#' } +#' +#' @export + +climdexSingleMonthlyScalar.raw <- function( + data, + dates, + northern.hemisphere = TRUE, + calendar = "gregorian") { + max.missing.days <- c(annual = +Inf, monthly = +Inf, seasonal = +Inf) + + check.single.month.dates(dates) + + return(climdexGenericScalar.raw( + data = data, + dates = dates, + max.missing.days = max.missing.days, + northern.hemisphere = northern.hemisphere, + calendar = calendar +)) +} + +#' @title climdexSingleMonthlyScalar.csv +#' +#' @description +#' Reads scalar climate data with a single value per month constraint from a CSV file and creates a `ClimdexGenericScalar` object. +#' +#' @details +#' This function reads scalar climate data and validates that there is a single value per month. It automatically sets the `max.missing.days` +#' to `+Inf` and builds a `ClimdexGenericScalar` object. Each date must correspond to the 1st day of each month. +#' +#' @param file The file path to the CSV containing the scalar climate data. +#' @param data.column The name of the column containing the scalar data in the CSV file. +#' @param date.columns A vector of column names corresponding to the date fields in the CSV file. +#' @param date.format A string representing the format of the date fields. +#' @param na.strings A character vector of strings to interpret as `NA`. +#' @param northern.hemisphere Logical. Indicates whether this point is in the northern hemisphere. +#' @param calendar A string representing the calendar type (e.g., "gregorian"). +#' +#' @return A `ClimdexGenericScalar` object containing the processed scalar climate data. +#' +#' @seealso [climdexSingleMonthlyScalar.raw()] +#' +#' @examples +#' \dontrun{ +#' csv_file <- "path/to/scalar_data.csv" +#' scalar_obj <- climdexSingleMonthlyScalar.csv( +#' file = csv_file, data.column = "data", +#' date.columns = "date", date.format = "%Y-%m-%d" +#' ) +#' } +#' +#' @export + +climdexSingleMonthlyScalar.csv <- function( + file, + data.column, + date.columns, + date.format, + na.strings = NULL, + northern.hemisphere = TRUE, + calendar = "gregorian") { + GS.csv <- read_csv_data(file, data.columns = data.column, date.columns, date.format, na.strings, calendar) + + + return(climdexSingleMonthlyScalar.raw( + data = GS.csv$data[[1]], + dates = GS.csv$dates, + northern.hemisphere = northern.hemisphere, + calendar = calendar +)) } diff --git a/R/climdexGenericVector.R b/R/climdexGenericVector.R index 8858c66..104c06d 100644 --- a/R/climdexGenericVector.R +++ b/R/climdexGenericVector.R @@ -41,29 +41,9 @@ climdexGenericVector.raw <- function( calendar = "gregorian" ) { - check.generic.argument.validity(primary, dates, max.missing.days, calendar) - if (missing(secondary)) { - stop("Secondary data argument is missing.") - } - # Check that primary, secondary, and dates have the same length - if (length(primary) != length(secondary) || length(primary) != length(dates)) { - stop("Lengths of 'primary', 'secondary', and 'dates' must be equal.") - } - # Convert the format to lowercase to allow case-insensitive input - format <- tolower(format) - - # Additional validation for format - if (format %in% c("polar", "cartesian")) { - if (!is.numeric(secondary)) { - stop("For 'polar' or 'cartesian' formats, 'secondary' must be numeric.") - } - } else if (format == "cardinal") { - if (!is.character(secondary)) { - stop("For 'cardinal' format, 'secondary' must be character.") - } - } else { - stop("Invalid 'format'. Use 'polar', 'cartesian', or 'cardinal'.") - } + check.generic.argument.validity(primary, dates, max.missing.days, calendar, + is.vector = TRUE, secondary, format) + date.info <- date_info(dates) jdays = date.info$jdays @@ -76,19 +56,18 @@ climdexGenericVector.raw <- function( filled.secondary[is.na(filled.primary)] <- NA filled.primary[is.na(filled.secondary)] <- NA namasks <- generate_namasks(list(primary = filled.primary, secondary = filled.secondary), date.factors, max.missing.days) - - obj <- new("climdexGenericVector", - primary = filled.primary, - secondary = filled.secondary, - dates = date.series, - format = format, - date.factors = date.factors, - jdays = jdays, - namasks = namasks, - max.missing.days = max.missing.days, - northern.hemisphere = northern.hemisphere) - return(obj) + return(new("climdexGenericVector", + primary = filled.primary, + secondary = filled.secondary, + dates = date.series, + format = format, + date.factors = date.factors, + jdays = jdays, + namasks = namasks, + max.missing.days = max.missing.days, + northern.hemisphere = northern.hemisphere + )) } #' @title climdexGenericVector.csv @@ -160,15 +139,125 @@ climdexGenericVector.csv <- function( secondary_values <- GV.csv$data[[2]] dates <- GV.csv$dates - obj <- climdexGenericVector.raw( - primary = primary_values, - secondary = secondary_values, + return(climdexGenericVector.raw( + primary = primary_values, + secondary = secondary_values, + dates = dates, + format = format, + max.missing.days = max.missing.days, + northern.hemisphere = northern.hemisphere, + calendar = calendar +)) +} +#' @title climdexSingleMonthlyVector.raw +#' +#' @description +#' Creates a `ClimdexGenericVector` object from raw vector climate data with a single value per month constraint. +#' +#' @details +#' This function processes vector climate data and validates that there is a single value per month. +#' It automatically sets the `max.missing.days` to `+Inf` and builds a `ClimdexGenericVector` object. +#' To ensure consistency, each data point must correspond to the 1st day of each month. +#' The function will raise an error if there is more than one value per month or if any date is not on the 1st. +#' +#' @param primary A numeric vector representing the primary data (e.g., wind speed). +#' @param secondary A numeric or character vector representing the secondary data (e.g., wind direction). +#' @param dates A `PCICt` vector corresponding to the data dates. Each date must correspond to the 1st day of each month. +#' @param format A string specifying the format of the vector data ("polar", "cartesian", or "cardinal"). +#' @param northern.hemisphere Logical. Indicates whether this point is in the northern hemisphere. +#' @param calendar String representing the calendar type, e.g., "gregorian". +#' +#' @return A `ClimdexGenericVector` object containing the processed vector data. +#' #' +#' @seealso [climdexGenericVector.raw()], [climdexSingleMonthlyVector.csv()] +#' +#' @examples +#' \dontrun{ +#' primary <- runif(12, 0, 20) +#' secondary <- runif(12, 0, 360) +#' dates <- as.PCICt(seq(as.Date("2020-01-01"), by = "month", length.out = 12), cal = "gregorian") +#' vector_obj <- climdexSingleMonthlyVector.raw(primary, secondary, dates, "polar") +#' } +#' +#' @export + +climdexSingleMonthlyVector.raw <- function( + primary, + secondary, + dates, + format = "polar", + northern.hemisphere = TRUE, + calendar = "gregorian") { + max.missing.days <- c(annual = +Inf, monthly = +Inf, seasonal = +Inf) + + check.single.month.dates(dates) + + return(climdexGenericVector.raw( + primary = primary, + secondary = secondary, dates = dates, format = format, max.missing.days = max.missing.days, northern.hemisphere = northern.hemisphere, calendar = calendar - ) - - return(obj) -} \ No newline at end of file + )) +} + +#' @title climdexSingleMonthlyVector.csv +#' +#' @description +#' Reads vector climate data with a single value per month constraint from a CSV file and creates a `ClimdexGenericVector` object. +#' +#' @details +#' This function reads vector climate data and validates that there is a single value per month. +#' It automatically sets the `max.missing.days` to `+Inf` and builds a `ClimdexGenericVector` object. +#' Each date must correspond to the 1st day of each month. +#' +#' @param file The file path to the CSV containing the vector climate data. +#' @param primary.column The name of the column containing the primary data (e.g., magnitude) in the CSV file. +#' @param secondary.column The name of the column containing the secondary data (e.g., direction) in the CSV file. +#' @param date.columns A vector of column names corresponding to the date fields in the CSV file. +#' @param date.format A string representing the format of the date fields. +#' @param format A string specifying the format of the vector data. Must be one of `"polar"`, `"cartesian"`, or `"cardinal"`. +#' @param na.strings A character vector of strings to interpret as `NA`. +#' @param northern.hemisphere Logical. Indicates whether this point is in the northern hemisphere. +#' @param calendar A string representing the calendar type (e.g., "gregorian"). +#' +#' @return A `ClimdexGenericVector` object containing the processed vector climate data. +#' +#' @seealso [climdexSingleMonthlyVector.raw()] +#' +#' @examples +#' \dontrun{ +#' csv_file <- "path/to/vector_data.csv" +#' vector_obj <- climdexSingleMonthlyVector.csv( +#' file = csv_file, primary.column = "primary", +#' secondary.column = "secondary", date.columns = "date", +#' date.format = "%Y-%m-%d", format = "polar" +#' ) +#' } +#' +#' @export + +climdexSingleMonthlyVector.csv <- function( + file, + primary.column, + secondary.column, + date.columns, + date.format, + format = "polar", + na.strings = NULL, + northern.hemisphere = TRUE, + calendar = "gregorian") { + GV.csv <- read_csv_data(file, data.columns = c(primary.column, secondary.column), date.columns, date.format, na.strings, calendar) + + return(climdexSingleMonthlyVector.raw( + primary = GV.csv$data[[1]], + secondary = GV.csv$data[[2]], + dates = GV.csv$dates, + format = format, + northern.hemisphere = northern.hemisphere, + calendar = calendar +)) + +} diff --git a/R/generic_stats.R b/R/generic_stats.R index 5ae5d53..295de9c 100644 --- a/R/generic_stats.R +++ b/R/generic_stats.R @@ -32,6 +32,8 @@ library(circular) #' # Assuming `scalar_obj` is a valid climdexGenericScalar object: #' \dontrun{compute.gen.stat(scalar_obj, "max", scalar_obj@data, "monthly", FALSE)} #' +#' @importFrom stats na.omit +#' #' @export #' @keywords internal compute.gen.stat <- function(gen.var, stat, data, freq = c("monthly", "annual", "seasonal"), include.exact.dates = FALSE) { @@ -39,6 +41,24 @@ compute.gen.stat <- function(gen.var, stat, data, freq = c("monthly", "annual", freq <- match.arg(freq) exact_date_stats <- c("max", "min") + # Determine if the data is single-value per month + single_value_per_month <- all(tapply(data, gen.var@date.factors$monthly, function(x) length(na.omit(x)) == 1, simplify = TRUE)) + + + # Check if the data is single-value per month + if (single_value_per_month) { + if (freq == "monthly") { + # Warn if trying to compute monthly stats with single-value data per month + warning("Monthly calculations on single-value-per-month data are not meaningful. Proceeding with the calculation.") + } + + if (include.exact.dates) { + # Warn if exact dates are requested on single-value-per-month data + warning("Exact dates are not meaningful for single-value-per-month data. Proceeding without exact dates.") + include.exact.dates <- FALSE + } + } + if (include.exact.dates && !(stat %in% exact_date_stats)) { message(paste("Warning: Exact dates are not applicable for the", stat, "statistic. Proceeding without exact dates.")) include.exact.dates <- FALSE diff --git a/tests/test_single_value_per_month.R b/tests/test_single_value_per_month.R new file mode 100644 index 0000000..4f2b251 --- /dev/null +++ b/tests/test_single_value_per_month.R @@ -0,0 +1,552 @@ +library(climdex.pcic) +library(RUnit) + + +validate_climdex_object <- function(obj_raw, obj_csv, primary_data, dates, expected_levels, slot_name, secondary_data = NULL) { + # Validate dates for the primary data + primary_slot <- slot(obj_raw, slot_name) + + + # Validate secondary data if applicable + if (!is.null(secondary_data)) { + checkEquals(primary_slot[!is.na(primary_slot)], primary_data[!is.na(primary_data) & !is.na(secondary_data)], "Raw object primary data for non-NA data does not match input data.") + checkEquals(obj_raw@secondary[!is.na(primary_slot)], secondary_data[!is.na(primary_data) & !is.na(secondary_data)], "Raw object secondary data for non-NA data does not match input data.") + checkEquals(obj_raw@dates[!is.na(primary_slot)] , dates[!is.na(primary_data) & !is.na(secondary_data)] , "Raw object dates for non-NA primary data do not match input dates.") + } + else{ + checkEquals(primary_slot[!is.na(primary_slot)], primary_data[!is.na(primary_data)], "Raw object primary data for non-NA data does not match input data.") + checkEquals(obj_raw@dates[!is.na(primary_slot)] , dates[!is.na(primary_data)], "Raw object dates for non-NA primary data do not match input dates.") + } + + # Validate date factors + levels_count <- sapply(obj_raw@date.factors, function(factor_obj) length(levels(factor_obj))) + checkEquals(levels_count, expected_levels, msg = "Date factors (annual, monthly, seasonal) do not have the expected number of levels.") + + # Validate jdays + checkEquals(length(obj_raw@jdays), 366, "Raw object jdays for filled leap-year do not match expected.") + + # Validate CSV object against raw object + checkEquals(obj_raw@dates, obj_csv@dates, "Date mismatch between raw and CSV objects.") + checkTrue(all.equal(obj_csv, obj_raw), msg = "Object built from CSV is not identical to raw.") +} + + + +climdex.pcic.test.single.monthly.scalar.raw.and.csv.construction <- function() { + set.seed(123) + + scalar_data <- c(runif(11, 0, 20), NA) # One value per month + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) + + scalar_obj_raw <- climdexSingleMonthlyScalar.raw( + data = scalar_data, + dates = dates, + northern.hemisphere = TRUE, + calendar = "gregorian" + ) + + csv_data <- data.frame(date = as.character(dates), data = scalar_data) + temp_csv <- tempfile() + write.csv(csv_data, temp_csv, row.names = FALSE) + + scalar_obj_csv <- climdexSingleMonthlyScalar.csv( + file = temp_csv, + data.column = "data", + date.columns = "date", + date.format = "%Y-%m-%d", + na.strings = 'NA', + northern.hemisphere = TRUE, + calendar = "gregorian" + ) + + validate_climdex_object( + scalar_obj_raw, scalar_obj_csv, + primary_data = scalar_data, + dates = dates, + expected_levels = c(annual = 1, monthly = 12, seasonal = 5), + slot_name = "data" + ) +} + +climdex.pcic.test.single.monthly.vector.raw.and.csv.construction <- function() { + set.seed(123) + + primary_data <- c(runif(11, 0, 20), NA) # One value per month + + secondary_data <- c(NA, runif(11, 0, 360)) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) + + vector_obj_raw <- climdexSingleMonthlyVector.raw( + primary = primary_data, + secondary = secondary_data, + dates = dates, + format = "polar", + northern.hemisphere = TRUE, + calendar = "gregorian" + ) + + csv_data <- data.frame(date = as.character(dates), primary = primary_data, secondary = secondary_data) + temp_csv <- tempfile() + write.csv(csv_data, temp_csv, row.names = FALSE) + + vector_obj_csv <- climdexSingleMonthlyVector.csv( + file = temp_csv, + primary.column = "primary", + secondary.column = "secondary", + date.columns = "date", + date.format = "%Y-%m-%d", + format = "polar", + na.strings = 'NA', + northern.hemisphere = TRUE, + calendar = "gregorian" + ) + validate_climdex_object( + vector_obj_raw, vector_obj_csv, + primary_data = primary_data, + dates = dates, + expected_levels = c(annual = 1, monthly = 12, seasonal = 5), + slot_name = "primary", + secondary_data = secondary_data + ) +} + + + +climdex.pcic.test.SingleMonthlyScalar.raw.missing <- function() { + set.seed(123) + + # Single monthly value data with an NA value + data <- c(1:5, NA, 7:12) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) + + # Create the climdexSingleMonthlyScalar object and expect it to pass without failure + result <- try( + scalar_obj <- climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), + silent = TRUE + ) + + checkTrue( + !inherits(result, "try-error"), + "Function raised an error despite valid monthly data." + ) +} + + +climdex.pcic.test.SingleMonthlyScalar.sub.annual <- function() { + set.seed(123) + + # Single monthly value data with an NA value + data <- c(1:5) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 5) + + # Create the climdexSingleMonthlyScalar object and expect it to pass without failure + result <- try( + scalar_obj <- climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), + silent = TRUE + ) + + checkTrue( + !inherits(result, "try-error"), + "Function raised an error despite valid monthly data." + ) + scalar_mean <- unname(compute.stat.scalar(scalar_obj, stat = "mean", freq = "annual", include.exact.dates = FALSE))[1] + checkEquals(scalar_mean, 3, "Mean of single value monthly scalar data for sub-annual period does not match expected.") +} + +climdex.pcic.test.SingleMonthlyVector.raw.missing <- function() { + set.seed(123) + + # Single monthly value vector data with an NA value in the primary component + primary <- c(1:5, NA, 7:12) + secondary <- runif(12, 0, 360) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) + + # Create the climdexSingleMonthlyVector object and expect it to pass without failure + result <- try( + vector_obj <- climdexSingleMonthlyVector.raw(primary, secondary, dates, format = "polar", northern.hemisphere = TRUE, calendar = "gregorian"), + silent = TRUE + ) + + checkTrue( + !inherits(result, "try-error"), + "Function raised an error despite valid monthly data." + ) +} + +climdex.pcic.test.SingleMonthlyScalar.raw.zero.length <- function() { + # Empty data and dates vectors + data <- numeric(0) + dates <- as.PCICt(character(0), cal = "gregorian") + + error_message <- tryCatch( + climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), + error = function(e) e$message + ) + + # Check error message + checkTrue( + grepl("Primary data and dates must not be empty vectors.", error_message), + "Error message is not informative for empty input vectors." + ) +} + +climdex.pcic.test.MultiyearScalarContinuous <- function() { + set.seed(123) + + data <- runif(36, 0, 20) # One value per month for 3 years + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 36) + + # Create the scalar object + scalar_obj <- climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian") + + # Validate that dates and data are maintained correctly + sorted_indices <- order(dates) + sorted_dates <- dates[sorted_indices] + sorted_data <- data[sorted_indices] + + obj_dates <- scalar_obj@dates[!is.na(scalar_obj@data)] + obj_data <- scalar_obj@data[!is.na(scalar_obj@data)] + + checkEquals(obj_data, sorted_data, "Multiyear scalar data is not aligned correctly with sorted dates.") +} + +climdex.pcic.test.MultiyearVectorContinuous <- function() { + set.seed(123) + + primary <- runif(36, 0, 20) # One value per month for 3 years + secondary <- runif(36, 0, 360) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 36) + + vector_obj <- climdexSingleMonthlyVector.raw(primary, secondary, dates, format = "polar", northern.hemisphere = TRUE, calendar = "gregorian") + + checkEquals(length(vector_obj@primary), length(vector_obj@dates), "Multiyear vector primary length does not match dates.") + + sorted_indices <- order(dates) + sorted_dates <- dates[sorted_indices] + sorted_primary <- primary[sorted_indices] + sorted_secondary <- secondary[sorted_indices] + + obj_dates <- vector_obj@dates[!is.na(vector_obj@primary)] + obj_primary <- vector_obj@primary[!is.na(vector_obj@primary)] + obj_secondary <- vector_obj@secondary[!is.na(vector_obj@primary)] + + checkEquals(obj_primary, sorted_primary, "Multiyear vector primary data is not aligned correctly with sorted dates.") + checkEquals(obj_secondary, sorted_secondary, "Multiyear vector secondary data is not aligned correctly with sorted dates.") +} + +climdex.pcic.test.MultiyearWithGaps <- function() { + set.seed(123) + + # Scalar data for three years with some missing months + data <- c(runif(11, 0, 20), NA, runif(11, 0, 20), NA, runif(10, 0, 20), NA, NA) # 36 data points with some NA values to align with dates + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 36) + # Test that the scalar object is built without errors + result <- try( + scalar_obj <- climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), + silent = TRUE + ) + + checkTrue( + !inherits(result, "try-error"), + "Function raised an error despite valid monthly data." + ) + # Validate non-NA data alignment + obj_data <- scalar_obj@data[!is.na(scalar_obj@data)] + obj_dates <- scalar_obj@dates[!is.na(scalar_obj@data)] + valid_indices <- !is.na(data) + checkEquals(obj_data, data[valid_indices], "Multiyear scalar data with gaps is not aligned correctly.") +} + +climdex.pcic.test.SingleMonthlyScalar.raw.dates.not.first.day <- function() { + set.seed(123) + + data <- runif(12, 0, 20) + dates <- seq(as.PCICt("2020-01-02", cal = "gregorian"), by = "month", length.out = 12) + + # Expect an error due to dates not being on the first day + checkException( + climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), + "Function did not raise an error when dates were not on the first day of the month." + ) +} + +climdex.pcic.test.SingleMonthlyScalar.raw.mismatched.lengths <- function() { + set.seed(123) + + # Data and dates of different lengths + data <- runif(11, 0, 20) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) + + # Expect an error due to mismatched lengths + checkException( + climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), + "Function did not raise an error when data and dates lengths were mismatched." + ) +} + +climdex.pcic.test.SingleMonthlyScalar.raw.non.numeric.data <- function() { + data <- rep("non-numeric", 12) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) + + # Expect an error due to non-numeric data + checkException( + climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), + "Function did not raise an error when non-numeric data was provided." + ) +} + +climdex.pcic.test.SingleMonthlyScalar.csv.invalid.date.format <- function() { + data <- runif(12, 0, 20) + dates <- format(seq(as.Date("2020-01-01"), by = "month", length.out = 12), "%Y/%m/%d") # Invalid date format + csv_data <- data.frame(date = dates, data = data) + temp_csv <- tempfile() + write.csv(csv_data, temp_csv, row.names = FALSE) + + # Expect an error due to invalid date format + checkException( + climdexSingleMonthlyScalar.csv( + file = temp_csv, + data.column = "data", + date.columns = "date", + date.format = "%Y-%m-%d", # Expecting different format + northern.hemisphere = TRUE, + calendar = "gregorian" + ), + "Function did not raise an error when invalid date format was provided in CSV." + ) +} + +climdex.pcic.test.SingleMonthlyScalar.raw.invalid.calendar <- function() { + set.seed(123) + + # Create valid data and dates + data <- runif(12, 0, 20) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) + + # Expect an error due to invalid calendar type + checkException( + climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "invalid_calendar"), + "Function did not raise an error when an invalid calendar type was provided." + ) +} + +climdex.pcic.test.SingleMonthlyScalar.raw.NA.dates <- function() { + set.seed(123) + + # NA in dates + data <- runif(12, 0, 20) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) + dates_char <- as.character(dates) + dates_char[6] <- NA # Insert NA + dates <- as.PCICt(dates_char, cal = "gregorian") + # Capture the error message + error_message <- tryCatch( + climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), + error = function(e) e$message + ) + # Check error message + checkTrue( + grepl("Argument 'dates' has NA values.", error_message), + "Error message is not informative for NA values in dates." + ) +} + +climdex.pcic.test.SingleMonthlyScalar.raw.leap.year <- function() { + set.seed(123) + + # Leap year + data <- runif(24, 0, 20) + dates <- seq(as.PCICt("2019-01-01", cal = "gregorian"), by = "month", length.out = 24) + + result <- try( + scalar_obj <- climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), + silent = TRUE + ) + + checkTrue( + !inherits(result, "try-error"), + "Function raised an error despite valid monthly data." + ) + # Ensure that February 29, 2020, is included + checkTrue(any(format(scalar_obj@dates, "%Y-%m-%d") == "2020-02-29"), "Leap day not included in dates.") +} + +climdex.pcic.test.SingleMonthlyScalar.raw.extreme.values <- function() { + # Extreme values + data <- c(-1e10, runif(10, -1e5, 1e5), 1e10) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) + + scalar_obj <- climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian") + scalar_max <- unname(compute.stat.scalar(scalar_obj, "max", "annual", FALSE)) + scalar_min <- unname(compute.stat.scalar(scalar_obj, "min", "annual", FALSE)) + scalar_sum <- unname(compute.stat.scalar(scalar_obj, "sum", "annual", FALSE)) + checkEquals(scalar_max, 1e10, "annual max stat for single monthly scalar was not equal to max of input data") + checkEquals(scalar_min, -1e10, "annual min stat for single monthly scalar was not equal to min of input data") + checkEquals(scalar_sum, sum(data), "annual sum stat for single monthly scalar was not equal to sum of input data") +} + +climdex.pcic.test.SingleMonthlyVector.raw.missing.secondary <- function() { + set.seed(123) + + # NA in secondary component + primary <- runif(12, 0, 20) + secondary <- c(runif(5, 0, 360), NA, runif(6, 0, 360)) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) + + result <- try( + vector_obj <- climdexSingleMonthlyVector.raw(primary, secondary, dates, format = "polar", northern.hemisphere = TRUE, calendar = "gregorian"), + silent = TRUE + ) + + checkTrue( + !inherits(result, "try-error"), + "Function raised an error despite valid monthly data." + ) + checkEquals(vector_obj@primary[!is.na(vector_obj@primary)], vector_obj@primary[!is.na(vector_obj@secondary)], "Vector objects NA values are not in sync beteen primary and secondary data.") +} + + +climdex.pcic.test.SingleMonthlyVector.raw.invalid.format <- function() { + set.seed(123) + + # Valid data + primary <- runif(12, 0, 20) + secondary <- runif(12, 0, 360) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) + + # Expect an error due to invalid format + checkException( + climdexSingleMonthlyVector.raw(primary, secondary, dates, format = "invalid_format", northern.hemisphere = TRUE, calendar = "gregorian"), + "Function did not raise an error when an invalid format was provided." + ) +} + +climdex.pcic.test.SingleMonthlyScalar.raw.error.messages <- function() { + # Multiple data values per month + data <- runif(24, 0, 20) + dates <- c( + seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12), + seq(as.PCICt("2020-01-15", cal = "gregorian"), by = "month", length.out = 12) + ) + + # Capture the error message + error_message <- tryCatch( + climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), + error = function(e) e$message + ) + + # Check error message + checkTrue( + grepl("exactly one value per month", error_message), + "Error message is not informative for multiple values per month." + ) +} + +climdex.pcic.test.SingleMonthlyScalar.raw.different.calendars <- function() { + set.seed(123) + + # Dates with a "noleap" calendar + data <- runif(12, 0, 20) + dates <- seq(as.PCICt("2020-01-01", cal = "noleap"), by = "month", length.out = 12) + result <- try( + scalar_obj <- climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), + silent = TRUE + ) + + checkTrue( + !inherits(result, "try-error"), + "Function raised an error despite valid monthly data." + ) + checkEquals(scalar_obj@dates[!is.na(scalar_obj@data)], dates, "Scalar object dates for non-NA data does not match input dates.") +} + +climdex.pcic.test.SingleMonthlyScalar.raw.timezones <- function() { + set.seed(123) + + # Create data with dates including time zones + data <- runif(12, 0, 20) + dates <- as.PCICt(seq(as.POSIXct("2020-01-01", tz = "UTC"), by = "month", length.out = 12), cal = "gregorian") + result <- try( + scalar_obj <- climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), + silent = TRUE + ) + + checkTrue( + !inherits(result, "try-error"), + "Function raised an error despite valid monthly data." + ) + checkEquals(scalar_obj@dates[!is.na(scalar_obj@data)], dates, "Scalar object dates for non-NA data does not match input dates.") +} + +climdex.pcic.test.SingleMonthlyScalar.raw.irregular.intervals <- function() { + set.seed(123) + + # Missing months + data <- runif(10, 0, 20) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "2 months", length.out = 10) + result <- try( + scalar_obj <- climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian"), + silent = TRUE + ) + + checkTrue( + !inherits(result, "try-error"), + "Function raised an error despite valid monthly data." + ) + checkEquals(scalar_obj@dates[!is.na(scalar_obj@data)], dates, "Scalar object dates for non-NA data does not match input dates.") +} + +climdex.pcic.test.SingleMonthlyScalar.raw.negative.values <- function() { + # Data with negative values + data <- runif(36, -50, 0) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 36) + + scalar_obj <- climdexSingleMonthlyScalar.raw(data, dates, northern.hemisphere = TRUE, calendar = "gregorian") + scalar_max <- unname(compute.stat.scalar(scalar_obj, "max", "annual", FALSE)) + scalar_min <- unname(compute.stat.scalar(scalar_obj, "min", "annual", FALSE)) + scalar_sum <- unname(compute.stat.scalar(scalar_obj, "sum", "annual", FALSE)) + checkEquals(max(scalar_max), max(data), "annual max stat for single monthly scalar was not equal to max of input data") + checkEquals(min(scalar_min), min(data), "annual min stat for single monthly scalar was not equal to min of input data") + checkEquals(sum(scalar_sum), sum(data), "annual sum stat for single monthly scalar was not equal to sum of input data") +} + +climdex.pcic.test.SingleMonthlyVector.raw.cartesian <- function() { + set.seed(123) + + # Data in cartesian format (x,y components) + x_comp <- runif(12, -10, 10) + y_comp <- runif(12, -10, 10) + dates <- seq(as.PCICt("2020-01-01", cal = "gregorian"), by = "month", length.out = 12) + + vector_obj <- climdexSingleMonthlyVector.raw( + primary = x_comp, + secondary = y_comp, + dates = dates, + format = "cartesian", + northern.hemisphere = TRUE, + calendar = "gregorian" + ) + + # Verify components + checkEquals(length(vector_obj@primary), length(vector_obj@dates), "Cartesian vector primary length does not match dates.") + checkEquals(vector_obj@primary[!is.na(vector_obj@primary)], x_comp, "X component not stored correctly") + checkEquals(vector_obj@secondary[!is.na(vector_obj@secondary)], y_comp, "Y component not stored correctly") +} + +climdex.pcic.test.SingleMonthlyScalar.large.dataset <- function() { + set.seed(123) + + # Create 100 years of monthly data + n_months <- 100 * 12 + data <- runif(n_months, 0, 20) + dates <- seq(as.PCICt("1920-01-01", cal = "gregorian"), by = "month", length.out = n_months) + + scalar_obj <- climdexSingleMonthlyScalar.raw( + data = data, + dates = dates, + northern.hemisphere = TRUE, + calendar = "gregorian" + ) + + checkEquals(length(scalar_obj@data[!is.na(scalar_obj@data)]), n_months, "Large dataset not handled correctly") +}