pacificclimate · QSparks · Oct 12, 2023 · Oct 12, 2023 · Oct 19, 2023 · Oct 19, 2023
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -10,6 +10,7 @@ Depends:
     PCICt (>= 0.5-4)
 Encoding: UTF-8
 Imports:
+    circular,
     methods,
     Rcpp (>= 0.11.4),
     stats,
@@ -31,7 +32,12 @@ RoxygenNote: 7.3.2
 Collate:
   'input_utils.R'
   'climdexInput_class.R'
+  'GenericVariable_utils.R'
+  'climdexGenericVariable_class.R'
+  'climdexGenericScalar.R'
+  'climdexGenericVector.R'
   'climdex.pcic-package.R'
+  'generic_stats.R'
   'constants.R'
   'date_utils.R'
   'precipitation_indices.R'

diff --git a/NAMESPACE b/NAMESPACE
@@ -7,9 +7,6 @@ export(climdex.cwd)
 export(climdex.dtr)
 export(climdex.fd)
 export(climdex.get.available.indices)
-export(climdex.min.max.idx.list)
-export(climdex.mean.idx.list)
-export(climdex.bootstrap.idx.list)
 export(climdex.gsl)
 export(climdex.id)
 export(climdex.mean.idx.list)
@@ -35,8 +32,21 @@ export(climdex.tx90p)
 export(climdex.txn)
 export(climdex.txx)
 export(climdex.wsdi)
+export(climdexGenericScalar.csv)
+export(climdexGenericScalar.raw)
+export(climdexGenericVector.csv)
+export(climdexGenericVector.raw)
 export(climdexInput.csv)
 export(climdexInput.raw)
+export(compute.stat.scalar)
+export(compute.stat.vector)
+export(compute_circular_mean)
+export(compute_circular_sd)
+export(convert_cardinal_to_degrees)
+export(convert_cartesian_to_polar)
+export(convert_degrees_to_cardinal)
+export(convert_polar_to_cartesian)
+export(filter_by_direction_range)
 export(get.last.monthday.of.year)
 export(get.outofbase.quantiles)
 export(get.series.lengths.at.ends)
@@ -49,10 +59,15 @@ export(simple.precipitation.intensity.index)
 export(spell.length.max)
 export(threshold.exceedance.duration.index)
 export(total.precip.op.threshold)
+exportClasses(climdexGenericScalar)
+exportClasses(climdexGenericVector)
 exportClasses(climdexInput)
 import(PCICt)
 import(Rcpp)
 import(methods)
+importFrom(circular,circular)
+importFrom(circular,mean.circular)
+importFrom(circular,sd.circular)
 importFrom(stats,quantile)
 importFrom(utils,head)
 importFrom(utils,read.csv)

diff --git a/R/GenericVariable_utils.R b/R/GenericVariable_utils.R
@@ -0,0 +1,170 @@
+# Validate the arguments shared by the scalar and vector constructors.
+#
+# Arguments:
+#   data              Primary data series; must be numeric.
+#   dates             Dates of the observations; must inherit from "PCICt" and
+#                     have the same length as `data`.
+#   max.missing.days  Length-3 vector carrying the names 'annual', 'monthly'
+#                     and 'seasonal'.
+#   calendar          A single calendar name from the accepted list below.
+#
+# Stops with a descriptive error on the first check that fails; returns NULL
+# invisibly when every check passes.
+check.generic.argument.validity <- function( data, dates, max.missing.days, calendar) {
+
+  if (length(max.missing.days) != 3 || !all(c("annual", "monthly", "seasonal") %in% names(max.missing.days))) {
+    stop("max.missing.days must be a named vector with 'annual', 'monthly', and 'seasonal' elements.")
+  }
+
+  # Check that required arguments are provided
+  if (missing(data)) {
+    stop("Primary data argument is missing.")
+  }
+
+  if (missing(dates)) {
+    stop("Argument 'dates' is missing.")
+  }
+
+  if (!is.numeric(data)) {
+    stop("Primary Data must be numeric.")
+  }
+
+  if (length(data) != length(dates)) {
+    stop("Primary data and dates must have the same length.")
+  }
+
+  # `dates` is mandatory here, so NULL is rejected too. Previously a NULL
+  # `dates` paired with zero-length data slipped through this check.
+  if (!inherits(dates, "PCICt")) {
+    stop(paste("Dates must be of class PCICt."))
+  }
+
+  # The membership test below feeds a scalar `if`; anything other than exactly
+  # one value would fail there with an unrelated "condition" error.
+  if (length(calendar) != 1) {
+    stop("calendar must be a single character string.")
+  }
+
+  # Calendar check: verify it matches one of the recognized types
+  valid_calendars <- c("360_day", "360", "365_day", "365", "noleap", "gregorian", "proleptic_gregorian")
+  if (!calendar %in% valid_calendars) {
+    stop(paste("Invalid calendar type:", calendar, 
+               ". Accepted types are '360_day', '360', '365_day', '365', 'noleap', 'gregorian', 'proleptic_gregorian'."))
+  }
+
+  invisible(NULL)
+}
+
+# Derive the calendar, the complete daily date sequence and the grouping
+# factors for a vector of dates.
+#
+# The sequence starts on 1 January of the earliest year found in `dates` and
+# ends on the month-day reported by get.last.monthday.of.year() in the latest
+# year. Returns a list with:
+#   cal           the "cal" attribute of `dates`
+#   date.series   the daily sequence
+#   date.factors  list(annual, monthly, seasonal) of factors over date.series
+#   jdays         get.jdays() of the sequence passed through
+#                 get.jdays.replaced.feb29()
+date_info <- function(dates) {
+  calendar_attr <- attr(dates, "cal")
+  year_end <- get.last.monthday.of.year(dates)
+
+  # First and last year present in the input, as numbers.
+  year_bounds <- as.numeric(format(range(dates), "%Y", tz = "GMT"))
+  bound_strings <- paste(year_bounds, c("01-01", year_end), sep = "-")
+  span <- as.PCICt(bound_strings, cal = calendar_attr)
+  all_days <- seq(span[1], span[2], by = "day")
+
+  day_of_year <- get.jdays.replaced.feb29(get.jdays(all_days))
+
+  season_labels <- classify_meteorological_season_with_year(all_days)
+  year_labels <- format(all_days, format = "%Y", tz = "GMT")
+  month_labels <- format(all_days, format = "%Y-%m", tz = "GMT")
+
+  list(
+    cal = calendar_attr,
+    date.series = all_days,
+    date.factors = list(
+      annual = factor(year_labels),
+      monthly = factor(month_labels),
+      # Levels follow first appearance rather than alphabetical order.
+      seasonal = factor(season_labels, levels = unique(season_labels))
+    ),
+    jdays = day_of_year
+  )
+}
+
+# Generates NA masks based on filled data and date factors. Returns list(annual, monthly, seasonal), each holding one mask per element of filled.list.
+generate_namasks <- function(filled.list, date.factors, max.missing.days) {
+  namasks <- list(  # one get.na.mask() result per variable and period; presumably NA marks periods to drop (TODO confirm against get.na.mask)
+    annual = lapply(filled.list, get.na.mask, date.factors$annual, max.missing.days["annual"]),
+    monthly = lapply(filled.list, get.na.mask, date.factors$monthly, max.missing.days["monthly"]),
+    seasonal = lapply(filled.list, get.na.mask, date.factors$seasonal, max.missing.days["seasonal"]))
+    # Vectors: Combine the masks for magnitude and direction (elementwise product, then copied so both components share one mask)
+  if ("primary" %in% names(filled.list) && "secondary" %in% names(filled.list)) {
+    # Synchronize annual masks
+    namasks$annual$primary <- namasks$annual$primary * namasks$annual$secondary
+    namasks$annual$secondary <- namasks$annual$primary
+
+    # Synchronize monthly masks
+    namasks$monthly$primary <- namasks$monthly$primary * namasks$monthly$secondary
+    namasks$monthly$secondary <- namasks$monthly$primary
+
+    # Synchronize seasonal masks
+    namasks$seasonal$primary <- namasks$seasonal$primary * namasks$seasonal$secondary
+    namasks$seasonal$secondary <- namasks$seasonal$primary
+  }
+  namasks$annual <- lapply(names(namasks$annual), function(v) {  # fold the monthly masks into the annual ones, variable by variable
+    d <- namasks$annual[[v]] * as.numeric(tapply(namasks$monthly[[v]], rep(seq_along(namasks$annual[[v]]), each = 12), prod))  # annual value times the product of each consecutive run of 12 monthly values
+    dimnames(d) <- dim(d) <- NULL
+    d
+  })
+  names(namasks$annual) <- names(namasks$seasonal) <- names(namasks$monthly)  # lapply() over a character vector returns an unnamed list, so restore the variable names
+
+
+  season_month_counts <- sapply(unique(date.factors$seasonal), function(season) {  # number of distinct months present in each season, in order of first appearance
+    length(unique(date.factors$monthly[date.factors$seasonal == season]))
+  })
+  data.vars <- names(filled.list)
+
+  for (var in data.vars) {
+    seasonal_namasks <- namasks$seasonal[[var]]
+    na_months <- unique(date.factors$monthly)[is.na(namasks$monthly[[var]])]  # months whose monthly mask is NA
+    seasons_of_na_months <- unique(date.factors$seasonal[date.factors$monthly %in% na_months])
+    seasonal_namasks[unique(date.factors$seasonal) %in% seasons_of_na_months] <- NA  # a season is masked as soon as any of its months is; assumes mask order matches unique(seasonal) — TODO confirm
+    # Identify and set NA for seasons with less than 3 months
+    for (season in seq_along(season_month_counts) ) {
+      if (!is.na(season_month_counts[season]) && season_month_counts[season] < 3) {
+        seasonal_namasks[season] <- NA
+      }
+    }
+    namasks$seasonal[[var]] <- seasonal_namasks
+  } 
+  return(namasks)
+}
+
+# Place one or more data series onto the complete date sequence.
+#
+# Arguments:
+#   data         Either a single series (atomic vector) or a list / data frame
+#                of series that all share `dates`.
+#   dates        Dates of the observations; passed through trunc() before
+#                being handed to create.filled.series().
+#   date.series  Complete date sequence to fill onto.
+#
+# Returns a list of filled series: a one-element unnamed list for a single
+# series, otherwise one element per entry of `data` (names preserved).
+generate_filled_list <- function(data, dates, date.series) {
+  truncated_dates <- trunc(dates)
+
+  # Dispatch on is.list() rather than is.vector(): is.vector() is TRUE for
+  # plain lists (which must be filled element by element) and FALSE for atomic
+  # vectors that carry attributes (which are still a single series).
+  if (is.list(data)) {
+    return(lapply(data, create.filled.series, truncated_dates, date.series))
+  }
+
+  list(create.filled.series(data, truncated_dates, date.series))
+}
+
+
+# Reads data from a CSV file, validates it, and converts date columns to PCICt dates.
+#
+# Arguments:
+#   file          CSV file handed to read.csv().
+#   data.columns  Name(s) of the column(s) holding the data values.
+#   date.columns  Name(s) of the column(s) that together make up the date.
+#   date.format   strptime() format for the date columns pasted together, in
+#                 the order given, separated by single spaces.
+#   na.strings    Passed to read.csv() unchanged.
+#   calendar      Calendar passed to as.PCICt().
+#
+# Returns list(data = <list with one element per data column>, dates = <PCICt>).
+#
+# Stops when the number of data columns does not suit the calling function
+# (1 if its name contains "scalar", 2 if it contains "vector", matched
+# case-insensitively), or when a requested data or date column is absent.
+read_csv_data <- function(
+    file,
+    data.columns,
+    date.columns,
+    date.format,
+    na.strings,
+    calendar
+) {
+
+  # Work out the name of the calling function. The function position of the
+  # caller's call is a bare symbol for `fun(...)`, but a three-element call for
+  # `pkg::fun(...)`, `pkg:::fun(...)` or `x$fun(...)`, and it may be a function
+  # object (do.call) or absent altogether (top-level call). Only the first two
+  # shapes yield a name; for anything else the column-count check is skipped.
+  caller <- sys.call(-1)
+  calling_func <- ""
+  if (!is.null(caller)) {
+    caller_fn <- caller[[1]]
+    if (is.name(caller_fn)) {
+      calling_func <- as.character(caller_fn)
+    } else if (is.call(caller_fn) && length(caller_fn) == 3 &&
+               is.name(caller_fn[[1]]) &&
+               as.character(caller_fn[[1]]) %in% c("::", ":::", "$") &&
+               is.name(caller_fn[[3]])) {
+      calling_func <- as.character(caller_fn[[3]])
+    }
+  }
+
+  # Ensure that the number of data columns matches the type of the calling function
+  if (grepl("Scalar", calling_func, ignore.case = TRUE) && length(data.columns) != 1) {
+    stop("For scalar data, 'data.columns' should contain exactly 1 column.")
+  } else if (grepl("Vector", calling_func, ignore.case = TRUE) && length(data.columns) != 2) {
+    stop("For vector data, 'data.columns' should contain exactly 2 columns.")
+  }
+
+  # Read the CSV file
+  GV.csv <- read.csv(file, na.strings = na.strings)
+
+  # Check that data columns exist (report the first one that is missing)
+  absent_data_cols <- setdiff(data.columns, names(GV.csv))
+  if (length(absent_data_cols) > 0) {
+    stop(paste("Data column", absent_data_cols[1], "not found in data."))
+  }
+
+  # Check that date columns exist
+  if (!all(date.columns %in% names(GV.csv))) {
+    stop(paste("Date columns", paste(date.columns, collapse = ", "), "not found in data."))
+  }
+
+  # Extract data cols
+  data_values <- lapply(data.columns, function(col) GV.csv[[col]])
+
+  # Build one date string per row. Pasting the columns directly is vectorised
+  # and avoids apply()'s coercion of the data frame to a matrix; unname() keeps
+  # a column called "sep" or "collapse" from being taken as a paste() argument.
+  date_strings <- do.call(paste, c(unname(as.list(GV.csv[date.columns])), sep = " "))
+
+  # Convert date strings to PCICt dates
+  dates <- as.PCICt(strptime(date_strings, format = date.format, tz = "UTC"), cal = calendar)
+
+  list(data = data_values, dates = dates)
+}
diff --git a/R/climdexGenericScalar.R b/R/climdexGenericScalar.R
@@ -0,0 +1,129 @@
+#' @title climdexGenericScalar.raw
+#'
+#' @description
+#' Builds a `climdexGenericScalar` object from a raw scalar climate series.
+#'
+#' @details
+#' Intended for scalar variables (e.g., humidity, snow-depth). The arguments
+#' are validated, the series is placed on a complete daily date sequence
+#' derived from `dates`, and NA masks for the annual, monthly and seasonal
+#' periods are generated from `max.missing.days`. The resulting
+#' `climdexGenericScalar` object is the input for computing basic statistics
+#' on scalar data.
+#'
+#' @param data A numeric vector containing the scalar climate data.
+#' @param dates A `PCICt` vector of the same length as `data` giving the date
+#'   of each value.
+#' @param max.missing.days A named vector with elements `annual`, `monthly`
+#'   and `seasonal` giving the maximum number of missing days allowed in each
+#'   kind of period.
+#' @param northern.hemisphere Whether this point is in the northern hemisphere.
+#' @param calendar String naming the calendar type, e.g., "gregorian". It is
+#'   checked against the supported calendar names; the date sequence itself is
+#'   built from the calendar attribute carried by `dates`.
+#' @return A `climdexGenericScalar` object containing the processed data.
+#'
+#' @seealso \code{\link{climdexGenericVector.raw}}, \code{\link{climdexGenericScalar.csv}}
+#'
+#' @examples
+#' data <- c(10.5, 12.3, 11.2)
+#' dates <- as.PCICt(c("2024-01-01", "2024-01-02", "2024-01-03"),
+#'                   format = "%Y-%m-%d", cal = "gregorian")
+#' climdexGenericScalar.raw(data,
+#'                          dates,
+#'                          max.missing.days = c(annual = 15, monthly = 3, seasonal = 6))
+#'
+#' @export
+climdexGenericScalar.raw <- function(
+  data,
+  dates,
+  max.missing.days = c(annual = 15, monthly = 3, seasonal = 6),
+  northern.hemisphere = TRUE,
+  calendar = "gregorian"
+) {
+
+  check.generic.argument.validity(data, dates, max.missing.days, calendar)
+
+  # Calendar, full daily sequence, grouping factors and day-of-year values.
+  info <- date_info(dates)
+
+  # A scalar series yields a one-element list; label it so the masks and the
+  # lookup below can address it by name.
+  filled <- generate_filled_list(data, dates, info$date.series)
+  names(filled) <- "data"
+  masks <- generate_namasks(filled, info$date.factors, max.missing.days)
+
+  new(
+    "climdexGenericScalar",
+    data = filled[["data"]],
+    dates = info$date.series,
+    date.factors = info$date.factors,
+    jdays = info$jdays,
+    namasks = masks,
+    northern.hemisphere = northern.hemisphere,
+    max.missing.days = max.missing.days
+  )
+}
+
+#' @title climdexGenericScalar.csv
+#'
+#' @description
+#' Reads scalar climate data from a CSV file and builds a `climdexGenericScalar` object.
+#'
+#' @details
+#' The file is read, the requested data column and the date columns are
+#' extracted, the date columns are combined into dates using `date.format`,
+#' and the result is passed to \code{\link{climdexGenericScalar.raw}}.
+#'
+#' The CSV file must hold the data (e.g., humidity, snow-depth) in a single
+#' named column and the date fields in one or more separate named columns.
+#'
+#' @param file The file path to the CSV containing the scalar climate data.
+#' @param data.column The name of the column containing the scalar data in the CSV file.
+#' @param date.columns A vector of column names corresponding to the date fields in the CSV file.
+#' @param date.format A string giving the format of the date fields once they
+#'   are joined, in the order given, with single spaces (e.g. "\%Y \%m \%d").
+#' @param na.strings A character vector of strings to interpret as `NA`.
+#' @param northern.hemisphere Logical indicating whether the data is from the northern hemisphere.
+#' @param max.missing.days A named vector specifying the maximum number of missing days allowed for annual, monthly, and seasonal periods.
+#' @param calendar A string representing the calendar type (e.g., "gregorian").
+#'
+#' @return A `climdexGenericScalar` object containing the processed scalar climate data.
+#'
+#' @seealso \code{\link{climdexGenericScalar.raw}}, \code{\link{climdexGenericVector.csv}}
+#'
+#' @examples
+#' # Example usage for scalar data:
+#'
+#' # Simulating CSV data for humidity
+#' csv_data <- "
+#' year,month,day,humidity
+#' 2024,01,01,80
+#' 2024,01,02,82
+#' 2024,01,03,85
+#' "
+#'
+#' # Write the CSV to a temporary file
+#' temp_file <- tempfile(fileext = ".csv")
+#' writeLines(csv_data, temp_file)
+#'
+#' # Call the climdexGenericScalar.csv function
+#' climdexGenericScalar.csv(temp_file, data.column = "humidity",
+#'                          date.columns = c("year", "month", "day"),
+#'                          date.format = "%Y %m %d", calendar = "gregorian")
+#'
+#' @export
+climdexGenericScalar.csv <- function(
+    file,
+    data.column,
+    date.columns,
+    date.format,
+    na.strings = NULL,
+    northern.hemisphere = TRUE,
+    max.missing.days = c(annual = 15, monthly = 3, seasonal = 6),
+    calendar = "gregorian"
+) {
+
+  # Keep this as a direct statement of this function: read_csv_data() inspects
+  # its caller's name to decide how many data columns to expect.
+  parsed <- read_csv_data(file, data.column, date.columns, date.format, na.strings, calendar)
+
+  # A scalar file carries a single data column, hence the first element.
+  climdexGenericScalar.raw(
+    data = parsed$data[[1]],
+    dates = parsed$dates,
+    max.missing.days = max.missing.days,
+    northern.hemisphere = northern.hemisphere,
+    calendar = calendar
+  )
+}