diff --git a/NAMESPACE b/NAMESPACE index 8e7c0bc..8de95c6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,10 +1,14 @@ # Generated by roxygen2: do not edit by hand +S3method(plot,available) S3method(plot,hy) S3method(plot,realtime) +S3method(print,available) S3method(print,hy) S3method(print,realtime) S3method(print,ws) +export(available_flows) +export(available_levels) export(download_hydat) export(enexpr) export(enquo) @@ -59,8 +63,6 @@ export(search_stn_name) export(search_stn_number) export(sym) export(syms) -export(ws_daily_flows) -export(ws_daily_levels) importFrom(rlang,":=") importFrom(rlang,.data) importFrom(rlang,UQ) diff --git a/NEWS.md b/NEWS.md index 5552843..4e288b1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,6 @@ # tidyhydat 0.7.2.9000 -- add historical webservice functions `ws_daily_flows` and `ws_daily_levels` (#212) +- `hy_daily_flows()` and `hy_daily_levels()` now support `hydat_path = FALSE` to access historical web service data without downloading HYDAT +- add `available_flows()` and `available_levels()` functions that combine validated historical data with provisional real-time data - add httptest2 for mocking webservice tests (#212) - add classes to webservice outputs with print methods (#212) diff --git a/R/available-classes.R b/R/available-classes.R new file mode 100644 index 0000000..a25c7f6 --- /dev/null +++ b/R/available-classes.R @@ -0,0 +1,261 @@ +# Copyright 2025 Hakai Institute +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. 
+
+## Add "available" class for combined validated + provisional data
+##
+## Moves "available" to the front of the class vector (without duplicating it)
+## and records the current time, expressed in UTC, in the `query_time`
+## attribute. Returns the modified object.
+as.available <- function(x) {
+  class(x) <- c("available", setdiff(class(x), "available"))
+  ## Named `query_time` rather than `t` so base::t() is not masked
+  query_time <- Sys.time()
+  attr(query_time, "tzone") <- "UTC"
+  attr(x, "query_time") <- query_time
+
+  x
+}
+
+#' @export
+print.available <- function(x, ...) {
+  ## Prints a summary header (query time, historical source, date ranges by
+  ## approval status, record counts, station coverage, parameters) and then
+  ## the data as a tibble. `...` is forwarded to the tibble print method.
+  cat(paste(" Queried on:", attributes(x)$query_time, "(UTC)\n"))
+
+  ## Historical data source (attribute is absent or NA when no final data
+  ## source was recorded)
+  hist_source <- attributes(x)$historical_source
+  if (!is.null(hist_source) && !is.na(hist_source)) {
+    cat(paste0(" Historical data source: ", hist_source, "\n"))
+  }
+
+  ## Date range by approval status
+  if ("Date" %in% names(x) && "Approval" %in% names(x)) {
+    ## Final/validated data range
+    final_data <- x[x$Approval == "final" & !is.na(x$Approval), ]
+    if (nrow(final_data) > 0) {
+      final_range <- paste0(
+        range(as.Date(final_data$Date), na.rm = TRUE),
+        collapse = " to "
+      )
+      cat(paste0(" Final data range: ", final_range, "\n"))
+    } else {
+      cat(crayon::yellow(" Final data range: No final data\n"))
+    }
+
+    ## Provisional data range
+    prov_data <- x[x$Approval == "provisional" & !is.na(x$Approval), ]
+    if (nrow(prov_data) > 0) {
+      prov_range <- paste0(
+        range(as.Date(prov_data$Date), na.rm = TRUE),
+        collapse = " to "
+      )
+      cat(paste0(" Provisional data range: ", prov_range, "\n"))
+    } else {
+      cat(crayon::yellow(" Provisional data range: No provisional data\n"))
+    }
+
+    ## Overall date range. range() warns and returns non-finite values when
+    ## there is no non-missing date (e.g. a zero-row result), so that case is
+    ## reported explicitly instead.
+    all_dates <- as.Date(x$Date)
+    if (any(!is.na(all_dates))) {
+      overall_range <- paste0(
+        range(all_dates, na.rm = TRUE),
+        collapse = " to "
+      )
+      cat(paste0(" Overall date range: ", overall_range, "\n"))
+    } else {
+      cat(crayon::yellow(" Overall date range: No data\n"))
+    }
+  }
+
+  ## Data source breakdown
+  if ("Approval" %in% names(x)) {
+    approval_counts <- table(x$Approval)
+    cat(" Records by approval status:\n")
+    for (status in names(approval_counts)) {
+      count <- format(approval_counts[status], big.mark = ",")
+      cat(paste0(" ", status, ": ", count, "\n"))
+    }
+  }
+
+  ## Station coverage
+  if ("STATION_NUMBER" %in% names(x)) {
+    n_stns <- format(dplyr::n_distinct(x$STATION_NUMBER), big.mark = ",")
+    cat(paste0(" Station(s) returned: ", n_stns, "\n"))
+
+    ## `missed_stns` is NULL when it was never recorded, and zero-length when
+    ## every requested station came back
+    differ <- attributes(x)$missed_stns
+    if (!is.null(differ) && length(differ) > 0) {
+      cat(" Stations requested but not returned: \n")
+      if (length(differ) > 10) {
+        cat(crayon::cyan(
+          " More than 10 stations requested but not returned.\n"
+        ))
+      } else {
+        cat(crayon::cyan(paste0(" ", paste0(differ, collapse = " "), "\n")))
+      }
+    } else if (!is.null(differ)) {
+      cat(crayon::cyan(" All stations successfully retrieved.\n"))
+    }
+  }
+
+  ## Parameter info
+  if ("Parameter" %in% names(x)) {
+    cat(paste0(
+      " Parameter(s): ",
+      paste0(unique(x$Parameter), collapse = "/"),
+      "\n"
+    ))
+  }
+
+  print(dplyr::as_tibble(x), ...)
+}
+
+#' Plot available data (final + provisional)
+#'
+#' This method plots combined final and provisional data, visually distinguishing
+#' between validated (final) and provisional records.
+#'
+#' @param x Object created by `available_flows()` or `available_levels()`
+#' @param ... passed to [plot()]
+#'
+#' @details
+#' One panel is drawn per station, for at most four stations. Final records
+#' are drawn in black and provisional records in light blue. Graphical
+#' parameters and the panel layout in effect before the call are restored
+#' when the function exits.
+#'
+#' @return `TRUE`, invisibly.
+#'
+#' @method plot available
+#' @name plot
+#'
+#' @examples
+#' \dontrun{
+#' # One station
+#' flows <- available_flows("08MF005")
+#' plot(flows)
+#' }
+#'
+#' @export
+#'
+plot.available <- function(x = NULL, ...) {
+  required_cols <- c("STATION_NUMBER", "Date", "Parameter", "Value", "Approval")
+  if (!all(required_cols %in% names(x))) {
+    stop("plot.available requires STATION_NUMBER, Date, Parameter, Value, and Approval columns", call. = FALSE)
+  }
+
+  ### Join with meta data to get station name
+  hydf <- dplyr::left_join(
+    x,
+    suppressMessages(tidyhydat::allstations),
+    by = c("STATION_NUMBER")
+  )
+
+  hydf$STATION <- paste(hydf$STATION_NAME, hydf$STATION_NUMBER, sep = " - ")
+  hydf$STATION <- factor(hydf$STATION)
+
+  stations <- unique(hydf$STATION)
+  num_stns <- length(stations)
+
+  if (num_stns > 4L) {
+    stop("You are trying to plot more than four stations at once.", call. = FALSE)
+  }
+
+  ## layout() and par() below change the state of the graphics device;
+  ## restoring the saved parameters on exit also resets the layout
+  opar <- graphics::par(no.readonly = TRUE)
+  on.exit(graphics::par(opar), add = TRUE)
+
+  ## Panels are filled in order: title, one per station, then the legend.
+  ## A 0 in the layout matrix leaves that cell empty.
+  if (num_stns == 4L) {
+    m <- matrix(c(1, 1, 2, 3, 4, 5, 6, 6), nrow = 4, ncol = 2, byrow = TRUE)
+    graphics::layout(mat = m, heights = c(0.1, 0.35, 0.35, 0.2))
+  }
+
+  if (num_stns == 3L) {
+    ## Fourth station cell stays blank so the legend lands in the bottom strip
+    m <- matrix(c(1, 1, 2, 3, 4, 0, 5, 5), nrow = 4, ncol = 2, byrow = TRUE)
+    graphics::layout(mat = m, heights = c(0.1, 0.35, 0.35, 0.2))
+  }
+
+  if (num_stns == 2L) {
+    m <- matrix(c(1, 1, 2, 3, 4, 4), nrow = 3, ncol = 2, byrow = TRUE)
+    graphics::layout(mat = m, heights = c(0.2, 0.6, 0.2))
+  }
+
+  if (num_stns == 1L) {
+    m <- matrix(c(1, 2, 3), nrow = 3, ncol = 1, byrow = TRUE)
+    graphics::layout(mat = m, heights = c(0.2, 0.6, 0.2))
+  }
+
+  ## Title panel
+  graphics::par(mar = c(1, 1, 1, 1))
+  graphics::plot.new()
+  graphics::text(
+    0.5,
+    0.5,
+    "Water Survey of Canada Gauges\n(Final + Provisional Data)",
+    cex = 2,
+    font = 2
+  )
+
+  for (i in seq_along(stations)) {
+    graphics::par(
+      mar = c(4, 5, 2, 1),
+      mgp = c(3.1, 0.4, 0),
+      las = 1,
+      tck = -.01,
+      xaxs = "i",
+      yaxs = "i"
+    )
+
+    station_data <- hydf[hydf$STATION == stations[i], ]
+
+    ## %in% (unlike ==) never yields NA, so records with a missing Approval
+    ## do not leak into either subset as all-NA rows
+    final_data <- station_data[station_data$Approval %in% "final", ]
+    provisional_data <- station_data[station_data$Approval %in% "provisional", ]
+
+    ## Empty frame first; points are added per approval status below.
+    ## NOTE(review): label_helper() is defined elsewhere in the package and
+    ## presumably returns plotmath expression text for the axis label - confirm.
+    graphics::plot(
+      Value ~ Date,
+      data = station_data,
+      xlab = "Date",
+      ylab = eval(parse(text = label_helper(unique(hydf$Parameter)))),
+      axes = FALSE,
+      type = "n",
+      ylim = c(0, max(station_data$Value, na.rm = TRUE)),
+      frame.plot = TRUE,
+      ...
+    )
+
+    ## Plot final data in dark color
+    if (nrow(final_data) > 0) {
+      graphics::points(
+        Value ~ Date,
+        data = final_data,
+        pch = 20,
+        cex = 0.75,
+        col = "#000000"
+      )
+    }
+
+    ## Plot provisional data in lighter color
+    if (nrow(provisional_data) > 0) {
+      graphics::points(
+        Value ~ Date,
+        data = provisional_data,
+        pch = 20,
+        cex = 0.75,
+        col = "#82D6FF"
+      )
+    }
+
+    ## Axis labels are written in the margin; the last pretty() break on the
+    ## y axis and the first and last on the x axis are dropped
+    at_y <- utils::head(pretty(station_data$Value), -1)
+    graphics::mtext(
+      side = 2,
+      text = at_y,
+      at = at_y,
+      col = "grey20",
+      line = 1,
+      cex = 0.75
+    )
+
+    at_x <- utils::tail(utils::head(pretty(station_data$Date), -1), -1)
+    graphics::mtext(
+      side = 1,
+      text = format(at_x, "%Y"),
+      at = at_x,
+      col = "grey20",
+      line = 1,
+      cex = 0.75
+    )
+
+    graphics::title(main = paste0(stations[i]), cex.main = 1.1)
+  }
+
+  ## Legend
+  graphics::plot(1, type = "n", axes = FALSE, xlab = "", ylab = "")
+  graphics::legend(
+    x = "center",
+    legend = c("Final (validated)", "Provisional"),
+    pch = 20,
+    col = c("#000000", "#82D6FF"),
+    bty = "n",
+    cex = 1.2,
+    horiz = TRUE
+  )
+
+  invisible(TRUE)
+}
diff --git a/R/available.R b/R/available.R
new file mode 100644
index 0000000..2e9d493
--- /dev/null
+++ b/R/available.R
@@ -0,0 +1,413 @@
+# Copyright 2025 Hakai Institute
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and limitations under the License.
+
+#' Get all available flow data (final + provisional)
+#'
+#' Convenience function that combines final historical data
+#' (from HYDAT or web service) with provisional real-time data in a single call.
+#'
+#' @inheritParams hy_daily_flows
+#' @param start_date Start date for data retrieval in YYYY-MM-DD format.
+#'   Defaults to NULL (retrieves all available historical data).
+#' @param end_date End date for data retrieval in YYYY-MM-DD format.
+#'   Defaults to current date (Sys.Date()).
+#'
+#' @details
+#' This function combines data from final and provisional data sources to provide a complete
+#' discharge record.
+#'
+#' ## Data Sources and Priority
+#'
+#' **Historical (Final) Data:**
+#'
+#' The function automatically determines the best source for historical data:
+#' - **`hydat_path` provided** (path to database): Uses local HYDAT database at that path
+#' - **`hydat_path = FALSE`**: Forces use of web service (useful when HYDAT unavailable)
+#' - **`hydat_path = NULL`** (default): Uses HYDAT default location, automatically falls back
+#'   to web service if HYDAT is unavailable
+#'
+#' **Real-time (Provisional) Data:**
+#'
+#' Provisional data is retrieved from ECCC's real-time web service using the
+#' `realtime_ws()` function. This data covers approximately the last 18 months
+#' and is updated regularly. When final data are returned, real-time data are
+#' requested starting the day after the last final record. Real-time
+#' observations are averaged to one mean value per station and day (missing
+#' values are ignored).
+#'
+#' ## Data Approval Status
+#'
+#' The `Approval` column uses ECCC's terminology
+#' (see \url{https://wateroffice.ec.gc.ca/contactus/faq_e.html}):
+#'
+#' - **"final"**: Historical data from HYDAT or web service that has been approved by ECCC.
+#'
+#' - **"provisional"**: Real-time data from the web service representing the best
+#'   available measurements, but subject to revision and not yet approved by ECCC.
+#'
+#'
+#' @return A tibble with class `available` combining final and provisional data
+#'   with an additional `Approval` column indicating whether each record is
+#'   "final" or "provisional". The object includes attributes for tracking data
+#'   sources and query metadata (`historical_source`, `missed_stns` and
+#'   `query_time`).
+#'
+#' @format A tibble with 6 variables:
+#' \describe{
+#'   \item{STATION_NUMBER}{Unique 7 digit Water Survey of Canada station number}
+#'   \item{Date}{Observation date. Formatted as a Date class.}
+#'   \item{Parameter}{Parameter being measured. Value is "Flow"}
+#'   \item{Value}{Discharge value. The units are m^3/s. Provisional values are daily means.}
+#'   \item{Symbol}{Measurement/river conditions. Always `NA` for provisional records.}
+#'   \item{Approval}{Approval status: "final" (approved) or "provisional" (subject to revision)}
+#' }
+#'
+#' @examples
+#' \dontrun{
+#' ## Basic usage - get all available data
+#' flows <- available_flows("08MF005")
+#'
+#' ## Multiple stations
+#' flows <- available_flows(c("08MF005", "08NM116"))
+#'
+#' ## Get only recent data (last 2 years)
+#' recent_flows <- available_flows(
+#'   "08MF005",
+#'   start_date = Sys.Date() - lubridate::years(2),
+#'   end_date = Sys.Date()
+#' )
+#'
+#' ## Force use of web service (when HYDAT not available)
+#' flows_ws <- available_flows("08MF005", hydat_path = FALSE)
+#' }
+#'
+#' @family available functions
+#' @export
+available_flows <- function(
+  station_number,
+  start_date = NULL,
+  end_date = Sys.Date(),
+  hydat_path = NULL,
+  prov_terr_state_loc = NULL
+) {
+  ## Thin wrapper: all retrieval and combination logic lives in
+  ## get_available_data(); 47 is the realtime_ws() parameter code for flow
+  get_available_data(
+    station_number = station_number,
+    start_date = start_date,
+    end_date = end_date,
+    hydat_path = hydat_path,
+    prov_terr_state_loc = prov_terr_state_loc,
+    parameter_type = "Flow",
+    parameter_code = 47
+  )
+}
+
+
+#' Get all available level data (final + provisional)
+#'
+#' Convenience function that combines final historical data
+#' (from HYDAT or web service) with provisional real-time data in a single call.
+#'
+#' @inheritParams available_flows
+#'
+#' @details
+#' This function combines data from final and provisional data sources to provide a complete
+#' water level record.
+#'
+#' ## Data Sources and Priority
+#'
+#' **Historical (Final) Data:**
+#'
+#' The function automatically determines the best source for historical data:
+#' - **`hydat_path` provided** (path to database): Uses local HYDAT database at that path
+#' - **`hydat_path = FALSE`**: Forces use of web service (useful when HYDAT unavailable)
+#' - **`hydat_path = NULL`** (default): Uses HYDAT default location, automatically falls back
+#'   to web service if HYDAT is unavailable
+#'
+#' **Real-time (Provisional) Data:**
+#'
+#' Provisional data is retrieved from ECCC's real-time web service using the
+#' `realtime_ws()` function. This data covers approximately the last 18 months
+#' and is updated regularly. When final data are returned, real-time data are
+#' requested starting the day after the last final record. Real-time
+#' observations are averaged to one mean value per station and day (missing
+#' values are ignored).
+#'
+#' ## Data Approval Status
+#'
+#' The `Approval` column uses ECCC's terminology
+#' (see \url{https://wateroffice.ec.gc.ca/contactus/faq_e.html}):
+#'
+#' - **"final"**: Historical data from HYDAT or web service that has been approved by ECCC.
+#'
+#' - **"provisional"**: Real-time data from the web service representing the best
+#'   available measurements, but subject to revision and not yet approved by ECCC.
+#'
+#' @return A tibble with class `available` combining final and provisional data
+#'   with an additional `Approval` column indicating whether each record is
+#'   "final" or "provisional". The object includes attributes for tracking data
+#'   sources and query metadata (`historical_source`, `missed_stns` and
+#'   `query_time`).
+#'
+#' @format A tibble with 6 variables:
+#' \describe{
+#'   \item{STATION_NUMBER}{Unique 7 digit Water Survey of Canada station number}
+#'   \item{Date}{Observation date. Formatted as a Date class.}
+#'   \item{Parameter}{Parameter being measured. Value is "Level"}
+#'   \item{Value}{Level value. The units are metres. Provisional values are daily means.}
+#'   \item{Symbol}{Measurement/river conditions. Always `NA` for provisional records.}
+#'   \item{Approval}{Approval status: "final" (approved) or "provisional" (subject to revision)}
+#' }
+#'
+#' @examples
+#' \dontrun{
+#' ## Basic usage - get all available data
+#' levels <- available_levels("08MF005")
+#'
+#' ## Multiple stations
+#' levels <- available_levels(c("08MF005", "08NM116"))
+#'
+#' ## Get only recent data (last 2 years)
+#' recent_levels <- available_levels(
+#'   "08MF005",
+#'   start_date = Sys.Date() - lubridate::years(2),
+#'   end_date = Sys.Date()
+#' )
+#'
+#' ## Force use of web service (when HYDAT not available)
+#' levels_ws <- available_levels("08MF005", hydat_path = FALSE)
+#' }
+#'
+#' @family available functions
+#' @export
+available_levels <- function(
+  station_number,
+  start_date = NULL,
+  end_date = Sys.Date(),
+  hydat_path = NULL,
+  prov_terr_state_loc = NULL
+) {
+  ## Thin wrapper: all retrieval and combination logic lives in
+  ## get_available_data(); 46 is the realtime_ws() parameter code for level
+  get_available_data(
+    station_number = station_number,
+    start_date = start_date,
+    end_date = end_date,
+    hydat_path = hydat_path,
+    prov_terr_state_loc = prov_terr_state_loc,
+    parameter_type = "Level",
+    parameter_code = 46
+  )
+}
+
+
+#' Internal helper to get available data
+#'
+#' Core logic for available_flows() and available_levels(). Handles data source
+#' selection, retrieval, and combination.
+#'
+#' Final data come from hy_daily_flows()/hy_daily_levels(). With
+#' `hydat_path = NULL`, any error from the HYDAT query triggers one retry
+#' against the web service; with an explicit path or `FALSE`, a failure is
+#' reported as a warning and only provisional data are returned. Provisional
+#' data come from realtime_ws() and are averaged to daily means.
+#'
+#' @param station_number Station number(s)
+#' @param start_date Start date (YYYY-MM-DD), or NULL for no lower bound
+#' @param end_date End date (YYYY-MM-DD), or NULL for no upper bound
+#' @param hydat_path Path to HYDAT database (NULL/FALSE for auto/web service)
+#' @param prov_terr_state_loc Province/territory/state location code. Only
+#'   forwarded to the HYDAT query, not to the web service or real-time queries.
+#' @param parameter_type "Flow" or "Level"
+#' @param parameter_code Parameter code for realtime_ws (47=Flow, 46=Level)
+#'
+#' @return Combined tibble of class `available` with an Approval column and
+#'   the attributes `historical_source`, `missed_stns` and `query_time`
+#' @noRd
+#' @keywords internal
+get_available_data <- function(
+  station_number,
+  start_date = NULL,
+  end_date = Sys.Date(),
+  hydat_path = NULL,
+  prov_terr_state_loc = NULL,
+  parameter_type,
+  parameter_code
+) {
+
+  ## Initialize variable to store provisional data
+  provisional_data <- NULL
+
+  ## Get final data using hy_daily_* functions
+  ## These handle data source selection internally based on hydat_path
+  if (parameter_type == "Flow") {
+    hydat_fn <- hy_daily_flows
+  } else if (parameter_type == "Level") {
+    hydat_fn <- hy_daily_levels
+  } else {
+    stop("parameter_type must be 'Flow' or 'Level'", call. = FALSE)
+  }
+
+  ## The web service branch of hy_daily_*() stops when either date is NULL,
+  ## so open-ended requests are given explicit bounds. Used both when the web
+  ## service is forced (hydat_path = FALSE) and as the fallback for HYDAT.
+  fetch_from_web_service <- function() {
+    hydat_fn(
+      station_number = station_number,
+      hydat_path = FALSE, # Force web service
+      start_date = if (is.null(start_date)) as.Date("1850-01-01") else start_date,
+      end_date = if (is.null(end_date)) Sys.Date() else end_date
+    )
+  }
+
+  ## Result used when no final data could be retrieved
+  no_final_data <- list(data = NULL, source = NA_character_)
+
+  ## Each branch returns list(data, source) so the handlers do not need to
+  ## assign into the enclosing frame
+  historical <- tryCatch(
+    {
+      result <- if (isFALSE(hydat_path)) {
+        fetch_from_web_service()
+      } else {
+        hydat_fn(
+          station_number = station_number,
+          hydat_path = hydat_path,
+          prov_terr_state_loc = prov_terr_state_loc,
+          start_date = start_date,
+          end_date = end_date
+        )
+      }
+
+      ## Determine source based on class
+      source <- if (inherits(result, "hy")) {
+        "HYDAT"
+      } else if (inherits(result, "ws")) {
+        "Web Service"
+      } else {
+        "Unknown"
+      }
+
+      list(data = result, source = source)
+    },
+    error = function(e) {
+      ## Only fallback to web service if hydat_path was NULL
+      if (is.null(hydat_path)) {
+        message("HYDAT unavailable, falling back to web service...")
+
+        tryCatch(
+          list(data = fetch_from_web_service(), source = "Web Service"),
+          error = function(e2) {
+            warning(
+              "Failed to retrieve validated data from both HYDAT and web service",
+              call. = FALSE
+            )
+            no_final_data
+          }
+        )
+      } else {
+        ## hydat_path was explicitly set (path or FALSE): warn and carry on
+        ## with provisional data only
+        warning(
+          "Failed to retrieve validated data: ", conditionMessage(e),
+          call. = FALSE
+        )
+        no_final_data
+      }
+    }
+  )
+
+  final_data <- historical$data
+  historical_source <- historical$source
+  has_final <- !is.null(final_data) && nrow(final_data) > 0
+
+  ## Add Approval column to final data
+  if (has_final) {
+    final_data$Approval <- "final"
+  }
+
+
+  # Get provisional/realtime data
+  # Determine starting date for realtime query
+  # Use the latest date from final data as the starting point
+  realtime_start <- if (has_final) {
+    ## Start from the day after the last final record
+    max(final_data$Date, na.rm = TRUE) + lubridate::days(1)
+  } else if (!is.null(start_date)) {
+    ## No final data, use user-provided start_date
+    as.Date(start_date)
+  } else {
+    ## No final data and no start_date, query from 18 months ago.
+    ## lubridate::month() is an accessor that extracts the month number, so a
+    ## period is built explicitly; add_with_rollback() keeps the result a
+    ## valid date when the target month is shorter.
+    lubridate::add_with_rollback(
+      Sys.Date(),
+      -lubridate::period(18, units = "months")
+    )
+  }
+
+  ## End date defaults to today unless user specified
+  realtime_end <- if (!is.null(end_date)) {
+    as.Date(end_date)
+  } else {
+    Sys.Date()
+  }
+
+  ## Only query realtime if there's a valid date range
+  if (realtime_start <= realtime_end) {
+    ## Query realtime web service; "no data" is an expected outcome and is
+    ## mapped to NULL, any other error is re-raised
+    rt_data <- tryCatch(
+      realtime_ws(
+        station_number = station_number,
+        parameters = parameter_code,
+        start_date = realtime_start,
+        end_date = realtime_end
+      ),
+      error = function(e) {
+        if (grepl("No data exists for this station query", conditionMessage(e), fixed = TRUE)) {
+          return(NULL)
+        }
+        stop(e)
+      }
+    )
+
+    ## Only process if we got realtime data
+    if (!is.null(rt_data)) {
+      ## Convert Date to Date class (it comes as POSIXct)
+      rt_data$Date <- as.Date(rt_data$Date)
+
+      ## Aggregate to daily means
+      sym_STATION_NUMBER <- sym("STATION_NUMBER")
+      sym_Date <- sym("Date")
+      sym_Value <- sym("Value")
+
+      provisional_data <- rt_data |>
+        dplyr::group_by(!!sym_STATION_NUMBER, !!sym_Date) |>
+        dplyr::summarise(Value = mean(!!sym_Value, na.rm = TRUE), .groups = "drop") |>
+        dplyr::mutate(
+          Parameter = parameter_type,
+          Symbol = NA_character_,
+          Approval = "provisional"
+        ) |>
+        ## Columns selected by name (strings) to match the final data layout
+        ## without relying on bare global symbols
+        dplyr::select(dplyr::all_of(
+          c("STATION_NUMBER", "Date", "Parameter", "Value", "Symbol", "Approval")
+        ))
+    }
+  }
+
+
+  ## Combine final and provisional data (NULL inputs are dropped)
+  combined_data <- dplyr::bind_rows(final_data, provisional_data)
+
+  ## Apply date filtering and sorting only if we have data
+  if (nrow(combined_data) > 0) {
+    sym_STATION_NUMBER <- sym("STATION_NUMBER")
+    sym_Date <- sym("Date")
+
+    ## Apply date filtering if not already applied
+    if (!is.null(start_date)) {
+      combined_data <- dplyr::filter(combined_data, !!sym_Date >= as.Date(start_date))
+    }
+    if (!is.null(end_date)) {
+      combined_data <- dplyr::filter(combined_data, !!sym_Date <= as.Date(end_date))
+    }
+
+    ## Sort by station and date
+    combined_data <- dplyr::arrange(combined_data, !!sym_STATION_NUMBER, !!sym_Date)
+  }
+
+  ## Store metadata as attributes
+  attr(combined_data, "historical_source") <- historical_source
+
+  ## Calculate missed stations only if we have data
+  if (nrow(combined_data) > 0) {
+    attr(combined_data, "missed_stns") <- setdiff(
+      unique(station_number),
+      unique(combined_data$STATION_NUMBER)
+    )
+  } else {
+    ## If no data at all, all requested stations were missed
+    attr(combined_data, "missed_stns") <- unique(station_number)
+  }
+
+  ## Return with available class
+  as.available(combined_data)
+}
diff --git a/R/historical-webservice.R b/R/historical-webservice.R
index f4397de..a183faa 100644
--- a/R/historical-webservice.R
+++ b/R/historical-webservice.R
@@ -1,74 +1,13 @@
-#' Download historical flow and level data from the ECCC web service
+#' Internal helper to get historical data from web service
 #'
-#' Functions to retrieve historical flow and levels data from ECCC web service. This data is
-#' the same as HYDAT data but provides the convenience of not having to download
-#' the HYDAT database. This function is useful when a smaller amount of data is needed.
If -#' you need lots of data, consider using HYDAT and the `hy_` family of functions +#' @param station_number Water Survey of Canada station number +#' @param parameters Either "flow" or "level" +#' @param start_date Start date in YYYY-MM-DD format (required) +#' @param end_date End date in YYYY-MM-DD format (required) #' -#' @param station_number Water Survey of Canada station number. -#' @param start_date Accepts YYYY-MM-DD. You need to provide a start date. -#' The default value is NULL -#' @param end_date Accepts either YYYY-MM-DD. You need to provide an end date. -#' The default value is NULL -#' -#' -#' @format A tibble with 6 variables: -#' \describe{ -#' \item{STATION_NUMBER}{Unique 7 digit Water Survey of Canada station number} -#' \item{Date}{Observation date and time. Formatted as a POSIXct class as UTC for consistency.} -#' \item{Parameter}{Type of parameter} -#' \item{Value}{Value of the measurement.} -#' \item{Symbol}{future use} -#' } -#' -#' @seealso hy_daily_flows -#' @examples -#' \dontrun{ -#' try( -#' flow_data <- ws_daily_flows( -#' station_number = c("08NL071", "08NM174"), -#' start_date = Sys.Date() - 365, -#' end_date = Sys.Date() -#' ) -#' ) -#' try( -#' level_data <- ws_daily_level( -#' station_number = c("08NL071", "08NM174"), -#' start_date = Sys.Date() - 365, -#' end_date = Sys.Date() -#' ) -#' ) -#'} -#' @export -ws_daily_flows <- function( - station_number, - start_date = NULL, - end_date = NULL) { - - get_historical_data( - station_number = station_number, - parameters = "flow", - start_date = start_date, - end_date = end_date - ) -} - -#' @rdname ws_daily_flows -#' @export -ws_daily_levels <- function( - station_number, - start_date = NULL, - end_date = NULL) { - - get_historical_data( - station_number = station_number, - parameters = "level", - start_date = start_date, - end_date = end_date - ) -} - - +#' @return A tibble with historical data from the web service +#' @noRd +#' @keywords internal get_historical_data <- function( 
station_number, parameters = "flow", diff --git a/R/hy_daily_flows.R b/R/hy_daily_flows.R index bdd4473..c048fe6 100644 --- a/R/hy_daily_flows.R +++ b/R/hy_daily_flows.R @@ -10,11 +10,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. -#' @title Extract daily flows information from the HYDAT database +#' @title Extract daily flows information from HYDAT database or web service #' -#' @description Provides wrapper to turn the DLY_FLOWS table in HYDAT into a tidy data frame of daily flows. -#' `station_number` and `prov_terr_state_loc` can both be supplied. If both are omitted all -#' values from the `hy_stations` table are returned. That is a large tibble for `hy_daily_flows`. +#' @description Provides wrapper to turn the DLY_FLOWS table in HYDAT (or historical web service) +#' into a tidy data frame of daily flows. `station_number` and `prov_terr_state_loc` can both be +#' supplied. If both are omitted all values from the `hy_stations` table are returned. +#' That is a large tibble for `hy_daily_flows`. #' #' @inheritParams hy_stations #' @param start_date Leave blank if all dates are required. Date format needs to be in YYYY-MM-DD. Date is inclusive. @@ -22,6 +23,12 @@ #' @param symbol_output Set whether the raw code, or the `english` or the `french` translations are outputted. Default #' value is `code`. 
#' +#' @details +#' The `hydat_path` argument controls the data source: +#' - **NULL** (default): Uses local HYDAT database (default location) +#' - **FALSE**: Forces use of historical web service (requires `start_date` and `end_date`) +#' - **Path string**: Uses HYDAT database at the specified path +#' #' @return A tibble of daily flows #' #' @format A tibble with 5 variables: @@ -56,6 +63,25 @@ hy_daily_flows <- function( end_date = NULL, symbol_output = "code" ) { + ## Case 1: hydat_path = FALSE (force web service) + if (isFALSE(hydat_path)) { + ## Web service requires dates + if (is.null(start_date)) { + stop("start_date is required when using web service (hydat_path = FALSE)", call. = FALSE) + } + if (is.null(end_date)) { + stop("end_date is required when using web service (hydat_path = FALSE)", call. = FALSE) + } + + return(get_historical_data( + station_number = station_number, + parameters = "flow", + start_date = start_date, + end_date = end_date + )) + } + + ## Case 2: Use HYDAT (either explicit path or NULL for default) ## Determine which dates should be queried dates_null <- date_check(start_date, end_date) diff --git a/R/hy_daily_levels.R b/R/hy_daily_levels.R index 7de82f7..64655b9 100644 --- a/R/hy_daily_levels.R +++ b/R/hy_daily_levels.R @@ -10,14 +10,21 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. -#' @title Extract daily levels information from the HYDAT database +#' @title Extract daily levels information from HYDAT database or web service #' -#' @description Provides wrapper to turn the DLY_LEVELS table in HYDAT into a tidy data frame. The primary value returned by this -#' function is discharge. `station_number` and `prov_terr_state_loc` can both be supplied. If both are omitted all +#' @description Provides wrapper to turn the DLY_LEVELS table in HYDAT (or historical web service) +#' into a tidy data frame. 
The primary value returned by this function is water level. +#' `station_number` and `prov_terr_state_loc` can both be supplied. If both are omitted all #' values from the `hy_stations` table are returned. That is a large vector for `hy_daily_levels`. #' #' @inheritParams hy_daily_flows #' +#' @details +#' The `hydat_path` argument controls the data source: +#' - **NULL** (default): Uses local HYDAT database (default location) +#' - **FALSE**: Forces use of historical web service (requires `start_date` and `end_date`) +#' - **Path string**: Uses HYDAT database at the specified path +#' #' @return A tibble of daily levels #' #' @format A tibble with 5 variables: @@ -51,6 +58,25 @@ hy_daily_levels <- function( end_date = NULL, symbol_output = "code" ) { + ## Case 1: hydat_path = FALSE (force web service) + if (isFALSE(hydat_path)) { + ## Web service requires dates + if (is.null(start_date)) { + stop("start_date is required when using web service (hydat_path = FALSE)", call. = FALSE) + } + if (is.null(end_date)) { + stop("end_date is required when using web service (hydat_path = FALSE)", call. = FALSE) + } + + return(get_historical_data( + station_number = station_number, + parameters = "level", + start_date = start_date, + end_date = end_date + )) + } + + ## Case 2: Use HYDAT (either explicit path or NULL for default) ## Determine which dates should be queried dates_null <- date_check(start_date, end_date) diff --git a/R/realtime-webservice.R b/R/realtime-webservice.R index f298dbf..4bb7a01 100755 --- a/R/realtime-webservice.R +++ b/R/realtime-webservice.R @@ -23,10 +23,10 @@ #' for some options though undocumented parameters may be implemented. Defaults to Water level provisional, Secondary water level, #' Tertiary water level, Discharge Provisional, Discharge, sensor, Water temperature, Secondary water temperature, Accumulated precipitation #' @param start_date Accepts either YYYY-MM-DD or YYYY-MM-DD HH:MM:SS. -#' If only start date is supplied (i.e. 
YYYY-MM-DD) values are returned from the start of that day. +#' If only `start_date` is supplied (i.e. YYYY-MM-DD) values are returned from the start of that day. #' Defaults to 30 days before current date. Time is supplied in UTC. #' @param end_date Accepts either YYYY-MM-DD or YYYY-MM-DD HH:MM:SS. -#' If only a date is supplied (i.e. YYYY-MM-DD) values are returned from the end of that day. +#' If only `end_date` is supplied (i.e. YYYY-MM-DD) values are returned from the end of that day. #' Defaults to current date. Time is supplied in UTC. #' #' @@ -56,7 +56,7 @@ #' station_number = c("08NL071", "08NM174"), #' parameters = c(47, 5), #' end_date = Sys.Date(), # today -#' start_date = Sys.Date() - 5 # five days ago +#' start_date = Sys.Date() - lubridate::days(5) # five days ago #' ) #' } #' @family realtime functions @@ -65,7 +65,7 @@ realtime_ws <- function( station_number, parameters = NULL, - start_date = Sys.Date() - 30, + start_date = Sys.Date() - lubridate::days(30), end_date = Sys.Date() ) { if (is.null(parameters)) parameters <- c(46, 16, 52, 47, 8, 5, 41, 18) @@ -121,7 +121,7 @@ realtime_ws <- function( ## Turn it into a tibble and specify correct column classes csv_df <- readr::read_csv( - httr2::resp_body_string(resp), + I(httr2::resp_body_string(resp)), col_types = "cTidccc" ) diff --git a/R/zzz.R b/R/zzz.R index 43529ba..5a4feae 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -17,7 +17,7 @@ ## Only check when there is likely a new version i.e. about 3 months after last version if ( file.exists(file.path(hy_dir(), "Hydat.sqlite3")) && - Sys.Date() > (as.Date(hy_version()$Date) + 115) + Sys.Date() > (as.Date(hy_version()$Date) + lubridate::days(115)) ) { packageStartupMessage(info("Checking for a new version of HYDAT...")) diff --git a/README.Rmd b/README.Rmd index 9222a1c..fe38f9d 100644 --- a/README.Rmd +++ b/README.Rmd @@ -27,9 +27,9 @@ knitr::opts_chunk$set( ## What does `tidyhydat` do? 
-- Provides functions (`hy_*`) that access hydrometric data from the HYDAT database, a national archive of Canadian hydrometric data and return tidy data. +- Provides functions (`available_*`) that combine validated historical data with provisional real-time data. +- Provides functions (`hy_*`) that access hydrometric data from HYDAT, a national archive of Canadian hydrometric data, via the local database or the web service, and return tidy data. - Provides functions (`realtime_*`) that access Environment and Climate Change Canada's real-time hydrometric data source. -- Provides functions (`ws_*`) that access historical daily data via web service without requiring HYDAT download. - Provides functions (`search_*`) that can search through the approximately 7000 stations in the database and aid in generating station vectors - Keep functions as simple as possible. For example, for daily flows, the `hy_daily_flows()` function queries the database, *tidies* the data and returns a [tibble](https://tibble.tidyverse.org/) of daily flows. @@ -61,16 +61,24 @@ To use many of the functions in the `tidyhydat` package you will need to downloa download_hydat() ``` -This downloads (with your permission) the most recent version of HYDAT and then saves it in a location on your computer where ` -tidyhydat -`'s function will look for it. Do be patient though as this can take a long time! To see where HYDAT was saved you can run ` -hy_default_db() -`. Now that you have HYDAT downloaded and ready to go, you are all set to begin looking at Canadian hydrometric data. +This downloads (with your permission) the most recent version of HYDAT and then saves it in a location on your computer where `tidyhydat`'s +function will look for it. Do be patient though as this can take a long time! To see where HYDAT was saved you can run `hy_default_db()`. +Now that you have HYDAT downloaded and ready to go, you are all set to begin looking at Canadian hydrometric data. 
+ +### Combining validated and provisional data +For a complete record combining validated historical data with recent provisional data use the `available_flows` and `available_levels` functions. + +```{r} +available_flows( + station_number = "08MF005", + start_date = "2020-01-01", + end_date = Sys.Date() +) +``` ### Real-time -To download real-time data using the datamart we can use approximately the same conventions discussed above. Using ` -realtime_dd() -` we can easily select specific stations by supplying a station of interest: +To download real-time data using the datamart we can use approximately the same conventions discussed above. +Using `realtime_dd()` we can easily select specific stations by supplying a station of interest: ```{r} realtime_dd(station_number = "08MF005") ``` @@ -86,21 +94,24 @@ realtime_ws( ) ``` -### Historical web service -For smaller queries where downloading the entire HYDAT database is unnecessary, `tidyhydat` provides web service functions that access historical daily data directly: +### Using only HYDAT +If you wish to use only the final approved data in HYDAT database you can use: ```{r} -ws_daily_flows( +hy_daily_flows( station_number = "08MF005", start_date = "2020-01-01", end_date = "2020-12-31" ) ``` -Similarly for water levels: +### Using the web service without HYDAT +For smaller queries where downloading the entire HYDAT database is unnecessary, you can use `hy_daily_flows()` and `hy_daily_levels()` with `hydat_path = FALSE` to access historical daily data directly from the web service: + ```{r} -ws_daily_levels( +hy_daily_flows( station_number = "08MF005", + hydat_path = FALSE, start_date = "2020-01-01", end_date = "2020-12-31" ) @@ -119,18 +130,12 @@ ws_daily_levels( ### Plotting -Plot methods are also provided to quickly visualize realtime data: -```{r} -realtime_ex <- realtime_dd(station_number = "08MF005") - -plot(realtime_ex) -``` +Plot methods are also provided to quickly visualize data: -and also historical data: 
```{r, fig.height=7, fig.width=12} -hy_ex <- hy_daily_flows(station_number = "08MF005", start_date = "2013-01-01") +flows_ex <- available_flows(station_number = "08MF005", start_date = "2013-01-01") -plot(hy_ex) +plot(flows_ex) ``` ## Getting Help or Reporting an Issue diff --git a/README.md b/README.md index d44a0ad..934589c 100644 --- a/README.md +++ b/README.md @@ -21,13 +21,13 @@ checks](https://badges.cranchecks.info/worst/tidyhydat.svg)](https://cran.r-proj ## What does `tidyhydat` do? +- Provides functions (`available_*`) that combine validated historical + data with provisional real-time data. - Provides functions (`hy_*`) that access hydrometric data from the - HYDAT database, a national archive of Canadian hydrometric data and - return tidy data. + HYDAT database or web service, a national archive of Canadian + hydrometric data and return tidy data. - Provides functions (`realtime_*`) that access Environment and Climate Change Canada’s real-time hydrometric data source. -- Provides functions (`ws_*`) that access historical daily data via web - service without requiring HYDAT download. - Provides functions (`search_*`) that can search through the approximately 7000 stations in the database and aid in generating station vectors @@ -78,6 +78,43 @@ time! To see where HYDAT was saved you can run `hy_default_db()`. Now that you have HYDAT downloaded and ready to go, you are all set to begin looking at Canadian hydrometric data. +### Combining validated and provisional data + +For a complete record combining validated historical data with recent +provisional data use the `available_flows` and `available_levels` +functions. 
+ + available_flows( + station_number = "08MF005", + start_date = "2020-01-01", + end_date = Sys.Date() + ) + #> Queried on: 2025-12-09 17:40:38.103082 (UTC) + #> Historical data source: HYDAT + #> Final data range: 2020-01-01 to 2024-12-31 + #> Provisional data range: 2025-01-01 to 2025-12-09 + #> Overall date range: 2020-01-01 to 2025-12-09 + #> Records by approval status: + #> final: 1,827 + #> provisional: 343 + #> Station(s) returned: 1 + #> All stations successfully retrieved. + #> Parameter(s): Flow + #> # A tibble: 2,170 × 6 + #> STATION_NUMBER Date Parameter Value Symbol Approval + #> + #> 1 08MF005 2020-01-01 Flow 1340 final + #> 2 08MF005 2020-01-02 Flow 1330 final + #> 3 08MF005 2020-01-03 Flow 1310 final + #> 4 08MF005 2020-01-04 Flow 1420 final + #> 5 08MF005 2020-01-05 Flow 1350 final + #> 6 08MF005 2020-01-06 Flow 1310 final + #> 7 08MF005 2020-01-07 Flow 1280 final + #> 8 08MF005 2020-01-08 Flow 1320 final + #> 9 08MF005 2020-01-09 Flow 1230 final + #> 10 08MF005 2020-01-10 Flow 1210 final + #> # ℹ 2,160 more rows + ### Real-time To download real-time data using the datamart we can use approximately @@ -85,22 +122,22 @@ the same conventions discussed above. 
Using `realtime_dd()` we can easily select specific stations by supplying a station of interest: realtime_dd(station_number = "08MF005") - #> Queried on: 2025-10-29 17:49:41.08772 (UTC) - #> Date range: 2025-09-29 to 2025-10-29 - #> # A tibble: 17,502 × 8 + #> Queried on: 2025-12-09 17:40:39.67949 (UTC) + #> Date range: 2025-11-09 to 2025-12-09 + #> # A tibble: 17,500 × 8 #> STATION_NUMBER PROV_TERR_STATE_LOC Date Parameter Value Grade #> - #> 1 08MF005 BC 2025-09-29 08:00:00 Flow 1390 - #> 2 08MF005 BC 2025-09-29 08:05:00 Flow 1390 - #> 3 08MF005 BC 2025-09-29 08:10:00 Flow 1390 - #> 4 08MF005 BC 2025-09-29 08:15:00 Flow 1390 - #> 5 08MF005 BC 2025-09-29 08:20:00 Flow 1390 - #> 6 08MF005 BC 2025-09-29 08:25:00 Flow 1390 - #> 7 08MF005 BC 2025-09-29 08:30:00 Flow 1390 - #> 8 08MF005 BC 2025-09-29 08:35:00 Flow 1400 - #> 9 08MF005 BC 2025-09-29 08:40:00 Flow 1390 - #> 10 08MF005 BC 2025-09-29 08:45:00 Flow 1390 - #> # ℹ 17,492 more rows + #> 1 08MF005 BC 2025-11-09 08:00:00 Flow 1330 + #> 2 08MF005 BC 2025-11-09 08:05:00 Flow 1330 + #> 3 08MF005 BC 2025-11-09 08:10:00 Flow 1330 + #> 4 08MF005 BC 2025-11-09 08:15:00 Flow 1330 + #> 5 08MF005 BC 2025-11-09 08:20:00 Flow 1320 + #> 6 08MF005 BC 2025-11-09 08:25:00 Flow 1320 + #> 7 08MF005 BC 2025-11-09 08:30:00 Flow 1330 + #> 8 08MF005 BC 2025-11-09 08:35:00 Flow 1330 + #> 9 08MF005 BC 2025-11-09 08:40:00 Flow 1330 + #> 10 08MF005 BC 2025-11-09 08:45:00 Flow 1320 + #> # ℹ 17,490 more rows #> # ℹ 2 more variables: Symbol , Code Or we can use `realtime_ws`: @@ -111,40 +148,75 @@ Or we can use `realtime_ws`: start_date = Sys.Date() - 14, end_date = Sys.Date() ) - #> Queried on: 2025-10-29 17:49:42.453161 (UTC) - #> Date range: 2025-10-15 to 2025-10-29 + #> Queried on: 2025-12-09 17:40:40.893085 (UTC) + #> Date range: 2025-11-25 to 2025-12-09 #> Station(s) returned: 1 #> All stations successfully retrieved. #> All parameters successfully retrieved. 
#> # A tibble: 4,593 × 12 #> STATION_NUMBER Date Name_En Value Unit Grade Symbol Approval #> - #> 1 08MF005 2025-10-15 00:00:00 Water t… 12.1 °C NA Provisi… - #> 2 08MF005 2025-10-15 01:00:00 Water t… 12.1 °C NA Provisi… - #> 3 08MF005 2025-10-15 02:00:00 Water t… 12.1 °C NA Provisi… - #> 4 08MF005 2025-10-15 03:00:00 Water t… 12.1 °C NA Provisi… - #> 5 08MF005 2025-10-15 04:00:00 Water t… 12.0 °C NA Provisi… - #> 6 08MF005 2025-10-15 05:00:00 Water t… 12.0 °C NA Provisi… - #> 7 08MF005 2025-10-15 06:00:00 Water t… 12.0 °C NA Provisi… - #> 8 08MF005 2025-10-15 07:00:00 Water t… 12.0 °C NA Provisi… - #> 9 08MF005 2025-10-15 08:00:00 Water t… 12.0 °C NA Provisi… - #> 10 08MF005 2025-10-15 09:00:00 Water t… 12.0 °C NA Provisi… + #> 1 08MF005 2025-11-25 00:00:00 Water t… 7.32 °C NA Provisi… + #> 2 08MF005 2025-11-25 01:00:00 Water t… 7.32 °C NA Provisi… + #> 3 08MF005 2025-11-25 02:00:00 Water t… 7.31 °C NA Provisi… + #> 4 08MF005 2025-11-25 03:00:00 Water t… 7.31 °C NA Provisi… + #> 5 08MF005 2025-11-25 04:00:00 Water t… 7.31 °C NA Provisi… + #> 6 08MF005 2025-11-25 05:00:00 Water t… 7.3 °C NA Provisi… + #> 7 08MF005 2025-11-25 06:00:00 Water t… 7.31 °C NA Provisi… + #> 8 08MF005 2025-11-25 07:00:00 Water t… 7.31 °C NA Provisi… + #> 9 08MF005 2025-11-25 08:00:00 Water t… 7.31 °C NA Provisi… + #> 10 08MF005 2025-11-25 09:00:00 Water t… 7.3 °C NA Provisi… #> # ℹ 4,583 more rows #> # ℹ 4 more variables: Parameter , Code , Qualifier , #> # Qualifiers -### Historical web service +### Using only HYDAT + +If you wish to use only the final approved data in HYDAT database you +can use: + + hy_daily_flows( + station_number = "08MF005", + start_date = "2020-01-01", + end_date = "2020-12-31" + ) + #> Queried from version of HYDAT released on 2025-10-14 + #> Observations: 366 + #> Measurement flags: 0 + #> Parameter(s): Flow + #> Date range: 2020-01-01 to 2020-12-31 + #> Station(s) returned: 1 + #> Stations requested but not returned: + #> All stations returned. 
+ #> # A tibble: 366 × 5 + #> STATION_NUMBER Date Parameter Value Symbol + #> + #> 1 08MF005 2020-01-01 Flow 1340 + #> 2 08MF005 2020-01-02 Flow 1330 + #> 3 08MF005 2020-01-03 Flow 1310 + #> 4 08MF005 2020-01-04 Flow 1420 + #> 5 08MF005 2020-01-05 Flow 1350 + #> 6 08MF005 2020-01-06 Flow 1310 + #> 7 08MF005 2020-01-07 Flow 1280 + #> 8 08MF005 2020-01-08 Flow 1320 + #> 9 08MF005 2020-01-09 Flow 1230 + #> 10 08MF005 2020-01-10 Flow 1210 + #> # ℹ 356 more rows + +### Using the web service without HYDAT For smaller queries where downloading the entire HYDAT database is -unnecessary, `tidyhydat` provides web service functions that access -historical daily data directly: +unnecessary, you can use `hy_daily_flows()` and `hy_daily_levels()` with +`hydat_path = FALSE` to access historical daily data directly from the +web service: - ws_daily_flows( + hy_daily_flows( station_number = "08MF005", + hydat_path = FALSE, start_date = "2020-01-01", end_date = "2020-12-31" ) - #> Queried on: 2025-10-29 17:49:43.567461 (UTC) + #> Queried on: 2025-12-09 17:40:42.101829 (UTC) #> Date range: 2020-01-01 to 2020-12-31 #> Station(s) returned: 1 #> All stations successfully retrieved. @@ -163,32 +235,6 @@ historical daily data directly: #> 10 08MF005 2020-01-10 discharge/débit 1210 #> # ℹ 356 more rows -Similarly for water levels: - - ws_daily_levels( - station_number = "08MF005", - start_date = "2020-01-01", - end_date = "2020-12-31" - ) - #> Queried on: 2025-10-29 17:49:44.687726 (UTC) - #> Date range: 2020-01-01 to 2020-12-31 - #> Station(s) returned: 1 - #> All stations successfully retrieved. 
- #> # A tibble: 366 × 5 - #> STATION_NUMBER Date Parameter Value Symbol - #> - #> 1 08MF005 2020-01-01 water level/niveau 3.72 - #> 2 08MF005 2020-01-02 water level/niveau 3.72 - #> 3 08MF005 2020-01-03 water level/niveau 3.69 - #> 4 08MF005 2020-01-04 water level/niveau 3.81 - #> 5 08MF005 2020-01-05 water level/niveau 3.74 - #> 6 08MF005 2020-01-06 water level/niveau 3.69 - #> 7 08MF005 2020-01-07 water level/niveau 3.66 - #> 8 08MF005 2020-01-08 water level/niveau 3.70 - #> 9 08MF005 2020-01-09 water level/niveau 3.60 - #> 10 08MF005 2020-01-10 water level/niveau 3.58 - #> # ℹ 356 more rows - ## Compare realtime\_ws and realtime\_dd `tidyhydat` provides two methods to download realtime data. @@ -212,19 +258,11 @@ difference to `realtime_dd()`. These include: ### Plotting -Plot methods are also provided to quickly visualize realtime data: - - realtime_ex <- realtime_dd(station_number = "08MF005") - - plot(realtime_ex) - -![](man/figures/README-unnamed-chunk-10-1.png) - -and also historical data: +Plot methods are also provided to quickly visualize data: - hy_ex <- hy_daily_flows(station_number = "08MF005", start_date = "2013-01-01") + flows_ex <- available_flows(station_number = "08MF005", start_date = "2013-01-01") - plot(hy_ex) + plot(flows_ex) ![](man/figures/README-unnamed-chunk-11-1.png) diff --git a/man/available_flows.Rd b/man/available_flows.Rd new file mode 100644 index 0000000..7c8447c --- /dev/null +++ b/man/available_flows.Rd @@ -0,0 +1,111 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/available.R +\name{available_flows} +\alias{available_flows} +\title{Get all available flow data (final + provisional)} +\format{ +A tibble with 6 variables: +\describe{ +\item{STATION_NUMBER}{Unique 7 digit Water Survey of Canada station number} +\item{Date}{Observation date. Formatted as a Date class.} +\item{Parameter}{Parameter being measured. Value is "Flow"} +\item{Value}{Discharge value. 
The units are m^3/s.} +\item{Symbol}{Measurement/river conditions} +\item{Approval}{Approval status: "final" (approved) or "provisional" (subject to revision)} +} +} +\usage{ +available_flows( + station_number, + start_date = NULL, + end_date = Sys.Date(), + hydat_path = NULL, + prov_terr_state_loc = NULL +) +} +\arguments{ +\item{station_number}{A seven digit Water Survey of Canada station number. If this argument is omitted, the value of \code{prov_terr_state_loc} +is returned.} + +\item{start_date}{Start date for data retrieval in YYYY-MM-DD format. +Defaults to NULL (retrieves all available historical data).} + +\item{end_date}{End date for data retrieval in YYYY-MM-DD format. +Defaults to current date (Sys.Date()).} + +\item{hydat_path}{The path to the hydat database or NULL to use the default location +used by \link{download_hydat}. It is also possible to pass in an existing +\link[dplyr]{src_sqlite} such that the database only needs to be opened once per +user-level call.} + +\item{prov_terr_state_loc}{Province, state or territory. If this argument is omitted, the value of \code{station_number} +is returned. See \code{unique(allstations$prov_terr_state_loc)}. Will also accept \code{CA} to return only Canadian stations.} +} +\value{ +A tibble with class \code{available} combining final and provisional data +with an additional \code{Approval} column indicating whether each record is +"final" or "provisional". The object includes attributes for tracking data +sources and query metadata. +} +\description{ +Convenience function that combines final historical data +(from HYDAT or web service) with provisional real-time data in a single call. +} +\details{ +This function combines data from final and provisional data sources to provide a complete +discharge record. 
+\subsection{Data Sources and Priority}{ + +\strong{Historical (Final) Data:} + +The function automatically determines the best source for historical data: +\itemize{ +\item \strong{\code{hydat_path} provided} (path to database): Uses local HYDAT database at that path +\item \strong{\code{hydat_path = FALSE}}: Forces use of web service (useful when HYDAT unavailable) +\item \strong{\code{hydat_path = NULL}} (default): Uses HYDAT default location, automatically falls back +to web service if HYDAT is unavailable +} + +\strong{Real-time (Provisional) Data:} + +Provisional data is retrieved from ECCC's real-time web service using the +\code{realtime_ws()} function. This data covers approximately the last 18 months +and is updated regularly. +} + +\subsection{Data Approval Status}{ + +The \code{Approval} column uses ECCC's terminology +(see \url{https://wateroffice.ec.gc.ca/contactus/faq_e.html}): +\itemize{ +\item \strong{"final"}: Historical data from HYDAT or web service that has been approved by ECCC. +\item \strong{"provisional"}: Real-time data from the web service representing the best +available measurements, but subject to revision and not yet approved by ECCC. 
+} +} +} +\examples{ +\dontrun{ +## Basic usage - get all available data +flows <- available_flows("08MF005") + +## Multiple stations +flows <- available_flows(c("08MF005", "08NM116")) + +## Get only recent data (last 2 years) +recent_flows <- available_flows( + "08MF005", + start_date = Sys.Date() - lubridate::years(2), + end_date = Sys.Date() +) + +## Force use of web service (when HYDAT not available) +flows_ws <- available_flows("08MF005", hydat_path = FALSE) +} + +} +\seealso{ +Other available functions: +\code{\link{available_levels}()} +} +\concept{available functions} diff --git a/man/available_levels.Rd b/man/available_levels.Rd new file mode 100644 index 0000000..6d8bb51 --- /dev/null +++ b/man/available_levels.Rd @@ -0,0 +1,111 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/available.R +\name{available_levels} +\alias{available_levels} +\title{Get all available level data (final + provisional)} +\format{ +A tibble with 6 variables: +\describe{ +\item{STATION_NUMBER}{Unique 7 digit Water Survey of Canada station number} +\item{Date}{Observation date. Formatted as a Date class.} +\item{Parameter}{Parameter being measured. Value is "Level"} +\item{Value}{Level value. The units are metres.} +\item{Symbol}{Measurement/river conditions} +\item{Approval}{Approval status: "final" (approved) or "provisional" (subject to revision)} +} +} +\usage{ +available_levels( + station_number, + start_date = NULL, + end_date = Sys.Date(), + hydat_path = NULL, + prov_terr_state_loc = NULL +) +} +\arguments{ +\item{station_number}{A seven digit Water Survey of Canada station number. If this argument is omitted, the value of \code{prov_terr_state_loc} +is returned.} + +\item{start_date}{Start date for data retrieval in YYYY-MM-DD format. +Defaults to NULL (retrieves all available historical data).} + +\item{end_date}{End date for data retrieval in YYYY-MM-DD format. 
+Defaults to current date (Sys.Date()).} + +\item{hydat_path}{The path to the hydat database or NULL to use the default location +used by \link{download_hydat}. It is also possible to pass in an existing +\link[dplyr]{src_sqlite} such that the database only needs to be opened once per +user-level call.} + +\item{prov_terr_state_loc}{Province, state or territory. If this argument is omitted, the value of \code{station_number} +is returned. See \code{unique(allstations$prov_terr_state_loc)}. Will also accept \code{CA} to return only Canadian stations.} +} +\value{ +A tibble with class \code{available} combining final and provisional data +with an additional \code{Approval} column indicating whether each record is +"final" or "provisional". The object includes attributes for tracking data +sources and query metadata. +} +\description{ +Convenience function that combines final historical data +(from HYDAT or web service) with provisional real-time data in a single call. +} +\details{ +This function combines data from final and provisional data sources to provide a complete +water level record. +\subsection{Data Sources and Priority}{ + +\strong{Historical (Final) Data:} + +The function automatically determines the best source for historical data: +\itemize{ +\item \strong{\code{hydat_path} provided} (path to database): Uses local HYDAT database at that path +\item \strong{\code{hydat_path = FALSE}}: Forces use of web service (useful when HYDAT unavailable) +\item \strong{\code{hydat_path = NULL}} (default): Uses HYDAT default location, automatically falls back +to web service if HYDAT is unavailable +} + +\strong{Real-time (Provisional) Data:} + +Provisional data is retrieved from ECCC's real-time web service using the +\code{realtime_ws()} function. This data covers approximately the last 18 months +and is updated regularly. 
+} + +\subsection{Data Approval Status}{ + +The \code{Approval} column uses ECCC's terminology +(see \url{https://wateroffice.ec.gc.ca/contactus/faq_e.html}): +\itemize{ +\item \strong{"final"}: Historical data from HYDAT or web service that has been approved by ECCC. +\item \strong{"provisional"}: Real-time data from the web service representing the best +available measurements, but subject to revision and not yet approved by ECCC. +} +} +} +\examples{ +\dontrun{ +## Basic usage - get all available data +levels <- available_levels("08MF005") + +## Multiple stations +levels <- available_levels(c("08MF005", "08NM116")) + +## Get only recent data (last 2 years) +recent_levels <- available_levels( + "08MF005", + start_date = Sys.Date() - lubridate::years(2), + end_date = Sys.Date() +) + +## Force use of web service (when HYDAT not available) +levels_ws <- available_levels("08MF005", hydat_path = FALSE) +} + +} +\seealso{ +Other available functions: +\code{\link{available_flows}()} +} +\concept{available functions} diff --git a/man/figures/README-unnamed-chunk-11-1.png b/man/figures/README-unnamed-chunk-11-1.png index ab80d55..0b04f1d 100644 Binary files a/man/figures/README-unnamed-chunk-11-1.png and b/man/figures/README-unnamed-chunk-11-1.png differ diff --git a/man/figures/README-unnamed-chunk-12-1.png b/man/figures/README-unnamed-chunk-12-1.png new file mode 100644 index 0000000..ab80d55 Binary files /dev/null and b/man/figures/README-unnamed-chunk-12-1.png differ diff --git a/man/hy_daily_flows.Rd b/man/hy_daily_flows.Rd index a703234..73cf397 100644 --- a/man/hy_daily_flows.Rd +++ b/man/hy_daily_flows.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/hy_daily_flows.R \name{hy_daily_flows} \alias{hy_daily_flows} -\title{Extract daily flows information from the HYDAT database} +\title{Extract daily flows information from HYDAT database or web service} \format{ A tibble with 5 variables: \describe{ @@ -49,9 +49,18 @@ value is \code{code}.} A tibble of daily flows 
} \description{ -Provides wrapper to turn the DLY_FLOWS table in HYDAT into a tidy data frame of daily flows. -\code{station_number} and \code{prov_terr_state_loc} can both be supplied. If both are omitted all -values from the \code{hy_stations} table are returned. That is a large tibble for \code{hy_daily_flows}. +Provides wrapper to turn the DLY_FLOWS table in HYDAT (or historical web service) +into a tidy data frame of daily flows. \code{station_number} and \code{prov_terr_state_loc} can both be +supplied. If both are omitted all values from the \code{hy_stations} table are returned. +That is a large tibble for \code{hy_daily_flows}. +} +\details{ +The \code{hydat_path} argument controls the data source: +\itemize{ +\item \strong{NULL} (default): Uses local HYDAT database (default location) +\item \strong{FALSE}: Forces use of historical web service (requires \code{start_date} and \code{end_date}) +\item \strong{Path string}: Uses HYDAT database at the specified path +} } \examples{ \dontrun{ diff --git a/man/hy_daily_levels.Rd b/man/hy_daily_levels.Rd index 0184e27..1523ed1 100644 --- a/man/hy_daily_levels.Rd +++ b/man/hy_daily_levels.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/hy_daily_levels.R \name{hy_daily_levels} \alias{hy_daily_levels} -\title{Extract daily levels information from the HYDAT database} +\title{Extract daily levels information from HYDAT database or web service} \format{ A tibble with 5 variables: \describe{ @@ -49,10 +49,19 @@ value is \code{code}.} A tibble of daily levels } \description{ -Provides wrapper to turn the DLY_LEVELS table in HYDAT into a tidy data frame. The primary value returned by this -function is discharge. \code{station_number} and \code{prov_terr_state_loc} can both be supplied. If both are omitted all +Provides wrapper to turn the DLY_LEVELS table in HYDAT (or historical web service) +into a tidy data frame. The primary value returned by this function is water level. 
+\code{station_number} and \code{prov_terr_state_loc} can both be supplied. If both are omitted all values from the \code{hy_stations} table are returned. That is a large vector for \code{hy_daily_levels}. } +\details{ +The \code{hydat_path} argument controls the data source: +\itemize{ +\item \strong{NULL} (default): Uses local HYDAT database (default location) +\item \strong{FALSE}: Forces use of historical web service (requires \code{start_date} and \code{end_date}) +\item \strong{Path string}: Uses HYDAT database at the specified path +} +} \examples{ \dontrun{ hy_daily_levels( diff --git a/man/plot.Rd b/man/plot.Rd index 92e5a38..eafb6fa 100644 --- a/man/plot.Rd +++ b/man/plot.Rd @@ -1,11 +1,15 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/hy_plot.R, R/realtime_plot.R +% Please edit documentation in R/available-classes.R, R/hy_plot.R, +% R/realtime_plot.R \name{plot} \alias{plot} +\alias{plot.available} \alias{plot.hy} \alias{plot.realtime} -\title{Plot historical and realtime data} +\title{Plot available data (final + provisional)} \usage{ +\method{plot}{available}(x = NULL, ...) + \method{plot}{hy}(x = NULL, ...) \method{plot}{realtime}(x = NULL, Parameter = c("Flow", "Level"), ...) @@ -18,6 +22,9 @@ \item{Parameter}{Parameter of interest. Either "Flow" or "Level". Defaults to "Flow".} } \description{ +This method plots combined final and provisional data, visually distinguishing +between validated (final) and provisional records. + This method plots either daily time series data from HYDAT or realtime data from the datamart. These plots are intended to be convenient and quick methods to visualize hydrometric data. @@ -28,6 +35,12 @@ visualize hydrometric data. 
}} \examples{ +\dontrun{ +# One station +flows <- available_flows("08MF005") +plot(flows) +} + \dontrun{ # One station fraser <- hy_daily_flows("08MF005") diff --git a/man/realtime_ws.Rd b/man/realtime_ws.Rd index 7448fe0..d3bc2f5 100644 --- a/man/realtime_ws.Rd +++ b/man/realtime_ws.Rd @@ -22,7 +22,7 @@ A tibble with 6 variables: realtime_ws( station_number, parameters = NULL, - start_date = Sys.Date() - 30, + start_date = Sys.Date() - lubridate::days(30), end_date = Sys.Date() ) } @@ -34,11 +34,11 @@ for some options though undocumented parameters may be implemented. Defaults to Tertiary water level, Discharge Provisional, Discharge, sensor, Water temperature, Secondary water temperature, Accumulated precipitation} \item{start_date}{Accepts either YYYY-MM-DD or YYYY-MM-DD HH:MM:SS. -If only start date is supplied (i.e. YYYY-MM-DD) values are returned from the start of that day. +If only \code{start_date} is supplied (i.e. YYYY-MM-DD) values are returned from the start of that day. Defaults to 30 days before current date. Time is supplied in UTC.} \item{end_date}{Accepts either YYYY-MM-DD or YYYY-MM-DD HH:MM:SS. -If only a date is supplied (i.e. YYYY-MM-DD) values are returned from the end of that day. +If only \code{end_date} is supplied (i.e. YYYY-MM-DD) values are returned from the end of that day. Defaults to current date. 
Time is supplied in UTC.} } \description{ @@ -60,7 +60,7 @@ fivedays <- realtime_ws( station_number = c("08NL071", "08NM174"), parameters = c(47, 5), end_date = Sys.Date(), # today - start_date = Sys.Date() - 5 # five days ago + start_date = Sys.Date() - lubridate::days(5) # five days ago ) } } diff --git a/man/ws_daily_flows.Rd b/man/ws_daily_flows.Rd deleted file mode 100644 index 85ba3ab..0000000 --- a/man/ws_daily_flows.Rd +++ /dev/null @@ -1,57 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/historical-webservice.R -\name{ws_daily_flows} -\alias{ws_daily_flows} -\alias{ws_daily_levels} -\title{Download historical flow and level data from the ECCC web service} -\format{ -A tibble with 6 variables: -\describe{ -\item{STATION_NUMBER}{Unique 7 digit Water Survey of Canada station number} -\item{Date}{Observation date and time. Formatted as a POSIXct class as UTC for consistency.} -\item{Parameter}{Type of parameter} -\item{Value}{Value of the measurement.} -\item{Symbol}{future use} -} -} -\usage{ -ws_daily_flows(station_number, start_date = NULL, end_date = NULL) - -ws_daily_levels(station_number, start_date = NULL, end_date = NULL) -} -\arguments{ -\item{station_number}{Water Survey of Canada station number.} - -\item{start_date}{Accepts YYYY-MM-DD. You need to provide a start date. -The default value is NULL} - -\item{end_date}{Accepts either YYYY-MM-DD. You need to provide an end date. -The default value is NULL} -} -\description{ -Functions to retrieve historical flow and levels data from ECCC web service. This data is -the same as HYDAT data but provides the convenience of not having to download -the HYDAT database. This function is useful when a smaller amount of data is needed. 
If -you need lots of data, consider using HYDAT and the \code{hy_} family of functions -} -\examples{ -\dontrun{ -try( - flow_data <- ws_daily_flows( - station_number = c("08NL071", "08NM174"), - start_date = Sys.Date() - 365, - end_date = Sys.Date() - ) -) -try( - level_data <- ws_daily_level( - station_number = c("08NL071", "08NM174"), - start_date = Sys.Date() - 365, - end_date = Sys.Date() - ) -) -} -} -\seealso{ -hy_daily_flows -} diff --git a/tests/testthat/fixtures/wateroffice.ec.gc.ca/services/real_time_data/csv/inline-610297.R b/tests/testthat/fixtures/wateroffice.ec.gc.ca/services/real_time_data/csv/inline-610297.R new file mode 100644 index 0000000..6a3bf5f --- /dev/null +++ b/tests/testthat/fixtures/wateroffice.ec.gc.ca/services/real_time_data/csv/inline-610297.R @@ -0,0 +1,12 @@ +structure(list(method = "GET", url = "https://wateroffice.ec.gc.ca/services/real_time_data/csv/inline?stations[]=08MF005&parameters[]=47&start_date=2013-01-06%2000:00:00&end_date=2013-01-05%2023:59:59", + status_code = 200L, headers = structure(list(Date = "Mon, 06 Jan 2013 12:00:00 GMT", + Server = "Apache", `Strict-Transport-Security` = "max-age=63072000; preload", + `Content-Disposition` = "inline; filename=real_time_data.csv", + Pragma = "public", `Cache-Control` = "must-revalidate, post-check=0, pre-check=0", + Vary = "Accept-Encoding", `Content-Encoding` = "gzip", + `Referrer-Policy` = "no-referrer-when-downgrade", `Content-Security-Policy` = "frame-src 'self'; media-src 'self'; object-src 'self'; base-uri 'self'; form-action 'self' https://www.canada.ca https://canada.ca https://recherche-search.gc.ca https://weather.gc.ca https://meteo.gc.ca https://*.cmc.ec.gc.ca https://*.edc-mtl.ec.gc.ca", + `Content-Length` = "150", `Content-Type` = "text/csv; charset=utf-8"), class = "httr2_headers"), + body = charToRaw(" ID,Date,Parameter/Paramètre,Value/Valeur,Qualifier/Qualificatif,Symbol/Symbole,Approval/Approbation,Grade/Classification,Qualifiers/Qualificatifs"), + timing = 
c(redirect = 0, namelookup = 0.001, connect = 0.050, + pretransfer = 0.100, starttransfer = 0.150, total = 0.151 + ), cache = new.env(parent = emptyenv())), class = "httr2_response") diff --git a/tests/testthat/fixtures/wateroffice.ec.gc.ca/services/real_time_data/csv/inline-d311f2.R b/tests/testthat/fixtures/wateroffice.ec.gc.ca/services/real_time_data/csv/inline-d311f2.R new file mode 100644 index 0000000..6ed2a46 --- /dev/null +++ b/tests/testthat/fixtures/wateroffice.ec.gc.ca/services/real_time_data/csv/inline-d311f2.R @@ -0,0 +1,12 @@ +structure(list(method = "GET", url = "https://wateroffice.ec.gc.ca/services/real_time_data/csv/inline?stations[]=08MF005&parameters[]=46&start_date=2025-01-01%2000:00:00&end_date=2025-01-02%2023:59:59", + status_code = 200L, headers = structure(list(Date = "Mon, 01 Jan 2025 12:00:00 GMT", + Server = "Apache", `Strict-Transport-Security` = "max-age=63072000; preload", + `Content-Disposition` = "inline; filename=real_time_data.csv", + Pragma = "public", `Cache-Control` = "must-revalidate, post-check=0, pre-check=0", + Vary = "Accept-Encoding", `Content-Encoding` = "gzip", + `Referrer-Policy` = "no-referrer-when-downgrade", `Content-Security-Policy` = "frame-src 'self'; media-src 'self'; object-src 'self'; base-uri 'self'; form-action 'self' https://www.canada.ca https://canada.ca https://recherche-search.gc.ca https://weather.gc.ca https://meteo.gc.ca https://*.cmc.ec.gc.ca https://*.edc-mtl.ec.gc.ca", + `Content-Length` = "150", `Content-Type` = "text/csv; charset=utf-8"), class = "httr2_headers"), + body = charToRaw(" ID,Date,Parameter/Paramètre,Value/Valeur,Qualifier/Qualificatif,Symbol/Symbole,Approval/Approbation,Grade/Classification,Qualifiers/Qualificatifs"), + timing = c(redirect = 0, namelookup = 0.001, connect = 0.050, + pretransfer = 0.100, starttransfer = 0.150, total = 0.151 + ), cache = new.env(parent = emptyenv())), class = "httr2_response") diff --git a/tests/testthat/test-available-classes.R 
b/tests/testthat/test-available-classes.R new file mode 100644 index 0000000..cbb5d9d --- /dev/null +++ b/tests/testthat/test-available-classes.R @@ -0,0 +1,146 @@ +# Tests for available class + +test_that("as.available creates available class", { + df <- data.frame( + STATION_NUMBER = c("08MF005", "08MF005"), + Date = as.Date(c("2020-01-01", "2020-01-02")), + Parameter = c("Flow", "Flow"), + Value = c(10.5, 11.2), + Symbol = c(NA_character_, NA_character_), + Approval = c("final", "provisional") + ) + + result <- as.available(df) + + expect_s3_class(result, "available") + expect_true(!is.null(attr(result, "query_time"))) + expect_s3_class(attr(result, "query_time"), "POSIXct") +}) + +test_that("print.available displays metadata correctly", { + df <- data.frame( + STATION_NUMBER = c("08MF005", "08MF005", "08MF005"), + Date = as.Date(c("2020-01-01", "2020-06-01", "2020-12-01")), + Parameter = c("Flow", "Flow", "Flow"), + Value = c(10.5, 11.2, 12.3), + Symbol = c(NA_character_, NA_character_, NA_character_), + Approval = c("final", "final", "provisional") + ) + + result <- as.available(df) + attr(result, "historical_source") <- "HYDAT" + attr(result, "missed_stns") <- character(0) + + # Capture the printed output + output <- capture.output(print(result)) + + # Check that key elements are present in the output + expect_true(any(grepl("Queried on:", output))) + expect_true(any(grepl("Historical data source: HYDAT", output))) + expect_true(any(grepl("Final data range:", output))) + expect_true(any(grepl("Provisional data range:", output))) + expect_true(any(grepl("Records by approval status:", output))) + expect_true(any(grepl("Station\\(s\\) returned:", output))) +}) + +test_that("print.available handles missing final data", { + df <- data.frame( + STATION_NUMBER = c("08MF005", "08MF005"), + Date = as.Date(c("2020-12-01", "2020-12-02")), + Parameter = c("Flow", "Flow"), + Value = c(12.3, 13.1), + Symbol = c(NA_character_, NA_character_), + Approval = c("provisional", 
"provisional") + ) + + result <- as.available(df) + attr(result, "historical_source") <- "Web Service" + + output <- capture.output(print(result)) + + expect_true(any(grepl("No final data", output))) + expect_true(any(grepl("Provisional data range:", output))) +}) + +test_that("print.available handles missing provisional data", { + df <- data.frame( + STATION_NUMBER = c("08MF005", "08MF005"), + Date = as.Date(c("2020-01-01", "2020-01-02")), + Parameter = c("Flow", "Flow"), + Value = c(10.5, 11.2), + Symbol = c(NA_character_, NA_character_), + Approval = c("final", "final") + ) + + result <- as.available(df) + attr(result, "historical_source") <- "HYDAT" + + output <- capture.output(print(result)) + + expect_true(any(grepl("Final data range:", output))) + expect_true(any(grepl("No provisional data", output))) +}) + +test_that("print.available shows missed stations", { + df <- data.frame( + STATION_NUMBER = c("08MF005", "08MF005"), + Date = as.Date(c("2020-01-01", "2020-01-02")), + Parameter = c("Flow", "Flow"), + Value = c(10.5, 11.2), + Symbol = c(NA_character_, NA_character_), + Approval = c("final", "provisional") + ) + + result <- as.available(df) + attr(result, "missed_stns") <- c("08NM116", "08NL071") + + output <- capture.output(print(result)) + + expect_true(any(grepl("Stations requested but not returned:", output))) + expect_true(any(grepl("08NM116", output))) + expect_true(any(grepl("08NL071", output))) +}) + +test_that("print.available handles many missed stations", { + df <- data.frame( + STATION_NUMBER = c("08MF005"), + Date = as.Date(c("2020-01-01")), + Parameter = c("Flow"), + Value = c(10.5), + Symbol = c(NA_character_), + Approval = c("final") + ) + + result <- as.available(df) + # Create more than 10 missed stations + attr(result, "missed_stns") <- paste0("STN", sprintf("%03d", 1:15)) + + output <- capture.output(print(result)) + + expect_true(any(grepl("More than 10 stations", output))) +}) + +test_that("available_flows handles 'No data exists' 
error from realtime_ws gracefully", { + + # This tests that when realtime_ws returns no data (empty CSV), + # available_flows continues and returns only the final/validated data + httptest2::with_mock_dir("fixtures", { + result <- available_flows( + station_number = "05AA008", # Station with flow data in test database + hydat_path = hy_test_db(), + start_date = as.Date("1910-07-01"), + end_date = as.Date("1910-07-05") + ) + + # Should have data from HYDAT (final) + expect_s3_class(result, "available") + expect_true(nrow(result) > 0) + + # All data should be "final" (no provisional data) + expect_true(all(result$Approval == "final")) + + # Should not have any provisional data + expect_false(any(result$Approval == "provisional")) + }) +}) + diff --git a/tests/testthat/test-historical-webservice.R b/tests/testthat/test-historical-webservice.R index 947717d..6826b62 100644 --- a/tests/testthat/test-historical-webservice.R +++ b/tests/testthat/test-historical-webservice.R @@ -3,9 +3,10 @@ # To re-record fixtures, see tests/testthat/record_fixtures.R httptest2::with_mock_dir("fixtures", { - test_that("ws_daily_flows returns the correct data header", { - ws_test <- ws_daily_flows( + test_that("hy_daily_flows with hydat_path = FALSE returns the correct data header", { + ws_test <- hy_daily_flows( station_number = "08MF005", + hydat_path = FALSE, start_date = as.Date("2023-01-01"), end_date = as.Date("2023-12-31") ) @@ -20,18 +21,20 @@ httptest2::with_mock_dir("fixtures", { }) - test_that("ws_daily_flows is empty with a nearish date", { + test_that("hy_daily_flows with hydat_path = FALSE is empty with a nearish date", { # using a fixed date that was empty on the date of fixture creation - expect_error(ws_daily_flows( + expect_error(hy_daily_flows( station_number = "08MF005", + hydat_path = FALSE, start_date = as.Date("2025-10-27"), end_date = as.Date("2025-10-29") ), "No data exists for this station query during the period chosen") }) - test_that("ws_daily_levels returns 
the correct data header", { - ws_test <- ws_daily_levels( + test_that("hy_daily_levels with hydat_path = FALSE returns the correct data header", { + ws_test <- hy_daily_levels( station_number = "08MF005", + hydat_path = FALSE, start_date = as.Date("2023-01-01"), end_date = as.Date("2023-12-31") ) @@ -46,37 +49,41 @@ httptest2::with_mock_dir("fixtures", { }) - test_that("ws_daily_levels is empty with a nearish date", { + test_that("hy_daily_levels with hydat_path = FALSE is empty with a nearish date", { # using a fixed date that was empty on the date of fixture creation - expect_error(ws_daily_levels( + expect_error(hy_daily_levels( station_number = "08MF005", + hydat_path = FALSE, start_date = as.Date("2025-10-27"), end_date = as.Date("2025-10-29") ), "No data exists for this station query during the period chosen") }) }) -test_that("get_historical_data error informatively with no dates given", { +test_that("hy_daily_flows with hydat_path = FALSE errors informatively with no dates given", { expect_error( - get_historical_data( - station_number = "08MF005" + hy_daily_flows( + station_number = "08MF005", + hydat_path = FALSE ), - "please provide a valid date for the start_date argument" + "start_date is required when using web service" ) expect_error( - get_historical_data( + hy_daily_flows( station_number = "08MF005", + hydat_path = FALSE, start_date = Sys.Date() ), - "please provide a valid date for the end_date argument" + "end_date is required when using web service" ) }) -test_that("get_historical_data errors when end_date is before start_date", { +test_that("hy_daily_flows with hydat_path = FALSE errors when end_date is before start_date", { expect_error( - get_historical_data( + hy_daily_flows( station_number = "08MF005", + hydat_path = FALSE, start_date = Sys.Date(), end_date = Sys.Date() - 10 ), @@ -84,12 +91,13 @@ test_that("get_historical_data errors when end_date is before start_date", { ) }) -test_that("ws_daily_flows accepts Date objects", { 
+test_that("hy_daily_flows with hydat_path = FALSE accepts Date objects", { skip_if_not_installed("httptest2") httptest2::with_mock_dir("fixtures", { - result <- ws_daily_flows( + result <- hy_daily_flows( station_number = "08MF005", + hydat_path = FALSE, start_date = as.Date("2023-01-01"), end_date = as.Date("2023-12-31") ) diff --git a/tests/testthat/test-realtime-webservice.R b/tests/testthat/test-realtime-webservice.R index 4c6fd7d..71d585e 100644 --- a/tests/testthat/test-realtime-webservice.R +++ b/tests/testthat/test-realtime-webservice.R @@ -77,3 +77,21 @@ test_that("realtime_ws handles Date objects", { expect_equal(unique(output$Parameter), 46) }) }) + +test_that("realtime_ws handles empty CSV response (regression test for I() wrapper)", { + + # This test specifically addresses a bug where readr::read_csv() without I() + # would interpret the CSV header string as a file path instead of CSV text. + + httptest2::with_mock_dir("fixtures", { + expect_error( + realtime_ws( + station_number = "08MF005", + parameters = 46, + start_date = as.Date("2025-01-01"), + end_date = as.Date("2025-01-02") + ), + "No data exists for this station query" + ) + }) +}) diff --git a/tests/testthat/test-webservice-classes.R b/tests/testthat/test-webservice-classes.R index 7f098b2..ea61a17 100644 --- a/tests/testthat/test-webservice-classes.R +++ b/tests/testthat/test-webservice-classes.R @@ -138,9 +138,10 @@ test_that("print.ws handles data with no Date column", { }) httptest2::with_mock_dir("fixtures", { - test_that("ws_daily_flows returns ws class", { - result <- ws_daily_flows( + test_that("hy_daily_flows with hydat_path = FALSE returns ws class", { + result <- hy_daily_flows( station_number = "08MF005", + hydat_path = FALSE, start_date = as.Date("2023-01-01"), end_date = as.Date("2023-12-31") ) @@ -150,9 +151,10 @@ httptest2::with_mock_dir("fixtures", { expect_true(!is.null(attr(result, "missed_stns"))) }) - test_that("ws_daily_levels returns ws class", { - result <- 
ws_daily_levels( + test_that("hy_daily_levels with hydat_path = FALSE returns ws class", { + result <- hy_daily_levels( station_number = "08MF005", + hydat_path = FALSE, start_date = as.Date("2023-01-01"), end_date = as.Date("2023-12-31") ) diff --git a/vignettes/tidyhydat_an_introduction.Rmd b/vignettes/tidyhydat_an_introduction.Rmd index 4568283..f06e7d4 100644 --- a/vignettes/tidyhydat_an_introduction.Rmd +++ b/vignettes/tidyhydat_an_introduction.Rmd @@ -206,14 +206,15 @@ realtime_ws(station_number = "08MF005", end_date = Sys.Date()) ``` -## Historical web service functions -For smaller queries where downloading the entire HYDAT database is unnecessary, `tidyhydat` provides web service functions that access historical daily data directly from ECCC. These functions are particularly useful when you need data for only a few stations or a limited time period: +## Using the web service without HYDAT +For smaller queries where downloading the entire HYDAT database is unnecessary, you can use `hy_daily_flows()` and `hy_daily_levels()` with `hydat_path = FALSE` to access historical daily data directly from ECCC's web service. These are particularly useful when you need data for only a few stations or a limited time period: -### Historical flows and levels - `ws_daily_flows()` and `ws_daily_levels()` +### Historical flows and levels via web service These functions download historical daily data without requiring HYDAT. 
Both require a date range to be specified: ``` r -ws_daily_flows(station_number = "08MF005", +hy_daily_flows(station_number = "08MF005", + hydat_path = FALSE, start_date = "2020-01-01", end_date = "2020-12-31") ``` @@ -221,7 +222,8 @@ ws_daily_flows(station_number = "08MF005", Similarly for water levels: ``` r -ws_daily_levels(station_number = "08NL071", +hy_daily_levels(station_number = "08NL071", + hydat_path = FALSE, start_date = "2019-01-01", end_date = "2019-12-31") ``` @@ -229,11 +231,21 @@ ws_daily_levels(station_number = "08NL071", Multiple stations can be queried in a single call: ``` r -ws_daily_flows(station_number = c("08MF005", "08NL071"), +hy_daily_flows(station_number = c("08MF005", "08NL071"), + hydat_path = FALSE, start_date = "2020-01-01", end_date = "2020-12-31") ``` +## Combining validated and provisional data +The `available_flows()` and `available_levels()` functions combine validated historical data with provisional real-time data to provide a complete record: + +``` r +available_flows(station_number = "08MF005", + start_date = "2020-01-01", + end_date = Sys.Date()) +``` + ## Search functions You can also make use of auxiliary functions in `tidyhydat` called `search_stn_name()` and `search_stn_number()` to look for matches when you know part of a name of a station. For example: diff --git a/vignettes/tidyhydat_an_introduction.Rmd.orig b/vignettes/tidyhydat_an_introduction.Rmd.orig index 701a94d..97f8c47 100644 --- a/vignettes/tidyhydat_an_introduction.Rmd.orig +++ b/vignettes/tidyhydat_an_introduction.Rmd.orig @@ -12,9 +12,9 @@ vignette: > --- ```{r options, include=FALSE} -knitr::opts_chunk$set(echo = TRUE, - warning = FALSE, - message = FALSE, +knitr::opts_chunk$set(echo = TRUE, + warning = FALSE, + message = FALSE, fig.path = "vignette-fig-", fig.width=13, fig.height=7) ``` @@ -27,7 +27,7 @@ library(ggplot2) ``` # `tidyhydat` package -This vignette will outline a few key options that will hopefully make `tidyhydat` useful. 
+This vignette will outline a few key options that will hopefully make `tidyhydat` useful. ## HYDAT download To use many of the functions in the `tidyhydat` package you will need to download a version of the HYDAT database, Environment and Climate Change Canada's database of historical hydrometric data then tell R where to find the database. Conveniently `tidyhydat` does all this for you via: @@ -35,9 +35,35 @@ To use many of the functions in the `tidyhydat` package you will need to downloa download_hydat() ``` This downloads the most recent version of HYDAT and then saves it in a location on your computer where `tidyhydat`'s function will look for it. Do be patient though as this takes a long time! To see where HYDAT was saved you can run `hy_dir()`. Now that you have HYDAT downloaded and ready to go, you are all set to begin some hydrologic analysis. - -## Usage -Most functions in `tidyhydat` follow a common argument structure. We will use the `hy_daily_flows()` function for the following examples though the same approach applies to most functions in the package (See `ls("package:tidyhydat")` for a list of exported objects). Much of the functionality of `tidyhydat` originates with the choice of hydrometric stations that you are interested in. A user will often find themselves creating vectors of station numbers. There are several ways to do this. 
+ +## Combining validated and provisional data +For most analysis needs, the `available_flows()` and `available_levels()` functions provide the most complete picture by combining validated historical data with recent provisional real-time data: + +```{r, eval=FALSE} +available_flows( + station_number = "08MF005", + start_date = "2020-01-01", + end_date = Sys.Date() +) +``` + +These functions: +- Automatically combine validated data from HYDAT (or web service if HYDAT unavailable) with provisional real-time data +- Include an `Approval` column indicating whether each record is "final" (validated) or "provisional" (subject to revision) +- Aggregate real-time data to daily means for consistency +- Handle missing data gracefully + +For water levels, use `available_levels()`: +```{r, eval=FALSE} +available_levels( + station_number = "08MF005", + start_date = "2020-01-01", + end_date = Sys.Date() +) +``` + +## Usage of HYDAT functions +Most functions in `tidyhydat` follow a common argument structure. We will use the `hy_daily_flows()` function for the following examples though the same approach applies to most functions in the package (See `ls("package:tidyhydat")` for a list of exported objects). Much of the functionality of `tidyhydat` originates with the choice of hydrometric stations that you are interested in. A user will often find themselves creating vectors of station numbers. There are several ways to do this. The simplest case is if you would like to extract only one station. You can supply this directly to the `station_number` argument: ```{r example1, warning=FALSE} @@ -66,12 +92,12 @@ search_stn_name("canada") |> We saw above that if we were only interested in a subset of dates we could use the `start_date` and `end_date` arguments. A date must be supplied to both these arguments in the form of YYYY-MM-DD.
If you were interested in all daily flow data from station number "08LA001" for 1981, you would specify all days in 1981 : ```{r warning=FALSE, warning=FALSE, message=FALSE, eval=FALSE} -hy_daily_flows(station_number = "08LA001", - start_date = "1981-01-01", +hy_daily_flows(station_number = "08LA001", + start_date = "1981-01-01", end_date = "1981-12-31") ``` -This generally outlines the usage of the HYDAT functions within `tidyhydat`. +This generally outlines the usage of the HYDAT functions within `tidyhydat`. ## Real-time functions In addition to the approved and vetted data in the HYDAT database ECCC also offers unapproved data that is subject to revision. `tidyhydat` provides three functions to access these data sources. Remember these are **unapproved** data and should be treated as such: @@ -107,7 +133,7 @@ The `realtime_ws()` function provides access to ECCC's real-time web service. Th ```{r, eval=FALSE} realtime_ws(station_number = "08MF005", parameters = 46, ## Water level - start_date = Sys.Date() - 30, + start_date = Sys.Date() - lubridate::days(30), end_date = Sys.Date()) ``` @@ -115,31 +141,33 @@ You can query multiple parameters simultaneously. See `data("param_id")` for a c ```{r, eval=FALSE} realtime_ws(station_number = "08MF005", parameters = c(46, 47), ## Water level and discharge - start_date = Sys.Date() - 7, + start_date = Sys.Date() - lubridate::days(7), end_date = Sys.Date()) ``` -## Historical web service functions -For smaller queries where downloading the entire HYDAT database is unnecessary, `tidyhydat` provides web service functions that access historical daily data directly from ECCC.
These functions are particularly useful when you need data for only a few stations or a limited time period: +## Using the web service without HYDAT +For smaller queries where downloading the entire HYDAT database is unnecessary, you can use `hy_daily_flows()` and `hy_daily_levels()` with `hydat_path = FALSE` to access historical daily data directly from ECCC's web service. These are particularly useful when you need data for only a few stations or a limited time period: -### Historical flows and levels - `ws_daily_flows()` and `ws_daily_levels()` -These functions download historical daily data without requiring HYDAT. Both require a date range to be specified: +### Historical flows and levels via web service ```{r, eval=FALSE} -ws_daily_flows(station_number = "08MF005", +hy_daily_flows(station_number = "08MF005", + hydat_path = FALSE, start_date = "2020-01-01", end_date = "2020-12-31") ``` Similarly for water levels: ```{r, eval=FALSE} -ws_daily_levels(station_number = "08NL071", +hy_daily_levels(station_number = "08NL071", + hydat_path = FALSE, start_date = "2019-01-01", end_date = "2019-12-31") ``` Multiple stations can be queried in a single call: ```{r, eval=FALSE} -ws_daily_flows(station_number = c("08MF005", "08NL071"), +hy_daily_flows(station_number = c("08MF005", "08NL071"), + hydat_path = FALSE, start_date = "2020-01-01", end_date = "2020-12-31") ``` @@ -154,7 +182,7 @@ Similarly, `search_stn_number()` can be useful if you are interested in all stat search_stn_number("08MF") ``` -## Using joins +## Using joins Sometimes it is required to make use of information from two tables from HYDAT. In some cases, we need to combine the information into one table using a common column. 
Here we will illustrate calculating runoff by combining the `hy_stations` tables with the `hy_daily_flows` table by the `STATION_NUMBER` column: ```{r} stns <- c("08NH130", "08NH005") @@ -164,10 +192,10 @@ runoff_data <- hy_daily_flows(station_number = stns, start_date = "2000-01-01") select(STATION_NUMBER, STATION_NAME, DRAINAGE_AREA_GROSS), by = "STATION_NUMBER") |> ## conversion to mm/d - mutate(runoff = Value / DRAINAGE_AREA_GROSS * 86400 / 1e6 * 1e3) + mutate(runoff = Value / DRAINAGE_AREA_GROSS * 86400 / 1e6 * 1e3) -ggplot(runoff_data) + +ggplot(runoff_data) + geom_line(aes(x = Date, y = runoff, colour = STATION_NAME)) + labs(y = "Mean daily runoff [mm/d]") + scale_colour_viridis_d(option = "C") + @@ -182,7 +210,7 @@ This is an effective way to make use of the variety of tables available in HYDAT Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. - You may obtain a copy of the License at + You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0