diff --git a/R/fst-utils.R b/R/fst-utils.R index 04f86905..b0d7f7d0 100644 --- a/R/fst-utils.R +++ b/R/fst-utils.R @@ -2,10 +2,16 @@ #' @name fst-utils #' @noRd #' -#' @param cat_no,path As in \code{\link{read_abs}}. +#' @param cat_no,tpath As in \code{\link{read_abs}}. +#' @param table A length-one vector. +#' Either "all" or an integer vector specifying the table within +#' `cat_no`. If "all" or `integer(0)`, the filename just reflects the `cat_no`. +#' Otherwise, the filename will be specific for the `table`. Note that `read_abs` +#' accepts `length(tables) > 1` but `catno2fst` does not (since it would mean +#' every combination of `tables` would be cached). #' #' @return For `catno2fst` the path to the `fst` file to be saved or read, given -#' `cat_no` and `path`. +#' `cat_no`, `table`, and `path`. #' #' `fst_available` returns `TRUE` if and only if an appropriate `fst` file is #' available. @@ -16,18 +22,29 @@ catno2fst <- function(cat_no, + table = integer(0L), path = Sys.getenv("R_READABS_PATH", unset = tempdir())) { - hutils::provide.file(file.path(path, - "fst", - paste0(gsub(".", "-", cat_no, fixed = TRUE), - ".fst")), - on_failure = stop("`path = ", normalizePath(path, - winslash = "/"), + if (length(table) > 1L) { + stop("Internal error (catno2fst): length(table) > 1 at this time. Please report.") + } + basename.fst <- gsub(".", "-", cat_no, fixed = TRUE) + if (length(table) == 0L || identical(table, "all")) { + basename.fst <- paste0(basename.fst, ".fst") + } else if (is.integer(table)) { + basename.fst <- paste0(basename.fst, sprintf("T%02d", table), ".fst") + } else { + basename.fst <- paste0(basename.fst, "T0", tolower(as.character(table)), ".fst") + } + fullname.fst <- file.path(path, "fst", basename.fst) + hutils::provide.file(fullname.fst, + on_failure = stop("`path = ", + normalizePath(path, winslash = "/"), "`, ", "but it was not possible to write to this directory.")) } fst_available <- function(cat_no, + table = integer(0L), path = Sys.getenv("R_READABS_PATH", unset = tempdir())) { if (!requireNamespace("fst", quietly = TRUE) || @@ -42,16 +59,21 @@ fst_available <- function(cat_no, return(FALSE) } - file.fst <- catno2fst(cat_no, path) + file.fst <- catno2fst(cat_no, table = table, path) if (!file.exists(file.fst)) { return(FALSE) # nocov } + # Is the file clearly not an fst file + # (where "clearly not an fst file" means "empty" or "a directory")? + file_info <- file.info(file.fst, extra_cols = FALSE) + if (!file_info[["size"]] || file_info[["isdir"]]) { + return(FALSE) + } - # fst may be damaged. If it appears to be (i.e. fst metadata returns an error) + # fst may be damaged/not a real fst file. + # If it appears to be (i.e. fst metadata returns an error) # return FALSE - - out <- tryCatch(inherits(fst::fst.metadata(file.fst), "fstmetadata"), error = function(e) FALSE, warning = function(e) FALSE) diff --git a/R/read_abs.R b/R/read_abs.R index 8d0aaa95..478b548c 100644 --- a/R/read_abs.R +++ b/R/read_abs.R @@ -81,13 +81,11 @@ read_abs <- function(cat_no = NULL, retain_files = TRUE, check_local = TRUE) { - if (isTRUE(check_local) && + # Anything other than TRUE is equivalent to FALSE + check_local <- isTRUE(check_local) + if (check_local && + identical(tables, "all") && fst_available(cat_no = cat_no, path = path)) { - if (!identical(tables, "all")) { - warning("`tables` was provided", - "yet `check_local = TRUE` and fst files are available ", - "so `tables` will be ignored.") - } out <- fst::read_fst(path = catno2fst(cat_no = cat_no, path = path)) out <- tibble::as_tibble(out) if (is.null(series_id)) { @@ -133,10 +131,77 @@ read_abs <- function(cat_no = NULL, tables <- "all" } - if (!is.logical(metadata)) { + if (!is.atomic(tables)) { + stop("`tables` was not atomic.") + } + if (anyNA(tables)) { + warning("`tables` contains missing values, these will be removed.") + tables <- tables[!is.na(tables)] + } + if (!is.integer(tables) && length(tables) != 0L && is.numeric(tables)) { + # Edge case: if user supplies a very large number, + # any(tables != as.integer(tables)) + # below will return a cryptic error message (possibly during recursion). + # Unlikely to happen on purpose. + if (min(tables) < 0 || max(tables) > .Machine$integer.max) { + stop("`tables` was a numeric vector but had values outside [0, .Machine$integer.max]. ", + "These are unlikely values for table numbers and are ") + } + if (any(tables != as.integer(tables))) { + stop("`tables` was not an integer(ish) vector of table numbers.") + } + tables <- as.integer(tables) + } + + if (!is.logical(metadata) || length(metadata) != 1L || is.na(metadata)) { stop("`metadata` argument must be either TRUE or FALSE") } + if (check_local) { + # In the case of table = "all" we simply get the fst file for + # the whole cat_no. Equally simple is the case of a single + # table. Both are handled by length(tables) <= 1L + + # If len > 1 integer vector is supplied to tables, we recurse + # for each element of tables, checking the table's fst file availability + # independently of the other elements. Either we use the fst + # file or we download that single table. Each operation of lapply + # will produce a tibble. + if (length(tables) <= 1L) { + if (fst_available(cat_no = cat_no, table = tables, path = path)) { + file.fst <- catno2fst(cat_no = cat_no, table = tables, path = path) + out <- fst::read_fst(file.fst) + out <- tibble::as_tibble(out) + if (is.null(series_id)) { + return(out) + } + if (series_id %in% out[["series_id"]]) { + users_series_id <- series_id + out <- dplyr::filter(out, series_id %in% users_series_id) + } else { + warning("`series_id` was provided,", + "but was not present in the local table and will be ignored.") + } + return(out) + } else { + # continue as if check_local = FALSE + } + } else { + # Recursion + out <- + lapply(tables, function(ta) { + read_abs(cat_no = cat_no, + tables = ta, + series_id = series_id, + path = path, + metadata = metadata, + show_progress_bars = show_progress_bars, + retain_files = retain_files) + }) + return(dplyr::bind_rows(out)) + } + } + # satisfy CRAN ProductReleaseDate = SeriesID = NULL @@ -252,13 +317,13 @@ read_abs <- function(cat_no = NULL, } # if fst is available, and what has been requested is the full data, - # write the result to the /fst/ file + # or a single table, retain the fst file. if (retain_files && - is.null(series_id) && - identical(tables, "all") && - requireNamespace("fst", quietly = TRUE)) { + requireNamespace("fst", quietly = TRUE) && + length(tables) <= 1L) { fst::write_fst(sheet, catno2fst(cat_no = cat_no, + table = tables, path = path)) } diff --git a/R/read_abs_local.R b/R/read_abs_local.R index 750a90a3..bcadc68c 100644 --- a/R/read_abs_local.R +++ b/R/read_abs_local.R @@ -76,7 +76,7 @@ read_abs_local <- function(cat_no = NULL, } # Retrieve cache if available - if (is.null(filenames) && isTRUE(use_fst) && fst_available(cat_no, path)) { + if (is.null(filenames) && isTRUE(use_fst) && fst_available(cat_no, path = path)) { out <- fst::read_fst(path = catno2fst(cat_no = cat_no, path = path)) return(tibble::as_tibble(out)) } diff --git a/tests/testthat/test-fst.R b/tests/testthat/test-fst.R index 88dd0a4d..8dbb861e 100644 --- a/tests/testthat/test-fst.R +++ b/tests/testthat/test-fst.R @@ -25,9 +25,6 @@ test_that("read_abs works out of the box", { regexp = "but was not present in the local table and will be ignored", fixed = TRUE) - expect_warning(read_abs(cat_no = "6401.0", tables = "not all"), - regexp = "tables.*will be ignored") - })