Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 34 additions & 12 deletions R/fst-utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,16 @@
#' @name fst-utils
#' @noRd
#'
#' @param cat_no,path As in \code{\link{read_abs}}.
#' @param cat_no,tpath As in \code{\link{read_abs}}.
#' @param table A length-one vector.
#' Either "all" or an integer vector specifying the table within
#' `cat_no`. If "all" or `integer(0)`, the filename just reflects the `cat_no`.
#' Otherwise, the filename will be specific for the `table`. Note that `read_abs`
#' accepts `length(tables) > 1` but `catno2fst` does not (since it would mean
#' every combination of `tables` would be cached).
#'
#' @return For `catno2fst` the path to the `fst` file to be saved or read, given
#' `cat_no` and `path`.
#' `cat_no`, `table`, and `path`.
#'
#' `fst_available` returns `TRUE` if and only if an appropriate `fst` file is
#' available.
Expand All @@ -16,18 +22,29 @@


catno2fst <- function(cat_no,
table = integer(0L),
path = Sys.getenv("R_READABS_PATH", unset = tempdir())) {
hutils::provide.file(file.path(path,
"fst",
paste0(gsub(".", "-", cat_no, fixed = TRUE),
".fst")),
on_failure = stop("`path = ", normalizePath(path,
winslash = "/"),
if (length(table) > 1L) {
stop("Internal error (catno2fst): length(table) > 1 at this time. Please report.")
}
basename.fst <- gsub(".", "-", cat_no, fixed = TRUE)
if (length(table) == 0L || identical(table, "all")) {
basename.fst <- paste0(basename.fst, ".fst")
} else if (is.integer(table)) {
basename.fst <- paste0(basename.fst, sprintf("T%02d", table), ".fst")
} else {
basename.fst <- paste0(basename.fst, "T0", tolower(as.character(table)), ".fst")
}
fullname.fst <- file.path(path, "fst", basename.fst)
hutils::provide.file(fullname.fst,
on_failure = stop("`path = ",
normalizePath(path, winslash = "/"),
"`, ",
"but it was not possible to write to this directory."))
}

fst_available <- function(cat_no,
table = integer(0L),
path = Sys.getenv("R_READABS_PATH",
unset = tempdir())) {
if (!requireNamespace("fst", quietly = TRUE) ||
Expand All @@ -42,16 +59,21 @@ fst_available <- function(cat_no,
return(FALSE)
}

file.fst <- catno2fst(cat_no, path)
file.fst <- catno2fst(cat_no, table = table, path)

if (!file.exists(file.fst)) {
return(FALSE) # nocov
}
# Is the file clearly not an fst file
# (where "clearly not an fst file" means "empty" or "a directory")?
file_info <- file.info(file.fst, extra_cols = FALSE)
if (!file_info[["size"]] || file_info[["isdir"]]) {
return(FALSE)
}

# fst may be damaged. If it appears to be (i.e. fst metadata returns an error)
# fst may be damaged/not a real fst file.
# If it appears to be (i.e. fst metadata returns an error)
# return FALSE


out <- tryCatch(inherits(fst::fst.metadata(file.fst), "fstmetadata"),
error = function(e) FALSE,
warning = function(e) FALSE)
Expand Down
87 changes: 76 additions & 11 deletions R/read_abs.R
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,11 @@ read_abs <- function(cat_no = NULL,
retain_files = TRUE,
check_local = TRUE) {

if (isTRUE(check_local) &&
# Anything other than TRUE is equivalent to FALSE
check_local <- isTRUE(check_local)
if (check_local &&
identical(tables, "all") &&
fst_available(cat_no = cat_no, path = path)) {
if (!identical(tables, "all")) {
warning("`tables` was provided",
"yet `check_local = TRUE` and fst files are available ",
"so `tables` will be ignored.")
}
out <- fst::read_fst(path = catno2fst(cat_no = cat_no, path = path))
out <- tibble::as_tibble(out)
if (is.null(series_id)) {
Expand Down Expand Up @@ -133,10 +131,77 @@ read_abs <- function(cat_no = NULL,
tables <- "all"
}

if (!is.logical(metadata)) {
if (!is.atomic(tables)) {
stop("`tables` was not atomic.")
}
if (anyNA(tables)) {
warning("`tables` contains missing values, these will be removed.")
tables <- tables[!is.na(tables)]
}
if (!is.integer(tables) && length(tables) != 0L && is.numeric(tables)) {
# Edge case: if user supplies a very large number,
# any(tables != as.integer(tables))
# below will return a cryptic error message (possibly during recursion).
# Unlikely to happen on purpose.
if (min(tables) < 0 || max(tables) > .Machine$integer.max) {
stop("`tables` was a numeric vector but had values outside [0, .Machine$integer.max]. ",
"These are unlikely values for table numbers and are ")
}
if (any(tables != as.integer(tables))) {
stop("`tables` was not an integer(ish) vector of table numbers.")
}
tables <- as.integer(tables)
}

if (!is.logical(metadata) || length(metadata) != 1L || is.na(metadata)) {
stop("`metadata` argument must be either TRUE or FALSE")
}

if (check_local) {
# In the case of table = "all" we simply get the fst file for
# the whole cat_no. Equally simple is the case of a single
# table. Both are handled by length(tables) <= 1L

# If len > 1 integer vector is supplied to tables, we recurse
# for each element of tables, checking the table's fst file availability
# independently of the other elements. Either we use the fst
# file or we download that single table. Each operation of lapply
# will produce a tibble.
if (length(tables) <= 1L) {
if (fst_available(cat_no = cat_no, table = tables, path = path)) {
file.fst <- catno2fst(cat_no = cat_no, table = tables, path = path)
out <- fst::read_fst(file.fst)
out <- tibble::as_tibble(out)
if (is.null(series_id)) {
return(out)
}
if (series_id %in% out[["series_id"]]) {
users_series_id <- series_id
out <- dplyr::filter(out, series_id %in% users_series_id)
} else {
warning("`series_id` was provided,",
"but was not present in the local table and will be ignored.")
}
return(out)
} else {
# continue as if check_local = FALSE
}
} else {
# Recursion
out <-
lapply(tables, function(ta) {
read_abs(cat_no = cat_no,
tables = ta,
series_id = series_id,
path = path,
metadata = metadata,
show_progress_bars = show_progress_bars,
retain_files = retain_files)
})
return(dplyr::bind_rows(out))
}
}

# satisfy CRAN
ProductReleaseDate = SeriesID = NULL

Expand Down Expand Up @@ -252,13 +317,13 @@ read_abs <- function(cat_no = NULL,
}

# if fst is available, and what has been requested is the full data,
# write the result to the <path>/fst/ file
# or a single table, retain the fst file.
if (retain_files &&
is.null(series_id) &&
identical(tables, "all") &&
requireNamespace("fst", quietly = TRUE)) {
requireNamespace("fst", quietly = TRUE) &&
length(tables) <= 1L) {
fst::write_fst(sheet,
catno2fst(cat_no = cat_no,
table = tables,
path = path))
}

Expand Down
2 changes: 1 addition & 1 deletion R/read_abs_local.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ read_abs_local <- function(cat_no = NULL,
}

# Retrieve cache if available
if (is.null(filenames) && isTRUE(use_fst) && fst_available(cat_no, path)) {
if (is.null(filenames) && isTRUE(use_fst) && fst_available(cat_no, path = path)) {
out <- fst::read_fst(path = catno2fst(cat_no = cat_no, path = path))
return(tibble::as_tibble(out))
}
Expand Down
3 changes: 0 additions & 3 deletions tests/testthat/test-fst.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,6 @@ test_that("read_abs works out of the box", {
regexp = "but was not present in the local table and will be ignored",
fixed = TRUE)

expect_warning(read_abs(cat_no = "6401.0", tables = "not all"),
regexp = "tables.*will be ignored")



})
Expand Down