Skip to content

Commit

Permalink
Add pf_access_distance_matrix() function.
Browse files Browse the repository at this point in the history
  • Loading branch information
edwardlavender committed Nov 16, 2021
1 parent 6e39a5b commit 4dfd99d
Show file tree
Hide file tree
Showing 12 changed files with 524 additions and 217 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ Suggests:
glatos,
plotly, animation, scales, viridis,
circular,
Rfast,
httr, jsonlite,
rmarkdown, knitr
Remotes:
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ export(make_matrix_ids)
export(make_matrix_receivers)
export(mask_io)
export(pf)
export(pf_access_distance_matrix)
export(pf_access_history)
export(pf_access_history_files)
export(pf_access_particles_unique)
Expand Down
1 change: 1 addition & 0 deletions R/flapper-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@
#' \item \link{pf_access_history_files} lists particle histories saved to file;
#' \item \link{pf_access_history} accesses particle histories;
#' \item \link{pf_access_particles_unique} accesses unique particle samples;
#' \item \link{pf_access_distance_matrix} calculates distances between unique particle samples;
#' \item \link{pf_plot_history} plots simulated particle histories;
#' \item \link{pf_animate_history} animates simulated particle histories;
#' \item \link{pf_simplify} assembles movement paths from particle histories;
Expand Down
412 changes: 412 additions & 0 deletions R/pf_access.R

Large diffs are not rendered by default.

214 changes: 0 additions & 214 deletions R/pf_analyse_archive.R
Original file line number Diff line number Diff line change
@@ -1,217 +1,3 @@
######################################
######################################
#### pf_access_history_from_file()

#' @title List `history' files from a PF algorithm
#' @description This function creates an ordered vector (or list) of `history' files derived from the particle filtering (PF) algorithm (\code{\link[flapper]{pf}}). This is applicable if \code{\link[flapper]{pf}} is implemented with the \code{write_history} argument specified.
#'
#' @param root A string that defines the directory in which files are located.
#' @param use_absolute_paths A logical variable that defines whether to return relative paths (\code{FALSE}) or absolute paths (\code{TRUE}) (see \code{\link[tools]{file_path_as_absolute}}).
#' @param use_list A logical variable that defines whether or not return a vector (\code{use_list = FALSE}) or a list (\code{use_list = TRUE}).
#' @param ... Additional arguments passed to \code{\link[base]{list.files}} (excluding \code{full.names}).
#'
#' @details This function requires the \code{\link[stringr]{stringr}} package.
#'
#' @return The function returns an ordered list of file paths.
#'
#' @examples
#' #### Example (1): Example with default arguments
#' # Define a directory in which to save files from PF
#' root <- paste0(tempdir(), "/pf/")
#' dir.create(root)
#' # Implement the PF algorithm with write_history specified
#' # ... For speed, we will implement the algorithm using pre-defined data
#' pf_args <- dat_dcpf_histories$args
#' pf_args$calc_distance_euclid_fast <- TRUE
#' pf_args$write_history <- list(file = root)
#' do.call(pf, pf_args)
#' # List the files
#' files <- pf_access_history_files(root)
#' utils::head(files)
#'
#' @seealso This function is designed to list outputs from \code{\link[flapper]{pf}} (see the \code{write_history} argument).
#' @author Edward Lavender
#' @export

pf_access_history_files <- function(root, use_absolute_paths = FALSE, use_list = FALSE,...){
if(!requireNamespace("stringr", quietly = TRUE)){
stop("This function requires the 'stringr' package. Please install it before continuing with install.packages('stringr').")
}
check...("full.names",...)
check_dir(input = root)
files <- list.files(root,...)
if(!grepl("pf_", files[1], fixed = TRUE)){
stop("File naming structure is unrecognised.", immediate. = TRUE)
}
files <- data.frame(index = 1:length(files), name = files)
files$pf_id <- stringr::str_split_fixed(files$name, "_", 2)[, 2]
files$pf_id <- substr(files$pf_id, 1, nchar(files$pf_id) - 4)
files$pf_id <- as.integer(as.character(files$pf_id))
files <- files %>% dplyr::arrange(.data$pf_id)
files <- list.files(root, full.names = TRUE,...)[files$index]
if(use_absolute_paths) {
files <- sapply(files, function(f) tools::file_path_as_absolute(f))
names(files) <- NULL
}
if(use_list) files <- as.list(files)
return(files)
}


########################################
########################################
#### pf_access_history

#' @title Access the `history' element of a \code{\link[flapper]{pf_archive-class}} object
#' @description This function accesses and simplifies the `history' list in a \code{\link[flapper]{pf_archive-class}} object.
#' @param archive A \code{\link[flapper]{pf_archive-class}} object.
#' @param bathy (optional) A \code{\link[raster]{raster}} that defines the grid across the area over which particle filtering was applied. If unsupplied, this is extracted from \code{archive} if available.
#' @details From the `history' element of a \code{\link[flapper]{pf_archive-class}} object, this function extracts particle samples as a dataframe with columns for time steps, cell IDs, cell probabilities and coordinates (if \code{bathy} is available).
#' @return The function returns a dataframe that defines, for each time step (`timestep'), particle samples (`cell_id'), associated probabilities (`cell_pr') and, if \code{bathy} is available, cell coordinates (`cell_x', `cell_y' and `cell_z').
#' @examples
#' pf_access_history(dat_dcpf_histories)
#' @author Edward Lavender
#' @export

pf_access_history <- function(archive,
bathy = NULL
){
check_class(input = archive, to_class = "pf_archive")
if(is.null(bathy)) bathy <- archive$args$bathy
history <- lapply(1:length(archive$history), function(t){
elm <- archive$history[[t]]
if(!rlang::has_name(elm, "timestep")) elm$timestep <- t
elm <- elm[, c("timestep", "id_current", "pr_current")]
})
history <- do.call(rbind, history)
colnames(history) <- c("timestep", "cell_id", "cell_pr")
if(!is.null(bathy)){
history[, c("cell_x", "cell_y")] <- raster::extract(bathy, history$cell_id)
history$cell_z <- raster::extract(bathy, history$cell_id)
}
cols <- c("timestep", "cell_id", "cell_x", "cell_y", "cell_z", "cell_pr")
history[, cols[cols %in% colnames(history)]]
history <-
history %>%
dplyr::arrange(.data$timestep, .data$cell_id, .data$cell_pr)
return(history)
}


########################################
########################################
#### pf_access_particles_unique()

#' @title Access the cells sampled by PF
#' @description Given a list of particle histories (or a list of file paths), this function accesses the unique particles (cells) sampled by a particle filtering (PF) algorithm (\code{\link[flapper]{pf}}).
#'
#' @param history A \code{\link[flapper]{pf_archive-class}} class object from \code{\link[flapper]{pf}}, the list of particle histories (the `history' element of a \code{\link[flapper]{pf_archive-class}} object) from \code{\link[flapper]{pf}} or a list of file paths to particle histories.
#' @param use_memory_safe If \code{history} is a record of file paths, \code{use_memory_safe} is a logical variable that defines whether or not to use the `memory-safe(r)' method to access unique cell samples. If specified, the function sequentially loads each file and re-defines the vector of unique particles at each time step as the unique combination of previous (unique) samples and the samples from the current time step. This may be slow. Alternatively, under the default \code{use_memory_safe = FALSE} option, the function loads each file (in parallel if specified), retaining all sampled particles, before selecting the unique particles (once) at the end of this process. This option should be faster.
#' @param cl,varlist Parallelisation options implemented if (a) particle histories are contained in memory or (b) particle histories are supplied as a list of file paths with \code{use_memory_safe = FALSE}. \code{cl} is a cluster object created by \code{\link[parallel]{makeCluster}}. If supplied, the connection to the cluster is closed within the function. \code{varlist} is a character vector of names of objects to export that is passed to the \code{varlist} argument of \code{\link[parallel]{clusterExport}}. Exported objects must be located in the global environment.
#'
#' @return The function returns a vector of the unique particles sampled by the PF algorithm.
#'
#' @examples
#' #### Example (1): Access unique particles when 'history' exists in memory
#' # Access unique particles from a pf_archive object
#' pf_access_particles_unique(dat_dcpf_histories)
#' # Access unique particles from a list of particle histories
#' pf_access_particles_unique(dat_dcpf_histories$history)
#' # Supply a cluster to speed up the algorithm (for very large lists)
#' pf_access_particles_unique(dat_dcpf_histories$history,
#' cl = parallel::makeCluster(2L))
#'
#' #### Example (2): Access unique particles when 'history' is a list of file paths
#'
#' ## Write example particle histories to file (to load)
#' root <- paste0(tempdir(), "/pf/")
#' dir.create(root)
#' pf_args <- dat_dcpf_histories$args
#' pf_args$calc_distance_euclid_fast <- TRUE
#' pf_args$write_history <- list(file = root)
#' out_pf <- do.call(pf, pf_args)
#'
#' ## Access particle histories using default options (use_memory_safe = FALSE)
#' # Access particle histories via pf_access_history_files()
#' pf_access_particles_unique(pf_access_history_files(root, use_list = TRUE))
#' # Supply a cluster to speed up the algorithm (for very large lists)
#' pf_access_particles_unique(pf_access_history_files(root, use_list = TRUE),
#' cl = parallel::makeCluster(2L))
#'
#' ## Access particle histories using the 'memory_safe' option
#' # For large lists, this is likely to be slower
#' # ... but it may be the only option in some cases.
#' pf_access_particles_unique(pf_access_history_files(root, use_list = TRUE),
#' use_memory_safe = TRUE)
#'
#' @seealso \code{\link[flapper]{pf}} implements particle filtering.
#' @author Edward Lavender
#' @export

pf_access_particles_unique <- function(history,
use_memory_safe = FALSE,
cl = NULL, varlist = NULL){

#### Setup
check_class(input = history, to_class = "list")
if(inherits(history, "pf_archive")) history <- history$history
history_1 <- history[[1]]
read_history <- FALSE
if(inherits(history_1, "character")){
read_history <- TRUE
if(!file.exists(history_1)) stop(paste0("history[[1]] ('", history_1, "') does not exist."))
history_1 <- readRDS(history_1)
}
if(is.null(cl) & !is.null(varlist)) {
warning("'cl' is NULL but 'varlist' is not: 'varlist' ignored.",
immediate. = TRUE, call. = FALSE)
varlist <- FALSE
}

#### Access unique cells (from file)
if(read_history){

## Memory-safe(r) option: load files sequentially, selecting unique cells at each step
if(use_memory_safe){
if(!is.null(cl)) {
warning("'cl' is not implemented for loading files when use_memory_safe = TRUE.",
immediate. = TRUE, call. = FALSE)
cl <- varlist <- NULL
}
cells <- unique(history_1$id_current)
for(i in 2:length(history)){
cells <- unique(c(cells, readRDS(history[[i]])$id_current))
}

## Faster option: load all cells for each time step, selecting unique cells at the end
} else {
if(!is.null(cl) & is.null(varlist)) parallel::clusterExport(cl = cl, varlist = varlist)
cells_by_time <-
pbapply::pblapply(history,
cl = cl,
function(f) readRDS(f)$id_current)
if(!is.null(cl)) parallel::stopCluster(cl = cl)
cells <- unlist(cells_by_time)
cells <- unique(cells)

}

#### Access unique cells (in memory)
} else {

if(!is.null(cl) & is.null(varlist)) parallel::clusterExport(cl = cl, varlist = varlist)
cells <- pbapply::pblapply(history, cl = cl, function(d) d$id_current)
if(!is.null(cl)) parallel::stopCluster(cl = cl)
cells <- unlist(cells)
cells <- unique(cells)

}

#### Return unique cells
return(cells)
}


########################################
########################################
#### pf_plot_history()
Expand Down
1 change: 1 addition & 0 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ Each algorithm (AC, DC and ACDC) can be extended through incorporation of a move
* `pf_access_history_files()` lists particle histories saved to file;
* `pf_access_history()` accesses particle histories;
* `pf_access_particles_unique()` accesses unique particle samples;
* `pf_access_distance_matrix()` calculates distances between unique particle samples;
* `pf_plot_history()` plots simulated particle histories;
* `pf_animate_history()` animates simulated particle histories;
* `pf_simplify()` assembles movement paths from particle histories;
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,8 @@ process provided by the `pf*()` family of functions:
- `pf_access_history_files()` lists particle histories saved to file;
- `pf_access_history()` accesses particle histories;
- `pf_access_particles_unique()` accesses unique particle samples;
- `pf_access_distance_matrix()` calculates distances between unique
particle samples;
- `pf_plot_history()` plots simulated particle histories;
- `pf_animate_history()` animates simulated particle histories;
- `pf_simplify()` assembles movement paths from particle histories;
Expand Down
1 change: 1 addition & 0 deletions man/flapper.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 4dfd99d

Please sign in to comment.