Add pf_access_distance_matrix() function.

edwardlavender · Nov 16, 2021 · 4dfd99d · 4dfd99d
1 parent 6e39a5b
commit 4dfd99d
Show file tree

Hide file tree

Showing 12 changed files with 524 additions and 217 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -17,6 +17,7 @@ Suggests:
     glatos,
     plotly, animation, scales, viridis,
     circular, 
+    Rfast,
     httr, jsonlite,
     rmarkdown, knitr
 Remotes: 

diff --git a/NAMESPACE b/NAMESPACE
@@ -56,6 +56,7 @@ export(make_matrix_ids)
 export(make_matrix_receivers)
 export(mask_io)
 export(pf)
+export(pf_access_distance_matrix)
 export(pf_access_history)
 export(pf_access_history_files)
 export(pf_access_particles_unique)

diff --git a/R/flapper-package.R b/R/flapper-package.R
@@ -136,6 +136,7 @@
 #'       \item \link{pf_access_history_files} lists particle histories saved to file;
 #'       \item \link{pf_access_history} accesses particle histories;
 #'       \item \link{pf_access_particles_unique} accesses unique particle samples;
+#'       \item \link{pf_access_distance_matrix} calculates distances between unique particle samples;
 #'       \item \link{pf_plot_history} plots simulated particle histories;
 #'       \item \link{pf_animate_history} animates simulated particle histories;
 #'       \item \link{pf_simplify} assembles movement paths from particle histories;

diff --git a/R/pf_access.R b/R/pf_access.R
diff --git a/R/pf_analyse_archive.R b/R/pf_analyse_archive.R
@@ -1,217 +1,3 @@
-######################################
-######################################
-#### pf_access_history_from_file()
-
-#' @title List `history' files from a PF algorithm
-#' @description This function creates an ordered vector (or list) of `history' files derived from the particle filtering (PF) algorithm (\code{\link[flapper]{pf}}). This is applicable if \code{\link[flapper]{pf}} is implemented with the \code{write_history} argument specified.
-#'
-#' @param root A string that defines the directory in which files are located.
-#' @param use_absolute_paths A logical variable that defines whether to return relative paths (\code{FALSE}) or absolute paths (\code{TRUE}) (see \code{\link[tools]{file_path_as_absolute}}).
-#' @param use_list A logical variable that defines whether or not return a vector (\code{use_list = FALSE}) or a list (\code{use_list = TRUE}).
-#' @param ... Additional arguments passed to \code{\link[base]{list.files}} (excluding \code{full.names}).
-#'
-#' @details This function requires the \code{\link[stringr]{stringr}} package.
-#'
-#' @return The function returns an ordered list of file paths.
-#'
-#' @examples
-#' #### Example (1): Example with default arguments
-#' # Define a directory in which to save files from PF
-#' root <- paste0(tempdir(), "/pf/")
-#' dir.create(root)
-#' # Implement the PF algorithm with write_history specified
-#' # ... For speed, we will implement the algorithm using pre-defined data
-#' pf_args <- dat_dcpf_histories$args
-#' pf_args$calc_distance_euclid_fast <- TRUE
-#' pf_args$write_history             <- list(file = root)
-#' do.call(pf, pf_args)
-#' # List the files
-#' files <- pf_access_history_files(root)
-#' utils::head(files)
-#'
-#' @seealso This function is designed to list outputs from \code{\link[flapper]{pf}} (see the \code{write_history} argument).
-#' @author Edward Lavender
-#' @export
-
-pf_access_history_files <- function(root, use_absolute_paths = FALSE, use_list = FALSE,...){
-  if(!requireNamespace("stringr", quietly = TRUE)){
-    stop("This function requires the 'stringr' package. Please install it before continuing with install.packages('stringr').")
-  }
-  check...("full.names",...)
-  check_dir(input = root)
-  files <- list.files(root,...)
-  if(!grepl("pf_", files[1], fixed = TRUE)){
-    stop("File naming structure is unrecognised.", immediate. = TRUE)
-  }
-  files <- data.frame(index = 1:length(files), name = files)
-  files$pf_id <- stringr::str_split_fixed(files$name, "_", 2)[, 2]
-  files$pf_id <- substr(files$pf_id, 1, nchar(files$pf_id) - 4)
-  files$pf_id <- as.integer(as.character(files$pf_id))
-  files <- files %>% dplyr::arrange(.data$pf_id)
-  files <- list.files(root, full.names = TRUE,...)[files$index]
-  if(use_absolute_paths) {
-    files <- sapply(files, function(f) tools::file_path_as_absolute(f))
-    names(files) <- NULL
-  }
-  if(use_list) files <- as.list(files)
-  return(files)
-}
-
-
-########################################
-########################################
-#### pf_access_history
-
-#' @title Access the `history' element of a \code{\link[flapper]{pf_archive-class}} object
-#' @description This function accesses and simplifies the `history' list in a \code{\link[flapper]{pf_archive-class}} object.
-#' @param archive A \code{\link[flapper]{pf_archive-class}} object.
-#' @param bathy (optional) A \code{\link[raster]{raster}} that defines the grid across the area over which particle filtering was applied. If unsupplied, this is extracted from \code{archive} if available.
-#' @details From the `history' element of a \code{\link[flapper]{pf_archive-class}} object, this function extracts particle samples as a dataframe with columns for time steps, cell IDs, cell probabilities and coordinates (if \code{bathy} is available).
-#' @return The function returns a dataframe that defines, for each time step (`timestep'), particle samples (`cell_id'), associated probabilities (`cell_pr') and, if \code{bathy} is available, cell coordinates (`cell_x', `cell_y' and `cell_z').
-#' @examples
-#' pf_access_history(dat_dcpf_histories)
-#' @author Edward Lavender
-#' @export
-
-pf_access_history <- function(archive,
-                              bathy = NULL
-){
-  check_class(input = archive, to_class = "pf_archive")
-  if(is.null(bathy)) bathy <- archive$args$bathy
-  history <- lapply(1:length(archive$history), function(t){
-    elm <- archive$history[[t]]
-    if(!rlang::has_name(elm, "timestep")) elm$timestep <- t
-    elm <- elm[, c("timestep", "id_current", "pr_current")]
-  })
-  history <- do.call(rbind, history)
-  colnames(history) <- c("timestep", "cell_id", "cell_pr")
-  if(!is.null(bathy)){
-    history[, c("cell_x", "cell_y")] <- raster::extract(bathy, history$cell_id)
-    history$cell_z <- raster::extract(bathy, history$cell_id)
-  }
-  cols <- c("timestep", "cell_id", "cell_x", "cell_y", "cell_z", "cell_pr")
-  history[, cols[cols %in% colnames(history)]]
-  history <-
-    history %>%
-    dplyr::arrange(.data$timestep, .data$cell_id, .data$cell_pr)
-  return(history)
-}
-
-
-########################################
-########################################
-#### pf_access_particles_unique()
-
-#' @title Access the cells sampled by PF
-#' @description Given a list of particle histories (or a list of file paths), this function accesses the unique particles (cells) sampled by a particle filtering (PF) algorithm (\code{\link[flapper]{pf}}).
-#'
-#' @param history A \code{\link[flapper]{pf_archive-class}} class object from \code{\link[flapper]{pf}}, the list of particle histories (the `history' element of a \code{\link[flapper]{pf_archive-class}} object) from \code{\link[flapper]{pf}} or a list of file paths to particle histories.
-#' @param use_memory_safe If \code{history} is a record of file paths, \code{use_memory_safe} is a logical variable that defines whether or not to use the `memory-safe(r)' method to access unique cell samples. If specified, the function sequentially loads each file and re-defines the vector of unique particles at each time step as the unique combination of previous (unique) samples and the samples from the current time step. This may be slow. Alternatively, under the default \code{use_memory_safe = FALSE} option, the function loads each file (in parallel if specified), retaining all sampled particles, before selecting the unique particles (once) at the end of this process. This option should be faster.
-#' @param cl,varlist Parallelisation options implemented if (a) particle histories are contained in memory or (b) particle histories are supplied as a list of file paths with \code{use_memory_safe = FALSE}. \code{cl} is a cluster object created by \code{\link[parallel]{makeCluster}}. If supplied, the connection to the cluster is closed within the function. \code{varlist} is a character vector of names of objects to export that is passed to the \code{varlist} argument of \code{\link[parallel]{clusterExport}}. Exported objects must be located in the global environment.
-#'
-#' @return The function returns a vector of the unique particles sampled by the PF algorithm.
-#'
-#' @examples
-#' #### Example (1): Access unique particles when 'history' exists in memory
-#' # Access unique particles from a pf_archive object
-#' pf_access_particles_unique(dat_dcpf_histories)
-#' # Access unique particles from a list of particle histories
-#' pf_access_particles_unique(dat_dcpf_histories$history)
-#' # Supply a cluster to speed up the algorithm (for very large lists)
-#' pf_access_particles_unique(dat_dcpf_histories$history,
-#'                            cl = parallel::makeCluster(2L))
-#'
-#' #### Example (2): Access unique particles when 'history' is a list of file paths
-#'
-#' ## Write example particle histories to file (to load)
-#' root <- paste0(tempdir(), "/pf/")
-#' dir.create(root)
-#' pf_args <- dat_dcpf_histories$args
-#' pf_args$calc_distance_euclid_fast <- TRUE
-#' pf_args$write_history <- list(file = root)
-#' out_pf <- do.call(pf, pf_args)
-#'
-#' ## Access particle histories using default options (use_memory_safe = FALSE)
-#' # Access particle histories via pf_access_history_files()
-#' pf_access_particles_unique(pf_access_history_files(root, use_list = TRUE))
-#' # Supply a cluster to speed up the algorithm (for very large lists)
-#' pf_access_particles_unique(pf_access_history_files(root, use_list = TRUE),
-#'                            cl = parallel::makeCluster(2L))
-#'
-#' ## Access particle histories using the 'memory_safe' option
-#' # For large lists, this is likely to be slower
-#' # ... but it may be the only option in some cases.
-#' pf_access_particles_unique(pf_access_history_files(root, use_list = TRUE),
-#'                            use_memory_safe = TRUE)
-#'
-#' @seealso \code{\link[flapper]{pf}} implements particle filtering.
-#' @author Edward Lavender
-#' @export
-
-pf_access_particles_unique <- function(history,
-                                       use_memory_safe = FALSE,
-                                       cl = NULL, varlist = NULL){
-
-  #### Setup
-  check_class(input = history, to_class = "list")
-  if(inherits(history, "pf_archive")) history <- history$history
-  history_1   <- history[[1]]
-  read_history <- FALSE
-  if(inherits(history_1, "character")){
-    read_history <- TRUE
-    if(!file.exists(history_1)) stop(paste0("history[[1]] ('", history_1, "') does not exist."))
-    history_1 <- readRDS(history_1)
-  }
-  if(is.null(cl) & !is.null(varlist)) {
-    warning("'cl' is NULL but 'varlist' is not: 'varlist' ignored.",
-            immediate. = TRUE, call. = FALSE)
-    varlist <- FALSE
-  }
-
-  #### Access unique cells (from file)
-  if(read_history){
-
-    ## Memory-safe(r) option: load files sequentially, selecting unique cells at each step
-    if(use_memory_safe){
-      if(!is.null(cl)) {
-        warning("'cl' is not implemented for loading files when use_memory_safe = TRUE.",
-                immediate. = TRUE, call. = FALSE)
-        cl <- varlist <- NULL
-      }
-      cells <- unique(history_1$id_current)
-      for(i in 2:length(history)){
-        cells <- unique(c(cells, readRDS(history[[i]])$id_current))
-      }
-
-      ## Faster option: load all cells for each time step, selecting unique cells at the end
-    } else {
-      if(!is.null(cl) & is.null(varlist)) parallel::clusterExport(cl = cl, varlist = varlist)
-      cells_by_time <-
-        pbapply::pblapply(history,
-                          cl = cl,
-                          function(f) readRDS(f)$id_current)
-      if(!is.null(cl)) parallel::stopCluster(cl = cl)
-      cells <- unlist(cells_by_time)
-      cells <- unique(cells)
-
-    }
-
-    #### Access unique cells (in memory)
-  } else {
-
-    if(!is.null(cl) & is.null(varlist)) parallel::clusterExport(cl = cl, varlist = varlist)
-    cells <- pbapply::pblapply(history, cl = cl, function(d) d$id_current)
-    if(!is.null(cl)) parallel::stopCluster(cl = cl)
-    cells <- unlist(cells)
-    cells <- unique(cells)
-
-  }
-
-  #### Return unique cells
-  return(cells)
-}
-
-
 ########################################
 ########################################
 #### pf_plot_history()

diff --git a/README.Rmd b/README.Rmd
@@ -213,6 +213,7 @@ Each algorithm (AC, DC and ACDC) can be extended through incorporation of a move
 * `pf_access_history_files()` lists particle histories saved to file; 
 * `pf_access_history()` accesses particle histories;
 * `pf_access_particles_unique()` accesses unique particle samples;
+* `pf_access_distance_matrix()` calculates distances between unique particle samples;
 * `pf_plot_history()` plots simulated particle histories; 
 * `pf_animate_history()` animates simulated particle histories;
 * `pf_simplify()` assembles movement paths from particle histories;

diff --git a/README.md b/README.md
@@ -429,6 +429,8 @@ process provided by the `pf*()` family of functions:
   - `pf_access_history_files()` lists particle histories saved to file;
   - `pf_access_history()` accesses particle histories;
   - `pf_access_particles_unique()` accesses unique particle samples;
+  - `pf_access_distance_matrix()` calculates distances between unique
+    particle samples;
   - `pf_plot_history()` plots simulated particle histories;
   - `pf_animate_history()` animates simulated particle histories;
   - `pf_simplify()` assembles movement paths from particle histories;

diff --git a/man/flapper.Rd b/man/flapper.Rd