Add assemble_sentinel_counts() function.

edwardlavender · Jun 22, 2020 · 2e4bfbd · 2e4bfbd
1 parent 480b0ae
commit 2e4bfbd
Show file tree

Hide file tree

Showing 13 changed files with 766 additions and 505 deletions.
diff --git a/.Rhistory b/.Rhistory
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -6,11 +6,11 @@ Author: Edward Lavender
 Maintainer: Edward Lavender <[email protected]>
 Description: flapper is an R package designed to facilitate the use of passive acoustic telemetry (PAT) data for ecological inferences, especially those pertaining to animal space use. This includes functions for processing PAT data, new algorithms for inferring space use and simulations designed to evaluate the efficacy of existing and new algorithms for inferring space use. Package development has been motivated by the collection of PAT data for a Critically Endangered benthopelagic elasmobranch on the West Coast of Scotland. 
 Imports:
-  data.table, dplyr, lubridate,
-  pbapply,
-  glatos, geosphere, 
-  fields,
-  plot.pretty, Tools4ETS
+    magrittr, data.table, dplyr, rlang, lubridate,
+    pbapply,
+    glatos,
+    geosphere, fields,
+    plot.pretty, Tools4ETS
 Suggests:
     knitr,
     rmarkdown

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,6 +1,11 @@
 # Generated by roxygen2: do not edit by hand
 
+export("%>%")
 export(add_receiver_id)
+export(assemble_sentinel_counts)
 export(compute_det_sim)
 export(dist_btw_receivers)
 export(false_detections_sf)
+importFrom(lubridate,"%within%")
+importFrom(magrittr,"%>%")
+importFrom(rlang,.data)
diff --git a/R/assemble_sentinels.R b/R/assemble_sentinels.R
@@ -0,0 +1,161 @@
+####################################
+####################################
+#### assemble_sentinel_counts()
+
+#' @title Assemble counts of transmissions/detections from sentinel tags
+#' @importFrom rlang .data
+#' @importFrom lubridate %within%
+#'
+#' @description This function assembles counts of transmissions/detections from sentinel tags over user-defined time windows. For each sentinel tag, in each time window, the function counts the number of transmissions of that tag (i.e., the expected number of detections at nearby receivers). At the same time, the function determines the number of detections at all nearby receivers (defined as those within a user-specified distance) that were active at the same time as the sentinel tag. The result is a dataframe which comprises, for each tag and each time window, the expected number of transmissions and the corresponding number of detections at each nearby receiver, arranged in long format.
+#'
+#' @param sentinel A dataframe which includes all sentinel tag transmissions and detections. This should include the following columns: 'timestamp', the time of each observation; 'type', the type of each observation ("transmission" or "detection"); 'sink_id', a unique identifier of the receiver which received the transmission; and 'source_id', a unique identifier of the receiver from which the transmission was released (by the built-in sentinel tag). (When the 'source_id' is the same as the 'sink_id', the observation is of type "transmission"; otherwise, the observation must be of type "detection"). See \code{\link[flapper]{dat_sentinel}} for an example.
+#' @param moorings A dataframe which includes receiver identifiers, deployment times and locations. This information is necessary so that, for each time window, only receivers that could have detected transmissions (i.e., were nearby to the transmitter and active at the time of transmission) are included in the processed dataframe. This should include the following columns: 'receiver_id', a unique identifier for each receiver; 'receiver_start_date', the deployment date for each receiver; 'receiver_end_date', the deployment end date for each receiver; 'receiver_long', the receiver longitude (decimal degrees); and 'receiver_lat', the receiver latitude (in decimal degrees).
+#' @param breaks The time interval over which transmissions/detections are counted (e.g., \code{"hours"}). This is passed to the \code{breaks} argument of \code{\link[base]{cut.POSIXt}}.
+#' @param as_POSIXct A function which defines how to convert character time bins (returned by \code{\link[base]{cut.POSIXt}}) to POSIXct format. Usually, \code{\link[base]{as.POSIXct}} is suitable but, for large timeseries, this is slow. In this case, \code{\link[fasttime]{fastPOSIXct}} can be used to improve algorithm speed, if suitable. The default function is \code{function(x, tz = "UTC",...) fasttime::fastPOSIXct(x, tz = tz,...)}.
+#' @param max_det_dist A number which defines the distance (m) between the transmitting receiver and other receivers within which detections may plausibly be received. For each transmitter, within this distance, all detections at active receivers are counted; other receivers are not considered.
+#' @param dist_btw_receivers (optional) A dataframe which specifies the distance between all combinations of receivers. If not provided, this is computed internally by \code{\link[flapper]{dist_btw_receivers}}. If provided, the dataframe should contain the columns: 'r1', 'r2', 'dist' (see \code{\link[flapper]{dist_btw_receivers}}). Note that, if provided, the 'dist' column should be in m, not km, as returned by default by \code{\link[flapper]{dist_btw_receivers}}.
+#' @param ... Other arguments. None currently implemented.
+#'
+#' @return The function returns a dataframe which, for each source receiver (i.e., sentinel tag), specifies the number of transmissions from that receiver and the number of detections at each nearby receiver over the specified time window. The dataframe has the following columns: 'timestamp_bin', the time window; 'source_id', the identifier of the source of transmission; 'sink_id', the identifier of each potential recipient of each transmission; 'n_trms', the number of transmissions from the source over each time window; 'n_dets', the number of detections of each transmission at each potential recipient receiver; 'dist_btw_receivers', the distance between the source and the sink receiver. Rows are ordered by 'source_id', then 'timestamp_bin', then 'sink_id'.
+#'
+#' @examples
+#' #### Example (1): Use default options and example dataframes, which contain
+#' # ... all required columns:
+#' sentinel_counts <- assemble_sentinel_counts(sentinel = dat_sentinel,
+#'                                             moorings = dat_moorings)
+#' head(sentinel_counts); tail(sentinel_counts);
+#'
+#' #### Example (2): Adjust time window
+#' sentinel_counts <- assemble_sentinel_counts(sentinel = dat_sentinel,
+#'                                             moorings = dat_moorings,
+#'                                             breaks = "days")
+#' head(sentinel_counts); tail(sentinel_counts);
+#'
+#' #### Example (3): Adjust maximum detection distance and supply dist_btw_receivers
+#' dist_btw_receivers_m <- dist_btw_receivers(dat_moorings[, c("receiver_id",
+#'                                                             "receiver_long",
+#'                                                             "receiver_lat")],
+#'                                            f = function(x) return(x*1000))
+#' sentinel_counts <- assemble_sentinel_counts(sentinel = dat_sentinel,
+#'                                             moorings = dat_moorings,
+#'                                             max_det_dist = 1500,
+#'                                             dist_btw_receivers = dist_btw_receivers_m)
+#' head(sentinel_counts); tail(sentinel_counts)
+#'
+#' @author Edward Lavender
+#' @export
+
+assemble_sentinel_counts <- # Per source and time bin: count transmissions, then count detections at each nearby, active sink receiver.
+  function(
+    sentinel,
+    moorings,
+    breaks = "hours",
+    as_POSIXct = function(x, tz = "UTC",...) { fasttime::fastPOSIXct(x, tz = tz,...) },
+    max_det_dist = 2000,
+    dist_btw_receivers = NULL,...){
+
+    #### Define global variables (these names are used bare in dplyr::select()/arrange() below; presumably bound to NULL here to avoid R CMD check notes -- confirm)
+    timestamp_bin <- NULL; source_id <- NULL; sink_id <- NULL; n_trms <- NULL; n_dets <- NULL;
+
+    #### Define time bins of user-specified size
+    sentinel$timestamp_bin <- cut(sentinel$timestamp, breaks) # bin labels; converted back to date-times with as_POSIXct() below
+
+    #### For each source, count the number of transmissions in each timestamp_bin
+    trms <- sentinel[sentinel$type == "transmission", ]
+    count <- trms %>%
+      dplyr::group_by(.data$source_id, .data$timestamp_bin) %>%
+      dplyr::summarise(n_trms = dplyr::n())
+    count$timestamp_bin <- as_POSIXct(count$timestamp_bin) # user-supplied converter (see the as_POSIXct argument)
+
+    #### Duplicate the dataframe for all source/sink pair combinations (one copy of the transmission counts per receiver in moorings, tagged with that receiver as the candidate sink)
+    count <-
+      lapply(unique(moorings$receiver_id), function(id){
+        count$sink_id <- id
+        return(count)
+      })
+    count <- do.call("rbind", count) # stack the per-sink copies into one long dataframe
+
+    #### Exclude sinks that were deployed at different time and could not have detected the transmission
+    match_sink    <- match(count$sink_id, moorings$receiver_id)
+    match_source <- match(count$source_id, moorings$receiver_id) # not used in this section; reassigned below before first use
+    count$sink_start_date <- moorings$receiver_start_date[match_sink]
+    count$sink_end_date   <- moorings$receiver_end_date[match_sink]
+    count$sink_interval   <- lubridate::interval(count$sink_start_date, count$sink_end_date)
+    count$within <- count$timestamp_bin %within% count$sink_interval # does the bin's timestamp fall inside the sink's deployment interval?
+    count <- count[count$within, ]
+
+    #### Exclude sinks that were too far away and could not have detected the transmission
+    # Define matching indices for convenience (recomputed because rows were dropped by the deployment filter above)
+    match_sink    <- match(count$sink_id, moorings$receiver_id)
+    match_source <- match(count$source_id, moorings$receiver_id)
+    # Add receiver location (these columns are not used by the distance lookup below and are dropped by the final select())
+    count$sink_lat     <- moorings$receiver_lat[match_sink]
+    count$sink_long    <- moorings$receiver_long[match_sink]
+    count$source_lat  <- moorings$receiver_lat[match_source]
+    count$source_long <- moorings$receiver_long[match_source]
+    # Compute distances between all combinations of receivers, if not provided:
+    if(is.null(dist_btw_receivers)){
+      dist_btw_receivers <- dist_btw_receivers(moorings[, c("receiver_id", # the argument is NULL here, so this call resolves to the dist_btw_receivers() function, not the argument
+                                                            "receiver_long",
+                                                            "receiver_lat")]
+      )
+      dist_btw_receivers$dist <- dist_btw_receivers$dist * 1000 # km -> m, so that distances are comparable with max_det_dist (m)
+    }
+    # Add distances to dataframe (look up each source/sink pair via a "r1,r2" text key)
+    dist_btw_receivers$key_dist <- paste0(dist_btw_receivers$r1, ",", dist_btw_receivers$r2)
+    count$key_dist <- paste0(count$source_id, ",", count$sink_id)
+    count$dist_btw_receivers <- dist_btw_receivers$dist[match(count$key_dist, dist_btw_receivers$key_dist)]
+    # Exclude sinks beyond the maximum detection distance from sources.
+    count <- count[count$dist_btw_receivers <= max_det_dist, ] # NOTE(review): a pair with no match in dist_btw_receivers has an NA distance here; confirm how such rows should be handled
+
+    #### Remove pairs in which the source and the sink are the same receiver
+    count <- count[count$source_id != count$sink_id, ]
+
+    #### Define dataframe containing only detections
+    dets <- sentinel[sentinel$type == "detection", ]
+
+    #### Count the number of detections at each sink of each source in each bin
+    count_dets <- dets %>%
+      dplyr::group_by(.data$source_id, .data$sink_id, .data$timestamp_bin) %>%
+      dplyr::summarise(n_dets = dplyr::n())
+    count_dets$timestamp_bin <- as_POSIXct(count_dets$timestamp_bin) # same conversion as for the transmission counts, so the text keys below are built from like-for-like values
+
+    #### Add the number of detections to the dataframe (matched on a "source,sink,bin" text key)
+    count$key <- paste0(count$source_id, ",", count$sink_id, ",", count$timestamp_bin)
+    count_dets$key <- paste0(count_dets$source_id, ",", count_dets$sink_id, ",", count_dets$timestamp_bin)
+    count$n_dets <- count_dets$n_dets[match(count$key, count_dets$key)]
+    # Set NA to 0 detections (no matching key means no detections were recorded for that source/sink/bin)
+    count$n_dets[is.na(count$n_dets)] <- 0
+
+    #### Organise dataframe (keep only the documented output columns; order rows by source, then time bin, then sink)
+    count <- count %>%
+      dplyr::select(timestamp_bin,
+                    source_id,
+                    sink_id,
+                    n_trms,
+                    n_dets,
+                    dist_btw_receivers) %>%
+      dplyr::arrange(source_id,
+                     timestamp_bin,
+                     sink_id)
+    count <- data.frame(count) # return a plain data.frame rather than the dplyr object
+
+    #### Return dataframe
+    return(count)
+  }
+
+
+
+####################################
+####################################
+#### assemble_sentinel_obs()
+
+
+####################################
+####################################
+####
+
+
+#### End of code.
+####################################
+####################################
diff --git a/R/utils.R b/R/utils.R
@@ -1,3 +1,20 @@
+######################################
+######################################
+#### %>%
+
+#' Pipe operator
+#'
+#' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
+#'
+#' @name %>%
+#' @rdname pipe
+#' @keywords internal
+#' @export
+#' @importFrom magrittr %>%
+#' @usage lhs \%>\% rhs
+NULL
+
+
 ######################################
 ######################################
 #### check functions

diff --git a/data-raw/dat_ids.rds b/data-raw/dat_ids.rds
diff --git a/data-raw/dat_moorings.rds b/data-raw/dat_moorings.rds
diff --git a/data-raw/dat_sentinel.rds b/data-raw/dat_sentinel.rds
diff --git a/data/dat_ids.rda b/data/dat_ids.rda
diff --git a/data/dat_moorings.rda b/data/dat_moorings.rda
diff --git a/data/dat_sentinel.rda b/data/dat_sentinel.rda
diff --git a/man/assemble_sentinel_counts.Rd b/man/assemble_sentinel_counts.Rd
diff --git a/man/pipe.Rd b/man/pipe.Rd