-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathassembles.R
171 lines (155 loc) · 9.82 KB
/
assembles.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
####################################
####################################
#### assemble_sentinel_counts()
#' @title Assemble counts of transmissions/detections from sentinel tags
#' @importFrom rlang .data
#' @importFrom lubridate %within%
#'
#' @description This function assembles a dataframe of counts of transmissions/detections from sentinel tags over user-defined time windows. For each sentinel tag, in each time window, the function counts the number of transmissions of that tag (i.e., the expected number of detections at nearby receivers). At the same time, the function determines the number of detections at all nearby receivers (defined as those within a user-specified distance) that were active at the same time as the sentinel tag. The result is a dataframe which comprises, for each tag and each time window, the number of transmissions (i.e., the expected number of detections) and the observed number of detections at each nearby receiver, arranged in long format.
#'
#' @param sentinel A dataframe that includes all sentinel tag transmissions and detections. This should include the following columns: `timestamp', the time of each observation; `type', the type of each observation ("transmission" or "detection"); `sink_id', a unique identifier of the receivers which received the transmissions; and `source_id', a unique identifier of the receivers from which the transmission was released (by the built-in sentinel tag). (When the `source_id' is the same as the `sink_id', the observation is of type "transmission"; otherwise, the observation must be of type "detection"). See \code{\link[flapper]{dat_sentinel}} for an example.
#' @param moorings A dataframe that includes receiver identifiers, deployment times and locations. This information is necessary so that, for each time window, only receivers that could have detected transmissions (i.e., were nearby to the transmitter and active at the time of transmission) are included in the processed dataframe. This should include the following columns: `receiver_id', a unique identifier for each receiver; `receiver_start_date', the deployment date for each receiver; `receiver_end_date', the deployment end date for each receiver; `receiver_long', the receiver longitude (decimal degrees); and `receiver_lat', the receiver latitude (decimal degrees). See \code{\link[flapper]{dat_moorings}} for an example.
#' @param breaks The time interval over which transmissions/detections are counted (e.g. \code{"hours"}). This is passed to the \code{breaks} argument of \code{\link[base]{cut.POSIXt}}.
#' @param as_POSIXct A function that defines how to convert character time bins (returned by \code{\link[base]{cut.POSIXt}}) to POSIXct format. Usually, \code{\link[base]{as.POSIXct}} is suitable but, for large time series, this is slow. In this case, \code{\link[fasttime]{fastPOSIXct}} can be used to improve algorithm speed, if suitable. The default function is \code{function(x, tz = "UTC",...) fasttime::fastPOSIXct(x, tz = tz,...)}.
#' @param detection_range A number that defines the maximum distance (m) between a transmitting receiver and other receivers within with detections may plausibly be received. For each transmission, all detections at active receivers within this distance are counted; other receivers are not considered.
#' @param dist_btw_receivers (optional) A dataframe that specifies the distances between all combinations of receivers. If not provided, this is computed internally by \code{\link[flapper]{dist_btw_receivers}}, which assumes Euclidean distances are appropriate. If provided, the dataframe should contain columns: `r1', `r2', `dist' (see \code{\link[flapper]{dist_btw_receivers}}). Note that, if provided, the `dist' column should be in m, not km, as returned by default by \code{\link[flapper]{dist_btw_receivers}}, to match the units of \code{detection_range}.
#'
#' @return The function returns a dataframe that, for each source receiver (i.e., sentinel tag), specifies the number of transmissions from that receiver and the number of detections at each nearby receiver over the specified time window. The dataframe has the following columns: `timestamp_bin', the time window; `source_id', the identifier of the source of transmission; `sink_id', the identifier of each potential recipient of each transmission; `n_trms', the number of transmissions from the source over each time window; `n_dets', the number of detections of each transmission at each potential recipient receiver; `dist_btw_receivers', the distance between the source and the sink receiver. Rows are ordered by `source_id', then `timestamp' and finally `sink_id'.
#'
#' @examples
#' #### Example (1): Use default options and example dataframes, which contain
#' # ... all required columns:
#' sentinel_counts <- assemble_sentinel_counts(
#' sentinel = dat_sentinel,
#' moorings = dat_moorings
#' )
#' head(sentinel_counts)
#' tail(sentinel_counts)
#' #### Example (2): Adjust time window
#' sentinel_counts <- assemble_sentinel_counts(
#' sentinel = dat_sentinel,
#' moorings = dat_moorings,
#' breaks = "days"
#' )
#' head(sentinel_counts)
#' tail(sentinel_counts)
#' #### Example (2): Adjust maximum detection distance and supply dist_btw_receivers
#' dist_btw_receivers_m <- dist_btw_receivers(
#' dat_moorings[, c(
#' "receiver_id",
#' "receiver_long",
#' "receiver_lat"
#' )],
#' f = function(x) {
#' return(x * 1000)
#' }
#' )
#' sentinel_counts <- assemble_sentinel_counts(
#' sentinel = dat_sentinel,
#' moorings = dat_moorings,
#' detection_range = 1500,
#' dist_btw_receivers = dist_btw_receivers_m
#' )
#' head(sentinel_counts)
#' tail(sentinel_counts)
#'
#' @author Edward Lavender
#' @export
assemble_sentinel_counts <-
function(
sentinel,
moorings,
breaks = "hours",
as_POSIXct = function(x, tz = "UTC", ...) fasttime::fastPOSIXct(x, tz = tz, ...),
detection_range = 2000,
dist_btw_receivers = NULL) {
#### Define global variables
timestamp_bin <- NULL
source_id <- NULL
sink_id <- NULL
n_trms <- NULL
n_dets <- NULL
#### Define time bins of user-specified size
sentinel$timestamp_bin <- cut(sentinel$timestamp, breaks)
#### For each source, count the number of transmissions in each timestamp_bin
trms <- sentinel[sentinel$type == "transmission", ]
count <- trms %>%
dplyr::group_by(.data$source_id, .data$timestamp_bin) %>%
dplyr::summarise(n_trms = dplyr::n())
count$timestamp_bin <- as_POSIXct(count$timestamp_bin)
#### Duplicate the dataframe for all source/sink pair combinations
count <-
lapply(unique(moorings$receiver_id), function(id) {
count$sink_id <- id
return(count)
})
count <- do.call("rbind", count)
#### Exclude sinks that were deployed at different time and could not have detected the transmission
match_sink <- match(count$sink_id, moorings$receiver_id)
match_source <- match(count$source_id, moorings$receiver_id)
count$sink_start_date <- moorings$receiver_start_date[match_sink]
count$sink_end_date <- moorings$receiver_end_date[match_sink]
count$sink_interval <- lubridate::interval(count$sink_start_date, count$sink_end_date)
count$within <- count$timestamp_bin %within% count$sink_interval
count <- count[count$within, ]
#### Exclude sinks that were too far away and could not have detected the transmission
# Define matching indices for convenience
match_sink <- match(count$sink_id, moorings$receiver_id)
match_source <- match(count$source_id, moorings$receiver_id)
# Add receiver location
count$sink_lat <- moorings$receiver_lat[match_sink]
count$sink_long <- moorings$receiver_long[match_sink]
count$source_lat <- moorings$receiver_lat[match_source]
count$source_long <- moorings$receiver_long[match_source]
# Compute distances between all combinations of receivers, if not provided:
if (is.null(dist_btw_receivers)) {
dist_btw_receivers <- dist_btw_receivers(moorings[, c(
"receiver_id",
"receiver_long",
"receiver_lat"
)])
dist_btw_receivers$dist <- dist_btw_receivers$dist * 1000
}
# Add distances to dataframe
dist_btw_receivers$key_dist <- paste0(dist_btw_receivers$r1, ",", dist_btw_receivers$r2)
count$key_dist <- paste0(count$source_id, ",", count$sink_id)
count$dist_btw_receivers <- dist_btw_receivers$dist[match(count$key_dist, dist_btw_receivers$key_dist)]
# Exclude sinks beyond the maximum detection distance from sources.
count <- count[count$dist_btw_receivers <= detection_range, ]
#### Remove pairs in which the source and the sink are the same receiver
count <- count[count$source_id != count$sink_id, ]
#### Define dataframe containing only detections
dets <- sentinel[sentinel$type == "detection", ]
#### Count the number of detections at each sink of each source in each bin
count_dets <- dets %>%
dplyr::group_by(.data$source_id, .data$sink_id, .data$timestamp_bin) %>%
dplyr::summarise(n_dets = dplyr::n())
count_dets$timestamp_bin <- as_POSIXct(count_dets$timestamp_bin)
#### Add the number of detections to the dataframe
count$key <- paste0(count$source_id, ",", count$sink_id, ",", count$timestamp_bin)
count_dets$key <- paste0(count_dets$source_id, ",", count_dets$sink_id, ",", count_dets$timestamp_bin)
count$n_dets <- count_dets$n_dets[match(count$key, count_dets$key)]
# Set NA to 0 detections
count$n_dets[is.na(count$n_dets)] <- 0
#### Organise dataframe
count <- count %>%
dplyr::select(
timestamp_bin,
source_id,
sink_id,
n_trms,
n_dets,
dist_btw_receivers
) %>%
dplyr::arrange(
source_id,
timestamp_bin,
sink_id
)
count <- data.frame(count)
#### Return dataframe
return(count)
}
#### End of code.
####################################
####################################