-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdc.R
368 lines (361 loc) · 21.2 KB
/
dc.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
#' @title The depth-contour (DC) algorithm
#' @description This function implements the depth-contour (DC) algorithm. This algorithm relates one-dimensional depth time series to a two-dimensional bathymetry surface to determine the extent to which different parts of an area might have (or have not) been used, or effectively represent occupied depths, over time. Given a sequence of depth observations (\code{archival}) from a tagged animal and a measurement error parameter (\code{calc_depth_error}), at each time step the function determines the cells on a bathymetry \code{\link[raster]{raster}} (\code{bathy}) that match the observed depth*. Across all time steps, matches are summed to produce a single map representing the number of occasions when the depth in each cell matched the observed depth.
#'
#' @param archival A dataframe of depth time series (for a single individual). At a minimum, this should contain a column named `depth' with depth observations. Depth should be recorded using absolute values in the same units as the bathymetry (\code{bathy}, see below).
#' @param bathy A \code{\link[raster]{raster}} of the bathymetry in an area within which the animal is likely to have been located over the study. Bathymetry values should be recorded as absolute values and in the same units as for depths (see \code{archival}).
#' @param plot_ts A logical input that defines whether or not to the depth time series before the algorithm is initiated.
#' @param calc_depth_error A function that returns the depth errors around a vector of depths. The function should accept vector of depths (from \code{archival$depth}) and return a matrix, with one row for each (lower and upper) error and one one column for each depth (if the error varies with depth). For each depth, the two numbers are added to the observed depth to define the range of depths on the bathymetry raster (\code{bathy}) that the individual could plausibly have occupied at any time. Since the depth errors are added to the individual's depth, the first number should be negative (i.e., the individual could have been shallower that observed) and the second positive (i.e., the individual could have been deeper than observed). The appropriate form for \code{calc_depth_error} depends on the species (pelagic versus demersal/benthic species), the measurement error for the depth observations in \code{archival} and bathymetry (\code{bathy}) data, as well as the tidal range (m) across the area (over the duration of observations). For example, for a pelagic species, the constant function \code{calc_depth_error = function(...) matrix(c(-2.5, Inf)} implies that the individual could have occupied bathymetric cells that are deeper than the observed depth + (-2.5) m and shallower than Inf m (i.e. the individual could have been in any location in which the depth was deeper than the shallow depth limit for the individual). In contrast, for a benthic species, the constant function \code{calc_depth_error = function(...) matrix(c(-2.5, 2.5), nrow = 2)} implies that the individual could have occupied bathymetric cells whose depth lies within the interval defined by the observed depth + (-2.5) and + (+2.5) m.
#' @param check_availability A logical input that defines whether or not to record explicitly, for each time step, whether or not there were any cells on \code{bathy} that matched the observed depth (within the bounds defined by \code{calc_depth_error}).
#' @param normalise A logical variable that defines whether or not to normalise the map of possible locations at each time step so that their scores sum to one.
#' @param save_record_spatial An integer vector that defines the time steps for which to return time step-specific and cumulative maps of the individual's possible locations. \code{save_record_spatial = 0} suppresses the return of this information and \code{save_record_spatial = NULL} returns this information for all time steps.
#' @param write_record_spatial_for_pf (optional) A named list, passed to \code{\link[raster]{writeRaster}}, to save the \code{\link[raster]{raster}} of the individual's possible positions at each time step to file. The `filename' argument should be the directory in which to save files. Files are named by archival time steps as `arc_1', `arc_2' and so on.
#' @param save_args A logical input that defines whether or not to save the list of function inputs in the returned object.
#' @param verbose A logical variable that defines whether or not to print messages to the console or to file to relay function progress. If \code{con = ""}, messages are printed to the console; otherwise, they are written to file (see below).
#' @param con If \code{verbose = TRUE}, \code{con} is character string that defines how messages relaying function progress are returned. If \code{con = ""}, messages are printed to the console (unless redirected by \code{\link[base]{sink}}). Otherwise, \code{con} defines the full pathway to a .txt file (which can be created on-the-fly) into which messages are written to relay function progress.
#' @param split,cl,varlist (optional) Parallelisation options. \code{split} is an integer which, if supplied, splits the \code{archival} dataframe every \code{n}th row into chunks†. The algorithm is applied sequentially within each chunk (if applicable) and chunk-wise maps can be summed afterwards to create a single map of space use. The advantage of this approach is that chunks can be analysed in parallel, via \code{cl} and \code{varlist}, while memory use is minimised. \code{cl} is (a) a cluster object from \code{\link[parallel]{makeCluster}} or (b) an integer that defines the number of child processes. \code{varlist} is a character vector of variables for export (see \code{\link[flapper]{cl_export}}). Exported variables must be located in the global environment. If a cluster is supplied, the connection to the cluster is closed within the function (see \code{\link[flapper]{cl_stop}}). For further information, see \code{\link[flapper]{cl_lapply}} and \code{\link[flapper]{flapper-tips-parallel}}.
#'
#' @details
#'
#' This algorithm can be applied to both pelagic species (which must be in an area where the depth is at least as deep as the observed depth) and benthic/demersal species (which must be in an area relatively close in depth to the observed depth). However, the results are likely to be much more precise for benthic species because depth observations for benthic animals are typically more informative about a tagged animal's possible location(s) than for pelagic species.
#'
#' *Location probability could be weighted by the proximity between the observed depth and the depths of cells within the range defined by the observed depth and the measurement error (i.e., \code{archival$depth[1] + calc_depth_error(archival$depth[1])[1], archival$depth[1] + calc_depth_error(archival$depth[1])[2]}), but this is not currently implemented.
#'
#' †Under the default options (\code{split = NULL}), the function starts with a blank map of the area and iterates over each time step, adding the `possible positions' of the individual to the map at each step. By continuously updating a single map, this approach is slow but minimises memory requirements. An alternative approach is to split the time series into chunks, implement an iterative approach within each chunk, and then join the maps for each chunk. This is implemented by \code{split} (plus \code{\link[flapper]{acdc_simplify}}).
#'
#' @return The function returns an \code{\link[flapper]{acdc_archive-class}} object. If a connection to write files has also been specified, messages are also written to file relaying function progress.
#'
#'
#' @examples
#' #### Define depth time series for examples
#' # We will use a sample depth time series for one individual
#' # We will select a small sample of observations for example speed
#' depth <- dat_archival[dat_archival$individual_id == 25, ][1:100, ]
#'
#' #### Example (1): Implement algorithm with default options
#' ## Implement algorithm
#' out_dc <- dc(archival = depth, bathy = dat_gebco)
#' ## dc() returns a named list that is an 'acdc_archive' class object
#' summary(out_dc)
#' # ... archive elements contain the key information from the algorithm
#' # ... ts_by_chunk contains the time series for each chunk
#' # ... time contains a dataframe of the algorithm's progression
#' # ... args contains a list of user inputs
#' ## Simplify outputs via acdc_simplify()
#' dc_summary <- acdc_simplify(out_dc, type = "dc")
#' summary(dc_summary)
#' ## Examine time-specific maps via acdc_plot_record()
#' acdc_plot_record(dc_summary)
#' ## Examine overall map via a raster* plotting function
#' # Each cell shows expected proportion of time steps spent in each cell
#' prettyGraphics::pretty_map(
#' add_rasters = list(x = dc_summary$map),
#' add_polys = list(x = dat_coast)
#' )
#' # Convert counts on map to percentages
#' prettyGraphics::pretty_map(
#' add_rasters =
#' list(
#' x = dc_summary$map / nrow(depth) * 100,
#' zlim = c(0, 100)
#' ),
#' add_polys = list(x = dat_coast)
#' )
#' # Check for occasions when the individual's depth was not consistent
#' # ... with the depth data for the area and the depth error e.g., possibly
#' # ... due to movement beyond this area:
#' any(do.call(rbind, lapply(dc_summary$record, function(elm) elm$dat))$availability == FALSE)
#'
#' #### Example (2): Implement depth error functions that depend on depth
#' ## Here, we will define a calc_depth_error function that depends on:
#' # ... tag error
#' # ... tidal range
#' # ... a depth-dependent bathymetry error
#' cde <- function(depth) {
#' e <- 4.77 + 2.5 + sqrt(0.5^2 + (0.013 * depth)^2)
#' e <- matrix(c(-e, e), nrow = 2)
#' return(e)
#' }
#' # Vectorise function over depths (essential)
#' cde <- Vectorize(cde)
#' ## Implement algorithm with depth-dependent error
#' out_dc <- dc(archival = depth, bathy = dat_gebco, calc_depth_error = cde)
#'
#' #### Example (3): Write timestep-specific maps of allowed positions to file
#' out_dc <- dc(
#' archival = depth, bathy = dat_gebco,
#' write_record_spatial_for_pf = list(filename = tempdir())
#' )
#' list.files(tempdir())
#'
#' #### Example (4): Implement the algorithm in parallel
#' ## Trial different options for 'split' and compare speed
#' # Approach (1)
#' at1 <- Sys.time()
#' cl <- parallel::makeCluster(2L)
#' parallel::clusterEvalQ(cl = cl, library(raster))
#' out_dc <- dc(
#' archival = depth,
#' bathy = dat_gebco,
#' plot_ts = FALSE,
#' split = 1,
#' cl = cl
#' )
#' at2 <- Sys.time()
#' # Approach (2)
#' bt1 <- Sys.time()
#' cl <- parallel::makeCluster(2L)
#' parallel::clusterEvalQ(cl = cl, library(raster))
#' out_dc <- dc(
#' archival = depth,
#' bathy = dat_gebco,
#' plot_ts = FALSE,
#' split = 5,
#' cl = cl
#' )
#' bt2 <- Sys.time()
#' # Compare timings
#' difftime(at2, at1)
#' difftime(bt2, bt1)
#'
#' #### Example (5): Write messages to file via con
#' out_dc <- dc(
#' archival = depth, bathy = dat_gebco,
#' con = paste0(tempdir(), "/dc_log.txt")
#' )
#' readLines(paste0(tempdir(), "/dc_log.txt"))
#'
#' #### Example (6) Compare an automated vs. manual implementation of DC
#'
#' ## (A) Compare step-wise results for randomly select time steps
#' # Implement algorithm (using default calc_depth_error)
#' out_dc <- dc(
#' archival = depth,
#' bathy = dat_gebco,
#' save_record_spatial = NULL
#' )
#' # Compare results for randomly selected time steps to manual implementation
#' # ... with the same simple calc_depth_error model
#' pp <- graphics::par(mfrow = c(5, 2))
#' for (i in sample(1:nrow(depth), 5)) {
#' # Extract map created via dc()
#' raster::plot(out_dc$archive[[1]]$record[[i]]$spatial[[1]]$map_timestep)
#' # Compare to manual creation of map
#' raster::plot(dat_gebco >= depth$depth[i] - 2.5 & dat_gebco <= depth$depth[i] + 2.5)
#' }
#' graphics::par(pp)
#'
#' ## (B) Compare the chunk-wise results and manual implementation
#' # Implement algorithm chunk-wise
#' out_dc <- dc(
#' archival = depth,
#' bathy = dat_gebco,
#' save_record_spatial = NULL,
#' split = 10L
#' )
#' # Get overall map via acdc_simplify()
#' map_1 <- acdc_simplify(out_dc, type = "dc", mask = dat_gebco)$map
#' # Get overall map via a simple custom approach
#' map_2 <- raster::setValues(dat_gebco, 0)
#' for (i in 1:nrow(depth)) {
#' map_2 <- map_2 + (dat_gebco >= depth$depth[i] - 2.5 & dat_gebco <= depth$depth[i] + 2.5)
#' }
#' map_2 <- raster::mask(map_2, dat_gebco)
#' # Show that the two overall maps are identical
#' pp <- par(mfrow = c(1, 3))
#' raster::plot(map_1, main = "Automated")
#' raster::plot(map_2, main = "Custom")
#' raster::plot(map_1 - map_2, main = "Difference")
#' par(pp)
#'
#' @seealso \code{\link[flapper]{acdc_simplify}} simplifies the outputs of the algorithm. \code{\link[flapper]{acdc_plot_trace}}, \code{\link[flapper]{acdc_plot_record}} and \code{\link[flapper]{acdc_animate_record}} provide plotting routines. \code{\link[flapper]{dcq}} implements a faster version of this algorithm termed the `quick depth-contour' (DCQ) algorithm. Rather than considering the depth interval that the individual could have occupied at each time step, the DCQ algorithm considers a sequence of depth bins (e.g., 10 m bins), isolates these on the bathymetry \code{\link[raster]{raster}} (\code{bathy}) and counts the number of matches in each cell. The DCPF algorithm (see \code{\link[flapper]{pf}}) extends the DC algorithm via particle filtering to reconstruct possible movement paths over \code{bathy}. The ACDC algorithm (see \code{\link[flapper]{acdc}}) extends the depth-contour algorithm by integrating information from acoustic detections of individuals at each time step to restrict the locations in which depth contours are identified.
#'
#' @author Edward Lavender
#' @export
dc <- function(archival,
bathy,
plot_ts = TRUE,
calc_depth_error = function(...) matrix(c(-2.5, 2.5), nrow = 2),
check_availability = TRUE,
normalise = TRUE,
save_record_spatial = 1L,
write_record_spatial_for_pf = NULL,
save_args = TRUE,
verbose = TRUE,
con = "",
split = NULL,
cl = NULL,
varlist = NULL) {
#### Set up function
t_onset <- Sys.time()
if (con != "") {
if (!verbose) {
message("Input to 'con' ignored since verbose = FALSE.")
} else {
# Check directory
check_dir(input = dirname(con))
# Write black file to directory if required
if (!file.exists(con)) {
message(paste0(con, " does not exist: attempting to write file in specified directory..."))
file.create(file1 = con)
message("... Blank file successfully written to file.")
}
}
}
append_messages <- ifelse(con == "", FALSE, TRUE)
cat_to_cf <- function(..., message = verbose, file = con, append = append_messages) {
if (message) cat(paste(..., "\n"), file = con, append = append)
}
#### Initiate function with storage container for outputs
cat_to_cf(paste0("flapper::dc() called (@ ", t_onset, ")..."))
cat_to_cf("... Setting up function...")
out <- list(archive = NULL, ts_by_chunk = NULL, time = NULL, args = NULL)
out$time <- data.frame(event = "onset", time = t_onset)
if (save_args) {
out$args <- list(
archival = archival,
bathy = bathy,
plot_ts = plot_ts,
calc_depth_error = calc_depth_error,
check_availability = check_availability,
normalise = normalise,
save_record_spatial = save_record_spatial,
write_record_spatial_for_pf = write_record_spatial_for_pf,
save_args = save_args,
verbose = verbose,
con = con,
split = split,
cl = cl,
varlist = varlist
)
}
#### Checks
## Check archival dataframe
if (!inherits(archival, "data.frame")) stop("'archival' must be a data.frame")
check_names(input = archival, req = "depth", extract_names = colnames, type = all)
if (any(is.na(archival$depth))) stop("'archival$depth' contains NAs.")
## Check calc_depth_error
de <- calc_depth_error(archival$depth)
if (inherits(de, "matrix")) {
if (nrow(de) == 2) {
if (ncol(de) == 1) {
message("'calc_depth_error' function taken to be independent of depth.")
} else {
message("'calc_depth_error' taken to depend on depth.")
}
if (any(de[1, ] > 0) | any(de[2, ] < 0)) stop("'calc_depth_error' should be a function that returns a two-row matrix with lower (negative) adjustment(s) (top row) and upper (positive) adjustment(s) (bottom row).'", call. = FALSE)
} else {
stop("'calc_depth_error' should return a two-row matrix.", call. = FALSE)
}
} else {
stop("'calc_depth_error' should return a two-row matrix.", call. = FALSE)
}
## Check cluster
if (is.null(split) & !is.null(cl)) {
warning("'cl' argument ignored unless 'split' is supplied.",
immediate. = TRUE, call. = TRUE
)
cl <- NULL
varlist <- NULL
}
## Check write_record_spatial_for_pf inputs
if (!is.null(write_record_spatial_for_pf)) {
check_named_list(input = write_record_spatial_for_pf)
check_names(input = write_record_spatial_for_pf, req = "filename")
write_record_spatial_for_pf$filename <- check_dir(input = write_record_spatial_for_pf$filename, check_slash = TRUE)
write_record_spatial_for_pf_dir <- write_record_spatial_for_pf$filename
}
#### Set up objects for algorithm
## Archival index (for saving the spatial record)
if (rlang::has_name(archival, "timestep_cumulative")) {
warning("'archival$timestep_cumulative overwritten.", immediate. = TRUE, call. = FALSE)
}
archival$timestep_cumulative <- 1:nrow(archival)
if (is.null(save_record_spatial)) save_record_spatial <- archival$timestep_cumulative
## Archival time series with depth error
cat_to_cf("... Implementing calc_depth_error()...")
archival$depth_lwr <- archival$depth + calc_depth_error(archival$depth)[1, ]
archival$depth_upr <- archival$depth + calc_depth_error(archival$depth)[2, ]
## Archival time series with availability
if (check_availability) archival$availability <- NA
## Archival time series as list (by chunk)
if (is.null(split)) {
archival_ls <- list(archival)
} else {
archival_ls <- split(archival, rep(1:ceiling(nrow(archival) / split), each = split)[1:nrow(archival)])
}
out$ts_by_chunk <- lapply(archival_ls, function(d) list(acoustics = NULL, archival = d))
## Blank map for updating
blank <- raster::setValues(bathy, 0)
#### Plot time series (for each chunk)
if (plot_ts) {
cat_to_cf("... Plotting movement time series (for each chunk)...")
if (length(archival_ls) < 25) pp <- graphics::par(mfrow = prettyGraphics::par_mf(length(archival_ls)))
lapply(archival_ls, function(d) {
if (nrow(d) == 1) pt <- "p" else pt <- "l"
prettyGraphics::pretty_plot(1:nrow(d), abs(d$depth) * -1,
pretty_axis_args = list(side = 3:2),
xlab = "Time (index)", ylab = "Depth (m)",
type = pt
)
graphics::lines(1:nrow(d), abs(d$depth_lwr) * -1, lty = 3, col = "royalblue", type = pt)
graphics::lines(1:nrow(d), abs(d$depth_upr) * -1, lty = 3, col = "royalblue", type = pt)
})
if (length(archival_ls) < 25) graphics::par(pp)
}
#### Implement algorithm
## Initiation
out$time <- rbind(out$time, data.frame(event = "algorithm_initiation", time = Sys.time()))
if (is.null(split)) {
cat_to_cf("... Implementing algorithm over time steps...")
} else {
cat_to_cf("... Implementing algorithm over chunks...")
}
## Implement .dc() algorithm over chunks
.out_by_chunk <- cl_lapply(archival_ls, cl = cl, varlist = varlist, fun = function(d) {
# Define storage container for chunk-specific outputs
.out <- list(
map = NULL,
record = list(),
time = NULL,
args = NULL,
chunks = NULL,
simplify = FALSE
)
use <- blank
for (i in 1:nrow(d)) {
avail <- bathy >= d$depth_lwr[i] & bathy <= d$depth_upr[i]
if (check_availability) d$availability[i] <- raster::maxValue(avail) == 1
if (normalise) avail <- avail / raster::cellStats(avail, "sum")
if (!is.null(write_record_spatial_for_pf)) {
write_record_spatial_for_pf$x <- avail
write_record_spatial_for_pf$filename <- paste0(write_record_spatial_for_pf_dir, "arc_", d$timestep_cumulative[i])
do.call(raster::writeRaster, write_record_spatial_for_pf)
}
use <- use + avail
.out$record[[i]] <- list(
dat = d[i, , drop = FALSE],
spatial = list()
)
if (save_record_spatial[1] != 0) {
if (d$timestep_cumulative[i] %in% save_record_spatial) {
.out$record[[i]]$spatial[[1]] <- list(map_timestep = avail, map_cumulative = use)
}
}
}
# Update and return outputs
.out$map <- use
class(.out) <- c(class(.out), "acdc_archive")
return(.out)
})
#### Finalise algorithm
out$archive <- .out_by_chunk
t_end <- Sys.time()
out$time <- rbind(out$time, data.frame(event = "algorithm_competion", time = t_end))
out$time$serial_duration <- Tools4ETS::serial_difference(out$time$time, units = "mins")
out$time$total_duration <- NA
total_duration <- sum(as.numeric(out$time$serial_duration), na.rm = TRUE)
out$time$total_duration[nrow(out$time)] <- total_duration
class(out) <- c(class(out), "acdc_archive")
cat_to_cf(paste0("... flapper::dc() call completed (@ ", t_end, ") after ~", round(total_duration, digits = 2), " minutes."))
return(out)
}