-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpf_simplify.Rd
374 lines (335 loc) · 28.6 KB
/
pf_simplify.Rd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/pf_simplify.R
\name{pf_simplify}
\alias{pf_simplify}
\title{Convert particle histories from \code{\link[flapper]{pf}} into movement paths}
\usage{
pf_simplify(
archive,
max_n_particles = NULL,
max_n_particles_sampler = c("random", "weighted", "max"),
bathy = NULL,
calc_distance = NULL,
calc_distance_lcp_fast = NULL,
calc_distance_graph = NULL,
calc_distance_limit = NULL,
calc_distance_barrier = NULL,
calc_distance_barrier_limit = NULL,
calc_distance_barrier_grid = NULL,
calc_distance_restrict = FALSE,
calc_distance_algorithm = "bi",
calc_distance_constant = 1,
mobility = NULL,
mobility_from_origin = mobility,
write_history = NULL,
cl = NULL,
varlist = NULL,
use_all_cores = FALSE,
return = c("path", "archive"),
summarise_pr = FALSE,
max_n_copies = NULL,
max_n_copies_sampler = c("random", "weighted", "max"),
max_n_paths = 100L,
add_origin = TRUE,
verbose = TRUE
)
}
\arguments{
\item{archive}{A \code{\link[flapper]{pf_archive-class}} object from \code{\link[flapper]{pf}}.}
\item{max_n_particles}{(optional) An integer that defines the maximum number of particles to selected at each time step. If supplied, particle samples are thinned, with \code{max_n_particles} retained at each time step (in a method specified by \code{max_n_particles_sampler}), prior to processing. This reduces computation time but may lead to failures in path building (if the subset of sample particles cannot be connected into any movement paths).}
\item{max_n_particles_sampler}{If \code{max_n_particles} is supplied, \code{max_n_particles_sampler} is a character that defines the sampling method. Currently supported options are: \code{"random"}, which implements random sampling; \code{"weighted"}, which implements weighted sampling, with random samples taken according to their probability at the current time step; and \code{"max"}, which selects the top \code{max_n_particles} most likely particles.}
\item{bathy}{A \code{\link[raster]{raster}} that defines the grid across the area within which the individual could have moved. \code{bathy} must be planar (i.e., Universal Transverse Mercator projection) with units of metres in x and y directions. If \code{calc_distance = "lcp"}, the surface's resolution is taken to define the distance between horizontally and vertically connected cells and must be the same in both x and y directions. Any cells with NA values (e.g., due to missing data) are treated as `impossible' to move though by the algorithm (see \code{\link[flapper]{lcp_over_surface}}). If unsupplied, \code{bathy} can be extracted from \code{archive}, if available.}
\item{calc_distance}{A character that defines the method used to calculate distances between sequential combinations of particles (see \code{\link[flapper]{pf}}). Currently supported options are Euclidean distances (\code{"euclid"}) or shortest (least-cost) distances ("lcp"). In practice, Euclidean distances are calculated and then, for the subset of connections that meet specified criteria (see \code{calc_distance_limit}, \code{calc_distance_barrier}, \code{mobility_from_origin} and \code{mobility}), Euclidean distances are updated to shortest distances, if specified. Note that \code{calc_distance} does not need to be the same method as used for \code{\link[flapper]{pf}}: it is often computationally beneficial to implement \code{\link[flapper]{pf}} using Euclidean distances and then, for the subset of sampled particles, implement \code{\link[flapper]{pf_simplify}} with \code{calc_distance = "lcp"} to re-compute distances using the shortest-distances algorithm, along with the adjusted probabilities. However, for large paths, the quickest option is to implement both functions using \code{calc_distance = "euclid"} and then interpolate shortest paths only for the set of returned paths (see \code{\link[flapper]{lcp_interp}}). If \code{calc_distance = NULL}, the method saved in \code{archive} is used.}
\item{calc_distance_lcp_fast}{(optional) If \code{calc_distance = "lcp"}, \code{calc_distance_lcp_fast} is a function that predicts the shortest distance. This function must accept two arguments (even if they are ignored): (a) a numeric vector of Euclidean distances and (b) an integer vector of barrier overlaps that defines whether (1) or not (1) each transect crosses a barrier (see \code{\link[flapper]{lcp_comp}}). This option avoids the need to implement shortest-distance algorithms.}
\item{calc_distance_graph}{(optional) If \code{calc_distance = "lcp"} and \code{calc_distance_lcp_fast = NULL}, \code{calc_distance_graph} is a graph object that defines the distances between connected cells in \code{bathy}. If unsupplied, this is taken from \code{archive$args$calc_distance_graph}, if available, or computed via \code{\link[flapper]{lcp_graph_surface}}.}
\item{calc_distance_limit}{(optional) If \code{calc_distance = "lcp"} and \code{calc_distance_lcp_fast = NULL}, \code{calc_distance_limit} is a number that defines the lower Euclidean distance limit for shortest-distances calculations. If supplied, shortest distances are only calculated for cell connections that are more than a Euclidean distance of \code{calc_distance_limit} apart. In other words, if supplied, it is assumed that there exists a valid shortest path (shorter than the maximum distance imposed by the animal's mobility constraints) if the Euclidean distance between two points is less than \code{calc_distance_limit} (unless that segment crosses a barrier: see \code{calc_distance_barrier}). This option can improve the speed of distance calculations. However, if supplied, note that Euclidean and shortest distances (and resultant probabilities) may be mixed in function outputs. This argument is currently only implemented for \code{\link[flapper]{pf_archive-class}} objects derived with \code{calc_distance = "euclid"} and \code{calc_distance_euclid_fast = TRUE} via \code{\link[flapper]{pf}} for which connected cell pairs need to be derived (i.e., not following a previous implementation of \code{\link[flapper]{pf_simplify}}).}
\item{calc_distance_barrier}{(optional) If \code{calc_distance = "lcp"}, \code{calc_distance_barrier} is a simple feature geometry that defines a barrier, such as the coastline, to movement (see \code{\link[flapper]{segments_cross_barrier}}). The coordinate reference system for this object must match \code{bathy}. If supplied, shortest distances are only calculated for segments that cross a barrier (or exceed \code{calc_distance_limit}). This option can improve the speed of distance calculations. However, if supplied, note that Euclidean and shortest distances (and resultant probabilities) may be mixed in function outputs. All \code{calc_distance_barrier*} arguments are currently only implemented for \code{\link[flapper]{pf_archive-class}} objects derived with \code{calc_distance = "euclid"} and \code{calc_distance_euclid_fast = TRUE} via \code{\link[flapper]{pf}} for which connected cell pairs need to be derived (i.e., not following a previous implementation of \code{\link[flapper]{pf_simplify}}).}
\item{calc_distance_barrier_limit}{(optional) If \code{calc_distance_barrier} is supplied, \code{calc_distance_barrier_limit} is the lower Euclidean limit for determining barrier overlaps. If supplied, barrier overlaps are only determined for cell connections that are more than \code{calc_distance_barrier_limit} apart. This option can reduce the number of cell connections for which barrier overlaps need to be determined (and ultimately the speed of distance calculations).}
\item{calc_distance_barrier_grid}{(optional) If \code{calc_distance_barrier} is supplied, \code{calc_distance_barrier_grid} is a \code{\link[raster]{raster}} that defines the distance to the barrier (see \code{\link[flapper]{segments_cross_barrier}}). The coordinate reference system must match \code{bathy}. If supplied, \code{mobility_from_origin} and \code{mobility} are also required. \code{calc_distance_barrier_grid} is used to reduce the wall time of the barrier-overlap routine (see \code{\link[flapper]{segments_cross_barrier}}).}
\item{calc_distance_restrict}{(optional) If and \code{calc_distance_lcp_fast = NULL} and \code{calc_distance_limit} and/or \code{calc_distance_barrier} are supplied, \code{calc_distance_restrict} is a logical variable that defines whether (\code{TRUE}) or not (\code{FALSE}) to restrict further the particle pairs for which shortest distances are calculated. If \code{TRUE}, the subset of particles flagged by \code{calc_distance_limit} and \code{calc_distance_barrier} for shortest-distance calculations is further restricted to consider only those pairs of particles for which there is not a valid connection to or from those particles under the Euclidean distance metric. Either way, there is no reduction in the set of particles, only in the number of connections evaluated between those particles. This can improve the speed of distance calculations.}
\item{calc_distance_algorithm, calc_distance_constant}{Additional shortest-distance calculation options if \code{calc_distance_lcp_fast = NULL} (see \code{\link[cppRouting]{get_distance_pair}}). \code{calc_distance_algorithm} is a character that defines the algorithm: \code{"Dijkstra"}, \code{"bi"}, \code{"A*"} or \code{"NBA"} are supported. \code{calc_distance_constant} is the numeric constant required to maintain the heuristic function admissible in the A* and NBA algorithms. For shortest distances (based on costs derived via \code{\link[flapper]{lcp_costs}}), the default (\code{calc_distance_constant = 1}) is appropriate.}
\item{mobility, mobility_from_origin}{(optional) The mobility parameters (see \code{\link[flapper]{pf}}). If unsupplied, these can be extracted from \code{archive}, if available. However, even if \code{\link[flapper]{pf}} was implemented without these options, it is beneficial to specify mobility limits here (especially if \code{calc_distance = "lcp"}) because they restrict the number of calculations that are required (for example, for shortest distances, at each time step, distances are only calculated for the subset of particle connections below \code{mobility_from_origin} or \code{mobility} in distance).}
\item{write_history}{A named list of arguments, passed to \code{\link[base]{saveRDS}}, to write `intermediate' files. The `file' argument should be the directory in which to write files. This argument is currently only implemented for \code{\link[flapper]{pf_archive-class}} objects derived with \code{calc_distance = TRUE} and \code{calc_distance_euclid_fast = TRUE} via \code{\link[flapper]{pf}} for which connected cell pairs need to be derived (i.e., not following a previous implementation of \code{\link[flapper]{pf_simplify}}). If supplied, two directories are created in `file' (1/ and 2/), in which dataframes of the pairwise distances between connected cells and the subset of those that formed continuous paths from the start to the end of the time series are written, respectively. Files are named by time steps as `pf_1', `pf_2' and so on. Files for each time step are written and re-read from this directory during particle processing. This helps to minimise vector memory requirements because the information for all time steps does not have to be retained in memory at once.}
\item{cl, varlist, use_all_cores}{(optional) Parallelisation options for the first stage of the algorithm, which identifies connected cell pairs, associated distances and movement probabilities. The first parallelisation option is to parallelise the algorithm over time steps via \code{cl}. \code{cl} is (a) a cluster object from \code{\link[parallel]{makeCluster}} or (b) an integer that defines the number of child processes. If \code{cl} is supplied, \code{varlist} may be required. This is a character vector of variables for export (see \code{\link[flapper]{cl_export}}). Exported variables must be located in the global environment. If a cluster is supplied, the connection to the cluster is closed within the function (see \code{\link[flapper]{cl_stop}}). For further information, see \code{\link[flapper]{cl_lapply}} and \code{\link[flapper]{flapper-tips-parallel}}.The second parallelisation option is to parallelise shortest-distance calculations within time steps via a logical input (\code{TRUE}) to \code{use_all_cores} that is passed to \code{\link[cppRouting]{get_distance_matrix}}. This option is only implemented for \code{calc_distance = "lcp"}.}
\item{return}{A character (\code{return = "path"} or \code{return = "archive"}) that defines the type of object that is returned (see Details).}
\item{summarise_pr}{(optional) For \code{return = "archive"}, \code{summarise_pf} is a function or a logical input that defines whether or not (and how) to summarise the probabilities of duplicate cell records for each time step. If a function is supplied, only one record of each sampled cell is returned per time step, with the associated probability calculated from the probabilities of each sample of that cell using the supplied function. For example, \code{summarise_pr = max} returns the most probable sample. Alternatively, if a logical input (\code{summarise_pr = TRUE}) is supplied, only one record of each sampled cell is returned per time step, with the associated probability calculated as the sum of the normalised probabilities of all samples for that cell, rescaled so that the maximum probability takes a score of one. Specifying \code{summarise_pr} is useful for deriving maps of the `probability of use' across an area based on particle histories because it ensures that `probability of use' scores depend on the number of time steps during which an individual could have occupied a location, rather than the total number of samples of that location (see \code{\link[flapper]{pf_plot_map}}). Both \code{summarise_pr = NULL} and \code{summarise_pr = FALSE} suppress this argument.}
\item{max_n_copies}{(optional) For \code{return = "path"}, \code{max_n_copies} is an integer that specifies the maximum number of copies of a sampled cell that are retained at each time stamp. Each copy represents a different route to that cell. By default, all copies (i.e. routes to that cell are retained) via \code{max_n_copies = NULL}. However, in cases where there are a large number of paths through a landscape, the function can run into vector memory limitations during path assembly, so \code{max_n_copies} may need to be set. In this case, at each time step, if there are more than \code{max_n_copies} paths to a given cell, then a subset of these (\code{max_n_copies}) are sampled, according to the \code{max_n_copies_sampler} argument.}
\item{max_n_copies_sampler}{(optional) For \code{return = "path"}, if \code{max_n_copies} is supplied, \code{max_n_copies_sampler} is a character that defines the sampling method. Currently supported options are: \code{"random"}, which implements random sampling; \code{"weighted"}, which implements weighted sampling, with random samples taken according to their probability at the current time step; and \code{"max"}, which selects for the top \code{max_n_copies} most likely copies of a given cell according to the probability associated with movement into that cell from the previous location.}
\item{max_n_paths}{(optional) For \code{return = "path"}, \code{max_n_paths} is an integer that specifies the maximum number of paths to be reconstructed. During path assembly, following the implementation of \code{max_n_copies} (if provided), \code{max_n_paths} are selected at random at each time step. This option is provided to improve the speed of path assembly in situations with large numbers of paths. \code{max_n_paths = NULL} computes all paths (if possible).}
\item{add_origin}{For \code{return = "path"}, \code{add_origin} is a logical input that defines whether or not to include the origin in the returned dataframe.}
\item{verbose}{A logical input that defines whether or not to print messages to the console to monitor function progress.}
}
\value{
If \code{return = "archive"}, the function returns a \code{\link[flapper]{pf_archive-class}} object, as inputted, but in which only the most likely record of each cell that was connected to cells at the next time step is retained and with the \code{method = "pf_simplify"} flag. If \code{return = "path"}, the function returns a \code{\link[flapper]{pf_path-class}} object, which is a dataframe that defines the movement paths.
}
\description{
This function is designed to simplify the \code{\link[flapper]{pf_archive-class}} object from \code{\link[flapper]{pf}} that defines sampled particle histories into a set of movement paths. The function identifies pairs of cells between which movement may have occurred at each time step (if necessary), (re)calculates distances and probabilities between connected cell pairs and then, if specified, links pairwise movements between cells into a set of possible movement paths.
}
\details{
The implementation of this function depends on how \code{\link[flapper]{pf}} has been implemented and the \code{return} argument. Under the default options in \code{\link[flapper]{pf}}, the fast Euclidean distances method is used to sample sequential particle positions, in which case the history of each particle through the landscape is not retained and has to be assembled afterwards. In this case, \code{\link[flapper]{pf_simplify}} calculates the distances between all combinations of cells at each time step, using either a Euclidean distances or shortest distances algorithm according to the input to \code{calc_distance}. Distances are converted to probabilities using the `intrinsic' probabilities associated with each location and the movement models retained in \code{archive} from the call to \code{\link[flapper]{pf}} to identify possible movement paths between cells at each time step. If the fast Euclidean distances method has not been used, then pairwise cell movements are retained by \code{\link[flapper]{pf}}. In this case, the function simply recalculates distances between sequential cell pairs and the associated cell probabilities, which are then processed according to the \code{return} argument.
Following the identification of pairwise cell movements, if \code{return = "archive"}, the function selects all of the unique cells at each time step that were connected to cells at the next time step. (For cells that were selected multiple times at a given time step, due to sampling with replacement in \code{\link[flapper]{pf}}, if \code{summarise_pr} is supplied, only one sample is retained: in maps of the `probability of use' across an area (see \code{\link[flapper]{pf_plot_map}}), this ensures that cell scores depend on the number of time steps when the individual could have occupied a given cell, rather than the total number of samples of a location.) Otherwise, if \code{return = "path"}, pairwise cell movements are assembled into complete movement paths.
}
\examples{
#### Example particle histories
# In these examples, we will use the example particle histories included in flapper
summary(dat_dcpf_histories)
#### Example (1): The default implementation
paths_1 <- pf_simplify(dat_dcpf_histories)
## Demonstration that the distance and probabilities calculations are correct
# The simple method below works if three conditions are met:
# ... The 'intrinsic' probability associated with each cell is the same (as for DC algorithm);
# ... Paths have been reconstructed via pf_simplify() using Euclidean distances;
# ... The calc_movement_pr() movement model applies to all time steps;
require(magrittr)
require(rlang)
paths_1 <-
paths_1 \%>\%
dplyr::group_by(.data$path_id) \%>\%
dplyr::mutate(
cell_xp = dplyr::lag(.data$cell_x),
cell_yp = dplyr::lag(.data$cell_y),
cell_dist_chk = sqrt((.data$cell_xp - .data$cell_x)^2 +
(.data$cell_yp - .data$cell_y)^2),
cell_pr_chk = dat_dcpf_histories$args$calc_movement_pr(.data$cell_dist_chk),
dist_equal = .data$cell_dist_chk == .data$cell_dist_chk,
pr_equal = .data$cell_pr == .data$cell_pr_chk
) \%>\%
data.frame()
utils::head(paths_1)
## Demonstration that the depths of sampled cells are correct
paths_1$cell_z_chk <- raster::extract(
dat_dcpf_histories$args$bathy,
paths_1$cell_id
)
all.equal(paths_1$cell_z, paths_1$cell_z_chk)
## Compare depth time series
# There is a relatively large degree of mismatch here, which reflects
# ... the low resolution bathymetry data used for the algorithm.
pf_plot_1d(paths_1, dat_dc$args$archival)
## Examine paths
# Log likelihood
pf_loglik(paths_1)
# 2-d visualisation
pf_plot_2d(paths_1, dat_dcpf_histories$args$bathy,
add_paths = list(length = 0.05)
)
# 3-d visualisation
pf_plot_3d(paths_1, dat_dcpf_histories$args$bathy)
#### Example (2): Re-calculate distances as shortest distances
## Implement flapper::pf()
# For this example, we need to increase the number of particles
# ... for Euclidean-based sampling to generate viable paths
# ... when we consider shortest distances
set.seed(1)
dcpf_args <- dat_dcpf_histories$args
dcpf_args$calc_distance_euclid_fast <- TRUE
dcpf_args$n <- 50
out_dcpf_2 <- do.call(pf, dcpf_args)
## Implement pf_simplify() using shortest distances
paths_2 <- pf_simplify(out_dcpf_2, calc_distance = "lcp")
# ... Duration: ~ 0.655 s
system.time(
invisible(utils::capture.output(
pf_simplify(out_dcpf_2, calc_distance = "lcp")
))
)
## Demonstrate the LCP calculations are correct
paths_2_lcps <- lcp_interp(paths_2,
out_dcpf_2$args$bathy,
calc_distance = TRUE
)
head(cbind(paths_2$dist, paths_2_lcps$dist_lcp$dist))
## Trial options for increasing speed of shortest-distance calculations
# Speed up shortest-distance calculations via (a) the graph:
# ... Duration: ~0.495 s
# ... Note that you may achieve further speed improvements via
# ... a simplified/contracted graph
# ... ... see cppRouting::cpp_simplify()
# ... ... see cppRouting::cpp_contract()
costs <- lcp_costs(out_dcpf_2$args$bathy)
graph <- lcp_graph_surface(out_dcpf_2$args$bathy, costs$dist_total)
system.time(
invisible(utils::capture.output(
pf_simplify(out_dcpf_2,
calc_distance = "lcp",
calc_distance_graph = graph
)
))
)
# Speed up shortest-distance calculations via (b) the lower Euclid dist limit
# ... Duration: ~0.493 s
costs <- lcp_costs(out_dcpf_2$args$bathy)
graph <- lcp_graph_surface(out_dcpf_2$args$bathy, costs$dist_total)
system.time(
invisible(utils::capture.output(
pf_simplify(out_dcpf_2,
calc_distance = "lcp",
calc_distance_graph = graph,
calc_distance_limit = 100
)
))
)
# Speed up shortest-distance calculations via (c) the barrier
# ... Duration: ~1.411 s (much slower in this example)
coastline <- sf::st_as_sf(dat_coast)
sf::st_crs(coastline) <- NA
system.time(
invisible(utils::capture.output(
pf_simplify(out_dcpf_2,
calc_distance = "lcp",
calc_distance_graph = graph,
calc_distance_limit = 100,
calc_distance_barrier = coastline
)
))
)
# Speed up calculations via (d) mobility limits
# ... (In the examples above, the mobility parameters
# ... can be extracted from out_dcpf_2,
# ... so specifying them directly here in this example makes
# ... no material difference, but this is not necessarily the case
# ... if pf() has been implemented without mobility parameters).
system.time(
invisible(utils::capture.output(
pf_simplify(out_dcpf_2,
calc_distance = "lcp",
calc_distance_graph = graph,
calc_distance_limit = 100,
mobility = 200,
mobility_from_origin = 200
)
))
)
# Speed up calculations via (e) parallelisation
# ... see the details in the documentation.
#### Example (3): Restrict the number of routes to each cell at each time step
# Implement approach for different numbers of copies
# Since we only have sampled a small number of particles for this simulation
# ... this does not make any difference here, but it can dramatically reduce
# ... the time taken to assemble paths and prevent vector memory issues.
paths_3a <- pf_simplify(dat_dcpf_histories, max_n_copies = 1)
paths_3b <- pf_simplify(dat_dcpf_histories, max_n_copies = 5)
paths_3c <- pf_simplify(dat_dcpf_histories, max_n_copies = 7)
# Compare the number of paths retained
unique(paths_3a$path_id)
unique(paths_3b$path_id)
unique(paths_3c$path_id)
#### Example (4): Change the sampling method used to retain paths
# Again, this doesn't make a difference here, but it can when there are
# ... more particles.
paths_4a <- pf_simplify(dat_dcpf_histories,
max_n_copies = 5,
max_n_copies_sampler = "random"
)
paths_4b <- pf_simplify(dat_dcpf_histories,
max_n_copies = 5,
max_n_copies_sampler = "weighted"
)
paths_4c <- pf_simplify(dat_dcpf_histories,
max_n_copies = 5,
max_n_copies_sampler = "max"
)
# Compare retained paths
pf_loglik(paths_3a)
pf_loglik(paths_3b)
pf_loglik(paths_3c)
#### Example (5): Set the maximum number of paths for reconstruction (for speed)
# Reconstruct all paths (note you may experience vector memory limitations)
# unique(pf_simplify(dat_dcpf_histories, max_n_paths = NULL)$path_id)
# Reconstruct one path
unique(pf_simplify(dat_dcpf_histories, max_n_paths = 1)$path_id)
# Reconstruct (at most) five paths
unique(pf_simplify(dat_dcpf_histories, max_n_paths = 5)$path_id)
#### Example (6): Retain/drop the origin, if specified
# For the example particle histories, an origin was specified
dat_dcpf_histories$args$origin
# This is included as 'timestep = 0' in the returned dataframe
# ... with the coordinates re-defined on bathy:
paths_5a <- pf_simplify(dat_dcpf_histories)
paths_5a[1, c("cell_x", "cell_y")]
raster::xyFromCell(
dat_dcpf_histories$args$bathy,
raster::cellFromXY(
dat_dcpf_histories$args$bathy,
dat_dcpf_histories$args$origin
)
)
head(paths_5a)
# If specified, the origin is dropped with add_origin = FALSE
paths_5b <- pf_simplify(dat_dcpf_histories, add_origin = FALSE)
head(paths_5b)
#### Example (6) Get particle samples for connected particles
## Implement DCPF with more particles for demonstration purposes
set.seed(1)
dcpf_args <- dat_dcpf_histories$args
dcpf_args$calc_distance_euclid_fast <- TRUE
dcpf_args$n <- 250
out_dcpf_6a <- do.call(pf, dcpf_args)
head(out_dcpf_6a$history[[1]])
## Extract particle samples for connected particles
# There may be multiple records of any given cell at any given time step
# ... due to sampling with replacement.
out_dcpf_6b <- pf_simplify(out_dcpf_6a, return = "archive")
head(out_dcpf_6b$history[[1]])
table(duplicated(out_dcpf_6b$history[[1]]$id_current))
## Extract particle samples for connected particles,
# ... with only the most likely record of each particle returned.
# We can implement the approach using out_dcpf_6b
# ... to skip distance calculations.
# Now, there is only one (the most likely) record of sampled cells
# ... at each time step.
out_dcpf_6c <- pf_simplify(out_dcpf_6b,
summarise_pr = TRUE,
return = "archive"
)
head(out_dcpf_6c$history[[1]])
table(duplicated(out_dcpf_6c$history[[1]]$id_current))
## Make movement paths
# Again, we use out_dcpf_6b to skip distance calculations.
out_dcpf_6d <- pf_simplify(out_dcpf_6b,
max_n_copies = 2L,
return = "path"
)
## Compare resultant maps
# The map for all particles is influenced by particles that were 'dead ends',
# ... which isn't ideal for a map of space use.
# The map for connected samples deals with this problem, but is influenced by
# ... the total number of samples of each cell, rather than the number of time steps
# ... in which the individual could have been located in a given cell.
# The map for unique, connected samples deals with this issue, so that scores
# ... represent the number of time steps in which the individual could have occupied
# ... a given cell, over the length of the time series.
# The map for the paths are sparser because paths have only been reconstructed
# ... for a sample of sampled particles.
pp <- par(mfrow = c(2, 2), oma = c(2, 2, 2, 2), mar = c(2, 4, 2, 4))
paa <- list(side = 1:4, axis = list(labels = FALSE))
transform <- NULL
m_1 <- pf_plot_map(out_dcpf_6a, dcpf_args$bathy,
transform = transform,
pretty_axis_args = paa, main = "all samples"
)
m_2 <- pf_plot_map(out_dcpf_6b, dcpf_args$bathy,
transform = transform,
pretty_axis_args = paa, main = "connected samples"
)
m_3 <- pf_plot_map(out_dcpf_6c, dcpf_args$bathy,
transform = transform,
pretty_axis_args = paa, main = "unique, connected samples"
)
m_4 <- pf_plot_map(out_dcpf_6d, dcpf_args$bathy,
transform = transform,
pretty_axis_args = paa, main = "paths"
)
par(pp)
# Note that all locations in reconstructed paths are derived from PF samples
all(out_dcpf_6d$cell_id[out_dcpf_6d$timestep != 0] \%in\%
do.call(rbind, out_dcpf_6c$history)$id_current)
# But the paths only contain a subset of sampled particles
h_6a <- lapply(out_dcpf_6a$history, function(elm) elm[, "id_current"])
table(unique(unlist(h_6a)) \%in\% out_dcpf_6d$cell_id[out_dcpf_6d$timestep != 0])
h_6b <- lapply(out_dcpf_6b$history, function(elm) elm[, "id_current"])
table(unique(unlist(h_6b)) \%in\% out_dcpf_6d$cell_id[out_dcpf_6d$timestep != 0])
}
\author{
Edward Lavender
}