-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcl.Rd
132 lines (114 loc) · 4.88 KB
/
cl.Rd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cl_helpers.R
\name{cl}
\alias{cl}
\alias{cl_lapply}
\alias{cl_check}
\alias{cl_cores}
\alias{cl_chunks}
\alias{cl_export}
\alias{cl_stop}
\title{Parallelisation helpers}
\usage{
cl_lapply(x, fun, ..., cl = NULL, varlist = NULL, use_chunks = FALSE)
cl_check(cl = NULL, varlist = NULL)
cl_cores(cl = NULL)
cl_chunks(cl = NULL, length)
cl_export(cl = NULL, varlist = NULL)
cl_stop(cl = NULL)
}
\arguments{
\item{x}{A \code{\link[base]{list}} over which to iterate.}
\item{fun, ...}{A function that is applied to elements of \code{x} alongside any optional arguments to \code{fun}.}
\item{cl}{(optional) A cluster from \code{\link[parallel]{makeCluster}} or an integer that defines the number of child processes (see \code{\link[pbapply]{pblapply}}).}
\item{varlist}{(optional) A character vector of objects for export (see \code{\link[parallel]{clusterExport}}). If \code{cl} is a cluster, this may be required. Exported objects must be located in the global environment.}
\item{use_chunks}{A logical vector that defines whether to parallelise over `chunks' (\code{TRUE}) or over the elements of \code{x} (\code{FALSE}). If \code{use_chunks = TRUE}, \code{x} is split into \emph{n} chunks (one per core) that are processed in parallel; within each chunk \code{x} is updated iteratively.}
\item{length}{An integer that defines the number of elements in the iteration.}
}
\value{
\itemize{
\item \code{\link[flapper]{cl_lapply}} returns a list.
\item \code{\link[flapper]{cl_cores}} returns an integer.
\item \code{\link[flapper]{cl_chunks}} returns a list of integers.
\item \code{\link[flapper]{cl_check}}, \code{\link[flapper]{cl_export}} and \code{\link[flapper]{cl_stop}} return \code{invisible()}.
}
}
\description{
A set of wrappers for \code{\link[parallel]{parallel}} functions that facilitate the implementation of parallel routines in functions via \code{\link[pbapply]{pblapply}}.
}
\details{
\code{\link[flapper]{cl_lapply}} is a wrapper for \code{\link[pbapply]{pblapply}} that handles cluster checking, set up and closure, using the following functions:
\itemize{
\item \code{\link[flapper]{cl_check}} checks \code{cl} and \code{varlist} arguments, as inputted to a parent function. For example, if \code{cl = NULL}, \code{varlist} should also be \code{NULL}.
\item \code{\link[flapper]{cl_cores}} identifies the number of cores specified.
\item \code{\link[flapper]{cl_chunks}} defines a list, with one element for each core specified, that contains an integer vector of the positions of an object over which to iterate serially in each chunk.
\item \code{\link[flapper]{cl_export}} implements \code{\link[parallel]{clusterExport}} if both \code{cl} and \code{varlist} are specified.
\item \code{\link[flapper]{cl_stop}} implements \code{\link[parallel]{stopCluster}} if \code{cl} is a cluster object from \code{\link[parallel]{makeCluster}}.
}
}
\examples{
#### Examples of cl_lapply()
# Serial implementation: no cluster is supplied
z <- cl_lapply(x = 1:10, fun = function(x) x + 1)
# Parallel implementation via forking (not on Windows)
z <- cl_lapply(x = 1:10, fun = function(x) x + 1, cl = 2L)
# Parallel implementation via a socket cluster
z <- cl_lapply(
  x = 1:10,
  fun = function(x) x + 1,
  cl = parallel::makeCluster(2L)
)
#### Catch mistakes
# A varlist is supplied alongside an integer cl
z <- cl_lapply(x = 1:10, fun = function(x) x + 1, cl = 2L, varlist = list())
# A varlist is supplied without any cl
z <- cl_lapply(x = 1:10, fun = function(x) x + 1, varlist = list())
#### Compare time trials for chunk-wise versus element-wise parallelisation
if (flapper_run_parallel) {
  ## Background
  # In this simple example, we will sample 'size' cells n times from a raster
  # The output is a list of cell samples. We compare the time taken to complete
  # sampling using different approaches.

  ## Define a dataframe of time trial scenarios
  # dplyr is attached because the pipe and the .data pronoun are used below
  # without a namespace prefix; library() stops straight away if the package
  # is missing, rather than after all of the trials have been run
  library(dplyr)
  dat <- expand.grid(
    n = 1e4,
    method = c("socket", "fork"),
    cores = 2L,
    use_chunks = c(FALSE, TRUE),
    time = NA
  )

  ## Estimate the duration of each scenario
  # Each one-row dataframe is one scenario; the row is returned with the
  # elapsed time filled in
  dat_by_trial <-
    lapply(split(dat, seq_len(nrow(dat))), function(d) {
      # Start the clock before the cluster is defined so that socket
      # set up is counted within the trial duration
      t1 <- Sys.time()
      # The two methods differ only in what is passed to cl:
      # a socket cluster object or an integer number of forked processes
      if (d$method == "socket") {
        cl_for_trial <- parallel::makeCluster(d$cores)
      } else if (d$method == "fork") {
        cl_for_trial <- d$cores
      }
      # The samples are discarded: only the elapsed time is of interest
      cl_lapply(
        x = seq_len(d$n),
        fun = function(i) {
          raster::sampleRandom(flapper::dat_gebco, size = 5)
        },
        cl = cl_for_trial,
        use_chunks = d$use_chunks
      )
      t2 <- Sys.time()
      # units must be named: the third positional argument of difftime()
      # is tz, so an unnamed "secs" would leave the units on "auto"
      d$time <- as.numeric(difftime(t2, t1, units = "secs"))
      d
    })

  ## Examine the results
  # Scenarios are recombined and ordered from fastest to slowest
  dat_for_trials <-
    dat_by_trial \%>\%
    dplyr::bind_rows() \%>\%
    dplyr::arrange(.data$n, .data$time) \%>\%
    print()
}
}
\author{
Edward Lavender
}