Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fetch measurements from archive.opensensemap.org (#23) #25

Merged
merged 4 commits into from
Oct 19, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ Imports:
httr,
digest,
readr,
purrr,
magrittr
Suggests:
maps,
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ S3method("[",sensebox)
S3method(osem_measurements,bbox)
S3method(osem_measurements,default)
S3method(osem_measurements,sensebox)
S3method(osem_measurements_archive,default)
S3method(osem_measurements_archive,sensebox)
S3method(osem_phenomena,sensebox)
S3method(plot,osem_measurements)
S3method(plot,sensebox)
Expand All @@ -19,6 +21,7 @@ export(osem_clear_cache)
export(osem_counts)
export(osem_endpoint)
export(osem_measurements)
export(osem_measurements_archive)
export(osem_phenomena)
importFrom(graphics,legend)
importFrom(graphics,par)
Expand Down
11 changes: 7 additions & 4 deletions R/api.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,10 @@ get_box_ = function (boxId, endpoint, ...) {
parse_senseboxdata()
}

get_measurements_ = function (..., endpoint) {
result = osem_get_resource(endpoint, c('boxes', 'data'), ..., type = 'text')

parse_measurement_csv = function (resText) {
# parse the CSV response manually & mute readr
suppressWarnings({
result = readr::read_csv(result, col_types = readr::cols(
result = readr::read_csv(resText, col_types = readr::cols(
# factor as default would raise issues with concatenation of multiple requests
.default = readr::col_character(),
createdAt = readr::col_datetime(),
Expand All @@ -53,6 +51,11 @@ get_measurements_ = function (..., endpoint) {
osem_as_measurements(result)
}

get_measurements_ = function (..., endpoint) {
osem_get_resource(endpoint, c('boxes', 'data'), ..., type = 'text') %>%
parse_measurement_csv
}

get_stats_ = function (endpoint, cache) {
result = osem_get_resource(endpoint, path = c('stats'), progress = FALSE, cache = cache)
names(result) = c('boxes', 'measurements', 'measurements_per_minute')
Expand Down
135 changes: 135 additions & 0 deletions R/archive.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
# client for archive.opensensemap.org
# in this archive, CSV files for measurements of each sensor per day is provided.

#' Returns the default endpoint for the archive *download*
#' While the front end domain is archive.opensensemap.org, file downloads
#' are provided via sciebo.
osem_archive_endpoint = function () {
'https://uni-muenster.sciebo.de/index.php/s/HyTbguBP4EkqBcp/download?path=/data'
}

#' Fetch day-wise measurements for a single box from the openSenseMap archive.
#'
#' This function is significantly faster than \code{\link{osem_measurements}} for large
#' time-frames, as daily CSV dumps for each sensor from
#' \href{http://archive.opensensemap.org}{archive.opensensemap.org} are used.
#' Note that the latest data available is from the previous day.
#'
#' By default, data for all sensors of a box is fetched, but you can select a
#' subset with a \code{\link[dplyr]{dplyr}}-style NSE filter expression.
#'
#' The function will warn when no data is available in the selected period,
#' but continue the remaining download.
#'
#' @param x A `sensebox data.frame` of a single box, as retrieved via \code{\link{osem_box}},
#' to download measurements for.
#' @param fromDate Start date for measurement download.
#' @param toDate End date for measurement download (inclusive).
#' @param sensorFilter A NSE formula matching to \code{x$sensors}, selecting a subset of sensors.
#' @param progress Whether to print download progress information, defaults to \code{TRUE}.
#' @return A \code{tbl_df} Containing observations of all selected sensors for each time stamp.
#'
#' @seealso \href{https://archive.opensensemap.org}{openSenseMap archive}
#' @seealso \code{\link{osem_measurements}}
#' @seealso \code{\link{osem_box}}
#'
#' @export
osem_measurements_archive = function (x, ...) UseMethod('osem_measurements_archive')

#' @export
osem_measurements_archive.default = function (x, ...) {
# NOTE: to implement for a different class:
# in order to call `archive_fetch_measurements()`, `box` must be a dataframe
# with a single row and the columns `X_id` and `name`
stop(paste('not implemented for class', toString(class(x))))
}

#' @describeIn osem_measurements_archive Get daywise measurements for one or
#' more sensors of a single box
#' @export
#' @examples
#' # fetch measurements for a single day
#' box = osem_box('593bcd656ccf3b0011791f5a')
#' m = osem_measurements_archive(box, as.POSIXlt('2018-09-13'))
#'
#' \donttest{
#' # fetch measurements for a date range and selected sensors
#' sensors = ~ phenomenon %in% c('Temperatur', 'Beleuchtungsstärke')
#' m = osem_measurements_archive(box, as.POSIXlt('2018-09-01'), as.POSIXlt('2018-09-30'), sensorFilter = sensors)
#' }
osem_measurements_archive.sensebox = function (x, fromDate, toDate = fromDate, sensorFilter = ~ T, progress = T) {
if (nrow(x) != 1)
stop('this function only works for exactly one senseBox!')

# filter sensors using NSE, for example: `~ phenomenon == 'Temperatur'`
sensors = x$sensors[[1]] %>%
dplyr::filter(lazyeval::f_eval(sensorFilter, .))

# fetch each sensor separately
dfs = by(sensors, 1:nrow(sensors), function (sensor) {
df = archive_fetch_measurements(x, sensor$id, fromDate, toDate, progress) %>%
dplyr::select(createdAt, value) %>%
#dplyr::mutate(unit = sensor$unit, sensor = sensor$sensor) %>% # inject sensor metadata
dplyr::rename_at(., 'value', function(v) sensor$phenomenon)
})

# merge all data.frames by timestamp
dfs %>% purrr::reduce(dplyr::full_join, 'createdAt')
}

#' fetch measurements from archive from a single box, and a single sensor
archive_fetch_measurements = function (box, sensor, fromDate, toDate, progress) {
dates = list()
from = fromDate
while (from <= toDate) {
dates = append(dates, list(from))
from = from + as.difftime(1, units = 'days')
}

http_handle = httr::handle(osem_archive_endpoint()) # reuse the http connection for speed!
progress = if (progress && !is_non_interactive()) httr::progress() else NULL

measurements = lapply(dates, function(date) {
url = build_archive_url(date, box, sensor)
res = httr::GET(url, progress, handle = http_handle)

if (httr::http_error(res)) {
warning(paste(
httr::status_code(res),
'on day', format.Date(date, '%F'),
'for sensor', sensor
))

if (httr::status_code(res) == 404)
return(data.frame(createdAt = character(), value = character()))
}

measurements = httr::content(res, type = 'text', encoding = 'UTF-8') %>%
parse_measurement_csv
})

measurements %>% dplyr::bind_rows()
}

#' returns URL to fetch measurements from a sensor for a specific date,
#' based on `osem_archive_endpoint()`
build_archive_url = function (date, box, sensor) {
sensorId = sensor
d = format.Date(date, '%F')
format = 'csv'

paste(
osem_archive_endpoint(),
d,
osem_box_to_archivename(box),
paste(paste(sensorId, d, sep = '-'), format, sep = '.'),
sep = '/'
)
}

#' replace chars in box name according to archive script:
#' https://github.com/sensebox/osem-archiver/blob/612e14b/helpers.sh#L66
osem_box_to_archivename = function (box) {
name = gsub('[^A-Za-z0-9._-]', '_', box$name)
paste(box$X_id, name, sep='-')
}
12 changes: 11 additions & 1 deletion R/box.R
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,18 @@ parse_senseboxdata = function (boxdata) {
if (!is.null(thebox$updatedAt))
thebox$updatedAt = as.POSIXct(strptime(thebox$updatedAt, format = '%FT%T', tz = 'GMT'))

# create a dataframe of sensors
thebox$sensors = sensors %>%
lapply(as.data.frame, stringsAsFactors = F) %>%
dplyr::bind_rows(.) %>%
dplyr::select(phenomenon = title, id = X_id, unit, sensor = sensorType) %>%
list

# extract metadata from sensors
thebox$phenomena = lapply(sensors, function(s) s$title) %>% unlist %>% list
thebox$phenomena = sensors %>%
setNames(lapply(., function (s) s$`_id`)) %>%
lapply(function(s) s$title) %>%
unlist %>% list # convert to vector

# FIXME: if one sensor has NA, max() returns bullshit
get_last_measurement = function(s) {
Expand Down
29 changes: 20 additions & 9 deletions R/opensensmapr.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,27 @@
#' }
#'
#' @section Retrieving measurements:
#' Measurements can be retrieved through \code{\link{osem_measurements}} for a
#' given phenomenon only. A subset of measurements may be selected by
#'
#' There are two ways to retrieve measurements:
#' \itemize{
#' \item a list of senseBoxes, previously retrieved through
#' \code{\link{osem_box}} or \code{\link{osem_boxes}}.
#' \item a geographic bounding box, which can be generated with the
#' \code{\link[sf]{sf}} package.
#' \item a time frame
#' \item a exposure type of the given box
#' \item \code{\link{osem_measurements_archive}}:
#' Downloads measurements for a \emph{single box} from the openSenseMap archive.
#' This function does not provide realtime data, but is suitable for long time frames.
#'
#' \item \code{\link{osem_measurements}}:
#' This function retrieves (realtime) measurements from the API. It works for a
#' \emph{single phenomenon} only, but provides various filters to select sensors by
#'
#' \itemize{
#' \item a list of senseBoxes, previously retrieved through
#' \code{\link{osem_box}} or \code{\link{osem_boxes}}.
#' \item a geographic bounding box, which can be generated with the
#' \code{\link[sf]{sf}} package.
#' \item a time frame
#' \item a exposure type of the given box
#' }
#'
#' Use this function with caution for long time frames, as the API becomes
#' quite slow is limited to 10.000 measurements per 30 day interval.
#' }
#'
#' Data is returned as \code{tibble} with the class \code{osem_measurements}.
Expand Down
13 changes: 13 additions & 0 deletions man/build_archive_url.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions man/osem_archive_endpoint.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions man/osem_box_to_archivename.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

38 changes: 38 additions & 0 deletions man/osem_measurements_archive.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

53 changes: 53 additions & 0 deletions tests/testthat/test_archive.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
source('testhelpers.R')

context('osem_box_to_archivename()')

try({
boxes = osem_boxes(grouptag = 'ifgi')
box = filter(boxes, row_number() == 1)
})

test_that('osem_box_to_archive_name does the correct character replacements', {
b = data.frame(
name = 'aA1._- äß!"?$%&/',
X_id = 'UUID'
)

archivename = opensensmapr:::osem_box_to_archivename(b)
expect_equal(archivename, 'UUID-aA1._-__________')
})

test_that('osem_box_to_archive_name works for one box', {
if (is.null(box)) skip('no box data could be fetched')

archivename = opensensmapr:::osem_box_to_archivename(box)
expect_length(archivename, 1)
expect_type(archivename, 'character')
})

test_that('osem_box_to_archive_name works for multiple boxes', {
if (is.null(boxes)) skip('no box data available')

archivename = opensensmapr:::osem_box_to_archivename(boxes)
expect_length(archivename, nrow(boxes))
expect_type(archivename, 'character')
})

context('osem_measurements_archive()')

test_that('osem_measurements_archive works for one box', {
if (is.null(box)) skip('no box data could be fetched')

m = osem_measurements_archive(box, as.POSIXlt('2018-08-08'))
expect_length(m, nrow(box$sensors[[1]]) + 1) # one column for each sensor + createdAt
expect_s3_class(m, c('osem_measurements', 'tbl_df'))
})

test_that('osem_measurements_archive fails for multiple boxes', {
if (is.null(boxes)) skip('no box data available')

expect_error(
osem_measurements_archive(boxes, as.POSIXlt('2018-08-08')),
'this function only works for exactly one senseBox!'
)
})