Skip to content

Commit

Permalink
Test and documentation fixes. kmeans addition to predictor_derivate()
Browse files Browse the repository at this point in the history
  • Loading branch information
Martin-Jung committed Aug 25, 2024
1 parent db0c362 commit 94c8ba2
Show file tree
Hide file tree
Showing 14 changed files with 147 additions and 41 deletions.
2 changes: 1 addition & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#### New features
* Support for 'modal' value calculations in `ensemble()`.
* Support for 'superlearner' in `ensemble()`.
* Support for 'kmeans' derived threshold calculation in `threshold()`
* Support for 'kmeans' derived threshold calculation in `threshold()` and `predictor_derivate()`.
* Support for future processing streamlined. See FAQ section for instructions #18.

#### Minor improvements and bug fixes
Expand Down
11 changes: 6 additions & 5 deletions R/add_predictors.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ NULL
#' * \code{'interaction'} - Add interacting predictors. Interactions need to be specified (\code{"int_variables"})!
#' * \code{'thresh'} - Add threshold derivate predictors.
#' * \code{'hinge'} - Add hinge derivate predictors.
#' * \code{'kmeans'} - Add k-means derived factors.
#' * \code{'bin'} - Add predictors binned by their percentiles.
#'
#' @note
Expand Down Expand Up @@ -130,7 +131,7 @@ methods::setMethod(
assertthat::assert_that(inherits(x, "BiodiversityDistribution"),
is.Raster(env),
all(transform == 'none') || all( transform %in% c('pca', 'scale', 'norm', 'windsor') ),
all(derivates == 'none') || all( derivates %in% c('thresh', 'hinge', 'quadratic', 'bin', 'interaction') ),
all(derivates == 'none') || all( derivates %in% c('thresh', 'hinge', 'quadratic', 'bin', 'kmeans', 'interaction') ),
is.vector(derivate_knots) || is.numeric(derivate_knots),
is.null(names) || assertthat::is.scalar(names) || is.vector(names),
is.logical(explode_factors),
Expand Down Expand Up @@ -753,7 +754,7 @@ methods::setMethod(
# names = names = NULL; transform = 'none'; derivates = 'none'; derivate_knots = 4; int_variables = NULL;harmonize_na = FALSE; state = NULL
# Try and match transform and derivatives arguments
transform <- match.arg(transform, c('none','pca', 'scale', 'norm', 'windsor', 'percentile'), several.ok = FALSE) # Several ok set to FALSE as states are not working otherwise
derivates <- match.arg(derivates, c('none','thresh', 'hinge', 'quadratic', 'bin', 'interaction'), several.ok = TRUE)
derivates <- match.arg(derivates, c('none','thresh', 'hinge', 'quadratic', 'bin', 'kmeans', 'interaction'), several.ok = TRUE)

assertthat::validate_that(inherits(env,'stars'), msg = 'Projection rasters need to be stars stack!')
assertthat::assert_that(inherits(x, "BiodiversityScenario"),
Expand All @@ -767,8 +768,8 @@ methods::setMethod(
assertthat::validate_that(length(env) >= 1)

# Get model object
obj <- x$get_model()
assertthat::assert_that(!(is.null(obj) || is.Waiver(obj)),
obj <- x$get_model(copy = TRUE)
assertthat::assert_that(!(isFALSE(obj) || is.Waiver(obj)),
msg = "No model object found in scenario?")
model <- obj$model

Expand Down Expand Up @@ -856,7 +857,7 @@ methods::setMethod(
# Get variable names
varn <- obj$get_coefficients()[,1]
# Are there any derivates present in the coefficients?
if(any( length( grep("hinge_|bin_|quadratic_|thresh_|interaction_", varn ) ) > 0 )){
if(any( length( grep("hinge_|bin_|kmeans_|quadratic_|thresh_|interaction_", varn ) ) > 0 )){
if(getOption('ibis.setupmessages', default = TRUE)) myLog('[Setup]','green','Creating predictor derivates...')
for(dd in derivates){
if(any(grep(dd, varn))){
Expand Down
6 changes: 6 additions & 0 deletions R/engine_glm.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ NULL
#' This engine is essentially a wrapper for [stats::glm.fit()], however with customized
#' settings to support offsets and weights.
#'
#' If \code{"optim_hyperparam"} is set to \code{TRUE} in [`train()`], then a AIC
#' based step-wise (backwards) model selection is performed.
#' Generally however [`engine_glmnet`] should be the preferred package for models
#' with more than \code{>3} covariates.
#'
#' @returns An [Engine].
#'
#' @references
Expand All @@ -43,6 +48,7 @@ NULL
#'
#' # Add GLM as an engine
#' x <- distribution(background) |> engine_glm()
#' print(x)
#'
#' @name engine_glm
NULL
Expand Down
12 changes: 6 additions & 6 deletions R/threshold.R
Original file line number Diff line number Diff line change
Expand Up @@ -334,13 +334,13 @@ methods::setMethod(
} else if(method == 'kmeans') {
# K-means based clustering. Presence and absences are identified through
# by getting the value within regular sampled values
val <- terra::spatSample(raster_thresh, size = 1e6, method = "regular",
ex <- terra::spatSample(raster_thresh, size = 1e6, method = "regular",
na.rm = TRUE, exhaustive = TRUE)
val <- subset(val, complete.cases(val))
if(nrow(val)<5) stop("Not enough values for clustering found...")
clus <- stats::kmeans(val, centers = 2)
tr <- clus$centers[which.min(clus$centers[,1])]
rm(clus, val)
ex <- subset(ex, complete.cases(ex))
if(nrow(ex)<5) stop("Not enough values for clustering found...")
clus <- stats::kmeans(ex, centers = 2)
tr <- clus$centers[which.max(clus$centers[,1])]
rm(clus, ex)
} else {
# Optimized threshold statistics using the modEvA package
# FIXME: Could think of porting these functions but too much effort for
Expand Down
95 changes: 85 additions & 10 deletions R/utils-predictors.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,14 @@
#' @keywords utils
#'
#' @examples
#' \dontrun{
#' # Where x is a SpatRaster
#' new_x <- predictor_transform(x, option = 'scale')
#' }
#' # Dummy raster
#' r_ori <- terra::rast(nrows = 10, ncols = 10, res = 0.05, xmin = -1.5, xmax = 1.5, ymin = -1.5, ymax = 1.5, vals = rnorm(3600,mean = .01,sd = .1))
#'
#' # Normalize
#' r_norm <- predictor_transform(r_ori, option = 'norm')
#' new <- c(r_ori, r_norm)
#' names(new) <- c("original scale", "normalized units")
#' terra::plot(new)
#'
#' @export
predictor_transform <- function(env, option, windsor_props = c(.05,.95), pca.var = 0.8, state = NULL, method = NULL, ...){
Expand Down Expand Up @@ -351,20 +355,25 @@ predictor_transform <- function(env, option, windsor_props = c(.05,.95), pca.var
#' * \code{'bin'} - Creates a factor representation of a covariates by cutting the
#' range of covariates by their percentiles. The number of percentile cuts and thus
#' new derivates is specified via the parameter \code{'nknots'} (Default: \code{4}).
#' * \code{'kmeans'} Creates a factor representation of a covariates through a
#' [`kmeans()`] clustering. The number of clusters are specified via the parameter \code{'nknots'}.
#'
#' @return Returns the derived adjusted [`SpatRaster`] objects of identical resolution.
#'
#' @seealso predictor_transform
#' @keywords utils
#'
#' @examples
#' \dontrun{
#' # Create a hinge transformation of one or multiple SpatRaster.
#' predictor_derivate(covs, option = "hinge", knots = 4)
#' }
#' # Dummy raster
#' r_ori <- terra::rast(nrows = 10, ncols = 10, res = 0.05, xmin = -1.5, xmax = 1.5, ymin = -1.5, ymax = 1.5, vals = rpois(3600, 10))
#'
#' # Create a hinge transformation with 4 knots of one or multiple SpatRaster.
#' new <- predictor_derivate(r_ori, option = "hinge", knots = 4)
#' terra::plot(new)
#'
#' @export
predictor_derivate <- function(env, option, nknots = 4, deriv = NULL, int_variables = NULL, method = NULL, ...){
predictor_derivate <- function(env, option, nknots = 4, deriv = NULL,
int_variables = NULL, method = NULL, ...){
assertthat::assert_that(
is.Raster(env) || inherits(env, "stars"),
!missing(env),
Expand All @@ -382,7 +391,7 @@ predictor_derivate <- function(env, option, nknots = 4, deriv = NULL, int_variab
base::length(option) == 1
)
# Match argument.
option <- match.arg(option, c('none','quadratic', 'hinge',
option <- match.arg(option, c('none','quadratic', 'hinge', 'kmeans',
'thresh', 'bin', 'interaction'),
several.ok = FALSE)

Expand Down Expand Up @@ -600,6 +609,72 @@ predictor_derivate <- function(env, option, nknots = 4, deriv = NULL, int_variab
}
}

# For k-means thresholding
if(option == 'kmeans'){
if(is.Raster(env)){
new_env <- terra::rast()
for(val in names(env)){
# Also get the cut_off values
ex <- terra::values(env[[val]]) |> (\(.) subset(., stats::complete.cases(.)))()
if(!is.null(cutoffs)) k <- cutoffs else k <- nknots
cu <- stats::kmeans(ex,centers = k)
assertthat::assert_that(inherits(cu, "kmeans"),
msg = "K-means clustering failed...")
suppressWarnings( o <- terra::k_means(env[[val]], centers = cu$centers[,1]) )
if(is.null(o)) next()
# Factorize and explode
o <- explode_factorized_raster( terra::as.factor(o),
name = paste0('kmeans_', val))
for(i in 1:terra::nlyr(o)){
names(o[[i]]) <- paste0('kmeans_',val,'_',round(cu$centers[i], 3))
attr(o[[i]], "deriv.kmeans") <- cu$centers[i]
}
new_env <- c(new_env, o)
rm(o)
}
} else {
# For stats layers
for(val in names(env_list)){
# FIXME: To be implemented once there is a need...
stop("KMeans for stars to be implemented...")
# Format cutoffs
cu <- cutoffs[which(cutoffs$deriv == val), 3]
cu <- strsplit(cu, "_") |> unlist()
# Remove any leading points
if(any(substr(cu,1, 1)==".")){
cu[which(substr(cu,1, 1)==".")] <- gsub("^.","",cu[which(substr(cu,1, 1)==".")])
}
suppressWarnings( cu <- as.numeric(cu) )
nn <- terra::rast()
# If NA, set
if(is.na(cu[1]) || is.na(cu[2])){
# Use the default knots for cutting and get knots
o <- makeBin(env_list[[val]][[1]], n = val, nknots = nknots, cutoffs = NULL)
suppressWarnings( cu <- strsplit(names(o), "_") |>
unlist() |>
as.numeric())
cu <- subset(cu, stats::complete.cases(cu))
cu <- matrix(cu,ncol=2,byrow = TRUE) # Convert to pmin and pmax
cu <- cbind(cu, 1:nrow(cu))
}
for(k in 1:terra::nlyr(env_list[[val]])){
newcu <- cu
# Set smallest and largest value to global minimum/maximum to account for rounding issues
newcu[1] <- terra::global(env_list[[val]][[k]], "min",na.rm=T)[,1]
newcu[nrow(newcu)*2] <- terra::global(env_list[[val]][[k]], "max",na.rm=T)[,1]

o <- terra::classify(env_list[[val]][[k]], newcu, include.lowest=TRUE)
terra::time(o) <- terra::time( env_list[[val]][[k]] )
names(o) <- paste0(val, "_",nrow(newcu))
suppressWarnings( nn <- append(nn, o) )
rm(o, newcu)
}
new_env[[val]] <- nn
}
invisible(gc())
}
}

# Create interaction variables
if(option == 'interaction'){
# Check whether interaction is provided or an attribute
Expand Down
14 changes: 9 additions & 5 deletions R/utils-spatial.R
Original file line number Diff line number Diff line change
Expand Up @@ -1044,11 +1044,11 @@ get_ngbvalue <- function(coords, env, longlat = TRUE, field_space = c('x','y'),
return(out)
}

#' Function to extract directly the raster value of provided points
#' Function to extract point values directly from a SpatRaster
#'
#' @description This function simply extracts the values from a provided
#' [`SpatRaster`], [`SpatRasterDataset`] or [`SpatRasterCollection`] object. For
#' points where or NA values were extracted a small buffer is applied to try and
#' points where or \code{NA} values were extracted a small buffer is applied to try and
#' obtain the remaining values.
#'
#' @param coords A [`data.frame`], [`matrix`] or [`sf`] object.
Expand All @@ -1066,10 +1066,14 @@ get_ngbvalue <- function(coords, env, longlat = TRUE, field_space = c('x','y'),
#' @keywords utils
#'
#' @examples
#' \dontrun{
#' # Dummy raster:
#' r <- terra::rast(nrows = 10, ncols = 10, res = 0.05, xmin = -1.5, xmax = 1.5, ymin = -1.5, ymax = 1.5, vals = rnorm(3600,mean = .5,sd = .1))
#' # (dummy points)
#' pp <- terra::spatSample(r,20,as.points = TRUE) |> sf::st_as_sf()
#'
#' # Extract values
#' vals <- get_rastervalue(coords, env)
#' }
#' vals <- get_rastervalue(pp, r)
#' head(vals)
#'
#' @export
get_rastervalue <- function(coords, env, ngb_fill = TRUE, rm.na = FALSE){
Expand Down
1 change: 1 addition & 0 deletions man/add_predictors.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/engine_glm.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 9 additions & 5 deletions man/get_rastervalue.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 8 additions & 4 deletions man/predictor_derivate.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 8 additions & 4 deletions man/predictor_transform.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions man/threshold.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tests/testthat/test_Scenarios.R
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ test_that('Scenarios and constraints', {
expect_null(sc$get_limits())

# Add covariates in various transformations
sc <- scenario(fit)
sc <- scenario(fit, copy_model = TRUE) # Copy model here over for the test
x <- sc |> add_predictors(pred_future, transform = "none")
expect_length(x$get_predictor_names(), 9)
suppressWarnings(
Expand Down
4 changes: 4 additions & 0 deletions tests/testthat/test_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,10 @@ test_that('Custom functions - Test gridded transformations and ensembles', {
expect_match(attr(tr1, "method"), "percentile")
expect_match(attr(tr1, "format"), "binary")

# Kmeans threshold
expect_no_error(tr2 <- threshold(r2, method = "kmeans"))
expect_match(attr(tr2, "method"), "kmeans")
expect_match(attr(tr2, "format"), "binary")
# --- #
})

Expand Down

0 comments on commit 94c8ba2

Please sign in to comment.