Added vignette on data preparation #67

iiasa · Aug 29, 2023 · 1e8c768 · 1e8c768
1 parent 4179be6
commit 1e8c768
Show file tree

Hide file tree

Showing 6 changed files with 350 additions and 22 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: ibis.iSDM
 Type: Package
 Title: Modelling framework for integrated biodiversity distribution scenarios
-Version: 0.0.8
+Version: 0.0.9
 Authors@R: 
     c(person(given = "Martin",
            family = "Jung",

diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,6 @@
 # ibis.iSDM 0.0.9 (current dev branch)
 #### New features
+* Added new vignette on available functions for data preparation #67
 
 #### Minor improvements and bug fixes
 * Small fix to `threshold()`, which now returns threshold values correctly.

diff --git a/R/pseudoabsence.R b/R/pseudoabsence.R
@@ -257,7 +257,7 @@ add_pseudoabsence <- function(df, field_occurrence = "observed", template = NULL
       bias <- terra::resample(bias, background, method = "bilinear")
     }
     # Normalize if not already set
-    if(terra::global(bias, 'max', na.rm = TRUE) > 1 || terra::global(bias, 'min', na.rm = TRUE) < 0 ){
+    if(terra::global(bias, 'max', na.rm = TRUE)[,1] > 1 || terra::global(bias, 'min', na.rm = TRUE)[,1] < 0 ){
       bias <- predictor_transform(bias, option = "norm")
     }
   } else { bias <- NULL }
@@ -276,11 +276,11 @@ add_pseudoabsence <- function(df, field_occurrence = "observed", template = NULL
     # Now sample from all cells not occupied
     if(!is.null(bias)){
       # Get probability values for cells where no sampling has been conducted
-      prob_bias <- bias[which(bg1[]==0)]
+      prob_bias <- bias[which(terra::values(bg1)[,1]==0)][,1]
       if(any(is.na(prob_bias))) prob_bias[is.na(prob_bias)] <- 0
-      abs <- sample(which(bg1[]==0), size = nrpoints, replace = TRUE, prob = prob_bias)
+      abs <- sample(which(terra::values(bg1)[,1]==0), size = nrpoints, replace = TRUE, prob = prob_bias)
     } else {
-      abs <- sample(which(bg1[]==0), size = nrpoints, replace = TRUE)
+      abs <- sample(which(terra::values(bg1)[,1]==0), size = nrpoints, replace = TRUE)
     }
   } else if(method == "buffer"){
     assertthat::assert_that(is.numeric(buffer_distance),msg = "Buffer distance parameter not numeric!")
@@ -301,7 +301,7 @@ add_pseudoabsence <- function(df, field_occurrence = "observed", template = NULL
     # Now sample from all cells not occupied
     if(!is.null(bias)){
       # Get probability values for cells where no sampling has been conducted
-      prob_bias <- bias[which(bg2[]==1)]
+      prob_bias <- bias[which(bg2[]==1)][,1]
       if(any(is.na(prob_bias))) prob_bias[is.na(prob_bias)] <- 0
       abs <- sample(which(bg2[]==1), size = nrpoints, replace = TRUE, prob = prob_bias)
     } else {
@@ -317,7 +317,7 @@ add_pseudoabsence <- function(df, field_occurrence = "observed", template = NULL
     bg2 <- terra::mask(bg1, mask = pol, inverse = !inside)
     if(!is.null(bias)){
       # Get probability values for cells where no sampling has been conducted
-      prob_bias <- bias[which(bg2[]==0)]
+      prob_bias <- bias[which(bg2[]==0)][,1]
       if(any(is.na(prob_bias))) prob_bias[is.na(prob_bias)] <- 0
       abs <- sample(which(bg2[]==0), size = nrpoints, replace = TRUE, prob = prob_bias)
     } else {
@@ -340,11 +340,11 @@ add_pseudoabsence <- function(df, field_occurrence = "observed", template = NULL
     bg2 <- terra::mask(bg1, mask = layer, inverse = !inside)
     if(!is.null(bias)){
       # Get probability values for cells where no sampling has been conducted
-      prob_bias <- bias[which(bg2[]==0)]
+      prob_bias <- bias[which(terra::values(bg2)[,1]==0)][,1]
       if(any(is.na(prob_bias))) prob_bias[is.na(prob_bias)] <- 0
-      abs <- sample(which(bg2[]==0), size = nrpoints, replace = TRUE, prob = prob_bias)
+      abs <- sample(which(terra::values(bg2)[,1]==0), size = nrpoints, replace = TRUE, prob = prob_bias)
     } else {
-      abs <- sample(which(bg2[]==0), size = nrpoints, replace = TRUE)
+      abs <- sample(which(terra::values(bg2)[,1]==0), size = nrpoints, replace = TRUE)
     }
     rm(bg2)
   } else if(method == "zones"){
@@ -379,11 +379,11 @@ add_pseudoabsence <- function(df, field_occurrence = "observed", template = NULL
     bg2 <- terra::mask(bg1, mask = zones)
     if(!is.null(bias)){
       # Get probability values for cells where no sampling has been conducted
-      prob_bias <- bias[which(bg2[]==0)]
+      prob_bias <- bias[which(terra::values(bg2)[,1]==0)][,1]
       if(any(is.na(prob_bias))) prob_bias[is.na(prob_bias)] <- 0
-      abs <- sample(which(bg2[]==0), size = nrpoints, replace = TRUE, prob = prob_bias)
+      abs <- sample(which(terra::values(bg2)[,1]==0), size = nrpoints, replace = TRUE, prob = prob_bias)
     } else {
-      abs <- sample(which(bg2[]==0), size = nrpoints, replace = TRUE)
+      abs <- sample(which(terra::values(bg2)[,1]==0), size = nrpoints, replace = TRUE)
     }
     rm(bg2)
   } else if(method == "target"){
@@ -398,11 +398,11 @@ add_pseudoabsence <- function(df, field_occurrence = "observed", template = NULL
     bg2 <- terra::mask(bg1, mask = layer)
     if(!is.null(bias)){
       # Get probability values for cells where no sampling has been conducted
-      prob_bias <- bias[which(bg2[]==0)]
+      prob_bias <- bias[which(terra::values(bg2)[,1]==0)][,1]
       if(any(is.na(prob_bias))) prob_bias[is.na(prob_bias)] <- 0
-      abs <- sample(which(bg2[]==0), size = nrpoints, replace = TRUE, prob = prob_bias)
+      abs <- sample(which(terra::values(bg2)[,1]==0), size = nrpoints, replace = TRUE, prob = prob_bias)
     } else {
-      abs <- sample(which(bg2[]==0), size = nrpoints, replace = TRUE)
+      abs <- sample(which(terra::values(bg2)[,1]==0), size = nrpoints, replace = TRUE)
     }
     rm(bg2)
   } else {

diff --git a/R/utils-spatial.R b/R/utils-spatial.R
@@ -1278,7 +1278,9 @@ thin_observations <- function(df, background, env = NULL, method = "random", min
   )
   check_package("dplyr")
   # Match method
-  method <- match.arg(method, choices = c("random", "spatial", "bias", "environmental", "zones"), several.ok = FALSE)
+  method <- match.arg(method,
+                      choices = c("random", "spatial", "bias", "environmental", "zones"),
+                      several.ok = FALSE)
 
   # Label background with id
   bg <- background
@@ -1300,7 +1302,8 @@ thin_observations <- function(df, background, env = NULL, method = "random", min
   ras <- terra::rasterize(coords, bg) # Get the number of observations per grid cell
 
   # Bounds for thinning
-  totake <- c(lower = minpoints, upper = max( terra::global(ras, "min", na.rm = TRUE)[,1], minpoints))
+  totake <- c(lower = minpoints,
+              upper = max( terra::global(ras, "min", na.rm = TRUE)[,1], minpoints))
 
   # -- #
   if(method == "random"){
@@ -1374,7 +1377,8 @@ thin_observations <- function(df, background, env = NULL, method = "random", min
                             is.factor(zones))
 
     if(!terra::compareGeom(bg, zones, stopOnError = FALSE)){
-      zones <- alignRasters(zones, bg, method = "near", func = terra::modal, cl = FALSE)
+      zones <- alignRasters(zones, bg, method = "near",
+                            func = terra::modal, cl = FALSE)
     }
 
     # Output vector
@@ -1398,7 +1402,8 @@ thin_observations <- function(df, background, env = NULL, method = "random", min
     # Environmental clustering
 
     if(!terra::compareGeom(bg, env, stopOnError = FALSE)){
-      env <- alignRasters(env, bg, method = "near", func = terra::modal, cl = FALSE)
+      env <- alignRasters(env, bg, method = "near",
+                          func = terra::modal, cl = FALSE)
     }
     # If there are any factors, explode
     if(any(is.factor(env))){

diff --git a/inst/CITATION b/inst/CITATION
@@ -21,5 +21,5 @@ citEntry(
         as.person("Maximilian H.K. Hesselbarth")
         ),
   year     = "2023",
-  version  = "0.0.5",
+  version  = "0.0.9",
   textVersion = "Jung, M., Hesselbarth, H.K.M. (2023). An integrated species distribution modelling framework for heterogeneous biodiversity data. R package version 0.0.5")