From 55e6859bbfbd9000b37a406456624fa655178dee Mon Sep 17 00:00:00 2001 From: Neander Marcel Heming Date: Tue, 4 Jul 2023 12:33:18 -0300 Subject: [PATCH] * bux fixes - fixed error in `.str.sample()` to avoid negative probabilities when all species were absent from a cell (i.e. all values are zero) * enhancements - in test data, added 2 layers. One with one occ and another with one absence. Also added a cell with zero richness - added new vignette - included Fixed-Fixed algorithm in vignette - added references and improved accuracy of algorithm description in DESCRIPTION, README, and vignettes - improved accuracy of null model descriptions in vignettes - added link to functions in documentation --- NEWS.md | 21 ++++++----- cran-comments.md | 50 ++++++++++++++++++++++---- data-raw/ext_data.R | 9 +++-- inst/extdata/spp_sites.tif | Bin 2021 -> 2632 bytes tests/testthat/test-bootspat_ff.R | 12 +++---- tests/testthat/test-bootspat_naive.R | 8 ++--- tests/testthat/test-bootspat_str.R | 7 ++-- vignettes/S1-get-started.Rmd | 52 ++++++++++++++++++++++++--- vignettes/S2-null-models.Rmd | 3 +- vignettes/references.bib | 15 ++++++++ 10 files changed, 136 insertions(+), 41 deletions(-) diff --git a/NEWS.md b/NEWS.md index cb3aee0..7a00120 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,13 +1,18 @@ # SESraster 0.6.2 -## bux fixes -- fixed error when all species were absent from a cell (i.e. all values are zero) - -## enhancements -- added new vignette -- added references and improved accuracy of algorithm description in DESCRIPTION, README, and vignettes -- improved accuracy of null model descriptions in vignettes -- added link to functions in documentation +* bux fixes + - fixed error in `.str.sample()` to avoid negative probabilities when all +species were absent from a cell (i.e. all values are zero) + +* new function + - added `bootspat_ff()` to include Fixed-Fixed algorithm + +* enhancements + - added new vignette + - added references and improved accuracy of algorithm description in DESCRIPTION, + README, and vignettes + - improved accuracy of null model descriptions in vignettes + - added link to functions in documentation # SESraster 0.6.1 diff --git a/cran-comments.md b/cran-comments.md index e3e8cf5..78dc28e 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,22 +1,60 @@ ## R CMD check results -0 errors | 0 warnings | 1 notes +0 errors | 0 warnings | 1 note -Days since last update: 4 +Maintainer: ‘Neander Marcel Heming ’ Found the following (possibly) invalid URLs: + URL: doi:10.1046/j.1365-2699.2003.00875.x From: README.md Message: Invalid URI scheme (use \doi for DOIs in Rd markup) URL: doi:10.2307/177478 From: README.md Message: Invalid URI scheme (use \doi for DOIs in Rd markup) + URL: https://codecov.io/gh/HemingNM/SESraster (moved to https://app.codecov.io/gh/HemingNM/SESraster) + From: README.md + Status: 200 + Message: OK + URL: https://doi.org/10.1046/j.1365-2699.2003.00875.x + From: inst/doc/S1-get-started.html + Status: 403 + Message: Forbidden + URL: https://doi.org/10.2307/177478 + From: inst/doc/S1-get-started.html + inst/doc/S2-null-models.html + Status: 403 + Message: Forbidden + + Found the following (possibly) invalid DOIs: + + DOI: 10.1046/j.1365-2699.2003.00875.x + From: DESCRIPTION + Status: Forbidden + Message: 403 + DOI: 10.2307/177478 + From: DESCRIPTION + Status: Forbidden + Message: 403 ## Maintainer comments +I was not able to solve the above NOTE about possibly invalid URLs from DOIs. I +know they are correct, so I am submitting as it is. + +Main changes are listed below: + * This is a new release. -## bux fixes -- fixed error when all species were absent from a cell (i.e. all values are zero) +* bux fixes + - fixed error in `.str.sample()` to avoid negative probabilities when all +species were absent from a cell (i.e. all values are zero) + +* new function + - added `bootspat_ff()` to include Fixed-Fixed algorithm -## enhancements -- several enhancements, see NEWS.md +* enhancements + - added new vignette + - added references and improved accuracy of algorithm description in DESCRIPTION, + README, and vignettes + - improved accuracy of null model descriptions in vignettes + - added link to functions in documentation diff --git a/data-raw/ext_data.R b/data-raw/ext_data.R index a08f46a..c1eaf08 100644 --- a/data-raw/ext_data.R +++ b/data-raw/ext_data.R @@ -1,13 +1,16 @@ ## code to prepare external dataset # creating data set.seed(100) -spp_sites <- terra::rast(ncol = 5, nrow = 5, nlyr = 5) +spp_sites <- terra::rast(ncol = 5, nrow = 5, nlyr = 7) terra::values(spp_sites) <- round(runif(terra::ncell(spp_sites) * terra::nlyr(spp_sites))) +terra::values(spp_sites[[1]]) <- rep(c(0,1), c(24,1)) +terra::values(spp_sites[[7]]) <- 1 spp_sites[1] <- NA -names(spp_sites) <- paste0("sp", 1:5) +spp_sites[2] <- 0 +names(spp_sites) <- paste0("sp", 1:terra::nlyr(spp_sites)) # as.data.frame(spp_sites) # if(!dir.exists("inst/extdata")) dir.create("inst/extdata", recursive = T) -# terra::writeRaster(spp_sites, "inst/extdata/spp_sites.tif", overwrite=T) +terra::writeRaster(spp_sites, "inst/extdata/spp_sites.tif", overwrite=T) # usethis::use_data(spp_sites, overwrite = TRUE) diff --git a/inst/extdata/spp_sites.tif b/inst/extdata/spp_sites.tif index bfae16d969d7cef986f1c2427cfc6d28cff1fc76..7ca09cb0eaeb729fb9c68949b79e5f8af10d11ae 100644 GIT binary patch delta 483 zcmaFLe?nw}8YBBeb@lpVoD2-2jLZy-3=9nHKr9AjvjW-Tj4WXBCLmh|Dh^U-i^L8B zve|%QJV1$HARAC;-Ltfb0$JJaP<7K+ZZK zXJb1P0|$`356Es<0u%*u)F!@`6;=SN2g*PJGXwKxe#Tv_lRcTVm`#=JCOa@GPp)L- z{Z<4q&%~rb0Rb}Eym#%20iN~H9&9pg`YChTLsnO1urb3;`{>!dNtoc`|zaS`W zisf3Z>9&h5tB33=^_m#4EMVoPReQ@;hRwOS`s%NzX|XOFg0n?;^=`Txwe{!%1prV* BchLX< delta 280 zcmX>h@|1sq8YAmOb@loxb_NDfMrH;^1_lOJAQnSni!-u-#j}CpGEi}lI$I=m5RlCR z6ypF&1OwS1GX#KazGfZ#*Rqp&7&$h}Fz#fXEXuh*R>YLSzU}ZLK>-#!RdC@w(qi%U%6_FJr@P74(aUSVgdkQfIR*H diff --git a/tests/testthat/test-bootspat_ff.R b/tests/testthat/test-bootspat_ff.R index eec8174..f007072 100644 --- a/tests/testthat/test-bootspat_ff.R +++ b/tests/testthat/test-bootspat_ff.R @@ -3,10 +3,6 @@ test_that("function bootspat_FF works", { # loading data bin1 <- load_ext_data() # terra::rast(system.file("extdata", "spp_sites.tif", package="SESraster")) - bin1[1] <- NA - bin1 <- c(bin1, bin1[[1]]) - bin1[[6]] <- c(NA, rep(1, (terra::ncol(bin1[[6]])*terra::nrow(bin1[[6]]))-1)) - bin1[2] <- 0 rprobnull <- terra::app(bin1, function(x){ @@ -22,11 +18,11 @@ test_that("function bootspat_FF works", { expect_true(inherits(rand.str2, "SpatRaster"), "TRUE") expect_equal(unlist(rand.str2[1]), setNames(as.double(rep(NA, terra::nlyr(bin1))), names(bin1))) - expect_equal(unlist(rand.str2[2]), setNames(c(0,0,0,0,0,0), names(bin1))) - expect_equal(unlist(rand.str2[3]), setNames(c(1,1,0,1,0,1), names(bin1))) + expect_equal(unlist(rand.str2[2]), setNames(rep(0, terra::nlyr(bin1)), names(bin1))) + expect_equal(unlist(rand.str2[3]), setNames(c(0,1,0,0,1,1,1), names(bin1))) expect_equal(unlist(terra::global(rand.str2, function(x) sum(x, na.rm = TRUE)))[1:2], unlist(terra::global(bin1, function(x) sum(x, na.rm = TRUE)))[1:2]) - expect_equal(sum(rand.str)[1:8], sum(bin1)[1:8]) - expect_equal(sum(rand.str2)[1:8], sum(bin1)[1:8]) + expect_equal(sum(rand.str)[1:6], sum(bin1)[1:6]) + expect_equal(sum(rand.str2)[1:6], sum(bin1)[1:6]) expect_true(all(is.na(unlist(rand.str2[1])))) }) diff --git a/tests/testthat/test-bootspat_naive.R b/tests/testthat/test-bootspat_naive.R index 94d7b8b..bcae03c 100644 --- a/tests/testthat/test-bootspat_naive.R +++ b/tests/testthat/test-bootspat_naive.R @@ -11,8 +11,6 @@ test_that("function bootspat_naive works", { # loading data bin1 <- load_ext_data() #terra::rast(system.file("extdata", "spp_sites.tif", package="SESraster")) - bin1[1] <- NA - bin1[2] <- 0 # applying the function rand.site <- bootspat_naive(bin1, "site") @@ -27,9 +25,9 @@ test_that("function bootspat_naive works", { expect_true(inherits(rand.sp, "SpatRaster"), "TRUE") expect_true(inherits(rand.both, "SpatRaster"), "TRUE") - expect_equal(unlist(rand.site[2]), setNames(c(0,0,0,0,0), names(bin1))) - expect_equal(unlist(rand.sp[2]), setNames(c(0,1,1,0,0), names(bin1))) - expect_equal(unlist(rand.both[2]), setNames(c(1,0,0,0,1), names(bin1))) + expect_equal(unlist(rand.site[2]), setNames(rep(0, terra::nlyr(bin1)), names(bin1))) + expect_equal(unlist(rand.sp[2]), setNames(c(0,0,0,0,1,0,1), names(bin1))) + expect_equal(unlist(rand.both[2]), setNames(c(0,1,1,0,0,1,1), names(bin1))) expect_equal(unlist(rand.site[1]), setNames(as.double(rep(NA, terra::nlyr(bin1))), names(bin1))) expect_equal(unlist(rand.sp[1]), setNames(as.double(rep(NA, terra::nlyr(bin1))), names(bin1))) diff --git a/tests/testthat/test-bootspat_str.R b/tests/testthat/test-bootspat_str.R index 0382dee..c40e2b9 100644 --- a/tests/testthat/test-bootspat_str.R +++ b/tests/testthat/test-bootspat_str.R @@ -3,8 +3,7 @@ test_that("function bootspat_str works", { # loading data bin1 <- load_ext_data() # terra::rast(system.file("extdata", "spp_sites.tif", package="SESraster")) - bin1[1] <- NA - bin1[2] <- 0 + rprobnull <- terra::app(bin1, function(x){ ifelse(is.na(x), NA, 1) @@ -19,8 +18,8 @@ test_that("function bootspat_str works", { expect_true(inherits(rand.str2, "SpatRaster"), "TRUE") expect_equal(unlist(rand.str2[1]), setNames(as.double(rep(NA, terra::nlyr(bin1))), names(bin1))) - expect_equal(unlist(rand.str2[2]), setNames(c(0,0,0,0,0), names(bin1))) - expect_equal(unlist(rand.str2[3]), setNames(c(0,1,1,0,1), names(bin1))) + expect_equal(unlist(rand.str2[2]), setNames(rep(0, terra::nlyr(bin1)), names(bin1))) + expect_equal(unlist(rand.str2[3]), setNames(c(0,1,1,0,1,0,1), names(bin1))) expect_equal(sum(rand.str)[1:8], sum(bin1)[1:8]) expect_true(all(is.na(unlist(rand.str2[1])))) }) diff --git a/vignettes/S1-get-started.Rmd b/vignettes/S1-get-started.Rmd index c953ca4..663fa48 100644 --- a/vignettes/S1-get-started.Rmd +++ b/vignettes/S1-get-started.Rmd @@ -82,7 +82,7 @@ With the distributions in hand, we can perform the spatial randomizations. First, let's randomize species distribution ignoring the spatial structure with the function `bootspat_naive`. -#### both, by site and species simultaneously +#### both, by site and species simultaneously: Equiprobable-Equiprobable We can randomize the presences/absences (1s/0s) using the method `both`. This method combines randomization by site and species at the same time. It will shuffle all presences across cells and layers, changing site richness and species distribution sizes and location at the same time. It is equivalent to the SIM1 (equiprobable-equiprobable) method of Gotelli [-@gotelli2000]. Notice that NA cells are ignored. @@ -95,7 +95,7 @@ plot(srb, legend=F) plot(c(sum(r10), sum(srb)), main=c("observed", "randomized")) ``` -#### by species +#### by species: Fixed-Equiprobable We can randomize by `species`. This second method is performed at each layer (species) of the stack by randomizing the position of species presences in space. If the data fits to the computer RAM memory this randomization is equivalent to the SIM2 (fixed-equiprobable) method of Gotelli [-@gotelli2000] and to the flatland model of Laffan & Crisp [-@laffan2003]. It changes the species richness at each cell while retaining the size of the species distribution (except if randomization is performed by frequency). When running by frequency, presences/absences (1s/0s) are sampled at each pixel based on the probability (frequency) that the species is found within the study area. It is equivalent to the SIM7 (proportional-equiprobable) method of Gotelli [-@gotelli2000]. For each species, the randomized frequency of presences is very similar to the actual frequency but not exactly the same. @@ -120,7 +120,7 @@ cbind(observed=sapply(r10, function(x)freq(x)[2,3]), randomized_freq=sapply(sr1b, function(x)freq(x)[2,3])) ``` -#### by site +#### by site: Equiprobable-Fixed Now, we will randomize by `site`. This method randomizes the position (presence/absence) of the species within each site (cell) of the stack. This method keeps species richness constant at each cell but the size of the species distribution might change, as more or less pixels can be randomly assigned to each species (raster layer). This randomization is equivalent to the SIM3 (equiprobable-fixed) method of Gotelli [-@gotelli2000]. Notice that, although the spatial structure of species richness is held constant, the number of pixels that each species occupy is completely randomized. @@ -135,9 +135,11 @@ plot(c(sum(r10), sum(sr2)), main=c("observed", "randomized")) ### Spatially Structured Randomization -Notice that randomization by `site` from `bootspat_naive` keeps the species richness fixed (i.e. equal to the input raster), but the size of the species' distribution (i.e. number of pixels of each species) is completely randomized. On the other hand, randomization by `species` keeps the number of pixels of each species fixed, but the richness is completely randomized. +Notice that randomization by `site` from `bootspat_naive()` keeps the species richness fixed (i.e. equal to the input raster), but the size of the species' distribution (i.e. number of pixels of each species) is completely randomized. On the other hand, randomization by `species` keeps the number of pixels of each species fixed, but the richness is completely randomized. The functions `bootspat_str()` and `bootspat_ff()` add some constraints to the randomization. See below: -In the function `bootspat_str` we implement a spatially structured randomization that keeps the species richness pattern fixed and distribution size of each species, proportional. This method is based on the second null model of Laffan & Crisp [-@laffan2003], but uses probability of sampling presences based on frequency of presences for each species. Therefore, it is equivalent to the SIM5 (proportional-fixed) method of Gotelli [-@gotelli2000]. Notice that it will lack spatial structure in the same way of randomization by `site`, however the size of the species distribution is (nearly) retained. +#### function `bootspat_str()`: Fixed-Proportional + +In the function `bootspat_str()` we implement a spatially structured randomization that keeps the species richness pattern fixed and distribution size of each species, proportional. This method is based on the second null model of Laffan & Crisp [-@laffan2003], but uses probability of sampling presences based on frequency of presences for each species. Therefore, it is equivalent to the SIM5 (proportional-fixed) method of Gotelli [-@gotelli2000]. Notice that it will lack spatial structure in the same way of randomization by `site`, however the size of the species distribution is (nearly) retained. Randomizations are based on frequencies (given or calculated from the output raster (a presence-absence SpatRaster) and, optionally, a probability raster stack. Both, the frequency vector and the probability raster stack, control the probability that a given species is sampled in each raster cell. The frequency vector controls the probability of sampling each species compared to all others. The probability raster stack controls the probability that each species is sampled on a given raster cell. @@ -170,5 +172,45 @@ Check that the number of occupied pixels of randomized distributions are very si cbind(observed=sapply(r10, function(x)freq(x)[2,3]), randomized=sapply(randr10, function(x)freq(x)[2,3])) ``` + + +#### function `bootspat_ff()`: Fixed-Fixed + +In the function `bootspat_ff()` we implement a spatially structured randomization that keeps both, the species richness pattern and species distribution size, fixed. This method is equivalent to the SIM9 (fixed-fixed) method of Gotelli [-@gotelli2000] and is based on the null model of Connor & Simberloff [-@connor1979], but has some differences. + +The original algorithm randomly chooses the sequence of species and fills sites (originally islands) until they reach the observed species richness. However, as sites (cells) are filled with species, some species do not have enough available sites to be placed, and their sampled frequency is smaller than observed. Additionally, some sites cannot be completely filled because duplicated species are not allowed in the same site. Their solution was to increase the number of sites to place the species. Here, we opted to order the sequence of species from the largest Nj to the smallest. Our algorithm is usually able to match the constraints (site richness and distribution size) but in some cases, specially with small datasets, some sites lack on species to match the original richness and some species are assigned to less sites than originally observed. + +```{r} +# bootstrapping once +randff <- bootspat_ff(r10) +``` + +See how species distribution sizes are maintained. +Compare the original and null distributions. Plots are sorted from the largest to the smallest distributions. + +```{r, str-ff, fig.height=4, fig.width=6} +obs_fr <- unlist(terra::global(r10, function(x) sum(x, na.rm = TRUE))) +v_seq <- order(obs_fr) +plot(c(r10[[v_seq[length(v_seq)-1:4]]], randff[[v_seq[length(v_seq)-1:4]]]), + nr=2, main=paste(rep(c("original", "null"), each=4), names(r10[[v_seq[length(v_seq)-1:4]]]))) +plot(c(r10[[v_seq[floor(length(v_seq)/2) + 2:-1]]], randff[[v_seq[floor(length(v_seq)/2)+ 2:-1]]]), + nr=2, main=paste(rep(c("original", "null"), each=4), names(r10[[v_seq[floor(length(v_seq)/2)+ 2:-1]]]))) +plot(c(r10[[v_seq[4:1]]], randff[[v_seq[4:1]]]), + nr=2, main=paste(rep(c("original", "null"), each=4), names(r10[[v_seq[4:1]]]))) + +``` + +See unchanged spatial pattern of species richness. + +```{r, str-ff-rich, fig.height=4, fig.width=6} +plot(c(sum(r10), sum(randff), sum(r10)-sum(randff)), main=c("observed", "randomized", "difference"), nr=1) +``` + +Check that the number of occupied pixels of randomized distributions are very similar those of the observed distributions. + +```{r, table-freqff} +cbind(observed=sapply(r10, function(x)freq(x)[2,3]), + randomized=sapply(randff, function(x)freq(x)[2,3])) +``` ## References {#references} diff --git a/vignettes/S2-null-models.Rmd b/vignettes/S2-null-models.Rmd index a352364..e17a57d 100644 --- a/vignettes/S2-null-models.Rmd +++ b/vignettes/S2-null-models.Rmd @@ -24,8 +24,8 @@ knitr::opts_chunk$set( - [Algorithms](#algorithms) - [References](#references) - ## Introduction {#intro} + Gotelli (2000) summarizes nine null model algorithms for species co-occurrence analysis based on how sums of species (originally rows) and sites (originally columns) are treated (i.e. fixed, equiprobable, or proportional sums; see Table \@ref(tab:table1), [@gotelli2000]). The data for null model analyses usually consists of a binary presence-absence matrix, in which rows represent species or taxa, columns represent sites or samples, and the entries represent the presence (1) or absence (0) of a particular species in a particular site [@ulrich2012]. ```{r table1-data, echo=FALSE} @@ -80,5 +80,4 @@ Let's see how each of these algorithms behave with raster data. library(SESraster) ``` - ## References {#references} diff --git a/vignettes/references.bib b/vignettes/references.bib index dd80799..6b6fef7 100644 --- a/vignettes/references.bib +++ b/vignettes/references.bib @@ -41,3 +41,18 @@ @article{laffan2003 url = {https://onlinelibrary.wiley.com/doi/abs/10.1046/j.1365-2699.2003.00875.x}, langid = {en} } + +@article{connor1979, + title = {The Assembly of Species Communities: Chance or Competition?}, + author = {Connor, Edward F. and Simberloff, Daniel}, + year = {1979}, + date = {1979}, + journal = {Ecology}, + pages = {1132--1140}, + volume = {60}, + number = {6}, + doi = {10.2307/1936961}, + url = {https://onlinelibrary.wiley.com/doi/abs/10.2307/1936961}, + note = {{\_}eprint: https://onlinelibrary.wiley.com/doi/pdf/10.2307/1936961}, + langid = {en} +}