worked on articles

rmk118 · Dec 2, 2024 · 996cc43 · 996cc43
1 parent 942d77b
commit 996cc43
Show file tree

Hide file tree

Showing 16 changed files with 757 additions and 228 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -8,3 +8,4 @@
 ^\.github$
 ^doc$
 ^Meta$
+^vignettes/articles$
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -33,6 +33,9 @@ Suggests:
     rmarkdown,
     patchwork,
     tidyr,
+    gtsummary,
+    broom,
     testthat (>= 3.0.0)
 Config/testthat/edition: 3
 VignetteBuilder: knitr
+Config/Needs/website: rmarkdown
diff --git a/R/two_line_logistic.R b/R/two_line_logistic.R
@@ -1,5 +1,11 @@
 #' Two-line logistic model
 #'
+#' @description Fits a nonlinear model made up of two distinct line segments
+#'   connected by a logistic curve.
+#'
+#' @details This relies on `minpack.lm::nlsLM()`, which is often able to
+#'   converge when the default `stats::nls()` function cannot find a solution.
+#'
 #' @param dat data frame or matrix containing the data
 #' @param xvar Name of column (integer or double) of measurements for the x-axis
 #'   variable (e.g., carapace width).
@@ -28,8 +34,15 @@
 #'
 #' @examples
 #' set.seed(12)
-#' fc <- fake_crustaceans(n = 100, L50 = 100, allo_params = c(1, 0.2, 1.1, 0.2))
-#' two_line_logistic(fc, xvar = "x", yvar = "y", verbose = FALSE)
+#' fc <- fake_crustaceans(
+#'   error_scale = 17,
+#'   slope = 9,
+#'   L50 = 75,
+#'   n = 800,
+#'   allo_params = c(0.9, 0.25, 1.05, 0.2),
+#'   x_mean = 85
+#' )
+#' two_line_logistic(fc, "x", "y", verbose = FALSE)
 two_line_logistic <- function(dat,
                               xvar,
                               yvar,

diff --git a/README.Rmd b/README.Rmd
@@ -69,7 +69,14 @@ library(morphmat)
 set.seed(12) # for reproducibility when generating the simulated data
 
 # Generate a simulated dataset with known size at maturity
-fc <- fake_crustaceans(n = 100, L50 = 100, allo_params = c(1, 0.2, 1.1, 0.2))
+fc <- fake_crustaceans(
+  error_scale = 17,
+  slope = 9,
+  L50 = 75,
+  n = 800,
+  allo_params = c(0.9, 0.25, 1.05, 0.2),
+  x_mean = 85
+)
 ```
 
 ### Broken-stick/piecewise regression methods
@@ -83,7 +90,7 @@ regrans(fc, "x", "y", verbose = FALSE)
 Two-line logistic:
 
 ```{r}
-two_line_logistic(fc, xvar = "x", yvar = "y", verbose = FALSE, SM50_start = 105)
+two_line_logistic(fc, xvar = "x", yvar = "y", verbose = FALSE, SM50_start = 85)
 ```
 
 Two-line model (lines are fit separately; no forced intersection):

diff --git a/README.md b/README.md
@@ -86,7 +86,14 @@ library(morphmat)
 set.seed(12) # for reproducibility when generating the simulated data
 
 # Generate a simulated dataset with known size at maturity
-fc <- fake_crustaceans(n = 100, L50 = 100, allo_params = c(1, 0.2, 1.1, 0.2))
+fc <- fake_crustaceans(
+  error_scale = 17,
+  slope = 9,
+  L50 = 75,
+  n = 800,
+  allo_params = c(0.9, 0.25, 1.05, 0.2),
+  x_mean = 85
+)
 ```
 
 ### Broken-stick/piecewise regression methods
@@ -95,31 +102,31 @@ REGRANS:
 
 ``` r
 regrans(fc, "x", "y", verbose = FALSE)
-#> [1] 89.43822
+#> [1] 67.67091
 ```
 
 Two-line logistic:
 
 ``` r
-two_line_logistic(fc, xvar = "x", yvar = "y", verbose = FALSE, SM50_start = 105)
-#>     SM50 
-#> 104.7636
+two_line_logistic(fc, xvar = "x", yvar = "y", verbose = FALSE, SM50_start = 85)
+#>    SM50 
+#> 77.6817
 ```
 
 Two-line model (lines are fit separately; no forced intersection):
 
 ``` r
 two_line(fc, xvar = "x", yvar = "y", verbose = FALSE)
 #>   breakpoint intersection 
-#>     106.0655    1383.9744
+#>     75.43651     56.76587
 ```
 
 Broken-stick Stevens (only iterates over values of the x-axis variable
 present in the data):
 
 ``` r
 broken_stick_stevens(fc, xvar = "x", yvar = "y", verbose = FALSE)
-#> [1] 91.10524
+#> [1] 68.33387
 ```
 
 Other packages:
@@ -134,9 +141,9 @@ Compare estimates from all piecewise regression methods:
 ``` r
 piecewise_mods(fc, xvar = "x", yvar = "y", method = "all")
 #>                chngpt             segmented               REGRANS 
-#>              89.44561              88.71463              89.43822 
+#>              67.68312              63.95451              67.67091 
 #>               Stevens   Two_line.breakpoint Two_line.intersection 
-#>              91.10524             106.06548            1383.97444
+#>              68.33387              75.43651              56.76587
 ```
 
 ### Clustering methods
@@ -147,7 +154,7 @@ Somerton method:
 out_df <- somerton(fc, xvar = "x", yvar = "y")[[1]]
 mod <- glm(data = out_df, pred_mat_num ~ x, family = binomial(link = "logit"))
 unname(-coef(mod)[1] / coef(mod)[2])
-#> [1] 102.37
+#> [1] 77.70282
 ```
 
 ## References

diff --git a/man/two_line_logistic.Rd b/man/two_line_logistic.Rd
diff --git a/vignettes/articles/.gitignore b/vignettes/articles/.gitignore
@@ -0,0 +1,2 @@
+*.html
+*.R
diff --git a/vignettes/broken-stick.Rmd → vignettes/articles/broken-stick.Rmd b/vignettes/broken-stick.Rmd → vignettes/articles/broken-stick.Rmd
@@ -1,7 +1,7 @@
 ---
 title: "Broken-stick methods"
 output: rmarkdown::html_vignette
-bibliography: references.bib
+bibliography: ../references.bib
 link-citations: true
 link-external-newwindow: true 
 vignette: >
@@ -26,26 +26,24 @@ knitr::opts_chunk$set(
 #| warning: FALSE
 #| output: FALSE
 
-library(morphmat)
-library(chngpt)
-library(segmented)
-library(dplyr)
-library(ggplot2)
+# List of packages required:
+packages <- c("morphmat", "chngpt", "segmented", "dplyr", "ggplot2")
+
+# Load packages into session
+lapply(packages, require, character.only = TRUE, quietly = TRUE)
 ```
 
 ```{r}
 #| label: setup-2
 #| echo: false
 
-mytheme <- theme_classic() + #define custom theme for ggplots
+mytheme <- theme_classic() + # define custom theme for ggplots
   theme(
     axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0)),
     axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0)),
     text = element_text(size = 13))
 ```
 
-<!-- Here I will write a vignette explaining all of the broken-stick regression methods. This will be useful for referring readers to more complex options available in packages like `segmented` that are not available within `morphmat`. -->
-
 Sources (more will be added): Somerton [-@somerton1980]; Muggeo [-@muggeo2003; @muggeo2008; @muggeo2017]; Hall et al. [-@hall2006]
 
 \begin{equation*}\log{y}=
@@ -58,6 +56,7 @@ Sources (more will be added): Somerton [-@somerton1980]; Muggeo [-@muggeo2003; @
 At $\log{x}=c$, the bottom equation becomes $\tilde{\beta_1}+(\log{x})(\alpha_1-\alpha_2)+\alpha_2\log{x}$, equivalent to the top equation. Note that in this formulation, there is only one intercept term $(\beta_1)$, so the immature and mature lines cannot have the same slope without being equivalent lines. In this model, the estimated value of the breakpoint parameter $c$ is taken to be SM50.
 
 We will start by simulating a data set with a known SM50 value of 75 mm to demonstrate the use (and limitations) of broken-stick methods.
+
 ```{r}
 #| label: generate-crabs
 
@@ -66,10 +65,10 @@ set.seed(12) # set seed for reproducibility
 fc <- fake_crustaceans(
   error_scale = 17,
   slope = 9,
-  L50 = 75,
-  n = 800,
+  L50 = 75, # known size at maturity is 75 mm
+  n = 800, # sample size
   allo_params = c(0.9, 0.25, 1.05, 0.2),
-  x_mean = 85
+  x_mean = 85 # mean carapace width of the sample
 )
 ```
 
@@ -107,6 +106,7 @@ segmented::confint.segmented(lm_orig_seg, method = "gradient")
 ```
 
 We can also use an ANOVA test to compare the segmented model to a single linear model:
+
 ```{r}
 #| label: segmented-anova
 
@@ -225,7 +225,7 @@ summary(
 )[["chngpt"]]
 ```
 
-This method is EXTREMELY sensitive to the "lb.quantile" and "ub.quantile" arguments, which refer to the lower and upper bounds of the search range for change point estimate, respectively (the function defaults are 0.05 and 0.95).  For example, if we change the lower quantile bound to 0.33 and the upper to 0.66, the model will return an SM50 value closer to the true value of 75 mm.
+This method is EXTREMELY sensitive to the "lb.quantile" and "ub.quantile" arguments, which refer to the lower and upper bounds of the search range for change point estimate, respectively (the function defaults are 0.05 and 0.95). For example, if we change the lower quantile bound to 0.33 and the upper to 0.66, the model will return an SM50 value closer to the true value of 75 mm.
 
 ```{r}
 #| label: chngpt-more-accurate
@@ -312,6 +312,8 @@ As with the previous broken-stick methods, REGRANS tends to underestimate SM50 a
 
 A third algorithm for using segmented regression to estimate SM50 was written by Dr. Bradley Stevens at the University of Maryland Eastern Shore. This code is part of the [Crab_Maturity program](https://github.com/Crabman52/Crustacean_Sensation) available on GitHub. There are several different methods included within Crab_Maturity; this segmented regression approach is included here because it differs from the `segmented` and REGRANS methods in that *possible SM50 values are restricted to values of the x-variable present in the data set*.
 
+See Olsen and Stevens [-@olsen2020] for an example real-world application.
+
 ```{r}
 stevens_est <- broken_stick_stevens(fc, "x", "y", verbose = FALSE)
 stevens_est
@@ -332,5 +334,4 @@ ggplot() +
   mytheme
 ```
 
-
-### References
+## References
diff --git a/vignettes/classification.Rmd → vignettes/articles/classification.Rmd b/vignettes/classification.Rmd → vignettes/articles/classification.Rmd
@@ -14,7 +14,7 @@ vignette: >
 link-external-newwindow: true 
 number-sections: true
 link-citations: true
-bibliography: references.bib
+bibliography: ../references.bib
 editor_options: 
   chunk_output_type: console
 ---
@@ -57,15 +57,15 @@ We will start by simulating a data set with a known SM50 value of 75 mm to demon
 ```{r}
 #| label: generate-crabs
 
-set.seed(123) # set seed for reproducibility
+set.seed(12) # set seed for reproducibility
 
 fc <- fake_crustaceans(
   error_scale = 17,
   slope = 9,
-  L50 = 75,
-  n = 800,
+  L50 = 75, # known size at maturity is 75 mm
+  n = 800, # sample size
   allo_params = c(0.9, 0.25, 1.05, 0.2),
-  x_mean = 85
+  x_mean = 85 # mean carapace width of the sample
 )
 ```
 
@@ -162,16 +162,4 @@ infl_pt_mod <- glm(pred_mat ~ x,
 unname(-coef(infl_pt_mod)[1] / coef(infl_pt_mod)[2])
 ```
 
-Beyond the familiar `stats::glm`, there are many packages that can be used to model the relationship between size and maturity status, allowing for the incorporation of random effects, temporal or spatial structuring, and other additional complexities. These include popular packages for fitting generalized linear mixed models (GLMMs) and generalized additive (mixed) models (GAMMs) such as `lmer`, `nlme`, `mgcv`, `glmmTMB`, and `sdmTMB`.
-
-## Methods to obtain confidence intervals for SM50 value
-
-1.  [confint_L.R script](https://github.com/rafamoral/L50/blob/main/confint_L.R) from Mainguy et al. [-@mainguy2024]: Delta method, Fieller method, profile-likelihood, non-parametric bootstrapping, parametric bootstrapping, Monte Carlo, Bayesian
-
-2.  R package qra: Fieller method
-
-3.  R package twopartm: Fieller method
-
-4.  R package drc [@ritz2015]: Delta method, Fieller method. This package can also fit 5/4/3/2-parameter logistic, log-logistic, Weibull, etc. models, incorporate weights, robust nls fitting, constrained optimization, and other customization options. It also contains functions to simulate data
-
-5: R package tidydelta: Delta method
+See `vignette("logistic")` for more details on options for methods to obtain SM50 values and confidence intervals once your data set includes maturity labels.
-Original file line number
+Diff line change
@@ Expand Up / @@ -8,3 +8,4 @@ @@
     ^\.github$
     ^doc$
     ^Meta$
+    ^vignettes/articles$