Skip to content

Commit

Permalink
added cluster functions
Browse files Browse the repository at this point in the history
  • Loading branch information
rmk118 committed Dec 2, 2024
1 parent 976ece1 commit 39dcad0
Show file tree
Hide file tree
Showing 16 changed files with 425 additions and 222 deletions.
13 changes: 7 additions & 6 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,14 @@ Suggests:
tidyr,
gtsummary,
broom,
drc,
drda,
sandwich,
nlstools,
qra,
lmtest,
testthat (>= 3.0.0)
Config/testthat/edition: 3
VignetteBuilder: knitr
Config/Needs/website: rmarkdown
Config/Needs/website:
rmarkdown,
drc,
drda,
sandwich,
qra,
nlstools
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ importFrom(glue,glue)
importFrom(lifecycle,deprecated)
importFrom(magrittr,"%>%")
importFrom(mclust,Mclust)
importFrom(mclust,mclustBIC)
importFrom(minpack.lm,nls.lm.control)
importFrom(minpack.lm,nlsLM)
importFrom(splus2R,peaks)
105 changes: 94 additions & 11 deletions R/cluster.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,39 +2,122 @@
#'
#' @param dat data frame or matrix containing the data
#' @param xvar Name of column (integer or double) of measurements for the x-axis
#' variable (e.g., carapace width).
#' variable (e.g., carapace width).
#' @param yvar Name of column (integer or double) of measurements for the y-axis
#' variable (e.g., claw height).
#' @param log Boolean; should both variables be log-transformed before performing the
#' regression? Defaults to FALSE.
#' variable (e.g., claw height).
#' @param log Boolean; should both variables be log-transformed before
#' performing the regression? Defaults to FALSE.
#' @param method Classification method to use. A single string or vector
#' containing one or more of c("mclust", "Somerton", "kmeans", "hclust", "infl_pt"), or "all" to return the results of all methods for comparison.
#' @returns An estimate of SM50 from the specified method(s).
#' containing one or more of c("mclust", "Somerton", "kmeans", "hclust",
#' "infl_pt"), or "all" to return the results of all methods for comparison.
#' @param plot Boolean; optionally display a plot of the input data shaded
#' according to the maturity classifications from the specified method(s).
#' Defaults to FALSE.
#' @returns A data frame with each point classified according to the specified
#' method(s).
#' @export
#'
#' @examples
#' set.seed(12)
#' fc <- fake_crustaceans(n = 100, L50 = 100, allo_params = c(1, 0.2, 1.1, 0.2))
#' cluster_mods(fc, xvar = "x", yvar = "y", method = c("kmeans"))
#' cluster_mods(fc, xvar = "x", yvar = "y", method = c("all"), plot = TRUE)
cluster_mods <- function(dat,
xvar,
yvar,
log = FALSE,
method = c("mclust", "Somerton", "kmeans", "hclust", "infl_pt", "all")) {

method = c("mclust", "Somerton", "kmeans", "hclust", "infl_pt", "all"),
plot = FALSE) {
method <- tolower(method)
new_dat <- data.frame(xvar = dat[[xvar]], yvar = dat[[yvar]])

if (isTRUE(log)) {
new_dat$xvar <- log(dat[[xvar]])
new_dat$yvar <- log(dat[[yvar]])
}

out_df <- data.frame()

if ("infl_pt" %in% method | "all" %in% method) {

disc <- infl_pt(new_dat, "xvar", "yvar", plot = FALSE)
temp_df <- new_dat %>%
dplyr::mutate(pred_mat = (dplyr::if_else(.data$yvar / .data$xvar > disc, 1, 0))) %>%
dplyr::mutate(method = "infl_pt")
out_df <- out_df %>% dplyr::bind_rows(temp_df)

}

if ("kmeans" %in% method | "all" %in% method) {

out <- new_dat
temp_vec <- kmeans(new_dat, centers = 2, iter.max = 15)$cluster - 1
temp_df <- new_dat %>%
dplyr::mutate(pred_mat = temp_vec, method = "kmeans")
mature_label <- dplyr::slice_max(temp_df, xvar) %>% dplyr::pull(pred_mat)
temp_df <- temp_df %>%
dplyr::mutate(pred_mat = dplyr::if_else(pred_mat == mature_label,
as.numeric(1), as.numeric(0)))
out_df <- out_df %>% dplyr::bind_rows(temp_df)

}
}

if ("hclust" %in% method | "all" %in% method) {

temp_vec <- (cutree(hclust(
dist(new_dat, method = "euclidean"), method = "ward.D"), k = 2) - 1)
temp_df <- new_dat %>%
dplyr::mutate(pred_mat = temp_vec, method = "hclust")
mature_label <- dplyr::slice_max(temp_df, xvar) %>% dplyr::pull(pred_mat)
temp_df <- temp_df %>%
dplyr::mutate(pred_mat = dplyr::if_else(pred_mat == mature_label,
as.numeric(1), as.numeric(0)))
out_df <- out_df %>% dplyr::bind_rows(temp_df)
}

if ("somerton" %in% method | "all" %in% method) {

temp_vec <- somerton(new_dat, "xvar", "yvar")[[1]]$pred_mat_num
temp_df <- new_dat %>%
dplyr::mutate(pred_mat = temp_vec, method = "Somerton")
out_df <- out_df %>% dplyr::bind_rows(temp_df)
}

if ("mclust" %in% method | "all" %in% method) {

temp_vec <- mclust::Mclust(
data = new_dat,
G = 2,
verbose = FALSE,
modelNames = "EVV"
)$classification
temp_df <- new_dat %>%
dplyr::mutate(pred_mat = temp_vec, method = "mclust")
mature_label <- dplyr::slice_max(temp_df, xvar) %>% dplyr::pull(pred_mat)
temp_df <- temp_df %>%
dplyr::mutate(pred_mat = dplyr::if_else(pred_mat == mature_label,
as.numeric(1), as.numeric(0)))
out_df <- out_df %>% dplyr::bind_rows(temp_df)
}

if (isTRUE(plot)) {

print(ggplot() +
geom_point(data = out_df, aes(xvar, yvar, color = as.factor(pred_mat)),
alpha = 0.4) +
labs(x = xvar, y = yvar, color = "Predicted group") +
theme_bw() +
facet_wrap(~method) +
scale_color_manual(
values = c("0" = "lightblue", "1" = "black"),
breaks = c(0, 1),
labels = c("0" = "Immature", "1" = "Mature")) +
theme(
axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0)),
axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0)),
text = element_text(size = 13)) +
guides(color = guide_legend(override.aes = list(alpha = 1))))
}

return(out)
return(out_df)

}
1 change: 1 addition & 0 deletions R/morphmat-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#' @importFrom glue glue
#' @importFrom lifecycle deprecated
#' @importFrom mclust Mclust
#' @importFrom mclust mclustBIC
#' @importFrom splus2R peaks
#' @importFrom minpack.lm nls.lm.control
#' @importFrom minpack.lm nlsLM
Expand Down
2 changes: 1 addition & 1 deletion R/two_line_logistic.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#' connected by a logistic curve.
#'
#' @details This relies on `minpack.lm::nlsLM()`, which is often able to
#' converge when the default `stats::nls()` function cannot find a solution.
#' converge when `stats::nls()` function cannot find a solution.
#'
#' @param dat data frame or matrix containing the data
#' @param xvar Name of column (integer or double) of measurements for the x-axis
Expand Down
48 changes: 15 additions & 33 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,16 @@ You can install the development version of morphmat from [GitHub](https://github
devtools::install_github("rmk118/morphmat")
```

## Examples
## Articles/vignettes

- [Getting Started](https://rmk118.github.io/morphmat/articles/morphmat.html)
- [Broken-stick models](https://rmk118.github.io/morphmat/articles/broken-stick.html)
- [Two-line models](https://rmk118.github.io/morphmat/articles/two-line.html)
- [Classification methods](https://rmk118.github.io/morphmat/articles/classification.html)
- [Post-classification logistic regression](https://rmk118.github.io/morphmat/articles/logistic.html)
- [Simulating data](https://rmk118.github.io/morphmat/articles/simulations.html)

## Basic Examples

```{r example_generate}
library(morphmat)
Expand All @@ -79,47 +88,20 @@ fc <- fake_crustaceans(
)
```

### Broken-stick/piecewise regression methods

REGRANS:

```{r}
regrans(fc, "x", "y", verbose = FALSE)
```

Two-line logistic:

```{r}
two_line_logistic(fc, xvar = "x", yvar = "y", verbose = FALSE, SM50_start = 85)
```

Two-line model (lines are fit separately; no forced intersection):

```{r}
two_line(fc, xvar = "x", yvar = "y", verbose = FALSE)
```

Broken-stick Stevens (only iterates over values of the x-axis variable present in the data):
Compare estimates from all piecewise regression methods:

```{r}
broken_stick_stevens(fc, xvar = "x", yvar = "y", verbose = FALSE)
piecewise_mods(fc, xvar = "x", yvar = "y", method = "all")
```

Other packages:

```{r}
# segmented
# chngpt
```
### Clustering methods

Compare estimates from all piecewise regression methods:
Compare all clustering methods

```{r}
piecewise_mods(fc, xvar = "x", yvar = "y", method = "all")
all_clusters <- cluster_mods(fc, xvar = "x", yvar = "y", method = c("all"), plot = TRUE)
```

### Clustering methods

Somerton method:

```{r}
Expand Down
65 changes: 24 additions & 41 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,22 @@ You can install the development version of morphmat from
devtools::install_github("rmk118/morphmat")
```

## Examples
## Articles/vignettes

- [Getting
Started](https://rmk118.github.io/morphmat/articles/morphmat.html)
- [Broken-stick
models](https://rmk118.github.io/morphmat/articles/broken-stick.html)
- [Two-line
models](https://rmk118.github.io/morphmat/articles/two-line.html)
- [Classification
methods](https://rmk118.github.io/morphmat/articles/classification.html)
- [Post-classification logistic
regression](https://rmk118.github.io/morphmat/articles/logistic.html)
- [Simulating
data](https://rmk118.github.io/morphmat/articles/simulations.html)

## Basic Examples

``` r
library(morphmat)
Expand All @@ -96,46 +111,6 @@ fc <- fake_crustaceans(
)
```

### Broken-stick/piecewise regression methods

REGRANS:

``` r
regrans(fc, "x", "y", verbose = FALSE)
#> [1] 67.67091
```

Two-line logistic:

``` r
two_line_logistic(fc, xvar = "x", yvar = "y", verbose = FALSE, SM50_start = 85)
#> SM50
#> 77.6817
```

Two-line model (lines are fit separately; no forced intersection):

``` r
two_line(fc, xvar = "x", yvar = "y", verbose = FALSE)
#> breakpoint intersection
#> 75.43651 56.76587
```

Broken-stick Stevens (only iterates over values of the x-axis variable
present in the data):

``` r
broken_stick_stevens(fc, xvar = "x", yvar = "y", verbose = FALSE)
#> [1] 68.33387
```

Other packages:

``` r
# segmented
# chngpt
```

Compare estimates from all piecewise regression methods:

``` r
Expand All @@ -148,6 +123,14 @@ piecewise_mods(fc, xvar = "x", yvar = "y", method = "all")

### Clustering methods

Compare all clustering methods

``` r
all_clusters <- cluster_mods(fc, xvar = "x", yvar = "y", method = c("all"), plot = TRUE)
```

<img src="man/figures/README-unnamed-chunk-3-1.png" width="100%" />

Somerton method:

``` r
Expand Down
18 changes: 13 additions & 5 deletions man/cluster_mods.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Binary file added man/figures/README-unnamed-chunk-3-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion man/two_line_logistic.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 39dcad0

Please sign in to comment.