diff --git a/.Rbuildignore b/.Rbuildignore index 91114bf..b211b57 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,2 +1,7 @@ +^LICENSE\.md$ ^.*\.Rproj$ ^\.Rproj\.user$ +^vignette\.Rmd$ +^build$ +^figures$ +^gator\.png$ diff --git a/.gitignore b/.gitignore index 26fad6f..edf4413 100644 --- a/.gitignore +++ b/.gitignore @@ -16,17 +16,19 @@ # RStudio files .Rproj.user/ +.Rproj # produced vignettes -vignettes/*.html +#vignettes/*.html vignettes/*.pdf # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 .httr-oauth # knitr and R markdown default cache directories -/*_cache/ +*_cache/ /cache/ +*_files/ # Temporary files created by R markdown *.utf8.md @@ -34,3 +36,9 @@ vignettes/*.pdf # Shiny token, see https://shiny.rstudio.com/articles/shinyapps.html rsconnect/ + +# Mac OSX +.DS_Store + +# R installed data +*.rds diff --git a/DESCRIPTION b/DESCRIPTION index 510eac1..07b6cce 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,11 +1,66 @@ -Package: gateRR +Package: gateR Type: Package -Title: What the Package Does (Title Case) -Version: 0.1.0 -Author: Who wrote it -Maintainer: The package maintainer -Description: More about what it does (maybe more than one line) - Use four spaces when indenting paragraphs within the Description. -License: What license is it under? +Title: Gating strategy for mass cytometry using kernel density estimation +Version: 0.1.0.9000 +Date: 2020-09-12 +Authors@R: + c(person(given = "Ian D.", + family = "Buller", + role = c("aut", "cre", "cph"), + email = "ian.buller@nih.gov", + comment = c(ORCID = "0000-0001-9477-8582")), + person(given = "Elena", + family = "Hsieh", + role = "ctb", + email = "elena.hsieh@cuanschutz.edu"), + person(given = "Debashis", + family = "Ghosh", + role = "ctb", + email = "debashis.ghosh@cuanschutz.edu"), + person(given = "Lance A.", + family = "Waller", + role = "ctb", + email = "lwaller@emory.edu"), + person(given = "NCI", + role = c("cph")) + ) +Maintainer: Ian D. Buller +Description: Estimates statistically significant flourescent marker combination values within + which one immunologically distinctive group (i.e., disease case) is more associated than + another group (i.e., healthy control), successively, using various combinations (i.e., + "gates") of flourescent markers to examine features of cells that may be different between + groups. For a two group comparison, the {gateR} package uses the spatial relative risk + function that is estimated using the {sparr} package. Details about the {sparr} package + methods can be found in the tutorial: Davies et al. (2018) . Details + about kernel density estimation can be found in J. F. Bithell (1990) . + More information about relative risk functions using kernel density estimation can be + found in J. F. Bithell (1991) . +License: Apache License (>= 2.0) Encoding: UTF-8 LazyData: true +Roxygen: list(markdown = TRUE) +RoxygenNote: 7.1.1 +Depends: R (>= 3.5.0) +Imports: + fields, + graphics, + grDevices, + maptools, + raster, + sp, + sparr, + spatstat.core, + stats, + tibble, + utils +Suggests: + flowWorkspaceData, + knitr, + ncdfFlow, + spelling, + testthat +VignetteBuilder: knitr +Language: en-US +URL: https://github.com/Waller-SUSAN/gateR +BugReports: https://github.com/Waller-SUSAN/gateR/issues +Author: Ian D. Buller [aut, cre, cph] diff --git a/LICENSE b/LICENSE deleted file mode 100644 index b09cd78..0000000 --- a/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ -Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..523138c --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,194 @@ +Apache License +============== + +_Version 2.0, January 2004_ +_<>_ + +### Terms and Conditions for use, reproduction, and distribution + +#### 1. Definitions + +“License” shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +“Licensor” shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + +“Legal Entity” shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, “control” means **(i)** the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or **(ii)** ownership of fifty percent (50%) or more of the +outstanding shares, or **(iii)** beneficial ownership of such entity. + +“You” (or “Your”) shall mean an individual or Legal Entity exercising +permissions granted by this License. + +“Source” form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. + +“Object” form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. + +“Work” shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below). + +“Derivative Works” shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. + +“Contribution” shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. For the purposes of this definition, +“submitted” means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as “Not a Contribution.” + +“Contributor” shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +#### 2. Grant of Copyright License + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +#### 3. Grant of Patent License + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +#### 4. Redistribution + +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +* **(a)** You must give any other recipients of the Work or Derivative Works a copy of +this License; and +* **(b)** You must cause any modified files to carry prominent notices stating that You +changed the files; and +* **(c)** You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source form +of the Work, excluding those notices that do not pertain to any part of the +Derivative Works; and +* **(d)** If the Work includes a “NOTICE” text file as part of its distribution, then any +Derivative Works that You distribute must include a readable copy of the +attribution notices contained within such NOTICE file, excluding those notices +that do not pertain to any part of the Derivative Works, in at least one of the +following places: within a NOTICE text file distributed as part of the +Derivative Works; within the Source form or documentation, if provided along +with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents of +the NOTICE file are for informational purposes only and do not modify the +License. You may add Your own attribution notices within Derivative Works that +You distribute, alongside or as an addendum to the NOTICE text from the Work, +provided that such additional attribution notices cannot be construed as +modifying the License. + +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. + +#### 5. Submission of Contributions + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +#### 6. Trademarks + +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +#### 7. Disclaimer of Warranty + +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an “AS IS” BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +#### 8. Limitation of Liability + +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +#### 9. Accepting Warranty or Additional Liability + +While redistributing the Work or Derivative Works thereof, You may choose to +offer, and charge a fee for, acceptance of support, warranty, indemnity, or +other liability obligations and/or rights consistent with this License. However, +in accepting such obligations, You may act only on Your own behalf and on Your +sole responsibility, not on behalf of any other Contributor, and only if You +agree to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. + +_END OF TERMS AND CONDITIONS_ + +### APPENDIX: How to apply the Apache License to your work + +To apply the Apache License to your work, attach the following boilerplate +notice, with the fields enclosed by brackets `[]` replaced with your own +identifying information. (Don't include the brackets!) The text should be +enclosed in the appropriate comment syntax for the file format. We also +recommend that a file or class name and description of purpose be included on +the same “printed page” as the copyright notice for easier identification within +third-party archives. + + Copyright 2020 Ian D. Buller; NCI + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/NAMESPACE b/NAMESPACE index d75f824..46eb2de 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1 +1,30 @@ -exportPattern("^[[:alpha:]]+") +# Generated by roxygen2: do not edit by hand + +export(gating) +export(lotrrs) +export(lrr_plot) +export(pval_correct) +export(pval_plot) +export(rrs) +importFrom(fields,image.plot) +importFrom(grDevices,chull) +importFrom(grDevices,colorRampPalette) +importFrom(graphics,close.screen) +importFrom(graphics,par) +importFrom(graphics,screen) +importFrom(graphics,split.screen) +importFrom(maptools,unionSpatialPolygons) +importFrom(pgirmess,correlog) +importFrom(raster,cut) +importFrom(raster,extent) +importFrom(raster,raster) +importFrom(raster,rasterToPolygons) +importFrom(raster,values) +importFrom(sp,coordinates) +importFrom(sp,gridded) +importFrom(sp,over) +importFrom(sparr,OS) +importFrom(sparr,risk) +importFrom(spatstat.core,owin) +importFrom(spatstat.core,ppp) +importFrom(tibble,tibble) diff --git a/R/gating.R b/R/gating.R new file mode 100644 index 0000000..9f244a4 --- /dev/null +++ b/R/gating.R @@ -0,0 +1,219 @@ +#' Gating strategy for mass cytometry data using spatial relative risk functions +#' +#' Extracts cells within statistically significant combinations of flourescent markers, successively, for a set of markers. Statistically significant combinations are idetnified using two-tailed p-values of a relative risk surface assuming asymptotic normality. This function is currently available for two-level comparisons of a single group (e.g., case/control) or two groups (e.g., case/control at time 1 and time 2). Provides functionality for basic visualization and multiple testing correction. +#' +#' @param dat Input data frame flow cytometry data with the following features (columns): 1) ID, 2) Condition A ID, 3) Condition B ID (optional), and a set of markers. +#' @param vars A vector of characters with the name of features (columns) within \code{dat} to use as markers for each gate. See details below. +#' @param n_conditions A numeric value of either 1 or 2 designating if the gating is performed with one condition or two conditions. +#' @param numerator Logical. If \code{TRUE} (the default), cells will be extracted within all statistically significant numerator (i.e., case) clusters. If \code{FALSE}, cells will be extracted within all statistically significant denominator (i.e., control) clusters. +#' @param alpha Numeric. The two-tailed alpha level for significance threshold (default is 0.05). +#' @param p_correct Character string specifying whether to apply a correction for multiple comparisons including a Bonferroni correction \code{p_correct = "uncorrelated"} or a correlated Bonferroni correction \code{p_correct = "correlated"}. If \code{p_correct = "none"} then no correction is applied. +#' @param nbc Optional. An integer for the number of bins when \code{p_correct = "correlated"}. Similar to \code{nbclass} argument in \code{\link[pgirmess]{nbclass}}. The default is the average number of gridded knots in one-dimension (i.e., x-axis). +#' @param doplot Logical. If \code{TRUE}, the output includes basic data visualizations. +#' @param rcols Character string of length three (3) specifying the colors for: 1) Group A, 2) Neither, and 3) Group B designations. The defaults are \code{c("#FF0000", "#cccccc", "#0000FF")} or \code{c("red", "grey80", "blue")}. +#' @param win Optional. Object of class \code{owin} for a custom two-dimensional window within which to estimate the surfaces. The default is NULL and calculates a convex hull around the data. +#' @param verbose Logical. If \code{TRUE} will print function progress during execution. If \code{FALSE} (the default), will not print. +#' @param ... Arguments passed to \code{\link[sparr]{risk}} to select bandwidth, edge correction, and resolution. +#' +#' @details This function performs a sequential gating strategy for mass cytometry data comparing two levels with one or two conditions. Gates are typically two-dimensional space comprised of two flourescent markers. The two-level comparison allows for the estimation of a spatial relative risk function and the computation of p-value based on an assumption of asymptotic normality. Cells within statistically significant areas are extracted and used in the next gate. This function relies heavily upon the \code{\link[sparr]{risk}} function. Basic visualization is available if \code{doplot = TRUE}. +#' +#' The \code{vars} argument must be a vector with an even-numbered length where the odd-numbered elements are the markers used on the x-axis of a gate and the even-numbered elements are the markers used on the y-axis of a gate. For example, if \code{vars = c("V1", "V2", "V3", and "V4")} then the first gate is "V1" on the x-axis and "V2" on the y-axis and then the second gate is V3" on the x-axis and "V4" on the y-axis. Makers can be repeated in successive gates. +#' +#' The \code{n_conditions} argument specifies if the gating strategy is performed for one condition or two conditions. If \code{n_conditions = 1}, then the function performs a one condition gating strategy using the internal \code{rrs} function, which computes the statistically significant areas (clusers) of a relative risk surface at each gate and selects the cells within the clusters specified by the \code{numerator} argument. If \code{n_conditions = 2}, then the function performs a two conditions gating strategy using the internal \code{lotrrs} function, which computes the statistically significant areas (clusers) of a ratio of relative risk surfaces at each gate and selects the cells within the clusters specified by the \code{numerator} argument. See the documentation for the internal \code{rrs} and \code{lotrrs} functions for more details. +#' +#' The p-value surface of the ratio of relative risk surfaces is estimated assuming asymptotic normality of the ratio value at each gridded knot. The bandwidth is fixed across all layers. +#' +#' Provides functionality for a correction for multiple testing. If \code{p_correct = "uncorrelated"}, then a conventional Bonferroni correction is calculated by dividing the \code{alpha} level by the number of gridded knots across the estimated surface. The default in the \code{\link[sparr]{risk}} function is a resolution of 128 x 128 or n = 16,384 knots and a custom resolution can be specified using the \code{resolution} argument within the \code{\link[sparr]{risk}} function. If \code{p_correct = "correlated"}, then a Bonferroni correction that takes into account the spatial correlation of the surface is calculated within the internal \code{pval_correct} function. The \code{alpha} level is divided by the minimum number of knots that are not spatially correlated. The minimum number of knots that are not spatially correlated is computed by counting the knots that are a distance apart that exceeds the minimum distance of non-significant spatial correlation based on a correlogram using the \code{\link[pgirmess]{correlog}} function. If \code{p_correct = "none"}, then the function does not account for multiple testing and uses the uncorrected \code{alpha} level. See the internal \code{pval_correct} function documentation for more details. +#' +#' @return An object of class \code{list}. This is a named list with the following components: +#' +#' #' \describe{ +#' \item{\code{obs}}{An object of class 'tibble' of the same features as \code{dat} that includes the information for the cells extracted with significant clusters in the final gate.} +#' \item{\code{gate}}{An object of class 'list' of 'rrs' objects from each gate.} +#' } +#' +#' The objects of class 'rrs' is similar to the output of the \code{\link[sparr]{risk}} function with two additional components: +#' \describe{ +#' \item{\code{rr}}{An object of class 'im' with the relative risk surface.} +#' \item{\code{f}}{An object of class 'im' with the spatial density of the numerator.} +#' \item{\code{g}}{An object of class 'im' with the spatial density of the denominator.} +#' \item{\code{P}}{An object of class 'im' with the asymptotic p-value surface.} +#' \item{\code{lrr}}{An object of class 'im' with the log relative risk surface.} +#' \item{\code{alpha}}{A numeric value for the alpha level used within the gate.} +#' } +#' +#' @importFrom grDevices chull +#' @importFrom maptools unionSpatialPolygons +#' @importFrom raster rasterToPolygons values +#' @importFrom sp coordinates over +#' @importFrom spatstat.core owin +#' @importFrom tibble tibble +#' @export +#' +#' @examples +#' library(flowWorkspaceData) +#' library(ncdfFlow) +#' +#' # Use 'extdata' from the {flowWorkspaceData} package +#' flowDataPath <- system.file("extdata", package = "flowWorkspaceData") +#' fcsFiles <- list.files(pattern = "CytoTrol", flowDataPath, full = TRUE) +#' ncfs <- ncdfFlow::read.ncdfFlowSet(fcsFiles) +#' fr1 <- ncfs[[1]] +#' fr2 <- ncfs[[2]] +#' +#' ## Comparison of two samples (single condition) "g1" +#' ## Two gates (four markers) "CD4", "CD38", "CD8", and "CD3" +#' ## Log10 Transformation for both markers +#' ## Remove cells with NA and Inf values +#' +#' # First sample +#' obs_dat1 <- data.frame("id" = seq(1, nrow(fr1@exprs), 1), +#' "g1" = rep(1, nrow(fr1@exprs)), +#' "log10_CD4" = log(fr1@exprs[ , 5], 10), +#' "log10_CD38" = log(fr1@exprs[ , 6], 10), +#' "log10_CD8" = log(fr1@exprs[ , 7], 10), +#' "log10_CD3" = log(fr1@exprs[ , 8], 10)) +#' # Second sample +#' obs_dat2 <- data.frame("id" = seq(1, nrow(fr2@exprs), 1), +#' "g1" = rep(2, nrow(fr2@exprs)), +#' "log10_CD4" = log(fr2@exprs[ , 5], 10), +#' "log10_CD38" = log(fr2@exprs[ , 6], 10), +#' "log10_CD8" = log(fr2@exprs[ , 7], 10), +#' "log10_CD3" = log(fr2@exprs[ , 8], 10)) +#' # Full set +#' obs_dat <- rbind(obs_dat1, obs_dat2) +#' obs_dat <- obs_dat[complete.cases(obs_dat), ] # remove NAs +#' obs_dat <- obs_dat[is.finite(rowSums(obs_dat)),] # remove Infs +#' obs_dat$g1 <- as.factor(obs_dat$g1) # set "g1" as binary factor +#' +#' # Run gating() function +#' ## Single condition, no multiple testing correction +#' test_gate <- gateR::gating(dat = obs_dat, +#' vars = c("log10_CD4", "log10_CD38", +#' "log10_CD8", "log10_CD3"), +#' n_condition = 1, +#' p_cor = "none") +#' +gating <- function(dat, + vars, + n_condition = c(1, 2), + numerator = TRUE, + alpha = 0.05, + p_cor = c("none", "correlated", "uncorrelated"), + nbc = NULL, + doplot = FALSE, + rcols = c("#FF0000", "#cccccc", "#0000FF"), + win = NULL, + verbose = FALSE, + ...) { + + if (numerator == TRUE) { type_cluster <- "numerator" + } else { + type_cluster <- "denominator" + } + + + # Format data input + dat <- tibble::tibble(dat) + dat <- dat[!is.na(dat[ , which(colnames(dat) %in% vars[1])]) & + !is.na(dat[ , which(colnames(dat) %in% vars[2])]), ] + + if (n_condition == 2 & nrow(unique(dat[ , 3])) != 2) { stop("Must have two conditions") } + + if (n_condition == 2) { dat_gate <- dat[ , c(1:3, which(colnames(dat) %in% vars))] + } else { + dum_condition <- rep(1, nrow(dat)) + dat_gate <- dat[ , c(1:2, which(colnames(dat) %in% vars))] + dat_gate <- tibble::add_column(dat_gate, condition = dum_condition, .after = 2) + } + dat <- as.data.frame(dat) + dat_gate <- as.data.frame(dat_gate) + n_gate <- length(vars) / 2 # calculate the number of gates + + # Create empty list to save output of each gate + list_gate <- vector('list', length(n_gate)) + + # Gating + for (k in 1:n_gate) { + + if (k == 1) { j <- 1 } else { j <- k * 2 - 1 } # vars indexing per gate + + # format data by selecting the desired vars + dat_gate <- dat_gate[!is.na(dat_gate[ , which(colnames(dat_gate) %in% vars[j])]) & + !is.na(dat_gate[ , which(colnames(dat_gate) %in% vars[j + 1])]), ] + xvar <- which(colnames(dat_gate) %in% vars[j]) + yvar <- which(colnames(dat_gate) %in% vars[j + 1]) + df <- dat_gate[ , c(1:3, xvar, yvar)] + df <- df[!is.na(df[ , 4]) & !is.na(df[ , 5]), ] # remove NAs + + # Create custom window with a convex hull + chul <- grDevices::chull(df[ , 4:5]) + chul_coords <- df[ , 4:5][c(chul, chul[1]), ] + win_gate <- spatstat.core::owin(poly = list(x = rev(chul_coords[ , 1]), + y = rev(chul_coords[ , 2]))) + + # Estimate significant relative risk areas + ## Bonferroni correction only necessary in first gate + if (k == 1) { p_cor <- p_cor } else { p_cor <- "none"} + + if (n_condition == 2) { + out <- lotrrs(dat = df, + win = win_gate, + doplot = doplot, + alpha = alpha, + p_correct = p_cor, + verbose = verbose, + ...) + } else { + out <- rrs(dat = df, + win = win_gate, + doplot = doplot, + alpha = alpha, + p_correct = p_cor, + verbose = verbose, + ...) + } + + # Convert p-value surface into a categorized raster + ## v == 2 : significant T1 numerator; v == 1: not + Ps <- pval_plot(input = out$P, alpha = out$alpha) + list_gate[[k]] <- out # save for output + rm(out, df, win_gate) # conserve memory + + # Go back one gate if current gate has no significant area and produce output of previous gate + if (all(raster::values(Ps)[!is.na(raster::values(Ps))] == 2) | all(is.na(raster::values(Ps)))) { + cat(paste("Gate", k, "yeilded no significant", type_cluster, "cluster(s)...", + "Returning results from previous gate\n", + sep = " ")) + output <- dat[which(dat[ , 1] %in% dat_gate[ , 1]), ] + out_list <- list("obs" = output, "gate" = list_gate) + return(out_list) + } + + # convert categorized raster to gridded polygons + out_pol <- raster::rasterToPolygons(Ps) + rm(Ps) # conserve memory + + if (numerator == TRUE) { v <- 1 } else { v <- 3 } + # combine gridded polygons of similar categories + pols <- maptools::unionSpatialPolygons(out_pol[out_pol$layer == v, ], + IDs = rep(1, length(out_pol[out_pol$layer == v, ]))) + rm(out_pol) # conserve memory + + # Overlay points + sp::coordinates(dat_gate) <- ~ cbind(dat_gate[,which(colnames(dat_gate) %in% vars[j])], + dat_gate[,which(colnames(dat_gate) %in% vars[j + 1])]) + + # extract points within significant cluster + dat_gate <- as.data.frame(dat_gate[!is.na(sp::over(dat_gate, pols)), ]) + rm(pols) # conserve memory + + # Output for the final gate + if (k == n_gate) { + cat(paste("Observations within significant", type_cluster, "cluster(s) of Gate", k, "\n",sep = " ")) + output <- dat[which(dat[ , 1] %in% dat_gate[ , 1]), ] + out_list <- list("obs" = output, "gate" = list_gate) + return(out_list) + } + } +} diff --git a/R/hello.R b/R/hello.R deleted file mode 100644 index 3d348f2..0000000 --- a/R/hello.R +++ /dev/null @@ -1,18 +0,0 @@ -# Hello, world! -# -# This is an example function named 'hello' -# which prints 'Hello, world!'. -# -# You can learn more about package authoring with RStudio at: -# -# http://r-pkgs.had.co.nz/ -# -# Some useful keyboard shortcuts for package authoring: -# -# Install Package: 'Cmd + Shift + B' -# Check Package: 'Cmd + Shift + E' -# Test Package: 'Cmd + Shift + T' - -hello <- function() { - print("Hello, world!") -} diff --git a/R/lotrrs.R b/R/lotrrs.R new file mode 100644 index 0000000..561c2aa --- /dev/null +++ b/R/lotrrs.R @@ -0,0 +1,283 @@ +#' A single gate for two conditions +#' +#' Estimates a ratio of relative risk surfaces and computes the asymptotic p-value surface for a single gate with two conditions. Includes features for basic visualization. This function is used internally within the \code{\link{gating}} function to extract the points within the significant areas. This function can also be used as a standalone function. +#' +#' @param dat Input data frame flow cytometry data with five (5) features (columns): 1) ID, 2) Condition A ID, 3) Condition B ID, 4) Marker A as x-coordinate, 5) Marker B as y-coordinate. +#' @param alpha Numeric. The two-tailed alpha level for significance threshold (default is 0.05). +#' @param p_correct Character string specifying whether to apply a correction for multiple comparisons including a Bonferroni correction \code{p_correct = "uncorrelated"} or a correlated Bonferroni correction \code{p_correct = "correlated"}. If \code{p_correct = "none"} then no correction is applied. +#' @param nbc Optional. An integer for the number of bins when \code{p_correct = "correlated"}. Similar to \code{nbclass} argument in \code{\link[pgirmess]{nbclass}}. The default is the average number of gridded knots in one-dimension (i.e., x-axis). +#' @param doplot Logical. If \code{TRUE}, the output includes basic data visualizations. +#' @param rcols Character string of length three (3) specifying the colors for: 1) Group A, 2) Neither, and 3) Group B designations. The defaults are \code{c("#FF0000", "#cccccc", "#0000FF")} or \code{c("red", "grey80", "blue")}. +#' @param win Optional. Object of class \code{owin} for a custom two-dimensional window within which to estimate the surfaces. The default is NULL and calculates a convex hull around the data. +#' @param verbose Logical. If \code{TRUE} will print function progress during execution. If \code{FALSE} (the default), will not print. +#' @param ... Arguments passed to \code{\link[sparr]{risk}} to select bandwidth, edge correction, and resolution. +#' +#' @details This function estimates a ratio of relative risk surfaces and computes the asymptotic p-value surface for a single gate with two conditions using three successive \code{\link[sparr]{risk}} functions. A relative risk surface is estimated for Condition A at each level of Condition B and then a ratio of the two relative risk surfaces is computed. +#' +#' \deqn{RR_{Condition B1} = \frac{Condition A2 of B1}{Condition A1 of B1}} +#' \deqn{RR_{Condition B2} = \frac{Condition A2 of B2}{Condition A1 of B2}} +#' \deqn{ln(rRR) = ln\left (\frac{RR_{Condition B2}}{CRR_{Condition B2}}\right )} +#' +#' The p-value surface of the ratio of relative risk surfaces is estimated assuming asymptotic normality of the ratio value at each gridded knot. The bandwidth is fixed across all layers. Basic visualization is available if \code{doplot = TRUE}. +#' +#' Provides functionality for a correction for multiple testing. If \code{p_correct = "uncorrelated"}, then a conventional Bonferroni correction is calculated by dividing the \code{alpha} level by the number of gridded knots across the estimated surface. The default in the \code{\link[sparr]{risk}} function is a resolution of 128 x 128 or n = 16,384 knots and a custom resolution can be specified using the \code{resolution} argument within the \code{\link[sparr]{risk}} function. If \code{p_correct = "correlated"}, then a Bonferroni correction that takes into account the spatial correlation of the surface is calculated within the internal \code{pval_correct} function. The \code{alpha} level is divided by the minimum number of knots that are not spatially correlated. The minimum number of knots that are not spatially correlated is computed by counting the knots that are a distance apart that exceeds the minimum distance of non-significant spatial correlation based on a correlogram using the \code{\link[pgirmess]{correlog}} function. If \code{p_correct = "none"}, then the function does not account for multiple testing and uses the uncorrected \code{alpha} level. See the internal \code{pval_correct} function documentation for more details. +#' +#' @return An object of class 'list' where each element is a object of class 'rrs' created by the \code{\link[sparr]{risk}} function with two additional components: +#' +#' \describe{ +#' \item{\code{rr}}{An object of class 'im' with the relative risk surface.} +#' \item{\code{f}}{An object of class 'im' with the spatial density of the numerator.} +#' \item{\code{g}}{An object of class 'im' with the spatial density of the denominator.} +#' \item{\code{P}}{An object of class 'im' with the asymptotic p-value surface.} +#' \item{\code{lrr}}{An object of class 'im' with the log relative risk surface.} +#' \item{\code{alpha}}{A numeric value for the alpha level used within the gate.} +#' } +#' +#' @importFrom fields image.plot +#' @importFrom graphics close.screen par screen split.screen +#' @importFrom grDevices chull +#' @importFrom raster extent values +#' @importFrom spatstat.core owin ppp +#' @importFrom sparr OS risk +#' @export +#' +#' @examples +#' library(flowWorkspaceData) +#' library(ncdfFlow) +#' library(stats) +#' +#' # Use 'extdata' from the {flowWorkspaceData} package +#' flowDataPath <- system.file("extdata", package = "flowWorkspaceData") +#' fcsFiles <- list.files(pattern = "CytoTrol", flowDataPath, full = TRUE) +#' ncfs <- ncdfFlow::read.ncdfFlowSet(fcsFiles) +#' fr1 <- ncfs[[1]] +#' fr2 <- ncfs[[2]] +#' +#' ## Comparison of two samples at two time points (two conditions) "g1" and "g2" +#' ## (Create a random binary variable for "g2") +#' ## One gate (two markers) "CD4", "CD38" +#' ## Log10 Transformation for both markers +#' ## Remove cells with NA and Inf values +#' +#' # First sample +#' obs_dat1 <- data.frame("id" = seq(1, nrow(fr1@exprs), 1), +#' "g1" = rep(1, nrow(fr1@exprs)), +#' "g2" = stats::rbinom(nrow(fr1@exprs), 1, 0.5), +#' "log10_CD4" = log(fr1@exprs[ , 5], 10), +#' "log10_CD38" = log(fr1@exprs[ , 6], 10)) +#' # Second sample +#' obs_dat2 <- data.frame("id" = seq(1, nrow(fr2@exprs), 1), +#' "g1" = rep(2, nrow(fr2@exprs)), +#' "g2" = stats::rbinom(nrow(fr2@exprs), 1, 0.5), +#' "log10_CD4" = log(fr2@exprs[ , 5], 10), +#' "log10_CD38" = log(fr2@exprs[ , 6], 10)) +#' # Full set +#' obs_dat <- rbind(obs_dat1, obs_dat2) +#' obs_dat <- obs_dat[complete.cases(obs_dat), ] # remove NAs +#' obs_dat <- obs_dat[is.finite(rowSums(obs_dat)),] # remove Infs +#' obs_dat$g1 <- as.factor(obs_dat$g1) # set "g1" as binary factor +#' obs_dat$g2 <- as.factor(obs_dat$g2) # set "g2" as binary factor +#' +#' # Run lotrrs() function +#' test_lotrrs <- lotrrs(dat = obs_dat, p_cor = "none") +#' +lotrrs <- function(dat, + alpha = 0.05, + p_correct = c("none", "correlated", "uncorrelated"), + nbc = NULL, + doplot = FALSE, + rcols = c("#FF0000", "#cccccc", "#0000FF"), + win = NULL, + verbose = FALSE, + ...) { + + `%!in%` <- function(x, y)!(`%in%`(x, y)) # helpful custom function + + # Inputs + ## dat + if ("data.frame" %!in% class(dat)) { stop("'dat' must be class 'data.frame'") } + ## win + if (!is.null(win) & class(win) != "owin") { stop("'win' must be class 'owin'") } + if (is.null(win)) { + dat <- as.data.frame(dat) + dat <- dat[!is.na(dat[ , 4]) & !is.na(dat[ , 5]) , ] + chul <- grDevices::chull(dat[ , 4:5]) + chul_coords <- dat[ , 4:5][c(chul, chul[1]), ] + win <- spatstat.core::owin(poly = list(x = rev(chul_coords[ , 1]), + y = rev(chul_coords[ , 2]))) + } + ## p-value correction + if (is.null(p_correct)) { p_correct <- "none" } + + Vnames <- names(dat) # axis names + names(dat) <- c("id", "G1", "G2", "V1", "V2") + levels(dat$G1) <- c("case", "control") + levels(dat$G2) <- c("0", "1") + + # Create two PPPs + t0_df <- dat[dat$G2 == "0", ] + t1_df <- dat[dat$G2 == "1", ] + suppressMessages(suppressWarnings(t_ppp <- spatstat.core::ppp(x = dat$V1, + y = dat$V2, + marks = dat$G1, + window = win))) + suppressMessages(suppressWarnings(t0_ppp <- spatstat.core::ppp(x = t0_df$V1, + y = t0_df$V2, + marks = t0_df$G1, + window = win))) + suppressMessages(suppressWarnings(t1_ppp <- spatstat.core::ppp(x = t1_df$V1, + y = t1_df$V2, + marks = t1_df$G1, + window = win))) + + # Estimate two SRRs + t_h0 <- sparr::OS(t_ppp, "geometric") + rm(t_ppp) # conserve memory + t0_rr <- sparr::risk(t0_ppp, + h0 = t_h0, + log = FALSE, + edge = "diggle", + verbose = verbose, + ...) + t1_rr <- sparr::risk(t1_ppp, + h0 = t_h0, + log = FALSE, + edge = "diggle", + verbose = verbose, + ...) + + # Create objects of class 'bivden' + t0_bd <- sparr::bivariate.density(t0_ppp, + h0 = t_h0, + edge = "diggle", + verbose = FALSE, + ...) + t1_bd <- sparr::bivariate.density(t1_ppp, + h0 = t_h0, + edge = "diggle", + verbose = FALSE, + ...) + t0_rr_bd <- list("z" = t0_rr$rr, + "h0" = t0_rr$f$h0, + "hp" = t0_rr$f$hp, + "h" = t0_rr$f$h, + "him" = t0_rr$f$him, + "q" = t0_bd$q, + "gamma" = t0_rr$f$gamma, + "geometric" = t0_rr$f$geometric, + "pp" = t0_ppp) + t1_rr_bd <- list("z" = t1_rr$rr, + "h0" = t1_rr$f$h0, + "hp" = t1_rr$f$hp, + "h" = t1_rr$f$h, + "him" = t1_rr$f$him, + "q" = t1_bd$q, + "gamma" = t1_rr$f$gamma, + "geometric" = t1_rr$f$geometric, + "pp" = t1_ppp) + class(t0_rr_bd) <- "bivden" + class(t1_rr_bd) <- "bivden" + rm(t0_ppp, t1_ppp, t0_bd, t1_bd) # conserve memory + + # Estimate the spatial relative risk surface of the two spatial relative risks + ## Also estimate the asymptotic p-value surface + suppressMessages(suppressWarnings(out <- sparr::risk(f = t1_rr_bd, + g = t0_rr_bd, + h0 = t_h0, + tolerate = TRUE, + edge = "diggle", + verbose = verbose, + log = FALSE, + ...))) + + suppressMessages(suppressWarnings(out$rr <- t1_rr_bd$z/t0_rr_bd$z)) + suppressMessages(suppressWarnings(out$lrr <- log(t1_rr_bd$z) - log(t0_rr_bd$z))) + rm(t0_rr_bd, t1_rr_bd) # conserve memory + + if (all(is.na(out$rr$v))) { + cat("relative risk unable to be estimated") + return(out) + } + + # Alpha level + if (p_correct == "none") { out$alpha <- alpha } + if (p_correct == "correlated") { + cat("Please be patient... Calculating correlated Bonferroni correction\n") + alpha_correct <- pval_correct(input = out$lrr, alpha = alpha, nbc = nbc) + out$alpha <- alpha_correct$correlated + } + if (p_correct == "uncorrelated") { out$alpha <- alpha / prod(out$rr$dim) } + + if (doplot == TRUE) { + # Graphics + op <- graphics::par(no.readonly = TRUE) + on.exit(graphics::par(op)) + # Plotting inputs + ## Colorkeys + tr_plot <- lrr_plot(input = out$lrr, cols = rcols, midpoint = 0) + ## Extent + blim <- as.vector(raster::extent(tr_plot$v)) + xlims <- blim[1:2] + ylims <- blim[3:4] + + # Plot of ratio and p-values + #Vnames <- names(dat) # axis names + Ps <- pval_plot(out$P, alpha = out$alpha) + if (all(raster::values(Ps)[!is.na(raster::values(Ps))] == 2) | all(is.na(raster::values(Ps)))) { + pcols <- rcols[2] + brp <- c(1, 3) + atp <- 2 + labp <- "No" + } else { + pcols <- rcols + brp <- c(1, 1.67, 2.33, 3) + atp <- c(1.33, 2, 2.67) + labp <- c("Numerator", "Insignificant", "Denominator") + } + + graphics::par(pty = "s", bg = "white") + invisible(graphics::split.screen(matrix(c(0, 0.45, 0.55, 1, 0.14, 0.14, 0.86, 0.86), + ncol = 4))) + graphics::screen(1) + fields::image.plot(tr_plot$v, + breaks = tr_plot$breaks, + col = tr_plot$cols, + xlim = xlims, + ylim = ylims, + axes = TRUE, + cex.lab = 1, + xlab = paste(Vnames[4], "\n", sep = ""), + ylab = Vnames[5], + cex = 1, + horizontal = TRUE, + axis.args = list(at = tr_plot$at, + labels = tr_plot$labels, + cex.axis = 0.67), + main = paste("log of the\nrelative risk surfaces\n(bandwidth:", + round(t_h0, digits = 3), + "units)", + sep = " ")) + par(bg = "transparent") + graphics::screen(2) + fields::image.plot(Ps, + breaks = brp, + col = pcols, + xlim = xlims, + ylim = ylims, + xlab = paste(Vnames[4], "\n", sep = ""), + ylab = "", + cex = 1, + axes = TRUE, + horizontal = TRUE, + axis.args = list(at = atp, + labels = labp, + cex.axis = 0.67), + main = paste("significant p-values\n(alpha = ", + formatC(out$alpha, format = "e", digits = 2), + ")", + sep = "")) + graphics::close.screen(all = TRUE) # exit split-screen mode + } + + return(out) +} diff --git a/R/lrr_plot.R b/R/lrr_plot.R new file mode 100644 index 0000000..5ce1b7a --- /dev/null +++ b/R/lrr_plot.R @@ -0,0 +1,108 @@ +#' Prepare log relative risk values for plotting with a diverging color palette +#' +#' Internal function to convert an object of class 'im' to values readable by \code{\link[fields]{image.plot}} function within the \code{\link{rrs}} and \code{\link{lotrrs}} functions. +#' +#' @param input An object of class 'rrs' from the \code{\link{lrren}} function. +#' @param plot_cols Character string of length three (3) specifying the colors for plotting: 1) presence, 2) neither, and 3) absence from the \code{\link{plot_obs}} function. +#' @param midpoint Numeric. The value to center the diverging color palette. +#' @param thresh_up Numeric. The upper value to concatenate the color key. The default (NULL) uses the maximum value from \code{input}. +#' @param thresh_low Numeric. The lower value to concatenate the color key. The default (NULL) uses the minimum value from \code{input}. +#' @param digits Integer. The number of significant digits for the labels using the \code{round} function (default is 1). +#' +#' @return An object of class 'list'. This is a named list with the following components: +#' +#' \describe{ +#' \item{\code{v}}{An object of class 'vector' for the estimated ecological niche values.} +#' \item{\code{cols}}{An object of class 'vector', returns diverging color palette values.} +#' \item{\code{breaks}}{An object of class 'vector', returns diverging color palette breaks.} +#' \item{\code{at}}{An object of class 'vector', returns legend breaks.} +#' \item{\code{labels}}{An object of class 'vector', returns legend labels.} +#' } +#' +#' @importFrom grDevices colorRampPalette +#' @importFrom raster raster +#' @importFrom sp coordinates gridded +#' @export +#' +#' @keywords internal +#' +lrr_plot <- function(input, + cols, + midpoint = 0, + thresh_up = NULL, + thresh_low = NULL, + digits = 1) { + + # Inputs + if (class(input) != "im") { + stop("The 'input' argument must be of class 'im' from an 'rrs' object.") + } + + if (length(cols) != 3) { + stop("The 'cols' argument must be a vector of length 3") + } + + # Coordinates of grid points within input 'im' + rx <- rep(input$xcol, length(input$yrow)) + for(i in 1:length(input$yrow)) { + if (i == 1) { ry <- rep(input$yrow[i], length(input$xcol)) } + if (i != 1) { ry <- c(ry, rep(input$yrow[i], length(input$xcol))) } + } + + out <- data.frame("x" = rx, + "y" = ry, + "v" = as.vector(t(input$v))) + out$v <- ifelse(is.infinite(out$v), NA, out$v) + out <- na.omit(out) # remove NAs + sp::coordinates(out) <- ~ x + y # convert to spatialpixelsdataframe + suppressMessages(suppressWarnings(sp::gridded(out) <- TRUE)) # gridded + out <- raster::raster(out) # create raster + + # Restrict spurious log relative risk values + if (!is.null(thresh_low)) { + out[out <= thresh_low] <- thresh_low + } + if (!is.null(thresh_up)) { + out[out >= thresh_up] <- thresh_up + } + + # Identify ramp above and below midpoint + lowerhalf <- length(out[out < midpoint & !is.na(out)]) # values below 0 + upperhalf <- length(out[out > midpoint & !is.na(out)]) # values above 0 + nhalf <- length(out[!is.na(out)]) / 2 # number of values at half + min_absolute_value <- min(out[is.finite(out)], na.rm = TRUE) # minimum absolute value of raster + max_absolute_value <- max(out[is.finite(out)], na.rm = TRUE) # maximum absolute value of raster + + # Color ramp parameters + ## Colors + ### vector of colors for values below midpoint + rc1 <- grDevices::colorRampPalette(colors = c(cols[3], cols[2]), space = "Lab")(lowerhalf) + ### vector of colors for values above midpoint + rc2 <- grDevices::colorRampPalette(colors = c(cols[2], cols[1]), space = "Lab")(upperhalf) + ### compile colors + rampcols <- c(rc1, rc2) + ## Breaks + ### vector of breaks for values below midpoint + rb1 <- seq(min_absolute_value, midpoint, length.out = lowerhalf + 1) + ### vector of breaks for values above midpoint + rb2 <- seq(midpoint, max_absolute_value, length.out = upperhalf + 1)[-1] + ### compile breaks + rampbreaks <- c(rb1, rb2) + + # At for colorkey lables + rbr <- max_absolute_value - min_absolute_value + rbt <- rbr / 4 + rbs <- seq(min_absolute_value, max_absolute_value, rbt) + rbm <- which.min(abs(rbs - midpoint)) + rbs[rbm] <- midpoint + + # Text for colorkey labels + rbl <- round(rbs, digits = digits) + + # Output + out <- list("v" = out$v, + "cols" = rampcols, + "breaks" = rampbreaks, + "at" = rbs, + "labels" = rbl) +} diff --git a/R/pval_correct.R b/R/pval_correct.R new file mode 100644 index 0000000..b705251 --- /dev/null +++ b/R/pval_correct.R @@ -0,0 +1,84 @@ +#' Calculate p-value corrections +#' +#' Internal function to calculate various p-value corrections including a correlated and uncorrelated Bonferroni correction for use within the \code{\link{rrs}} and \code{\link{lotrrs}} function. +#' +#' @param input An object of class 'rrs' from the \code{\link{rrs}} or \code{\link{lotrrs}} function. +#' @param alpha Numeric. The two-tailed alpha level for significance threshold (default in \code{\link{rrs}} and \code{\link{lotrrs}} functions is 0.05). +#' @param nbc Integer. The number of bins. Similar to \code{nbclass} argument in \code{\link[pgirmess]{correlog}} function. The default is the average number of gridded knots in one-dimension (i.e., x-axis). +#' +#' @details This function provides functionality for multiple testing correction in two ways: +#' +#' \enumerate{ +#' \item Computes a conventional Bonferroni correction ("uncorrelated") by dividing the \code{alpha} level by the number of gridded knots across the estimated surface. The default in the \code{\link[sparr]{risk}} function is a resolution of 128 x 128 or n = 16,384 knots. +#' \item Computs a correlated Bonferroni correction ("correlated") by taking in account the spatial correlation of the relative risk surface values (if using the \code{rrs} function for a single condition gate) or the ratio of relative risk surfaces values (if using the \code{lotrrs} function for a two condition gate). The \code{alpha} level is divided by the minimum number of knots that are not spatially correlated. The minimum number of knots that are not spatially correlated is computed by counting the knots that are a distance apart that exceeds the minimum distance of non-significant spatial correlation based on a correlogram using the \code{\link[pgirmess]{correlog}} function. +#' } +#' +#' @return An object of class 'list'. This is a named list with the following components: +#' +#' \describe{ +#' \item{\code{uncorrected}}{Numeric. Returns the uncorrected p-value.} +#' \item{\code{correlated}}{Numeric. Returns the correlated Bonferroni corrected p-value.} +#' \item{\code{uncorrelated}}{Numeric. Returns the uncorrelated Bonferroni corrected p-value.} +#' } +#' +#' @importFrom pgirmess correlog +#' @export +#' +#' @keywords internal +#' +pval_correct <- function(input, + alpha = 0.05, + nbc = NULL) { + +# Inputs + if (class(input) != "im") { + stop("The 'input' argument must be of class 'im' from an 'rrs' object.") + } + +# Calculate correlated Bonferroni correction +## Prepare data +### Coordinates of grid points within input 'im' + rx <- rep(input$xcol, length(input$yrow)) + for(i in 1:length(input$yrow)) { + if (i == 1) { ry <- rep(input$yrow[i], length(input$xcol)) } + if (i != 1) { ry <- c(ry, rep(input$yrow[i], length(input$xcol))) } + } +### Create data frame of values to estimate spatial correlation + out <- data.frame("x" = rx, + "y" = ry, + "v" = as.vector(t(input$v))) + out$v <- ifelse(is.infinite(out$v), NA, out$v) # omit inifite values + out <- out[!is.na(out$v),] # omit NA values + + if(is.null(nbc)) { nbc <- round(mean(length(input$xcol), length(input$yrow)))} + +## Estimate spatial correlation + out_cor <- pgirmess::correlog(coords = out[ , 1:2], + z = out[ , 3], + method = "Moran", + randomisation = FALSE, + nbclass = nbc) + +## Find shortest distance without significant correlation + corr_eff <- out_cor[ , 1][min(which(out_cor[ , 3] >= alpha))] + +## Bonferroni + x_kern <- length(input$xcol) # number of kernels in x-axis + y_kern <- length(input$yrow) # number of kernels in y-axis + x_dist_kern <- input$ystep # distance between kernels in x-axis + y_dist_kern <- input$ystep # distance between kernels in y-axis + n_x_kern <- corr_eff / x_dist_kern # number of x-axis kernels within correlation distance + n_y_kern <- corr_eff / y_dist_kern # number of y- axis kernels within correlation distance + x_uncorr <- x_kern / n_x_kern # number of uncorrelated x-axis kernels + y_uncorr <- y_kern / n_y_kern # number of uncorrelated y-axis kernels + n_uncorr <- x_uncorr * y_uncorr # total number of uncorrelated kernels + + alpha <- alpha # uncorrected + alpha_correlated <- alpha / n_uncorr # correlated Bonferroni + alpha_bonferroni <- alpha / (x_kern * y_kern) # uncorrelated Bonferroni (including tests outside window) + + out_alpha <- list("uncorrected" = alpha, + "correlated" = alpha_correlated, + "uncorrelated" = alpha_bonferroni) + return(out_alpha) +} diff --git a/R/pval_plot.R b/R/pval_plot.R new file mode 100644 index 0000000..0d6d783 --- /dev/null +++ b/R/pval_plot.R @@ -0,0 +1,49 @@ +#' Prepare significant p-values for plotting +#' +#' Internal function to convert an object of class 'im' to values readable by \code{\link[fields]{image.plot}} function within the \code{\link{rrs}}, \code{\link{lotrrs}}, and \code{\link{gate}} functions. +#' +#' @param input An object of class 'rrs' from the \code{\link{lrren}} function. +#' @param alpha Numeric. The two-tailed alpha level for significance threshold (default in \code{\link{rrs}}, \code{\link{lotrrs}}, and \code{\link{gate}} functions is 0.05). +#' +#' @return An object of class 'raster' with categorical values: +#' +#' \itemize{ +#' \item A value of 1: Significant numerator. +#' \item A value of 2: Insignificant. +#' \item A value of 3: Significant demoninator. +#' } +#' +#' @importFrom raster cut raster +#' @importFrom sp coordinates gridded +#' @export +#' +#' @keywords internal +#' +pval_plot <- function(input, + alpha) { + + # Inputs + if (class(input) != "im") { + stop("The 'input' argument must be of class 'im' from an 'rrs' object.") + } + + # Coordinates of grid points within input 'im' + rx <- rep(input$xcol, length(input$yrow)) + for(i in 1:length(input$yrow)) { + if (i == 1) { ry <- rep(input$yrow[i], length(input$xcol)) } + if (i != 1) { ry <- c(ry, rep(input$yrow[i], length(input$xcol))) } + } + + out <- data.frame("x" = rx, + "y" = ry, + "v" = as.vector(t(input$v))) + out$v <- ifelse(is.infinite(out$v), NA, out$v) + out <- na.omit(out) # remove NAs + sp::coordinates(out) <- ~ x + y # convert to spatialpixelsdataframe + suppressMessages(suppressWarnings(sp::gridded(out) <- TRUE)) # gridded + out <- raster::raster(out) # create raster + out <- raster::cut(out, + breaks = c(-Inf, alpha / 2, 1 - alpha / 2, Inf), + right = FALSE) + return(out) +} diff --git a/R/rrs.R b/R/rrs.R new file mode 100644 index 0000000..2e927c4 --- /dev/null +++ b/R/rrs.R @@ -0,0 +1,209 @@ +#' A single gate for a single condition +#' +#' Estimates a relative risk surface and computes the asymptotic p-value surface for a single gate with a single condition. Includes features for basic visualization. This function is used internally within the \code{\link{gating}} function to extract the points within the significant areas. This function can also be used as a standalone function. +#' +#' @param dat Input data frame flow cytometry data with four (4) features (columns): 1) ID, 2) Condition A ID, 3) Marker A as x-coordinate, 4) Marker B as y-coordinate. +#' @param alpha Numeric. The two-tailed alpha level for significance threshold (default is 0.05). +#' @param p_correct Character string specifying whether to apply a correction for multiple comparisons including a Bonferroni correction \code{p_correct = "uncorrelated"} or a correlated Bonferroni correction \code{p_correct = "correlated"}. If \code{p_correct = "none"} then no correction is applied. +#' @param nbc Optional. An integer for the number of bins when \code{p_correct = "correlated"}. Similar to \code{nbclass} argument in \code{\link[pgirmess]{nbclass}}. The default is the average number of gridded knots in one-dimension (i.e., x-axis). +#' @param doplot Logical. If \code{TRUE}, the output includes basic data visualizations. +#' @param rcols Character string of length three (3) specifying the colors for: 1) Group A, 2) Neither, and 3) Group B designations. The defaults are \code{c("#FF0000", "#cccccc", "#0000FF")} or \code{c("red", "grey80", "blue")}. +#' @param win Optional. Object of class \code{owin} for a custom two-dimensional window within which to estimate the surfaces. The default is NULL and calculates a convex hull around the data. +#' @param verbose Logical. If \code{TRUE} will print function progress during execution. If \code{FALSE} (the default), will not print. +#' @param ... Arguments passed to \code{\link[sparr]{risk}} to select bandwidth, edge correction, and resolution. +#' +#' @details This function estimates a relative risk surface and computes the asymptotic p-value surface for a single gate and single condition using the \code{\link[sparr]{risk}} function. Bandwidth is fixed across both layers (numerator and demoninator spatial densities). Basic visualization is available if \code{doplot = TRUE}. +#' +#' Provides functionality for a correction for multiple testing. If \code{p_correct = "uncorrelated"}, then a conventional Bonferroni correction is calculated by dividing the \code{alpha} level by the number of gridded knots across the estimated surface. The default in the \code{\link[sparr]{risk}} function is a resolution of 128 x 128 or n = 16,384 knots and a custom resolution can be specified using the \code{resolution} argument within the \code{\link[sparr]{risk}} function. If \code{p_correct = "correlated"}, then a Bonferroni correction that takes into account the spatial correlation of the surface is calculated within the internal \code{pval_correct} function. The \code{alpha} level is divided by the minimum number of knots that are not spatially correlated. The minimum number of knots that are not spatially correlated is computed by counting the knots that are a distance apart that exceeds the minimum distance of non-significant spatial correlation based on a correlogram using the \code{\link[pgirmess]{correlog}} function. If \code{p_correct = "none"}, then the function does not account for multiple testing and uses the uncorrected \code{alpha} level. See the internal \code{pval_correct} function documentation for more details. +#' +#' @return An object of class 'list' where each element is a object of class 'rrs' created by the \code{\link[sparr]{risk}} function with two additional components: +#' +#' \describe{ +#' \item{\code{rr}}{An object of class 'im' with the relative risk surface.} +#' \item{\code{f}}{An object of class 'im' with the spatial density of the numerator.} +#' \item{\code{g}}{An object of class 'im' with the spatial density of the denominator.} +#' \item{\code{P}}{An object of class 'im' with the asymptotic p-value surface.} +#' \item{\code{lrr}}{An object of class 'im' with the log relative risk surface.} +#' \item{\code{alpha}}{A numeric value for the alpha level used within the gate.} +#' } +#' +#' @importFrom fields image.plot +#' @importFrom graphics close.screen par screen split.screen +#' @importFrom grDevices chull +#' @importFrom raster extent values +#' @importFrom spatstat.core owin ppp +#' @importFrom sparr OS risk +#' @export +#' +#' @examples +#' library(flowWorkspaceData) +#' library(ncdfFlow) +#' +#' # Use 'extdata' from the {flowWorkspaceData} package +#' flowDataPath <- system.file("extdata", package = "flowWorkspaceData") +#' fcsFiles <- list.files(pattern = "CytoTrol", flowDataPath, full = TRUE) +#' ncfs <- ncdfFlow::read.ncdfFlowSet(fcsFiles) +#' fr1 <- ncfs[[1]] +#' fr2 <- ncfs[[2]] +#' +#' ## Comparison of two samples (single condition) "g1" +#' ## One gate (two markers) "CD4", "CD38" +#' ## Log10 Transformation for both markers +#' ## Remove cells with NA and Inf values +#' +#' # First sample +#' obs_dat1 <- data.frame("id" = seq(1, nrow(fr1@exprs), 1), +#' "g1" = rep(1, nrow(fr1@exprs)), +#' "log10_CD4" = log(fr1@exprs[ , 5], 10), +#' "log10_CD38" = log(fr1@exprs[ , 6], 10)) +#' # Second sample +#' obs_dat2 <- data.frame("id" = seq(1, nrow(fr2@exprs), 1), +#' "g1" = rep(2, nrow(fr2@exprs)), +#' "log10_CD4" = log(fr2@exprs[ , 5], 10), +#' "log10_CD38" = log(fr2@exprs[ , 6], 10)) +#' # Full set +#' obs_dat <- rbind(obs_dat1, obs_dat2) +#' obs_dat <- obs_dat[complete.cases(obs_dat), ] # remove NAs +#' obs_dat <- obs_dat[is.finite(rowSums(obs_dat)),] # remove Infs +#' obs_dat$g1 <- as.factor(obs_dat$g1) # set "g1" as binary factor +#' +#' # Run rrs() function +#' test_rrs <- rrs(dat = obs_dat, p_cor = "none") +#' +rrs <- function(dat, + alpha = 0.05, + p_correct = c("none", "correlated", "uncorrelated"), + nbc = NULL, + doplot = FALSE, + rcols = c("#FF0000", "#cccccc", "#0000FF"), + win = NULL, + verbose = FALSE, + ...) { + + `%!in%` <- function(x, y)!(`%in%`(x, y)) # helpful custom function + + # Inputs + ## dat + if ("data.frame" %!in% class(dat)) { stop("'dat' must be class 'data.frame'") } + ## win + if (!is.null(win) & class(win) != "owin") { stop("'win' must be class 'owin'") } + if (is.null(win)) { + dat <- as.data.frame(dat) + dat <- dat[!is.na(dat[ , 4]) & !is.na(dat[ , 5]) , ] + chul <- grDevices::chull(dat[ , 4:5]) + chul_coords <- dat[ , 4:5][c(chul, chul[1]), ] + win <- spatstat.core::owin(poly = list(x = rev(chul_coords[ , 1]), + y = rev(chul_coords[ , 2]))) + } + ## p-value correction + if (is.null(p_correct)) { p_correct <- "none" } + + Vnames <- names(dat) # axis names + names(dat) <- c("id", "G1", "G2", "V1", "V2") + levels(dat$G1) <- c("case", "control") + + # Create PPP + suppressMessages(suppressWarnings(c1_ppp <- spatstat.core::ppp(x = dat$V1, + y = dat$V2, + marks = dat$G1, + window = win))) + + # Estimate SRR and p-values + c1_h0 <- sparr::OS(c1_ppp, "geometric") + suppressMessages(suppressWarnings(out <- sparr::risk(f = c1_ppp, + h0 = c1_h0, + tolerate = TRUE, + edge = "diggle", + verbose = verbose, + log = FALSE, + ...))) + + if (all(is.na(out$rr$v))) { + cat("relative risk unable to be estimated") + return(out) + } + + # Alpha level + if (p_correct == "none") { out$alpha <- alpha } + if (p_correct == "correlated") { + cat("Please be patient... Calculating correlated Bonferroni correction\n") + alpha_correct <- pval_correct(input = out$lrr, alpha = alpha, nbc = nbc) + out$alpha <- alpha_correct$correlated + } + if (p_correct == "uncorrelated") { out$alpha <- alpha / prod(out$rr$dim) } + + if (doplot == TRUE) { + # Graphics + op <- graphics::par(no.readonly = TRUE) + on.exit(graphics::par(op)) + # Plotting inputs + ## Display log RR + out$lrr <- log(out$rr) + ## Colorkeys + c1_plot <- lrr_plot(input = out$lrr, cols = rcols, midpoint = 0) + ## Extent + blim <- as.vector(raster::extent(c1_plot$v)) + xlims <- blim[1:2] + ylims <- blim[3:4] + + # Plot of ratio and p-values + #Vnames <- names(dat) # axis names + Ps <- pval_plot(out$P, alpha = out$alpha) + if (all(raster::values(Ps)[!is.na(raster::values(Ps))] == 2) | all(is.na(raster::values(Ps)))) { + pcols <- rcols[2] + brp <- c(1, 3) + atp <- 2 + labp <- "No" + } else { + pcols <- rcols + brp <- c(1, 1.67, 2.33, 3) + atp <- c(1.33, 2, 2.67) + labp <- c("Numerator", "Insignificant", "Denominator") + } + + graphics::par(pty = "s", bg = "white") + invisible(graphics::split.screen(matrix(c(0, 0.45, 0.55, 1, 0.14, 0.14, 0.86, 0.86), + ncol = 4))) + graphics::screen(1) + fields::image.plot(c1_plot$v, + breaks = c1_plot$breaks, + col = c1_plot$cols, + xlim = xlims, + ylim = ylims, + axes = TRUE, + cex.lab = 1, + xlab = paste(Vnames[4], "\n", sep = ""), + ylab = Vnames[5], + cex = 1, + horizontal = TRUE, + axis.args = list(at = c1_plot$at, + labels = c1_plot$labels, + cex.axis = 0.67), + main = paste("log relative risk\n(bandwidth:", + round(c1_h0, digits = 3), + "units)", + sep = " ")) + par(bg = "transparent") + graphics::screen(2) + fields::image.plot(Ps, + breaks = brp, + col = pcols, + xlim = xlims, + ylim = ylims, + xlab = paste(Vnames[4], "\n", sep = ""), + ylab = "", + cex = 1, + axes = TRUE, + horizontal = TRUE, + axis.args = list(at = atp, + labels = labp, + cex.axis = 0.67), + main = paste("significant p-values\n(alpha = ", + formatC(out$alpha, format = "e", digits = 2), + ")", + sep = "")) + graphics::close.screen(all = TRUE) # exit split-screen mode + } + + return(out) +} diff --git a/README.md b/README.md index 623eab8..b27940b 100644 --- a/README.md +++ b/README.md @@ -1 +1,167 @@ -# gateRR +gateR: Gating strategy for mass cytometry using kernel density estimation +=================================================== + +

+ +Overview + +

+ +The `gateR` package is a suite of `R` functions to identify significant spatial clustering of mass cytometry data used in immunological investigations. For a two group comparison we detect clusters using the kernel-based spatial relative risk function that is estimated using the [sparr](https://CRAN.R-project.org/package=sparr) package. The tests are conducted in two-dimenstional space comprised of two flourescent markers. + +Examples for a single condition: + +1. Disease case v. healthy control +2. Time 1 v. Time 0 + +For a two group comparison for two conditions we estimate two relative risk surfaces for one condition and then a ratio of the relative risks. For example: + +1. Estimate a relative risk surface for +A. Time 1: Disease case v. healthy control +B. Time 0: Disease case v. healthy control +2. Estimate relative risk surface for Time 1 v. Time 2 + +Within areas where the relative risk exceeds an asymptotic normal assumption, the `gateR` package has functionality to examine the features of these cells. Basic visualization is also supported. + +

+ +Installation + +

+ +To install the release version from CRAN: + + install.packages("gateR") + +To install the development version from GitHub: + + devtools::install_github("Waller-SUSAN/gateR") + +

+ +Available functions + +

+ + ++++ + + + + + + + + + + + + + + + + + +
FunctionDescription
gatingMain function. Conduct a gating strategy for mass cytometry data.
rrsCalled within `gating`, one condition comparison.
lotrrsCalled within `gating`, two condition comparison.
+ +## Usage +``` r +# ------------------ # +# Necessary packages # +# ------------------ # + +library(flowWorkspaceData) +library(ncdfFlow) +library(stats) + +# ---------------- # +# Data preparation # +# ---------------- # + +# Use 'extdata' from the {flowWorkspaceData} package +flowDataPath <- system.file("extdata", package = "flowWorkspaceData") +fcsFiles <- list.files(pattern = "CytoTrol", flowDataPath, full = TRUE) +ncfs <- ncdfFlow::read.ncdfFlowSet(fcsFiles) +fr1 <- ncfs[[1]] +fr2 <- ncfs[[2]] + +## Comparison of two samples (single condition) "g1" +## Two gates (four markers) "CD4", "CD38", "CD8", and "CD3" +## Log10 Transformation for all markers +## Remove cells with NA and Inf values + +# First sample +obs_dat1 <- data.frame("id" = seq(1, nrow(fr1@exprs), 1), + "g1" = rep(1, nrow(fr1@exprs)), + "log10_CD4" = log(fr1@exprs[ , 5], 10), + "log10_CD38" = log(fr1@exprs[ , 6], 10), + "log10_CD8" = log(fr1@exprs[ , 7], 10), + "log10_CD3" = log(fr1@exprs[ , 8], 10)) +# Second sample +obs_dat2 <- data.frame("id" = seq(1, nrow(fr2@exprs), 1), + "g1" = rep(2, nrow(fr2@exprs)), + "log10_CD4" = log(fr2@exprs[ , 5], 10), + "log10_CD38" = log(fr2@exprs[ , 6], 10), + "log10_CD8" = log(fr2@exprs[ , 7], 10), + "log10_CD3" = log(fr2@exprs[ , 8], 10)) +# Full set +obs_dat <- rbind(obs_dat1, obs_dat2) +obs_dat <- obs_dat[complete.cases(obs_dat), ] # remove NAs +obs_dat <- obs_dat[is.finite(rowSums(obs_dat)),] # remove Infs +obs_dat$g1 <- as.factor(obs_dat$g1) # set "g1" as binary factor + +# --------- # +# Run gateR # +# --------- # + +# Single Condition +## A p-value uncorrected for multiple testing +test_gate <- gateR::gating(dat = obs_dat, + vars = c("log10_CD4", "log10_CD38", "log10_CD8", "log10_CD3"), + n_condition = 1, + doplot = TRUE, + p_cor = "none") + +# -------------------- # +# Post-gate assessment # +# -------------------- # + +# Density of log10 CD4 post-gating +graphics::plot(stats::density(test_gate$obs[test_gate$obs$g1 == 1, 3]), + main = "log10 CD4", + lty = 2) +graphics::lines(stats::density(test_gate$obs[test_gate$obs$g1 == 2, 3]), + lty = 3) +graphics::legend("topright", + legend = c("Sample 1", "Sample 2"), + lty = c(2, 3), + bty = "n") + +# --------------------------------------------- # +# Perform a single gate without data extraction # +# --------------------------------------------- # + +# Single condition +## A p-value uncorrected for multiple testing +test_rrs <- gateR::rrs(dat = obs_dat, + p_cor = "none") + +# Two conditions +## Create a second condition (randomly split the data) +## In practice, use data with a measured second condition +g2 <- stats::rbinom(nrow(obs_dat), 1, 0.5) +obs_dat$g2 <- as.factor(g2) +obs_dat <- obs_dat[ , c(1:2,7,3:6)] + +## A p-value uncorrected for multiple testing +test_lotrrs <- gateR::lotrrs(dat = obs_dat, + p_cor = "none") +``` +![](man/figures/gate1.png) + +![](man/figures/gate2.png) + +![](man/figures/postgate.png) diff --git a/build/build.R b/build/build.R new file mode 100644 index 0000000..d0e3142 --- /dev/null +++ b/build/build.R @@ -0,0 +1,171 @@ +install.packages(c("devtools", "roxygen2", "testthat", "knitr")) +library(devtools) +devtools::has_devel() +library(roxygen2) +library(testthat) + +devtools::load_all() + +getOption("gateR") + +# Convert roxygen components to .Rd files +devtools::document() +?gateR + +# Create Vignette +install() +build() + +# Testing +use_testthat() +use_test() +test() + +# NAMESPACE +document() +install() + +# Check +check() + +# Ignore .R files from /build directory +usethis::use_build_ignore(c("build")) +usethis::use_build_ignore(c("figures")) + +# Example in README +# ------------------ # +# Necessary packages # +# ------------------ # + +library(flowWorkspaceData) +library(ncdfFlow) +library(stats) + +# ---------------- # +# Data preparation # +# ---------------- # + +# Use 'extdata' from the {flowWorkspaceData} package +flowDataPath <- system.file("extdata", package = "flowWorkspaceData") +fcsFiles <- list.files(pattern = "CytoTrol", flowDataPath, full = TRUE) +ncfs <- ncdfFlow::read.ncdfFlowSet(fcsFiles) +fr1 <- ncfs[[1]] +fr2 <- ncfs[[2]] + +## Comparison of two samples (single condition) "g1" +## Two gates (Four markers) "CD4", "CD38", "CD8", and "CD3" +## Log10 Transformation for all markers +## Remove cells with NA and Inf values + +# First sample +obs_dat1 <- data.frame("id" = seq(1, nrow(fr1@exprs), 1), + "g1" = rep(1, nrow(fr1@exprs)), + "log10_CD4" = log(fr1@exprs[ , 5], 10), + "log10_CD38" = log(fr1@exprs[ , 6], 10), + "log10_CD8" = log(fr1@exprs[ , 7], 10), + "log10_CD3" = log(fr1@exprs[ , 8], 10)) +# Second sample +obs_dat2 <- data.frame("id" = seq(1, nrow(fr2@exprs), 1), + "g1" = rep(2, nrow(fr2@exprs)), + "log10_CD4" = log(fr2@exprs[ , 5], 10), + "log10_CD38" = log(fr2@exprs[ , 6], 10), + "log10_CD8" = log(fr2@exprs[ , 7], 10), + "log10_CD3" = log(fr2@exprs[ , 8], 10)) +# Full set +obs_dat <- rbind(obs_dat1, obs_dat2) +obs_dat <- obs_dat[complete.cases(obs_dat), ] # remove NAs +obs_dat <- obs_dat[is.finite(rowSums(obs_dat)),] # remove Infs +obs_dat$g1 <- as.factor(obs_dat$g1) # set "g1" as binary factor + +# --------- # +# Run gateR # +# --------- # + +# Single Condition +## A p-value uncorrected for multiple testing +test_gate <- gateR::gating(dat = obs_dat, + vars = c("log10_CD4", "log10_CD38", "log10_CD8", "log10_CD3"), + n_condition = 1, + doplot = TRUE, + p_cor = "none") + +# -------------------- # +# Post-gate assessment # +# -------------------- # + +# Density of log10 CD4 post-gating +graphics::plot(stats::density(test_gate$obs[test_gate$obs$g1 == 1, 3]), + main = "log10 CD4", + lty = 2) +graphics::lines(stats::density(test_gate$obs[test_gate$obs$g1 == 2, 3]), + lty = 3) +graphics::legend("topright", + legend = c("Sample 1", "Sample 2"), + lty = c(2, 3), + bty = "n") + +# --------------------------------------------- # +# Perform a single gate without data extraction # +# --------------------------------------------- # + +# Single condition +## A p-value uncorrected for multiple testing +test_rrs <- gateR::rrs(dat = obs_dat, + p_cor = "none") + +# Two conditions +## Create a second condition (randomly split the data) +## In practice, use data with a measured second condition +g2 <- stats::rbinom(nrow(obs_dat), 1, 0.5) +obs_dat$g2 <- as.factor(g2) +obs_dat <- obs_dat[ , c(1:2,7,3:6)] + +## A p-value uncorrected for multiple testing +test_lotrrs <- gateR::lotrrs(dat = obs_dat, + doplot = FALSE, + p_cor = "none") + + +# Example in lotrrs() + library(flowWorkspaceData) + library(ncdfFlow) + library(stats) + +# Use 'extdata' from the {flowWorkspaceData} package + flowDataPath <- system.file("extdata", package = "flowWorkspaceData") + fcsFiles <- list.files(pattern = "CytoTrol", flowDataPath, full = TRUE) + ncfs <- ncdfFlow::read.ncdfFlowSet(fcsFiles) + fr1 <- ncfs[[1]] + fr2 <- ncfs[[2]] + +## Comparison of two samples at two time points (two conditions) "g1" and "g2" +## (Create a random binary variable for "g2") +## One gates (Two markers) "CD4", "CD38" +## Log10 Transformation for both markers +## Remove cells with NA and Inf values + +# First sample + obs_dat1 <- data.frame("id" = seq(1, nrow(fr1@exprs), 1), + "g1" = rep(1, nrow(fr1@exprs)), + "g2" = stats::rbinom(nrow(fr1@exprs), 1, 0.5), + "log10_CD4" = log(fr1@exprs[ , 5], 10), + "log10_CD38" = log(fr1@exprs[ , 6], 10)) +# Second sample + obs_dat2 <- data.frame("id" = seq(1, nrow(fr2@exprs), 1), + "g1" = rep(2, nrow(fr2@exprs)), + "g2" = stats::rbinom(nrow(fr2@exprs), 1, 0.5), + "log10_CD4" = log(fr2@exprs[ , 5], 10), + "log10_CD38" = log(fr2@exprs[ , 6], 10)) +# Full set + obs_dat <- rbind(obs_dat1, obs_dat2) + obs_dat <- obs_dat[complete.cases(obs_dat), ] # remove NAs + obs_dat <- obs_dat[is.finite(rowSums(obs_dat)),] # remove Infs + obs_dat$g1 <- as.factor(obs_dat$g1) # set "g1" as binary factor + obs_dat$g2 <- as.factor(obs_dat$g2) # set "g2" as binary factor + +# Run lotrrs() function + test_lotrrs <- lotrrs(dat = obs_dat, p_cor = "none") + + + +plot(out_gate$rrs[[2]]) diff --git a/build/hexsticker_gateR.R b/build/hexsticker_gateR.R new file mode 100644 index 0000000..fcada3d --- /dev/null +++ b/build/hexsticker_gateR.R @@ -0,0 +1,32 @@ +# ----------------------------------------------------- # +# HexSticker for the {gateR} package +# +# Created by: Ian Buller, Ph.D., M.A. (GitHub: @idblr) +# Created on: September 10, 2020 +# +# Recently modified by: +# Recently modified on: +# +# Notes: +# A) Uses the "hexSticker" package +# B) Modified image: https://publicdomainvectors.org/en/free-clipart/Crocodile-in-egg-shell/78578.html +# ----------------------------------------------------- # + +# Packages +library(hexSticker) + +# Image file +path_image <- "./man/figures/gator.png" + +# Create hexSticker +## the alligator palette https://www.color-hex.com/color-palette/33955 +## And colors from image https://html-color-codes.info/colors-from-image/ +s <- hexSticker::sticker(subplot = path_image, + package = "gateR", p_size = 8, p_color = "#577D45", + s_x = 1, s_y = 0.8, s_width = 0.8, s_height = 0.8, + h_fill = "#ffedb2", + h_color = "#577D45", + dpi = 1000, + filename = "./man/figures/gateR.png", + white_around_sticker = F) +# -------------------- END OF CODE -------------------- # diff --git a/gateR.Rproj b/gateR.Rproj new file mode 100644 index 0000000..60b3a1b --- /dev/null +++ b/gateR.Rproj @@ -0,0 +1,19 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: knitr +LaTeX: XeLaTeX + +AutoAppendNewline: Yes + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source diff --git a/man/figures/gate1.png b/man/figures/gate1.png new file mode 100644 index 0000000..27a2c11 Binary files /dev/null and b/man/figures/gate1.png differ diff --git a/man/figures/gate2.png b/man/figures/gate2.png new file mode 100644 index 0000000..79226c4 Binary files /dev/null and b/man/figures/gate2.png differ diff --git a/man/figures/gateR.png b/man/figures/gateR.png new file mode 100644 index 0000000..583d751 Binary files /dev/null and b/man/figures/gateR.png differ diff --git a/man/figures/gator.png b/man/figures/gator.png new file mode 100644 index 0000000..a10285d Binary files /dev/null and b/man/figures/gator.png differ diff --git a/man/figures/postgate.png b/man/figures/postgate.png new file mode 100644 index 0000000..e9e7220 Binary files /dev/null and b/man/figures/postgate.png differ diff --git a/man/gating.Rd b/man/gating.Rd new file mode 100644 index 0000000..ac45232 --- /dev/null +++ b/man/gating.Rd @@ -0,0 +1,123 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gating.R +\name{gating} +\alias{gating} +\title{Gating strategy for mass cytometry data using spatial relative risk functions} +\usage{ +gating( + dat, + vars, + n_condition = c(1, 2), + numerator = TRUE, + alpha = 0.05, + p_cor = c("none", "correlated", "uncorrelated"), + nbc = NULL, + doplot = FALSE, + rcols = c("#FF0000", "#cccccc", "#0000FF"), + win = NULL, + verbose = FALSE, + ... +) +} +\arguments{ +\item{dat}{Input data frame flow cytometry data with the following features (columns): 1) ID, 2) Condition A ID, 3) Condition B ID (optional), and a set of markers.} + +\item{vars}{A vector of characters with the name of features (columns) within \code{dat} to use as markers for each gate. See details below.} + +\item{numerator}{Logical. If \code{TRUE} (the default), cells will be extracted within all statistically significant numerator (i.e., case) clusters. If \code{FALSE}, cells will be extracted within all statistically significant denominator (i.e., control) clusters.} + +\item{alpha}{Numeric. The two-tailed alpha level for significance threshold (default is 0.05).} + +\item{nbc}{Optional. An integer for the number of bins when \code{p_correct = "correlated"}. Similar to \code{nbclass} argument in \code{\link[pgirmess]{nbclass}}. The default is the average number of gridded knots in one-dimension (i.e., x-axis).} + +\item{doplot}{Logical. If \code{TRUE}, the output includes basic data visualizations.} + +\item{rcols}{Character string of length three (3) specifying the colors for: 1) Group A, 2) Neither, and 3) Group B designations. The defaults are \code{c("#FF0000", "#cccccc", "#0000FF")} or \code{c("red", "grey80", "blue")}.} + +\item{win}{Optional. Object of class \code{owin} for a custom two-dimensional window within which to estimate the surfaces. The default is NULL and calculates a convex hull around the data.} + +\item{verbose}{Logical. If \code{TRUE} will print function progress during execution. If \code{FALSE} (the default), will not print.} + +\item{...}{Arguments passed to \code{\link[sparr]{risk}} to select bandwidth, edge correction, and resolution.} + +\item{n_conditions}{A numeric value of either 1 or 2 designating if the gating is performed with one condition or two conditions.} + +\item{p_correct}{Character string specifying whether to apply a correction for multiple comparisons including a Bonferroni correction \code{p_correct = "uncorrelated"} or a correlated Bonferroni correction \code{p_correct = "correlated"}. If \code{p_correct = "none"} then no correction is applied.} +} +\value{ +An object of class \code{list}. This is a named list with the following components: + +#' \describe{ +\item{\code{obs}}{An object of class 'tibble' of the same features as \code{dat} that includes the information for the cells extracted with significant clusters in the final gate.} +\item{\code{gate}}{An object of class 'list' of 'rrs' objects from each gate.} +} + +The objects of class 'rrs' is similar to the output of the \code{\link[sparr]{risk}} function with two additional components: +\describe{ +\item{\code{rr}}{An object of class 'im' with the relative risk surface.} +\item{\code{f}}{An object of class 'im' with the spatial density of the numerator.} +\item{\code{g}}{An object of class 'im' with the spatial density of the denominator.} +\item{\code{P}}{An object of class 'im' with the asymptotic p-value surface.} +\item{\code{lrr}}{An object of class 'im' with the log relative risk surface.} +\item{\code{alpha}}{A numeric value for the alpha level used within the gate.} +} +} +\description{ +Extracts cells within statistically significant combinations of flourescent markers, successively, for a set of markers. Statistically significant combinations are idetnified using two-tailed p-values of a relative risk surface assuming asymptotic normality. This function is currently available for two-level comparisons of a single group (e.g., case/control) or two groups (e.g., case/control at time 1 and time 2). Provides functionality for basic visualization and multiple testing correction. +} +\details{ +This function performs a sequential gating strategy for mass cytometry data comparing two levels with one or two conditions. Gates are typically two-dimensional space comprised of two flourescent markers. The two-level comparison allows for the estimation of a spatial relative risk function and the computation of p-value based on an assumption of asymptotic normality. Cells within statistically significant areas are extracted and used in the next gate. This function relies heavily upon the \code{\link[sparr]{risk}} function. Basic visualization is available if \code{doplot = TRUE}. + +The \code{vars} argument must be a vector with an even-numbered length where the odd-numbered elements are the markers used on the x-axis of a gate and the even-numbered elements are the markers used on the y-axis of a gate. For example, if \code{vars = c("V1", "V2", "V3", and "V4")} then the first gate is "V1" on the x-axis and "V2" on the y-axis and then the second gate is V3" on the x-axis and "V4" on the y-axis. Makers can be repeated in successive gates. + +The \code{n_conditions} argument specifies if the gating strategy is performed for one condition or two conditions. If \code{n_conditions = 1}, then the function performs a one condition gating strategy using the internal \code{rrs} function, which computes the statistically significant areas (clusers) of a relative risk surface at each gate and selects the cells within the clusters specified by the \code{numerator} argument. If \code{n_conditions = 2}, then the function performs a two conditions gating strategy using the internal \code{lotrrs} function, which computes the statistically significant areas (clusers) of a ratio of relative risk surfaces at each gate and selects the cells within the clusters specified by the \code{numerator} argument. See the documentation for the internal \code{rrs} and \code{lotrrs} functions for more details. + +The p-value surface of the ratio of relative risk surfaces is estimated assuming asymptotic normality of the ratio value at each gridded knot. The bandwidth is fixed across all layers. + +Provides functionality for a correction for multiple testing. If \code{p_correct = "uncorrelated"}, then a conventional Bonferroni correction is calculated by dividing the \code{alpha} level by the number of gridded knots across the estimated surface. The default in the \code{\link[sparr]{risk}} function is a resolution of 128 x 128 or n = 16,384 knots and a custom resolution can be specified using the \code{resolution} argument within the \code{\link[sparr]{risk}} function. If \code{p_correct = "correlated"}, then a Bonferroni correction that takes into account the spatial correlation of the surface is calculated within the internal \code{pval_correct} function. The \code{alpha} level is divided by the minimum number of knots that are not spatially correlated. The minimum number of knots that are not spatially correlated is computed by counting the knots that are a distance apart that exceeds the minimum distance of non-significant spatial correlation based on a correlogram using the \code{\link[pgirmess]{correlog}} function. If \code{p_correct = "none"}, then the function does not account for multiple testing and uses the uncorrected \code{alpha} level. See the internal \code{pval_correct} function documentation for more details. +} +\examples{ + library(flowWorkspaceData) + library(ncdfFlow) + +# Use 'extdata' from the {flowWorkspaceData} package + flowDataPath <- system.file("extdata", package = "flowWorkspaceData") + fcsFiles <- list.files(pattern = "CytoTrol", flowDataPath, full = TRUE) + ncfs <- ncdfFlow::read.ncdfFlowSet(fcsFiles) + fr1 <- ncfs[[1]] + fr2 <- ncfs[[2]] + +## Comparison of two samples (single condition) "g1" +## Two gates (four markers) "CD4", "CD38", "CD8", and "CD3" +## Log10 Transformation for both markers +## Remove cells with NA and Inf values + +# First sample + obs_dat1 <- data.frame("id" = seq(1, nrow(fr1@exprs), 1), + "g1" = rep(1, nrow(fr1@exprs)), + "log10_CD4" = log(fr1@exprs[ , 5], 10), + "log10_CD38" = log(fr1@exprs[ , 6], 10), + "log10_CD8" = log(fr1@exprs[ , 7], 10), + "log10_CD3" = log(fr1@exprs[ , 8], 10)) +# Second sample + obs_dat2 <- data.frame("id" = seq(1, nrow(fr2@exprs), 1), + "g1" = rep(2, nrow(fr2@exprs)), + "log10_CD4" = log(fr2@exprs[ , 5], 10), + "log10_CD38" = log(fr2@exprs[ , 6], 10), + "log10_CD8" = log(fr2@exprs[ , 7], 10), + "log10_CD3" = log(fr2@exprs[ , 8], 10)) +# Full set + obs_dat <- rbind(obs_dat1, obs_dat2) + obs_dat <- obs_dat[complete.cases(obs_dat), ] # remove NAs + obs_dat <- obs_dat[is.finite(rowSums(obs_dat)),] # remove Infs + obs_dat$g1 <- as.factor(obs_dat$g1) # set "g1" as binary factor + +# Run gating() function +## Single condition, no multiple testing correction + test_gate <- gateR::gating(dat = obs_dat, + vars = c("log10_CD4", "log10_CD38", + "log10_CD8", "log10_CD3"), + n_condition = 1, + p_cor = "none") + +} diff --git a/man/hello.Rd b/man/hello.Rd deleted file mode 100644 index 0fa7c4b..0000000 --- a/man/hello.Rd +++ /dev/null @@ -1,12 +0,0 @@ -\name{hello} -\alias{hello} -\title{Hello, World!} -\usage{ -hello() -} -\description{ -Prints 'Hello, world!'. -} -\examples{ -hello() -} diff --git a/man/lotrrs.Rd b/man/lotrrs.Rd new file mode 100644 index 0000000..54dfe5a --- /dev/null +++ b/man/lotrrs.Rd @@ -0,0 +1,104 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lotrrs.R +\name{lotrrs} +\alias{lotrrs} +\title{A single gate for two conditions} +\usage{ +lotrrs( + dat, + alpha = 0.05, + p_correct = c("none", "correlated", "uncorrelated"), + nbc = NULL, + doplot = FALSE, + rcols = c("#FF0000", "#cccccc", "#0000FF"), + win = NULL, + verbose = FALSE, + ... +) +} +\arguments{ +\item{dat}{Input data frame flow cytometry data with five (5) features (columns): 1) ID, 2) Condition A ID, 3) Condition B ID, 4) Marker A as x-coordinate, 5) Marker B as y-coordinate.} + +\item{alpha}{Numeric. The two-tailed alpha level for significance threshold (default is 0.05).} + +\item{p_correct}{Character string specifying whether to apply a correction for multiple comparisons including a Bonferroni correction \code{p_correct = "uncorrelated"} or a correlated Bonferroni correction \code{p_correct = "correlated"}. If \code{p_correct = "none"} then no correction is applied.} + +\item{nbc}{Optional. An integer for the number of bins when \code{p_correct = "correlated"}. Similar to \code{nbclass} argument in \code{\link[pgirmess]{nbclass}}. The default is the average number of gridded knots in one-dimension (i.e., x-axis).} + +\item{doplot}{Logical. If \code{TRUE}, the output includes basic data visualizations.} + +\item{rcols}{Character string of length three (3) specifying the colors for: 1) Group A, 2) Neither, and 3) Group B designations. The defaults are \code{c("#FF0000", "#cccccc", "#0000FF")} or \code{c("red", "grey80", "blue")}.} + +\item{win}{Optional. Object of class \code{owin} for a custom two-dimensional window within which to estimate the surfaces. The default is NULL and calculates a convex hull around the data.} + +\item{verbose}{Logical. If \code{TRUE} will print function progress during execution. If \code{FALSE} (the default), will not print.} + +\item{...}{Arguments passed to \code{\link[sparr]{risk}} to select bandwidth, edge correction, and resolution.} +} +\value{ +An object of class 'list' where each element is a object of class 'rrs' created by the \code{\link[sparr]{risk}} function with two additional components: + +\describe{ +\item{\code{rr}}{An object of class 'im' with the relative risk surface.} +\item{\code{f}}{An object of class 'im' with the spatial density of the numerator.} +\item{\code{g}}{An object of class 'im' with the spatial density of the denominator.} +\item{\code{P}}{An object of class 'im' with the asymptotic p-value surface.} +\item{\code{lrr}}{An object of class 'im' with the log relative risk surface.} +\item{\code{alpha}}{A numeric value for the alpha level used within the gate.} +} +} +\description{ +Estimates a ratio of relative risk surfaces and computes the asymptotic p-value surface for a single gate with two conditions. Includes features for basic visualization. This function is used internally within the \code{\link{gating}} function to extract the points within the significant areas. This function can also be used as a standalone function. +} +\details{ +This function estimates a ratio of relative risk surfaces and computes the asymptotic p-value surface for a single gate with two conditions using three successive \code{\link[sparr]{risk}} functions. A relative risk surface is estimated for Condition A at each level of Condition B and then a ratio of the two relative risk surfaces is computed. + +\deqn{RR_{Condition B1} = \frac{Condition A2 of B1}{Condition A1 of B1}} +\deqn{RR_{Condition B2} = \frac{Condition A2 of B2}{Condition A1 of B2}} +\deqn{ln(rRR) = ln\left (\frac{RR_{Condition B2}}{CRR_{Condition B2}}\right )} + +The p-value surface of the ratio of relative risk surfaces is estimated assuming asymptotic normality of the ratio value at each gridded knot. The bandwidth is fixed across all layers. Basic visualization is available if \code{doplot = TRUE}. + +Provides functionality for a correction for multiple testing. If \code{p_correct = "uncorrelated"}, then a conventional Bonferroni correction is calculated by dividing the \code{alpha} level by the number of gridded knots across the estimated surface. The default in the \code{\link[sparr]{risk}} function is a resolution of 128 x 128 or n = 16,384 knots and a custom resolution can be specified using the \code{resolution} argument within the \code{\link[sparr]{risk}} function. If \code{p_correct = "correlated"}, then a Bonferroni correction that takes into account the spatial correlation of the surface is calculated within the internal \code{pval_correct} function. The \code{alpha} level is divided by the minimum number of knots that are not spatially correlated. The minimum number of knots that are not spatially correlated is computed by counting the knots that are a distance apart that exceeds the minimum distance of non-significant spatial correlation based on a correlogram using the \code{\link[pgirmess]{correlog}} function. If \code{p_correct = "none"}, then the function does not account for multiple testing and uses the uncorrected \code{alpha} level. See the internal \code{pval_correct} function documentation for more details. +} +\examples{ + library(flowWorkspaceData) + library(ncdfFlow) + library(stats) + +# Use 'extdata' from the {flowWorkspaceData} package + flowDataPath <- system.file("extdata", package = "flowWorkspaceData") + fcsFiles <- list.files(pattern = "CytoTrol", flowDataPath, full = TRUE) + ncfs <- ncdfFlow::read.ncdfFlowSet(fcsFiles) + fr1 <- ncfs[[1]] + fr2 <- ncfs[[2]] + +## Comparison of two samples at two time points (two conditions) "g1" and "g2" +## (Create a random binary variable for "g2") +## One gate (two markers) "CD4", "CD38" +## Log10 Transformation for both markers +## Remove cells with NA and Inf values + +# First sample + obs_dat1 <- data.frame("id" = seq(1, nrow(fr1@exprs), 1), + "g1" = rep(1, nrow(fr1@exprs)), + "g2" = stats::rbinom(nrow(fr1@exprs), 1, 0.5), + "log10_CD4" = log(fr1@exprs[ , 5], 10), + "log10_CD38" = log(fr1@exprs[ , 6], 10)) +# Second sample + obs_dat2 <- data.frame("id" = seq(1, nrow(fr2@exprs), 1), + "g1" = rep(2, nrow(fr2@exprs)), + "g2" = stats::rbinom(nrow(fr2@exprs), 1, 0.5), + "log10_CD4" = log(fr2@exprs[ , 5], 10), + "log10_CD38" = log(fr2@exprs[ , 6], 10)) +# Full set + obs_dat <- rbind(obs_dat1, obs_dat2) + obs_dat <- obs_dat[complete.cases(obs_dat), ] # remove NAs + obs_dat <- obs_dat[is.finite(rowSums(obs_dat)),] # remove Infs + obs_dat$g1 <- as.factor(obs_dat$g1) # set "g1" as binary factor + obs_dat$g2 <- as.factor(obs_dat$g2) # set "g2" as binary factor + +# Run lotrrs() function + test_lotrrs <- lotrrs(dat = obs_dat, p_cor = "none") + +} diff --git a/man/lrr_plot.Rd b/man/lrr_plot.Rd new file mode 100644 index 0000000..ad2def0 --- /dev/null +++ b/man/lrr_plot.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lrr_plot.R +\name{lrr_plot} +\alias{lrr_plot} +\title{Prepare log relative risk values for plotting with a diverging color palette} +\usage{ +lrr_plot( + input, + cols, + midpoint = 0, + thresh_up = NULL, + thresh_low = NULL, + digits = 1 +) +} +\arguments{ +\item{input}{An object of class 'rrs' from the \code{\link{lrren}} function.} + +\item{midpoint}{Numeric. The value to center the diverging color palette.} + +\item{thresh_up}{Numeric. The upper value to concatenate the color key. The default (NULL) uses the maximum value from \code{input}.} + +\item{thresh_low}{Numeric. The lower value to concatenate the color key. The default (NULL) uses the minimum value from \code{input}.} + +\item{digits}{Integer. The number of significant digits for the labels using the \code{round} function (default is 1).} + +\item{plot_cols}{Character string of length three (3) specifying the colors for plotting: 1) presence, 2) neither, and 3) absence from the \code{\link{plot_obs}} function.} +} +\value{ +An object of class 'list'. This is a named list with the following components: + +\describe{ +\item{\code{v}}{An object of class 'vector' for the estimated ecological niche values.} +\item{\code{cols}}{An object of class 'vector', returns diverging color palette values.} +\item{\code{breaks}}{An object of class 'vector', returns diverging color palette breaks.} +\item{\code{at}}{An object of class 'vector', returns legend breaks.} +\item{\code{labels}}{An object of class 'vector', returns legend labels.} +} +} +\description{ +Internal function to convert an object of class 'im' to values readable by \code{\link[fields]{image.plot}} function within the \code{\link{rrs}} and \code{\link{lotrrs}} functions. +} +\keyword{internal} diff --git a/man/pval_correct.Rd b/man/pval_correct.Rd new file mode 100644 index 0000000..e51e476 --- /dev/null +++ b/man/pval_correct.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pval_correct.R +\name{pval_correct} +\alias{pval_correct} +\title{Calculate p-value corrections} +\usage{ +pval_correct(input, alpha = 0.05, nbc = NULL) +} +\arguments{ +\item{input}{An object of class 'rrs' from the \code{\link{rrs}} or \code{\link{lotrrs}} function.} + +\item{alpha}{Numeric. The two-tailed alpha level for significance threshold (default in \code{\link{rrs}} and \code{\link{lotrrs}} functions is 0.05).} + +\item{nbc}{Integer. The number of bins. Similar to \code{nbclass} argument in \code{\link[pgirmess]{correlog}} function. The default is the average number of gridded knots in one-dimension (i.e., x-axis).} +} +\value{ +An object of class 'list'. This is a named list with the following components: + +\describe{ +\item{\code{uncorrected}}{Numeric. Returns the uncorrected p-value.} +\item{\code{correlated}}{Numeric. Returns the correlated Bonferroni corrected p-value.} +\item{\code{uncorrelated}}{Numeric. Returns the uncorrelated Bonferroni corrected p-value.} +} +} +\description{ +Internal function to calculate various p-value corrections including a correlated and uncorrelated Bonferroni correction for use within the \code{\link{rrs}} and \code{\link{lotrrs}} function. +} +\details{ +This function provides functionality for multiple testing correction in two ways: + +\enumerate{ +\item Computes a conventional Bonferroni correction ("uncorrelated") by dividing the \code{alpha} level by the number of gridded knots across the estimated surface. The default in the \code{\link[sparr]{risk}} function is a resolution of 128 x 128 or n = 16,384 knots. +\item Computs a correlated Bonferroni correction ("correlated") by taking in account the spatial correlation of the relative risk surface values (if using the \code{rrs} function for a single condition gate) or the ratio of relative risk surfaces values (if using the \code{lotrrs} function for a two condition gate). The \code{alpha} level is divided by the minimum number of knots that are not spatially correlated. The minimum number of knots that are not spatially correlated is computed by counting the knots that are a distance apart that exceeds the minimum distance of non-significant spatial correlation based on a correlogram using the \code{\link[pgirmess]{correlog}} function. +} +} +\keyword{internal} diff --git a/man/pval_plot.Rd b/man/pval_plot.Rd new file mode 100644 index 0000000..e860ae5 --- /dev/null +++ b/man/pval_plot.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pval_plot.R +\name{pval_plot} +\alias{pval_plot} +\title{Prepare significant p-values for plotting} +\usage{ +pval_plot(input, alpha) +} +\arguments{ +\item{input}{An object of class 'rrs' from the \code{\link{lrren}} function.} + +\item{alpha}{Numeric. The two-tailed alpha level for significance threshold (default in \code{\link{rrs}}, \code{\link{lotrrs}}, and \code{\link{gate}} functions is 0.05).} +} +\value{ +An object of class 'raster' with categorical values: + +\itemize{ +\item A value of 1: Significant numerator. +\item A value of 2: Insignificant. +\item A value of 3: Significant demoninator. +} +} +\description{ +Internal function to convert an object of class 'im' to values readable by \code{\link[fields]{image.plot}} function within the \code{\link{rrs}}, \code{\link{lotrrs}}, and \code{\link{gate}} functions. +} +\keyword{internal} diff --git a/man/rrs.Rd b/man/rrs.Rd new file mode 100644 index 0000000..f753f32 --- /dev/null +++ b/man/rrs.Rd @@ -0,0 +1,93 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rrs.R +\name{rrs} +\alias{rrs} +\title{A single gate for a single condition} +\usage{ +rrs( + dat, + alpha = 0.05, + p_correct = c("none", "correlated", "uncorrelated"), + nbc = NULL, + doplot = FALSE, + rcols = c("#FF0000", "#cccccc", "#0000FF"), + win = NULL, + verbose = FALSE, + ... +) +} +\arguments{ +\item{dat}{Input data frame flow cytometry data with four (4) features (columns): 1) ID, 2) Condition A ID, 3) Marker A as x-coordinate, 4) Marker B as y-coordinate.} + +\item{alpha}{Numeric. The two-tailed alpha level for significance threshold (default is 0.05).} + +\item{p_correct}{Character string specifying whether to apply a correction for multiple comparisons including a Bonferroni correction \code{p_correct = "uncorrelated"} or a correlated Bonferroni correction \code{p_correct = "correlated"}. If \code{p_correct = "none"} then no correction is applied.} + +\item{nbc}{Optional. An integer for the number of bins when \code{p_correct = "correlated"}. Similar to \code{nbclass} argument in \code{\link[pgirmess]{nbclass}}. The default is the average number of gridded knots in one-dimension (i.e., x-axis).} + +\item{doplot}{Logical. If \code{TRUE}, the output includes basic data visualizations.} + +\item{rcols}{Character string of length three (3) specifying the colors for: 1) Group A, 2) Neither, and 3) Group B designations. The defaults are \code{c("#FF0000", "#cccccc", "#0000FF")} or \code{c("red", "grey80", "blue")}.} + +\item{win}{Optional. Object of class \code{owin} for a custom two-dimensional window within which to estimate the surfaces. The default is NULL and calculates a convex hull around the data.} + +\item{verbose}{Logical. If \code{TRUE} will print function progress during execution. If \code{FALSE} (the default), will not print.} + +\item{...}{Arguments passed to \code{\link[sparr]{risk}} to select bandwidth, edge correction, and resolution.} +} +\value{ +An object of class 'list' where each element is a object of class 'rrs' created by the \code{\link[sparr]{risk}} function with two additional components: + +\describe{ +\item{\code{rr}}{An object of class 'im' with the relative risk surface.} +\item{\code{f}}{An object of class 'im' with the spatial density of the numerator.} +\item{\code{g}}{An object of class 'im' with the spatial density of the denominator.} +\item{\code{P}}{An object of class 'im' with the asymptotic p-value surface.} +\item{\code{lrr}}{An object of class 'im' with the log relative risk surface.} +\item{\code{alpha}}{A numeric value for the alpha level used within the gate.} +} +} +\description{ +Estimates a relative risk surface and computes the asymptotic p-value surface for a single gate with a single condition. Includes features for basic visualization. This function is used internally within the \code{\link{gating}} function to extract the points within the significant areas. This function can also be used as a standalone function. +} +\details{ +This function estimates a relative risk surface and computes the asymptotic p-value surface for a single gate and single condition using the \code{\link[sparr]{risk}} function. Bandwidth is fixed across both layers (numerator and demoninator spatial densities). Basic visualization is available if \code{doplot = TRUE}. + +Provides functionality for a correction for multiple testing. If \code{p_correct = "uncorrelated"}, then a conventional Bonferroni correction is calculated by dividing the \code{alpha} level by the number of gridded knots across the estimated surface. The default in the \code{\link[sparr]{risk}} function is a resolution of 128 x 128 or n = 16,384 knots and a custom resolution can be specified using the \code{resolution} argument within the \code{\link[sparr]{risk}} function. If \code{p_correct = "correlated"}, then a Bonferroni correction that takes into account the spatial correlation of the surface is calculated within the internal \code{pval_correct} function. The \code{alpha} level is divided by the minimum number of knots that are not spatially correlated. The minimum number of knots that are not spatially correlated is computed by counting the knots that are a distance apart that exceeds the minimum distance of non-significant spatial correlation based on a correlogram using the \code{\link[pgirmess]{correlog}} function. If \code{p_correct = "none"}, then the function does not account for multiple testing and uses the uncorrected \code{alpha} level. See the internal \code{pval_correct} function documentation for more details. +} +\examples{ + library(flowWorkspaceData) + library(ncdfFlow) + +# Use 'extdata' from the {flowWorkspaceData} package + flowDataPath <- system.file("extdata", package = "flowWorkspaceData") + fcsFiles <- list.files(pattern = "CytoTrol", flowDataPath, full = TRUE) + ncfs <- ncdfFlow::read.ncdfFlowSet(fcsFiles) + fr1 <- ncfs[[1]] + fr2 <- ncfs[[2]] + +## Comparison of two samples (single condition) "g1" +## One gate (two markers) "CD4", "CD38" +## Log10 Transformation for both markers +## Remove cells with NA and Inf values + +# First sample + obs_dat1 <- data.frame("id" = seq(1, nrow(fr1@exprs), 1), + "g1" = rep(1, nrow(fr1@exprs)), + "log10_CD4" = log(fr1@exprs[ , 5], 10), + "log10_CD38" = log(fr1@exprs[ , 6], 10)) +# Second sample + obs_dat2 <- data.frame("id" = seq(1, nrow(fr2@exprs), 1), + "g1" = rep(2, nrow(fr2@exprs)), + "log10_CD4" = log(fr2@exprs[ , 5], 10), + "log10_CD38" = log(fr2@exprs[ , 6], 10)) +# Full set + obs_dat <- rbind(obs_dat1, obs_dat2) + obs_dat <- obs_dat[complete.cases(obs_dat), ] # remove NAs + obs_dat <- obs_dat[is.finite(rowSums(obs_dat)),] # remove Infs + obs_dat$g1 <- as.factor(obs_dat$g1) # set "g1" as binary factor + +# Run rrs() function + test_rrs <- rrs(dat = obs_dat, p_cor = "none") + +} diff --git a/vignettes/vignette.Rmd b/vignettes/vignette.Rmd new file mode 100644 index 0000000..44ddd2b --- /dev/null +++ b/vignettes/vignette.Rmd @@ -0,0 +1,367 @@ +--- +title: "gateR: Gating strategy for mass cytometry using kernel density estimation" +author: 'Ian D. Buller, Ph.D., M.A. (Github: @idblr)' +date: "`r Sys.Date()`" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{gateR: Gating strategy for mass cytometry using kernel density estimation} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include = FALSE} + knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE, cache = FALSE, fig.width = 7, fig.height = 7, fig.show = "hold") +``` + +Start with the necessary packages and seed for the vignette. + +```{r packages} + loadedPackages <- c("gateR", "graphics", "maptools", "raster", "sp", "sparr", "spatstat.core", "stats", "tibble", "utils") + invisible(lapply(loadedPackages, library, character.only = TRUE)) + set.seed(1234) # for reproducibility +``` + +### Generate random toy data using the `spatstat.core` package + +Unique function to randomly generate data multivariate normal (MVN) around a central point. Parameters include the centroid coordinates (`centre`), number of observations to generate (`ncell`), and the standard deviation of the normal distribution (`scalar`). + +```{r rand_mvn function} + rand_mvn <- function(centre, ncell, scalar) { + x0 <- centre[1] + y0 <- centre[2] + x1 <- rep(x0, ncell) + y1 <- rep(y0, ncell) + x2 <- x1 + stats::rnorm(ncell, 0, scalar) + y2 <- y1 + stats::rnorm(ncell, 0, scalar) + x <- cbind(x2, y2) + } +``` + +#### Gate 1: Marker 1 and Marker 2 + +At Time 0, we generate 100,000 cases and 100,000 controls (`ncell = 100000`) randomly MVN with a case centroid at (`0.55, 0.55`) and a control centroid at (`0.40, 0.40`) within a unit square window `(0, 1)`, and cases have a more focal cluster (`scalar = 0.05`) than controls (`scalar = 0.15`). + +```{r gate 1 time 0} +# Initial parameters + ncell <- 100000 # number of observations per group per time + t0_cas_center <- c(0.55, 0.55) + t0_con_center <- c(0.40, 0.40) +# V1 and V2 at Time 0 + t0_cas <- rand_mvn(centre = t0_cas_center, ncell = ncell, scalar = 0.05) + t0_con <- rand_mvn(centre = t0_con_center, ncell = ncell, scalar = 0.15) + graphics::par(pty = "s") + graphics::plot(t0_con, + col = "blue", + xlim = c(0, 1), + ylim = c(0, 1), + main = "Gate 1, Time 0", + xlab = "V1", + ylab = "V2") + graphics::points(t0_cas, col = "orangered4") +``` + +At Time 1, we generate 100,000 cases and 100,000 controls (`ncell = 100000`) randomly MVN with a case centroid at (`0.45, 0.45`) and a control centroid at (`0.40, 0.40`) within a unit square window `(0, 1)`, and cases have a more focal cluster (`scalar = 0.05`) than controls (`scalar = 0.10`). + +```{r gate 1 time 1} +# Initial parameters + t1_cas_center <- c(0.45, 0.45) + t1_con_center <- c(0.40, 0.40) +# V1 and V2 at Time 1 + t1_cas <- rand_mvn(centre = t1_cas_center, ncell = ncell, scalar = 0.05) + t1_con <- rand_mvn(centre = t1_con_center, ncell = ncell, scalar = 0.10) + graphics::par(pty = "s") + graphics::plot(t1_con, + col = "cornflowerblue", + xlim = c(0, 1), + ylim = c(0, 1), + main = "Gate 1, Time 1", + xlab = "V1", + ylab = "V2") + graphics::points(t1_cas, col = "orangered1") +``` + +```{r compile data} +# compile data + df_full <- tibble::tibble("id" = seq(1, ncell * 2 * 2, 1), + "group" = factor(c(rep("case", ncell * 2), + rep("control", ncell * 2))), + "time" = factor(c(rep("1", ncell), rep("0", ncell), + rep("1", ncell), rep("0", ncell))), + "V1" = c(t1_cas[ , 1], t0_cas[ , 1], t1_con[ , 1], t0_con[ , 1]), + "V2" = c(t1_cas[ , 2], t0_cas[ , 2], t1_con[ , 2], t0_con[ , 2])) +``` + +#### Gate 2: Marker 3 and Marker 4 + +At Time 0, we generate 100,000 cases and 100,000 controls (`ncell = 100000`) randomly MVN with a case centroid at (`0.55, 0.55`) and a control centroid at (`0.50, 0.50`) within a unit square window `(0, 05)`, but both have the same amount of spread (`scalar = 0.10`). + +```{r gate 2 time 0} +# Initial parameters + t0_cas_center <- c(0.55, 0.55) + t0_con_center <- c(0.50, 0.50) +# V3 and V4 at Time 0 + t0_cas <- rand_mvn(centre = t0_cas_center, ncell = ncell, scalar = 0.05) + t0_con <- rand_mvn(centre = t0_con_center, ncell = ncell, scalar = 0.10) + graphics::par(pty = "s") + graphics::plot(t0_con, + col = "blue", + xlim = c(0, 1), + ylim = c(0, 1), + main = "Gate 2, Time 0", + xlab = "V3", + ylab = "V4") + graphics::points(t0_cas, col = "orangered4") +``` + +At Time 1, we generate 100,000 cases and 100,000 controls (`ncell = 100000`) randomly with a case centroid at (`0.65, 0.65`) and control a centroid at (`0.50, 0.50`) within a unit square window `(0, 1)`, and cases have a more focal cluster (`scalar = 0.05`) than controls (`scalar = 0.10`). + +```{r gate 2 time 1} +# Initial parameters + t1_cas_center <- c(0.65, 0.65) + t1_con_center <- c(0.50, 0.50) +# V3 and V4 at Time 1 + t1_cas <- rand_mvn(centre = t1_cas_center, ncell = ncell, scalar = 0.05) + t1_con <- rand_mvn(centre = t1_con_center, ncell = ncell, scalar = 0.10) + graphics::par(pty = "s") + graphics::plot(t1_con, + col = "cornflowerblue", + xlim = c(0, 1), + ylim = c(0, 1), + main = "Gate 2, Time 1", + xlab = "V3", + ylab = "V4") + graphics::points(t1_cas, col = "orangered1") +``` + +Compile the toy data into a data frame + +```{r append data} + df_full$V3 <- c(t1_cas[ , 1], t0_cas[ , 1], t1_con[ , 1], t0_con[ , 1]) + df_full$V4 <- c(t1_cas[ , 2], t0_cas[ , 2], t1_con[ , 2], t0_con[ , 2]) +``` + +Generate random values for two example cytokines and append to the data frame. + +```{r cytokines} +# Two Cytokines + C1 <- stats::rchisq(ncell * 4, df = 5) # Random Chi-square distribution + C2 <- stats::rnorm(ncell * 4, 0, 1) # Random Gaussian distribution +# Append to data.frame + df_full$C1 <- C1 + df_full$C2 <- C2 +# Visualize histograms by the two group conditions + graphics::par(mfrow = c(2, 2), pty = "s") + graphics::plot(stats::density(df_full$C1[df_full$group == "case" + & df_full$time == "0"]), + main = "Cytokine 1 of Cases at Time 0") + graphics::plot(stats::density(df_full$C1[df_full$group == "case" + & df_full$time == "1"]), + main = "Cytokine 1 of Cases at Time 1") + graphics::plot(stats::density(df_full$C1[df_full$group == "control" + & df_full$time == "0"]), + main = "Cytokine 1 of Controls at Time 0") + graphics::plot(stats::density(df_full$C1[df_full$group == "control" + & df_full$time == "1"]), + main = "Cytokine 1 of Controls at Time 1") + graphics::plot(stats::density(df_full$C2[df_full$group == "case" + & df_full$time == "0"]), + main = "Cytokine 2 of Cases at Time 0") + graphics::plot(stats::density(df_full$C2[df_full$group == "case" + & df_full$time == "1"]), + main = "Cytokine 2 of Cases at Time 1") + graphics::plot(stats::density(df_full$C2[df_full$group == "control" + & df_full$time == "0"]), + main = "Cytokine 2 of Controls at Time 0") + graphics::plot(stats::density(df_full$C2[df_full$group == "control" + & df_full$time == "1"]), + main = "Cytokine 2 of Controls at Time 1") +``` + +The toy data frame has nine columns (id, groups, markers, and cytokines). + +```{r full data} + df_save <- df_full # create copy for latter example + utils::head(df_full) +``` + +### For a two conditions + +```{r 2C} +# Initial parameters + alpha <- 0.05 + vars <- c("V1", "V2", "V3", "V4") + p_cor <- "uncorrelated" + set.seed(1234) # for reproducibility + +# Gates 1 and 2 + start_time <- Sys.time() # record start time + out_gate <- gateR::gating(dat = df_save, + vars = vars, + n_condition = 2, + doplot = TRUE, + alpha = alpha, + p_cor = p_cor) + end_time <- Sys.time() # record end time + total_time <- end_time - start_time # calculate duration of gateRR() example +``` + +The gating process took about `r round(total_time, digits = 1)` *seconds* on a Macbook Pro (4 variables, 2 gates, 2 cytokines, `r format(nrow(df_save), big.mark= ",")` observations). The corrected signficance level in the first gate was `r formatC(out_gate$lrr[[1]]$alpha, format = "e", digits = 2)`. The histograms for the two cytokines are the same as above. + +```{r 2C cytokines A} +# Plot of Cytokine 1 + graphics::par(mfrow = c(1, 2), pty = "s") + graphics::plot(stats::density(out_gate$obs$C1[out_gate$obs$group == "case" + & out_gate$obs$time == "1"]), + col = "red", main = "Cytokine 1 of cases\npost-gating", + xlim = c(-5, 30), + ylim = c(0, 0.2)) + graphics::plot(stats::density(out_gate$obs$C1[out_gate$obs$group == "control" + & out_gate$obs$time == "1"]), + col = "blue", + main = "Cytokine 1 of controls\npost-gating", + xlim = c(-5, 30), + ylim = c(0, 0.2)) +# Plot of Cytokine 2 + graphics::par(mfrow = c(1, 2), pty = "s") + graphics::plot(stats::density(out_gate$obs$C2[out_gate$obs$group == "case" + & out_gate$obs$time == "1"]), + col = "red", + main = "Cytokine 2 of cases\npost-gating", + xlim = c(-5, 5), + ylim = c(0, 0.5)) + graphics::plot(stats::density(out_gate$obs$C2[out_gate$obs$group == "control" + & out_gate$obs$time == "1"]), + col = "blue", + main = "Cytokine 2 of controls\npost-gating", + xlim = c(-5, 5), + ylim = c(0, 0.5)) +``` + +Compare histograms before and after gating. Gating reduced the overall sample size of observations from `r format(nrow(df_save), big.mark= ",")` (cases & controls and Time 0 & Time 1) to `r format(nrow(out_gate$obs), big.mark = ",")` observations (cases & controls and Time 0 & Time 1). + +```{r 2C cytokines B} +# Plot of Cytokine 1 + graphics::par(mfrow = c(1, 2), pty = "s") + graphics::plot(stats::density(df_save$C1[df_save$group == "case" + & df_save$time == "1"]), + col = "black", + lty = 1, + main = "Cytokine 1 of cases\npre-gating", + xlim = c(-5, 30), + ylim = c(0, 0.2)) + graphics::plot(stats::density(out_gate$obs$C1[out_gate$obs$group == "case" + & out_gate$obs$time == "1"]), + col = "black", + lty = 1, + main = "Cytokine 1 of cases\npost-gating", + xlim = c(-5, 30), + ylim = c(0, 0.2)) +# Plot of Cytokine 2 + graphics::par(mfrow = c(1, 2), pty = "s") + graphics::plot(stats::density(df_save$C2[df_save$group == "case" + & df_save$time == "1"]), + col = "black", + lty = 1, + main = "Cytokine 2 of cases\npre-gating", + xlim = c(-5, 5), + ylim = c(0, 0.5)) + graphics::plot(stats::density(out_gate$obs$C2[out_gate$obs$group == "case" + & out_gate$obs$time == "1"]), + col = "black", + lty = 1, + main = "Cytokine 2 of cases\npost-gating", + xlim = c(-5 ,5), + ylim = c(0, 0.5)) +``` + +### For a one condition (using only T0) + +```{r 1C} +# Data subset, only T0 + df_sub <- df_save[df_save$time == 0, ] # For only condition Time = 0 + +# Initial parameters + alpha <- 0.05 + vars <- c("V1", "V2", "V3", "V4") + p_cor <- "uncorrelated" + set.seed(1234) # for reproducibility + +# Gates 1 and 2 + start_time <- Sys.time() # record start time + out_gate <- gateR::gating(dat = df_sub, + vars = vars, + doplot = TRUE, + n_condition = 1, + alpha = alpha, + p_cor = p_cor) + end_time <- Sys.time() # record end time + total_time <- end_time - start_time # calculate duration of gateRR() example +``` + +The gating process took about `r round(total_time, digits = 1)` *seconds* on a Macbook Pro (4 variables, 2 gates, 2 cytokines, `r format(nrow(df_sub), big.mark= ",")` observations). The corrected signficance level in the first gate was `r formatC(out_gate$lrr[[1]]$alpha, format = "e", digits = 2)`. The histograms for the two cytokines are the same as above. + +```{r 1C cytokines A} +# Plot of Cytokine 1 + graphics::par(mfrow = c(1, 2), pty = "s") + graphics::plot(stats::density(out_gate$obs$C1[out_gate$obs$group == "case"]), + col = "red", + main = "Cytokine 1 of cases\npost-gating", + xlim = c(-5, 30), + ylim = c(0, 0.2)) + graphics::plot(stats::density(out_gate$obs$C1[out_gate$obs$group == "control"]), + col = "blue", + main = "Cytokine 1 of controls\npost-gating", + xlim = c(-5, 30), + ylim = c(0, 0.2)) +# Plot of Cytokine 2 + graphics::par(mfrow = c(1, 2), pty = "s") + graphics::plot(stats::density(out_gate$obs$C2[out_gate$obs$group == "case"]), + col = "red", + main = "Cytokine 2 of cases\npost-gating", + xlim = c(-5, 5), + ylim = c(0, 0.5)) + graphics::plot(stats::density(out_gate$obs$C2[out_gate$obs$group == "control"]), + col = "blue", + main = "Cytokine 2 of controls\npost-gating", + xlim = c(-5, 5), + ylim = c(0, 0.5)) +``` + +Compare histograms before and after gating. Gating reduced the overall sample size of observations from `r format(nrow(df_sub), big.mark= ",")` (cases & controls) to `r format(nrow(out_gate$obs), big.mark = ",")` observations (cases & controls). + +```{r 1C cytokines B} +# Plot of Cytokine 1 + graphics::par(mfrow = c(1, 2), pty = "s") + graphics::plot(stats::density(df_save$C1[df_save$group == "case"]), + col = "black", + lty = 1, + main = "Cytokine 1 of cases\npre-gating", + xlim = c(-5, 30), + ylim = c(0, 0.2)) + graphics::plot(stats::density(out_gate$obs$C1[out_gate$obs$group == "case"]), + col = "black", + lty = 1, + main = "Cytokine 1 of cases\npost-gating", + xlim = c(-5, 30), + ylim = c(0, 0.2)) +# Plot of Cytokine 2 + graphics::par(mfrow = c(1, 2), pty = "s") + graphics::plot(stats::density(df_save$C2[df_save$group == "case"]), + col = "black", + lty = 1, + main = "Cytokine 2 of cases\npre-gating", + xlim = c(-5, 5), + ylim = c(0, 0.5)) + graphics::plot(stats::density(out_gate$obs$C2[out_gate$obs$group == "case"]), + col = "black", + lty = 1, + main = "Cytokine 2 of cases\npost-gating", + xlim = c(-5 ,5), + ylim = c(0, 0.5)) +``` + +### Current limitations + +1. Extracts observations at *all* significant clusters (either case or controls). There is no ability to select which case cluster for the next gate. +2. Only two dimensions (i.e., Markers) per gate. +3. Only two group comparisons (e.g., case v. control) per gate.