From cc52bebc88701b925effc688f262e41913c6e8a5 Mon Sep 17 00:00:00 2001 From: Chris Black Date: Fri, 17 Oct 2025 14:37:23 -0700 Subject: [PATCH 1/6] wording and whitespace --- base/workflow/R/run.write.configs.R | 2 +- modules/uncertainty/R/ensemble.R | 39 +++++++++---------- .../uncertainty/man/write.ensemble.configs.Rd | 2 +- 3 files changed, 20 insertions(+), 23 deletions(-) diff --git a/base/workflow/R/run.write.configs.R b/base/workflow/R/run.write.configs.R index 3984ecb08f1..18756b45559 100644 --- a/base/workflow/R/run.write.configs.R +++ b/base/workflow/R/run.write.configs.R @@ -164,7 +164,7 @@ run.write.configs <- function(settings, ensemble.size, input_design, write = TRU ### Sensitivity Analysis if ("sensitivity.analysis" %in% names(settings)) { ### Write out SA config files - PEcAn.logger::logger.info("\n ----- Writing model run config files ----") + PEcAn.logger::logger.info("\n ----- Writing model config files for sensitivity run ----") sa.runs <- PEcAn.uncertainty::write.sa.configs( defaults = settings$pfts, quantile.samples = sa.samples, diff --git a/modules/uncertainty/R/ensemble.R b/modules/uncertainty/R/ensemble.R index d44add22cf9..7dcffd4542a 100644 --- a/modules/uncertainty/R/ensemble.R +++ b/modules/uncertainty/R/ensemble.R @@ -217,7 +217,7 @@ get.ensemble.samples <- function( ensemble.size, pft.samples, env.samples, ##' @return list, containing $runs = data frame of runids, $ensemble.id = the ensemble ID for these runs and $samples with ids and samples used for each tag. Also writes sensitivity analysis configuration files as a side effect ##' @details The restart functionality is developed using model specific functions by calling write_restart.modelname function. First, you need to make sure that this function is already exist for your desired model.See here \url{https://pecanproject.github.io/pecan-documentation/latest/pecan-models.html} ##' new state is a dataframe with a different column for each state variable. 
The number of the rows in this dataframe needs to be the same as the ensemble size. -##' State variables that you can use for setting up the intial conditions differs for different models. You may check the documentation of the write_restart.modelname your model. +##' State variables that you can use for setting up the initial conditions differs for different models. You may check the documentation of the write_restart.modelname your model. ##' The units for the state variables need to be in the PEcAn standard units which can be found in \link{standard_vars}. ##' new.params also has similar structure to ensemble.samples which is sent as an argument. ##' @@ -225,29 +225,26 @@ get.ensemble.samples <- function( ensemble.size, pft.samples, env.samples, ##' @importFrom rlang .data ##' @export ##' @author David LeBauer, Carl Davidson, Hamze Dokoohaki -write.ensemble.configs <- function(input_design , ensemble.size, defaults, ensemble.samples, settings, model, +write.ensemble.configs <- function(input_design, ensemble.size, defaults, ensemble.samples, settings, model, clean = FALSE, write.to.db = TRUE, restart = NULL, samples = NULL, rename = FALSE) { - - - # Check if there are NO inputs - -for (input_tag in names(settings$run$inputs)) { - input <- settings$run$inputs[[input_tag]] - input_paths <- input$path - + # Check for required paths - if (is.null(input_paths) || length(input_paths) == 0) { - PEcAn.logger::logger.error("Input", sQuote(input_tag), "has no paths specified") - } - - # Check for unsampled multi-path inputs - if (length(input_paths) > 1 && - !(input_tag %in% names(settings$ensemble$samplingspace))) { - PEcAn.logger::logger.error( - "Input", sQuote(input_tag), "has", length(input_paths), "paths but no sampling method.", - "Add for this input in pecan.xml") + for (input_tag in names(settings$run$inputs)) { + input <- settings$run$inputs[[input_tag]] + input_paths <- input$path + if (is.null(input_paths) || length(input_paths) == 0) { + 
PEcAn.logger::logger.error("Input", sQuote(input_tag), "has no paths specified") + } + # Check for unsampled multi-path inputs + if (length(input_paths) > 1 && + !(input_tag %in% names(settings$ensemble$samplingspace))) { + PEcAn.logger::logger.error( + "Input", sQuote(input_tag), "has", length(input_paths), + "paths but no sampling method.", + "Add for this input in pecan.xml" + ) + } } -} diff --git a/modules/uncertainty/man/write.ensemble.configs.Rd b/modules/uncertainty/man/write.ensemble.configs.Rd index 86abcc87026..9439d141a19 100644 --- a/modules/uncertainty/man/write.ensemble.configs.Rd +++ b/modules/uncertainty/man/write.ensemble.configs.Rd @@ -52,7 +52,7 @@ a name to distinguish the output files, and the directory to place the files. \details{ The restart functionality is developed using model specific functions by calling write_restart.modelname function. First, you need to make sure that this function is already exist for your desired model.See here \url{https://pecanproject.github.io/pecan-documentation/latest/pecan-models.html} new state is a dataframe with a different column for each state variable. The number of the rows in this dataframe needs to be the same as the ensemble size. -State variables that you can use for setting up the intial conditions differs for different models. You may check the documentation of the write_restart.modelname your model. +State variables that you can use for setting up the initial conditions differs for different models. You may check the documentation of the write_restart.modelname your model. The units for the state variables need to be in the PEcAn standard units which can be found in \link{standard_vars}. new.params also has similar structure to ensemble.samples which is sent as an argument. 
} From 9ccf90420cc3f71a31849aff5cf019d6f23297b3 Mon Sep 17 00:00:00 2001 From: Chris Black Date: Fri, 17 Oct 2025 14:39:43 -0700 Subject: [PATCH 2/6] use settings hash as ensemble id if not provided --- modules/uncertainty/R/ensemble.R | 13 +++++++------ .../uncertainty/R/run.sensitivity.analysis.R | 18 ++++++------------ modules/uncertainty/R/sensitivity.R | 8 ++++++-- 3 files changed, 19 insertions(+), 20 deletions(-) diff --git a/modules/uncertainty/R/ensemble.R b/modules/uncertainty/R/ensemble.R index 7dcffd4542a..eee8247066a 100644 --- a/modules/uncertainty/R/ensemble.R +++ b/modules/uncertainty/R/ensemble.R @@ -278,14 +278,12 @@ write.ensemble.configs <- function(input_design, ensemble.size, defaults, ensemb # Get the workflow id - if (!is.null(settings$workflow$id)) { - workflow.id <- settings$workflow$id - } else { - workflow.id <- -1 - } + workflow.id <- settings$workflow$id %||% -1 + #------------------------------------------------- if this is a new fresh run------------------ if (is.null(restart)){ # create an ensemble id + # Note: this ignores any existing settings$ensemble$id if (!is.null(con) && write.to.db) { # write ensemble first ensemble.id <- PEcAn.DB::db.query(paste0( @@ -299,7 +297,10 @@ write.ensemble.configs <- function(input_design, ensemble.size, defaults, ensemb "values (", pft$posteriorid, ", ", ensemble.id, ")"), con = con) } } else { - ensemble.id <- NA + # Use existing id if provided, or an arbitrary unique value if not + # Note: Since write.ensemble.configs is called separately for each site, + # a multisite run with no ID provided gives each site its own ensemble id! + ensemble.id <- settings$ensemble$id %||% rlang::hash(settings) } #-------------------------generating met/param/soil/veg/... 
for all ensembles---- if (!is.null(con)){ diff --git a/modules/uncertainty/R/run.sensitivity.analysis.R b/modules/uncertainty/R/run.sensitivity.analysis.R index 53c00c8fb0c..de4474202b4 100644 --- a/modules/uncertainty/R/run.sensitivity.analysis.R +++ b/modules/uncertainty/R/run.sensitivity.analysis.R @@ -104,18 +104,12 @@ run.sensitivity.analysis <- function(settings, # Can specify ensemble ids manually. If not, look in settings. # If none there, will use the most recent, which was loaded with samples.Rdata - if (!is.null(ensemble.id)) { - fname <- sensitivity.filename(settings, "sensitivity.samples", "Rdata", - ensemble.id = ensemble.id, - all.var.yr = TRUE) - } else if (!is.null(settings$sensitivity.analysis$ensemble.id)) { - ensemble.id <- settings$sensitivity.analysis$ensemble.id - fname <- sensitivity.filename(settings, "sensitivity.samples", "Rdata", - ensemble.id = ensemble.id, - all.var.yr = TRUE) - } else { - ensemble.id <- NULL - } + ensemble.id <- ensemble.id %||% + settings$sensitivity.analysis$ensemble.id %||% + rlang::hash(settings) + fname <- sensitivity.filename(settings, "sensitivity.samples", "Rdata", + ensemble.id = ensemble.id, + all.var.yr = TRUE) if (file.exists(fname)) { load(fname, envir = samples) } diff --git a/modules/uncertainty/R/sensitivity.R b/modules/uncertainty/R/sensitivity.R index a726198a1b3..f1c9d8ab16a 100644 --- a/modules/uncertainty/R/sensitivity.R +++ b/modules/uncertainty/R/sensitivity.R @@ -162,6 +162,7 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, names(median.samples) <- names(quantile.samples) if (!is.null(con)) { + # Note: ignores any existing run or ensemble ids in settings ensemble.id <- PEcAn.DB::db.query(paste0( "INSERT INTO ensembles (runtype, workflow_id) ", "VALUES ('sensitivity analysis', ", format(workflow.id, scientific = FALSE), ") ", @@ -206,8 +207,11 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, } } } else { - run.id <- 
PEcAn.utils::get.run.id("SA", "median") - ensemble.id <- NA + run.id <- PEcAn.utils::get.run.id("SA", "median", site.id = settings$run$site$id) + # Use SA ensemble id if provided, or an arbitrary unique value if not + # Note: Since write.sa.configs is called separately for each site, + # a multisite run with no ID provided gives each site its own ensemble id! + ensemble.id <- settings$sensitivity.analysis$ensemble.id %||% rlang::hash(settings) } medianrun <- run.id From 91cc792b69fd281d46c92b72091c5c4b69c790f7 Mon Sep 17 00:00:00 2001 From: Chris Black Date: Fri, 17 Oct 2025 14:40:27 -0700 Subject: [PATCH 3/6] filter to pfts present at this site --- modules/uncertainty/R/run.sensitivity.analysis.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modules/uncertainty/R/run.sensitivity.analysis.R b/modules/uncertainty/R/run.sensitivity.analysis.R index de4474202b4..a736d5671e5 100644 --- a/modules/uncertainty/R/run.sensitivity.analysis.R +++ b/modules/uncertainty/R/run.sensitivity.analysis.R @@ -76,6 +76,9 @@ run.sensitivity.analysis <- function(settings, if (is.null(pfts)) { #extract just pft names pfts <- purrr::map_chr(settings$pfts, "name") + if (!is.null(settings$run$site$site.pft)) { + pfts <- pfts[pfts %in% settings$run$site$site.pft] + } } else { # validate pfts argument if (!is.character(pfts)) { From c56c125f4a93546e9c290612fb4a2158199bfc3a Mon Sep 17 00:00:00 2001 From: Chris Black Date: Fri, 17 Oct 2025 14:40:59 -0700 Subject: [PATCH 4/6] pass site id when naming rundirs --- modules/uncertainty/R/sensitivity.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/uncertainty/R/sensitivity.R b/modules/uncertainty/R/sensitivity.R index f1c9d8ab16a..7caa635b8b1 100644 --- a/modules/uncertainty/R/sensitivity.R +++ b/modules/uncertainty/R/sensitivity.R @@ -343,7 +343,8 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, run.type = "SA", index = round(quantile, 3), trait = trait, - pft.name = 
names(trait.samples)[i] + pft.name = names(trait.samples)[i], + site.id = settings$run$site$id ) } runs[[pftname]][quantile.str, trait] <- run.id From ef6d42d21ecee209c9e2854ff4c82115ac61b8cf Mon Sep 17 00:00:00 2001 From: Chris Black Date: Mon, 20 Oct 2025 10:20:05 -0700 Subject: [PATCH 5/6] changelog --- CHANGELOG.md | 1 + modules/uncertainty/NEWS.md | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b4c46975e64..e4e3744314b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,6 +38,7 @@ section for the next release. ### Changed +- Ensemble and sensitivity analyses run with now assign an ensemble ID if one is not specified in the XML, even when running with no DB (#TBD). - `download.ERA5_cds` now uses the R package ecmwfr (replacing python dependency of cdsapi via reticulate), enabling direct NetCDF downloads; and made flexible for both reanalysis and ensemble data product. - `extract_soil_gssurgo` now supports spatial sampling using a grid of user-defined size and spacing. And supports ensemble simulation of soil organic carbon (SOC) stocks, using area-weighted aggregation - The ERA5 NC extraction function can now handle multi-site instead of one diff --git a/modules/uncertainty/NEWS.md b/modules/uncertainty/NEWS.md index bf375a09765..fa25e868f4a 100644 --- a/modules/uncertainty/NEWS.md +++ b/modules/uncertainty/NEWS.md @@ -1,6 +1,12 @@ # PEcAn.uncertainty 1.8.2 -* Plotting sensitivity now makes less noise in the console and once again produces a one-page PDF as intended. +* Plotting sensitivity now makes less noise in the console and once again + produces a one-page PDF as intended. +* `write.ensemble.configs` and `write.sa.configs` now generate an ensemble id + if one is not provided in a DB-free run. + Runs with DB continue to always generate a new id. + Note that multi-site runs with no id provided will now get a separate + ensemble ID (and thus generate separate analyses) for each site. 
* Documented that `runModule.run.sensitivity.analysis` does not yet work with multisite settings. This will be fixed in a future release. From 159284afbfe637739cc8a4e12adc9d3167fcc6ba Mon Sep 17 00:00:00 2001 From: Chris Black Date: Mon, 20 Oct 2025 10:58:20 -0700 Subject: [PATCH 6/6] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e4e3744314b..d5a459c19da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,7 +38,7 @@ section for the next release. ### Changed -- Ensemble and sensitivity analyses run with now assign an ensemble ID if one is not specified in the XML, even when running with no DB (#TBD). +- Ensemble and sensitivity analysis runs now assign an ensemble ID if one is not specified in the XML, even when running with no DB (#3654). - `download.ERA5_cds` now uses the R package ecmwfr (replacing python dependency of cdsapi via reticulate), enabling direct NetCDF downloads; and made flexible for both reanalysis and ensemble data product. - `extract_soil_gssurgo` now supports spatial sampling using a grid of user-defined size and spacing. And supports ensemble simulation of soil organic carbon (SOC) stocks, using area-weighted aggregation - The ERA5 NC extraction function can now handle multi-site instead of one