RMI-PACTA · cjyetman · Feb 10, 2025 · Feb 5, 2025 · Feb 5, 2025 · Feb 5, 2025
@@ -1,6 +1,6 @@
 Package: r2dii.analysis
 Title: Measure Climate Scenario Alignment of Corporate Loans
-Version: 0.4.0.9003
+Version: 0.4.0.9004
 Authors@R: 
     c(person(given = "Jacob",
              family = "Kastl",
@@ -47,7 +47,7 @@ URL: https://rmi-pacta.github.io/r2dii.analysis/,
     https://github.com/RMI-PACTA/r2dii.analysis
 BugReports: https://github.com/RMI-PACTA/r2dii.analysis/issues
 Depends:
-    R (>= 3.4)
+    R (>= 3.5)
 Imports: 
     dplyr (>= 0.8.5),
     glue,
@@ -72,3 +72,4 @@ Encoding: UTF-8
 Language: en-US
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.3.2
+LazyData: true
@@ -1,7 +1,8 @@
 # r2dii.analysis (development version)
 
-* r2dii.analysis is now [stable](https://lifecycle.r-lib.org/articles/stages.html).
-* @jacobvjk is now the maintainer.
+* `data_dictionary` dataset added to define the columns in each dataset used or exported by the functions in this package (#521).
+* r2dii.analysis is now [stable](https://lifecycle.r-lib.org/articles/stages.html) (#488).
+* @jacobvjk is now the maintainer (#519).
 
 # r2dii.analysis 0.4.0
 

@@ -0,0 +1,19 @@
+#' Data Dictionary
+#'
+#' A table of column names, types and definitions for the data frames used or
+#' exported by the functions in this package, with one row per column.
+#'
+#' @family data dictionary
+#'
+#' @format ## `data_dictionary`
+#' \describe{
+#'   \item{dataset}{Name of the dataset the column belongs to}
+#'   \item{column}{Name of the column}
+#'   \item{typeof}{Type of the column, as reported by \code{typeof()}}
+#'   \item{definition}{Plain-language definition of the column}
+#' }
+#'
+#' @examples
+#' data_dictionary
+
+"data_dictionary"
@@ -48,4 +48,6 @@ reference:
   contents: has_concept("functions to calculate scenario targets")
 - title: Utility functions
   contents: has_concept("utility functions")
+- title: Data dictionary
+  contents: has_concept("data dictionary")
 
@@ -0,0 +1,9 @@
+library(readr)
+library(usethis)
+
+# Build `data_dictionary` from the CSV files in data-raw/data_dictionary (CSV files only, every column read as text).
+paths <- list.files("data-raw/data_dictionary", pattern = "\\.csv$", full.names = TRUE)
+out <- readr::read_csv(file = paths, col_types = readr::cols(.default = readr::col_character()))
+# Sort by dataset, then column, so row order is independent of the order in which the files were read.
+data_dictionary <- out[order(out$dataset, out$column), , drop = FALSE]
+usethis::use_data(data_dictionary, overwrite = TRUE)
@@ -0,0 +1,23 @@
+dataset,column,typeof,definition
+join_abcd_scenario_output,id_loan,character,Unique loan identifier
+join_abcd_scenario_output,loan_size_outstanding,double,Amount drawn by borrower from total credit limit
+join_abcd_scenario_output,loan_size_outstanding_currency,character,Currency corresponding to outstandings
+join_abcd_scenario_output,loan_size_credit_limit,double,Total credit limit or exposure at default
+join_abcd_scenario_output,loan_size_credit_limit_currency,character,Currency corresponding to credit limit
+join_abcd_scenario_output,id_2dii,character,an id used internally by match_name() to distinguish companies
+join_abcd_scenario_output,level,character,the level of granularity that the loan was matched at (e.g. direct_loantaker or ultimate_parent)
+join_abcd_scenario_output,score,double,the score of the match (manually set this to 1 prior to calling prioritize() to validate the match)
+join_abcd_scenario_output,sector,character,the sector of the abcd company
+join_abcd_scenario_output,name_abcd,character,the name of the abcd company
+join_abcd_scenario_output,sector_abcd,character,the sector of the abcd company
+join_abcd_scenario_output,technology,character,Technology implemented by the asset
+join_abcd_scenario_output,plant_location,character,Country where asset is located
+join_abcd_scenario_output,is_ultimate_owner,logical,Flag if company is the ultimate parent in our database
+join_abcd_scenario_output,scenario,character,The name of the scenario
+join_abcd_scenario_output,region,character,The region to which the pathway is relevant
+join_abcd_scenario_output,scenario_source,character,The source publication from which the scenario was taken
+join_abcd_scenario_output,year,integer,The year at which the pathway value is prescribed
+join_abcd_scenario_output,production,double,Company level production of the technology
+join_abcd_scenario_output,emission_factor,double,Company level emission factor of the technology
+join_abcd_scenario_output,tmsr,double,Technology market share ratio of the pathway
+join_abcd_scenario_output,smsp,double,Sector market share percentage of the pathway
@@ -0,0 +1,5 @@
+dataset,column,typeof,definition
+summarize_weighted_percent_change_output,sector_abcd,character,the sector of the summarized assets
+summarize_weighted_percent_change_output,technology,character,the technology of the summarized assets
+summarize_weighted_percent_change_output,year,double,the year at which the pathway value is prescribed
+summarize_weighted_percent_change_output,weighted_percent_change,double,"the weighted average of the percent change in production associated with a loan, weighted by loan size"
@@ -0,0 +1,6 @@
+dataset,column,typeof,definition
+summarize_weighted_production_output,sector_abcd,character,the sector of the summarized assets
+summarize_weighted_production_output,technology,character,the technology of the summarized assets
+summarize_weighted_production_output,year,double,the year at which the pathway value is prescribed
+summarize_weighted_production_output,weighted_production,double,"the weighted sum of the production associated with a loan, weighted by loan size"
+summarize_weighted_production_output,weighted_technology_share,double,"the weighted share of the production from the technology within its sector associated with a loan, weighted by loan size"
@@ -0,0 +1,11 @@
+dataset,column,typeof,definition
+target_market_share_output,sector,character,the sector of the abcd company
+target_market_share_output,technology,character,technology implemented by the asset
+target_market_share_output,year,double,the year at which the pathway value is prescribed
+target_market_share_output,region,character,the region to which the pathway is relevant
+target_market_share_output,scenario_source,character,the source publication from which the scenario was taken
+target_market_share_output,metric,character,"indicates if the production related values refer to the projected activities of the underlying counterparty, to the economy wide benchmark, or to allocated levels of activity based on the scenarios"
+target_market_share_output,production,double,company level production of the technology
+target_market_share_output,technology_share,double,"the share of the 'production' of the given 'technology' relative to all technologies of the corresponding 'sector' for the given combination of 'group_id', 'region', 'year' and 'metric'"
+target_market_share_output,scope,character,"indicates if the targets for the given technology have been calculated based on the TMSR (technology) or the SMSP (sector). High-carbon technologies that need to decrease have their targets calculated on the technology level, whereas low-carbon technologies that need to increase have them calculated on the sector level"
+target_market_share_output,percentage_of_initial_production_by_scope,double,relative change compared to the start value (by scope). Used for displaying the change in activity over time on a common scale
@@ -0,0 +1,7 @@
+dataset,column,typeof,definition
+target_sda_output,sector,character,the sector of the abcd company
+target_sda_output,year,double,the year at which the pathway value is prescribed
+target_sda_output,region,character,the region to which the pathway is relevant
+target_sda_output,scenario_source,character,the source publication from which the scenario was taken
+target_sda_output,emission_factor_metric,character,"indicates if the emission intensity related values refer to the projected activities of the underlying counterparty, to the economy wide benchmark, or to allocated levels of activity based on the scenarios"
+target_sda_output,emission_factor_value,double,the physical emission intensity level of the given 'emission_factor_metric'
@@ -316,3 +316,17 @@ test_that("only extend timeline beyond t0 of abcd #157", {
   expect_equal(out_b$`2021`$production, 1)
 
 })
+
+test_that("columns in output match what is documented in `data_dictionary`", {
+  out <- join_abcd_scenario(
+    data = fake_matched(),
+    abcd = fake_abcd(),
+    scenario = fake_scenario(),
+    region_isos = region_isos_stable
+  )
+  # Keep only the dictionary rows that document this function's output.
+  data_dict <- dplyr::filter(r2dii.analysis::data_dictionary, dataset == "join_abcd_scenario_output")
+  # Names are compared as a set and types are matched by name; vapply() guarantees a character vector.
+  expect_setequal(names(out), data_dict[["column"]])
+  expect_mapequal(vapply(out, typeof, character(1)), setNames(data_dict[["typeof"]], data_dict[["column"]]))
+})
@@ -401,3 +401,21 @@ test_that("with different currencies errors with informative message (#137)", {
     class = "multiple_currencies"
   )
 })
+
+test_that("columns in output match what is documented in `data_dictionary`", {
+  out <- summarize_weighted_production(data = fake_master())
+  # Keep only the dictionary rows that document this function's output.
+  data_dict <- dplyr::filter(r2dii.analysis::data_dictionary, dataset == "summarize_weighted_production_output")
+  # Names are compared as a set and types are matched by name; vapply() guarantees a character vector.
+  expect_setequal(names(out), data_dict[["column"]])
+  expect_mapequal(vapply(out, typeof, character(1)), setNames(data_dict[["typeof"]], data_dict[["column"]]))
+})
+
+test_that("columns in output match what is documented in `data_dictionary`", {
+  out <- summarize_weighted_percent_change(data = fake_master())
+  # Keep only the dictionary rows that document this function's output.
+  data_dict <- dplyr::filter(r2dii.analysis::data_dictionary, dataset == "summarize_weighted_percent_change_output")
+  # Names are compared as a set and types are matched by name; vapply() guarantees a character vector.
+  expect_setequal(names(out), data_dict[["column"]])
+  expect_mapequal(vapply(out, typeof, character(1)), setNames(data_dict[["typeof"]], data_dict[["column"]]))
+})
@@ -1566,3 +1566,17 @@ test_that("target_market_share() calculates target_* values for missing low carb
     scen_technologies
   )
 })
+
+test_that("columns in output match what is documented in `data_dictionary`", {
+  out <- target_market_share(
+    data = fake_matched(),
+    abcd = fake_abcd(),
+    scenario = fake_scenario(),
+    region_isos = region_isos_stable
+  )
+  # Keep only the dictionary rows that document this function's output.
+  data_dict <- dplyr::filter(r2dii.analysis::data_dictionary, dataset == "target_market_share_output")
+  # Names are compared as a set and types are matched by name; vapply() guarantees a character vector.
+  expect_setequal(names(out), data_dict[["column"]])
+  expect_mapequal(vapply(out, typeof, character(1)), setNames(data_dict[["typeof"]], data_dict[["column"]]))
+})
@@ -1013,3 +1013,26 @@ test_that("target of final year always converges at final value of adjusted_scen
   expect_equal(unique(final_targets_converge_company$targets_converge), TRUE)
 })
 
+test_that("columns in output match what is documented in `data_dictionary`", {
+  out <- target_sda(
+    data = fake_matched(
+      sector_abcd = "cement"
+    ),
+    abcd = fake_abcd(
+      sector = "cement",
+      technology = "cement",
+      year = c(2020, 2021, 2022),
+      emission_factor = c(1, 2, 3)
+    ),
+    co2_intensity_scenario = fake_co2_scenario(
+      year = c(2020, 2050),
+      emission_factor = c(0.6, 0.2)
+    ),
+    region_isos = region_isos_stable
+  )
+  # Keep only the dictionary rows that document this function's output.
+  data_dict <- dplyr::filter(r2dii.analysis::data_dictionary, dataset == "target_sda_output")
+  # Names are compared as a set and types are matched by name; vapply() guarantees a character vector.
+  expect_setequal(names(out), data_dict[["column"]])
+  expect_mapequal(vapply(out, typeof, character(1)), setNames(data_dict[["typeof"]], data_dict[["column"]]))
+})