diff --git a/DESCRIPTION b/DESCRIPTION index 0d58be7..4eb2bba 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: EMLeditor Title: View and Edit EML Metadata -Version: 0.1.4 +Version: 0.1.5 Authors@R: c( person(given="Robert", family="Baker", email="robert_baker@nps.gov", role = c("aut", "cre"), @@ -19,7 +19,7 @@ License: MIT + file LICENSE BugReports: https://github.com/nationalparkservice/EMLeditor/issues Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 Remotes: NCEAS/arcticdatautils, nationalparkservice/DPchecker @@ -29,26 +29,27 @@ Imports: sf, utils, EML, - arcticdatautils, magrittr, stringr, - gdata, - stargazer, lubridate, - ISOcodes, crayon, dplyr, stats, lifecycle, here, DPchecker (>= 0.1.0), - purrr, jsonlite, tibble, - cli, + cli, XML, - curl -Suggests: + curl, + mockr, + rlang, + ISOcodes, + gdata +Suggests: + arcticdatautils, + stargazer, knitr, rmarkdown, testthat (>= 3.0.0) diff --git a/NAMESPACE b/NAMESPACE index 6c8ab87..786b6ba 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +export(.get_user_input3) export(check_eml) export(get_abstract) export(get_additional_info) @@ -8,6 +9,8 @@ export(get_begin_date) export(get_citation) export(get_content_units) export(get_cui) +export(get_cui_code) +export(get_cui_marking) export(get_doi) export(get_drr_doi) export(get_drr_title) @@ -15,7 +18,9 @@ export(get_ds_id) export(get_end_date) export(get_file_info) export(get_lit) +export(get_methods) export(get_producing_units) +export(get_publisher) export(get_title) export(remove_datastore_files) export(set_abstract) @@ -25,6 +30,8 @@ export(set_creator_orcids) export(set_creator_order) export(set_creator_orgs) export(set_cui) +export(set_cui_code) +export(set_cui_marking) export(set_data_urls) export(set_datastore_doi) export(set_doi) @@ -33,12 +40,14 @@ export(set_int_rights) export(set_language) export(set_lit) export(set_methods) +export(set_missing_data) export(set_producing_units) export(set_protocol) export(set_publisher) export(set_title) export(upload_data_package) -export(write_readme) importFrom(lifecycle,deprecated) importFrom(magrittr,"%>%") +importFrom(mockr,local_mock) +importFrom(rlang,local_options) importFrom(stats,complete.cases) diff --git a/NEWS.md b/NEWS.md index 2335e34..bc73180 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,31 @@ +# EMLeditor v0.1.5 "Little Bighorn" + +## 2024-04-01 + * Fix bug in `set_creator_orcids()`: no longer adds https://orcid.org/NA for creators without an orcid. + * Added checks in `set_creator_orcids()` such that users must specify NA (not "NA") and to check that the length of the orcid list supplied matches the length of the authors in metadata (excluding organizational authors). + * Updated `set_creator_orcids()` documentation to specify that the function can also be used to remove orcids from authors. + * Updated the EML creation script to reference `set_cui_code()` as opposed to the (now deprecated) `set_cui()`. + +## 2024-04-01 + * Fix bug in `set_cui_code()` that was detecting both CUI code and CUI marking. + * Fix bug in `set_cui_marking()`. + * Fix bug in `set_creator_order()`. + +## 2024-03-12 + * make `write_readme()` a non-exported function. + +## 2024-02-29 + * Add function `get_cui_code()`. Deprecate function `get_cui()`. + * Add function `get_cui_marking()`. + +## 2024-02-22 + * Added function `set_missing_data()` which allows users to add missing data codes and missing data code definitions to metadata. + * Added utility functions `.get_user_input()` and `.get_user_input3()`. Refactored all set_ class functions to use these sub-functions rather than readlines() to get user input. + +## 2024-02-13 + * Deprecated `set_cui()` in favor of `set_cui_dissem()`, which does the exact same thing as `set_cui()` but the function name has been updated to distinguish the action of the function from the newly added `set_cui_code()` function. + * Updated the publisher contact email in `set_npspublisher()` from irma@nps.gov to nrss_datastore@nps.gov to reflect DataStore changes in the contact email address. + # EMLeditor v0.1.4 "Mackinac Island" ## 2024-01-18 @@ -135,7 +163,7 @@ Added a new function, `set_datastore_doi()` that will initiate a draft reference ## February 09, 2023 -# Summary +### Summary Bug fixes, update `set_cui()` codes, flesh out `set_int_rights`. Update documentation. diff --git a/R/UntitledR.R b/R/UntitledR.R index 00bca7c..f7c3ecf 100644 --- a/R/UntitledR.R +++ b/R/UntitledR.R @@ -34,3 +34,6 @@ # agol_token) # data_w_covariates <- rbind(data_w_covariates, temporary) } return(data_w_covariates) } #} + + + diff --git a/R/check_eml.R b/R/check_eml.R index 20ecded..e811b99 100644 --- a/R/check_eml.R +++ b/R/check_eml.R @@ -29,7 +29,6 @@ check_eml <- function(path = here::here()) { #' @param outfile is the name of the file you want to write, typically *.txt. #' #' @return a character string in readable format (saved to the given outfile) -#' @export #' #' @examples #' \dontrun{ diff --git a/R/datastore_interactions.R b/R/datastore_interactions.R index 0a1fd95..4253120 100644 --- a/R/datastore_interactions.R +++ b/R/datastore_interactions.R @@ -1,8 +1,8 @@ #' Initiates a draft reference and inserts the reserved DOI into metadata #' -#' @description `set_datastore_doi()` differs from `set_doi()` in that this function generates a draft reference on DataStore and uses that draft reference to auto-populate the DOI within metadata whereas the later requires manually initiating a draft reference in DataStore and providing the reference ID to insert the DOI into metadata. +#' @description `set_datastore_doi()` differs from `set_doi()` in that this function generates a draft reference on DataStore and uses that draft reference to auto-populate the DOI within metadata whereas the latter requires manually initiating a draft reference in DataStore and providing the reference ID to insert the DOI into metadata. #' -#' @details To prevent generating too many (unused) draft references, `set_datastore_doi()` checks your metadata contents prior to initiating a draft reference on DataStore. If you already have a DOI specified, it will ask if you really want to over-write the DOI **and** initiate a new draft reference. Setting force = TRUE will over-ride this aspect of the function, so use with care. the `set_datastore_doi()` function requires that your metadata already contain a data package title and if it is missing will prompt you to insert it and quit. Setting force = TRUE will not override this check. If R cannot successfully initiate a draft reference on DataStore, the function will remind you to log on to the VPN. If the problem persists, email [irma@nps.gov](mailto:irma@nps.gov). +#' @details To prevent generating too many (unused) draft references, `set_datastore_doi()` checks your metadata contents prior to initiating a draft reference on DataStore. If you already have a DOI specified, it will ask if you really want to over-write the DOI **and** initiate a new draft reference. Setting force = TRUE will over-ride this aspect of the function, so use with care. the `set_datastore_doi()` function requires that your metadata already contain a data package title and if it is missing will prompt you to insert it and quit. Setting force = TRUE will not override this check. If R cannot successfully initiate a draft reference on DataStore, the function will remind you to log on to the VPN. If the problem persists, email [NRSS_DataStore@nps.gov ](mailto:NRSS_DataStore@nps.gov ). #' #' @details This function generates a draft reference on DataStore. If you run with force = FALSE (default), the function will report the draft reference URL and the draft title for the draft reference. Make sure you upload your data and metadata to the correct draft reference! Your draft reference title should read: "DRAFT: ". This will be updated to your data package title when you upload your metadata. #' @@ -28,16 +28,16 @@ #' } set_datastore_doi <- function(eml_object, force = FALSE, NPS = TRUE, dev = FALSE){ # check for existing DOI: - doc <- utils::capture.output(get_doi(eml_object)) + doc <- utils::capture.output(EMLeditor::get_doi(eml_object)) #get data package title from metadata: data_package_title <- EMLeditor::get_title(eml_object) if(force == FALSE){ # if there is NOT an existing DOI in metadata: if(length(seq_along(doc)) > 1 ){ - cat("Your metadata does not have a previously specified DOI.\n", sep = "") + cat("Your metadata does not have a previously specified DOI.\n", + sep = "") cat("Are you sure you want to create a new draft reference on DataStore and insert the corresponding DOI into your metadata?\n") - message("1: Yes") - var1 <- readline(prompt = "2: No \n") + var1 <- .get_user_input() #1 = yes, 2 = no if (var1 == 2){ cat("Function terminated. You have not created a new draft reference on DataStore and a DOI has not been added to your metadata.") return() @@ -81,8 +81,7 @@ set_datastore_doi <- function(eml_object, force = FALSE, NPS = TRUE, dev = FALSE } #Ask if they really want a new DOI & new draft reference? cat("Are you sure you want to create a new draft reference on DataStore and insert the corresponding DOI into your metadata?\n") - message("1: Yes") - var1 <- readline(prompt = "2: No\n") + var1 <- .get_user_input() # 1 = yes, 2 = no # if chooses not to add a new doi/generate a new draft reference: if (var1 == 2){ cat("Function terminated. You have not created a new draft reference on DataStore and your original DOI has been retained.") @@ -255,7 +254,7 @@ upload_data_package <- function(directory = here::here(), return() } cat("Are you sure you want to upload your data package files to this reference?\n") - var1 <- readline(prompt = "1: Yes\n2: No\n") + var1 <- .get_user_input() # 1 = yes, 2 = no if (var1 == 2) { cat("Function terminated. You have not uploaded any files to DataStore.") } @@ -524,7 +523,7 @@ remove_datastore_files <- function(data_store_reference, Reference?" x <- strwrap(x, width = 10000, simplify = TRUE) cat(x) - var1 <- readline(prompt = "1: Yes\n2: No\n") + var1 <- .get_user_input() # 1 = yes, 2 = no if (var1 == 2) { cat("You have not removed any files from the Reference.") return(invisible()) diff --git a/R/editEMLfunctions.R b/R/editEMLfunctions.R index 8c0b3d1..d7790b5 100644 --- a/R/editEMLfunctions.R +++ b/R/editEMLfunctions.R @@ -1,11 +1,14 @@ #' Edit data package title #' -#' @details The set_title function checks to see if there is an existing title and then asks the user if they would like to change the title. Some work is still needed on this function as get_eml() automatically returns all instances of a given tag. Specifying which title will be important for this function to work well. +#' @details The `set_title()` function checks to see if there is an existing title and then asks the user if they would like to change the title. Some work is still needed on this function as `get_eml()` automatically returns all instances of a given tag. Specifying which title will be important for this function to work well. #' -#' @param eml_object is an R object imported (typically from an EML-formatted .xml file) using EML::read_eml(, from="xml"). +#' @param eml_object is an EML-formatted R object, either generated in R or imported (typically from an EML-formatted .xml file) using EML::read_eml(, from="xml"). #' @param data_package_title is a character string that will become the new title for the data package. It can be specified directly in the function call or it can be a previously defined object that holds a character string. #' @param force logical. Defaults to false. If set to FALSE, a more interactive version of the function requesting user input and feedback. Setting force = TRUE facilitates scripting. -#' @param NPS Logical. Defaults to TRUE. **Most users should leave this as the default**. Only under specific circumstances should it be set to FALSE: if you are **not** publishing with NPS, if you need to set the publisher location to some place other than the Fort Collins Office (e.g. you are NOT working on a data package) or your product is "for" the NPS by not "by" the NPS and you need to specify a different agency, set NPS = FALSE. When NPS=TRUE, the function will over-write existing publisher info and inject NPS as the publisher along the the Central Office in Fort Collins as the location. Additionally, it sets the "for or by NPS" field to TRUE and specifies the originating agency as NPS. +#' @param NPS Logical. Defaults to TRUE. **Most NPS users should leave this as the default**. Only under specific circumstances should it be set to FALSE: if you are **not** publishing with NPS, if you need to set the publisher location to some place other than the Fort Collins Office (e.g. you are NOT working on a data package) or your product is "for" the NPS but not "by" the NPS and you need to specify a different agency, set NPS = FALSE. When NPS=TRUE, the function will over-write existing publisher info and inject NPS as the publisher along the the Central Office in Fort Collins as the location. Additionally, it sets the "for or by NPS" field to TRUE and specifies the originating agency as NPS. +#' +#' @importFrom mockr local_mock +#' @importFrom rlang local_options #' #' @return an EML-formatted R object #' @export @@ -15,7 +18,10 @@ #' data_package_title <- "New Title. Must match DataStore Reference title." #' eml_object <- set_title(eml_object, data_package_title) #' } -set_title <- function(eml_object, data_package_title, force = FALSE, NPS = TRUE) { +set_title <- function(eml_object, + data_package_title, + force = FALSE, + NPS = TRUE) { # scripting route: if (force == TRUE) { eml_object$dataset$title <- data_package_title @@ -31,8 +37,11 @@ set_title <- function(eml_object, data_package_title, force = FALSE, NPS = TRUE) sep = "" ) } else { - cat("Your EML already has an title, ", crayon::blue$bold(doc), ".", sep = "") - var1 <- readline(prompt = "Are you sure you want to replace it? \n\n 1: Yes\n 2: No\n") + cat("Your EML already has an title, ", + crayon::blue$bold(doc), + ".\n", sep = "") + cat("Are you sure you want to replace it?\n") + var1 <- .get_user_input() # if User opts to retain DOI, retain it if (var1 == 1) { # print the existing DOI to the screen: @@ -77,7 +86,8 @@ set_title <- function(eml_object, data_package_title, force = FALSE, NPS = TRUE) set_doi <- function(eml_object, ds_ref, force = FALSE, NPS = TRUE) { # scripting route: if (force == TRUE) { - eml_object$dataset$alternateIdentifier <- paste0("doi: https://doi.org/10.57830/", ds_ref) + eml_object$dataset$alternateIdentifier <- paste0( + "doi: https://doi.org/10.57830/", ds_ref) # update data URLs to correspond to new DOI: data_table <- EML::eml_get(eml_object, "dataTable") data_table <- within(data_table, rm("@context")) @@ -97,67 +107,48 @@ set_doi <- function(eml_object, ds_ref, force = FALSE, NPS = TRUE) { # interactive route: if (force == FALSE) { # Look for an existing data package DOI: - doc <- arcticdatautils::eml_get_simple( - eml_object, - "alternateIdentifier" - ) + doi <- eml_object$dataset$alternateIdentifier # If there is no existing DOI, add a DOI to the metadata - if (is.null(doc)) { + if (is.null(doi)) { eml_object$dataset$alternateIdentifier <- paste0( "doi: https://doi.org/10.57830/", ds_ref ) - doc <- arcticdatautils::eml_get_simple( - eml_object, - "alternateIdentifier" - ) - doc <- sub(".*? ", "", doc) + #get new doi: + doi <- eml_object$dataset$alternateIdentifier + doi <- sub(".*? ", "", doi) # print the new DOI to the screen: cat("No DOI detected.") cat("Your newly specified DOI is: ", - crayon::blue$bold(doc), + crayon::blue$bold(doi), sep = "" ) } # If there is a DOI, find the correct doi by searching for the text "doi: ". else { - my_list <- NULL - - # hopefully deals with case when there are multiple DOIs specified under alternateIdentifier tags. Haven't run into this yet and so this remains untested. - if (length(doc) > 1) { - for (i in seq_along(doc)) { - if (stringr::str_detect(doc[i], "doi:")) { - my_list <- append(my_list, doc[i]) - } - } - } - # if there is only one alternateIdentifier: - else { - my_list <- doc - } - doi <- my_list[[1]] - # If a DOI exists, ask the user what to do about it: - cat("Your EML already has a DOI specified in the tag:\n") - cat(crayon::blue$bold(doc), + cat("Your EML already has a DOI:\n") + cat(crayon::blue$bold(doi), "\n\n", sep = "" ) - var1 <- readline(prompt = cat("Enter 1 to retain this DOI\nEnter 2 to overwrite this DOI")) + cat("Are you sure you want to replace your DOI?\n") + var1 <- .get_user_input() # if User opts to retain DOI, retain it - if (var1 == 1) { + if (var1 == 2) { # print the existing DOI to the screen: doi <- sub(".*? ", "", doi) cat("Your DOI remains: ", crayon::blue$bold(doi), sep = "") } # if User opts to change DOI, change it: - if (var1 == 2) { - eml_object$dataset$alternateIdentifier <- paste0("doi: https://doi.org/10.57830/", ds_ref) + if (var1 == 1) { + eml_object$dataset$alternateIdentifier <- paste0( + "doi: https://doi.org/10.57830/", ds_ref) # get the new DOI: - doc <- arcticdatautils::eml_get_simple(eml_object, "alternateIdentifier") - doc <- sub(".*? ", "", doc) + doi <- eml_object$dataset$alternateIdentifier + doi <- sub(".*? ", "", doi) # update data URLs to correspond to new DOI: data_table <- EML::eml_get(eml_object, "dataTable") @@ -176,7 +167,7 @@ set_doi <- function(eml_object, ds_ref, force = FALSE, NPS = TRUE) { } } # print the new DOI to the screen: - cat("Your newly specified DOI is: ", crayon::blue$bold(doc), + cat("Your newly specified DOI is: ", crayon::blue$bold(doi), ".\n", sep = "") cat("Your data files url also been updated to: ", crayon::blue$bold(data_url), ".\n", sep = "") @@ -196,7 +187,7 @@ set_doi <- function(eml_object, ds_ref, force = FALSE, NPS = TRUE) { #' Add Park Unit Connections to metadata #' -#' @description `set_content_units()` adds all specified park units and their N, E, S, W bounding boxes to . This information will be used to fill in the Content Unit Links field in DataStore. Invalid park unit codes will return an error and the function will terminate. If you don't know a park unit code, see [get_park_code()](https://nationalparkservice.github.io/NPSutils/reference/get_park_code.html) from the [NPSutils](https://nationalparkservice.github.io/NPSutils/index.html) package]. +#' @description `set_content_units()` adds all specified park units and their N, E, S, W bounding boxes to . This information will be used to fill in the Content Unit Links field in DataStore. Invalid park unit codes will return an error and the function will terminate. If you don't know a park unit code, see [`get_park_code()`](https://nationalparkservice.github.io/NPSutils/reference/get_park_code.html) from the [NPSutils](https://nationalparkservice.github.io/NPSutils/index.html) package]. #' #' @details Adds the Content Unit Link(s) to a geographicCoverage. Content Unit Links(s) are the (typically) four-letter codes describing the park unit(s) where data were collected (e.g. ROMO, not ROMN). Each park unit is given a separate geographicCoverage element. For each content unit link, the unit name will be listed under geographicDescription and prefaced with "NPS Content Unit Link:". The geographicCoverage element will be given the attribute "system = content unit link". Required child elements (bounding coordinates) are auto populated to produce a rectangle that encompasses the park unit in question. If the default force=FALSE option is retained, the user will be shown existing content unit links (if any exist) and asked to 1) retain them 2) add to them or 3) replace them. If the force is set to TRUE, the interactive components will be skipped and the existing content unit links will be replaced. #' @@ -217,13 +208,13 @@ set_content_units <- function(eml_object, park_units, # test whether park units are actually park units: null_units <- NULL for(i in seq_along(park_units)){ - is_unit <- .get_unit_polygon(park_units[i]) + is_unit <- .get_unit_polygon(park_units[i]) # null_units <- append(null_units, is_unit) } - # if any park unit is not valid, and exit function. - if(!identical(seq_along(null_units), seq_along(park_units))){ + if (is.null(null_units)) { return() } + # add text to indicate that these are park unit connections. units <- paste0("NPS Content Unit Link: ", park_units) #generate new geographic coverage for NPS Content Unit Links: @@ -248,44 +239,46 @@ set_content_units <- function(eml_object, park_units, } # get geographic coverage from eml_object doc <- eml_object$dataset$coverage$geographicCoverage + # Are there content unit links already specified? exist_units <- NULL for (i in seq_along(doc)) { - myMap <- purrr::map(doc, 1)[[i]] - if (suppressWarnings(stringr::str_detect(myMap, - "NPS Content Unit Link")) == TRUE) { - exist_units <- append(exist_units, myMap) + doc2 <- unlist(doc) + if (suppressWarnings( + stringr::str_detect(doc2[i], + "NPS Content Unit Link")) == TRUE) { + exist_units <- append(exist_units, doc2[[i]]) } } - # interactive route: - if (force == FALSE) { - # if there is no content unit links add it directly to eml_object - if (is.null(exist_units)) { - if (is.null(doc)) { + + # if there is no content unit links add it directly to eml_object + if (is.null(exist_units)) { + if (is.null(doc)) { + eml_object$dataset$coverage$geographicCoverage <- unit_list + } else { + #if there are multiple existing geographic coverages: + if (length(seq_along(doc[[1]])) > 1) { + # combine new and old geo coverages (new always at the top!) + doc <- append(unit_list, doc) + # write over the existing geographic coverage + eml_object$dataset$coverage$geographicCoverage <- doc + } + # if there is only one geo coverage: + if (length(seq_along(doc[[1]])) == 1) { + geocov2 <- EML::eml$geographicCoverage( + geographicDescription = + doc$geographicDescription, + boundingCoordinates = + doc$boundingCoordinates + ) + # add park unit connections and existing geo coverage (park units always on top!) + unit_list<-append(unit_list, list(geocov2)) + #insert into EML: eml_object$dataset$coverage$geographicCoverage <- unit_list - } else { - #if there are multiple existing geographic coverages: - if (length(seq_along(doc[[1]])) > 1) { - # combine new and old geo coverages (new always at the top!) - doc <- append(unit_list, doc) - # write over the existing geographic coverage - eml_object$dataset$coverage$geographicCoverage <- doc - } - # if there is only one geo coverage: - if (length(seq_along(doc[[1]])) == 1) { - geocov2 <- EML::eml$geographicCoverage( - geographicDescription = - doc$geographicDescription, - boundingCoordinates = - doc$boundingCoordinates - ) - # add park unit connections and existing geo coverage (park units always on top!) - unit_list<-append(unit_list, list(geocov2)) - #insert into EML: - eml_object$dataset$coverage$geographicCoverage <- unit_list - #eml_object$dataset$coverage$geographicCoverage <- list(unit_list, (geocov2)) - } + #eml_object$dataset$coverage$geographicCoverage <- list(unit_list, (geocov2)) } + } + if (force == FALSE) { cat("No previous Content Unit Links Detected\n") cat("Your Content Unit Links have been set to:\n") for(i in seq_along(park_units)){ @@ -293,13 +286,17 @@ set_content_units <- function(eml_object, park_units, sep = "" } } - # if there already content unit links: - if (!is.null(exist_units)) { + } + # if there already content unit links: + if (!is.null(exist_units)) { + if (force == FALSE) { cat("Your metadata already has the following Content Unit Links Specified:\n") for (i in seq_along(exist_units)) { cat(crayon::blue$bold(exist_units[i]), "\n") } - var1 <- readline(prompt = "Do you want to\n\n 1: Retain the existing Unit Connections\n 2: Add to the exsiting Unit Connections\n 3: Replace the existing Unit Connections") + cat("Do you want to:\n\n 1: Retain the existing Unit Connections\n 2: Add to the exsiting Unit Connections\n 3: Replace the existing Unit Connections") + var1 <- .get_user_input3() + # Do nothing: if (var1 == 1) { cat("Your existing Unit Connections were retained.") @@ -352,13 +349,12 @@ set_content_units <- function(eml_object, park_units, } #get all geographic coverage that is NOT content unit links: no_units <- NULL + for (i in seq_along(doc)) { - myMap2 <- purrr::map(doc, 1)[[i]] - if (suppressWarnings(stringr::str_detect( - myMap2, - "NPS Content Unit Link" - )) == FALSE) { - no_units <- append(no_units, doc[i]) + if (suppressWarnings( + stringr::str_detect(doc[[i]][1], + "NPS Content Unit Link")) == FALSE) { + no_units <- append(no_units, list(doc[[i]])) } } # if the only geo unit was a previous connection, replace it: @@ -368,14 +364,11 @@ set_content_units <- function(eml_object, park_units, #if there are geographic units other than content units, add to those: if (!is.null(no_units)) { #if there is only one non-content unit geographic coverage element: - unit_list<-append(unit_list, no_units) + unit_list <- append(unit_list, no_units) #insert into EML: + eml_object$dataset$coverage$geographicCoverage <- unit_list - } - if (length(no_units) > 1) { - my_list <- append(unit_list, no_units) - eml_object$dataset$coverage$geographicCoverage <- my_list - } + } # get new geo units: newgeo <- eml_object$dataset$coverage$geographicCoverage exist_units <- NULL @@ -396,51 +389,143 @@ set_content_units <- function(eml_object, park_units, cat(crayon::blue$bold(exist_units[i]), "\n") } } - } } + } # scripting route if (force == TRUE) { - # if the only geo unit was not a previous connection, add connections: - if (is.null(exist_units)) { - #if no geographic coverage at all: - if (is.null(doc)) { - eml_object$dataset$coverage$geographicCoverage <- unit_list - } - if (!is.null(doc)){ - unit_list <- append(unit_list, doc) - eml_object$dataset$coverage$geographicCoverage <- unit_list - } - } - else { - # make sure everything is nested to the same hierarchical level: + #if there is only one item in geoCov, it is not nested as deeply as when + #there are multiple. Re-nest single items so that all geoCov are at the + #same level of nesting: if(!is.null(names(doc))){ - doc <- list(doc) + doc <- list(doc) } - #get all geographic coverage that is NOT content unit links: + #get all geographic coverage that is NOT content unit links: no_units <- NULL for (i in seq_along(doc)) { - myMap2 <- purrr::map(doc, 1)[[i]] - if (suppressWarnings(stringr::str_detect( - myMap2, - "NPS Content Unit Link" - )) == FALSE) { - no_units <- append(no_units, doc[i]) + if (suppressWarnings( + stringr::str_detect(doc[[i]][1], + "NPS Content Unit Link")) == FALSE) { + no_units <- append(no_units, list(doc[[i]])) } } - # if the only geo unit was a previous connection, replace it: + # if the only geo unit was a previous connection, replace it: if (is.null(no_units)) { eml_object$dataset$coverage$geographicCoverage <- unit_list - } - #if there are geographic units other than content units, add to those: + } + #if there are geographic units other than content units, add to those: if (!is.null(no_units)) { #if there is only one non-content unit geographic coverage element: - unit_list<-append(unit_list, no_units) + unit_list <- append(unit_list, no_units) #insert into EML: eml_object$dataset$coverage$geographicCoverage <- unit_list } - if (length(no_units) > 1) { - my_list <- append(unit_list, no_units) - eml_object$dataset$coverage$geographicCoverage <- my_list + } + # Set NPS publisher, if it doesn't already exist + if (NPS == TRUE) { + eml_object <- .set_npspublisher(eml_object) + } + # add/update EMLeditor and version to metadata: + eml_object <- .set_version(eml_object) + return(eml_object) +} + +#' Adds CUI dissemination codes to metadata +#' +#' @description +#' `set_cui_code()` adds Controlled Unclassified Information (CUI) dissemination codes to EML metadata. These codes determine who can or cannot have access to the data. Unless you have a specific mandate to restrict data, all data should be available to the public. if the CUI dissemination code is PUBLIC, the CUI marking should also be PUBLIC (`see set_cui_marking()`) and the license should be set to public domain (or CC0; see `set_int_rights()`). If your data contains CUI and you need to set the CUI dissemination code to anything other than PUBLIC, please be prepared to provide a legal justification in the form of the appropriate CUI marking (see `set_cui_marking()`). +#' +#' @details `set_cui_code()` adds a CUI dissemination code to the tag CUI under additionalMetadata/metadata. The available choices for CUI dissemination codes at NPS are (pay attention to the spaces!): +#' +#' PUBLIC: The data contain no CUI, dissemination is not restricted. +#' FED ONLY: Contains CUI. Only federal employees should have access (similar to the "internal only" setting in DataStore) +#' FEDCON: Contains CUI Only federal employees and federal contractors should have access to the data (again, very similar to the DataStore "internal only" setting) +#' DL ONLY: Contains CUI. Should only be available to a named list of individuals. (where and how to supply that list TBD) +#' NOCON - Contains CUI. Federal, state, local, or tribal employees may have access, but contractors cannot. +#' +#' For a more detailed explanation of the CUI dissemination codes, please see the national archives [CUI Registry: Limited Dissemination Controls](https://www.archives.gov/cui/registry/limited-dissemination) web page. +#' +#' @inheritParams set_title +#' @param cui_code a string consisting of one of 7 potential CUI codes: PUBLIC, FED ONLY, FEDCON, DL ONLY, or NOCON + +#' @returns an EML-formatted R object +#' @export +#' @examples +#' \dontrun{ +#' set_cui_dissem(eml_object, "PUBLIC") +#' } +set_cui_code <- function(eml_object, + cui_code = c("PUBLIC", + "NOCON", + "DL ONLY", + "FEDCON", + "FED ONLY"), + force = FALSE, + NPS = TRUE) { + + cui_code <- toupper(cui_code) + # verify CUI code entry; stop if does not equal one of six valid codes listed above: + cui_code <- match.arg(cui_code) + + # Generate new CUI element for additionalMetadata + my_cui <- list(metadata = list(CUI = cui_code), id = "CUI") + + # get existing additionalMetadata elements: + doc <- eml_object$additionalMetadata + + #if no additional metadata at all.... + if(is.null(doc)){ + eml_object$additionalMetadata <- list(my_cui) + } + if(!is.null(doc)){ + + #helps track lists of different lengths/hierarchies + x <- length(doc) + + # Is CUI code already specified? + exist_cui <- NULL + for (i in seq_along(doc)) { + y <- suppressWarnings(stringr::str_replace_all(doc[i], " ", "")) + if (suppressWarnings(stringr::str_detect(y, "CUI\\b")) == TRUE) { + seq <- i + exist_cui <- doc[[i]]$metadata$CUI + } + } + + # scripting route: + if (force == TRUE) { + #what is [[seq]]? It works but... + eml_object$additionalMetadata[[seq]] <- my_cui + } + + # interactive route: + if (force == FALSE) { + # If no existing CUI, add it in: + if (is.null(exist_cui)) { + if (x == 1) { + eml_object$additionalMetadata <- list(my_cui, + eml_object$additionalMetadata) + } + if (x > 1) { + eml_object$additionalMetadata[[x + 1]] <- my_cui + } + cat("No previous CUI was detected. Your CUI has been set to ", + crayon::bold$blue(cui_code), ".", sep = "") + } + # If existing CUI, stop. + if (!is.null(exist_cui)) { + cat("CUI has previously been specified as ", + crayon::bold$blue(exist_cui), + ".\n", sep = "") + cat("Are you sure you want to reset it?") + var1 <- .get_user_input() #1 = yes, 2 = no + if (var1 == 1) { + eml_object$additionalMetadata[[seq]] <- my_cui + cat("Your CUI code has been rest to ", + crayon::blue$bold(cui_code), ".", sep = "") + } + if (var1 == 2) { + cat("Your original CUI code was retained") + } } } } @@ -448,14 +533,19 @@ set_content_units <- function(eml_object, park_units, if (NPS == TRUE) { eml_object <- .set_npspublisher(eml_object) } - # add/update EMLeditor and version to metadata: + + # add/updated EMLeditor and version to metadata: eml_object <- .set_version(eml_object) + return(eml_object) } + #' Adds CUI to metadata #' -#' @description set_cui adds CUI codes to EML metadata +#' @description +#' #' `r lifecycle::badge("deprecated")` +#' set_cui adds CUI dissemination codes to EML metadata #' #' @details set_cui adds a CUI code to the tag CUI under additionalMetadata/metadata. #' @@ -476,6 +566,9 @@ set_content_units <- function(eml_object, park_units, set_cui <- function(eml_object, cui_code = c("PUBLIC", "NOCON", "DL ONLY", "FEDCON", "FED ONLY"), force = FALSE, NPS = TRUE) { + #add in deprecation + lifecycle::deprecate_soft(when = "0.1.5", "set_cui()", "set_cui_dissem()") + cui_code <- toupper(cui_code) # verify CUI code entry; stop if does not equal one of six valid codes listed above: cui_code <- match.arg(cui_code) @@ -521,15 +614,15 @@ set_cui <- function(eml_object, cui_code = c("PUBLIC", "NOCON", "DL ONLY", if (x > 1) { eml_object$additionalMetadata[[x + 1]] <- my_cui } - cat("No previous CUI was detected. Your CUI has been set to ", + cat("No previous CUI code was detected. Your CUI code has been set to ", crayon::bold$blue(cui_code), ".", sep = "") } # If existing CUI, stop. if (!is.null(exist_cui)) { - cat("CUI has previously been specified as ", + cat("CUI code has previously been specified as ", crayon::bold$blue(exist_cui), ".\n", sep = "") - var1 <- readline(prompt = "Are you sure you want to reset it? \n\n 1: Yes\n 2: No\n") + var1 <- .get_user_input() #1 = yes, 2 = no if (var1 == 1) { eml_object$additionalMetadata[[seq]] <- my_cui cat("Your CUI code has been rest to ", @@ -552,6 +645,177 @@ set_cui <- function(eml_object, cui_code = c("PUBLIC", "NOCON", "DL ONLY", return(eml_object) } +#' The function sets the CUI marking for the data package +#' +#' @description `r lifecycle::badge("experimental")` +#' The Controlled Unclassified Information (CUI) marking is different from the CUI dissemination code. The CUI dissemination code (set `set_cui_code()`) sets who can have access to the data package. The CUI marking set by `set_cui_marking()` specifies the reason (if any) that the data are being restricted. +#' If the CUI dissemination code is set to PUBLIC, the CUI marking must also be PUBLIC. +#' If the CUI dissemination code is set to anything other than PUBLIC, the CUI marking must be set to SP-NPSR, SP-HISTP or SP-ARCHR. +#' +#' @details CUI markings are the legal justification for why data are being restricted from the public. If data contain no CUI, the CUI marking must be set to PUBLIC (and the CUI dissemination code must be set to PUBLIC and the license must be set to CC0 or Public Domain). If the data contain CUI (i.e. the CUI dissemination code is not PUBLIC), you must use the CUI marking to provide a legal justification for why the data are restricted. Only one CUI marking can be applied. At NPS, the following markings are available: +#' +#' PUBLIC: The data contain no CUI, dissemination is not restricted. +#' SP-NPSR: "National Park System Resources" - This material contains information concerning the nature and specific location of a National Park System resource that is endangered, threatened, rare, or commercially valuable, of mineral or paleontological objects within System units, or of objects of cultural patrimony within System units. +#' SP-HISTP: "Historic Properties" - This material contains information related to the location, character, or ownership of historic property. +#' SP-ARCHR: "Archaeological Resources" - This material contains information related to information about the nature and location of any archaeological resource for which the excavation or removal requires a permit or other permission. +#' +#' For more information on CUI markings, please visit the [CUI Markings](https://www.archives.gov/cui/registry/category-marking-list) list maintained by the National Archives. +#' +#' @inheritParams set_title +#' @param cui_marking String. One of four options, "PUBLIC", "SP-NPSR", "SP-HISTP" or "SP-ARCHR" are available. +#' +#' @return an EML-formatted R object +#' @export +#' +#' @examples +#' \dontrun{ +#' eml_object <- set_cui_marking(eml_object, "PUBLIC") +#' } +set_cui_marking <- function (eml_object, + cui_marking = c("PUBLIC", + "SP-NPSR", + "SP-HISTP", + "SP-ARCHR"), + force = FALSE, + NPS = TRUE) { + + cui_marking <- toupper(cui_marking) + # verify CUI code entry; stop if does not equal one of six valid codes listed above: + cui_marking <- match.arg(cui_marking) + + # Generate new CUI element for additionalMetadata + my_cui <- list(metadata = list(CUImarking = cui_marking), id = "CUImarking") + + # get existing additionalMetadata elements: + add_meta <- eml_object$additionalMetadata + + #get the location of CUI dissemination codes in additionalMetadata: + x <- NULL + for (i in 1:length(seq_along(add_meta))) { + if (names(add_meta[[i]][["metadata"]]) == "CUI") { + x <- i + break + } + } + + #if no CUI dissemination code exit the function; warn if force == FALSE + if (is.null(x)) { + if (force == FALSE) { + cat("Your metadata does not contain a CUI dissemination code.") + cat("Use ", + crayon::bold$green("set_cui_code()"), + " to add a dissemination code to the metadata.", + sep = "") + } + return(invisible(eml_object)) + } + + #get location of CUI marking codes in additionalMetadata: + y <- NULL + for (i in 1:length(seq_along(add_meta))) { + if(names(add_meta[[i]][["metadata"]]) == "CUImarking") { + y <- i + break + } + } + + #if CUI marking already exists: + if (!is.null(y)) { + #get existing CUI marking: + existing_cui_marking <- add_meta[[y]][["metadata"]][["CUImarking"]] + + #don't replace an existing CUI marking with the same marking + if (existing_cui_marking == cui_marking) { + if (force == FALSE) { + cat("Your metadata already have an existing CUI marking of ", + crayon::bold$blue(existing_cui_marking), + ".\n", + sep = "") + cat("Your metadata CUI marking was not updated.\n") + } + return(invisible(eml_object)) + } + + #if CUI markings already exist, ask if they should be replaced/changed? + if (force == FALSE) { + cat("Your metadata already contains the CUI marking: ", + crayon::blue$bold(existing_cui_marking), + ".\n", + sep = "") + cat("Are you sure you want to change it?\n") + var1 <- .get_user_input() + if (var1 == 2) { + cat("Your original CUI marking has been retained") + return(invisible(eml_object)) + } + } + } + #extract CUI dissemination code + cui <- add_meta[[x]][["metadata"]][["CUI"]] + + #test that cui code and cui marking are both public: + if (cui == "PUBLIC" & cui_marking != "PUBLIC") { + if (force == FALSE){ + msg <- paste0("to choose a CUI marking that coincides", + " with your CUI dissemination code or use ") + cat("Your CUI dissemination code is set to ", cui, ".\n", sep ="") + cat("The CUI dissemination code and CUI marking must coincide.\n") + cat("Use ", + crayon::green$bold("set_cui_marking() "), + msg, + crayon::green$bold("set_cui_code()"), + " to change your CUI dissemination code.\n", sep = "") + } + return(invisible(eml_object)) + } + + #test that if cui_code is not public, cui_marking is not public. + if (cui != "PUBLIC" & cui_marking == "PUBLIC") { + if (force == FALSE){ + msg <- paste0("to choose a CUI marking that coincides", + " with your CUI dissemination code or use ") + cat("Your CUI dissemination code is set to ", cui, ".\n", sep = "") + cat("The CUI dissemination code and CUI marking must coincide.\n") + cat("Use ", + crayon::green$bold("set_cui_marking() "), + msg, + crayon::green$bold("set_cui_code()"), + " to change your CUI dissemination code\n.", sep = "") + } + return(invisible(eml_object)) + } + + # at this point cui_code and cui_marking coincide + # add cui_marking and put it back in additional metadata + + # Generate new CUI element for additionalMetadata + my_cui <- list(metadata = list(CUImarking = cui_marking), id = "CUI marking") + + # if there was no CUImarking, add one: + if (is.null(y)) { + x <- length(eml_object$additionalMetadata) + eml_object$additionalMetadata[[x + 1]] <- my_cui + } else { + #otherwise, overwrite the existing CUI marking: + eml_object[["additionalMetadata"]][[y]] <- my_cui + } + + if (force == FALSE) { + cat("Your CUI marking has been set to ", crayon::blue$bold(cui_marking)) + } + + # Set NPS publisher, if it doesn't already exist + if (NPS == TRUE) { + eml_object <- .set_npspublisher(eml_object) + } + + # add/updated EMLeditor and version to metadata: + eml_object <- .set_version(eml_object) + + return(eml_object) + +} + #' adds DRR connection #' #' @description set_drr adds the DOI of an associated DRR @@ -600,7 +864,8 @@ set_drr <- function(eml_object, drr_ref_id, drr_title, org_name = "NPS", force = ".\n", sep = "" ) - var1 <- readline(prompt = "Are you sure you want to change it? \n\n 1: Yes\n 2: No\n") + cat("Are you sure you want to change it?\n") + var1 <- .get_user_input() #1 = Yes; 2 = No if (var1 == 1) { eml_object$dataset$usageCitation <- cite cat("Your new DRR is: ", crayon::blue$bold(doc$title), ".\n", sep = "") @@ -660,7 +925,8 @@ set_abstract <- function(eml_object, cat("View the current abstract using get_abstract.") } else { cat("Your EML already has an abstract.\n") - var1 <- readline(prompt = "Are you sure you want to replace it? \n\n 1: Yes\n 2: No\n") + cat("Are you sure you want to replace it?\n\n") + var1 <- .get_user_input() #1 = yes, 2 = no # if User opts to replace abstract: if (var1 == 1) { # print the existing DOI to the screen: @@ -700,7 +966,8 @@ set_abstract <- function(eml_object, #' #' @examples #' \dontrun{ -#' eml_object <- set_additional_info(eml_object, "Here is some text for the Notes section on DataStore.") +#' eml_object <- set_additional_info(eml_object, +#' "Some text for the Notes section on DataStore.") #' } set_additional_info <- function(eml_object, additional_info, @@ -723,7 +990,8 @@ set_additional_info <- function(eml_object, cat("View the current additionalInfo using get_additional_info.") } else { cat("Your EML already has additionalInfo.\n") - var1 <- readline(prompt = "Are you sure you want to replace it? \n\n 1: Yes\n 2: No\n") + cat("Are you sure you want to replace it?\n\n") + var1 <- .get_user_input() #1 = yes, 2 = no # if User opts to replace abstract: if (var1 == 1) { # print the existing DOI to the screen: @@ -790,7 +1058,8 @@ set_methods <- function(eml_object, cat("View the current methods using get_methods.") } else { cat("Your EML already has a Methods section.\n") - var1 <- readline(prompt = "Are you sure you want to replace it? \n\n 1: Yes\n 2: No\n") + cat("Are you sure you want to replace it?\n\n") + var1 <- .get_user_input() #1 = yes, 2 = no # if User opts to replace abstract: if (var1 == 1) { # print the existing DOI to the screen: @@ -847,6 +1116,7 @@ set_lit <- function(eml_object, bibtex_file, force = FALSE, NPS = TRUE) { cat("You have already specified literature cited.\n") cat("To view yourcurrent literature, use get_lit\n") var1 <- readline(prompt = "Would you like to:\n\n 1: Make no changes\n 2: Replace your literature cited\n 3: add to your literature cited\n\n") + var1 <- .get_user_input3() # if (var1 == 1) { print("No changes were made to literature cited.") } @@ -927,7 +1197,8 @@ set_producing_units <- function(eml_object, if (!is.null(doc)) { cat("Your metadata already contains the following Producing Unit(s):\n") cat(crayon::blue$bold(get_producing_units(eml_object)), "\n") - var1 <- readline(prompt = "Are you sure you want to replace them? \n\n 1: Yes\n 2: No\n") + cat("Are you sure you want to replace them?\n\n") + var1 <- .get_user_input() #1 = yes, 2 = no # if User opts to replace metadataProvider, replace it: if (var1 == 1) { eml_object$dataset$metadataProvider <- plist @@ -1030,7 +1301,8 @@ set_language <- function(eml_object, lang, force = FALSE, NPS = TRUE) { } # does the user want to change the language? - var1 <- readline(prompt = "Are you sure you want to replace it? \n\n 1: Yes\n 2: No\n") + cat("Are you sure you want to replace it?\n\n") + var1 <- .get_user_input() #1 = yes, 2 = no # if yes, change the language and report the change: if (var1 == 1) { @@ -1132,10 +1404,8 @@ set_protocol <- function(eml_object, protocol_id, force = FALSE, NPS = TRUE) { crayon::bold$blue(doc$title), ".", sep = "" ) - - var1 <- readline(prompt = "Are you sure you want to replace it? \n\n - 1: Yes\n 2: No\n") - + cat("Are you sure you want to replace it?\n\n") + var1 <- .get_user_input() #1=yes, 2=no # if yes, change the project: if (var1 == 1) { eml_object$dataset$project <- proj @@ -1365,7 +1635,8 @@ set_publisher <- function(eml_object, "\n\n", sep = "" ) - var1 <- readline(prompt = "Would you like to replace your existing publisher? \n\n 1: Yes\n 2: No\n") + cat("Would you like to replace your existing publisher?\n\n") + var1 <- .get_user_input() #1: Yes; 2: No\n") if (var1 == 1) { eml_object$dataset$publisher <- pubset cat("Your new publisher is:\n\n") @@ -1451,7 +1722,8 @@ set_publisher <- function(eml_object, "\n\n", sep = "" ) - var2 <- readline(prompt = "Would you like to replace your agency? \n\n 1: Yes\n 2: No\n") + cat("Would you like to replace your agency?\n\n") + var2 <- .get_user_input() #1 = yes, 2 = no if (var2 == 1) { # Since there are existing additionalMetadata elements: if (length(add_meta) == 1) { @@ -1701,9 +1973,9 @@ set_data_urls <- function(eml_object, url = NULL, force = FALSE, NPS = TRUE){ #' Allows user to add ORCids to the creator #' -#' @description `set_creator_orcids()` allows users to add ORCiDs to creators or edit/update existing ORCiDs associated with creators. ORCiDs are persistent digital identifiers associated with individual people and remain constant despite name changes. They can help disambiguate creators with similar names and associated all the products of one creator in one space despite variations in how the name was used (e.g. Rob Baker and Robert Baker and. Robert L. Baker but NOT any of the 15 million or so other Robert Bakers). To register an ORCiD or manage your ORCiD profile, go to [https://orcid.org/](https://orcid.org/). +#' @description `set_creator_orcids()` allows users to add (or remove) ORCiDs to creators or edit/update existing ORCiDs associated with creators. ORCiDs are persistent digital identifiers associated with individual people and remain constant despite name changes. They can help disambiguate creators with similar names and associated all the products of one creator in one space despite variations in how the name was used (e.g. Rob Baker and Robert Baker and. Robert L. Baker but NOT any of the 15 million or so other Robert Bakers). To register an ORCiD or manage your ORCiD profile, go to [https://orcid.org/](https://orcid.org/). #' -#' @details ORCiDs should be supplied as a list in the order in which the creators are listed. If a creator does not have an ORCiD, put "NA" in the list in that space. Only consider individual people who are creators (and not organizations, they will automatically be skipped). ORCiDs should be supplied as a 16-digit string with hyphens after every 4 digits: xxxx-xxxx-xxxx-xxxx. Please do not include the URL prefix for your ORCiDs; this will automatically be inserted for you. +#' @details ORCiDs should be supplied as a list in the order in which the creators are listed. If a creator does not have an ORCiD, put NA (NO quotes around NA!) in the list in that space. Only consider individual people who are creators (and not organizations, they will automatically be skipped). ORCiDs should be supplied as a 16-digit string with hyphens after every 4 digits: xxxx-xxxx-xxxx-xxxx. Please do not include the URL prefix for your ORCiDs; this will automatically be inserted for you. #' #' @inheritParams set_title #' @param orcids String. One or more ORCiDs listed in the same order as the corresponding creators. Use "NA" if a creator does not have an ORCiD. Do not include the full URL. Format as: xxxx-xxxx-xxxx-xxxx (the https://orcid.org/ prefix will be added for you). @@ -1716,18 +1988,28 @@ set_data_urls <- function(eml_object, url = NULL, force = FALSE, NPS = TRUE){ #' #only one creator: #' mymetadata <- set_creator_orcids(mymetadata, 1234-1234-1234-1234) #' -#' #three creators, the second of which does not hae an ORCiD: -#' creator_orcids <- c("1234-1234-1234-1234", "NA", "4321-4321-4321-4321") +#' #three creators, the second of which does not have an ORCiD: +#' creator_orcids <- c("1234-1234-1234-1234", NA, "4321-4321-4321-4321") #' mymetadata <- set_creator_orcids(mymetadata, creator_orcids) #' } set_creator_orcids <- function(eml_object, orcids, force = FALSE, NPS = TRUE){ + #if NA supplied as "NA": + if (sum(stringr::str_detect(na.exclude(orcids), "NA")) > 0) { + if (force == FALSE) { + cat("It appears some authors do not have orcids.\n") + cat("Please specify these as NA (without quotes).") + } + return(invisible(eml_object)) + } #make sure they didn't include URLs: - if(sum(grep("https://orcid.org/", orcids)) > 0){ - cat("The ORCiD(s) you supplied appear to be incorrectly formatted.\n") - cat("Please supply ORCiDs in the following format: xxxx-xxxx-xxxx-xxxx", - " (No URLs).\n", sep="") - return() + if (sum(grep("https://orcid.org/", orcids)) > 0) { + if (force == FALSE) { + cat("The ORCiD(s) you supplied appear to be incorrectly formatted.\n") + cat("Please supply ORCiDs in the following format: xxxx-xxxx-xxxx-xxxx", + " (No URLs).\n", sep="") + } + return(invisible(eml_object)) } #get creators @@ -1735,25 +2017,37 @@ set_creator_orcids <- function(eml_object, orcids, force = FALSE, NPS = TRUE){ # If there's only one creator, creator ends up with one less level of nesting. Re-nest it so that the rest of the code works consistently names_list <- c("individualName", "organizationName", "positionName") - if(sum(names_list %in% names(creator)) > 0){ + if (sum(names_list %in% names(creator)) > 0) { creator <- list(creator) } #get existing orcids (should this be its own function?): surName <- NULL existing_orcid <- NULL - for(i in seq_along(creator)){ - if("individualName" %in% names(creator[[i]])){ + for (i in seq_along(creator)) { + if ("individualName" %in% names(creator[[i]])) { #check for orcid directory id: - surName <- append(surName, creator[[i]][["individualName"]][["surName"]]) - existing_orcid <- append(existing_orcid, creator[[i]][["userId"]][["userId"]]) + surName <- append(surName, + creator[[i]][["individualName"]][["surName"]]) + #what if there is no orcid? + existing_orcid <- append(existing_orcid, + creator[[i]][["userId"]][["userId"]]) } } + # make sure provided list of orcids is the same length as the authors + if (length(seq_along(surName)) != length(seq_along(orcids))) { + if (force == FALSE) { + cat("The list of orcids and existing authors must be the same length.\n") + cat("Use NA (no quotes) to indicate an author does not have an orcid.") + } + return(invisible(eml_object)) + } + #if verbose route: - if(force == FALSE){ + if (force == FALSE) { #if there are already orcids: - if(!is.null(existing_orcid)){ + if (!is.null(existing_orcid)) { #construct tibble of existing orcids: current_orcids <- tibble::tibble(surName, existing_orcid) #construct tibble of replacement orcids: @@ -1762,23 +2056,29 @@ set_creator_orcids <- function(eml_object, orcids, force = FALSE, NPS = TRUE){ cat(format(tibble::as_tibble(current_orcids))[c(-3L, -1L)], sep = "\n") cat("\nAre you sure you want to replace the existing ORCiDs with the following:\n\n") cat(format(tibble::as_tibble(new_orcids))[c(-3L, -1L)], sep = "\n") - var1 <- readline(prompt = cat("\n\n1: Yes\n2: No\n\n")) + var1 <- .get_user_input() #1: Yes; 2: No if(var1 == 2){ cat("Your original ORCiDs were retained.") - return() + return(invisible(eml_object)) } } } #generate list of new orcids: author_order <- 1 - for(i in seq_along(creator)){ - if("individualName" %in% names(creator[[i]])){ - replace_orcid <- paste0("https://orcid.org/", orcids[author_order]) + for (i in seq_along(creator)) { + if ("individualName" %in% names(creator[[i]])) { + if (!is.na(orcids[author_order])) { + replace_orcid <- paste0("https://orcid.org/", orcids[author_order]) + userId2 <- list(list(userId = replace_orcid), + directory = "https://orcid.org") + creator[[i]][["userId"]] <- userId2 + } else { + # if user specified NA for orcid, get rid of that field + creator[[i]][["userId"]] <- NULL + } author_order <- author_order +1 - userId2 <- list(list(userId = replace_orcid), directory = "https://orcid.org") - creator[[i]][["userId"]] <- userId2 } } @@ -1786,21 +2086,23 @@ set_creator_orcids <- function(eml_object, orcids, force = FALSE, NPS = TRUE){ eml_object[["dataset"]][["creator"]] <- creator #if verbose route, display new orcids: - if(force == FALSE){ - #get new orcids - should this be it's own function? - creator <- eml_object[["dataset"]][["creator"]] - names_list <- c("individualName", "organizationName", "positionName") - if(sum(names_list %in% names(creator)) > 0){ - creator <- list(creator) - } - + if (force == FALSE) { + new_creator <- eml_object[["dataset"]][["creator"]] + #get existing orcids (should this be its own function?): surName <- NULL existing_orcid <- NULL - for(i in seq_along(creator)){ - if("individualName" %in% names(creator[[i]])){ + for (i in seq_along(new_creator)) { + if ("individualName" %in% names(new_creator[[i]])) { #check for orcid directory id: - surName <- append(surName, creator[[i]][["individualName"]][["surName"]]) - existing_orcid <- append(existing_orcid, creator[[i]][["userId"]][[1]][["userId"]]) + surName <- append(surName, + new_creator[[i]][["individualName"]][["surName"]]) + next_orcid <- new_creator[[i]][["userId"]][[1]][["userId"]] + if (!is.null(next_orcid)) { + existing_orcid <- append(existing_orcid, + next_orcid) + } else { + existing_orcid <- append(existing_orcid, "NULL") + } } } #construct tibble of existing orcids: @@ -1831,7 +2133,7 @@ set_creator_orcids <- function(eml_object, orcids, force = FALSE, NPS = TRUE){ #' @inheritParams set_title #' @param creator_orgs List. Defaults to NA. A list of one or more organizations. #' @param park_units List. Defaults to NA. A list of park units. If any park units are specified, it they will supersede anything listed under creator_orgs. -#' @param RORs List. Defaults to NA. An optional list of one or more ROR IDs (see https:/ror.org) that correspond to the organization in question. If an organization does not hae a ROR ID (or you don't know it), enter "NA". +#' @param RORs List. Defaults to NA. An optional list of one or more ROR IDs (see [https://ror.org](https://ror.org)) that correspond to the organization in question. If an organization does not have a ROR ID (or you don't know it), enter "NA". #' #' @return eml_object #' @export @@ -2018,15 +2320,18 @@ set_creator_order <- function(eml_object, cat("Please enter comma-separated numbers for the new creator order.\n") cat("Example: put 5 creators in reverse order, enter: 5, 4, 3, 2, 1\n") cat("Example: remove the 3rd item (out of 5) enter: 1, 2, 4, 5\n\n") - var1 <- readline(prompt="") + var1 <- .get_user_input3() # waits for any user input #don't allow user to remove all creators! if(nchar(var1)==0){ cat("You cannot delete all creators.") cat("Please enter comma-separated numbers for the new creator order.\n") - var1 <- readline(prompt="") + var1 <-.get_user_input3() + } + ord <- var1 + if (nchar(ord) > 1) { + ord <- as.list(strsplit(ord, ","))[[1]] + ord <- trimws(ord) } - ord <- as.list(strsplit(var1, ","))[[1]] - ord <- trimws(ord) new_order <- as.numeric(ord) } } @@ -2078,20 +2383,141 @@ set_creator_order <- function(eml_object, return(eml_object) } +#' Adds a missing value code and definition to EML metadata +#' +#' @description Missing data must have a missing data code and missing data code definition. `set_missing_data()` can add a single missing value code and single missing value code definition. Missing data should be clearly indicated in the data with a missing data code (e.g "NA", "NaN", "Missing", "blank" etc.). It is generally a good idea to not use special characters for missing data codes (e.g. N/A is not advised). If it is absolutely necessary to leave a cell empty with no code, that cell still needs a missing value code and definition in the metadata. Acceptable codes in this case are "empty" and "blank" with a suitable definition that states the cells are purposefully left empty. +#' +#' @details The `set_missing_data()` be used on an individual column or can accept lists of files, column names, codes, and definitions. Make sure that each missing value has a file, column, single code, and single definition associated with it (if you need multiple missing value codes and definitions per column, please use the `set_more_missing()` function). If you have many missing value codes and definitions, you might consider constructing (or import) a dataframe to describe them: +#' +#' Example data frame: +#' df <- data.frame(files = c("table1.csv", +#' "table2.csv", +#' "table3.csv", +#' "table4.csv"), +#' columns = c("EventDate", +#' "scientificName", +#' "eventID", +#' "decimalLatitude"), +#' codes = c("NA", "NA", "missing", "blank"), +#' definitions = c("not recorded", +#' "not identified", +#' "not recorded", +#' "intentionally left blank - not recorded")) +#' +#' meta2 <- set_missing_data(eml_object = metadata, +#' files = df$files, +#' columns = df$columns, +#' codes = df$codes, +#' definitions = df$definitions) +#' +#' +#' @inheritParams set_title +#' @param files String or List of strings. These are the files that contain undocumented missing data, e.g "my_data_file1.csv". +#' @param columns String or List of strings. These are the columns with missing data for which you would like to add missing data codes and explanations in to the metadata, e.g. "scientificName". +#' @param codes String or list of strings. These are the missing value codes you would like associated with the column in question, e.g. "missing" or "NA". +#' @param definitions String or list of strings. These are the missing value code definitions associated with the missing value codes, e.g "not recorded" or "sample damaged when the lab flooded". +#' +#' @return eml_object +#' @export +#' +#' @examples +#' \dontrun{ +#' #For a single column of data in a single file: +#' meta2 <- set_missing_data(my_metadata, +#' "table1.csv", +#' "scientificName", +#' "NA", +#' "Unable to identify") +#' +#' #For multiple columns of data, potentially across multiple files: +#' #(blank cells must have the missing value code of "blank" or "empty") +#' meta2 <- set_missing_data(my_metadata, +#' files = c("table1.csv", "table1.csv", "table2.csv"), +#' columns = c("date", "time", "scientificName"), +#' codes = c("NA", "missing", "blank"), +#' definitions = c("date not recorded", +#' "time not recorded", +#' "intentionally left blank - missing") +#' ) +#'} +set_missing_data <- function(eml_object, + files, + columns, + codes, + definitions, + force = FALSE, + NPS = TRUE) { + #test that the number of data files, columns, codes, and definitions match: + lists <- list(files, columns, codes, definitions) + if (sum(seq_along(unique(lapply(lists, seq_along))) != 1) > 0) { + if (force == FALSE) { + msg <- paste0("The number of filenames, columns, codes, ", + "and defintions must be the same.") + cat(msg) + } + return() + } + #tun user input into a dataframe for easier manipulation later on: + user_df <- data.frame(files, columns, codes, definitions) + #get dataTable from metadata + data_tbl <- eml_object$dataset$dataTable + # If there's only one csv, data_tbl ends up with one less level of nesting. Re-nest it so that the rest of the code works consistently + if ("attributeList" %in% names(data_tbl)) { + data_tbl <- list(data_tbl) + } + #get the attribute list for each file: + for (i in seq_along(unique(files))) { + for(j in seq_along(data_tbl)) { + if (data_tbl[[j]][["physical"]][["objectName"]] == unique(files[i])) { + attrs <- data_tbl[[j]][["attributeList"]][["attribute"]] + #if only one attribute, renest so the code works consistently: + if (!is.null(names(attrs))) { + attrs <- list(attrs) + } + #get the new missing codes for that file: + df <- user_df[which(user_df$files == files[i]),] + #find the correct attribute for each column: + for (k in seq_along(df$columns)) { + for (l in seq_along(attrs)) { + if (attrs[[l]][["attributeName"]] == df$columns[k]) { + #extract that specific attribute + attr_add_missing <- attrs[[l]] + # if it already has a missing value code, ask before replacing: + if (!is.null(attr_add_missing[["missingValueCode"]])) { + if (force == FALSE) { + cat("File ", crayon::blue$bold(files[i]), + " column ", crayon::blue$bold(df$columns[k]), + " already has a missing value code.\n", sep ="") + cat("Would you like to replace it?\n") + var1 <- .get_user_input() #1 = yes, 2 = no + if (var1 == 2) { next } # Skip this column + } + } + #add/replace the missing value codes: + attr_add_missing$missingValueCode <- list( + code = df$codes[k], + codeExplanation = df$definitions[k]) + #put new attr_add_missing back into attrs: + attrs[[l]] <- attr_add_missing + } + } + } + # put new attrs back into data_tbl: + data_tbl[[j]][["attributeList"]][["attribute"]] <- attrs + } + } + # put new data_tbl back into EML: + eml_object$dataset$dataTable <- data_tbl + } + #add NPS publisher & for or by nps + if (NPS == TRUE) { + eml_object <- .set_npspublisher(eml_object) + } + # add/update EMLeditor and version to metadataa + eml_object <- .set_version(eml_object) - - - - - - - - - - - - - + return(eml_object) +} diff --git a/R/getEMLfunctions.R b/R/getEMLfunctions.R index 4083b26..d92d230 100644 --- a/R/getEMLfunctions.R +++ b/R/getEMLfunctions.R @@ -95,9 +95,9 @@ get_abstract <- function(eml_object) { return(txt) } -#' Get additional information (Notes on DataStore) +#' Get methods #' -#' @description `get_methods()` returns the text stored in the methods element of EML metadata. The returned text is not manipulated in any way. DataStore unlist the returned object (get rid of tags such as $methodStep, $methodStep$description and $methodStep$description$para and remove the numbers in brackets). the "\\n" character combination is interpretted as a line break (as are blank lines). However, DataStpre will not filter out stray characters such as &#13;. Use the `set_methods()` function to edit and replace the text stored in the methods field. +#' @description `get_methods()` returns the text stored in the methods element of EML metadata. The returned text is not manipulated in any way. DataStore unlists the returned object (get rid of tags such as $methodStep, $methodStep$description and $methodStep$description$para and remove the numbers in brackets). the "\\n" character combination is interpreted as a line break (as are blank lines). However, DataStore will not filter out stray characters such as &#13;. Use the `set_methods()` function to edit and replace the text stored in the methods field. #' #' @inheritParams get_begin_date #' @@ -106,17 +106,10 @@ get_abstract <- function(eml_object) { #' #' @examples #' \dontrun{ -#' get_additional_info(eml_object) +#' get_methods(eml_object) #' } -get_additional_info <- function(eml_object) { - doc <- eml_object$dataset$additionalInfo - if(is.null(doc)) { - warning("Your EML lacks additional info. Use set_additional_info() to add it.") - } - return(doc) - -} - +#' +#' get_methods <- function(eml_object){ doc <- eml_object$dataset$methods if(is.null(doc)){ @@ -127,6 +120,28 @@ get_methods <- function(eml_object){ +#' Get additional information (Notes on DataStore) +#' +#' @description `get_additional_info()` returns the text in the additionalInformation element of EML. This text will be used to populate the "Notes" sectionon the DataStore reference page. There is no strict limit on what can and cannot go in to the additionalInformation/Notes section. However, DataStore will not filter out stray characters such as &#13;. Use the `set_additional_info()` function to edit and replace the text stored in the additionalInformation (notes) field. +#' +#' @inheritParams get_begin_date +#' +#' @return String +#' @export +#' +#' @examples +#' \dontrun{ +#' get_additional_info(eml_object) +#' } +get_additional_info <- function(eml_object) { + doc <- eml_object$dataset$additionalInfo + if(is.null(doc)) { + warning("Your EML lacks additional info. Use set_additional_info() to add it.") + } + return(doc) + +} + #' returns the data package title #' #' @description get_title returns a text string that is the title of the data package @@ -405,9 +420,52 @@ get_content_units <- function(eml_object) { return(list_units) } +#' returns a CUI dissemination code statement +#' +#' @description `get_cui_code()` returns an English-language translation of the CUI dissemination codes. It supersedes `get_cui()`, which has been deprecated. +#' +#' @details `get_cui_code()` accesses the contents of the Controlled Unclassified Information (CUI) tag, and returns an appropriate string of english-language text based on the properties of the CUI code. If thee tag is empty or does not exist, get_cui alerts the user and suggests specifying CUI using the set_cui() funciton. +#' +#' @inheritParams get_begin_date +#' +#' @return a text string +#' @export +#' @examples +#' \dontrun{ +#' get_cui(eml_object) +#' } +get_cui_code <- function(eml_object) { + cui <- arcticdatautils::eml_get_simple(eml_object, "CUI") + if (is.null(cui)) { + cat("No CUI specified. Use the set_cui() function to add a properly formatted CUI code.") + cui <- "No CUI specified." + } else if (cui == "FED ONLY") { + cui <- "Contains CUI. Only federal employees should have access (similar to \"internal only\" in DataStore)." + } else if (cui == "FEDCON") { + cui <- "Contains CUI. Only federal employees and federal contractors should have access (also very much like current \"internal only\" setting in DataStore)." + } else if (cui == "DL ONLY") { + cui <- "Contains CUI. Should only be available to a names list of individuals (where and how to list those individuals TBD)." + } else if (cui == "NOCON") { + cui <- "Contains CUI. Federal, state, local, or tribal employees may have access, but contractors cannot." + } else if (cui == "PUBVER") { + cui <- "Does NOT contain CUI. The original data contained CUI, but in this data package CUI have been obscured so that it no longer contains CUI." + } else if (cui == "PUBFUL") { + cui <- "Does NOT contain CUI. The original data contained no CUI. No data were obscured or altered to generate the data package." + } else if (cui == "PUBLIC") { + cui <- "Does NOT contain CUI" + } + else { + warning("CUI not properly specified. Use set_cui() to update the CUI code.") + cui <- NA + } + return(cui) +} + #' returns a CUI statement #' -#' @description `get_cui()` returns an English-language translation of the CUI codes +#' @description +#' #' `r lifecycle::badge("deprecated")` +#' Deprecated in favor of `get_cui_code()`. `get_cui()` returns an English-language translation of the CUI codes #' #' @details `get_cui()` accesses the contents of the Controlled Unclassified Information (CUI) tag, and returns an appropriate string of english-language text based on the properties of the CUI code. If thee tag is empty or does not exist, get_cui alerts the user and suggests specifying CUI using the set_cui() funciton. #' @@ -420,6 +478,9 @@ get_content_units <- function(eml_object) { #' get_cui(eml_object) #' } get_cui <- function(eml_object) { + #add in deprecation + lifecycle::deprecate_soft(when = "0.1.5", "set_cui()", "set_cui_dissem()") + cui <- arcticdatautils::eml_get_simple(eml_object, "CUI") if (is.null(cui)) { cat("No CUI specified. Use the set_cui() function to add a properly formatted CUI code.") @@ -446,6 +507,85 @@ get_cui <- function(eml_object) { return(cui) } +#' Returns the CUI marking +#' +#' @description +#' For data with controlled unclassified information (CUI), `get_cui_marking()` eturns the specific marking and the english language explanation of the marking. For data without CUI, it informs that there is no CUI and returns the code "PUBLIC". +#' +#' @details +#' CUI markings are defined by the U.S. National Archives (nara.gov). NPS users can designate one of three CUI markings, plus the code "PUBLIC" (essentially, no marking necessary). The three markings are: SP-NPSR, SP-HISTP or SP-ARCHR. +#' For more information on CUI markings, please visit the [CUI Markings](https://www.archives.gov/cui/registry/category-marking-list) list maintained by the National Archives. +#' +#' +##' @inheritParams get_begin_date +#' +#' @return String (invisibly) +#' @export +#' +#' @examples +#' \dontrun{ +#' get_cui_marking(eml_object) +#' } +get_cui_marking <- function(eml_object) { + # get existing additionalMetadata elements: + add_meta <- eml_object$additionalMetadata + + #get location of CUI markings from addtiional metadata + y <- NULL + for (i in 1:length(seq_along(add_meta))) { + if(names(add_meta[[i]][["metadata"]]) == "CUImarking") { + y <- i + break + } + } + #if there is not yet any CUI marking: + if (is.null(y)) { + cat("Your metadata do not yet contain a CUI marking.\n") + cat("Please use ", + crayon::green$bold("set_cui_marking"), + " to add the appropriate CUI marking to your metadata.", + sep = "") + return(invisible()) + } + + #if CUI marking exists: + if (!is.null(y)) { + #get existing CUI marking: + existing_cui_marking <- add_meta[[y]][["metadata"]][["CUImarking"]] + if(existing_cui_marking == "PUBLIC") { + msg <- paste0("Your CUI marking is set to ", + crayon::blue("PUBLIC"), + ". This means the data do not contain CUI.") + } else if (existing_cui_marking == "SP-NPSR") { + msg <- paste0("Your CUI marking is set to ", + crayon::blue$bold(existing_cui_marking), + ". This means the CUI in the data is related to", + " information concerning the nature and specific location", + " of a National Park System resource that is endangered, ", + "threatened, rare, or commercially valuable, of mineral", + " or paleontological objects within System units, or of", + " objects of cultural patrimony within System unit") + } else if (existing_cui_marking == "SP-HISTP") { + msg <- paste0("Your CUI marking is set to ", + crayon::blue$bold(existing_cui_marking), + ". This means the CUI in the data is related to the", + " location character, or ownership of historic property.") + } else if (existing_cui_marking == "SP-ARCHR") { + msg <- paste0("Your CUI marking is set to ", + crayon::blue$bold(existing_cui_marking), + ". This means the CUI in the data is related to ", + "information about the nature and location of any", + " archaeological resource for which the excavation or", + " removal requires a permit or other permission.") + } else { + warning("CUI marking is not properly set. Please use set_cui_marking to fix it.") + msg <- NA + } + } + cat(msg) + return(invisible(msg)) +} + #' displays file names, sizes, and descriptions #' @@ -587,6 +727,19 @@ get_producing_units <- function(eml_object) { return(punit) } +#' Returns the publisher information +#' +#' @description `get_publisher()` returns a list that includes all the information about the publisher stored in EML. +#' +#' @inheritParams get_begin_date +#' +#' @return List. +#' @export +#' +#' @examples +#' \dontrun{ +#' get_publisher(eml_object) +#' } get_publisher <- function(eml_object) { pub <- eml_object$dataset$publisher } diff --git a/R/utils.R b/R/utils.R index fc5c3b4..c56d196 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,3 +1,4 @@ + ## assign global package variables #initiate new environment accessible from within package: @@ -27,7 +28,10 @@ assign("ds_dev_api", "https://irmadevservices.nps.gov/datastore-secure/v6/rest/" #this gets rid of the "no visible binding for global variable 'x'" error in build checks: globalVariables(c("UnitCode", "Name", - "Alpha_3_B")) + "Alpha_3_B", + "msg", + "msg2", + "na.exclude")) #' inject NPS Publisher info into metadata @@ -60,7 +64,7 @@ globalVariables(c("UnitCode", country = "USA" ), onlineUrl = "http://www.nps.gov", - electronicMailAddress = "irma@nps.gov", + electronicMailAddress = "nrss_datastore@nps.gov", userId = list(directory = "https://ror.org/", userId = "https://ror.org/044zqqy65") ) @@ -170,7 +174,7 @@ globalVariables(c("UnitCode", error = function(e) { message( paste0(crayon::bold$red(unit_code), - " is not a valid park unit. Please supply valid park units")) + " is not a valid park unit. Please supply valid park units.")) } ) return(park_polygon) @@ -233,3 +237,35 @@ globalVariables(c("UnitCode", } return(eml_object) } + + +#' Get Binary User Input +#' +#' Prompts for, gets, and returns binary user input (1 or 2) +#' +#' @return Factor. 1 or 2. +#' +#' @examples +#' \dontrun{ +#' var1 <- .get_user_input() +#' } +.get_user_input <- function () { + var1 <- readline(prompt = "1: Yes\n2: No\n") + return(var1) +} + +#' Get open ended user input +#' +#' @description Does not prompt for user input. Takes any user input supplied and returns it. +#' +#' @return character, typically 1, 2, or 3 but could be any character string. +#' @export +#' +#' @examples +#' \dontrun{ +#' var1 <- .get_user_input3() +#' } +.get_user_input3 <- function() { + var1 <- readline(prompt = ("")) + return(var1) +} diff --git a/README.Rmd b/README.Rmd index 7e16389..32125fa 100644 --- a/README.Rmd +++ b/README.Rmd @@ -25,7 +25,7 @@ knitr::opts_chunk$set( ## Overview -The goal of EMLeditor is to edit EML-formatted xml files. Specifically, EMLeditor provides many functions that will be useful to the U.S. National Park Service when generating metadata for statistical data packages uploaded to DataStore. NPS affiliation is assumed as default. However, some of the functions for viewing and editing metadata may be useful to people outside the NPS. +The goal of EMLeditor is to edit EML-formatted xml files. Specifically, EMLeditor provides many functions that will be useful to the U.S. National Park Service when generating metadata for statistical data packages uploaded to DataStore. NPS affiliation is assumed as default. However, the functions for viewing and editing metadata may be useful to people outside the NPS. ## Installation and updates @@ -46,11 +46,11 @@ devtools::install_github("nationalparkservice/NPSdataverse") EMLeditor comes with a template Rmarkdown script that you can edit to generate a fully fledged EML document. The script includes and accompanying documentation includes information on: -1) Generating an initial EML document using the R/EMLassemblyline package functions -2) Adding in NPS specific and DataStore specific EML elements using the R/EMLeditor package functions -3) Checking the EML document to make sure it is schema-valid and passes all the necessary tests for uploading to DataStore -4) Generating a draft data package reference on DataStore and incorporating DOIs into the metadata -5) Uploading a completed data package to DataStore +1) Generating an initial EML document using the R/EMLassemblyline package functions +2) Adding in NPS specific and DataStore specific EML elements using the R/EMLeditor package functions +3) Generating a draft data package reference on DataStore and incorporating DOIs into the metadata +4) Checking the EML document to make sure it is schema-valid and passes all the necessary tests for uploading to DataStore (using the `run_congruence_checks()` function from the [DPchecker](https://nationalparkservice.github.io/DPchecker/) package) +5) Uploading a completed data package to DataStore Please *DO NOT ACTIVATE* the DataStore reference: prior to activation, data packages need to be reviewed via a yet-to-be-created process. @@ -63,7 +63,3 @@ To access the EML creation script from within EMLeditor, install (or update) the If you use EMLeditor functions to alter your metadata (e.g. "set" class functions) they will also silently add the National Park Service as a publisher (including location, [ROR id](https://ror.org/), etc) to your metadata unless you set NPS=FALSE. If you leave the default setting as NPS=TRUE, EMLeditor will also assume the data package is being created "by or for the NPS" and add that information to the metadata. EMLeditor will also add information about the version of EMLeditor you used to edit your metadata (for instance if you used "set" class functions). - - - - diff --git a/api_working.R b/api_working.R deleted file mode 100644 index 01899d5..0000000 --- a/api_working.R +++ /dev/null @@ -1,112 +0,0 @@ - -#File upload: -# 1) User inputs path to data package -# 2) function reads data package file names -# a. function checks for just .csv and one .xml -# 3) function loads the EML formatted .xml file -# 4) function extracts DS reference ID from metadata (DOI field) -# 5) For each file in the data package: -# a. API call POST: Request token -# Requires file name (new token for each file?) -# Requires datastore reference id -# b. API call PUT: upload file (in fragments?) -# Requires token (which includes reference id info?) -# requires path to file -# c. Check each fragment for successful upload -# d. Some sort of upload progress indicator? -# d. Tell user upon successfully uploading each full file -# 6) Tell user upon successful completion of data package upload -# 7) Provide url to draft reference - - - - -library(httr) -library(jsonlite) -library(tidyverse) - -#### next try using the PUT method (more complex, requires token, upload files -# in fragments and then check each frag for completion; re-run incompletefrags) - -req <- httr::POST("https://irmadevservices.nps.gov/datastore-secure/v4/rest/Reference/2297221/UploadFile/TokenRequest", - httr::authenticate(":", "", "ntlm"), - httr::add_headers('Content-Type'='application/json'), - body = "\"Book1.csv\"", - httr::verbose() - ) -# status_code<-httr::stop_for_status(req)$status_code -# status_code -#[1] 200 - -#MIME encoding types: xml typically "application/xml" but also "text/xml" -#MIME encoding types: csv "text/csv" - -r2 <- httr::PUT(url, - httr::content_type("text/csv"), - httr::authenticate(":", "", "ntlm"), - httr::add_headers('Content-Range' = 'bytes 0-83/83'), - body = curl::form_data(readBin("Book1.csv", - "raw", - file.info("Book1.csv")$size), - type= "text/csv"), #type necessary for xml? - httr::progress(type="up", con=""), - httr::verbose() - ) - - -req <- httr::POST("https://irmadevservices.nps.gov/datastore-secure/v4/rest/Reference/2297221/UploadFile/TokenRequest", - httr::authenticate(":", "", "ntlm"), - httr::add_headers('Content-Type'='application/json'), - body = "\"Book1.csv\"", - httr::verbose() -) -url<-req$headers$location -url -#[1] "https://irmadevservices.nps.gov/datastore-secure/v4/rest/Reference/2297221/UploadFile/d78f8a90-c162fa69" - -Book1<-read.csv("Book1.csv") - -fsize <- file.size("Book1.csv") -#[1] 83 -toread <- file("Book1.csv", "rb") #rb open connection for reading in binary mode -data_read <- readBin(toread, integer(), size=1, n=(fsize/2), endian = "little") #read in data in binary mode - -writeBin(data_read, "book1.bin", useBytes= TRUE) - -r2 <- httr::PUT(url, - httr::add_headers('Content-Range' = 'bytes 0-49/83'), - httr::content_type("text/csv"), - httr::authenticate(":", "", "ntlm"), - body = curl::form_data( - readBin(toread, integer(), - size=1, n=50, endian = "little"), - type="application/csv"), - - #type="application/csv"), - #httr::progress(type="up", con=""), - httr::verbose() -) - -r2 <- httr::PUT(url, - httr::add_headers('Content-Range' = 'bytes 0-40/83'), - httr::content_type("application/octet-stream"), - httr::authenticate(":", "", "ntlm"), - body = list("file"=curl::form_data("Book1.bin", - type="application/octet-stream"), - encode="multipart"), - httr::progress(type="up", con=""), - httr::verbose() -) - - -#/rest/Reference/{referenceID}/UploadFile/Status/{token} -url2<-"https://irmadevservices.nps.gov/datastore-secure/v4/rest/Reference/2297738/UploadFile/Status/91a029f0-1a2c8b0d" - -req3 <- httr::GET(url) - -req2 -str(req2) -json<-httr::content(req2, "text") -json -getwd() -list.files(full.names = TRUE) diff --git a/docs/404.html b/docs/404.html index bf66f96..61a7a3e 100644 --- a/docs/404.html +++ b/docs/404.html @@ -24,7 +24,7 @@ EMLeditor - 0.1.4 + 0.1.5 + + + + + +
+
+
+ +
+

Prompts for, gets, and returns binary user input (1 or 2)

+
+ +
+

Usage

+
.get_user_input()
+
+ +
+

Value

+ + +

Factor. 1 or 2.

+
+ +
+

Examples

+
if (FALSE) {
+var1 <- .get_user_input()
+}
+
+
+
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/dot-get_user_input3.html b/docs/reference/dot-get_user_input3.html new file mode 100644 index 0000000..b201797 --- /dev/null +++ b/docs/reference/dot-get_user_input3.html @@ -0,0 +1,100 @@ + +Get open ended user input — .get_user_input3 • EMLeditor + Skip to contents + + +
+
+
+ +
+

Does not prompt for user input. Takes any user input supplied and returns it.

+
+ +
+

Usage

+
.get_user_input3()
+
+ +
+

Value

+ + +

character, typically 1, 2, or 3 but could be any character string.

+
+ +
+

Examples

+
if (FALSE) {
+var1 <- .get_user_input3()
+}
+
+
+
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/dot-set_for_by_nps.html b/docs/reference/dot-set_for_by_nps.html index 57c39a3..66ba6aa 100644 --- a/docs/reference/dot-set_for_by_nps.html +++ b/docs/reference/dot-set_for_by_nps.html @@ -10,7 +10,7 @@ EMLeditor - 0.1.4 + 0.1.5 + + + + + +
+
+
+ +
+

get_cui_code() returns an English-language translation of the CUI dissemination codes. It supersedes get_cui(), which has been deprecated.

+
+ +
+

Usage

+
get_cui_code(eml_object)
+
+ +
+

Arguments

+
eml_object
+

is an R object imported (typically from an EML-formatted .xml file) using EmL::read_eml(, from="xml").

+ +
+
+

Value

+ + +

a text string

+
+
+

Details

+

get_cui_code() accesses the contents of the Controlled Unclassified Information (CUI) tag, and returns an appropriate string of english-language text based on the properties of the CUI code. If thee tag is empty or does not exist, get_cui alerts the user and suggests specifying CUI using the set_cui() funciton.

+
+ +
+

Examples

+
if (FALSE) {
+get_cui(eml_object)
+}
+
+
+
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/get_cui_marking.html b/docs/reference/get_cui_marking.html new file mode 100644 index 0000000..773e733 --- /dev/null +++ b/docs/reference/get_cui_marking.html @@ -0,0 +1,111 @@ + +Returns the CUI marking — get_cui_marking • EMLeditor + Skip to contents + + +
+
+
+ +
+

For data with controlled unclassified information (CUI), get_cui_marking() eturns the specific marking and the english language explanation of the marking. For data without CUI, it informs that there is no CUI and returns the code "PUBLIC".

+
+ +
+

Usage

+
get_cui_marking(eml_object)
+
+ +
+

Arguments

+
eml_object
+

is an R object imported (typically from an EML-formatted .xml file) using EmL::read_eml(, from="xml").

+ +
+
+

Value

+ + +

String (invisibly)

+
+
+

Details

+

CUI markings are defined by the U.S. National Archives (nara.gov). NPS users can designate one of three CUI markings, plus the code "PUBLIC" (essentially, no marking necessary). The three markings are: SP-NPSR, SP-HISTP or SP-ARCHR. +For more information on CUI markings, please visit the CUI Markings list maintained by the National Archives.

+
+ +
+

Examples

+
if (FALSE) {
+get_cui_marking(eml_object)
+}
+
+
+
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/get_doi.html b/docs/reference/get_doi.html index 0e4fdbd..f01173a 100644 --- a/docs/reference/get_doi.html +++ b/docs/reference/get_doi.html @@ -10,7 +10,7 @@ EMLeditor - 0.1.4 + 0.1.5 + + + + + +
+
+
+ +
+

get_methods() returns the text stored in the methods element of EML metadata. The returned text is not manipulated in any way. DataStore unlists the returned object (get rid of tags such as $methodStep, $methodStep$description and $methodStep$description$para and remove the numbers in brackets). the "\n" character combination is interpreted as a line break (as are blank lines). However, DataStore will not filter out stray characters such as &#13;. Use the set_methods() function to edit and replace the text stored in the methods field.

+
+ +
+

Usage

+
get_methods(eml_object)
+
+ +
+

Arguments

+
eml_object
+

is an R object imported (typically from an EML-formatted .xml file) using EmL::read_eml(, from="xml").

+ +
+
+

Value

+ + +

List

+
+ +
+

Examples

+
if (FALSE) {
+get_methods(eml_object)
+}
+
+
+
+
+
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/get_producing_units.html b/docs/reference/get_producing_units.html index abd624f..48ae6ef 100644 --- a/docs/reference/get_producing_units.html +++ b/docs/reference/get_producing_units.html @@ -10,7 +10,7 @@ EMLeditor - 0.1.4 + 0.1.5 + + + + + +
+
+
+ +
+

get_publisher() returns a list that includes all the information about the publisher stored in EML.

+
+ +
+

Usage

+
get_publisher(eml_object)
+
+ +
+

Arguments

+
eml_object
+

is an R object imported (typically from an EML-formatted .xml file) using EmL::read_eml(, from="xml").

+ +
+
+

Value

+ + +

List.

+
+ +
+

Examples

+
if (FALSE) {
+get_publisher(eml_object)
+}
+
+
+
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/get_title.html b/docs/reference/get_title.html index 9818e43..eb89fd5 100644 --- a/docs/reference/get_title.html +++ b/docs/reference/get_title.html @@ -10,7 +10,7 @@ EMLeditor - 0.1.4 + 0.1.5 + + + + + +
+
+
+ +
+

set_cui_code() adds Controlled Unclassified Information (CUI) dissemination codes to EML metadata. These codes determine who can or cannot have access to the data. Unless you have a specific mandate to restrict data, all data should be available to the public. if the CUI dissemination code is PUBLIC, the CUI marking should also be PUBLIC (see set_cui_marking()) and the license should be set to public domain (or CC0; see set_int_rights()). If your data contains CUI and you need to set the CUI dissemination code to anything other than PUBLIC, please be prepared to provide a legal justification in the form of the appropriate CUI marking (see set_cui_marking()).

+
+ +
+

Usage

+
set_cui_code(
+  eml_object,
+  cui_code = c("PUBLIC", "NOCON", "DL ONLY", "FEDCON", "FED ONLY"),
+  force = FALSE,
+  NPS = TRUE
+)
+
+ +
+

Arguments

+
eml_object
+

is an EML-formatted R object, either generated in R or imported (typically from an EML-formatted .xml file) using EML::read_eml(, from="xml").

+ + +
cui_code
+

a string consisting of one of 7 potential CUI codes: PUBLIC, FED ONLY, FEDCON, DL ONLY, or NOCON

+ + +
force
+

logical. Defaults to false. If set to FALSE, a more interactive version of the function requesting user input and feedback. Setting force = TRUE facilitates scripting.

+ + +
NPS
+

Logical. Defaults to TRUE. Most NPS users should leave this as the default. Only under specific circumstances should it be set to FALSE: if you are not publishing with NPS, if you need to set the publisher location to some place other than the Fort Collins Office (e.g. you are NOT working on a data package) or your product is "for" the NPS but not "by" the NPS and you need to specify a different agency, set NPS = FALSE. When NPS=TRUE, the function will over-write existing publisher info and inject NPS as the publisher along the the Central Office in Fort Collins as the location. Additionally, it sets the "for or by NPS" field to TRUE and specifies the originating agency as NPS.

+ +
+
+

Value

+ + +

an EML-formatted R object

+
+
+

Details

+

set_cui_code() adds a CUI dissemination code to the tag CUI under additionalMetadata/metadata. The available choices for CUI dissemination codes at NPS are (pay attention to the spaces!):

+

PUBLIC: The data contain no CUI, dissemination is not restricted. +FED ONLY: Contains CUI. Only federal employees should have access (similar to the "internal only" setting in DataStore) +FEDCON: Contains CUI Only federal employees and federal contractors should have access to the data (again, very similar to the DataStore "internal only" setting) +DL ONLY: Contains CUI. Should only be available to a named list of individuals. (where and how to supply that list TBD) +NOCON - Contains CUI. Federal, state, local, or tribal employees may have access, but contractors cannot.

+

For a more detailed explanation of the CUI dissemination codes, please see the national archives CUI Registry: Limited Dissemination Controls web page.

+
+ +
+

Examples

+
if (FALSE) {
+set_cui_dissem(eml_object, "PUBLIC")
+}
+
+
+
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/set_cui_marking.html b/docs/reference/set_cui_marking.html new file mode 100644 index 0000000..c82f4b2 --- /dev/null +++ b/docs/reference/set_cui_marking.html @@ -0,0 +1,141 @@ + +The function sets the CUI marking for the data package — set_cui_marking • EMLeditor + Skip to contents + + +
+
+
+ +
+

[Experimental] +The Controlled Unclassified Information (CUI) marking is different from the CUI dissemination code. The CUI dissemination code (set set_cui_code()) sets who can have access to the data package. The CUI marking set by set_cui_marking() specifies the reason (if any) that the data are being restricted. +If the CUI dissemination code is set to PUBLIC, the CUI marking must also be PUBLIC. +If the CUI dissemination code is set to anything other than PUBLIC, the CUI marking must be set to SP-NPSR, SP-HISTP or SP-ARCHR.

+
+ +
+

Usage

+
set_cui_marking(
+  eml_object,
+  cui_marking = c("PUBLIC", "SP-NPSR", "SP-HISTP", "SP-ARCHR"),
+  force = FALSE,
+  NPS = TRUE
+)
+
+ +
+

Arguments

+
eml_object
+

is an EML-formatted R object, either generated in R or imported (typically from an EML-formatted .xml file) using EML::read_eml(, from="xml").

+ + +
cui_marking
+

String. One of four options, "PUBLIC", "SP-NPSR", "SP-HISTP" or "SP-ARCHR" are available.

+ + +
force
+

logical. Defaults to false. If set to FALSE, a more interactive version of the function requesting user input and feedback. Setting force = TRUE facilitates scripting.

+ + +
NPS
+

Logical. Defaults to TRUE. Most NPS users should leave this as the default. Only under specific circumstances should it be set to FALSE: if you are not publishing with NPS, if you need to set the publisher location to some place other than the Fort Collins Office (e.g. you are NOT working on a data package) or your product is "for" the NPS but not "by" the NPS and you need to specify a different agency, set NPS = FALSE. When NPS=TRUE, the function will over-write existing publisher info and inject NPS as the publisher along the the Central Office in Fort Collins as the location. Additionally, it sets the "for or by NPS" field to TRUE and specifies the originating agency as NPS.

+ +
+
+

Value

+ + +

an EML-formatted R object

+
+
+

Details

+

CUI markings are the legal justification for why data are being restricted from the public. If data contain no CUI, the CUI marking must be set to PUBLIC (and the CUI dissemination code must be set to PUBLIC and the license must be set to CC0 or Public Domain). If the data contain CUI (i.e. the CUI dissemination code is not PUBLIC), you must use the CUI marking to provide a legal justification for why the data are restricted. Only one CUI marking can be applied. At NPS, the following markings are available:

+

PUBLIC: The data contain no CUI, dissemination is not restricted. +SP-NPSR: "National Park System Resources" - This material contains information concerning the nature and specific location of a National Park System resource that is endangered, threatened, rare, or commercially valuable, of mineral or paleontological objects within System units, or of objects of cultural patrimony within System units. +SP-HISTP: "Historic Properties" - This material contains information related to the location, character, or ownership of historic property. +SP-ARCHR: "Archaeological Resources" - This material contains information related to information about the nature and location of any archaeological resource for which the excavation or removal requires a permit or other permission.

+

For more information on CUI markings, please visit the CUI Markings list maintained by the National Archives.

+
+ +
+

Examples

+
if (FALSE) {
+eml_object <- set_cui_marking(eml_object, "PUBLIC")
+}
+
+
+
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/set_data_urls.html b/docs/reference/set_data_urls.html index 72d50f9..d2f7437 100644 --- a/docs/reference/set_data_urls.html +++ b/docs/reference/set_data_urls.html @@ -10,7 +10,7 @@ EMLeditor - 0.1.4 + 0.1.5 + + + + + +
+
+
+ +
+

Missing data must have a missing data code and missing data code definition. set_missing_data() can add a single missing value code and single missing value code definition. Missing data should be clearly indicated in the data with a missing data code (e.g "NA", "NaN", "Missing", "blank" etc.). It is generally a good idea to not use special characters for missing data codes (e.g. N/A is not advised). If it is absolutely necessary to leave a cell empty with no code, that cell still needs a missing value code and definition in the metadata. Acceptable codes in this case are "empty" and "blank" with a suitable definition that states the cells are purposefully left empty.

+
+ +
+

Usage

+
set_missing_data(
+  eml_object,
+  files,
+  columns,
+  codes,
+  definitions,
+  force = FALSE,
+  NPS = TRUE
+)
+
+ +
+

Arguments

+
eml_object
+

is an EML-formatted R object, either generated in R or imported (typically from an EML-formatted .xml file) using EML::read_eml(, from="xml").

+ + +
files
+

String or List of strings. These are the files that contain undocumented missing data, e.g "my_data_file1.csv".

+ + +
columns
+

String or List of strings. These are the columns with missing data for which you would like to add missing data codes and explanations in to the metadata, e.g. "scientificName".

+ + +
codes
+

String or list of strings. These are the missing value codes you would like associated with the column in question, e.g. "missing" or "NA".

+ + +
definitions
+

String or list of strings. These are the missing value code definitions associated with the missing value codes, e.g "not recorded" or "sample damaged when the lab flooded".

+ + +
force
+

logical. Defaults to false. If set to FALSE, a more interactive version of the function requesting user input and feedback. Setting force = TRUE facilitates scripting.

+ + +
NPS
+

Logical. Defaults to TRUE. Most NPS users should leave this as the default. Only under specific circumstances should it be set to FALSE: if you are not publishing with NPS, if you need to set the publisher location to some place other than the Fort Collins Office (e.g. you are NOT working on a data package) or your product is "for" the NPS but not "by" the NPS and you need to specify a different agency, set NPS = FALSE. When NPS=TRUE, the function will over-write existing publisher info and inject NPS as the publisher along the the Central Office in Fort Collins as the location. Additionally, it sets the "for or by NPS" field to TRUE and specifies the originating agency as NPS.

+ +
+
+

Value

+ + +

eml_object

+
+
+

Details

+

The set_missing_data() be used on an individual column or can accept lists of files, column names, codes, and definitions. Make sure that each missing value has a file, column, single code, and single definition associated with it (if you need multiple missing value codes and definitions per column, please use the set_more_missing() function). If you have many missing value codes and definitions, you might consider constructing (or import) a dataframe to describe them:

+

Example data frame: +df <- data.frame(files = c("table1.csv", +"table2.csv", +"table3.csv", +"table4.csv"), +columns = c("EventDate", +"scientificName", +"eventID", +"decimalLatitude"), +codes = c("NA", "NA", "missing", "blank"), +definitions = c("not recorded", +"not identified", +"not recorded", +"intentionally left blank - not recorded"))

+

meta2 <- set_missing_data(eml_object = metadata, +files = df$files, +columns = df$columns, +codes = df$codes, +definitions = df$definitions)

+
+ +
+

Examples

+
if (FALSE) {
+#For a single column of data in a single file:
+meta2 <- set_missing_data(my_metadata,
+                          "table1.csv",
+                          "scientificName",
+                          "NA",
+                          "Unable to identify")
+
+#For multiple columns of data, potentially across multiple files:
+#(blank cells must have the missing value code of "blank" or "empty")
+meta2 <- set_missing_data(my_metadata,
+                         files = c("table1.csv", "table1.csv", "table2.csv"),
+                         columns = c("date", "time", "scientificName"),
+                         codes = c("NA", "missing", "blank"),
+                         definitions = c("date not recorded",
+                                       "time not recorded",
+                                       "intentionally left blank - missing")
+                                       )
+}
+
+
+
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/set_producing_units.html b/docs/reference/set_producing_units.html index 5312aa9..795a2b2 100644 --- a/docs/reference/set_producing_units.html +++ b/docs/reference/set_producing_units.html @@ -10,7 +10,7 @@ EMLeditor - 0.1.4 + 0.1.5