From 756d7372886ebc1536dec848a39ecdd4f1815cae Mon Sep 17 00:00:00 2001 From: Wright Date: Mon, 23 Jan 2023 10:41:11 -0700 Subject: [PATCH 01/10] Add option to output results of run_congruence_checks to file instead of console --- R/tabular_data_congruence.R | 28 +++++++++++++++++++++++++++- man/run_congruence_checks.Rd | 8 +++++++- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/R/tabular_data_congruence.R b/R/tabular_data_congruence.R index dc76354..fdd382c 100644 --- a/R/tabular_data_congruence.R +++ b/R/tabular_data_congruence.R @@ -668,6 +668,8 @@ test_date_range <- function(directory = here::here(), metadata = load_metadata(d #' Run all congruence checks #' #' @param check_metadata_only Only run checks on the metadata and skip anything involving data files. +#' @param output_filename Optional. If specified, saves results of congruence checks to this file. If omitted, prints results to console. If the file already exists, results will be appended to the existing file. +#' @param output_dir Location in which to save the output file, if using. #' @inheritParams load_data #' @inheritParams test_metadata_version #' @@ -678,12 +680,30 @@ test_date_range <- function(directory = here::here(), metadata = load_metadata(d #' dir <- DPchecker_example("BICY_veg") #' run_congruence_checks(dir) #' -run_congruence_checks <- function(directory = here::here(), metadata = load_metadata(directory), check_metadata_only = FALSE) { +run_congruence_checks <- function(directory = here::here(), metadata = load_metadata(directory), check_metadata_only = FALSE, output_filename, output_dir = here::here()) { err_count <- 0 warn_count <- 0 total_count <- 10 # Don't forget to update this number when adding more checks! 
+ if (!missing(output_filename)) { + output_dir <- normalizePath(output_dir, winslash = .Platform$file.sep, mustWork = TRUE) + output_path <- file.path(output_dir, output_filename) + open_mode <- if (file.exists(output_path)) { + "at" # if file exists, use append mode + } else { + "wt" # If the file doesn't already exist, use write mode + } + file <- file(output_path, open = open_mode) + sink(file) + sink(file, type = "message") + if (open_mode == "at") { + cli::cli_verbatim("\n\n\n") # If appending to existing log, add a few newlines to make it more readable + } + cli::cli_rule(center = "{Sys.time()}") + cli::cli_inform("The following checks were run using DPchecker version {packageVersion('DPchecker')}.") + } + if (check_metadata_only) { cli::cli_h1("Running metadata-only checks (skipping checks against data files)") } else { @@ -803,6 +823,12 @@ run_congruence_checks <- function(directory = here::here(), metadata = load_meta cli::cli_alert_success("Success! All {check_type} checks passed.") } + if (!missing(output_filename)) { + sink(type = "message") + sink() + close(file) + file.show(output_path) # Opens log file. May want to add option in future to not do this + } return(invisible(c("errors" = err_count, "warnings" = warn_count))) } diff --git a/man/run_congruence_checks.Rd b/man/run_congruence_checks.Rd index 4581f6f..ccfa1da 100644 --- a/man/run_congruence_checks.Rd +++ b/man/run_congruence_checks.Rd @@ -7,7 +7,9 @@ run_congruence_checks( directory = here::here(), metadata = load_metadata(directory), - check_metadata_only = FALSE + check_metadata_only = FALSE, + output_filename, + output_dir = here::here() ) } \arguments{ @@ -16,6 +18,10 @@ run_congruence_checks( \item{metadata}{The metadata object returned by \code{load_metadata}. 
If parameter not provided, defaults to calling \code{load_metadata} in current project directory.} \item{check_metadata_only}{Only run checks on the metadata and skip anything involving data files.} + +\item{output_filename}{Optional. If specified, saves results of congruence checks to this file. If omitted, prints results to console. If the file already exists, results will be appended to the existing file.} + +\item{output_dir}{Location in which to save the output file, if using.} } \value{ Invisibly returns \code{metadata}. From e55aae7f805786e7e66794fe1b818e7069eaa45a Mon Sep 17 00:00:00 2001 From: Wright Date: Tue, 24 Jan 2023 07:26:48 -0700 Subject: [PATCH 02/10] Handle missing metadata elements in test_delimiter and test_dup_meta_entries Fails gracefully when the metadata elements being checked are missing completely --- R/tabular_data_congruence.R | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/R/tabular_data_congruence.R b/R/tabular_data_congruence.R index fdd382c..a5678ef 100644 --- a/R/tabular_data_congruence.R +++ b/R/tabular_data_congruence.R @@ -271,13 +271,17 @@ test_delimiter <- function(metadata = load_metadata(here::here())) { simplify = FALSE) bad_delimit$`@context` <- NULL bad_delimit <- do.call(rbind, bad_delimit) - bad_delimit <- dplyr::filter(bad_delimit, is.na(delimiter) | nchar(delimiter) != 1 | delimiter == "[INVALID]") + if (!is.null(bad_delimit)) { + bad_delimit <- dplyr::filter(bad_delimit, is.na(delimiter) | nchar(delimiter) != 1 | delimiter == "[INVALID]") + } - if (nrow(bad_delimit) == 0) { + if (is.null(bad_delimit) || all(is.na(bad_delimit$delimiter))) { + cli::cli_abort(c("x" = "Metadata does not contain information about the field delimiter for data files")) + } + else if (nrow(bad_delimit) == 0) { cli::cli_inform(c("v" = "Metadata indicates that each data file contains a field delimiter that is a single character")) - } else if (all(is.na(bad_delimit$delimiter))) { - stop("Metadata does not 
contain information about the field delimiter for data files") - } else { + } + else { wrong_delimiters <- bad_delimit$table_name names(wrong_delimiters) <- rep("*", length(wrong_delimiters)) cli::cli_abort(c("x" = "Metadata indicates that the following data files do not contain valid delimiters:", wrong_delimiters)) @@ -309,6 +313,10 @@ test_dup_meta_entries <- function(metadata = load_metadata(here::here())) { # list all file names held in "objectName" fn <- unlist(attribs)[grepl("objectName", names(unlist(attribs)), fixed = T)] + if (length(fn) == 0) { + cli::cli_abort(c("x" = "Metadata file name check failed. No file names found in metadata.")) + } + # find duplicate entries: dups <- fn[duplicated(fn)] From 570729726b3906cf7fabca8dee6f4c6a3d5e0954 Mon Sep 17 00:00:00 2001 From: Wright Date: Thu, 26 Jan 2023 13:46:26 -0700 Subject: [PATCH 03/10] Add functions to check for geographic, publisher, and taxonomic information in metadata --- NAMESPACE | 3 + R/tabular_data_congruence.R | 117 +++++++++++++++++++++++++++++++++--- man/test_geographic_cov.Rd | 23 +++++++ man/test_publisher.Rd | 23 +++++++ man/test_taxonomic_cov.Rd | 23 +++++++ 5 files changed, 182 insertions(+), 7 deletions(-) create mode 100644 man/test_geographic_cov.Rd create mode 100644 man/test_publisher.Rd create mode 100644 man/test_taxonomic_cov.Rd diff --git a/NAMESPACE b/NAMESPACE index 47e8bdb..178c40e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -11,8 +11,11 @@ export(test_dup_meta_entries) export(test_fields_match) export(test_file_name_match) export(test_footer) +export(test_geographic_cov) export(test_header_num) export(test_metadata_version) export(test_numeric_fields) +export(test_publisher) +export(test_taxonomic_cov) export(test_validate_schema) importFrom(magrittr,"%>%") diff --git a/R/tabular_data_congruence.R b/R/tabular_data_congruence.R index a5678ef..9ddd19d 100644 --- a/R/tabular_data_congruence.R +++ b/R/tabular_data_congruence.R @@ -556,6 +556,15 @@ test_numeric_fields <- 
function(directory = here::here(), metadata = load_metada #' test_date_range(dir) test_date_range <- function(directory = here::here(), metadata = load_metadata(directory)) { + missing_temporal <- is.null(arcticdatautils::eml_get_simple(metadata, "temporalCoverage")) + + # Check if temporal coverage info is complete. Throw a warning if it's missing entirely and an error if it's only partially complete. + # The logic being that maybe there's a scenario where temporal coverage isn't relevant to the dataset at all, but if it has date/time info, it has to have both a start and end. + if (missing_temporal) { + cli::cli_warn(c("!" = "Could not check date range. Metadata does not contain temporal coverage information.")) + return(invisible(metadata)) + } + # get dataTable and all children elements data_tbl <- EML::eml_get(metadata, "dataTable") data_tbl$`@context` <- NULL @@ -569,12 +578,7 @@ test_date_range <- function(directory = here::here(), metadata = load_metadata(d meta_end_date <- readr::parse_datetime(EMLeditor::get_end_date(metadata), format = "%d %B %Y") meta_date_range <- c(begin = meta_begin_date, end = meta_end_date) - # Check if temporal coverage info is complete. Throw a warning if it's missing entirely and an error if it's only partially complete. - # The logic being that maybe there's a scenario where temporal coverage isn't relevant to the dataset at all, but if it has date/time info, it has to have both a start and end. - if (all(is.na(meta_date_range))) { - cli::cli_warn(c("!" = "Metadata does not contain temporal coverage information.")) - return(metadata) - } else if (any(is.na(meta_date_range))) { + if (any(is.na(meta_date_range))) { missing_date <- names(meta_date_range[is.na(meta_date_range)]) present_date <- names(meta_date_range[!is.na(meta_date_range)]) cli::cli_warn(c("!" 
= paste("Metadata temporal coverage is missing", missing_date, "date."))) @@ -665,7 +669,6 @@ test_date_range <- function(directory = here::here(), metadata = load_metadata(d } else { cli::cli_warn(c("!" = err, msg)) # If dates all parse but are out of range, just throw a warning. } - } else { cli::cli_inform(c("v" = "Columns indicated as date/time in metadata are within the stated temporal coverage range.")) } @@ -673,6 +676,106 @@ test_date_range <- function(directory = here::here(), metadata = load_metadata(d return(invisible(metadata)) } +#' Check for Taxonomic Coverage +#' Checks if taxonomic coverage element is present in metadata. Does not perform any validation of taxonomic coverage information. +#' +#' @inheritParams test_metadata_version +#' +#' @return Invisibly returns `metadata`. +#' @export +#' +#' @examples +#' meta <- load_metadata(DPchecker_example("BICY_veg")) +#' test_taxonomic_cov(meta) +test_taxonomic_cov <- function(metadata = load_metadata(directory)) { + + missing_taxonomic <- is.null(arcticdatautils::eml_get_simple(metadata, "taxonomicCoverage")) + + if (missing_taxonomic) { + cli::cli_warn(c("!" = "Metadata does not contain taxonomic coverage information.")) + } else { + cli::cli_inform(c("v" = "Metadata contains taxonomic coverage element")) + } + + return(invisible(metadata)) +} + +#' Check for Geographic Coverage +#' Checks if geographic coverage element is present in metadata. Does not perform any validation of geographic coverage information. +#' +#' @inheritParams test_metadata_version +#' +#' @return Invisibly returns `metadata`. +#' @export +#' +#' @examples +#' meta <- load_metadata(DPchecker_example("BICY_veg")) +#' test_geographic_cov(meta) +test_geographic_cov <- function(metadata = load_metadata(directory)) { + + missing_geographic <- is.null(arcticdatautils::eml_get_simple(metadata, "geographicCoverage")) + + if (missing_geographic) { + cli::cli_warn(c("!" 
= "Metadata does not contain geographic coverage information.")) + } else { + cli::cli_inform(c("v" = "Metadata contains geographic coverage element")) + } + + return(invisible(metadata)) +} + +#' Check for Publisher +#' Checks if publisher information is present in metadata, with option to require valid NPS publisher information. +#' +#' @inheritParams test_publisher +#' @param require_nps If TRUE, throw an error if publisher information is not correct for NPS published data. +#' +#' @return Invisibly returns `metadata`. +#' @export +#' +#' @examples +#' meta <- load_metadata(DPchecker_example("BICY_veg")) +#' test_publisher(meta) +test_publisher <- function(metadata = load_metadata(directory), require_nps = FALSE) { + + pub <- EML::eml_get(metadata, "publisher") + # Convert to a vector for easier comparison + if (!is.null(pub)) { + pub$`@context` <- NULL + pub <- unlist(pub) %>% + sort() + } + + valid_nps_pub <- list( + organizationName = + "National Park Service", + address = list( + deliveryPoint = "1201 Oakridge Drive, Suite 150", + city = "Fort Collins", + administrativeArea = "CO", + postalCode = "80525", + country = "USA" + ), + onlineUrl = "http://www.nps.gov", + electronicMailAddress = "irma@nps.gov", + userId = list(directory = "https://ror.org/", userId = "https://ror.org/044zqqy65") + ) %>% + unlist() %>% # Convert to vector for easier comparison + sort() + + if (is.null(pub)) { + cli::cli_abort(c("x" = "Metadata does not contain publisher information.")) + } else if (!require_nps) { + cli::cli_inform(c("v" = "Metadata contains publisher element.")) + } else if (identical(valid_nps_pub, pub)) { + cli::cli_inform(c("v" = "Metadata contains publisher element and correctly designates NPS as the publisher.")) + } else { + cli::cli_abort(c("x" = "Metadata contains publisher element but does not correctly designate NPS as the publisher.")) + } + + return(invisible(metadata)) +} + #' Run all congruence checks #' #' @param check_metadata_only Only run checks 
on the metadata and skip anything involving data files. diff --git a/man/test_geographic_cov.Rd b/man/test_geographic_cov.Rd new file mode 100644 index 0000000..e24bde7 --- /dev/null +++ b/man/test_geographic_cov.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tabular_data_congruence.R +\name{test_geographic_cov} +\alias{test_geographic_cov} +\title{Check for Geographic Coverage +Checks if geographic coverage element is present in metadata. Does not perform any validation of geographic coverage information.} +\usage{ +test_geographic_cov(metadata = load_metadata(directory)) +} +\arguments{ +\item{metadata}{The metadata object returned by \code{load_metadata}. If parameter not provided, defaults to calling \code{load_metadata} in current project directory.} +} +\value{ +Invisibly returns \code{metadata}. +} +\description{ +Check for Geographic Coverage +Checks if geographic coverage element is present in metadata. Does not perform any validation of geographic coverage information. +} +\examples{ +meta <- load_metadata(DPchecker_example("BICY_veg")) +test_geographic_cov(meta) +} diff --git a/man/test_publisher.Rd b/man/test_publisher.Rd new file mode 100644 index 0000000..825edb7 --- /dev/null +++ b/man/test_publisher.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tabular_data_congruence.R +\name{test_publisher} +\alias{test_publisher} +\title{Check for Publisher +Checks if publisher information is present in metadata, with option to require valid NPS publisher information.} +\usage{ +test_publisher(metadata = load_metadata(directory), require_nps = FALSE) +} +\arguments{ +\item{require_nps}{If TRUE, throw an error if publisher information is not correct for NPS published data.} +} +\value{ +Invisibly returns \code{metadata}. 
+} +\description{ +Check for Publisher +Checks if publisher information is present in metadata, with option to require valid NPS publisher information. +} +\examples{ +meta <- load_metadata(DPchecker_example("BICY_veg")) +test_publisher(meta) +} diff --git a/man/test_taxonomic_cov.Rd b/man/test_taxonomic_cov.Rd new file mode 100644 index 0000000..1813098 --- /dev/null +++ b/man/test_taxonomic_cov.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tabular_data_congruence.R +\name{test_taxonomic_cov} +\alias{test_taxonomic_cov} +\title{Check for Taxonomic Coverage +Checks if taxonomic coverage element is present in metadata. Does not perform any validation of taxonomic coverage information.} +\usage{ +test_taxonomic_cov(metadata = load_metadata(directory)) +} +\arguments{ +\item{metadata}{The metadata object returned by \code{load_metadata}. If parameter not provided, defaults to calling \code{load_metadata} in current project directory.} +} +\value{ +Invisibly returns \code{metadata}. +} +\description{ +Check for Taxonomic Coverage +Checks if taxonomic coverage element is present in metadata. Does not perform any validation of taxonomic coverage information. 
+} +\examples{ +meta <- load_metadata(DPchecker_example("BICY_veg")) +test_taxonomic_cov(meta) +} From 14e70bd02c4ac2fbcb3b046a36347a2231c42efd Mon Sep 17 00:00:00 2001 From: Wright Date: Mon, 30 Jan 2023 14:43:45 -0700 Subject: [PATCH 04/10] Add functions to check for DOI and valid field and filenames (no special characters, etc) --- NAMESPACE | 3 + R/tabular_data_congruence.R | 124 +++++++++++++++++++++++++++++++++++ man/test_doi.Rd | 23 +++++++ man/test_valid_fieldnames.Rd | 24 +++++++ man/test_valid_filenames.Rd | 24 +++++++ 5 files changed, 198 insertions(+) create mode 100644 man/test_doi.Rd create mode 100644 man/test_valid_fieldnames.Rd create mode 100644 man/test_valid_filenames.Rd diff --git a/NAMESPACE b/NAMESPACE index 178c40e..eb693d6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,6 +7,7 @@ export(load_metadata) export(run_congruence_checks) export(test_date_range) export(test_delimiter) +export(test_doi) export(test_dup_meta_entries) export(test_fields_match) export(test_file_name_match) @@ -17,5 +18,7 @@ export(test_metadata_version) export(test_numeric_fields) export(test_publisher) export(test_taxonomic_cov) +export(test_valid_fieldnames) +export(test_valid_filenames) export(test_validate_schema) importFrom(magrittr,"%>%") diff --git a/R/tabular_data_congruence.R b/R/tabular_data_congruence.R index 9ddd19d..40fab9c 100644 --- a/R/tabular_data_congruence.R +++ b/R/tabular_data_congruence.R @@ -724,6 +724,30 @@ test_geographic_cov <- function(metadata = load_metadata(directory)) { return(invisible(metadata)) } +#' Check for DOI +#' Checks if DOI is present in metadata. Does not currently validate DOI. +#' +#' @inheritParams test_metadata_version +#' +#' @return Invisibly returns `metadata`. 
+#' @export +#' +#' @examples +#' meta <- load_metadata(DPchecker_example("BICY_veg")) +#' test_geographic_cov(meta) +test_doi <- function(metadata = load_metadata(directory)) { + + missing_doi <- is.null(arcticdatautils::eml_get_simple(eml_object, "alternateIdentifier")) + + if (missing_doi) { + cli::cli_warn(c("!" = "Metadata does not contain a digital object identifier.")) + } else { + cli::cli_inform(c("v" = "Metadata contains a digital object identifier")) + } + + return(invisible(metadata)) +} + #' Check for Publisher #' Checks if publisher information is present in metadata, with option to require valid NPS publisher information. #' @@ -776,6 +800,106 @@ test_publisher <- function(metadata = load_metadata(directory), require_nps = FA return(invisible(metadata)) } +#' Test Field Names for Invalid Characters +#' +#' @description test_valid_fieldnames checks for field names in the metadata that contain invalid special characters. Only underscores and alphanumeric characters are permitted, and names must begin with a letter. +#' +#' @details You should run `test_fields_match()` before you run this function, since this function only checks the field names in the metadata. +#' +#' @inheritParams test_metadata_version +#' +#' @return Invisibly returns `metadata`. +#' @export +#' +#' @examples +#' meta <- load_metadata(DPchecker_example("BICY_veg")) +#' test_valid_fieldnames(meta) +test_valid_fieldnames <- function(metadata = load_metadata(here::here())) { + + # get dataTable and all children elements + data_tbl <- EML::eml_get(metadata, "dataTable") + # If there's only one csv, data_tbl ends up with one less level of nesting. 
Re-nest it so that the rest of the code works consistently + if ("attributeList" %in% names(data_tbl)) { + data_tbl <- list(data_tbl) + } + + # Get list of columns for each table in the metadata + metadata_attrs <- lapply(data_tbl, function(tbl) {arcticdatautils::eml_get_simple(tbl, "attributeName")}) + metadata_attrs$`@context` <- NULL + names(metadata_attrs) <- arcticdatautils::eml_get_simple(data_tbl, "objectName") + + # Check each table. Throw a warning if they contain special characters + bad_fieldnames <- sapply(names(metadata_attrs), function(tbl) { + cols <- metadata_attrs[[tbl]] + bad_start <- grepl("^[^a-zA-Z]", cols) # Col names must start with a letter + special_chars <- grepl("[^a-zA-Z0-9_\\.]", cols) # No special characters in col names (only alphanumeric and underscores allowed) + + bad_cols <- cols[bad_start | special_chars] + + if (length(bad_cols) == 0) { # No problems + return(NULL) + } else { + msg <- c(" " = paste0("--> {.file ", data_file, "}: ", paste0("{.field ", bad_cols, "}", collapse = ", "))) + return(msg) + } + }, USE.NAMES = FALSE, simplify = FALSE) + + # Remove tables from list that pass the test, and convert it to a named vector + bad_fieldnames <- purrr::discard(bad_fieldnames, is.null) %>% + unlist() + + # If there are mismatches, throw an error, otherwise, print a message indicating passed test + if (!is.null(bad_fieldnames)) { + cli::cli_warn(c("x" = "Some column names contain special characters and/or do not begin with a letter:", mismatches)) + } else { + cli::cli_inform(c("v" = "Column names begin with a letter and do not contain spaces or special characters.")) + } + + return(invisible(metadata)) +} + +#' Test File Names for Invalid Characters +#' +#' @description test_valid_filenames checks for file names in the metadata that contain invalid special characters. Only underscores and alphanumeric characters are permitted, and names must begin with a letter. 
+#' +#' @details You should run `test_file_name_match()` before you run this function, since this function only checks the file names in the metadata. +#' +#' @inheritParams test_metadata_version +#' +#' @return Invisibly returns `metadata`. +#' @export +#' +#' @examples +#' meta <- load_metadata(DPchecker_example("BICY_veg")) +#' test_valid_filenames(meta) +test_valid_filenames <- function(metadata = load_metadata(here::here())) { + + # get dataTable and all children elements + data_tbl <- EML::eml_get(metadata, "dataTable") + # If there's only one csv, data_tbl ends up with one less level of nesting. Re-nest it so that the rest of the code works consistently + if ("attributeList" %in% names(data_tbl)) { + data_tbl <- list(data_tbl) + } + + # Get vector of filenames from the metadata + file_names <- arcticdatautils::eml_get_simple(data_tbl, "objectName") + + # Check each file name. Throw a warning if any contain special characters + bad_start <- grepl("^[^a-zA-Z]", file_names) # File names must start with a letter + special_chars <- grepl("[^a-zA-Z0-9_\\.]", file_names) # No special characters in file names (only alphanumeric and underscores allowed) + + bad_names <- file_names[bad_start | special_chars] + + # If there are mismatches, throw an error, otherwise, print a message indicating passed test + if (length(bad_names) > 0) { + cli::cli_warn(c("x" = paste("Some file names contain special characters and/or do not begin with a letter:", paste0("{.file ", bad_names, "}", collapse = ", ")))) + } else { + cli::cli_inform(c("v" = "File names begin with a letter and do not contain spaces or special characters.")) + } + + return(invisible(metadata)) +} + #' Run all congruence checks #' #' @param check_metadata_only Only run checks on the metadata and skip anything involving data files. 
diff --git a/man/test_doi.Rd b/man/test_doi.Rd new file mode 100644 index 0000000..cc781c0 --- /dev/null +++ b/man/test_doi.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tabular_data_congruence.R +\name{test_doi} +\alias{test_doi} +\title{Check for DOI +Checks if DOI is present in metadata. Does not currently validate DOI.} +\usage{ +test_doi(metadata = load_metadata(directory)) +} +\arguments{ +\item{metadata}{The metadata object returned by \code{load_metadata}. If parameter not provided, defaults to calling \code{load_metadata} in current project directory.} +} +\value{ +Invisibly returns \code{metadata}. +} +\description{ +Check for DOI +Checks if DOI is present in metadata. Does not currently validate DOI. +} +\examples{ +meta <- load_metadata(DPchecker_example("BICY_veg")) +test_geographic_cov(meta) +} diff --git a/man/test_valid_fieldnames.Rd b/man/test_valid_fieldnames.Rd new file mode 100644 index 0000000..70e409c --- /dev/null +++ b/man/test_valid_fieldnames.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tabular_data_congruence.R +\name{test_valid_fieldnames} +\alias{test_valid_fieldnames} +\title{Test Field Names for Invalid Characters} +\usage{ +test_valid_fieldnames(metadata = load_metadata(here::here())) +} +\arguments{ +\item{metadata}{The metadata object returned by \code{load_metadata}. If parameter not provided, defaults to calling \code{load_metadata} in current project directory.} +} +\value{ +Invisibly returns \code{metadata}. +} +\description{ +test_valid_fieldnames checks for field names in the metadata that contain invalid special characters. Only underscores and alphanumeric characters are permitted, and names must begin with a letter. +} +\details{ +You should run \code{test_fields_match()} before you run this function, since this function only checks the field names in the metadata. 
+} +\examples{ +meta <- load_metadata(DPchecker_example("BICY_veg")) +test_valid_fieldnames(meta) +} diff --git a/man/test_valid_filenames.Rd b/man/test_valid_filenames.Rd new file mode 100644 index 0000000..6516452 --- /dev/null +++ b/man/test_valid_filenames.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tabular_data_congruence.R +\name{test_valid_filenames} +\alias{test_valid_filenames} +\title{Test File Names for Invalid Characters} +\usage{ +test_valid_filenames(metadata = load_metadata(here::here())) +} +\arguments{ +\item{metadata}{The metadata object returned by \code{load_metadata}. If parameter not provided, defaults to calling \code{load_metadata} in current project directory.} +} +\value{ +Invisibly returns \code{metadata}. +} +\description{ +test_valid_filenames checks for file names in the metadata that contain invalid special characters. Only underscores and alphanumeric characters are permitted, and names must begin with a letter. +} +\details{ +You should run \code{test_file_name_match()} before you run this function, since this function only checks the file names in the metadata. 
+} +\examples{ +meta <- load_metadata(DPchecker_example("BICY_veg")) +test_valid_filenames(meta) +} From 8ce4a0806e1126fe6f72fc93185d9d9814b6f96c Mon Sep 17 00:00:00 2001 From: Wright Date: Tue, 31 Jan 2023 18:22:24 -0700 Subject: [PATCH 05/10] Fix minor bug in test_file_name_match and remove confusing text from error messages in run_congruence_checks --- R/tabular_data_congruence.R | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/R/tabular_data_congruence.R b/R/tabular_data_congruence.R index 40fab9c..fac64cb 100644 --- a/R/tabular_data_congruence.R +++ b/R/tabular_data_congruence.R @@ -365,14 +365,16 @@ test_file_name_match <- function(directory = here::here(), metadata = load_metad if (length(meta_only) == 0 && length(dir_only) == 0) { cli::cli_inform(c("v" = "All data files are listed in metadata and all metadata files names refer to data files.")) } else if (length(meta_only) > 0 || length(dir_only) > 0) { + msg <- c() if (length(meta_only > 0)) { - names(meta_only) <- "*" + names(meta_only) <- rep("*", length(meta_only)) + msg <- c("x" = "{length(meta_only)} file{?s} listed in metadata and missing from data folder", meta_only) } if (length(dir_only) > 0) { - names(dir_only) <- "*" + names(dir_only) <- rep("*", length(dir_only)) + msg <- c(msg, "x" = "{length(dir_only)} file{?s} present in data folder and missing from metadata", dir_only) } - cli::cli_abort(c("x" = "{length(meta_only)} file{?s} listed in metadata and missing from data folder", meta_only, - "x" = "{length(dir_only)} file{?s} present in data folder and missing from metadata", dir_only)) + cli::cli_abort(msg) } return(invisible(metadata)) @@ -949,16 +951,16 @@ run_congruence_checks <- function(directory = here::here(), metadata = load_meta error = function(e) { err_count <<- err_count + 1 cli::cli_alert_danger("Schema validation failed. 
Run {.fn test_validate_schema} for details.") - cli::cli_abort(c("x" = "You must correct the above error before the rest of the congruence checks can run."))}, + cli::cli_abort(c("x" = "Metadata schema must validate before the rest of the congruence checks can run."), call = NULL)}, warning = function(w) { warn_count <<- warn_count + 1 - cli::cli_alert_warning("Schema validation warnings exist. Run {.fn test_validate_schema} for details.") + cli::cli_alert_warning("Schema validation warnings exist. Run {.fn test_validate_schema} for details.", call = NULL) }) tryCatch(test_dup_meta_entries(metadata), error = function(e) { err_count <<- err_count + 1 cli::cli_bullets(c(e$message, e$body)) - cli::cli_abort(c("x" = "You must correct the above error before the rest of the congruence checks can run."))}, + cli::cli_abort(c("x" = "You must remove duplicate data table names from metadata before the rest of the congruence checks can run."), call = NULL)}, warning = function(w) { warn_count <<- warn_count + 1 cli::cli_bullets(c(w$message, w$body)) @@ -1006,7 +1008,7 @@ run_congruence_checks <- function(directory = here::here(), metadata = load_meta error = function(e) { err_count <<- err_count + 1 cli::cli_bullets(c(e$message, e$body)) - cli::cli_abort(c("x" = "You must correct the above error before the rest of the congruence checks can run.")) + cli::cli_abort(c("x" = "Files documented in metadata must match files present in package before the rest of the congruence checks can run."), call = NULL) }, warning = function(w) { warn_count <<- warn_count + 1 @@ -1016,7 +1018,7 @@ run_congruence_checks <- function(directory = here::here(), metadata = load_meta error = function(e) { err_count <<- err_count + 1 cli::cli_bullets(c(e$message, e$body)) - cli::cli_abort(c("x" = "You must correct the above error before the rest of the congruence checks can run.")) + cli::cli_abort(c("x" = "Columns documented in metadata must match columns present in data files before the rest of the 
congruence checks can run."), call = NULL) }, warning = function(w) { warn_count <<- warn_count + 1 From f059720d97d57da41d2b244743214258524165de Mon Sep 17 00:00:00 2001 From: Wright Date: Thu, 2 Feb 2023 21:50:16 -0700 Subject: [PATCH 06/10] Bug fixes --- R/tabular_data_congruence.R | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/R/tabular_data_congruence.R b/R/tabular_data_congruence.R index fac64cb..fa7b47e 100644 --- a/R/tabular_data_congruence.R +++ b/R/tabular_data_congruence.R @@ -271,14 +271,14 @@ test_delimiter <- function(metadata = load_metadata(here::here())) { simplify = FALSE) bad_delimit$`@context` <- NULL bad_delimit <- do.call(rbind, bad_delimit) - if (!is.null(bad_delimit)) { - bad_delimit <- dplyr::filter(bad_delimit, is.na(delimiter) | nchar(delimiter) != 1 | delimiter == "[INVALID]") - } if (is.null(bad_delimit) || all(is.na(bad_delimit$delimiter))) { cli::cli_abort(c("x" = "Metadata does not contain information about the field delimiter for data files")) } - else if (nrow(bad_delimit) == 0) { + + bad_delimit <- dplyr::filter(bad_delimit, is.na(delimiter) | nchar(delimiter) != 1 | delimiter == "[INVALID]") + + if (nrow(bad_delimit) == 0) { cli::cli_inform(c("v" = "Metadata indicates that each data file contains a field delimiter that is a single character")) } else { @@ -368,11 +368,11 @@ test_file_name_match <- function(directory = here::here(), metadata = load_metad msg <- c() if (length(meta_only > 0)) { names(meta_only) <- rep("*", length(meta_only)) - msg <- c("x" = "{length(meta_only)} file{?s} listed in metadata and missing from data folder", meta_only) + msg <- c("x" = "{length(meta_only)} file{?s} listed in metadata but missing from data folder", meta_only) } if (length(dir_only) > 0) { names(dir_only) <- rep("*", length(dir_only)) - msg <- c(msg, "x" = "{length(dir_only)} file{?s} present in data folder and missing from metadata", dir_only) + msg <- c(msg, "x" = "{length(dir_only)} 
file{?s} present in data folder but missing from metadata", dir_only) } cli::cli_abort(msg) } @@ -696,7 +696,7 @@ test_taxonomic_cov <- function(metadata = load_metadata(directory)) { if (missing_taxonomic) { cli::cli_warn(c("!" = "Metadata does not contain taxonomic coverage information.")) } else { - cli::cli_inform(c("v" = "Metadata contains taxonomic coverage element")) + cli::cli_inform(c("v" = "Metadata contains taxonomic coverage element.")) } return(invisible(metadata)) @@ -739,12 +739,13 @@ test_geographic_cov <- function(metadata = load_metadata(directory)) { #' test_geographic_cov(meta) test_doi <- function(metadata = load_metadata(directory)) { - missing_doi <- is.null(arcticdatautils::eml_get_simple(eml_object, "alternateIdentifier")) + doi <- arcticdatautils::eml_get_simple(metadata, "alternateIdentifier") + missing_doi <- is.null(doi) || !any(grepl("^doi\\:", doi)) if (missing_doi) { cli::cli_warn(c("!" = "Metadata does not contain a digital object identifier.")) } else { - cli::cli_inform(c("v" = "Metadata contains a digital object identifier")) + cli::cli_inform(c("v" = "Metadata contains a digital object identifier.")) } return(invisible(metadata)) @@ -753,7 +754,7 @@ test_doi <- function(metadata = load_metadata(directory)) { #' Check for Publisher #' Checks if publisher information is present in metadata, with option to require valid NPS publisher information. #' -#' @inheritParams test_publisher +#' @inheritParams test_metadata_version #' @param require_nps If TRUE, throw an error if publisher information is not correct for NPS published data. #' #' @return Invisibly returns `metadata`. 
@@ -841,7 +842,7 @@ test_valid_fieldnames <- function(metadata = load_metadata(here::here())) { if (length(bad_cols) == 0) { # No problems return(NULL) } else { - msg <- c(" " = paste0("--> {.file ", data_file, "}: ", paste0("{.field ", bad_cols, "}", collapse = ", "))) + msg <- c(" " = paste0("--> {.file ", tbl, "}: ", paste0("{.field ", bad_cols, "}", collapse = ", "))) return(msg) } }, USE.NAMES = FALSE, simplify = FALSE) @@ -852,9 +853,9 @@ test_valid_fieldnames <- function(metadata = load_metadata(here::here())) { # If there are mismatches, throw an error, otherwise, print a message indicating passed test if (!is.null(bad_fieldnames)) { - cli::cli_warn(c("x" = "Some column names contain special characters and/or do not begin with a letter:", mismatches)) + cli::cli_warn(c("!" = "Some field names contain special characters and/or do not begin with a letter:", bad_fieldnames)) } else { - cli::cli_inform(c("v" = "Column names begin with a letter and do not contain spaces or special characters.")) + cli::cli_inform(c("v" = "Field names begin with a letter and do not contain spaces or special characters.")) } return(invisible(metadata)) @@ -894,7 +895,7 @@ test_valid_filenames <- function(metadata = load_metadata(here::here())) { # If there are mismatches, throw an error, otherwise, print a message indicating passed test if (length(bad_names) > 0) { - cli::cli_warn(c("x" = paste("Some file names contain special characters and/or do not begin with a letter:", paste0("{.file ", bad_names, "}", collapse = ", ")))) + cli::cli_warn(c("!" 
= paste("Some file names contain special characters and/or do not begin with a letter:", paste0("{.file ", bad_names, "}", collapse = ", ")))) } else { cli::cli_inform(c("v" = "File names begin with a letter and do not contain spaces or special characters.")) } From c863cd78853e4bee3fb8b4ee76291ef48ca1b3c2 Mon Sep 17 00:00:00 2001 From: Wright Date: Thu, 2 Feb 2023 21:50:41 -0700 Subject: [PATCH 07/10] Add new tests to run_congruence_checks, add unit tests --- R/tabular_data_congruence.R | 54 ++++++++ .../_snaps/tabular_data_congruence.md | 12 ++ tests/testthat/test-tabular_data_congruence.R | 124 +++++++++++++++++- 3 files changed, 185 insertions(+), 5 deletions(-) diff --git a/R/tabular_data_congruence.R b/R/tabular_data_congruence.R index fa7b47e..6532102 100644 --- a/R/tabular_data_congruence.R +++ b/R/tabular_data_congruence.R @@ -1002,6 +1002,60 @@ run_congruence_checks <- function(directory = here::here(), metadata = load_meta warn_count <<- warn_count + 1 cli::cli_bullets(c(e$message, e$body)) }) + tryCatch(test_taxonomic_cov(metadata), + error = function(e) { + err_count <<- err_count + 1 + cli::cli_bullets(c(e$message, e$body)) + }, + warning = function(w) { + warn_count <<- warn_count + 1 + cli::cli_bullets(c(e$message, e$body)) + }) + tryCatch(test_geographic_cov(metadata), + error = function(e) { + err_count <<- err_count + 1 + cli::cli_bullets(c(e$message, e$body)) + }, + warning = function(w) { + warn_count <<- warn_count + 1 + cli::cli_bullets(c(e$message, e$body)) + }) + tryCatch(test_doi(metadata), + error = function(e) { + err_count <<- err_count + 1 + cli::cli_bullets(c(e$message, e$body)) + }, + warning = function(w) { + warn_count <<- warn_count + 1 + cli::cli_bullets(c(e$message, e$body)) + }) + tryCatch(test_publisher(metadata), + error = function(e) { + err_count <<- err_count + 1 + cli::cli_bullets(c(e$message, e$body)) + }, + warning = function(w) { + warn_count <<- warn_count + 1 + cli::cli_bullets(c(e$message, e$body)) + }) + 
tryCatch(test_valid_fieldnames(metadata), + error = function(e) { + err_count <<- err_count + 1 + cli::cli_bullets(c(e$message, e$body)) + }, + warning = function(w) { + warn_count <<- warn_count + 1 + cli::cli_bullets(c(e$message, e$body)) + }) + tryCatch(test_valid_filenames(metadata), + error = function(e) { + err_count <<- err_count + 1 + cli::cli_bullets(c(e$message, e$body)) + }, + warning = function(w) { + warn_count <<- warn_count + 1 + cli::cli_bullets(c(e$message, e$body)) + }) if (!check_metadata_only) { cli::cli_h2("Checking that metadata is consistent with data file(s)") diff --git a/tests/testthat/_snaps/tabular_data_congruence.md b/tests/testthat/_snaps/tabular_data_congruence.md index ae6cd33..ab38d67 100644 --- a/tests/testthat/_snaps/tabular_data_congruence.md +++ b/tests/testthat/_snaps/tabular_data_congruence.md @@ -15,6 +15,12 @@ v Metadata indicates that each data file contains a field delimiter that is a single character v Metadata indicates that each data file contains exactly one header row. v Metadata indicates data files do not have footers. + v Metadata contains taxonomic coverage element. + v Metadata contains geographic coverage element + v Metadata contains a digital object identifier. + v Metadata contains publisher element. + v Field names begin with a letter and do not contain spaces or special characters. + v File names begin with a letter and do not contain spaces or special characters. Message -- Checking that metadata is consistent with data file(s) -- @@ -47,6 +53,12 @@ v Metadata indicates that each data file contains a field delimiter that is a single character v Metadata indicates that each data file contains exactly one header row. v Metadata indicates data files do not have footers. + v Metadata contains taxonomic coverage element. + v Metadata contains geographic coverage element + v Metadata contains a digital object identifier. + v Metadata contains publisher element. 
+ v Field names begin with a letter and do not contain spaces or special characters. + v File names begin with a letter and do not contain spaces or special characters. Message -- Summary -- diff --git a/tests/testthat/test-tabular_data_congruence.R b/tests/testthat/test-tabular_data_congruence.R index 876e6e9..43699d5 100644 --- a/tests/testthat/test-tabular_data_congruence.R +++ b/tests/testthat/test-tabular_data_congruence.R @@ -1,5 +1,18 @@ +# There must be a more elegant way to do this...R CMD check only works if +# you treat testthat as the working directory, but running tests any other way +# expects the package dir (DPchecker) to be the working dir. + good_dir <- here::here("tests", "testthat", "good") bad_dir <- here::here("tests", "testthat", "bad") +bicy_meta <- load_metadata(here::here("tests", "testthat", "good", "BICY_good")) +buis_meta <- load_metadata(here::here("tests", "testthat", "good", "BUIS_good")) + +# Comment out the block above and use this block instead if running R CMD check +# good_dir <- "good" +# bad_dir <- "bad" +# bicy_meta <- load_metadata("good/BICY_good") +# buis_meta <- load_metadata("good/BUIS_good") + # ---- load_metadata ---- test_that("load_metadata works on valid EML file", { @@ -38,9 +51,9 @@ test_that("load_metadata throws an error when there are multiple xml files with # ---- run_congruence_checks ---- cli::test_that_cli("run_congruence_checks works", configs = "plain", { expect_error(run_congruence_checks(here::here(bad_dir, "BICY_bad")), - "You must correct the above error") + "Metadata schema must validate") expect_error(run_congruence_checks(here::here(bad_dir, "data_metadata_mismatch", "BICY_files")), - "You must correct the above error") + "You must remove duplicate data table names") expect_snapshot(run_congruence_checks(here::here(good_dir, "BICY_good"))) expect_snapshot(run_congruence_checks(here::here(good_dir, "BICY_good"), check_metadata_only = TRUE)) }) @@ -125,7 +138,6 @@ test_that("test_delimiter displays 
success message if metadata indicates that da "Metadata indicates that each data file contains a field delimiter that is a single character") expect_message(test_delimiter(load_metadata(here::here(good_dir, "BUIS_good"))), "Metadata indicates that each data file contains a field delimiter that is a single character") - }) test_that("test_delimiter throws error if metadata does not contain delimiter info", { @@ -166,9 +178,9 @@ test_that("test_file_name_match displays success message if files in data dir an test_that("test_file_name_match displays error message if metadata contains filenames not in data dir and vice versa", { expect_error(test_file_name_match(here::here(bad_dir,"data_metadata_mismatch", "BICY_files")), - "1 file listed in metadata and missing from data folder.*1 file present in data folder and missing from metadata") + "1 file listed in metadata but missing from data folder.*1 file present in data folder but missing from metadata") expect_error(test_file_name_match(here::here(bad_dir, "data_metadata_mismatch", "BUIS_files")), - "1 file listed in metadata and missing from data folder.*0 files present in data folder and missing from metadata") + "1 file listed in metadata but missing from data folder.") }) @@ -225,6 +237,108 @@ test_that("test_date_range displays warning message if dates in data are outside "The following date/time columns are out of the range \\W*2001-06-20\\W*2001-10-20\\W* specified in the metadata:\\W*BUIS_herps.csv\\W*: eventDate \\W*2001-06-20\\W*2020-10-18\\W*$") }) +# ---- test_taxonomic_cov ---- +test_that("test_taxonomic_cov displays success message if taxonomic coverage element is present", { + expect_message(test_taxonomic_cov(load_metadata(here::here(good_dir, "BICY_good"))), + "Metadata contains taxonomic coverage element") + expect_message(test_taxonomic_cov(load_metadata(here::here(good_dir, "BUIS_good"))), + "Metadata contains taxonomic coverage element") +}) + +test_that("test_taxonomic_cov throws warning if 
taxonomic coverage element is missing", { + bicy_meta$dataset$coverage$taxonomicCoverage <- NULL + buis_meta$dataset$coverage$taxonomicCoverage <- NULL + expect_warning(test_taxonomic_cov(bicy_meta), + "Metadata does not contain taxonomic coverage information") + expect_warning(test_taxonomic_cov(buis_meta), + "Metadata does not contain taxonomic coverage information") +}) + +# ---- test_geographic_cov ---- +test_that("test_geographic_cov displays success message if geographic coverage element is present", { + expect_message(test_geographic_cov(load_metadata(here::here(good_dir, "BICY_good"))), + "Metadata contains geographic coverage element") + expect_message(test_geographic_cov(load_metadata(here::here(good_dir, "BUIS_good"))), + "Metadata contains geographic coverage element") +}) + +test_that("test_geographic_cov throws warning if geographic coverage element is missing", { + bicy_meta$dataset$coverage$geographicCoverage <- NULL + buis_meta$dataset$coverage$geographicCoverage <- NULL + expect_warning(test_geographic_cov(bicy_meta), + "Metadata does not contain geographic coverage information") + expect_warning(test_geographic_cov(buis_meta), + "Metadata does not contain geographic coverage information") +}) + +# ---- test_doi ---- +test_that("test_doi displays success message if DOI is present", { + expect_message(test_doi(load_metadata(here::here(good_dir, "BICY_good"))), + "Metadata contains a digital object identifier") + expect_message(test_doi(load_metadata(here::here(good_dir, "BUIS_good"))), + "Metadata contains a digital object identifier") +}) + +test_that("test_doi throws warning if DOI is missing", { + bicy_meta$dataset$alternateIdentifier <- NULL + buis_meta$dataset$alternateIdentifier <- NULL + expect_warning(test_doi(bicy_meta), + "Metadata does not contain a digital object identifier") + expect_warning(test_doi(buis_meta), + "Metadata does not contain a digital object identifier") +}) + +# ---- test_publisher ---- +test_that("test_publisher 
displays success message if publisher element is present", { + expect_message(test_publisher(load_metadata(here::here(good_dir, "BICY_good"))), + "Metadata contains publisher element") + expect_message(test_publisher(load_metadata(here::here(good_dir, "BUIS_good"))), + "Metadata contains publisher element") +}) + +test_that("test_publisher throws error if publisher element is missing", { + bicy_meta$dataset$publisher <- NULL + buis_meta$dataset$publisher <- NULL + expect_error(test_publisher(bicy_meta), + "Metadata does not contain publisher information") + expect_error(test_publisher(buis_meta), + "Metadata does not contain publisher information") +}) + +# ---- test_valid_filenames ---- +test_that("test_valid_filenames displays success message if filenames do not contain special characters", { + expect_message(test_valid_filenames(load_metadata(here::here(good_dir, "BICY_good"))), + "File names begin with a letter and do not contain spaces or special characters") + expect_message(test_valid_filenames(load_metadata(here::here(good_dir, "BUIS_good"))), + "File names begin with a letter and do not contain spaces or special characters") +}) + +test_that("test_valid_filenames throws warning if filenames contain special characters", { + bicy_meta$dataset$dataTable[[2]]$physical$objectName <- "0ops_bad_filename.csv" + buis_meta$dataset$dataTable$physical$objectName <- "als*o bad.csv" + expect_warning(test_valid_filenames(bicy_meta), + "Some file names contain special characters and/or do not begin with a letter") + expect_warning(test_valid_filenames(buis_meta), + "Some file names contain special characters and/or do not begin with a letter") +}) + +# ---- test_valid_fieldnames ---- +test_that("test_valid_fieldnames displays success message if filenames do not contain special characters", { + expect_message(test_valid_fieldnames(load_metadata(here::here(good_dir, "BICY_good"))), + "Field names begin with a letter and do not contain spaces or special characters") + 
expect_message(test_valid_fieldnames(load_metadata(here::here(good_dir, "BUIS_good"))), + "Field names begin with a letter and do not contain spaces or special characters") +}) + +test_that("test_valid_fieldnames throws warning if filenames contain special characters", { + bicy_meta$dataset$dataTable[[2]]$attributeList$attribute[[3]]$attributeName <- "_weird_col_name" + buis_meta$dataset$dataTable$attributeList$attribute[[4]]$attributeName <- "poor choices were made!" + expect_warning(test_valid_fieldnames(bicy_meta), + "Some field names contain special characters and/or do not begin with a letter") + expect_warning(test_valid_fieldnames(buis_meta), + "Some field names contain special characters and/or do not begin with a letter") +}) + # ---- convert_datetime_format ---- test_that("convert_datetime_format returns the correct R datetime format string for ISO compliant date formats", { expect_equal(convert_datetime_format("YYYY-MM-DD"), "%Y-%m-%d") From e076239ff484de4c36ae25387d8ad5bfd84fe967 Mon Sep 17 00:00:00 2001 From: Wright Date: Thu, 2 Feb 2023 21:52:08 -0700 Subject: [PATCH 08/10] Fix test_publisher documentation --- man/test_publisher.Rd | 2 ++ 1 file changed, 2 insertions(+) diff --git a/man/test_publisher.Rd b/man/test_publisher.Rd index 825edb7..f34f222 100644 --- a/man/test_publisher.Rd +++ b/man/test_publisher.Rd @@ -8,6 +8,8 @@ Checks if publisher information is present in metadata, with option to require v test_publisher(metadata = load_metadata(directory), require_nps = FALSE) } \arguments{ +\item{metadata}{The metadata object returned by \code{load_metadata}. 
If parameter not provided, defaults to calling \code{load_metadata} in current project directory.} + \item{require_nps}{If TRUE, throw an error if publisher information is not correct for NPS published data.} } \value{ From 840ac41bac5c9a098463e5cc47cad7478906033f Mon Sep 17 00:00:00 2001 From: Wright Date: Thu, 2 Feb 2023 21:52:23 -0700 Subject: [PATCH 09/10] Update version --- DESCRIPTION | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ce624c9..d80e3b9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: DPchecker Title: Checks Data Packages for Congruence -Version: 0.1.0 +Version: 0.2.0 Authors@R: c( person("Rob", "Baker", email = "robert_baker@nps.gov", role = c("cre", "aut"), comment = c(ORCID = "0000-0001-7591-5035")), person(c("Sarah", "E."), "Wright", email = "sarah_wright@nps.gov", role = "aut"), diff --git a/README.md b/README.md index 243ac30..d9c827b 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ # DPchecker -#### v0.1.0 +#### v0.2.0 DPchecker (Data Package checker) is a package with a series of functions for NPS data package authors and reviewers to check for internal consistency among data/meta data and with the data package standards. 
From 5c901f379658c246e26de5397ad76e6248a2d856 Mon Sep 17 00:00:00 2001 From: Wright Date: Thu, 2 Feb 2023 21:52:36 -0700 Subject: [PATCH 10/10] Rebuild pkgdown site --- docs/404.html | 2 +- docs/LICENSE-text.html | 2 +- docs/LICENSE.html | 2 +- docs/authors.html | 10 +-- docs/index.html | 4 +- docs/pkgdown.yml | 2 +- docs/reference/DPchecker-package.html | 2 +- docs/reference/DPchecker_example.html | 11 +-- docs/reference/convert_datetime_format.html | 2 +- docs/reference/index.html | 36 +++++++- docs/reference/load_data.html | 2 +- docs/reference/load_metadata.html | 2 +- docs/reference/run_congruence_checks.html | 56 +++++++++++- docs/reference/test_date_range.html | 2 +- docs/reference/test_delimiter.html | 2 +- docs/reference/test_doi.html | 94 ++++++++++++++++++++ docs/reference/test_dup_meta_entries.html | 2 +- docs/reference/test_fields_match.html | 2 +- docs/reference/test_file_name_match.html | 2 +- docs/reference/test_footer.html | 2 +- docs/reference/test_geographic_cov.html | 94 ++++++++++++++++++++ docs/reference/test_header_num.html | 2 +- docs/reference/test_metadata_version.html | 2 +- docs/reference/test_numeric_fields.html | 2 +- docs/reference/test_publisher.html | 98 +++++++++++++++++++++ docs/reference/test_taxonomic_cov.html | 94 ++++++++++++++++++++ docs/reference/test_valid_fieldnames.html | 93 +++++++++++++++++++ docs/reference/test_valid_filenames.html | 93 +++++++++++++++++++ docs/reference/test_validate_schema.html | 2 +- docs/search.json | 2 +- docs/sitemap.xml | 18 ++++ 31 files changed, 703 insertions(+), 36 deletions(-) create mode 100644 docs/reference/test_doi.html create mode 100644 docs/reference/test_geographic_cov.html create mode 100644 docs/reference/test_publisher.html create mode 100644 docs/reference/test_taxonomic_cov.html create mode 100644 docs/reference/test_valid_fieldnames.html create mode 100644 docs/reference/test_valid_filenames.html diff --git a/docs/404.html b/docs/404.html index 0f8c863..55fbabd 100644 --- 
a/docs/404.html +++ b/docs/404.html @@ -24,7 +24,7 @@ DPchecker - 0.1.0 + 0.2.0 + + + + + +
+
+
+ +
+

Check for DOI +Checks if DOI is present in metadata. Does not currently validate DOI.

+
+ +
+

Usage

+
test_doi(metadata = load_metadata(directory))
+
+ +
+

Arguments

+
metadata
+

The metadata object returned by load_metadata. If parameter not provided, defaults to calling load_metadata in current project directory.

+ +
+
+

Value

+ + +

Invisibly returns metadata.

+
+ +
+

Examples

+
meta <- load_metadata(DPchecker_example("BICY_veg"))
+#> Data are provided for example use only. Do not assume that they are complete, accurate, or up to date.
+test_doi(meta)
+#>  Metadata contains a digital object identifier.
+
+
+
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/test_dup_meta_entries.html b/docs/reference/test_dup_meta_entries.html index 37a9db6..336d0ed 100644 --- a/docs/reference/test_dup_meta_entries.html +++ b/docs/reference/test_dup_meta_entries.html @@ -10,7 +10,7 @@ DPchecker - 0.1.0 + 0.2.0 + + + + + +
+
+
+ +
+

Check for Geographic Coverage +Checks if geographic coverage element is present in metadata. Does not perform any validation of geographic coverage information.

+
+ +
+

Usage

+
test_geographic_cov(metadata = load_metadata(directory))
+
+ +
+

Arguments

+
metadata
+

The metadata object returned by load_metadata. If parameter not provided, defaults to calling load_metadata in current project directory.

+ +
+
+

Value

+ + +

Invisibly returns metadata.

+
+ +
+

Examples

+
meta <- load_metadata(DPchecker_example("BICY_veg"))
+#> Data are provided for example use only. Do not assume that they are complete, accurate, or up to date.
+test_geographic_cov(meta)
+#>  Metadata contains geographic coverage element
+
+
+
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/test_header_num.html b/docs/reference/test_header_num.html index 8d6bacb..27c0f69 100644 --- a/docs/reference/test_header_num.html +++ b/docs/reference/test_header_num.html @@ -10,7 +10,7 @@ DPchecker - 0.1.0 + 0.2.0 + + + + + +
+
+
+ +
+

Check for Publisher +Checks if publisher information is present in metadata, with option to require valid NPS publisher information.

+
+ +
+

Usage

+
test_publisher(metadata = load_metadata(directory), require_nps = FALSE)
+
+ +
+

Arguments

+
metadata
+

The metadata object returned by load_metadata. If parameter not provided, defaults to calling load_metadata in current project directory.

+ + +
require_nps
+

If TRUE, throw an error if publisher information is not correct for NPS published data.

+ +
+
+

Value

+ + +

Invisibly returns metadata.

+
+ +
+

Examples

+
meta <- load_metadata(DPchecker_example("BICY_veg"))
+#> Data are provided for example use only. Do not assume that they are complete, accurate, or up to date.
+test_publisher(meta)
+#>  Metadata contains publisher element.
+
+
+
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/test_taxonomic_cov.html b/docs/reference/test_taxonomic_cov.html new file mode 100644 index 0000000..d7404aa --- /dev/null +++ b/docs/reference/test_taxonomic_cov.html @@ -0,0 +1,94 @@ + +Check for Taxonomic Coverage +Checks if taxonomic coverage element is present in metadata. Does not perform any validation of taxonomic coverage information. — test_taxonomic_cov • DPchecker + Skip to contents + + +
+
+
+ +
+

Check for Taxonomic Coverage +Checks if taxonomic coverage element is present in metadata. Does not perform any validation of taxonomic coverage information.

+
+ +
+

Usage

+
test_taxonomic_cov(metadata = load_metadata(directory))
+
+ +
+

Arguments

+
metadata
+

The metadata object returned by load_metadata. If parameter not provided, defaults to calling load_metadata in current project directory.

+ +
+
+

Value

+ + +

Invisibly returns metadata.

+
+ +
+

Examples

+
meta <- load_metadata(DPchecker_example("BICY_veg"))
+#> Data are provided for example use only. Do not assume that they are complete, accurate, or up to date.
+test_taxonomic_cov(meta)
+#>  Metadata contains taxonomic coverage element.
+
+
+
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/test_valid_fieldnames.html b/docs/reference/test_valid_fieldnames.html new file mode 100644 index 0000000..af2cbcc --- /dev/null +++ b/docs/reference/test_valid_fieldnames.html @@ -0,0 +1,93 @@ + +Test Field Names for Invalid Characters — test_valid_fieldnames • DPchecker + Skip to contents + + +
+
+
+ +
+

test_valid_fieldnames checks for field names in the metadata that contain invalid special characters. Only underscores and alphanumeric characters are permitted, and names must begin with a letter.

+
+ +
+

Usage

+
test_valid_fieldnames(metadata = load_metadata(here::here()))
+
+ +
+

Arguments

+
metadata
+

The metadata object returned by load_metadata. If parameter not provided, defaults to calling load_metadata in current project directory.

+ +
+
+

Value

+ + +

Invisibly returns metadata.

+
+
+

Details

+

You should run test_fields_match() before you run this function, since this function only checks the field names in the metadata.

+
+ +
+

Examples

+
meta <- load_metadata(DPchecker_example("BICY_veg"))
+#> Data are provided for example use only. Do not assume that they are complete, accurate, or up to date.
+test_valid_fieldnames(meta)
+#>  Field names begin with a letter and do not contain spaces or special
+#>   characters.
+
+
+
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/test_valid_filenames.html b/docs/reference/test_valid_filenames.html new file mode 100644 index 0000000..435ed15 --- /dev/null +++ b/docs/reference/test_valid_filenames.html @@ -0,0 +1,93 @@ + +Test File Names for Invalid Characters — test_valid_filenames • DPchecker + Skip to contents + + +
+
+
+ +
+

test_valid_filenames checks for file names in the metadata that contain invalid special characters. Only underscores and alphanumeric characters are permitted, and names must begin with a letter.

+
+ +
+

Usage

+
test_valid_filenames(metadata = load_metadata(here::here()))
+
+ +
+

Arguments

+
metadata
+

The metadata object returned by load_metadata. If parameter not provided, defaults to calling load_metadata in current project directory.

+ +
+
+

Value

+ + +

Invisibly returns metadata.

+
+
+

Details

+

You should run test_file_name_match() before you run this function, since this function only checks the file names in the metadata.

+
+ +
+

Examples

+
meta <- load_metadata(DPchecker_example("BICY_veg"))
+#> Data are provided for example use only. Do not assume that they are complete, accurate, or up to date.
+test_valid_filenames(meta)
+#>  File names begin with a letter and do not contain spaces or special
+#>   characters.
+
+
+
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/test_validate_schema.html b/docs/reference/test_validate_schema.html index 855b6aa..bb8538d 100644 --- a/docs/reference/test_validate_schema.html +++ b/docs/reference/test_validate_schema.html @@ -10,7 +10,7 @@ DPchecker - 0.1.0 + 0.2.0