diff --git a/DESCRIPTION b/DESCRIPTION index d526ff3..e7e1b5d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -10,7 +10,7 @@ Description: Allows the user (and reviewer) to check a data package and test whe License: MIT + file LICENSE Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 VignetteBuilder: knitr Suggests: here, diff --git a/NEWS.md b/NEWS.md index 1a5729c..764f3c7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # DPchecker 0.3.4 +2024-06-24 +* Fixed bug in `test_missing_data()` where if the order of files listed in metadata did not match the order of files produced by `list.files()` the function would evaluate the wrong file and produce inadvertent and unhelpful errors. 2024-02-05 * Fix bug in `test_date_range()` that was adding UTC to temporalCoverage * `test_missing_data()` now also handles the missing data codes "blank" and "empty". diff --git a/R/optional_eml_elements.R b/R/optional_eml_elements.R index 187fa3d..7eafff1 100644 --- a/R/optional_eml_elements.R +++ b/R/optional_eml_elements.R @@ -376,12 +376,16 @@ test_orcid_match <- function(metadata = load_metadata(directory)){ } } - #if there are any orcids, record orcids bad orcids: + #if there are any orcids, record orcids & bad orcids: if(!is.null(existing_orcid)){ bad_orcid <- NULL wrong_person <- NULL for(i in seq_along(surName)){ orcid_url <- existing_orcid[i] + is_it_na <- stringr::str_sub(orcid_url, start = -2) + if(is_it_na == "NA") { + next + } #api request to ORCID: tryCatch({test_req <- httr::GET(orcid_url)}, diff --git a/R/tabular_data_congruence.R b/R/tabular_data_congruence.R index fef65eb..3e38837 100644 --- a/R/tabular_data_congruence.R +++ b/R/tabular_data_congruence.R @@ -627,8 +627,7 @@ test_missing_data <- function(directory = here::here(), #detail_level <- match.arg(arg_choices) # get dataTable and all children elements - data_tbl <- EML::eml_get(metadata, "dataTable") - data_tbl$`@context` <- NULL + data_tbl <- metadata[["dataset"]][["dataTable"]] # If there's only one csv, data_tbl ends up with one less level of nesting. Re-nest it so that the rest of the code works consistently if ("attributeList" %in% names(data_tbl)) { data_tbl <- list(data_tbl) @@ -650,36 +649,44 @@ test_missing_data <- function(directory = here::here(), for (j in seq_len(ncol(dat))) { #look for NAs; if NAs found, look for correct missing data codes if (sum(is.na(dat[,j])) > 0) { - missing <- data_tbl[[i]][["attributeList"]][["attribute"]][[j]][["missingValueCode"]][["code"]] - if(is.null(missing) || sum(missing != missing_types) < 1) { - #file level error message output: - if (detail_level == "files") { - error_log <- append(error_log, + for(k in 1:length(seq_along(data_tbl))){ + if(data_tbl[[k]][["physical"]][["objectName"]] != data_files[i]){ + next + } else { + missing <- data_tbl[[k]][["attributeList"]][["attribute"]][[j]][["missingValueCode"]][["code"]] + if(is.null(missing) || sum(missing != missing_types) < 1) { + #file level error message output: + if (detail_level == "files") { + error_log <- append(error_log, paste0(" ", "---> {.file ", data_files[i], "} contains missing data without a corresponding missing data code in metadata." )) - break - } - #column level error message output: - if (detail_level == "columns") { - error_log <- append(error_log, + break + } + #column level error message output: + if (detail_level == "columns") { + error_log <- append(error_log, paste0(" ", "---> {.file ", data_files[i], "} {.field ", names(dat)[j], "} contains missing data without a corresponding missing data code in metadata.")) + } + } + } } } } } - } if(is.null(error_log)){ - cli::cli_inform(c("v" = "Missing data listed as NA is accounted for in metadata")) + cli::cli_inform(c( + "v" = "Missing data listed as NA is accounted for in metadata")) } else{ # really only need to say it once per file/column combo + error_log <- unique(error_log) msg <- error_log names(msg) <- rep(" ", length(msg)) err <- paste0("Undocumented missing data detected. Please document all missing data in metadata:\n") diff --git a/docs/404.html b/docs/404.html index a4d8fc0..2fc3aef 100644 --- a/docs/404.html +++ b/docs/404.html @@ -10,7 +10,7 @@ - + License • DPcheckerLicense • DPchecker Skip to contents -