Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions R/mergedatasetsbycase.R
Original file line number Diff line number Diff line change
Expand Up @@ -2066,7 +2066,7 @@ combineAsCategoricalVariable <- function(var.list, data.sets,
}
}

if (isIntegerValued(result) && all(abs(result) <= .Machine$integer.max, na.rm = TRUE))
if (isIntegerValued(result, merged.val.attr))
{
result <- as.integer(result)
nms <- names(merged.val.attr)
Expand Down Expand Up @@ -2496,7 +2496,7 @@ combineAsNumericVariable <- function(var.list, data.sets, v.types)
v
}))

if (isIntegerValued(result) && all(abs(result) <= .Machine$integer.max, na.rm = TRUE))
if (isIntegerValued(result))
result <- as.integer(result)

return(result)
Expand Down
46 changes: 0 additions & 46 deletions R/mergedatasetsbyvariable.R
Original file line number Diff line number Diff line change
Expand Up @@ -580,12 +580,6 @@ doMergeByVariable <- function(data.sets, matched.cases.matrix,
merged.var[non.missing.ind] <- input.var[matched.cases.matrix[non.missing.ind,
data.set.ind]]

if (isIntegerValued(merged.var)) {
recode.object <- recodeOutOfBoundsIntegersIfNecessary(merged.var, input.var = input.var)
merged.var <- recode.object[["merged.var"]]
input.var <- recode.object[["input.var"]]
merged.var <- as.integer(merged.var)
}
v.type <- variableType(input.var)
if (v.type == CATEGORICAL.VARIABLE.TYPE)
{
Expand Down Expand Up @@ -671,43 +665,3 @@ print.MergeDataSetByVariablePage <- function(x, ...)

do.call(DataSetMergingByVariableWidget, args)
}

recodeOutOfBoundsIntegersIfNecessary <- function(merged.var, input.var) {
merged.unique.vals <- unique(merged.var)
merged.unique.vals <- removeNA(merged.unique.vals)
merged.val.attr <- attr(merged.var, "labels", exact = TRUE)
input.val.attr <- attr(input.var, "labels", exact = TRUE)
all.unique.vals <- unique(c(unclass(merged.unique.vals), unclass(input.val.attr)))

bad.vals <- abs(all.unique.vals) > 1e9
n.bad.vals <- length(which(bad.vals))
if (n.bad.vals == 0)
return(list(merged.var = merged.var,
input.var = input.var))
lab <- attr(input.var, "label", exact = TRUE)
if (n.bad.vals > 1 ) {
stop("Variable: '",
lab,
"' contains multiple values outside the allowable range. ",
"Values larger than 1,000,000,000 or smaller than -1,000,000,000 ",
"should be recoded before attempting to merge these files.")
}

offending.value <- all.unique.vals[bad.vals]
remaining.values <- setdiff(all.unique.vals, offending.value)
# -99 is an industry convention
new.value <- if (-99 %in% remaining.values) min(remaining.values) - 1 else -99
warning("Variable: '",
lab,
"' contains a value outside of the allowable range (",
offending.value,
"). This value has been recoded as ",
new.value)
merged.var[merged.var == offending.value] <- new.value
input.val.attr[input.val.attr == offending.value] <- new.value
merged.val.attr[merged.val.attr == offending.value] <- new.value
attr(input.var, "labels") <- input.val.attr
attr(merged.var, "labels") <- merged.val.attr
list(merged.var = merged.var,
input.var = input.var)
}
55 changes: 46 additions & 9 deletions R/mergingandstackingutilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ createReadErrorHandler <- function(data.set.name)
#' @importFrom tools file_path_sans_ext
writeDataSet <- function(data.set, data.set.name, is.saved.to.cloud)
{
if (any(invalid.columns <- findInvalidIntegerValueColumns(data.set)))
data.set <- updateClassForLabelledIntegerVariables(data.set, invalid.columns)
if (is.saved.to.cloud)
{
warn.msg <- paste0("The data file ", data.set.name,
Expand Down Expand Up @@ -352,16 +354,23 @@ splitByComma <- function(input.text, ignore.commas.in.parentheses = FALSE)
#' isIntegerValued(c(1, 2, 3)) # TRUE
#' isIntegerValued(c(1, 2.1, 3)) # FALSE
#' @noRd
isIntegerValued <- function(x)
isIntegerValued <- function(x, val.attr = NULL)
{
val.attr <- attr(x, "labels", exact = TRUE)
if (!any(is.nan(val.attr)) && is.numeric(x))
{
x.without.na <- removeNA(x)
all(floor(x.without.na) == x.without.na &
!is.infinite(x.without.na))
}else
FALSE
if (!is.numeric(x))
return(FALSE)

if (is.null(val.attr))
val.attr <- attr(x, "labels", exact = TRUE)
if (!is.null(val.attr) &&
(any(is.nan(val.attr)) ||
any(abs(val.attr) > .Machine$integer.max)))
return(FALSE)


x.without.na <- removeNA(x)
all(floor(x.without.na) == x.without.na &
!is.infinite(x.without.na) &
abs(x.without.na) <= .Machine$integer.max)
}

#' @param data.set.name A character scalar of the user-input name for
Expand Down Expand Up @@ -617,3 +626,31 @@ throwCombinedDataSetTooLargeError <- function() {
stop("The combined data set is too large to create. ",
"Consider omitting variables from the combined data set.")
}

#' Checks each labelled integer variable in a data.frame for integer values
#' greater than .Machine$integer.max (including in the value attributes)
#' @param df A data.frame containing haven::labelled and vctrs variables
#' @return a logical vector for each
#' @noRd
findInvalidIntegerValueColumns <- function(df)
{
n.col <- ncol(df)
invalid.columns <- logical(n.col)
.invalidValues <- function(x)
any(abs(x) > .Machine$integer.max, na.rm = TRUE)
.invalidVariable <- function(variable)
.invalidValues(variable) ||
(!is.null(val.attr <- attr(variable, "labels", exact = TRUE)) &&
.invalidValues(val.attr))
vapply(df, .invalidVariable, logical(1L))
}

#' Updates the class of specified columns of a data.frame from integer to double
#' so they may be saved by haven::write_sav
#' @noRd
updateClassForLabelledIntegerVariables <- function(data.set, col.idx)
{
for (i in which(col.idx))
class(data.set[, col.idx])[class(data.set[, col.idx]) %in% "integer"] <- "double"
return(data.set)
}
56 changes: 56 additions & 0 deletions tests/testthat/test-mergedatasetsbycase.R
Original file line number Diff line number Diff line change
Expand Up @@ -1338,6 +1338,62 @@ test_that("DS-5306 Support when both data sets have meresrc variables", {
expect_equal(mergesrc, expected)
})

test_that("DS-5236: Merging with missing data (NaN value attr.) stays NaN in output",
{
input.data <- structure(list(Q1 =
structure(c(NA, NA, NA, NA, NA, NA, NA, NA, NA, 4),
label = "I love cats",
format.spss = "F4.0",
labels = c(`Strongly disagree` = 1, Disagree = 2,
`Neither agree nor disagree` = 3,
Agree = 4, `Strongly agree` = 5),
class = c("haven_labelled", "vctrs_vctr", "double")),
Q2 = structure(c(NA, NA, NA, NA, NA, NA, NA, NA, NA, 4),
label = "I love dogs",
format.spss = "F4.0",
labels = c(`Strongly disagree` = 1, Disagree = 2,
`Neither agree nor disagree` = 3, Agree = 4,
`Strongly agree` = 5),
class = c("haven_labelled", "vctrs_vctr", "double")),
Q3 = structure(c(4, 4, 4, 3, 4, 4, 3, 3, 3, 2), label = "QA3",
format.spss = "F4.0",
labels = c(`Dog` = 1, Pig = 2,
`Giraffe` = 3, Cow = 4,
`Missing data` = NaN),
class = c("haven_labelled", "vctrs_vctr", "double"))),
row.names = c(NA, -10L),
class = c("tbl_df", "tbl", "data.frame"))
in.tfile <- "temp_data_for_merge.sav"
out.tfile <- "temp_data_merged.sav"
haven::write_sav(input.data, in.tfile)

output <- do.call(MergeDataSetsByCase, list(c(in.tfile, in.tfile), out.tfile))
expected.labels <- attr(input.data[["Q3"]], "labels")
output.labels <- attr(haven::read_sav(out.tfile)[[3]], "labels")
expect_equal(expected.labels, output.labels)
unlink(in.tfile)
unlink(out.tfile)
})

test_that("DS-5115: Merging with missing data (NaN value attr.) stays NaN in output",
{
out.file <- "temp_data_for_merge.sav"
in.file <- findInstDirFile("SPSSWithIntegerValueError1.sav")

expect_silent(do.call(MergeDataSetsByCase,
list(c(in.file, in.file), out.file)))
output.data <- haven::read_spss(out.file)
input.data <- haven::read_spss(in.file)
expected.labels <- attr(input.data[["badvar1"]], "labels")
output.labels <- attr(output.data[["badvar1"]], "labels")
expect_equal(expected.labels, output.labels)

expected.badvar <- c(input.data[["badvar1"]], input.data[["badvar1"]])
attr(expected.badvar, "format.spss") <- "F8.2"
expect_equal(expected.badvar, output.data[["badvar1"]])
unlink(out.file)
})

if (file.exists("Combined data set.sav"))
file.remove("Combined data set.sav")

Expand Down
17 changes: 6 additions & 11 deletions tests/testthat/test-mergedatasetsbyvariable.R
Original file line number Diff line number Diff line change
Expand Up @@ -261,20 +261,15 @@ test_that("exampleIDValues", {
})

test_that("DS-5115: Handle integer values outside R's allowable range", {
expect_warning(
expect_silent(
MergeDataSetsByVariable(data.set.names = c(findInstDirFile("SPSSWithIntegerValueError1.sav"),
findInstDirFile("SPSSWithIntegerValueError2.sav"))),
"contains a value outside of the allowable range"
findInstDirFile("SPSSWithIntegerValueError2.sav")))
)

expect_warning(
expect_error(

expect_silent(
MergeDataSetsByVariable(data.set.names = c(findInstDirFile("SPSSWithIntegerValueError3.sav"),
findInstDirFile("SPSSWithIntegerValueError2.sav"))),
"contains multiple values outside the allowable range"
),
"contains a value outside of the allowable range"
)
findInstDirFile("SPSSWithIntegerValueError2.sav")))
)
})

if (file.exists("Combined data set.sav"))
Expand Down