Skip to content

Commit

Permalink
data check and validate() improvements
Browse files Browse the repository at this point in the history
1. Auto check of dim of input/output testing data if they are provided as vectors.
2. Updated validate() for linked emulators under new implementations of lgp().
  • Loading branch information
mingdeyu committed Nov 13, 2024
1 parent 80feaab commit d3da50b
Show file tree
Hide file tree
Showing 20 changed files with 595 additions and 166 deletions.
34 changes: 28 additions & 6 deletions R/alm.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,6 @@
#' `dgp` class), or the order of emulators placed in `object` if `object` is an instance of the `bundle` class;
#' * If `x_cand` is supplied as a list when `object` is an instance of `bundle` class and a `aggregate` function is provided, the matrices in `x_cand` must have
#' common rows (i.e., the candidate sets of emulators in the bundle have common input locations) so the `aggregate` function can be applied.
#' * Any R vector detected in `x_cand` will be treated as a column vector and automatically converted into a single-column
#' R matrix.
#' @references
#' MacKay, D. J. (1992). Information-based objective functions for active data selection. *Neural Computation*, **4(4)**, 590-604.
#'
Expand Down Expand Up @@ -116,7 +114,13 @@ alm.gp <- function(object, x_cand, batch_size = 1, M = 50, workers = 1, ...) {
n_dim_X <- ncol(training_input)
#check x_cand
if ( !is.matrix(x_cand)&!is.vector(x_cand) ) stop("'x_cand' must be a vector or a matrix.", call. = FALSE)
if ( is.vector(x_cand) ) x_cand <- as.matrix(x_cand)
if ( is.vector(x_cand) ) {
if ( ncol(object$data$X)!=1 ){
x_cand <- matrix(x_cand, nrow = 1)
} else {
x_cand <- as.matrix(x_cand)
}
}
if ( ncol(x_cand)!=n_dim_X ) stop("'x_cand' and the training input have different number of dimensions.", call. = FALSE)
#check core number
if( !is.null(workers) ) {
Expand Down Expand Up @@ -183,7 +187,13 @@ alm.dgp <- function(object, x_cand, batch_size = 1, M = 50, workers = 1, aggrega
n_dim_Y <- ncol(training_output)
#check x_cand
if ( !is.matrix(x_cand)&!is.vector(x_cand) ) stop("'x_cand' must be a vector or a matrix.", call. = FALSE)
if ( is.vector(x_cand) ) x_cand <- as.matrix(x_cand)
if ( is.vector(x_cand) ) {
if ( ncol(object$data$X)!=1 ){
x_cand <- matrix(x_cand, nrow = 1)
} else {
x_cand <- as.matrix(x_cand)
}
}
if ( ncol(x_cand)!=n_dim_X ) stop("'x_cand' and the training input have different number of dimensions.", call. = FALSE)
#check core number
if( !is.null(workers) ) {
Expand Down Expand Up @@ -306,12 +316,24 @@ alm.bundle <- function(object, x_cand, batch_size = 1, M = 50, workers = 1, aggr
if ( is.list(x_cand) ){
if (length(x_cand) != n_emulators) stop("When 'x_cand' is a list, the number of elements in it should match the number of emulators in the bundle.", call. = FALSE)
for ( i in 1:n_emulators ){
if ( is.vector(x_cand[[i]]) ) x_cand[[i]] <- as.matrix(x_cand[[i]])
if ( is.vector(x_cand[[i]]) ) {
if ( n_dim_X!=1 ){
x_cand[[i]] <- matrix(x_cand[[i]], nrow = 1)
} else {
x_cand[[i]] <- as.matrix(x_cand[[i]])
}
}
if ( ncol(x_cand[[i]])!=n_dim_X ) stop("Elements in 'x_cand' have different number of dimensions with the training input.", call. = FALSE)
}
islist <- TRUE
} else {
if ( is.vector(x_cand) ) x_cand <- as.matrix(x_cand)
if ( is.vector(x_cand) ) {
if ( n_dim_X!=1 ){
x_cand <- matrix(x_cand, nrow = 1)
} else {
x_cand <- as.matrix(x_cand)
}
}
if ( ncol(x_cand)!=n_dim_X ) stop("'x_cand' and the training input have different number of dimensions.", call. = FALSE)
islist <- FALSE
}
Expand Down
93 changes: 59 additions & 34 deletions R/design.R
Original file line number Diff line number Diff line change
Expand Up @@ -190,8 +190,6 @@
#' within `f` by appropriately returning `NA`s.
#' * When defining `eval`, the output metric needs to be positive if [draw()] is used with `log = T`. And one needs to ensure that a lower metric value indicates
#' a better emulation performance if `target` is set.
#' * Any R vector detected in `x_test` and `y_test` will be treated as a column vector and automatically converted into a single-column
#' R matrix. Thus, if `x_test` or `y_test` is a single testing data point with multiple dimensions, it must be given as a matrix.
#' @details See further examples and tutorials at <https://mingdeyu.github.io/dgpsi-R/>.
#'
#' @examples
Expand Down Expand Up @@ -267,8 +265,14 @@ design.gp <- function(object, N, x_cand = NULL, y_cand = NULL, n_cand = 200, lim
freq <- check_freq(freq)
reset <- check_reset(reset, N)
n_cand <- check_n_cand(n_cand)

X <- object$data$X
Y <- object$data$Y
n_dim_X <- ncol(X)
n_dim_Y <- ncol(Y)

if (!is.null(x_test) & !is.null(y_test)) {
xy_test <- check_xy_test(x_test, y_test)
xy_test <- check_xy_test(x_test, y_test, n_dim_X, n_dim_Y)
x_test <- xy_test[[1]]
y_test <- xy_test[[2]]
}
Expand Down Expand Up @@ -318,11 +322,6 @@ design.gp <- function(object, N, x_cand = NULL, y_cand = NULL, n_cand = 200, lim
start_point <- 0
}

X <- object$data$X
Y <- object$data$Y
n_dim_X <- ncol(X)
n_dim_Y <- ncol(Y)

N_acq <- c()
mnames <- methods::formalArgs(method)
if ( !is.null(eval) ){
Expand Down Expand Up @@ -359,7 +358,7 @@ design.gp <- function(object, N, x_cand = NULL, y_cand = NULL, n_cand = 200, lim
int <- check_int(int, n_dim_X)
} else {
add_arg <- list(...)
xy_cand_list <- check_xy_cand(x_cand, y_cand, n_dim_Y)
xy_cand_list <- check_xy_cand(x_cand, y_cand, n_dim_X, n_dim_Y)
xy_cand_list <- remove_dup(xy_cand_list, rbind(X, target_points))
x_cand_origin <- xy_cand_list[[1]]
x_cand_rep <- pkg.env$np$unique(x_cand_origin, return_inverse=TRUE, axis=0L)
Expand Down Expand Up @@ -623,7 +622,7 @@ design.gp <- function(object, N, x_cand = NULL, y_cand = NULL, n_cand = 200, lim
}
} else {
add_arg <- list(...)
xy_cand_list <- check_xy_cand(x_cand, y_cand, n_dim_Y)
xy_cand_list <- check_xy_cand(x_cand, y_cand, n_dim_X, n_dim_Y)
xy_cand_list <- remove_dup(xy_cand_list, X)
x_cand_origin <- xy_cand_list[[1]]
x_cand_rep <- pkg.env$np$unique(x_cand_origin, return_inverse=TRUE, axis=0L)
Expand Down Expand Up @@ -874,8 +873,14 @@ design.dgp <- function(object, N, x_cand = NULL, y_cand = NULL, n_cand = 200, li
if ( refit_cores < 1 ) stop("'refit_cores' must be >= 1.", call. = FALSE)
}
n_cand <- check_n_cand(n_cand)

X <- object$data$X
Y <- object$data$Y
n_dim_X <- ncol(X)
n_dim_Y <- ncol(Y)

if (!is.null(x_test) & !is.null(y_test)) {
xy_test <- check_xy_test(x_test, y_test)
xy_test <- check_xy_test(x_test, y_test, n_dim_X, n_dim_Y)
x_test <- xy_test[[1]]
y_test <- xy_test[[2]]
}
Expand Down Expand Up @@ -926,11 +931,6 @@ design.dgp <- function(object, N, x_cand = NULL, y_cand = NULL, n_cand = 200, li
start_point <- 0
}

X <- object$data$X
Y <- object$data$Y
n_dim_X <- ncol(X)
n_dim_Y <- ncol(Y)

if (pruning){
pruning <- check_auto(object)
if (pruning){
Expand Down Expand Up @@ -991,7 +991,7 @@ design.dgp <- function(object, N, x_cand = NULL, y_cand = NULL, n_cand = 200, li
int <- check_int(int, n_dim_X)
} else {
add_arg <- list(...)
xy_cand_list <- check_xy_cand(x_cand, y_cand, n_dim_Y)
xy_cand_list <- check_xy_cand(x_cand, y_cand, n_dim_X, n_dim_Y)
xy_cand_list <- remove_dup(xy_cand_list, rbind(X, target_points))
x_cand_origin <- xy_cand_list[[1]]
x_cand_rep <- pkg.env$np$unique(x_cand_origin, return_inverse=TRUE, axis=0L)
Expand Down Expand Up @@ -1330,7 +1330,7 @@ design.dgp <- function(object, N, x_cand = NULL, y_cand = NULL, n_cand = 200, li
}
} else {
add_arg <- list(...)
xy_cand_list <- check_xy_cand(x_cand, y_cand, n_dim_Y)
xy_cand_list <- check_xy_cand(x_cand, y_cand, n_dim_X, n_dim_Y)
xy_cand_list <- remove_dup(xy_cand_list, X)
x_cand_origin <- xy_cand_list[[1]]
x_cand_rep <- pkg.env$np$unique(x_cand_origin, return_inverse=TRUE, axis=0L)
Expand Down Expand Up @@ -1664,8 +1664,16 @@ design.bundle <- function(object, N, x_cand = NULL, y_cand = NULL, n_cand = 200,
if ( refit_cores < 1 ) stop("'refit_cores' must be >= 1.", call. = FALSE)
}
n_cand <- check_n_cand(n_cand)

X <- object$data$X
Y <- object$data$Y
n_dim_X <- ncol(X[[1]])
n_emulators <- length(object) - 1
if ( "id" %in% names(object) ) n_emulators <- n_emulators - 1
if ( "design" %in% names(object) ) n_emulators <- n_emulators - 1

if (!is.null(x_test) & !is.null(y_test)) {
xy_test <- check_xy_test(x_test, y_test)
xy_test <- check_xy_test(x_test, y_test, n_dim_X, n_emulators)
x_test <- xy_test[[1]]
y_test <- xy_test[[2]]
}
Expand Down Expand Up @@ -1715,13 +1723,6 @@ design.bundle <- function(object, N, x_cand = NULL, y_cand = NULL, n_cand = 200,
start_point <- 0
}

X <- object$data$X
Y <- object$data$Y
n_dim_X <- ncol(X[[1]])
n_emulators <- length(object) - 1
if ( "id" %in% names(object) ) n_emulators <- n_emulators - 1
if ( "design" %in% names(object) ) n_emulators <- n_emulators - 1

for ( k in 1:n_emulators ){
object[[paste('emulator',k,sep='')]] <- copy_in_design(object[[paste('emulator',k,sep='')]])
}
Expand Down Expand Up @@ -1776,7 +1777,7 @@ design.bundle <- function(object, N, x_cand = NULL, y_cand = NULL, n_cand = 200,
int <- check_int(int, n_dim_X)
} else {
add_arg <- list(...)
xy_cand_list <- check_xy_cand(x_cand, y_cand, n_emulators)
xy_cand_list <- check_xy_cand(x_cand, y_cand, n_dim_X, n_emulators)
x_cand <- vector('list', n_emulators)
for (j in 1:n_emulators){
xy_cand_list_j <- remove_dup( xy_cand_list, rbind(X[[paste('emulator',j,sep="")]], target_points[[paste('emulator',j,sep="")]]) )
Expand Down Expand Up @@ -2264,7 +2265,7 @@ design.bundle <- function(object, N, x_cand = NULL, y_cand = NULL, n_cand = 200,
}
} else {
add_arg <- list(...)
xy_cand_list <- check_xy_cand(x_cand, y_cand, n_emulators)
xy_cand_list <- check_xy_cand(x_cand, y_cand, n_dim_X, n_emulators)

x_cand <- vector('list', n_emulators)
x_cand_origin <- vector('list', n_emulators)
Expand Down Expand Up @@ -2695,13 +2696,25 @@ find_matching_indices <- function(mat1, mat2) {
}

#check argument x_cand and y_cand
check_xy_cand <- function(x_cand, y_cand, n_dim_Y){
check_xy_cand <- function(x_cand, y_cand, n_dim_X, n_dim_Y){
# if ( is.null(y_cand) ) stop("'y_cand' must be provided if 'x_cand' is not NULL.", call. = FALSE)
if ( !is.matrix(x_cand)&!is.vector(x_cand) ) stop("'x_cand' must be a vector or a matrix.", call. = FALSE)
if ( is.vector(x_cand) ) x_cand <- as.matrix(x_cand)
if ( is.vector(x_cand) ) {
if ( n_dim_X!=1 ){
x_cand <- matrix(x_cand, nrow = 1)
} else {
x_cand <- as.matrix(x_cand)
}
}
if ( !is.null(y_cand) ) {
if ( !is.matrix(y_cand)&!is.vector(y_cand) ) stop("'y_cand' must be a vector or a matrix.", call. = FALSE)
if ( is.vector(y_cand) ) y_cand <- as.matrix(y_cand)
if ( is.vector(y_cand) ) {
if ( n_dim_Y!=1 ){
y_cand <- matrix(y_cand, nrow = 1)
} else {
y_cand <- as.matrix(y_cand)
}
}
if ( nrow(x_cand)!=nrow(y_cand) ) stop("'x_cand' and 'y_cand' have different number of data points.", call. = FALSE)
if ( ncol(y_cand)!=n_dim_Y ) stop(sprintf("The dimension of 'y_cand' must be %i.", n_dim_Y), call. = FALSE)
}
Expand Down Expand Up @@ -2734,13 +2747,25 @@ extract_all <- function(X ,x_cand){
}

#check argument x_test and y_test
check_xy_test <- function(x_test, y_test){
check_xy_test <- function(x_test, y_test, n_dim_X, n_dim_Y){
x_test <- unname(x_test)
y_test <- unname(y_test)
if ( !is.matrix(x_test)&!is.vector(x_test) ) stop("'x_test' must be a vector or a matrix.", call. = FALSE)
if ( !is.matrix(y_test)&!is.vector(y_test) ) stop("'y_test' must be a vector or a matrix.", call. = FALSE)
if ( is.vector(x_test) ) x_test <- as.matrix(x_test)
if ( is.vector(y_test) ) y_test <- as.matrix(y_test)
if ( is.vector(x_test) ) {
if ( n_dim_X!=1 ){
x_test <- matrix(x_test, nrow = 1)
} else {
x_test <- as.matrix(x_test)
}
}
if ( is.vector(y_test) ) {
if ( n_dim_Y!=1 ){
y_test <- matrix(y_test, nrow = 1)
} else {
y_test <- as.matrix(y_test)
}
}
if ( nrow(x_test)!=nrow(y_test) ) stop("'x_test' and 'y_test' have different number of data points.", call. = FALSE)
return(list(x_test, y_test))
}
Expand Down
34 changes: 28 additions & 6 deletions R/mice.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,6 @@
#' `dgp` class), or the order of emulators placed in `object` if `object` is an instance of the `bundle` class;
#' * If `x_cand` is supplied as a list when `object` is an instance of `bundle` class and a `aggregate` function is provided, the matrices in `x_cand` must have
#' common rows (i.e., the candidate sets of emulators in the bundle have common input locations) so the `aggregate` function can be applied.
#' * Any R vector detected in `x_cand` will be treated as a column vector and automatically converted into a single-column
#' R matrix.
#' @references
#' Beck, J., & Guillas, S. (2016). Sequential design with mutual information for computer experiments (MICE): emulation of a tsunami model.
#' *SIAM/ASA Journal on Uncertainty Quantification*, **4(1)**, 739-766.
Expand Down Expand Up @@ -118,7 +116,13 @@ mice.gp <- function(object, x_cand, batch_size = 1, M = 50, nugget_s = 1e-6, wor
n_dim_X <- ncol(training_input)
#check x_cand
if ( !is.matrix(x_cand)&!is.vector(x_cand) ) stop("'x_cand' must be a vector or a matrix.", call. = FALSE)
if ( is.vector(x_cand) ) x_cand <- as.matrix(x_cand)
if ( is.vector(x_cand) ) {
if ( ncol(object$data$X)!=1 ){
x_cand <- matrix(x_cand, nrow = 1)
} else {
x_cand <- as.matrix(x_cand)
}
}
if ( ncol(x_cand)!=n_dim_X ) stop("'x_cand' and the training input have different number of dimensions.", call. = FALSE)
#check core number
if( !is.null(workers) ) {
Expand Down Expand Up @@ -184,7 +188,13 @@ mice.dgp <- function(object, x_cand, batch_size = 1, M = 50, nugget_s = 1e-6, wo
n_dim_Y <- ncol(training_output)
#check x_cand
if ( !is.matrix(x_cand)&!is.vector(x_cand) ) stop("'x_cand' must be a vector or a matrix.", call. = FALSE)
if ( is.vector(x_cand) ) x_cand <- as.matrix(x_cand)
if ( is.vector(x_cand) ) {
if ( ncol(object$data$X)!=1 ){
x_cand <- matrix(x_cand, nrow = 1)
} else {
x_cand <- as.matrix(x_cand)
}
}
if ( ncol(x_cand)!=n_dim_X ) stop("'x_cand' and the training input have different number of dimensions.", call. = FALSE)
#check core number
if( !is.null(workers) ) {
Expand Down Expand Up @@ -307,12 +317,24 @@ mice.bundle <- function(object, x_cand, batch_size = 1, M = 50, nugget_s = 1e-6,
if ( is.list(x_cand) ){
if (length(x_cand) != n_emulators) stop("When 'x_cand' is a list, the number of elements in it should match the number of emulators in the bundle.", call. = FALSE)
for ( i in 1:n_emulators ){
if ( is.vector(x_cand[[i]]) ) x_cand[[i]] <- as.matrix(x_cand[[i]])
if ( is.vector(x_cand[[i]]) ) {
if ( n_dim_X!=1 ){
x_cand[[i]] <- matrix(x_cand[[i]], nrow = 1)
} else {
x_cand[[i]] <- as.matrix(x_cand[[i]])
}
}
if ( ncol(x_cand[[i]])!=n_dim_X ) stop("Elements in 'x_cand' have different number of dimensions with the training input.", call. = FALSE)
}
islist <- TRUE
} else {
if ( is.vector(x_cand) ) x_cand <- as.matrix(x_cand)
if ( is.vector(x_cand) ) {
if ( n_dim_X!=1 ){
x_cand <- matrix(x_cand, nrow = 1)
} else {
x_cand <- as.matrix(x_cand)
}
}
if ( ncol(x_cand)!=n_dim_X ) stop("'x_cand' and the training input have different number of dimensions.", call. = FALSE)
islist <- FALSE
}
Expand Down
Loading

0 comments on commit d3da50b

Please sign in to comment.