From dd47236e9a846274cebab6377e73d21f4b6911f1 Mon Sep 17 00:00:00 2001 From: Diondra Peck <16376603+diondrapeck@users.noreply.github.com> Date: Mon, 24 Aug 2020 14:36:18 -0700 Subject: [PATCH 1/9] update version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 175d1763..f06b0971 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: azuremlsdk Type: Package Title: Interface to the 'Azure Machine Learning' 'SDK' -Version: 0.10.0 +Version: 1.10.0 Authors@R: c( person("AzureML R SDK Team", email = "amlrsdk@microsoft.com", role = c("cre", "aut", "ctb")), person("Microsoft", role = c("cph", "fnd")), From d7984a00f02183e2872292f7167c291f931e8bb9 Mon Sep 17 00:00:00 2001 From: Diondra Peck <16376603+diondrapeck@users.noreply.github.com> Date: Mon, 14 Sep 2020 15:32:38 -0700 Subject: [PATCH 2/9] add function to convert R datetime to Python datetime.datetime --- R/datasets.R | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/R/datasets.R b/R/datasets.R index d2af757e..5e269f71 100644 --- a/R/datasets.R +++ b/R/datasets.R @@ -444,6 +444,26 @@ keep_columns_from_dataset <- function(dataset, columns, validate = FALSE) { dataset$keep_columns(columns, validate) } +#' Convert R datetime objects to Python datetime.datetime objects +#' +#' @param posix_date The POSIX* object to be converted +#' @return A Python datetime.datetime object +#' @md +.posixct_to_datetime <- function(posix_date) { + datetime <- import("datetime", convert=FALSE) + + parsed_date <- stringr::str_split(posix_date, "[- : ]")[[1]] + + year <- as.integer(parsed_date[1]) + month <- as.integer(parsed_date[2]) + day <- as.integer(parsed_date[3]) + hour <- as.integer(parsed_date[4]) + minute <- as.integer(parsed_date[5]) + second <- as.integer(parsed_date[6]) + + datetime$datetime(year, month, day, hour, minute, second) +} + #' Filter Tabular Dataset with time stamp columns after a specified start time. #' #' @description From 52e29236097ab3327d0b0efe3598b911e9d7cb14 Mon Sep 17 00:00:00 2001 From: Diondra Peck <16376603+diondrapeck@users.noreply.github.com> Date: Mon, 14 Sep 2020 15:36:09 -0700 Subject: [PATCH 3/9] convert dates in function --- R/datasets.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/datasets.R b/R/datasets.R index 5e269f71..eb7a4b60 100644 --- a/R/datasets.R +++ b/R/datasets.R @@ -478,6 +478,7 @@ keep_columns_from_dataset <- function(dataset, columns, validate = FALSE) { #' @md filter_dataset_after_time <- function(dataset, start_time, include_boundary = TRUE) { + start_time <- .posixct_to_datetime(start_time) dataset$time_after(start_time, include_boundary) } @@ -495,6 +496,7 @@ filter_dataset_after_time <- function(dataset, start_time, #' @md filter_dataset_before_time <- function(dataset, end_time, include_boundary = TRUE) { + end_time <- .posixct_to_datetime(end_time) dataset$time_before(end_time, include_boundary) } @@ -513,6 +515,8 @@ filter_dataset_before_time <- function(dataset, end_time, #' @md filter_dataset_between_time <- function(dataset, start_time, end_time, include_boundary = TRUE) { + start_time <- .posixct_to_datetime(start_time) + end_time <- .posixct_to_datetime(end_time) dataset$time_between(start_time, end_time, include_boundary) } From e6431d9f2f3d2b47ee0237b6f2d266a0f1f091da Mon Sep 17 00:00:00 2001 From: Diondra Peck <16376603+diondrapeck@users.noreply.github.com> Date: Mon, 14 Sep 2020 15:56:16 -0700 Subject: [PATCH 4/9] add test --- tests/testthat/test_datasets.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test_datasets.R b/tests/testthat/test_datasets.R index 0a7eadcc..a978bedd 100644 --- a/tests/testthat/test_datasets.R +++ b/tests/testthat/test_datasets.R @@ -10,11 +10,13 @@ test_that("create a tabular dataset, ws <- existing_ws # create tabular dataset from delimited files + date <- as.POSIXct("2011-05-01 17:55:23") path_to_dataset <- "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/nyc_energy.csv" dataset <- create_tabular_dataset_from_delimited_files(path=path_to_dataset) + filtered_dataset <- filter_dataset_before_time(dataset, date) # load data into data frame - pandas_df <- load_dataset_into_data_frame(dataset) + pandas_df <- load_dataset_into_data_frame(filtered_dataset) expect_equal(is.data.frame(pandas_df), TRUE) # register first version of the dataset @@ -28,7 +30,7 @@ test_that("create a tabular dataset, expect_equal(registered_dataset1$name, registered_dataset2$name) expect_equal(registered_dataset1$description, 'I am version 1') expect_equal(registered_dataset2$description, 'I am version 2') - + # unregister datasets unregister_all_dataset_versions(dataset) expect_equal(dataset$name, NULL) From 5e15ededab858df1253a74d34f8bc36f7699851a Mon Sep 17 00:00:00 2001 From: Diondra Peck <16376603+diondrapeck@users.noreply.github.com> Date: Mon, 14 Sep 2020 17:18:31 -0700 Subject: [PATCH 5/9] add time column name --- tests/testthat/test_datasets.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test_datasets.R b/tests/testthat/test_datasets.R index a978bedd..3d258bc5 100644 --- a/tests/testthat/test_datasets.R +++ b/tests/testthat/test_datasets.R @@ -12,9 +12,10 @@ test_that("create a tabular dataset, # create tabular dataset from delimited files date <- as.POSIXct("2011-05-01 17:55:23") path_to_dataset <- "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/nyc_energy.csv" - dataset <- create_tabular_dataset_from_delimited_files(path=path_to_dataset) + time_column_name <- 'timeStamp' + dataset <- create_tabular_dataset_from_delimited_files(path=path_to_dataset)$with_timestamp_columns(fine_grain_timestamp=time_column_name) filtered_dataset <- filter_dataset_before_time(dataset, date) - + # load data into data frame pandas_df <- load_dataset_into_data_frame(filtered_dataset) expect_equal(is.data.frame(pandas_df), TRUE) From e69e7a716742fba72497581f1e154a7f321e0c9d Mon Sep 17 00:00:00 2001 From: Diondra <16376603+diondrapeck@users.noreply.github.com> Date: Mon, 14 Sep 2020 18:15:41 -0700 Subject: [PATCH 6/9] revert version change --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index f06b0971..175d1763 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: azuremlsdk Type: Package Title: Interface to the 'Azure Machine Learning' 'SDK' -Version: 1.10.0 +Version: 0.10.0 Authors@R: c( person("AzureML R SDK Team", email = "amlrsdk@microsoft.com", role = c("cre", "aut", "ctb")), person("Microsoft", role = c("cph", "fnd")), From a0bb2e902441a2e19e64f10e4279fb8366293628 Mon Sep 17 00:00:00 2001 From: Diondra Peck <16376603+diondrapeck@users.noreply.github.com> Date: Tue, 15 Sep 2020 08:44:08 -0700 Subject: [PATCH 7/9] cleanup code --- R/datasets.R | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/R/datasets.R b/R/datasets.R index eb7a4b60..ce78aa09 100644 --- a/R/datasets.R +++ b/R/datasets.R @@ -452,16 +452,16 @@ keep_columns_from_dataset <- function(dataset, columns, validate = FALSE) { .posixct_to_datetime <- function(posix_date) { datetime <- import("datetime", convert=FALSE) - parsed_date <- stringr::str_split(posix_date, "[- : ]")[[1]] + parsed_date <- sapply(stringr::str_split(posix_date, "[- : ]")[[1]], + as.integer) + names(parsed_date) <- c("year", "month", "day", "hour", "minute", "second") - year <- as.integer(parsed_date[1]) - month <- as.integer(parsed_date[2]) - day <- as.integer(parsed_date[3]) - hour <- as.integer(parsed_date[4]) - minute <- as.integer(parsed_date[5]) - second <- as.integer(parsed_date[6]) - - datetime$datetime(year, month, day, hour, minute, second) + datetime$datetime(parsed_date[["year"]], + parsed_date[["month"]], + parsed_date[["day"]], + parsed_date[["hour"]], + parsed_date[["minute"]], + parsed_date[["second"]]) } #' Filter Tabular Dataset with time stamp columns after a specified start time. From d46244a925e08816277c7c2556bb740505a1f93d Mon Sep 17 00:00:00 2001 From: Diondra Peck <16376603+diondrapeck@users.noreply.github.com> Date: Wed, 16 Sep 2020 09:59:11 -0700 Subject: [PATCH 8/9] add suppressWarnings to .posixct_to_datetime to mute unnecessary package warning --- R/datasets.R | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/R/datasets.R b/R/datasets.R index ce78aa09..9c735822 100644 --- a/R/datasets.R +++ b/R/datasets.R @@ -446,6 +446,8 @@ keep_columns_from_dataset <- function(dataset, columns, validate = FALSE) { #' Convert R datetime objects to Python datetime.datetime objects #' +#' Wrap all calls in suppressWarnings due to unnecessary warning in timedatectl +#' package: https://stat.ethz.ch/pipermail/r-devel/2018-May/076163.html #' @param posix_date The POSIX* object to be converted #' @return A Python datetime.datetime object #' @md @@ -478,7 +480,7 @@ keep_columns_from_dataset <- function(dataset, columns, validate = FALSE) { #' @md filter_dataset_after_time <- function(dataset, start_time, include_boundary = TRUE) { - start_time <- .posixct_to_datetime(start_time) + start_time <- suppressWarnings(.posixct_to_datetime(start_time)) dataset$time_after(start_time, include_boundary) } @@ -496,7 +498,7 @@ filter_dataset_after_time <- function(dataset, start_time, #' @md filter_dataset_before_time <- function(dataset, end_time, include_boundary = TRUE) { - end_time <- .posixct_to_datetime(end_time) + end_time <- suppressWarnings(.posixct_to_datetime(end_time)) dataset$time_before(end_time, include_boundary) } @@ -515,8 +517,8 @@ filter_dataset_before_time <- function(dataset, end_time, #' @md filter_dataset_between_time <- function(dataset, start_time, end_time, include_boundary = TRUE) { - start_time <- .posixct_to_datetime(start_time) - end_time <- .posixct_to_datetime(end_time) + start_time <- suppressWarnings(.posixct_to_datetime(start_time)) + end_time <- suppressWarnings(.posixct_to_datetime(end_time)) dataset$time_between(start_time, end_time, include_boundary) } From 8555d7b89a1cdc5af52363369b286c19224f0e80 Mon Sep 17 00:00:00 2001 From: Diondra Peck <16376603+diondrapeck@users.noreply.github.com> Date: Wed, 16 Sep 2020 10:29:10 -0700 Subject: [PATCH 9/9] move warning suppression to inside function declaration --- R/datasets.R | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/R/datasets.R b/R/datasets.R index 9c735822..0d22ba99 100644 --- a/R/datasets.R +++ b/R/datasets.R @@ -446,12 +446,14 @@ keep_columns_from_dataset <- function(dataset, columns, validate = FALSE) { #' Convert R datetime objects to Python datetime.datetime objects #' -#' Wrap all calls in suppressWarnings due to unnecessary warning in timedatectl +#' Wrapped in suppressWarnings() due to unnecessary warning in timedatectl #' package: https://stat.ethz.ch/pipermail/r-devel/2018-May/076163.html +#' #' @param posix_date The POSIX* object to be converted #' @return A Python datetime.datetime object #' @md .posixct_to_datetime <- function(posix_date) { + suppressWarnings({ datetime <- import("datetime", convert=FALSE) parsed_date <- sapply(stringr::str_split(posix_date, "[- : ]")[[1]], @@ -464,6 +466,7 @@ keep_columns_from_dataset <- function(dataset, columns, validate = FALSE) { parsed_date[["hour"]], parsed_date[["minute"]], parsed_date[["second"]]) + }) } #' Filter Tabular Dataset with time stamp columns after a specified start time. @@ -480,7 +483,7 @@ keep_columns_from_dataset <- function(dataset, columns, validate = FALSE) { #' @md filter_dataset_after_time <- function(dataset, start_time, include_boundary = TRUE) { - start_time <- suppressWarnings(.posixct_to_datetime(start_time)) + start_time <- .posixct_to_datetime(start_time) dataset$time_after(start_time, include_boundary) } @@ -498,7 +501,7 @@ filter_dataset_after_time <- function(dataset, start_time, #' @md filter_dataset_before_time <- function(dataset, end_time, include_boundary = TRUE) { - end_time <- suppressWarnings(.posixct_to_datetime(end_time)) + end_time <- .posixct_to_datetime(end_time) dataset$time_before(end_time, include_boundary) } @@ -517,8 +520,8 @@ filter_dataset_before_time <- function(dataset, end_time, #' @md filter_dataset_between_time <- function(dataset, start_time, end_time, include_boundary = TRUE) { - start_time <- suppressWarnings(.posixct_to_datetime(start_time)) - end_time <- suppressWarnings(.posixct_to_datetime(end_time)) + start_time <- .posixct_to_datetime(start_time) + end_time <- .posixct_to_datetime(end_time) dataset$time_between(start_time, end_time, include_boundary) }