From 984694e6e4fa5afa7bd0427dea0b8f7bf67f2e48 Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Thu, 1 Feb 2024 10:34:11 +0800 Subject: [PATCH 1/9] Trigger Build From 4f00dc7b1d1aafb47b00fe1244319d70af937343 Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Thu, 1 Feb 2024 11:04:19 +0800 Subject: [PATCH 2/9] include additional condition --- R/auto_aux_update.R | 55 +++++++++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/R/auto_aux_update.R b/R/auto_aux_update.R index 0a7ff84..b165830 100644 --- a/R/auto_aux_update.R +++ b/R/auto_aux_update.R @@ -99,31 +99,38 @@ auto_aux_update <- function(measure = NULL, suppressMessages() } } - - # Write the latest auxiliary file and corresponding hash to csv - # Always save at the end. - # sha - hash object of current csv file in Data/git_metadata.csv - # content - base64 of changed data - out <- gh::gh("GET /repos/{owner}/{repo}/contents/{file_path}", - owner = "PIP-Technical-Team", - repo = "pipaux", - file_path = "Data/git_metadata.csv", - .params = list(ref = "metadata")) - - res <- gh::gh("PUT /repos/{owner}/{repo}/contents/{path}", - owner = "PIP-Technical-Team", - repo = "pipaux", - path = "Data/git_metadata.csv", - .params = list(branch = "metadata", - message = "updating csv file", - sha = out$sha, # why does the sha remain the same? - content = convert_df_to_base64(all_data) - ), - .token = Sys.getenv("GITHUB_PAT") - ) - - cli::cli_h2("File updated status.") out <- aux_file_last_updated(maindir, names(dependencies), branch) + if(length(aux_fns) > 0) { + # Check if the file has updated, only then update the metadata file + # If the files that we wanted to updated were updated today only then write the file + temp_dat <- out |> + dplyr::filter(filename %in% paste0(aux_fns, ".qs")) |> + dplyr::filter(as.Date(time_last_update) == Sys.Date()) + if(nrow(temp_dat) > 0) { + # Write the latest auxiliary file and corresponding hash to csv + # Always save at the end. + # sha - hash object of current csv file in Data/git_metadata.csv + # content - base64 of changed data + out <- gh::gh("GET /repos/{owner}/{repo}/contents/{file_path}", + owner = "PIP-Technical-Team", + repo = "pipaux", + file_path = "Data/git_metadata.csv", + .params = list(ref = "metadata")) + + res <- gh::gh("PUT /repos/{owner}/{repo}/contents/{path}", + owner = "PIP-Technical-Team", + repo = "pipaux", + path = "Data/git_metadata.csv", + .params = list(branch = "metadata", + message = "updating csv file", + sha = out$sha, # why does the sha remain the same? + content = convert_df_to_base64(all_data) + ), + .token = Sys.getenv("GITHUB_PAT") + ) + } + } + cli::cli_h2("File updated status.") knitr::kable(out) } From 6b81989efa6bbe950a62613031e6c92f73bf8217 Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Fri, 9 Feb 2024 10:41:29 +0800 Subject: [PATCH 3/9] add read_signature_file function --- R/auto_aux_update.R | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/R/auto_aux_update.R b/R/auto_aux_update.R index b165830..cbfb772 100644 --- a/R/auto_aux_update.R +++ b/R/auto_aux_update.R @@ -71,12 +71,16 @@ auto_aux_update <- function(measure = NULL, old_data <- old_data %>% dplyr::inner_join(all_data, by = c("Repo", "branch")) + cli::cli_alert_info("Number of rows from csv file : {nrow(old_data)}") + cli::cli_alert_info("Number of rows from Github : {nrow(all_data)}") + cli::cli_alert_info("Both the numbers above should be equal or else some debugging is required.") + new_data <- old_data %>% dplyr::filter(.data$hash != .data$hash_original | is.na(.data$hash_original) | is.na(.data$hash)) - all_data <- dplyr::rows_update(org_data, all_data, by = c("Repo", "branch")) + # all_data <- dplyr::rows_update(org_data, all_data, by = c("Repo", "branch")) @@ -94,9 +98,12 @@ auto_aux_update <- function(measure = NULL, list_of_funcs <- paste0("pip_", return_value(aux, dependencies)) for(fn in list_of_funcs) { cli::cli_alert_info("Running function {fn} for aux file {aux}.") + before_hash <- read_signature_file(fn, maindir, branch) # Run the pip_.* function match.fun(fn)(maindir = maindir, branch = branch) |> suppressMessages() + + after_hash <- read_signature_file(fn, maindir, branch) } } out <- aux_file_last_updated(maindir, names(dependencies), branch) @@ -116,7 +123,8 @@ auto_aux_update <- function(measure = NULL, repo = "pipaux", file_path = "Data/git_metadata.csv", .params = list(ref = "metadata")) - + # There is no way to update only the lines which has changed using Github API + # We need to update the entire file every time. Refer - https://stackoverflow.com/a/21315234/3962914 res <- gh::gh("PUT /repos/{owner}/{repo}/contents/{path}", owner = "PIP-Technical-Team", repo = "pipaux", @@ -163,7 +171,6 @@ aux_file_last_updated <- function(data_dir, aux_files, branch) { } - read_dependencies <- function(gh_user, owner) { dependencies <- paste(gh_user, owner, @@ -174,3 +181,11 @@ read_dependencies <- function(gh_user, owner) { sapply(dependencies, \(x) if (length(x)) strsplit(x, ",\\s+")[[1]] else character()) } + +read_signature_file <- function(fn, maindir, branch) { + aux_file <- sub("pip_", "", fn) + # Construct the path to data signature aux file + data_signature_path <- fs::path(maindir, "_aux", branch, aux_file, glue::glue("{aux_file}_datasignature.txt")) + signature_hash <- readr::read_lines(data_signature_path) + return(signature_hash) +} From 239c633536735b5b52e52b58f239eddb81fe0021 Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Fri, 9 Feb 2024 19:22:49 +0800 Subject: [PATCH 4/9] update data reading signature files --- R/auto_aux_update.R | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/R/auto_aux_update.R b/R/auto_aux_update.R index cbfb772..b86fb69 100644 --- a/R/auto_aux_update.R +++ b/R/auto_aux_update.R @@ -97,32 +97,33 @@ auto_aux_update <- function(measure = NULL, # Add pip_ suffix so that it becomes function name list_of_funcs <- paste0("pip_", return_value(aux, dependencies)) for(fn in list_of_funcs) { + aux_file <- sub("pip_", "", fn) cli::cli_alert_info("Running function {fn} for aux file {aux}.") - before_hash <- read_signature_file(fn, maindir, branch) + before_hash <- read_signature_file(aux_file, maindir, branch) # Run the pip_.* function match.fun(fn)(maindir = maindir, branch = branch) |> suppressMessages() - - after_hash <- read_signature_file(fn, maindir, branch) + after_hash <- read_signature_file(aux_file, maindir, branch) + if(before_hash != after_hash) { + cli::cli_alert_info("Updating csv for {fn}") + org_data$hash[org_data$branch == branch & + fs::path_file(org_data$Repo) |> sub('aux_', '',x = _) %in% aux_file] <- + new_data$hash[fs::path_file(new_data$Repo) |> sub('aux_', '',x = _) & + new_data$branch == branch] + } } } out <- aux_file_last_updated(maindir, names(dependencies), branch) if(length(aux_fns) > 0) { - # Check if the file has updated, only then update the metadata file - # If the files that we wanted to updated were updated today only then write the file - temp_dat <- out |> - dplyr::filter(filename %in% paste0(aux_fns, ".qs")) |> - dplyr::filter(as.Date(time_last_update) == Sys.Date()) - if(nrow(temp_dat) > 0) { # Write the latest auxiliary file and corresponding hash to csv # Always save at the end. # sha - hash object of current csv file in Data/git_metadata.csv # content - base64 of changed data - out <- gh::gh("GET /repos/{owner}/{repo}/contents/{file_path}", - owner = "PIP-Technical-Team", - repo = "pipaux", - file_path = "Data/git_metadata.csv", - .params = list(ref = "metadata")) + out <- gh::gh("GET /repos/{owner}/{repo}/contents/{file_path}", + owner = "PIP-Technical-Team", + repo = "pipaux", + file_path = "Data/git_metadata.csv", + .params = list(ref = "metadata")) # There is no way to update only the lines which has changed using Github API # We need to update the entire file every time. Refer - https://stackoverflow.com/a/21315234/3962914 res <- gh::gh("PUT /repos/{owner}/{repo}/contents/{path}", @@ -131,12 +132,11 @@ auto_aux_update <- function(measure = NULL, path = "Data/git_metadata.csv", .params = list(branch = "metadata", message = "updating csv file", - sha = out$sha, # why does the sha remain the same? - content = convert_df_to_base64(all_data) + sha = out$sha, + content = convert_df_to_base64(org_data) ), .token = Sys.getenv("GITHUB_PAT") ) - } } cli::cli_h2("File updated status.") knitr::kable(out) @@ -183,7 +183,6 @@ read_dependencies <- function(gh_user, owner) { } read_signature_file <- function(fn, maindir, branch) { - aux_file <- sub("pip_", "", fn) # Construct the path to data signature aux file data_signature_path <- fs::path(maindir, "_aux", branch, aux_file, glue::glue("{aux_file}_datasignature.txt")) signature_hash <- readr::read_lines(data_signature_path) From e3820e25b7be535cd534d9a9a07c7491fa701ac8 Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Fri, 9 Feb 2024 19:32:46 +0800 Subject: [PATCH 5/9] change args --- R/auto_aux_update.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/auto_aux_update.R b/R/auto_aux_update.R index b86fb69..2b538a7 100644 --- a/R/auto_aux_update.R +++ b/R/auto_aux_update.R @@ -182,7 +182,7 @@ read_dependencies <- function(gh_user, owner) { sapply(dependencies, \(x) if (length(x)) strsplit(x, ",\\s+")[[1]] else character()) } -read_signature_file <- function(fn, maindir, branch) { +read_signature_file <- function(aux_file, maindir, branch) { # Construct the path to data signature aux file data_signature_path <- fs::path(maindir, "_aux", branch, aux_file, glue::glue("{aux_file}_datasignature.txt")) signature_hash <- readr::read_lines(data_signature_path) From 2e60341cd927fdf448e15d85d8c8a69277374a08 Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Fri, 9 Feb 2024 19:36:17 +0800 Subject: [PATCH 6/9] change time --- R/auto_aux_update.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/auto_aux_update.R b/R/auto_aux_update.R index 2b538a7..e4588c7 100644 --- a/R/auto_aux_update.R +++ b/R/auto_aux_update.R @@ -113,7 +113,7 @@ auto_aux_update <- function(measure = NULL, } } } - out <- aux_file_last_updated(maindir, names(dependencies), branch) + last_updated_time <- aux_file_last_updated(maindir, names(dependencies), branch) if(length(aux_fns) > 0) { # Write the latest auxiliary file and corresponding hash to csv # Always save at the end. @@ -139,7 +139,7 @@ auto_aux_update <- function(measure = NULL, ) } cli::cli_h2("File updated status.") - knitr::kable(out) + knitr::kable(last_updated_time) } From 00c39d11c27a20b79dd54d725c6dbf558a2d86ba Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Fri, 9 Feb 2024 19:42:17 +0800 Subject: [PATCH 7/9] add new var to track file change --- R/auto_aux_update.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/auto_aux_update.R b/R/auto_aux_update.R index e4588c7..c7029f8 100644 --- a/R/auto_aux_update.R +++ b/R/auto_aux_update.R @@ -15,6 +15,7 @@ auto_aux_update <- function(measure = NULL, branch <- match.arg(branch) from <- match.arg(from) + files_changed <- FALSE isgls <- ls(sys.frame(), pattern = "^gls$") |> length() > 0 @@ -106,6 +107,7 @@ auto_aux_update <- function(measure = NULL, after_hash <- read_signature_file(aux_file, maindir, branch) if(before_hash != after_hash) { cli::cli_alert_info("Updating csv for {fn}") + files_changed <- TRUE org_data$hash[org_data$branch == branch & fs::path_file(org_data$Repo) |> sub('aux_', '',x = _) %in% aux_file] <- new_data$hash[fs::path_file(new_data$Repo) |> sub('aux_', '',x = _) & @@ -114,7 +116,7 @@ auto_aux_update <- function(measure = NULL, } } last_updated_time <- aux_file_last_updated(maindir, names(dependencies), branch) - if(length(aux_fns) > 0) { + if(length(aux_fns) > 0 && files_changed) { # Write the latest auxiliary file and corresponding hash to csv # Always save at the end. # sha - hash object of current csv file in Data/git_metadata.csv From 6e6785bbee0c67e22032228257c15cde00e694c0 Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Fri, 9 Feb 2024 19:48:43 +0800 Subject: [PATCH 8/9] complete condition --- R/auto_aux_update.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/auto_aux_update.R b/R/auto_aux_update.R index c7029f8..a0f1151 100644 --- a/R/auto_aux_update.R +++ b/R/auto_aux_update.R @@ -110,7 +110,7 @@ auto_aux_update <- function(measure = NULL, files_changed <- TRUE org_data$hash[org_data$branch == branch & fs::path_file(org_data$Repo) |> sub('aux_', '',x = _) %in% aux_file] <- - new_data$hash[fs::path_file(new_data$Repo) |> sub('aux_', '',x = _) & + new_data$hash[fs::path_file(new_data$Repo) |> sub('aux_', '',x = _) %in% aux_file & new_data$branch == branch] } } From f2297e3cf6b03c88f4e33230f3a9f744c957d33c Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda Aguilar" Date: Wed, 14 Feb 2024 19:59:39 -0500 Subject: [PATCH 9/9] fix formating a little --- R/auto_aux_update.R | 166 +++++++++++++++++++++++++++----------------- 1 file changed, 104 insertions(+), 62 deletions(-) diff --git a/R/auto_aux_update.R b/R/auto_aux_update.R index a0f1151..03e7c4c 100644 --- a/R/auto_aux_update.R +++ b/R/auto_aux_update.R @@ -10,9 +10,7 @@ auto_aux_update <- function(measure = NULL, maindir = gls$PIP_DATA_DIR, owner = getOption("pipfun.ghowner"), branch = c("DEV", "PROD", "main"), - tag = match.arg(branch) - ) { - + tag = match.arg(branch)) { branch <- match.arg(branch) from <- match.arg(from) files_changed <- FALSE @@ -21,20 +19,22 @@ auto_aux_update <- function(measure = NULL, length() > 0 if (isFALSE(isgls)) { - cli::cli_abort("object {.var gls} is not available in Globel env. - Run {.code gls <- pipfun::pip_create_globals()} first", - wrap = TRUE) + cli::cli_abort( + "object {.var gls} is not available in Globel env. + Run {.code gls <- pipfun::pip_create_globals()} first", + wrap = TRUE + ) } assertthat::assert_that(Sys.getenv("GITHUB_PAT") != "", - msg = "Enviroment variable `GITHUB_PAT` is empty. Please set it up using Sys.setenv(GITHUB_PAT = 'code')") + msg = "Enviroment variable `GITHUB_PAT` is empty. + Please set it up using Sys.setenv(GITHUB_PAT = 'code')") gh_user <- "https://raw.githubusercontent.com" org_data <- paste(gh_user, owner, "pipaux/metadata/Data/git_metadata.csv", - sep = "/" - ) |> + sep = "/") |> readr::read_csv(show_col_types = FALSE) @@ -46,24 +46,29 @@ auto_aux_update <- function(measure = NULL, #Keep only those repos that start with "aux_" grep("^aux_", x = _, value = TRUE) - if(!is.null(measure)) { + if (!is.null(measure)) { all_repos <- all_repos[all_repos %in% glue::glue("aux_{measure}")] } # get hashs hash <- purrr::map(all_repos, - .f = ~{ - gh::gh("GET /repos/{owner}/{repo}/commits/{branch}", - owner = owner, - repo = .x, - branch = branch) + .f = ~ { + gh::gh( + "GET /repos/{owner}/{repo}/commits/{branch}", + owner = owner, + repo = .x, + branch = branch + ) }) |> - purrr::map_chr(~.x[["sha"]]) + purrr::map_chr( ~ .x[["sha"]]) # Get the latest hash of the repo - all_data <- dplyr::tibble(Repo = glue::glue("{owner}/{all_repos}"), - hash = hash, - branch = branch) + all_data <- + dplyr::tibble( + Repo = glue::glue("{owner}/{all_repos}"), + hash = hash, + branch = branch + ) old_data <- org_data %>% dplyr::filter(.data$branch == branch) %>% @@ -74,7 +79,8 @@ auto_aux_update <- function(measure = NULL, cli::cli_alert_info("Number of rows from csv file : {nrow(old_data)}") cli::cli_alert_info("Number of rows from Github : {nrow(all_data)}") - cli::cli_alert_info("Both the numbers above should be equal or else some debugging is required.") + cli::cli_alert_info("Both the numbers above should be equal or else some + debugging is required.", wrap = TRUE) new_data <- old_data %>% dplyr::filter(.data$hash != .data$hash_original | @@ -93,52 +99,74 @@ auto_aux_update <- function(measure = NULL, # For each auxiliary data to be updated cli::cli_alert_info("Updating data for {length(aux_fns)} files.") - for(aux in aux_fns) { + for (aux in aux_fns) { # Find the corresponding functions to be run # Add pip_ suffix so that it becomes function name list_of_funcs <- paste0("pip_", return_value(aux, dependencies)) - for(fn in list_of_funcs) { + + for (fn in list_of_funcs) { + aux_file <- sub("pip_", "", fn) cli::cli_alert_info("Running function {fn} for aux file {aux}.") + before_hash <- read_signature_file(aux_file, maindir, branch) # Run the pip_.* function match.fun(fn)(maindir = maindir, branch = branch) |> suppressMessages() after_hash <- read_signature_file(aux_file, maindir, branch) - if(before_hash != after_hash) { + + if (before_hash != after_hash) { + cli::cli_alert_info("Updating csv for {fn}") files_changed <- TRUE - org_data$hash[org_data$branch == branch & - fs::path_file(org_data$Repo) |> sub('aux_', '',x = _) %in% aux_file] <- - new_data$hash[fs::path_file(new_data$Repo) |> sub('aux_', '',x = _) %in% aux_file & - new_data$branch == branch] - } - } - } - last_updated_time <- aux_file_last_updated(maindir, names(dependencies), branch) - if(length(aux_fns) > 0 && files_changed) { + + # find rows of of org to be modified + aux_row_org <- org_data$Repo |> + fs::path_file() |> + sub('aux_', '', x = _) %in% aux_file & + org_data$branch == branch + + # find rows in new that will be copied to org + aux_row_new <- new_data$Repo |> + fs::path_file() |> + sub('aux_', '', x = _) %in% aux_file & + new_data$branch == branch + + org_data$hash[aux_row_org] <- new_data$hash[aux_row_new] + + } # end of before_hash condition + + } # end of list_of_funcs loop + } # end of aux_fns loop + last_updated_time <- + aux_file_last_updated(maindir, names(dependencies), branch) + if (length(aux_fns) > 0 && files_changed) { # Write the latest auxiliary file and corresponding hash to csv # Always save at the end. # sha - hash object of current csv file in Data/git_metadata.csv # content - base64 of changed data - out <- gh::gh("GET /repos/{owner}/{repo}/contents/{file_path}", - owner = "PIP-Technical-Team", - repo = "pipaux", - file_path = "Data/git_metadata.csv", - .params = list(ref = "metadata")) - # There is no way to update only the lines which has changed using Github API - # We need to update the entire file every time. Refer - https://stackoverflow.com/a/21315234/3962914 - res <- gh::gh("PUT /repos/{owner}/{repo}/contents/{path}", - owner = "PIP-Technical-Team", - repo = "pipaux", - path = "Data/git_metadata.csv", - .params = list(branch = "metadata", - message = "updating csv file", - sha = out$sha, - content = convert_df_to_base64(org_data) - ), - .token = Sys.getenv("GITHUB_PAT") - ) + out <- gh::gh( + "GET /repos/{owner}/{repo}/contents/{file_path}", + owner = "PIP-Technical-Team", + repo = "pipaux", + file_path = "Data/git_metadata.csv", + .params = list(ref = "metadata") + ) + # There is no way to update only the lines which has changed using Github API + # We need to update the entire file every time. Refer - https://stackoverflow.com/a/21315234/3962914 + res <- gh::gh( + "PUT /repos/{owner}/{repo}/contents/{path}", + owner = "PIP-Technical-Team", + repo = "pipaux", + path = "Data/git_metadata.csv", + .params = list( + branch = "metadata", + message = "updating csv file", + sha = out$sha, + content = convert_df_to_base64(org_data) + ), + .token = Sys.getenv("GITHUB_PAT") + ) } cli::cli_h2("File updated status.") knitr::kable(last_updated_time) @@ -147,8 +175,8 @@ auto_aux_update <- function(measure = NULL, return_value <- function(aux, dependencies) { val <- dependencies[[aux]] - if(length(val) > 0) { - for(i in val) { + if (length(val) > 0) { + for (i in val) { val <- c(return_value(i, dependencies), val) } } @@ -157,18 +185,25 @@ return_value <- function(aux, dependencies) { convert_df_to_base64 <- function(df) { df |> - write.table(quote = FALSE, row.names = FALSE, sep=",") |> + write.table(quote = FALSE, + row.names = FALSE, + sep = ",") |> capture.output() |> - paste(collapse="\n") |> + paste(collapse = "\n") |> charToRaw() |> base64enc::base64encode() } aux_file_last_updated <- function(data_dir, aux_files, branch) { - filenames <- glue::glue("{data_dir}/_aux/{branch}/{aux_files}/{aux_files}.qs") - data <- sapply(filenames, function(x) qs::qattributes(x)$datetime) - data.frame(filename = basename(names(data)), - time_last_update = as.POSIXct(data, format = "%Y%m%d%H%M%S"), row.names = NULL) |> + filenames <- + glue::glue("{data_dir}/_aux/{branch}/{aux_files}/{aux_files}.qs") + data <- sapply(filenames, function(x) + qs::qattributes(x)$datetime) + data.frame( + filename = basename(names(data)), + time_last_update = as.POSIXct(data, format = "%Y%m%d%H%M%S"), + row.names = NULL + ) |> dplyr::arrange(desc(time_last_update)) } @@ -177,16 +212,23 @@ read_dependencies <- function(gh_user, owner) { dependencies <- paste(gh_user, owner, "pipaux/metadata/Data/dependency.yml", - sep = "/" - ) |> + sep = "/") |> yaml::read_yaml() - sapply(dependencies, \(x) if (length(x)) strsplit(x, ",\\s+")[[1]] else character()) + sapply(dependencies, \(x) if (length(x)) + strsplit(x, ",\\s+")[[1]] + else + character()) } read_signature_file <- function(aux_file, maindir, branch) { # Construct the path to data signature aux file - data_signature_path <- fs::path(maindir, "_aux", branch, aux_file, glue::glue("{aux_file}_datasignature.txt")) + data_signature_path <- + fs::path(maindir, + "_aux", + branch, + aux_file, + glue::glue("{aux_file}_datasignature.txt")) signature_hash <- readr::read_lines(data_signature_path) return(signature_hash) }