Merge pull request #96 from PIP-Technical-Team/auto-aux-update-review
fix formatting a little
shahronak47 authored Mar 4, 2024
2 parents 6e6785b + f2297e3 commit caa1309
Showing 1 changed file with 104 additions and 62 deletions.
166 changes: 104 additions & 62 deletions R/auto_aux_update.R
@@ -10,9 +10,7 @@ auto_aux_update <- function(measure = NULL,
maindir = gls$PIP_DATA_DIR,
owner = getOption("pipfun.ghowner"),
branch = c("DEV", "PROD", "main"),
tag = match.arg(branch)
) {

tag = match.arg(branch)) {
branch <- match.arg(branch)
from <- match.arg(from)
files_changed <- FALSE
@@ -21,20 +19,22 @@ auto_aux_update <- function(measure = NULL,
length() > 0

if (isFALSE(isgls)) {
cli::cli_abort("object {.var gls} is not available in Globel env.
Run {.code gls <- pipfun::pip_create_globals()} first",
wrap = TRUE)
cli::cli_abort(
"object {.var gls} is not available in Globel env.
Run {.code gls <- pipfun::pip_create_globals()} first",
wrap = TRUE
)
}
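
The gls object has to exist in the calling session before auto_aux_update() runs; as the abort message says, it is created with pipfun::pip_create_globals(). A minimal usage sketch (measure name hypothetical):

gls <- pipfun::pip_create_globals()   # create the global paths object first
auto_aux_update(measure = "cpi",      # hypothetical single measure
                branch  = "DEV")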


assertthat::assert_that(Sys.getenv("GITHUB_PAT") != "",
msg = "Enviroment variable `GITHUB_PAT` is empty. Please set it up using Sys.setenv(GITHUB_PAT = 'code')")
msg = "Enviroment variable `GITHUB_PAT` is empty.
Please set it up using Sys.setenv(GITHUB_PAT = 'code')")
gh_user <- "https://raw.githubusercontent.com"
org_data <- paste(gh_user,
owner,
"pipaux/metadata/Data/git_metadata.csv",
sep = "/"
) |>
sep = "/") |>
readr::read_csv(show_col_types = FALSE)
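
For orientation, git_metadata.csv is read into org_data as one row per repository and branch; at minimum it carries the Repo, hash, and branch columns used below. The rows here are purely illustrative:

org_data_example <- dplyr::tribble(
  ~Repo,                        ~hash,                                      ~branch,
  "PIP-Technical-Team/aux_cpi", "0f3c2b7e9a1d4c5b8e7f6a0d1c2b3a4e5f6a7b8c", "DEV",
  "PIP-Technical-Team/aux_ppp", "9a8b7c6d5e4f3a2b1c0d9e8f7a6b5c4d3e2f1a0b", "PROD"
)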


@@ -46,24 +46,29 @@ auto_aux_update <- function(measure = NULL,
# Keep only those repos that start with "aux_"
grep("^aux_", x = _, value = TRUE)

if(!is.null(measure)) {
if (!is.null(measure)) {
all_repos <- all_repos[all_repos %in% glue::glue("aux_{measure}")]
}
# get hashes
hash <-
purrr::map(all_repos,
.f = ~{
gh::gh("GET /repos/{owner}/{repo}/commits/{branch}",
owner = owner,
repo = .x,
branch = branch)
.f = ~ {
gh::gh(
"GET /repos/{owner}/{repo}/commits/{branch}",
owner = owner,
repo = .x,
branch = branch
)
}) |>
purrr::map_chr(~.x[["sha"]])
purrr::map_chr( ~ .x[["sha"]])
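
Each gh::gh() call above returns the branch's latest commit as a nested list; its top-level "sha" element is the hash that map_chr() extracts. A standalone sketch (repo name hypothetical):

latest <- gh::gh(
  "GET /repos/{owner}/{repo}/commits/{branch}",
  owner  = "PIP-Technical-Team",   # assumed owner
  repo   = "aux_cpi",              # hypothetical aux repository
  branch = "DEV"
)
latest[["sha"]]   # 40-character commit hash, e.g. "caa1309..."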

# Get the latest hash of each repo
all_data <- dplyr::tibble(Repo = glue::glue("{owner}/{all_repos}"),
hash = hash,
branch = branch)
all_data <-
dplyr::tibble(
Repo = glue::glue("{owner}/{all_repos}"),
hash = hash,
branch = branch
)

old_data <- org_data %>%
dplyr::filter(.data$branch == branch) %>%
@@ -74,7 +79,8 @@ auto_aux_update <- function(measure = NULL,

cli::cli_alert_info("Number of rows from csv file : {nrow(old_data)}")
cli::cli_alert_info("Number of rows from Github : {nrow(all_data)}")
cli::cli_alert_info("Both the numbers above should be equal or else some debugging is required.")
cli::cli_alert_info("Both the numbers above should be equal or else some
debugging is required.", wrap = TRUE)

new_data <- old_data %>%
dplyr::filter(.data$hash != .data$hash_original |
@@ -93,52 +99,74 @@ auto_aux_update <- function(measure = NULL,

# For each auxiliary dataset to be updated
cli::cli_alert_info("Updating data for {length(aux_fns)} files.")
for(aux in aux_fns) {
for (aux in aux_fns) {
# Find the corresponding functions to be run
# Add the pip_ prefix so that it becomes the function name
list_of_funcs <- paste0("pip_", return_value(aux, dependencies))
for(fn in list_of_funcs) {

for (fn in list_of_funcs) {

aux_file <- sub("pip_", "", fn)
cli::cli_alert_info("Running function {fn} for aux file {aux}.")

before_hash <- read_signature_file(aux_file, maindir, branch)
# Run the pip_.* function
match.fun(fn)(maindir = maindir, branch = branch) |>
suppressMessages()
after_hash <- read_signature_file(aux_file, maindir, branch)
if(before_hash != after_hash) {

if (before_hash != after_hash) {

cli::cli_alert_info("Updating csv for {fn}")
files_changed <- TRUE
org_data$hash[org_data$branch == branch &
fs::path_file(org_data$Repo) |> sub('aux_', '',x = _) %in% aux_file] <-
new_data$hash[fs::path_file(new_data$Repo) |> sub('aux_', '',x = _) %in% aux_file &
new_data$branch == branch]
}
}
}
last_updated_time <- aux_file_last_updated(maindir, names(dependencies), branch)
if(length(aux_fns) > 0 && files_changed) {

# find rows of org_data to be modified
aux_row_org <- org_data$Repo |>
fs::path_file() |>
sub('aux_', '', x = _) %in% aux_file &
org_data$branch == branch

# find rows in new_data that will be copied to org_data
aux_row_new <- new_data$Repo |>
fs::path_file() |>
sub('aux_', '', x = _) %in% aux_file &
new_data$branch == branch

org_data$hash[aux_row_org] <- new_data$hash[aux_row_new]

} # end of before_hash condition

} # end of list_of_funcs loop
} # end of aux_fns loop
last_updated_time <-
aux_file_last_updated(maindir, names(dependencies), branch)
if (length(aux_fns) > 0 && files_changed) {
# Write the latest auxiliary file and corresponding hash to csv
# Always save at the end.
# sha - hash object of current csv file in Data/git_metadata.csv
# content - base64 of changed data
out <- gh::gh("GET /repos/{owner}/{repo}/contents/{file_path}",
owner = "PIP-Technical-Team",
repo = "pipaux",
file_path = "Data/git_metadata.csv",
.params = list(ref = "metadata"))
# There is no way to update only the lines that have changed using the GitHub API
# We need to update the entire file every time. See https://stackoverflow.com/a/21315234/3962914
res <- gh::gh("PUT /repos/{owner}/{repo}/contents/{path}",
owner = "PIP-Technical-Team",
repo = "pipaux",
path = "Data/git_metadata.csv",
.params = list(branch = "metadata",
message = "updating csv file",
sha = out$sha,
content = convert_df_to_base64(org_data)
),
.token = Sys.getenv("GITHUB_PAT")
)
out <- gh::gh(
"GET /repos/{owner}/{repo}/contents/{file_path}",
owner = "PIP-Technical-Team",
repo = "pipaux",
file_path = "Data/git_metadata.csv",
.params = list(ref = "metadata")
)
# There is no way to update only the lines that have changed using the GitHub API
# We need to update the entire file every time. See https://stackoverflow.com/a/21315234/3962914
res <- gh::gh(
"PUT /repos/{owner}/{repo}/contents/{path}",
owner = "PIP-Technical-Team",
repo = "pipaux",
path = "Data/git_metadata.csv",
.params = list(
branch = "metadata",
message = "updating csv file",
sha = out$sha,
content = convert_df_to_base64(org_data)
),
.token = Sys.getenv("GITHUB_PAT")
)
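
As the comment above notes, the contents endpoint replaces the file wholesale: the GET supplies the blob sha of the current Data/git_metadata.csv (the PUT requires it so that a stale update is rejected), and content must be the complete new file, base64-encoded. A quick check of what was pushed (sketch, assuming the PUT succeeded):

res$content$sha                     # blob sha of the new file version
convert_df_to_base64(org_data) |>
  base64enc::base64decode() |>
  rawToChar() |>
  cat()                             # prints the csv text that was uploaded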
}
cli::cli_h2("File updated status.")
knitr::kable(last_updated_time)
@@ -147,8 +175,8 @@ auto_aux_update <- function(measure = NULL,

return_value <- function(aux, dependencies) {
val <- dependencies[[aux]]
if(length(val) > 0) {
for(i in val) {
if (length(val) > 0) {
for (i in val) {
val <- c(return_value(i, dependencies), val)
}
}
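
return_value() recurses through the dependency list so that each measure's transitive prerequisites end up ahead of it in the result; the caller then prefixes every returned name with "pip_" to obtain the update functions to run. A hypothetical dependencies object it might receive:

dependencies_example <- list(
  ppp = character(),       # no prerequisites
  cpi = "ppp",             # cpi must be refreshed after ppp
  pl  = c("cpi", "ppp")    # pl depends on both
)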
@@ -157,18 +185,25 @@ return_value <- function(aux, dependencies) {

convert_df_to_base64 <- function(df) {
df |>
write.table(quote = FALSE, row.names = FALSE, sep=",") |>
write.table(quote = FALSE,
row.names = FALSE,
sep = ",") |>
capture.output() |>
paste(collapse="\n") |>
paste(collapse = "\n") |>
charToRaw() |>
base64enc::base64encode()
}
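
The contents API only accepts base64 payloads, which is why the data frame is flattened to CSV text before encoding. A round-trip check of the helper (illustrative values):

enc <- convert_df_to_base64(
  data.frame(Repo = "PIP-Technical-Team/aux_cpi", hash = "0f3c2b7", branch = "DEV")
)
enc                               # base64 string passed as `content` in the PUT call
base64enc::base64decode(enc) |>
  rawToChar() |>
  cat()                           # prints the csv text back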

aux_file_last_updated <- function(data_dir, aux_files, branch) {
filenames <- glue::glue("{data_dir}/_aux/{branch}/{aux_files}/{aux_files}.qs")
data <- sapply(filenames, function(x) qs::qattributes(x)$datetime)
data.frame(filename = basename(names(data)),
time_last_update = as.POSIXct(data, format = "%Y%m%d%H%M%S"), row.names = NULL) |>
filenames <-
glue::glue("{data_dir}/_aux/{branch}/{aux_files}/{aux_files}.qs")
data <- sapply(filenames, function(x)
qs::qattributes(x)$datetime)
data.frame(
filename = basename(names(data)),
time_last_update = as.POSIXct(data, format = "%Y%m%d%H%M%S"),
row.names = NULL
) |>
dplyr::arrange(desc(time_last_update))

}
@@ -177,16 +212,23 @@ read_dependencies <- function(gh_user, owner) {
dependencies <- paste(gh_user,
owner,
"pipaux/metadata/Data/dependency.yml",
sep = "/"
) |>
sep = "/") |>
yaml::read_yaml()

sapply(dependencies, \(x) if (length(x)) strsplit(x, ",\\s+")[[1]] else character())
sapply(dependencies, \(x) if (length(x))
strsplit(x, ",\\s+")[[1]]
else
character())
}
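
dependency.yml is parsed into a named list in which each entry is a comma-separated string of the measures that must be refreshed alongside it; strsplit() splits every string into a character vector, and entries with no dependencies come back as character(0). Illustrative input and output (measure names hypothetical):

yaml::yaml.load("cpi: ppp, pfw\ngdp: ~") |>
  sapply(\(x) if (length(x)) strsplit(x, ",\\s+")[[1]] else character())
#> $cpi
#> [1] "ppp" "pfw"
#>
#> $gdp
#> character(0)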

read_signature_file <- function(aux_file, maindir, branch) {
# Construct the path to data signature aux file
data_signature_path <- fs::path(maindir, "_aux", branch, aux_file, glue::glue("{aux_file}_datasignature.txt"))
data_signature_path <-
fs::path(maindir,
"_aux",
branch,
aux_file,
glue::glue("{aux_file}_datasignature.txt"))
signature_hash <- readr::read_lines(data_signature_path)
return(signature_hash)
}
