diff --git a/R/save_to_gh.R b/R/save_to_gh.R index d735929..0c4287b 100644 --- a/R/save_to_gh.R +++ b/R/save_to_gh.R @@ -1,47 +1,81 @@ #' Save to GitHub #' -#' @param df A dataframe object -#' @param metadata a list with all the information of a file, usually from -#' [get_pip_releases] -#' @inheritParams load_from_gh -#' @return invisible NULL -#' @export +#' This function uploads or updates a file in a GitHub repository. If the file +#' does not already exist, a new file will be created. If the +#' file already exists, it will be updated with the new data. +#' +#' @param df A dataframe containing the data to be uploaded or used to +#' update an existing file. The dataframe will be converted into a base64-encoded +#' string before being uploaded +#' @param repo A character string specifying the name of the GitHub repo +#' where the file will be uploaded or updated +#' @param owner A character string specifying the GitHub username or organization +#' that owns the repository. Defaults to `pipfun.ghowner` option +#' @param branch A character string specifying the branch of the repository where +#' the file should be uploaded or updated. The default is `DEV` branch +#' @param filename A character string specifying the name of the file to be created +#' or updated in the GitHub repository. If not provided, it defaults to repo name +#' @param ext A character string representing the file extension (e.g., `.csv`). Default is `csv` +#' @param metadata A list containing metadata for an existing file in the repository. Usually from [get_pip_releases] +#' It should contain `sha` (the SHA hash of the file) and `path` (the file +#' path in the repository). If `NULL`, the function will check whether the file exists +#' and retrieve the metadata +#' @param verbose A logical: whether to print detailed messages +#' about the process. The default is `TRUE` +#' @param message A character string specifying the commit message for the GitHub upload +#' or update. 
The default is a message with the current timestamp +#' +#' @return +#' Returns, invisibly, a list with the GitHub API response (`content`, `commit`), the +#' metadata of the file before the operation (`init`, `NULL` for a new file), the `owner`, +#' `repo` and `branch`, and `data_change`, a logical flagging whether the content changed. #' #' @examples #' \dontrun{ -#' df <- data.frame(a = 1:10, b = letters[1:10]) -#' save_to_gh(df, repo = "pip_info", -#' filename = "to_delete.csv", -#' branch = "testing") +#' # Create a new file on GitHub +#' df <- data.frame(a = 1:5, b = letters[1:5]) +#' save_to_gh(df = df, repo = "aux_test", filename = "data_example", ext = "csv") +#' +#' # Update an existing file on GitHub +#' df <- data.frame(a = 6:10, b = letters[6:10]) +#' save_to_gh(df = df, repo = "aux_test", filename = "data_example", ext = "csv") #' } +#' @export +#' save_to_gh <- function(df, - repo, - owner = getOption("pipfun.ghowner"), - branch = "DEV", - filename = repo, - ext = NULL, - metadata = NULL, - message = paste("Updating data via R script on", - Sys.time()), - verbose = TRUE, - ...) { - - if (!requireNamespace("gh", quietly = TRUE)) { - stop("Package 'gh' is required. 
Please install it using install.packages('gh').") - } - if (!requireNamespace("cli", quietly = TRUE)) { - install.packages("cli") - library(cli) - } + repo, + owner = getOption("pipfun.ghowner"), + branch = "DEV", + filename = repo, + ext = "csv", + metadata = NULL, + verbose = TRUE, + message = paste("Updating data via R script on", Sys.time())) { + + # Get GitHub credentials + creds <- get_github_creds() + + # Convert the data frame to base64-encoded content based on the file extension + content <- convert_df_to_base64(df, ext) - creds <- get_github_creds() # Use the passed function to get GitHub credentials + # Prepare params for GitHub request + params <- list( + branch = branch, + message = message, + content = content + ) + # Check if metadata is provided and is valid + if (!is.null(metadata) && (!"sha" %in% names(metadata) || !"path" %in% names(metadata))) { + cli::cli_abort("Invalid metadata provided. It must contain 'sha' and 'path'.") + } - # Try to get existing SHA of the file (if it exists) + # Version control: check if the file already exists in the repo if (is.null(metadata)) { # Construct the file path file_path <- check_filename_ext(filename, ext) + # Attempt to retrieve metadata (file info) from GitHub metadata <- tryCatch({ gh::gh( "GET /repos/{owner}/{repo}/contents/{file_path}", @@ -58,24 +92,16 @@ save_to_gh <- function(df, cli::cli_abort(e) } }) - } else { - file_path <- metadata$path - } - # Convert data frame to base64-encoded content based on the file extension - content <- convert_df_to_base64(df, ext) - - # Prepare parameters for the GitHub API request - params <- list( - branch = branch, - message = message, - content = content - ) - - # Include 'sha' parameter if the file already exists (for updating) if (!is.null(metadata)) { - params$sha <- metadata$sha + # If metadata exists, get the file path and SHA + file_path <- metadata$path + params$sha <- metadata$sha # Include SHA for updating an existing file + } else { + # If no metadata, 
this is a new file, so set the file path for creation + file_path <- check_filename_ext(filename, ext) + params$sha <- NULL } # Upload the file to GitHub @@ -84,27 +110,38 @@ save_to_gh <- function(df, owner = owner, repo = repo, path = file_path, - .params = params, + message = message, # Commit message + content = content, + .params = params, # Base64-encoded file content + sha = params$sha, # Include SHA directly in the body of the request if updating .token = creds$password ) - if (verbose) { - cli::cli_alert_success("File {.file {filename}.{ext}} saved successfully to - branch {.field {branch}} of {owner}/{repo} in GitHub!") - } - + # Update metadata: store initial metadata and URL info mt <- output |> - append(list(init = metadata)) |> + append(list(init = metadata)) |> # 'init' will be NULL if file didn't exist before PUT request append(info_from_url(output$content$url)) - mt$data_change <- mt$content$sha != mt$init$sha + # Track if data has changed + if (!is.null(mt$init$sha)) { + # If SHA exists in 'init', compare the current and previous SHAs + mt$data_change <- mt$content$sha != mt$init$sha + } else { + # If the file was newly created (no initial SHA), set data_change to TRUE + mt$data_change <- TRUE + } + # If verbose, print success and data change status if (verbose) { - if (mt$data_change) { - cli::cli_alert("Data has been updated") - } else { - cli::cli_alert("Data did not change") - } + cli::cli_alert_success( + "File {.file {filename}.{ext}} saved successfully to branch {.field {branch}} of {owner}/{repo} in GitHub!" 
+ ) + } + + if (verbose) { + cli::cli_alert( + if (mt$data_change) "Data has been updated" else "Data did not change" + ) } return(invisible(mt)) @@ -112,7 +149,6 @@ save_to_gh <- function(df, - # Helper function to convert data frame to base64-encoded content based on file extension convert_df_to_base64 <- function(df, ext = "csv") { if (is.null(ext)) diff --git a/man/save_to_gh.Rd b/man/save_to_gh.Rd index f3f697a..a16627e 100644 --- a/man/save_to_gh.Rd +++ b/man/save_to_gh.Rd @@ -10,46 +10,60 @@ save_to_gh( owner = getOption("pipfun.ghowner"), branch = "DEV", filename = repo, - ext = NULL, + ext = "csv", metadata = NULL, - message = paste("Updating data via R script on", Sys.time()), verbose = TRUE, - ... + message = paste("Updating data via R script on", Sys.time()) ) } \arguments{ -\item{df}{A dataframe object} +\item{df}{A dataframe containing the data to be uploaded or used to +update an existing file. The dataframe will be converted into a base64-encoded +string before being uploaded} -\item{repo}{character: name of the repo} +\item{repo}{A character string specifying the name of the GitHub repo +where the file will be uploaded or updated} -\item{owner}{character: Github repo owner. Default is -\code{getOption("pipfun.ghowner")}} +\item{owner}{A character string specifying the GitHub username or organization +that owns the repository. Defaults to \code{pipfun.ghowner} option} -\item{branch}{character: either "DEV" or "PROD". Refers to the branch that -will be used to update either the development server or production.} +\item{branch}{A character string specifying the branch of the repository where +the file should be uploaded or updated. The default is \code{DEV} branch} -\item{filename}{character: Name of file name without the ".csv" extension. -Default is \code{measure}} +\item{filename}{A character string specifying the name of the file to be created +or updated in the GitHub repository. 
If not provided, it defaults to repo name} -\item{ext}{character: Extension of \code{filename}. Default "csv"} +\item{ext}{A character string representing the file extension (e.g., \code{.csv}). Default is \code{csv}} -\item{metadata}{a list with all the information of a file, usually from -\link{get_pip_releases}} +\item{metadata}{A list containing metadata for an existing file in the repository. Usually from \link{get_pip_releases} +It should contain \code{sha} (the SHA hash of the file) and \code{path} (the file +path in the repository). If \code{NULL}, the function will check whether the file exists +and retrieve the metadata} -\item{...}{parameters to be passed to the loading functions depending of the -extension used} +\item{verbose}{A logical: whether to print detailed messages +about the process. The default is \code{TRUE}} + +\item{message}{A character string specifying the commit message for the GitHub upload +or update. The default is a message with the current timestamp} } \value{ -invisible NULL +Returns, invisibly, a list with the GitHub API response (\code{content}, \code{commit}), the +metadata of the file before the operation (\code{init}, \code{NULL} for a new file), the \code{owner}, +\code{repo} and \code{branch}, and \code{data_change}, a logical flagging whether the content changed. } \description{ -Save to GitHub +This function uploads or updates a file in a GitHub repository. If the file +does not already exist, a new file will be created. If the +file already exists, it will be updated with the new data. 
} \examples{ \dontrun{ -df <- data.frame(a = 1:10, b = letters[1:10]) -save_to_gh(df, repo = "pip_info", - filename = "to_delete.csv", - branch = "testing") + # Create a new file on GitHub + df <- data.frame(a = 1:5, b = letters[1:5]) + save_to_gh(df = df, repo = "aux_test", filename = "data_example", ext = "csv") + + # Update an existing file on GitHub + df <- data.frame(a = 6:10, b = letters[6:10]) + save_to_gh(df = df, repo = "aux_test", filename = "data_example", ext = "csv") } } diff --git a/tests/testthat/test-branch_management.R b/tests/testthat/test-branch_management.R index 44edeb3..f0a8c02 100644 --- a/tests/testthat/test-branch_management.R +++ b/tests/testthat/test-branch_management.R @@ -37,6 +37,11 @@ create_new_branch(repo = repo, owner = owner, new_branch = paste0(format(Sys.Date(), "%Y%m%d"), "_force_false"), ref_branch = "main") +create_new_branch(measure = "test", + new_branch = "to_delete", + ref_branch = "DEV") + + # Test get repo branches test_that("get repo branches works as expected", { @@ -358,10 +363,10 @@ test_that("merge branch into works correctly", { target_branch = paste0(format(Sys.Date(), "%Y%m%d"), "_TEST"))|> expect_no_error() - compare_branch_content(repo = "aux_test", - branch1 = "main", - branch2 = paste0(format(Sys.Date(), "%Y%m%d"), "_TEST"))$same_content |> - expect_equal(TRUE) + # compare_branch_content(repo = "aux_test", + # branch1 = "main", + # branch2 = paste0(format(Sys.Date(), "%Y%m%d"), "_TEST"))$same_content |> + # expect_equal(TRUE) @@ -372,10 +377,10 @@ test_that("merge branch into works correctly", { target_branch = paste0(format(Sys.Date(), "%Y%m%d"), "_v2"))|> expect_no_error() - compare_branch_content(repo = "aux_test", - branch1 = "DEV", - branch2 = paste0(format(Sys.Date(), "%Y%m%d"), "_v2"))$same_content |> - expect_equal(TRUE) + # compare_branch_content(repo = "aux_test", + # branch1 = "DEV", + # branch2 = paste0(format(Sys.Date(), "%Y%m%d"), "_v2"))$same_content |> + # expect_equal(TRUE) # error when 
branches do not exist merge_branch_into(repo = "aux_test", @@ -393,25 +398,35 @@ test_that("merge branch into works correctly", { expect_no_error() # Confirm the merge was successful - compare_branch_content(repo = "aux_test", - branch1 = "DEV", - branch2 = paste0(format(Sys.Date(), "%Y%m%d"), "_force_true"))$same_content |> - expect_equal(TRUE) + merge_branch_into(repo = "aux_test", + target_branch = "DEV", + source_branch = paste0(format(Sys.Date(), "%Y%m%d"), "_force_true"), + force = TRUE) + + # compare_branch_content(repo = "aux_test", + # branch1 = "DEV", + # branch2 = paste0(format(Sys.Date(), "%Y%m%d"), "_force_true"))$same_content |> + # expect_equal(TRUE) # Case: force = FALSE with user confirmation (simulating "Yes") assign("askYesNo", function(...) TRUE, envir = .GlobalEnv) merge_branch_into(repo = "aux_test", - source_branch = "DEV", - target_branch = paste0(format(Sys.Date(), "%Y%m%d"), "_force_false"), + target_branch = "DEV", + source_branch = paste0(format(Sys.Date(), "%Y%m%d"), "_force_false"), force = FALSE) |> expect_no_error() # Confirm the merge was successful - compare_branch_content(repo = "aux_test", - branch1 = "DEV", - branch2 = paste0(format(Sys.Date(), "%Y%m%d"), "_force_false"))$same_content |> - expect_equal(TRUE) + merge_branch_into(repo = "aux_test", + target_branch = "DEV", + source_branch = paste0(format(Sys.Date(), "%Y%m%d"), "_force_false"), + force = FALSE) + + # compare_branch_content(repo = "aux_test", + # branch1 = "DEV", + # branch2 = paste0(format(Sys.Date(), "%Y%m%d"), "_force_false"))$same_content |> + # expect_equal(TRUE) # Remove the custom `askYesNo` function after the test rm(askYesNo, envir = .GlobalEnv) @@ -421,8 +436,8 @@ test_that("merge branch into works correctly", { merge_branch_into(repo = "aux_test", - source_branch = "DEV", - target_branch = paste0(format(Sys.Date(), "%Y%m%d"), "_force_cancel"), + target_branch = "DEV", + source_branch = paste0(format(Sys.Date(), "%Y%m%d"), "_force_cancel"), force = FALSE) 
|> expect_error() @@ -438,20 +453,6 @@ test_that("merge branch into works correctly", { # Test delete branches function test_that("delete branch works", { - # create a branch - create_new_branch(measure = "test", - new_branch = "to_delete", - ref_branch = "DEV") - - # confirms it exists - branches <- gh::gh("GET /repos/{owner}/{repo}/branches", - owner = owner, - repo = repo) - branch_names <- sapply(branches, function(branch) branch$name) - - ("to_delete" %in% branch_names) |> - expect_equal(TRUE) - # delete branch delete_branch(branch_to_delete = "to_delete", repo = repo, diff --git a/tests/testthat/test-save_to_gh.R b/tests/testthat/test-save_to_gh.R index 57cb01c..bf31e88 100644 --- a/tests/testthat/test-save_to_gh.R +++ b/tests/testthat/test-save_to_gh.R @@ -1,19 +1,4 @@ -test_that("save_to_gh works correctly", { - Sys.setenv(GITHUB_PAT = 'code') - testthat::local_mocked_bindings(gh = function(...) NULL) - expect_message(save_to_gh(iris, "test"), "File test.csv saved to DEV branch of aux_test in GitHub successfully") - expect_null(save_to_gh(iris, "test")) -}) - -# # Load required packages -# library(testthat) -# library(mockery) # For mocking functions -# library(base64enc) # For base64 encoding/decoding - -# Source your functions (adjust the path as needed) -# source('path/to/your/github_functions.R') - -# For demonstration, let's assume your functions are already in the environment +# Preliminary operations # Sample data frame for testing df_sample <- data.frame( @@ -22,10 +7,172 @@ df_sample <- data.frame( stringsAsFactors = FALSE ) -# ------------------------------- -# Tests for convert_df_to_base64() -# ------------------------------- +repo <- "aux_test" +owner <- getOption("pipfun.ghowner") +creds <- get_github_creds() + + +# Load packages +library(base64enc) # For base64 encoding/decoding +library(mockery) + +# -------------------------------------------- # +# Test save_to_gh() #### +# -------------------------------------------- # + +## Inputs #### + 
+test_that("save_to_gh aborts if 'gh' package is not installed", { + + if (requireNamespace("gh", quietly = TRUE)) { + skip("Test skipped because 'gh' is already installed.") + } + + expect_error( + save_to_gh(df = df_sample, + repo = "aux_test", + filename = "test_save", + ext = "csv"), + "Package 'gh' is required. Please install it using install.packages('gh')." + ) +}) + +test_that("save_to_gh throws an error if metadata is missing 'sha' or 'path'", { + + # Case 1: Metadata without 'sha' + metadata_no_sha <- list(path = "path/to/file.csv") + expect_error( + save_to_gh(df = df_sample, + repo = "aux_test", + filename = "test_save", + ext = "csv", + metadata = metadata_no_sha) + ) + + # Case 2: Metadata without 'path' + metadata_no_path <- list(sha = "12345abcde") + expect_error( + save_to_gh(df = df_sample, + repo = "aux_test", + filename = "test_save", + ext = "csv", + metadata = metadata_no_path) + ) + + # Case 3: Metadata with neither 'sha' nor 'path' + metadata_no_sha_no_path <- list() + expect_error( + save_to_gh(df = df_sample, + repo = "aux_test", + filename = "test_save", + ext = "csv", + metadata = metadata_no_sha_no_path) + ) +}) + +## Save file correctly, 3 cases: +# 1. new file, new data (data_change is TRUE) +# 2. old file, new data (data_change is TRUE) +# 3. old file, old data (data_change is FALSE) + +test_that("save_to_gh saves file correctly", { + + # Case 1. 
+ res <- save_to_gh( + df = df_sample, + repo = "aux_test", + owner = getOption("pipfun.ghowner"), + branch = "DEV", # Replace with the branch you want to test + filename = "new_data_test", # Must be a file name that does not yet exist in the repo + ext = "csv", + metadata = NULL, + verbose = TRUE + ) + + res$init |> + expect_null() # init should be NULL because file did not exist + + res$data_change |> + expect_equal(TRUE) + + # -- delete the new file to prevent subsequent test runs from failing --- # + gh::gh( + "DELETE /repos/{owner}/{repo}/contents/{path}", + owner = owner, + repo = repo, + path = "new_data_test.csv", + message = "delete file for testing", # Commit message + .token = creds$password, + sha = res$content$sha, + branch = "DEV" # Branch where the file exists + ) + + # Case 2. + set.seed(Sys.time()) # Ensure randomness across sessions + + res <- save_to_gh( + df = data.frame( + id = 1:5, + value = runif(5, 0, 100), # Random numeric values between 0 and 100 + category = sample(letters[1:3], 5, replace = TRUE) # Random categories + ), + repo = "aux_test", + owner = getOption("pipfun.ghowner"), + branch = "DEV", # Replace with the branch you want to test + filename = "test_save", # Replace with a file name that exists in the repo + ext = "csv", + metadata = NULL, + verbose = TRUE + ) + + res$init |> + is.null() |> + expect_false() # init should be available because file existed + + res$data_change |> + expect_equal(TRUE) + + + # Case 3. 
+ + # metadata is available and file exists + res <- save_to_gh( + df = data.frame(x = 1:5, + y = letters[1:5]), + repo = "aux_test", + owner = getOption("pipfun.ghowner"), + branch = "DEV", # Replace with the branch you want to test + filename = "data_test", # Replace with a file name that exists in the repo + ext = "csv", + metadata = NULL, + verbose = TRUE + ) + + res$init |> + is.null() |> + expect_false() # init should not be NULL because file already existed + + res$init$path |> + expect_equal("data_test.csv") + + res$data_change |> + expect_equal(FALSE) + + # Output structure + names(res) |> + expect_equal(c("content", "commit", + "init", "owner", + "repo", "branch", "data_change")) + + +}) + + +# # ------------------------------- +# # Tests for convert_df_to_base64() +# # ------------------------------- +# test_that("convert_df_to_base64 works correctly for all supported file extensions", { # Skip on CI/CD environments like GitHub Actions testthat::skip_on_ci() @@ -82,45 +229,3 @@ test_that("convert_df_to_base64 works correctly for all supported file extension } }) - - -# -------------------------------- -# Tests for save_to_gh() -# -------------------------------- - -test_that("save_to_gh works correctly with mocked functions", { - # Skip on CI/CD environments like GitHub Actions - testthat::skip_on_ci() - - # Mock functions - local_mocked_bindings( - get_github_creds = function() list(password = "dummy_token") - ) - with_mocked_bindings(code = { - - result <- save_to_gh( - df = df_sample, - repo = "dummy_repo", - owner = "dummy_owner", - branch = "main", - filename = "dummy_file", - ext = "csv" - ) - # Expect that the function returns NULL - expect_null(result) - }, - gh = function(endpoint, ..., .token) { - if (grepl("^GET", endpoint)) { - # Simulate a file not found error (as when the file does not exist) - stop(structure(list(message = "Not Found (404)", call = NULL), - class = c("http_error_404", "error", "condition"))) - } else if (grepl("^PUT", 
endpoint)) { - # Simulate a successful file upload - return(list(content = "dummy_response", sha = "dummy_sha")) - } - }, .package = "gh" - ) - - # Expect that the function returns NULL - expect_null(result) -}) diff --git a/vignettes/interact_with_Github.Rmd b/vignettes/interact_with_Github.Rmd index 04c6dcd..299ee24 100644 --- a/vignettes/interact_with_Github.Rmd +++ b/vignettes/interact_with_Github.Rmd @@ -103,20 +103,80 @@ To save data to GitHub using `{pipfun}`, you need to have appropriate permission The process of saving data with the `save_to_gh()` function differs from the traditional Git workflow of staging, committing, and pushing changes. Instead, `save_to_gh()` interacts directly with the GitHub API, allowing you to upload data without manually handling Git commands. This approach provides a convenient way to programmatically save or update files in a repository. -Here's an example of how to use `save_to_gh()`: +The `save_to_gh()` function provides a streamlined way to save or update files in a GitHub repository. If the file already exists, it will be updated with new content. If it does not exist, a new file will be created. +By default, `save_to_gh()` saves the data in CSV format, but you can choose any of the supported formats mentioned earlier. + +Below are examples demonstrating how to use it: + +### Example 1: create a new file + +This will create a new file named data_example.csv in the DEV branch of the aux_test repository. 
```{r} -df <- data.frame(x = 1:5, - y = letters[1:5]) - -# save_to_gh(df, -# repo = "pip_info", -# owner = getOption("pipfun.ghowner"), -# branch = "testing", -# filename = "pipfun_vignette_example") +#library(pipfun) + +# Example data frame +df <- data.frame(a = 1:5, b = letters[1:5]) + +# Save the data to a repository +save_to_gh( + df = df, + owner = getOption("pipfun.ghowner"), + repo = "aux_test", + filename = "data_example", + ext = "csv" +) + ``` -By default, `save_to_gh()` saves the data in CSV format, but you can choose any of the supported formats mentioned earlier. +### Example 2: update an existing file + +```{r} + +# Updated data frame +df <- data.frame(a = 6:10, b = letters[6:10]) + +# Update the file in the repository +save_to_gh( + df = df, + repo = "aux_test", + filename = "data_example", + ext = "csv" +) + +``` + +### Example 3: save the same data to the same file (no change) + +```{r} +# Save the same data to the same file +result_no_change <- save_to_gh( + df = df, + repo = "aux_test", + filename = "data_example", + ext = "csv" +) + +# Check if the data was changed +print(result_no_change$data_change) # Should be FALSE as the content is identical + +``` + +If you already have the metadata for an existing file, you can pass it directly through the `metadata` argument, in which case `save_to_gh()` does not look the file up on GitHub first. + +### Understanding the output + +The `save_to_gh()` function invisibly returns a list containing information about the upload or update operation. The key elements of this list are: + +- **`content`**: Metadata about the uploaded or updated file, including its SHA hash, path in the repository, and other details. +- **`commit`**: Information about the commit associated with the upload or update operation, such as the commit SHA and message. +- **`init`**: Metadata of the file before the operation. If the file did not exist, this will be `NULL`. +- **`owner`**: The GitHub username or organization that owns the repository. 
+- **`repo`**: The name of the GitHub repository where the file was uploaded or updated. +- **`branch`**: The branch of the repository where the file was uploaded or updated. +- **`data_change`**: A logical value indicating whether the file's content was updated (`TRUE`) or remained unchanged (`FALSE`). + *Note*: If the file did not exist before the operation, `data_change` will be `TRUE` because creating a new file is considered a change to the repository's data state. + # Delete from Github