Skip to content

Commit a0fbed6

Browse files
committed
feat: add two utility functions export_parquet and create_view and a vignette
* export_parquet uses COPY TO to export a parquet file from a tbl_lazy * create_view creates a view based on a tbl_lazy The vignette explains how to materialize data with those two functions and dbplyr::compute() Fixes #207, #630
1 parent e7bbaaa commit a0fbed6

File tree

9 files changed

+416
-0
lines changed

9 files changed

+416
-0
lines changed

NAMESPACE

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ S3method(names,duckdb_relation)
66
S3method(print,duckdb_explain)
77
S3method(print,duckdb_expr)
88
S3method(print,duckdb_relation)
9+
export(create_view)
910
export(duckdb)
1011
export(duckdb_adbc)
1112
export(duckdb_fetch_arrow)
@@ -17,6 +18,7 @@ export(duckdb_register_arrow)
1718
export(duckdb_shutdown)
1819
export(duckdb_unregister)
1920
export(duckdb_unregister_arrow)
21+
export(export_parquet)
2022
export(read_csv_duckdb)
2123
export(simulate_duckdb)
2224
export(tbl_file)
@@ -54,5 +56,8 @@ exportMethods(dbWriteTable)
5456
exportMethods(show)
5557
import(DBI)
5658
import(methods)
59+
importFrom(DBI,dbExecute)
60+
importFrom(dbplyr,remote_con)
61+
importFrom(dbplyr,sql_render)
5762
importFrom(utils,head)
5863
useDynLib(duckdb, .registration = TRUE)

R/create_view.R

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#' Create or Replace a View from a `tbl` in DuckDB
#'
#' This function creates or replaces a view in DuckDB from a `dbplyr`-based `tbl` object.
#' It converts the lazy query associated with the `tbl` into SQL and defines a named view in the database.
#'
#' @param data A `tbl_dbi` object, typically produced by `dplyr::tbl()` or `dbplyr` pipelines.
#' @param view_name A single, non-missing character string specifying the name of the view to create.
#'   The name is quoted as an identifier, so it may contain spaces or punctuation.
#'
#' @return A `tbl` object pointing to the created view (invisibly).
#'
#' @details
#' The function uses `CREATE OR REPLACE VIEW`, which means it will overwrite an existing view with the same name.
#' The view is created in the same DuckDB connection used by the `tbl`. The query is lazily evaluated.
#'
#' @examples
#' con <- DBI::dbConnect(duckdb::duckdb())
#' dplyr::copy_to(con, dplyr::tibble(a = 1:3, b = letters[1:3]), "source_table", temporary = TRUE)
#' data <- dplyr::filter(dplyr::tbl(con, "source_table"), a > 1)
#' create_view(data, "filtered_view")
#' DBI::dbGetQuery(con, "SELECT * FROM filtered_view")
#' DBI::dbDisconnect(con, shutdown = TRUE)
#'
#' @importFrom DBI dbExecute dbQuoteIdentifier
#' @importFrom dbplyr remote_con sql_render
#' @export
create_view <- function(data, view_name) {
  if (!inherits(data, "tbl_dbi")) stop("'data' must be a 'tbl_dbi' object.")
  # A character vector of length != 1 would make sprintf() build several
  # statements; reject it up front with a clear message.
  if (is.character(view_name) && (length(view_name) != 1 || is.na(view_name))) {
    stop("'view_name' must be a single character string.")
  }

  con <- dbplyr::remote_con(data)
  query <- dbplyr::sql_render(data, con = con)

  statement <- sprintf(
    "CREATE OR REPLACE VIEW %s AS %s",
    DBI::dbQuoteIdentifier(con, view_name),
    query
  )
  DBI::dbExecute(con, statement)

  # Namespace-qualified: the package does not import `tbl`, so a bare call
  # would only work when the caller happens to have dplyr attached.
  invisible(dplyr::tbl(con, view_name))
}

R/export_parquet.R

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
#' Export a DuckDB table to a Parquet file using COPY TO
#'
#' This function exports a `dbplyr`-based table or SQL query to a Parquet file
#' using DuckDB's native `COPY TO` command.
#'
#' @param data A `tbl_dbi` object representing a DuckDB table or query.
#' @param output Path to the output Parquet file (a single, non-missing character string).
#' @param options A named list of key-value COPY options. Values can be character,
#'   numeric, logical, or vectors (which will be converted to tuples).
#'   Examples include `compression = "zstd"` or `ROW_GROUP_SIZE = 1000000`.
#'   see https://duckdb.org/docs/sql/statements/copy.html#parquet-options for details.
#'   A `format` entry is ignored: the format is always forced to `PARQUET`.
#' @param print_sql If `TRUE`, the generated `COPY` statement is emitted with
#'   `message()` before it is executed. Defaults to `FALSE`.
#'
#' @return Returns the number of rows affected by the `COPY TO` command.
#' The function will stop with an error if the input types are invalid.
#'
#' @details
#' Option values of length >1 are wrapped in parentheses and comma-separated
#' (e.g., for `columns = c("a", "b")`, DuckDB will receive `COLUMNS (a,b)`).
#'
#' @examples
#' con <- DBI::dbConnect(duckdb::duckdb())
#' DBI::dbWriteTable(con, "iris", iris)
#' tbl <- dplyr::tbl(con, "iris")
#' out_file <- tempfile(fileext = ".parquet")
#' export_parquet(tbl, out_file, options = list(compression = "zstd"))
#' out_dir <- tempfile("iris_ds")
#' export_parquet(tbl, out_dir, options = list(partition_by = "Species", row_group_size = 1000))
#' DBI::dbDisconnect(con, shutdown = TRUE)
#'
#' @importFrom DBI dbExecute
#' @importFrom dbplyr remote_con sql_render
#' @export
export_parquet <- function(data, output, options = NULL, print_sql = FALSE) {
  if (!inherits(data, "tbl_dbi")) stop("'data' must be a 'tbl_dbi' object.")
  if (!is.character(output) || length(output) != 1 || is.na(output)) {
    stop("'output' must be a single character string.")
  }
  if (!is.null(options) && !is.list(options)) stop("'options' must be a list or NULL.")
  if (length(options) > 0 && (is.null(names(options)) || !all(nzchar(names(options))))) {
    stop("'options' must be a named list.")
  }

  con <- dbplyr::remote_con(data)
  sql_query <- dbplyr::sql_render(data, con = con)

  # Normalize and format options
  if (is.null(options)) options <- list()
  formatted_options <- format_copy_to_options(options)
  # Assigned after the user options so a user-supplied `format` cannot
  # change the output file type.
  formatted_options$FORMAT <- "PARQUET"

  parquet_opts <- paste(
    paste0(names(formatted_options), " ", formatted_options),
    collapse = ", "
  )
  # Quote the path as a SQL string literal so embedded single quotes are
  # escaped instead of terminating the literal early.
  sql <- sprintf(
    "COPY (%s) TO %s (%s)",
    sql_query,
    DBI::dbQuoteString(con, output),
    parquet_opts
  )

  if (isTRUE(print_sql)) message(sql)
  DBI::dbExecute(con, sql)
}
47+
48+
49+
# Convert a named list of COPY options into a named list of SQL-ready strings.
#
# - Names are upper-cased.
# - Numeric values are rendered in fixed notation (1000000, never 1e+06).
# - Logical and character values are converted with as.character().
# - Values of length > 1 become a parenthesised, comma-separated tuple.
#
# Stops with an error when an option is unnamed, when a value is not
# character/numeric/logical, or when a value is empty or contains NA.
# Returns an empty list for empty input.
format_copy_to_options <- function(options) {
  if (length(options) == 0) {
    return(list())
  }

  option_names <- names(options)
  if (is.null(option_names) || anyNA(option_names) || !all(nzchar(option_names))) {
    stop("All options must be named.")
  }

  formatted <- lapply(options, function(x) {
    if (!(is.logical(x) || is.character(x) || is.numeric(x))) {
      stop("All option values must be character, numeric, or logical.")
    }
    if (length(x) == 0 || anyNA(x)) {
      stop("Option values must be non-empty and must not contain NA.")
    }

    # as.character()/paste0() switch to scientific notation for values such
    # as 1e6; format() keeps the plain digits in the generated SQL.
    x <- if (is.numeric(x)) {
      format(x, scientific = FALSE, trim = TRUE, digits = 15)
    } else {
      as.character(x)
    }

    if (length(x) > 1) x <- paste0("(", paste0(x, collapse = ","), ")")
    x
  })

  names(formatted) <- toupper(option_names)
  formatted
}

man/create_view.Rd

Lines changed: 33 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/export_parquet.Rd

Lines changed: 38 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-create_view.R

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
test_that("create_view creates a view with expected content", {
  con <- dbConnect(duckdb::duckdb())
  on.exit(dbDisconnect(con, shutdown = TRUE), add = TRUE)

  # Five source rows; the lazy query behind the view keeps those with x > 3.
  source_df <- data.frame(x = 1:5, y = letters[1:5])
  copy_to(con, source_df, "original_table", temporary = TRUE)

  filtered <- filter(tbl(con, "original_table"), x > 3)
  create_view(filtered, "view_test")

  # Read the view back through DBI ...
  via_dbi <- dbReadTable(con, "view_test")
  expect_equal(nrow(via_dbi), 2)
  expect_equal(via_dbi$x, c(4, 5))

  # ... and through a lazy tbl.
  via_tbl <- dplyr::collect(tbl(con, "view_test"))
  expect_equal(nrow(via_tbl), 2)
  expect_equal(via_tbl$x, c(4, 5))
})
19+
20+
test_that("create_view replaces an existing view", {
  con <- dbConnect(duckdb::duckdb())
  on.exit(dbDisconnect(con, shutdown = TRUE), add = TRUE)

  # First definition of the view: two rows from table1.
  first_df <- data.frame(a = 1:2)
  copy_to(con, first_df, "table1", temporary = TRUE)
  create_view(tbl(con, "table1"), "replace_view")

  # Second definition under the same name: three rows from table2.
  second_df <- tibble(a = 10:12)
  copy_to(con, second_df, "table2", temporary = TRUE)
  create_view(tbl(con, "table2"), "replace_view") # Should replace

  # The view must now expose the second table's content.
  view_rows <- dbReadTable(con, "replace_view")
  expect_equal(nrow(view_rows), 3)
  expect_equal(view_rows$a, 10:12)
})
38+
39+
test_that("create_view works with quoted view names", {
  con <- dbConnect(duckdb::duckdb())
  on.exit(dbDisconnect(con, shutdown = TRUE), add = TRUE)

  ids <- data.frame(id = 1:3)
  copy_to(con, ids, "quoted_table", temporary = TRUE)
  lazy_ids <- tbl(con, "quoted_table")

  # The name contains a dash, spaces and mixed case, so it only works if
  # create_view() quotes it as an identifier.
  create_view(lazy_ids, "weird-Name With Space")

  rows <- dbGetQuery(con, 'SELECT * FROM "weird-Name With Space"')
  expect_equal(nrow(rows), 3)
})
52+
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
test_that("export_parquet write a valid Parquet file", {
  withr::with_tempfile("parquet_file", fileext = ".parquet", {
    con <- dbConnect(duckdb::duckdb())
    on.exit(dbDisconnect(con, shutdown = TRUE), add = TRUE)

    df <- data.frame(x = 1:3, y = letters[1:3])
    copy_to(con, df, "test_table", temporary = TRUE)

    data <- tbl(con, "test_table")
    export_parquet(data, parquet_file)

    expect_true(file.exists(parquet_file))

    # Existence alone does not show the file is valid Parquet: read it back
    # with DuckDB and compare the content with what was exported.
    read_back <- dbGetQuery(
      con,
      paste0(
        "SELECT * FROM read_parquet(",
        as.character(dbQuoteString(con, parquet_file)),
        ") ORDER BY x"
      )
    )
    expect_equal(read_back$x, 1:3)
    expect_equal(read_back$y, letters[1:3])
  })
})
15+
16+
test_that("export_parquet allows options", {
  withr::with_tempfile("parquet_file", fileext = ".parquet", {
    con <- dbConnect(duckdb::duckdb())
    on.exit(dbDisconnect(con, shutdown = TRUE), add = TRUE)

    numbers <- data.frame(a = 1:5, b = 1:5)
    copy_to(con, numbers, "table_opt", temporary = TRUE)
    lazy_numbers <- tbl(con, "table_opt")

    # One character option and one numeric option, passed positionally as
    # the third argument.
    copy_options <- list(compression = "zstd", row_group_size = 1000)
    expect_silent(export_parquet(lazy_numbers, parquet_file, copy_options))
    expect_true(file.exists(parquet_file))
  })
})
30+
31+
test_that("export_parquet échoue proprement si le fichier est invalide", {
  con <- dbConnect(duckdb::duckdb())
  on.exit(dbDisconnect(con, shutdown = TRUE), add = TRUE)

  small_df <- data.frame(z = 1:2)
  copy_to(con, small_df, "bad_path_table", temporary = TRUE)
  lazy_small <- tbl(con, "bad_path_table")

  # The target directory does not exist, so the export is expected to fail
  # with an I/O error raised by DuckDB.
  unwritable_path <- "/chemin/inexistant/fichier.parquet"
  expect_error(
    export_parquet(lazy_small, unwritable_path),
    "IO Error|Failed to open"
  )
})
44+

vignettes/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
*.html
2+
*.R

0 commit comments

Comments
 (0)