Skip to content

Commit 3bc2409

Browse files
dpprdankrlmlr
authored andcommitted
Support for system catalogs
Full history: 699813b refactor `dbListTables()` with `list_tables()`, now orders result by `table_type` and `table_name` refactor `dbExistsTable()` with `list_tables()` refactor `dbListObjects()` with `list_tables()` merge `find_table()` code into `list_fields()` `find_table()` isn't used anywhere else anymore (e.g. `exists_table()`) simplify the "get current_schemas() as table" code pass full `id` to `list_fields()` align `dbExistsTable()` with `dbListFields()` add some comments and whitespace simplify `where_schema` in `list_tables()` align `where_table` with `where_schema` in `list_tables()` add `system_catalogs` argument to `dbConnect()` add materialized view tests `list_tables()`: query system catalogs if available `list_fields()`: query system catalogs if available
1 parent 0f2c805 commit 3bc2409

9 files changed

+221
-21
lines changed

R/PqConnection.R

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ setClass("PqConnection",
1010
slots = list(
1111
ptr = "externalptr",
1212
bigint = "character",
13+
system_catalogs = "logical",
1314
timezone = "character",
1415
timezone_out = "character",
1516
typnames = "data.frame"

R/dbConnect_PqDriver.R

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,13 @@
4343
#' @param check_interrupts Should user interrupts be checked during the query execution (before
4444
#' first row of data is available)? Setting to `TRUE` allows interruption of queries
4545
#' running too long.
46+
#' @param system_catalogs Should `dbList*()` functions query the [`system
47+
#' catalogs`](https://www.postgresql.org/docs/current/catalogs.html) (`TRUE`)
48+
#' or the
49+
#' [`information_schema`](https://www.postgresql.org/docs/current/information-schema.html)?
50+
#' The `information_schema` does not contain PostgreSQL-specific information,
51+
#' in particular [Materialized
52+
#' Views](https://www.postgresql.org/docs/current/sql-creatematerializedview.html).
4653
#' @param timezone Sets the timezone for the connection. The default is `"UTC"`.
4754
#' If `NULL` then no timezone is set, which defaults to the server's time zone.
4855
#' @param timezone_out The time zone returned to R, defaults to `timezone`.
@@ -60,7 +67,8 @@
6067
dbConnect_PqDriver <- function(drv, dbname = NULL,
6168
host = NULL, port = NULL, password = NULL, user = NULL, service = NULL, ...,
6269
bigint = c("integer64", "integer", "numeric", "character"),
63-
check_interrupts = FALSE, timezone = "UTC", timezone_out = NULL) {
70+
check_interrupts = FALSE, system_catalogs = TRUE,
71+
timezone = "UTC", timezone_out = NULL) {
6472
opts <- unlist(list(
6573
dbname = dbname, user = user, password = password,
6674
host = host, port = as.character(port), service = service, client_encoding = "utf8", ...
@@ -70,6 +78,7 @@ dbConnect_PqDriver <- function(drv, dbname = NULL,
7078
}
7179
bigint <- match.arg(bigint)
7280
stopifnot(is.logical(check_interrupts), all(!is.na(check_interrupts)), length(check_interrupts) == 1)
81+
stopifnot(is.logical(system_catalogs))
7382
if (!is.null(timezone)) {
7483
stopifnot(is.character(timezone), all(!is.na(timezone)), length(timezone) == 1)
7584
}
@@ -85,7 +94,8 @@ dbConnect_PqDriver <- function(drv, dbname = NULL,
8594

8695
# timezone is set later
8796
conn <- new("PqConnection",
88-
ptr = ptr, bigint = bigint, timezone = character(), typnames = data.frame()
97+
ptr = ptr, bigint = bigint, system_catalogs = system_catalogs,
98+
timezone = character(), typnames = data.frame()
8999
)
90100
on.exit(dbDisconnect(conn))
91101

R/dbConnect_RedshiftDriver.R

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,22 @@ dbConnect_RedshiftDriver <- function(drv, dbname = NULL,
44
host = NULL, port = NULL, password = NULL, user = NULL, service = NULL, ...,
55
bigint = c("integer64", "integer", "numeric", "character"),
66
check_interrupts = FALSE, timezone = "UTC") {
7-
new("RedshiftConnection", callNextMethod())
7+
new("RedshiftConnection",
8+
callNextMethod(
9+
drv = drv,
10+
dbname = dbname,
11+
host = host,
12+
port = port,
13+
password = password,
14+
user = user,
15+
service = service,
16+
...,
17+
bigint = bigint,
18+
check_interrupts = check_interrupts,
19+
system_catalogs = FALSE,
20+
timezone = timezone
21+
)
22+
)
823
}
924

1025
#' @rdname Redshift

R/dbListObjects_PqConnection_ANY.R

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,11 @@ dbListObjects_PqConnection_ANY <- function(conn, prefix = NULL, ...) {
1414
}
1515
query <- paste0(
1616
"SELECT ", null_varchar, " AS schema, table_name AS table FROM ( \n",
17-
list_tables(conn = conn, order_by = "table_type, table_name"),
17+
list_tables(conn = conn, order_by = "table_type, table_name"),
1818
") as table_query \n",
1919
"UNION ALL\n",
2020
"SELECT DISTINCT table_schema AS schema, ", null_varchar, " AS table FROM ( \n",
21-
list_tables(conn = conn, where_schema = "true"),
21+
list_tables(conn = conn, where_schema = "true"),
2222
") as schema_query;"
2323
)
2424
} else {
@@ -38,11 +38,11 @@ dbListObjects_PqConnection_ANY <- function(conn, prefix = NULL, ...) {
3838
)
3939
query <- paste0(
4040
"SELECT table_schema AS schema, table_name AS table FROM ( \n",
41-
list_tables(
42-
conn = conn,
43-
where_schema = where_schema,
44-
order_by = "table_type, table_name"
45-
),
41+
list_tables(
42+
conn = conn,
43+
where_schema = where_schema,
44+
order_by = "table_type, table_name"
45+
),
4646
") as table_query"
4747
)
4848
}

R/tables.R

Lines changed: 83 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -122,12 +122,21 @@ db_append_table <- function(conn, name, value, copy, warn) {
122122

123123
list_tables <- function(conn, where_schema = NULL, where_table = NULL, order_by = NULL) {
124124

125-
query <- paste0(
126-
# information_schema.table docs: https://www.postgresql.org/docs/current/infoschema-tables.html
127-
"SELECT table_schema, table_name \n",
128-
"FROM information_schema.tables \n",
129-
"WHERE TRUE \n" # dummy clause to be able to add additional ones with `AND`
130-
)
125+
if (conn@system_catalogs) {
126+
query <- paste0(
127+
"SELECT table_schema, table_name \n",
128+
"FROM ( ", list_tables_from_system_catalog(), ") AS schema_tables \n",
129+
"WHERE TRUE \n"
130+
)
131+
} else {
132+
query <- paste0(
133+
# information_schema.table docs:
134+
# https://www.postgresql.org/docs/current/infoschema-tables.html
135+
"SELECT table_schema, table_name \n",
136+
"FROM information_schema.tables \n",
137+
"WHERE TRUE \n" # dummy clause to be able to add additional ones with `AND`
138+
)
139+
}
131140

132141
if (is.null(where_schema)) {
133142
# `true` in `current_schemas(true)` is necessary to get temporary tables
@@ -147,6 +156,38 @@ list_tables <- function(conn, where_schema = NULL, where_table = NULL, order_by
147156
query
148157
}
149158

159+
list_tables_from_system_catalog <- function() {
160+
# This imitates (parts of) information_schema.tables, but includes materialized views
161+
paste0(
162+
# pg_class vs. information_schema: https://stackoverflow.com/a/24089729
163+
# pg_class docs: https://www.postgresql.org/docs/current/catalog-pg-class.html
164+
"SELECT n.nspname AS table_schema, cl.relname AS table_name, \n",
165+
" CASE
166+
WHEN (n.oid = pg_my_temp_schema()) THEN 'LOCAL TEMPORARY'
167+
WHEN (cl.relkind IN ('r', 'p')) THEN 'BASE TABLE'
168+
WHEN (cl.relkind = 'v') THEN 'VIEW'
169+
WHEN (cl.relkind = 'f') THEN 'FOREIGN'
170+
WHEN (cl.relkind = 'm') THEN 'MATVIEW'
171+
ELSE NULL
172+
END AS table_type \n",
173+
"FROM pg_class AS cl \n",
174+
"JOIN pg_namespace AS n ON cl.relnamespace = n.oid \n",
175+
# include: r = ordinary table, v = view, m = materialized view,
176+
# f = foreign table, p = partitioned table
177+
"WHERE (cl.relkind IN ('r', 'v', 'm', 'f', 'p')) \n",
178+
# do not return individual table partitions
179+
" AND NOT cl.relispartition \n",
180+
# do not return other people's temp schemas
181+
" AND (NOT pg_is_other_temp_schema(n.oid)) \n",
182+
# Return only objects (relations) which the current user may access
183+
# https://www.postgresql.org/docs/current/functions-info.html
184+
" AND (pg_has_role(cl.relowner, 'USAGE') \n",
185+
" OR has_table_privilege(cl.oid, 'SELECT, INSERT, UPDATE, DELETE, TRUNCATE, REFERENCES, TRIGGER') \n",
186+
" OR has_any_column_privilege(cl.oid, 'SELECT, INSERT, UPDATE, REFERENCES') \n",
187+
" ) \n"
188+
)
189+
}
190+
150191
exists_table <- function(conn, id) {
151192
name <- id@name
152193
stopifnot("table" %in% names(name))
@@ -168,6 +209,14 @@ exists_table <- function(conn, id) {
168209
}
169210

170211
list_fields <- function(conn, id) {
212+
if (conn@system_catalogs) {
213+
list_fields_from_system_catalog(conn, id)
214+
} else {
215+
list_fields_from_info_schema(conn, id)
216+
}
217+
}
218+
219+
list_fields_from_info_schema <- function(conn, id) {
171220
name <- id@name
172221

173222
is_redshift <- is(conn, "RedshiftConnection")
@@ -185,7 +234,7 @@ list_fields <- function(conn, id) {
185234
# as there cannot be multiple tables with the same name in a single schema
186235
only_first <- FALSE
187236

188-
# or we have to look the table up in the schemas on the search path
237+
# or we have to look the table up in the schemas on the search path
189238
} else if (is_redshift) {
190239
# A variant of the Postgres version that uses CTEs and generate_series()
191240
# instead of generate_subscripts(), the latter is not supported on Redshift
@@ -211,10 +260,10 @@ list_fields <- function(conn, id) {
211260
# How to unnest `current_schemas(true)` array with element number (works since v9.4):
212261
# https://stackoverflow.com/a/8767450/2114932
213262
query <- paste0(
214-
"(",
215-
"SELECT * FROM unnest(current_schemas(true)) WITH ORDINALITY AS tbl(table_schema, nr) \n",
216-
"WHERE table_schema != 'pg_catalog'",
217-
") schemas_on_path"
263+
"(",
264+
"SELECT * FROM unnest(current_schemas(true)) WITH ORDINALITY AS tbl(table_schema, nr) \n",
265+
"WHERE table_schema != 'pg_catalog'",
266+
") schemas_on_path"
218267
)
219268
only_first <- TRUE
220269
}
@@ -252,6 +301,29 @@ list_fields <- function(conn, id) {
252301
fields
253302
}
254303

304+
list_fields_from_system_catalog <- function(conn, id) {
305+
if (exists_table(conn, id)) {
306+
# we know from exists_table() that id@name["table"] exists
307+
# and the user has access priviledges
308+
tname_str <- stats::na.omit(id@name[c("schema", "table")])
309+
tname_qstr <- dbQuoteString(conn, paste(tname_str, collapse = "."))
310+
# cast to `regclass` resolves the table name according to the current
311+
# `search_path` https://dba.stackexchange.com/a/75124
312+
query <-
313+
paste0(
314+
"SELECT attname \n",
315+
"FROM pg_attribute \n",
316+
"WHERE attrelid = ", tname_qstr, "::regclass \n",
317+
" AND attnum > 0 \n",
318+
" AND NOT attisdropped \n",
319+
"ORDER BY attnum;"
320+
)
321+
dbGetQuery(conn, query)[[1]]
322+
} else {
323+
stop("Table ", dbQuoteIdentifier(conn, id), " not found.", call. = FALSE)
324+
}
325+
}
326+
255327
find_temp_schema <- function(conn, fail_if_missing = TRUE) {
256328
if (!is.na(connection_get_temp_schema(conn@ptr)))
257329
return(connection_get_temp_schema(conn@ptr))

man/Postgres.Rd

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/helper-with_matview.R

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#' Run an expression that creates a materialized view and clean up afterwards.
2+
#'
3+
#' @param con PqConnection. The database connection.
4+
#' @param matview character. The materialized view name.
5+
#' @param query character. A SELECT, TABLE, or VALUES command to populate the
6+
#' materialized view.
7+
#' @param expr expression. The R expression to execute.
8+
#' @return the return value of the \code{expr}.
9+
#' @seealso https://www.postgresql.org/docs/current/sql-creatematerializedview.html
10+
with_matview <- function(con, matview, query, expr) {
11+
on.exit(
12+
DBI::dbExecute(con, paste0("DROP MATERIALIZED VIEW IF EXISTS ", matview)),
13+
add = TRUE
14+
)
15+
dbExecute(
16+
con,
17+
paste0(
18+
"CREATE MATERIALIZED VIEW IF NOT EXISTS ", matview,
19+
" AS ", query
20+
)
21+
)
22+
23+
force(expr)
24+
}

tests/testthat/helper-with_schema.R

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#' Run an expression that creates a schema and clean up afterwards.
2+
#'
3+
#' @param con PqConnection. The database connection.
4+
#' @param schema character. The schema name.
5+
#' @param expr expression. The R expression to execute.
6+
#' @return the return value of the \code{expr}.
7+
with_schema <- function(con, schema, expr) {
8+
on.exit(
9+
DBI::dbExecute(con, paste0("DROP SCHEMA IF EXISTS ", schema)),
10+
add = TRUE
11+
)
12+
DBI::dbExecute(con, paste0("CREATE SCHEMA IF NOT EXISTS ", schema))
13+
force(expr)
14+
}

tests/testthat/test-list_matviews.R

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
if (postgresHasDefault()) {
2+
with_database_connection(con = postgresDefault(system_catalogs = TRUE), {
3+
4+
skip_if(
5+
is(con, "RedshiftConnection"),
6+
"Redshift doesn't expose system catalogs"
7+
)
8+
skip_if_not(
9+
dbExistsTable(con, Id(schema = "pg_catalog", table = "pg_class")),
10+
"`pg_catalog`.`pg_class` not available"
11+
)
12+
13+
test_that("Materialized View is listed", {
14+
with_matview(con, "matview1", "SELECT 1 AS col1", {
15+
16+
expect_true(dbExistsTable(con, "matview1"))
17+
expect_true("matview1" %in% dbListTables(con))
18+
19+
objects <- dbListObjects(con)
20+
quoted_tables <-
21+
vapply(
22+
objects$table,
23+
dbQuoteIdentifier,
24+
conn = con,
25+
character(1)
26+
)
27+
expect_true(dbQuoteIdentifier(con, "matview1") %in% quoted_tables)
28+
29+
expect_true("col1" %in% dbListFields(con, "matview1"))
30+
})
31+
})
32+
33+
test_that("Materialized View in custom schema is listed", {
34+
with_schema(con, "matschema1", {
35+
matview_id <- Id(schema = "matschema1", table = "matview1")
36+
matview_chr <- "matschema1.matview1"
37+
38+
with_matview(con, matview_chr, "SELECT 1 AS col1", {
39+
40+
expect_true(dbExistsTable(con, matview_id))
41+
42+
objects <- dbListObjects(con, prefix = Id(schema = "matschema1"))
43+
quoted_tables <-
44+
vapply(
45+
objects$table,
46+
dbQuoteIdentifier,
47+
conn = con,
48+
character(1)
49+
)
50+
expect_true(dbQuoteIdentifier(con, matview_id) %in% quoted_tables)
51+
52+
expect_true("col1" %in% dbListFields(con, matview_id))
53+
})
54+
})
55+
})
56+
})
57+
}

0 commit comments

Comments
 (0)