diff --git a/DESCRIPTION b/DESCRIPTION
index 530f268f..463ca866 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -22,7 +22,9 @@ BugReports: https://github.com/posit-dev/btw/issues
 Imports:
     cli,
     clipr,
+    DBI,
     dplyr,
+    duckdb,
     ellmer (>= 0.1.1.9000),
     fs,
     jsonlite,
diff --git a/R/tool-query.R b/R/tool-query.R
new file mode 100644
index 00000000..61b583f4
--- /dev/null
+++ b/R/tool-query.R
@@ -0,0 +1,64 @@
+#' Perform a SQL query on the data, and return the results as JSON.
+#'
+#' @param query A DuckDB SQL query; must be a SELECT statement.
+#' @param data_frame The name of the data frame.
+#' @return The results of the query as a JSON string.
+btw_tool_env_query_data_frame <- function(query, data_frame) {
+  d <- get(data_frame)
+  if (!is.data.frame(d)) {
+    cli::cli_abort("{.arg data_frame} must be the name of a data frame.")
+  }
+
+  conn <- btw_connection()
+
+  # Refresh the registration on every call: a view left over from an earlier
+  # call would otherwise keep serving the old contents after the data frame
+  # was modified in R.
+  if (DBI::dbExistsTable(conn, data_frame)) {
+    duckdb::duckdb_unregister(conn, data_frame)
+  }
+  duckdb::duckdb_register(conn, data_frame, d, experimental = FALSE)
+
+  # NOTE(review): `query` is run as-is; the SELECT-only contract documented
+  # above is not enforced here.
+  res <- DBI::dbGetQuery(conn, query)
+
+  btw_tool_env_describe_data_frame(res, format = "json", dims = c(Inf, Inf))
+}
+
+.btw_add_to_tools(
+  name = "btw_tool_env_query_data_frame",
+  group = "env",
+  tool = function() {
+    ellmer::tool(
+      btw_tool_env_query_data_frame,
+      .name = "btw_tool_env_query_data_frame",
+      # Built with paste() so the description carries no embedded newlines or
+      # indentation; it is also rendered into the ?btw_register_tools table.
+      .description = paste(
+        "Run a DuckDB SQL query against a data frame.",
+        "Use this tool instead of btw_tool_env_describe_data_frame to run more",
+        "targeted queries, e.g. calculating statistics on specific columns."
+      ),
+      query = ellmer::type_string("A DuckDB SQL query, as a string."),
+      data_frame = ellmer::type_string("The name of the data frame, as a string.")
+    )
+  }
+)
+
+# Open the shared in-memory DuckDB connection if there is none yet, or if the
+# cached one is no longer valid.
+btw_connect <- function() {
+  if (is.null(.globals$conn) || !DBI::dbIsValid(.globals$conn)) {
+    .globals$conn <- DBI::dbConnect(duckdb::duckdb(), dbdir = ":memory:")
+  }
+
+  invisible(.globals$conn)
+}
+
+# Return the shared DuckDB connection, connecting first when needed.
+btw_connection <- function() {
+  btw_connect()
+
+  .globals$conn
+}
diff --git a/R/utils.R b/R/utils.R
index ca7d416e..cf47706a 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -1,3 +1,5 @@
+.globals <- new_environment()
+
 pandoc_convert <- function(path, ..., from = "html", to = "markdown") {
   tmp_file <- withr::local_tempfile()
 
diff --git a/man/btw_register_tools.Rd b/man/btw_register_tools.Rd
index ecc756f9..b6611379 100644
--- a/man/btw_register_tools.Rd
+++ b/man/btw_register_tools.Rd
@@ -29,6 +29,7 @@ to the tools:\tabular{lll}{
    btw_tool_docs_vignette \tab docs \tab Get a package vignette in plain text. \cr
    btw_tool_env_describe_data_frame \tab env \tab Show the data frame or table or get information about the structure of a data frame or table. \cr
    btw_tool_env_describe_environment \tab env \tab List and describe items in an environment. \cr
+   btw_tool_env_query_data_frame \tab env \tab Run a DuckDB SQL query against a data frame. \cr
    btw_tool_files_list_files \tab files \tab List files in the current working directory or in subfolders in the current project directory. \cr
    btw_tool_files_read_text_file \tab files \tab Read an entire text file. \cr
    btw_tool_ide_read_current_editor \tab ide \tab Read the contents of the editor that is currently open in the user's IDE. \cr
diff --git a/man/btw_tool_env_query_data_frame.Rd b/man/btw_tool_env_query_data_frame.Rd
new file mode 100644
index 00000000..8b332ff9
--- /dev/null
+++ b/man/btw_tool_env_query_data_frame.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/tool-query.R
+\name{btw_tool_env_query_data_frame}
+\alias{btw_tool_env_query_data_frame}
+\title{Perform a SQL query on the data, and return the results as JSON.}
+\usage{
+btw_tool_env_query_data_frame(query, data_frame)
+}
+\arguments{
+\item{query}{A DuckDB SQL query; must be a SELECT statement.}
+
+\item{data_frame}{The name of the data frame.}
+}
+\value{
+The results of the query as a JSON string.
+}
+\description{
+Perform a SQL query on the data, and return the results as JSON.
+}
diff --git a/tests/testthat/_snaps/tool-query.md b/tests/testthat/_snaps/tool-query.md
new file mode 100644
index 00000000..edf12398
--- /dev/null
+++ b/tests/testthat/_snaps/tool-query.md
@@ -0,0 +1,18 @@
+# btw_tool_env_query_data_frame() works
+
+    Code
+      btw_tool_env_query_data_frame("SELECT mpg FROM mtcars LIMIT 5;", "mtcars")
+    Output
+      [1] "```json"                                                                                       
+      [2] "[\n  {\"mpg\":21},\n  {\"mpg\":21},\n  {\"mpg\":22.8},\n  {\"mpg\":21.4},\n  {\"mpg\":18.7}\n]"
+      [3] "```"                                                                                           
+
+---
+
+    Code
+      btw_tool_env_query_data_frame("SELECT mpg FROM mtcars LIMIT 5;", "mtcars")
+    Output
+      [1] "```json"                                                                                       
+      [2] "[\n  {\"mpg\":21},\n  {\"mpg\":21},\n  {\"mpg\":22.8},\n  {\"mpg\":21.4},\n  {\"mpg\":18.7}\n]"
+      [3] "```"                                                                                           
+
diff --git a/tests/testthat/test-tool-query.R b/tests/testthat/test-tool-query.R
new file mode 100644
index 00000000..f6a4bd33
--- /dev/null
+++ b/tests/testthat/test-tool-query.R
@@ -0,0 +1,17 @@
+test_that("btw_tool_env_query_data_frame() works", {
+  # can run a simple query
+  expect_snapshot(
+    btw_tool_env_query_data_frame(
+      "SELECT mpg FROM mtcars LIMIT 5;",
+      "mtcars"
+    )
+  )
+
+  # can run a query against the same table twice
+  expect_snapshot(
+    btw_tool_env_query_data_frame(
+      "SELECT mpg FROM mtcars LIMIT 5;",
+      "mtcars"
+    )
+  )
+})