From 5eee597ef4afc793d4babe75808a1ce0a4f4a10e Mon Sep 17 00:00:00 2001 From: dat-a-man <98139823+dat-a-man@users.noreply.github.com> Date: Mon, 10 Mar 2025 08:52:34 +0000 Subject: [PATCH] Updated the condiguration section for source arguments for `sql_database` --- .../sql_database/configuration.md | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/sql_database/configuration.md b/docs/website/docs/dlt-ecosystem/verified-sources/sql_database/configuration.md index b783f64c0a..273aa80a17 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/sql_database/configuration.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/sql_database/configuration.md @@ -18,6 +18,10 @@ import Header from '../_source-info-header.md'; Read more about sources and resources here: [General usage: source](../../../general-usage/source.md) and [General usage: resource](../../../general-usage/resource.md). +:::note NOTE +To see the complete list of source arguments for `sql_database`, [refer to this section](#arguments-for-sql_database-source). +::: + ### Example usage: :::tip @@ -344,3 +348,55 @@ print(info) ``` With the dataset above and a local PostgreSQL instance, the `ConnectorX` backend is 2x faster than the `PyArrow` backend. +### Arguments for `sql_database` source +The following arguments can be used with the `sql_database` source: + + `credentials` (Union[ConnectionStringCredentials, Engine, str]): Database credentials or an `sqlalchemy.Engine` instance. + + `schema` (Optional[str]): Name of the database schema to load (if different from the default). + + `metadata` (Optional[MetaData]): Optional `sqlalchemy.MetaData` instance. The `schema` argument is ignored when this is used. + + `table_names` (Optional[List[str]]): A list of table names to load. By default, all tables in the schema are loaded. + + `chunk_size` (int): Number of rows yielded in one batch. 
SQLAlchemy will create an additional internal row buffer twice the chunk size. + + `backend` (TableBackend): Type of backend to generate table data. One of: "sqlalchemy", "pyarrow", "pandas" and "connectorx". + + - "sqlalchemy" yields batches as lists of Python dictionaries, "pyarrow" and "connectorx" yield batches as arrow tables, "pandas" yields pandas DataFrames. + + - "sqlalchemy" is the default and does not require additional dependencies. + + - "pyarrow" creates stable destination schemas with correct data types. + + - "connectorx" is typically the fastest but ignores the "chunk_size", so you must deal with large tables yourself. + + `detect_precision_hints` (bool): Deprecated. Use `reflection_level` instead. Set column precision and scale hints for supported data types in the target schema based on the columns in the source tables. This is disabled by default. + + `reflection_level` (ReflectionLevel): Specifies how much information should be reflected from the source database schema. + + - "minimal": Only table names, nullability and primary keys are reflected. Data types are inferred from the data. This is the default option. + + - "full": Data types will be reflected on top of "minimal". `dlt` will coerce the data into reflected types if necessary. + + - "full_with_precision": Sets precision and scale on supported data types (e.g., decimal, text, binary). Creates big and regular integer types. + + `defer_table_reflect` (bool): Will connect and reflect table schema only when yielding data. Requires `table_names` to be explicitly passed. + Enable this option when running on Airflow. Available on dlt 0.4.4 and later. + + `table_adapter_callback` (Callable): Receives each reflected table. May be used to modify the list of columns that will be selected. + + `backend_kwargs` (**kwargs): kwargs passed to the table backend, e.g., "conn" is used to pass a specialized connection string to connectorx. + + `include_views` (bool): Reflect views as well as tables. 
Note that view names included in `table_names` are always included regardless of this setting. This is set to false by default. + + `type_adapter_callback` (Optional[Callable]): Callable to override type inference when reflecting columns. + Argument is a single sqlalchemy data type (`TypeEngine` instance) and it should return another sqlalchemy data type, or `None` (type will be inferred from data). + + `query_adapter_callback` (Optional[Callable[[Select, Table], Select]]): Callable to override the SELECT query used to fetch data from the table. The callback receives the sqlalchemy `Select` and corresponding `Table`, `Incremental` and `Engine` objects and should return the modified `Select` or `Text`. + + `resolve_foreign_keys` (bool): Translate foreign keys in the same schema to `references` table hints. + May incur additional database calls as all referenced tables are reflected. + + `engine_adapter_callback` (Callable[[Engine], Engine]): Callback to configure or modify the Engine instance that will be used to open a connection, e.g., to set the transaction isolation level. +