diff --git a/.github/ISSUE_TEMPLATE/3-airflow_providers_bug_report.yml b/.github/ISSUE_TEMPLATE/3-airflow_providers_bug_report.yml index f7c6642519d3a..94f85e4ca4228 100644 --- a/.github/ISSUE_TEMPLATE/3-airflow_providers_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/3-airflow_providers_bug_report.yml @@ -30,6 +30,7 @@ body: - airbyte - alibaba - amazon + - apache-arrow - apache-beam - apache-cassandra - apache-drill diff --git a/dev/breeze/doc/images/output_build-docs.svg b/dev/breeze/doc/images/output_build-docs.svg index ebdd2875195d0..b289b5528f892 100644 --- a/dev/breeze/doc/images/output_build-docs.svg +++ b/dev/breeze/doc/images/output_build-docs.svg @@ -224,18 +224,17 @@ - + - + Usage:                                                                                                                 breeze build-docs                                                                                                      [OPTIONS] [airbyte | alibaba | all-providers | amazon | apache-airflow | apache-airflow-ctl | apache-airflow-providers -apache.cassandra | apache.drill | apache.druid | apache.flink | apache.hdfs | apache.hive | apache.iceberg |         -apache.impala | apache.kafka | apache.kylin | apache.livy | apache.pig | apache.pinot | apache.spark |                 -apache.tinkerpop | apprise | arangodb | asana | atlassian.jira | celery | cloudant | cncf.kubernetes | cohere |        -common.ai | common.compat | common.io | common.messaging | common.sql | databricks | datadog | dbt.cloud | dingding |  +apache.arrow | apache.cassandra | apache.drill | apache.druid | apache.flink | apache.hdfs | apache.hive |           +apache.iceberg | apache.impala | apache.kafka | apache.kylin | apache.livy | apache.pig | apache.pinot | apache.spark +apache.tinkerpop | apprise | arangodb | asana | atlassian.jira | celery | cloudant | cncf.kubernetes | cohere |     common.ai | common.compat | common.io | common.messaging | common.sql | databricks | datadog | dbt.cloud | dingding 
|  discord | docker | docker-stack | edge3 | elasticsearch | exasol | fab | facebook | ftp | git | github | google | grpc hashicorp | helm-chart | http | imap | influxdb | informatica | jdbc | jenkins | keycloak | microsoft.azure |        microsoft.mssql | microsoft.psrp | microsoft.winrm | mongo | mysql | neo4j | odbc | openai | openfaas | openlineage |  diff --git a/dev/breeze/doc/images/output_build-docs.txt b/dev/breeze/doc/images/output_build-docs.txt index ea24111dadf71..1aa7f18b8aa23 100644 --- a/dev/breeze/doc/images/output_build-docs.txt +++ b/dev/breeze/doc/images/output_build-docs.txt @@ -1 +1 @@ -5023b820002e3f33104ae46d617645c4 +71f038d07b701afd0505bdb4c5a505a3 diff --git a/dev/breeze/doc/images/output_release-management_add-back-references.svg b/dev/breeze/doc/images/output_release-management_add-back-references.svg index 297996f63b099..68afead30b9ab 100644 --- a/dev/breeze/doc/images/output_release-management_add-back-references.svg +++ b/dev/breeze/doc/images/output_release-management_add-back-references.svg @@ -139,17 +139,17 @@ - + - + Usage:                                                                                                                 breeze release-management add-back-references                                                                          [OPTIONS] [airbyte | alibaba | all-providers | amazon | apache-airflow | apache-airflow-ctl | apache-airflow-providers -apache.cassandra | apache.drill | apache.druid | apache.flink | apache.hdfs | apache.hive | apache.iceberg |         -apache.impala | apache.kafka | apache.kylin | apache.livy | apache.pig | apache.pinot | apache.spark |                 -apache.tinkerpop | apprise | arangodb | asana | atlassian.jira | celery | cloudant | cncf.kubernetes | cohere |        +apache.arrow | apache.cassandra | apache.drill | apache.druid | apache.flink | apache.hdfs | apache.hive |           +apache.iceberg | apache.impala | apache.kafka | apache.kylin | apache.livy | apache.pig | 
apache.pinot | apache.spark +apache.tinkerpop | apprise | arangodb | asana | atlassian.jira | celery | cloudant | cncf.kubernetes | cohere |      common.ai | common.compat | common.io | common.messaging | common.sql | databricks | datadog | dbt.cloud | dingding |  discord | docker | docker-stack | edge3 | elasticsearch | exasol | fab | facebook | ftp | git | github | google | grpc hashicorp | helm-chart | http | imap | influxdb | informatica | jdbc | jenkins | keycloak | microsoft.azure |        diff --git a/dev/breeze/doc/images/output_release-management_add-back-references.txt b/dev/breeze/doc/images/output_release-management_add-back-references.txt index d8b21fbf0ba31..a28921d2e9967 100644 --- a/dev/breeze/doc/images/output_release-management_add-back-references.txt +++ b/dev/breeze/doc/images/output_release-management_add-back-references.txt @@ -1 +1 @@ -c4f3137fd042c7fe7f6cf21479523c5d +855af89193bab101e903c8efe0a9a0a7 diff --git a/dev/breeze/doc/images/output_release-management_generate-issue-content-providers.svg b/dev/breeze/doc/images/output_release-management_generate-issue-content-providers.svg index 38e064ecfa5a5..cf916af4496a2 100644 --- a/dev/breeze/doc/images/output_release-management_generate-issue-content-providers.svg +++ b/dev/breeze/doc/images/output_release-management_generate-issue-content-providers.svg @@ -131,25 +131,23 @@ - + - + Usage:                                                                                                                 breeze release-management generate-issue-content-providers                                                             -[OPTIONS] [airbyte | alibaba | amazon | apache.cassandra | apache.drill | apache.druid | apache.flink | apache.hdfs |  -apache.hive | apache.iceberg | apache.impala | apache.kafka | apache.kylin | apache.livy | apache.pig | apache.pinot | -apache.spark | apache.tinkerpop | apprise | arangodb | asana | atlassian.jira | celery | cloudant | cncf.kubernetes |  -cohere | common.compat 
| common.io | common.messaging | common.sql | databricks | datadog | dbt.cloud | dingding |     -discord | docker | edge3 | elasticsearch | exasol | fab | facebook | ftp | git | github | google | grpc | hashicorp |  -http | imap | influxdb | informatica | jdbc | jenkins | keycloak | microsoft.azure | microsoft.mssql | microsoft.psrp -microsoft.winrm | mongo | mysql | neo4j | odbc | openai | openfaas | openlineage | opensearch | opsgenie | oracle |  -pagerduty | papermill | pgvector | pinecone | postgres | presto | qdrant | redis | salesforce | samba | segment |      -sendgrid | sftp | singularity | slack | smtp | snowflake | sqlite | ssh | standard | tableau | telegram | teradata |   -trino | vertica | weaviate | yandex | ydb | zendesk]...                                                                - -Generates content for issue to test the release. +[OPTIONS] [airbyte | alibaba | amazon | apache.arrow | apache.cassandra | apache.drill | apache.druid | apache.flink | +apache.hdfs | apache.hive | apache.iceberg | apache.impala | apache.kafka | apache.kylin | apache.livy | apache.pig |  +apache.pinot | apache.spark | apache.tinkerpop | apprise | arangodb | asana | atlassian.jira | celery | cloudant |     +cncf.kubernetes | cohere | common.compat | common.io | common.messaging | common.sql | databricks | datadog |          +dbt.cloud | dingding | discord | docker | edge3 | elasticsearch | exasol | fab | facebook | ftp | git | github |       +google | grpc | hashicorp | http | imap | influxdb | informatica | jdbc | jenkins | keycloak | microsoft.azure |       +microsoft.mssql | microsoft.psrp | microsoft.winrm | mongo | mysql | neo4j | odbc | openai | openfaas | openlineage |  +opensearch | opsgenie | oracle | pagerduty | papermill | pgvector | pinecone | postgres | presto | qdrant | redis |    +salesforce | samba | segment | sendgrid | sftp | singularity | slack | smtp | snowflake | sqlite | ssh | standard |    +tableau | telegram | teradata | trino | vertica | 
weaviate | yandex | ydb | zendesk]...                                ╭─ Generate issue content flags ───────────────────────────────────────────────────────────────────────────────────────╮ --disable-progress      Disable progress bar diff --git a/dev/breeze/doc/images/output_release-management_generate-issue-content-providers.txt b/dev/breeze/doc/images/output_release-management_generate-issue-content-providers.txt index 0a88b97dfc405..9c9e7460200bb 100644 --- a/dev/breeze/doc/images/output_release-management_generate-issue-content-providers.txt +++ b/dev/breeze/doc/images/output_release-management_generate-issue-content-providers.txt @@ -1 +1 @@ -20f849b3a6098cf813098a97bf220006 +f297fad1c25de5dbbfbc6dce76d830c2 diff --git a/dev/breeze/doc/images/output_release-management_generate-providers-metadata.svg b/dev/breeze/doc/images/output_release-management_generate-providers-metadata.svg index 6e1f93fb8dc8a..3ba9fe4daf106 100644 --- a/dev/breeze/doc/images/output_release-management_generate-providers-metadata.svg +++ b/dev/breeze/doc/images/output_release-management_generate-providers-metadata.svg @@ -149,9 +149,9 @@ - + - + - + - + Usage:                                                                                                                 breeze release-management prepare-provider-distributions                                                               -[OPTIONS] [airbyte | alibaba | amazon | apache.cassandra | apache.drill | apache.druid | apache.flink | apache.hdfs |  -apache.hive | apache.iceberg | apache.impala | apache.kafka | apache.kylin | apache.livy | apache.pig | apache.pinot | -apache.spark | apache.tinkerpop | apprise | arangodb | asana | atlassian.jira | celery | cloudant | cncf.kubernetes |  -cohere | common.compat | common.io | common.messaging | common.sql | databricks | datadog | dbt.cloud | dingding |     -discord | docker | edge3 | elasticsearch | exasol | fab | facebook | ftp | git | github | google | grpc | hashicorp |  -http | imap 
| influxdb | informatica | jdbc | jenkins | keycloak | microsoft.azure | microsoft.mssql | microsoft.psrp -microsoft.winrm | mongo | mysql | neo4j | odbc | openai | openfaas | openlineage | opensearch | opsgenie | oracle |  -pagerduty | papermill | pgvector | pinecone | postgres | presto | qdrant | redis | salesforce | samba | segment |      -sendgrid | sftp | singularity | slack | smtp | snowflake | sqlite | ssh | standard | tableau | telegram | teradata |   -trino | vertica | weaviate | yandex | ydb | zendesk]...                                                                - -Prepare sdist/whl distributions of Airflow Providers. +[OPTIONS] [airbyte | alibaba | amazon | apache.arrow | apache.cassandra | apache.drill | apache.druid | apache.flink | +apache.hdfs | apache.hive | apache.iceberg | apache.impala | apache.kafka | apache.kylin | apache.livy | apache.pig |  +apache.pinot | apache.spark | apache.tinkerpop | apprise | arangodb | asana | atlassian.jira | celery | cloudant |     +cncf.kubernetes | cohere | common.compat | common.io | common.messaging | common.sql | databricks | datadog |          +dbt.cloud | dingding | discord | docker | edge3 | elasticsearch | exasol | fab | facebook | ftp | git | github |       +google | grpc | hashicorp | http | imap | influxdb | informatica | jdbc | jenkins | keycloak | microsoft.azure |       +microsoft.mssql | microsoft.psrp | microsoft.winrm | mongo | mysql | neo4j | odbc | openai | openfaas | openlineage |  +opensearch | opsgenie | oracle | pagerduty | papermill | pgvector | pinecone | postgres | presto | qdrant | redis |    +salesforce | samba | segment | sendgrid | sftp | singularity | slack | smtp | snowflake | sqlite | ssh | standard |    +tableau | telegram | teradata | trino | vertica | weaviate | yandex | ydb | zendesk]...                                
╭─ Package flags ──────────────────────────────────────────────────────────────────────────────────────────────────────╮ --clean-dist                   Clean dist directory before building packages. Useful when you want to build    diff --git a/dev/breeze/doc/images/output_release-management_prepare-provider-distributions.txt b/dev/breeze/doc/images/output_release-management_prepare-provider-distributions.txt index 416151822b1fc..3fb9b67640ced 100644 --- a/dev/breeze/doc/images/output_release-management_prepare-provider-distributions.txt +++ b/dev/breeze/doc/images/output_release-management_prepare-provider-distributions.txt @@ -1 +1 @@ -8e9317e44788c0b820f3a461da6df58a +530302e08f5f6980ddd2f0f4d4757f47 diff --git a/dev/breeze/doc/images/output_release-management_prepare-provider-documentation.svg b/dev/breeze/doc/images/output_release-management_prepare-provider-documentation.svg index 9ad626222bb73..7e4ee504523b7 100644 --- a/dev/breeze/doc/images/output_release-management_prepare-provider-documentation.svg +++ b/dev/breeze/doc/images/output_release-management_prepare-provider-documentation.svg @@ -200,23 +200,23 @@ - + - + Usage:                                                                                                                 breeze release-management prepare-provider-documentation                                                               -[OPTIONS] [airbyte | alibaba | amazon | apache.cassandra | apache.drill | apache.druid | apache.flink | apache.hdfs |  -apache.hive | apache.iceberg | apache.impala | apache.kafka | apache.kylin | apache.livy | apache.pig | apache.pinot | -apache.spark | apache.tinkerpop | apprise | arangodb | asana | atlassian.jira | celery | cloudant | cncf.kubernetes |  -cohere | common.compat | common.io | common.messaging | common.sql | databricks | datadog | dbt.cloud | dingding |     -discord | docker | edge3 | elasticsearch | exasol | fab | facebook | ftp | git | github | google | grpc | hashicorp |  -http | imap | 
influxdb | informatica | jdbc | jenkins | keycloak | microsoft.azure | microsoft.mssql | microsoft.psrp -microsoft.winrm | mongo | mysql | neo4j | odbc | openai | openfaas | openlineage | opensearch | opsgenie | oracle |  -pagerduty | papermill | pgvector | pinecone | postgres | presto | qdrant | redis | salesforce | samba | segment |      -sendgrid | sftp | singularity | slack | smtp | snowflake | sqlite | ssh | standard | tableau | telegram | teradata |   -trino | vertica | weaviate | yandex | ydb | zendesk]...                                                                +[OPTIONS] [airbyte | alibaba | amazon | apache.arrow | apache.cassandra | apache.drill | apache.druid | apache.flink | +apache.hdfs | apache.hive | apache.iceberg | apache.impala | apache.kafka | apache.kylin | apache.livy | apache.pig |  +apache.pinot | apache.spark | apache.tinkerpop | apprise | arangodb | asana | atlassian.jira | celery | cloudant |     +cncf.kubernetes | cohere | common.compat | common.io | common.messaging | common.sql | databricks | datadog |          +dbt.cloud | dingding | discord | docker | edge3 | elasticsearch | exasol | fab | facebook | ftp | git | github |       +google | grpc | hashicorp | http | imap | influxdb | informatica | jdbc | jenkins | keycloak | microsoft.azure |       +microsoft.mssql | microsoft.psrp | microsoft.winrm | mongo | mysql | neo4j | odbc | openai | openfaas | openlineage |  +opensearch | opsgenie | oracle | pagerduty | papermill | pgvector | pinecone | postgres | presto | qdrant | redis |    +salesforce | samba | segment | sendgrid | sftp | singularity | slack | smtp | snowflake | sqlite | ssh | standard |    +tableau | telegram | teradata | trino | vertica | weaviate | yandex | ydb | zendesk]...                                Prepare CHANGELOG, README and COMMITS information for providers. 
diff --git a/dev/breeze/doc/images/output_release-management_prepare-provider-documentation.txt b/dev/breeze/doc/images/output_release-management_prepare-provider-documentation.txt index 25496ff5fe2a2..b1e4e1ebeeb16 100644 --- a/dev/breeze/doc/images/output_release-management_prepare-provider-documentation.txt +++ b/dev/breeze/doc/images/output_release-management_prepare-provider-documentation.txt @@ -1 +1 @@ -75517e0fd3d48cbc5cf85c50812753df +3ceb08c4902fa1a29594c496a3f74452 diff --git a/dev/breeze/doc/images/output_release-management_publish-docs.svg b/dev/breeze/doc/images/output_release-management_publish-docs.svg index f3f41e8890694..b450c5d75a8f0 100644 --- a/dev/breeze/doc/images/output_release-management_publish-docs.svg +++ b/dev/breeze/doc/images/output_release-management_publish-docs.svg @@ -178,17 +178,17 @@ - + - + Usage:                                                                                                                 breeze release-management publish-docs                                                                                 [OPTIONS] [airbyte | alibaba | all-providers | amazon | apache-airflow | apache-airflow-ctl | apache-airflow-providers -apache.cassandra | apache.drill | apache.druid | apache.flink | apache.hdfs | apache.hive | apache.iceberg |         -apache.impala | apache.kafka | apache.kylin | apache.livy | apache.pig | apache.pinot | apache.spark |                 -apache.tinkerpop | apprise | arangodb | asana | atlassian.jira | celery | cloudant | cncf.kubernetes | cohere |        +apache.arrow | apache.cassandra | apache.drill | apache.druid | apache.flink | apache.hdfs | apache.hive |           +apache.iceberg | apache.impala | apache.kafka | apache.kylin | apache.livy | apache.pig | apache.pinot | apache.spark +apache.tinkerpop | apprise | arangodb | asana | atlassian.jira | celery | cloudant | cncf.kubernetes | cohere |      common.ai | common.compat | common.io | common.messaging | common.sql | databricks | 
datadog | dbt.cloud | dingding |  discord | docker | docker-stack | edge3 | elasticsearch | exasol | fab | facebook | ftp | git | github | google | grpc hashicorp | helm-chart | http | imap | influxdb | informatica | jdbc | jenkins | keycloak | microsoft.azure |        diff --git a/dev/breeze/doc/images/output_release-management_publish-docs.txt b/dev/breeze/doc/images/output_release-management_publish-docs.txt index 12e06cfa472c4..404cd2f79bee6 100644 --- a/dev/breeze/doc/images/output_release-management_publish-docs.txt +++ b/dev/breeze/doc/images/output_release-management_publish-docs.txt @@ -1 +1 @@ -7967a912265c05eb878af2ba3ddf2057 +30caa012cd5872c0eae2caa9a7ebe2e1 diff --git a/dev/breeze/doc/images/output_sbom_generate-providers-requirements.svg b/dev/breeze/doc/images/output_sbom_generate-providers-requirements.svg index aef687a4db08c..456b422334fb3 100644 --- a/dev/breeze/doc/images/output_sbom_generate-providers-requirements.svg +++ b/dev/breeze/doc/images/output_sbom_generate-providers-requirements.svg @@ -167,9 +167,9 @@ - + - + Usage:breeze sbom generate-providers-requirements[OPTIONS] @@ -179,19 +179,19 @@ ╭─ Generate provider requirements flags ───────────────────────────────────────────────────────────────────────────────╮ --python-versions Comma separate list of Python versions to update sbom from (defaults to all historical python    versions) (3.6 | 3.7 | 3.8 | 3.9 | 3.10 | 3.11 | 3.12 | 3.13 | 3.14) ---provider-id     Provider id to generate the requirements for (airbyte | alibaba | amazon | apache.beam |  -apache.cassandra | apache.drill | apache.druid | apache.flink | apache.hdfs | apache.hive |  -apache.iceberg | apache.impala | apache.kafka | apache.kylin | apache.livy | apache.pig |  -apache.pinot | apache.spark | apache.tinkerpop | apprise | arangodb | asana | atlassian.jira |  -celery | cloudant | cncf.kubernetes | cohere | common.ai | common.compat | common.io |  -common.messaging | common.sql | databricks | datadog | dbt.cloud | dingding 
| discord | docker | -edge3 | elasticsearch | exasol | fab | facebook | ftp | git | github | google | grpc | hashicorp -| http | imap | influxdb | informatica | jdbc | jenkins | keycloak | microsoft.azure |  -microsoft.mssql | microsoft.psrp | microsoft.winrm | mongo | mysql | neo4j | odbc | openai |  -openfaas | openlineage | opensearch | opsgenie | oracle | pagerduty | papermill | pgvector |  -pinecone | postgres | presto | qdrant | redis | salesforce | samba | segment | sendgrid | sftp | -singularity | slack | smtp | snowflake | sqlite | ssh | standard | tableau | telegram | teradata -| trino | vertica | weaviate | yandex | ydb | zendesk) +--provider-id     Provider id to generate the requirements for (airbyte | alibaba | amazon | apache.arrow |  +apache.beam | apache.cassandra | apache.drill | apache.druid | apache.flink | apache.hdfs |  +apache.hive | apache.iceberg | apache.impala | apache.kafka | apache.kylin | apache.livy |  +apache.pig | apache.pinot | apache.spark | apache.tinkerpop | apprise | arangodb | asana |  +atlassian.jira | celery | cloudant | cncf.kubernetes | cohere | common.ai | common.compat |  +common.io | common.messaging | common.sql | databricks | datadog | dbt.cloud | dingding |  +discord | docker | edge3 | elasticsearch | exasol | fab | facebook | ftp | git | github | google +| grpc | hashicorp | http | imap | influxdb | informatica | jdbc | jenkins | keycloak |  +microsoft.azure | microsoft.mssql | microsoft.psrp | microsoft.winrm | mongo | mysql | neo4j |  +odbc | openai | openfaas | openlineage | opensearch | opsgenie | oracle | pagerduty | papermill  +| pgvector | pinecone | postgres | presto | qdrant | redis | salesforce | samba | segment |  +sendgrid | sftp | singularity | slack | smtp | snowflake | sqlite | ssh | standard | tableau |  +telegram | teradata | trino | vertica | weaviate | yandex | ydb | zendesk) --provider-versionProvider version to generate the requirements for i.e `2.1.0`. 
`latest` is also a supported      value to account for the most recent version of the provider (TEXT) --force           Force update providers requirements even if they already exist. diff --git a/dev/breeze/doc/images/output_sbom_generate-providers-requirements.txt b/dev/breeze/doc/images/output_sbom_generate-providers-requirements.txt index 0c39f82ba3755..6209cf1a945e7 100644 --- a/dev/breeze/doc/images/output_sbom_generate-providers-requirements.txt +++ b/dev/breeze/doc/images/output_sbom_generate-providers-requirements.txt @@ -1 +1 @@ -cb4611abc6764a8d7c1aacad63da03e3 +2282d87d0f825ed3706631be67bc9248 diff --git a/dev/breeze/doc/images/output_workflow-run_publish-docs.svg b/dev/breeze/doc/images/output_workflow-run_publish-docs.svg index d57016971c488..bdce893c03c3b 100644 --- a/dev/breeze/doc/images/output_workflow-run_publish-docs.svg +++ b/dev/breeze/doc/images/output_workflow-run_publish-docs.svg @@ -184,17 +184,17 @@ - + - + Usage:                                                                                                                 breeze workflow-run publish-docs                                                                                       [OPTIONS] [airbyte | alibaba | all-providers | amazon | apache-airflow | apache-airflow-ctl | apache-airflow-providers -apache.cassandra | apache.drill | apache.druid | apache.flink | apache.hdfs | apache.hive | apache.iceberg |         -apache.impala | apache.kafka | apache.kylin | apache.livy | apache.pig | apache.pinot | apache.spark |                 -apache.tinkerpop | apprise | arangodb | asana | atlassian.jira | celery | cloudant | cncf.kubernetes | cohere |        +apache.arrow | apache.cassandra | apache.drill | apache.druid | apache.flink | apache.hdfs | apache.hive |           +apache.iceberg | apache.impala | apache.kafka | apache.kylin | apache.livy | apache.pig | apache.pinot | apache.spark +apache.tinkerpop | apprise | arangodb | asana | atlassian.jira | celery | cloudant | 
cncf.kubernetes | cohere |      common.ai | common.compat | common.io | common.messaging | common.sql | databricks | datadog | dbt.cloud | dingding |  discord | docker | docker-stack | edge3 | elasticsearch | exasol | fab | facebook | ftp | git | github | google | grpc hashicorp | helm-chart | http | imap | influxdb | informatica | jdbc | jenkins | keycloak | microsoft.azure |        diff --git a/dev/breeze/doc/images/output_workflow-run_publish-docs.txt b/dev/breeze/doc/images/output_workflow-run_publish-docs.txt index 3a57f0bb6d668..50fb097d8ce5f 100644 --- a/dev/breeze/doc/images/output_workflow-run_publish-docs.txt +++ b/dev/breeze/doc/images/output_workflow-run_publish-docs.txt @@ -1 +1 @@ -7915334135094723635d68ce396033bf +0cd7f3dc85708e266b837f2c6522930c diff --git a/dev/breeze/tests/test_selective_checks.py b/dev/breeze/tests/test_selective_checks.py index bd26d1f6ba573..b6fb34eacfa0c 100644 --- a/dev/breeze/tests/test_selective_checks.py +++ b/dev/breeze/tests/test_selective_checks.py @@ -2310,7 +2310,7 @@ def test_upgrade_to_newer_dependencies( pytest.param( ("providers/common/sql/src/airflow/providers/common/sql/common_sql_python.py",), { - "docs-list-as-string": "amazon apache.drill apache.druid apache.hive apache.iceberg " + "docs-list-as-string": "amazon apache.arrow apache.drill apache.druid apache.hive apache.iceberg " "apache.impala apache.pinot common.ai common.compat common.sql databricks elasticsearch " "exasol google jdbc microsoft.mssql mysql odbc openlineage " "oracle pgvector postgres presto slack snowflake sqlite teradata trino vertica ydb", diff --git a/providers/apache/arrow/LICENSE b/providers/apache/arrow/LICENSE new file mode 100644 index 0000000000000..11069edd79019 --- /dev/null +++ b/providers/apache/arrow/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the 
following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/providers/apache/arrow/NOTICE b/providers/apache/arrow/NOTICE new file mode 100644 index 0000000000000..e02aab0589f0d --- /dev/null +++ b/providers/apache/arrow/NOTICE @@ -0,0 +1,5 @@ +Apache Airflow +Copyright 2016-2025 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). diff --git a/providers/apache/arrow/README.rst b/providers/apache/arrow/README.rst new file mode 100644 index 0000000000000..41272abcd3b39 --- /dev/null +++ b/providers/apache/arrow/README.rst @@ -0,0 +1,113 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +.. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! + +.. IF YOU WANT TO MODIFY TEMPLATE FOR THIS FILE, YOU SHOULD MODIFY THE TEMPLATE + ``PROVIDER_README_TEMPLATE.rst.jinja2`` IN the ``dev/breeze/src/airflow_breeze/templates`` DIRECTORY + +Package ``apache-airflow-providers-apache-arrow`` + +Release: ``0.1.0`` + + +`Apache Arrow <https://arrow.apache.org/>`__ + + +Provider package +---------------- + +This is a provider package for ``apache.arrow`` provider. All classes for this provider package +are in ``airflow.providers.apache.arrow`` python package. + +You can find package information and changelog for the provider +in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-apache-arrow/0.1.0/>`_.
+ +Installation +------------ + +You can install this package on top of an existing Airflow installation (see ``Requirements`` below +for the minimum Airflow version supported) via +``pip install apache-airflow-providers-apache-arrow`` + +The package supports the following python versions: 3.10,3.11,3.12,3.13,3.14 + +Requirements +------------ + +======================================= ====================================== +PIP package Version required +======================================= ====================================== +``apache-airflow`` ``>=2.11.0`` +``apache-airflow-providers-common-sql`` ``>=1.28.2`` +``adbc-driver-manager`` ``>=1.7.0`` +``importlib-resources`` ``>=1.3`` +``pyarrow`` ``>=16.1.0; python_version < "3.13"`` +``pyarrow`` ``>=18.0.0; python_version >= "3.13"`` +======================================= ====================================== + +Cross provider package dependencies +----------------------------------- + +Those are dependencies that might be needed in order to use all the features of the package. +You need to install the specified providers in order to use them. + +You can install such cross-provider dependencies when installing from PyPI. For example: + +.. code-block:: bash + + pip install apache-airflow-providers-apache-arrow[common.sql] + + +============================================================================================================ ============== +Dependent package Extra +============================================================================================================ ============== +`apache-airflow-providers-common-sql <https://airflow.apache.org/docs/apache-airflow-providers-common-sql>`_ ``common.sql`` +============================================================================================================ ============== + +Optional dependencies +---------------------- + +This provider supports multiple ADBC drivers for different databases.
You can install the required driver(s) as extras: + +============== ===================================================================================================================================================================== +Extra Dependency +============== ===================================================================================================================================================================== +``sqlite`` ``adbc-driver-sqlite>=1.7.0`` +``postgresql`` ``adbc-driver-postgresql>=1.7.0`` +``snowflake`` ``adbc-driver-snowflake>=1.7.0`` +``bigquery`` ``adbc-driver-bigquery>=1.7.0`` +``flightsql`` ``adbc-driver-flightsql>=1.7.0`` +``all`` ``adbc-driver-sqlite>=1.7.0``, ``adbc-driver-postgresql>=1.7.0``, ``adbc-driver-snowflake>=1.7.0``, ``adbc-driver-bigquery>=1.7.0``, ``adbc-driver-flightsql>=1.7.0`` +============== ===================================================================================================================================================================== + +To use the provider with a specific database, install the provider with the corresponding extra. For example, to use SQLite: + +.. code-block:: bash + + pip install apache-airflow-providers-apache-arrow[sqlite] + +To install all available drivers: + +.. code-block:: bash + + pip install apache-airflow-providers-apache-arrow[all] + +These extras install the Python ADBC drivers required to connect to each database. Refer to the documentation for each driver for any additional system requirements. + +The changelog for the provider package can be found in the +`changelog <https://airflow.apache.org/docs/apache-airflow-providers-apache-arrow/0.1.0/changelog.html>`_.
diff --git a/providers/apache/arrow/docs/.latest-doc-only-change.txt b/providers/apache/arrow/docs/.latest-doc-only-change.txt new file mode 100644 index 0000000000000..f41e3226a6f43 --- /dev/null +++ b/providers/apache/arrow/docs/.latest-doc-only-change.txt @@ -0,0 +1 @@ +7b2ec33c7ad4998d9c9735b79593fcdcd3b9dd1f diff --git a/providers/apache/arrow/docs/changelog.rst b/providers/apache/arrow/docs/changelog.rst new file mode 100644 index 0000000000000..c893ea7e832e1 --- /dev/null +++ b/providers/apache/arrow/docs/changelog.rst @@ -0,0 +1,49 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +.. NOTE TO CONTRIBUTORS: + Please, only add notes to the Changelog just below the "Changelog" header when there are some breaking changes + and you want to add an explanation to the users on how they are supposed to deal with them. + The changelog is updated and maintained semi-automatically by release manager. + +``apache-airflow-providers-apache-arrow`` + +Changelog +--------- + +0.1.0 +..... + +.. note:: + This release of provider is only available for Airflow 2.11+ as explained in the + `Apache Airflow providers support policy <https://github.com/apache/airflow/blob/main/PROVIDERS.rst#minimum-supported-version-of-airflow-for-community-managed-providers>`_. + +Misc +~~~~ + +* ``Bump min Airflow version in providers to 2.10 (#49843)`` + +..
Below changes are excluded from the changelog. Move them to + appropriate section above if needed. Do not delete the lines(!): + * ``Update description of provider.yaml dependencies (#50231)`` + * ``Avoid committing history for providers (#49907)`` + +0.1.0 +..... + +Initial version of the provider. diff --git a/providers/apache/arrow/docs/commits.rst b/providers/apache/arrow/docs/commits.rst new file mode 100644 index 0000000000000..78554447218d2 --- /dev/null +++ b/providers/apache/arrow/docs/commits.rst @@ -0,0 +1,35 @@ + + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! + + .. IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE + `PROVIDER_COMMITS_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + + .. THE REMAINDER OF THE FILE IS AUTOMATICALLY GENERATED. IT WILL BE OVERWRITTEN! + +Package apache-airflow-providers-apache-arrow +------------------------------------------------------ + +`Apache Arrow <https://arrow.apache.org/>`__ + + +This is detailed commit list of changes for versions provider package: ``apache.arrow``. +For high-level changelog, see :doc:`package information including changelog <index>`. + +..
airflow-providers-commits:: diff --git a/providers/apache/arrow/docs/conf.py b/providers/apache/arrow/docs/conf.py new file mode 100644 index 0000000000000..29c8f3976f1d7 --- /dev/null +++ b/providers/apache/arrow/docs/conf.py @@ -0,0 +1,27 @@ +# Disable Flake8 because of all the sphinx imports +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Configuration of Providers docs building.""" + +from __future__ import annotations + +import os + +os.environ["AIRFLOW_PACKAGE_NAME"] = "apache-airflow-providers-apache-arrow" + +from docs.provider_conf import * # noqa: F403 diff --git a/providers/apache/arrow/docs/connections/adbc.rst b/providers/apache/arrow/docs/connections/adbc.rst new file mode 100644 index 0000000000000..a77cb4d40f51a --- /dev/null +++ b/providers/apache/arrow/docs/connections/adbc.rst @@ -0,0 +1,23 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. 
http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +.. _howto/connection:adbc: + +ADBC connection +=============== + +The ADBC connection type enables connection to an ADBC data source. diff --git a/providers/apache/arrow/docs/index.rst b/providers/apache/arrow/docs/index.rst new file mode 100644 index 0000000000000..2bfc66df17321 --- /dev/null +++ b/providers/apache/arrow/docs/index.rst @@ -0,0 +1,135 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +``apache-airflow-providers-apache-arrow`` +========================================= + + +.. toctree:: + :hidden: + :maxdepth: 1 + :caption: Basics + + Home <self> + Changelog <changelog> + Security <security> + +.. toctree:: + :hidden: + :maxdepth: 1 + :caption: References + + Python API <_api/airflow/providers/apache/arrow/index> + +.. toctree:: + :hidden: + :maxdepth: 1 + :caption: System tests + + System Tests <_api/tests/system/apache/arrow/index> + +..
toctree:: + :hidden: + :maxdepth: 1 + :caption: Resources + + PyPI Repository + Example Dags + Installing from sources + +.. toctree:: + :hidden: + :maxdepth: 1 + :caption: Guides + + Connection types + +.. THE REMAINDER OF THE FILE IS AUTOMATICALLY GENERATED. IT WILL BE OVERWRITTEN AT RELEASE TIME! + + +.. toctree:: + :hidden: + :maxdepth: 1 + :caption: Commits + + Detailed list of commits + + +apache-airflow-providers-apache-arrow package +------------------------------------------------------ + +`Apache Arrow `__ + + +Release: 0.1.0 + +Provider package +---------------- + +This package is for the ``apache.arrow`` provider. +All classes for this package are included in the ``airflow.providers.apache.arrow`` python package. + +Installation +------------ + +You can install this package on top of an existing Airflow installation via +``pip install apache-airflow-providers-apache-arrow``. +For the minimum Airflow version supported, see ``Requirements`` below. + +Requirements +------------ + +The minimum Apache Airflow version supported by this provider distribution is ``2.11.0``. + +======================================= ====================================== +PIP package Version required +======================================= ====================================== +``apache-airflow`` ``>=2.11.0`` +``apache-airflow-providers-common-sql`` ``>=1.28.2`` +``adbc-driver-manager`` ``>=1.7.0`` +``importlib-resources`` ``>=1.3`` +``pyarrow`` ``>=16.1.0; python_version < "3.13"`` +``pyarrow`` ``>=18.0.0; python_version >= "3.13"`` +======================================= ====================================== + +Cross provider package dependencies +----------------------------------- + +Those are dependencies that might be needed in order to use all the features of the package. +You need to install the specified provider distributions in order to use them. + +You can install such cross-provider dependencies when installing from PyPI. For example: + +.. 
code-block:: bash + + pip install apache-airflow-providers-apache-arrow[common.sql] + + +============================================================================================================ ============== +Dependent package Extra +============================================================================================================ ============== +`apache-airflow-providers-common-sql `_ ``common.sql`` +============================================================================================================ ============== + +Downloading official packages +----------------------------- + +You can download officially released packages and verify their checksums and signatures from the +`Official Apache Download site `_ + +* `The apache-airflow-providers-apache-arrow 0.1.0 sdist package `_ (`asc `__, `sha512 `__) +* `The apache-airflow-providers-apache-arrow 0.1.0 wheel package `_ (`asc `__, `sha512 `__) diff --git a/providers/apache/arrow/docs/installing-providers-from-sources.rst b/providers/apache/arrow/docs/installing-providers-from-sources.rst new file mode 100644 index 0000000000000..fdbb17d017579 --- /dev/null +++ b/providers/apache/arrow/docs/installing-providers-from-sources.rst @@ -0,0 +1,18 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. 
See the License for the + specific language governing permissions and limitations + under the License. + +.. include:: /../../../../devel-common/src/sphinx_exts/includes/installing-providers-from-sources.rst diff --git a/providers/apache/arrow/docs/integration-logos/apache-arrow.png b/providers/apache/arrow/docs/integration-logos/apache-arrow.png new file mode 100644 index 0000000000000..164401d1eeff8 Binary files /dev/null and b/providers/apache/arrow/docs/integration-logos/apache-arrow.png differ diff --git a/providers/apache/arrow/docs/integration-logos/arrow-inverse.png b/providers/apache/arrow/docs/integration-logos/arrow-inverse.png new file mode 100644 index 0000000000000..7475df94002fa Binary files /dev/null and b/providers/apache/arrow/docs/integration-logos/arrow-inverse.png differ diff --git a/providers/apache/arrow/docs/redirects.txt b/providers/apache/arrow/docs/redirects.txt new file mode 100644 index 0000000000000..3a3c336a8474d --- /dev/null +++ b/providers/apache/arrow/docs/redirects.txt @@ -0,0 +1 @@ +connections/index.rst connections/adbc.rst diff --git a/providers/apache/arrow/docs/security.rst b/providers/apache/arrow/docs/security.rst new file mode 100644 index 0000000000000..351ff007ebf2f --- /dev/null +++ b/providers/apache/arrow/docs/security.rst @@ -0,0 +1,18 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. 
See the License for the + specific language governing permissions and limitations + under the License. + +.. include:: /../../../../devel-common/src/sphinx_exts/includes/security.rst diff --git a/providers/apache/arrow/provider.yaml b/providers/apache/arrow/provider.yaml new file mode 100644 index 0000000000000..122df0b1d17b5 --- /dev/null +++ b/providers/apache/arrow/provider.yaml @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +--- +package-name: apache-airflow-providers-apache-arrow +name: Apache Arrow + +state: ready +lifecycle: incubation +source-date-epoch: 1744788833 +description: | + `Apache Arrow <https://arrow.apache.org/>`__ +# Note that those versions are maintained by release manager - do not update them manually +# with the exception of case where other provider in sources has >= new provider version.
+# In such case adding >= NEW_VERSION and bumping to NEW_VERSION in a provider have +# to be done in the same PR +versions: + - 0.1.0 + +integrations: + - integration-name: Apache Arrow + external-doc-url: https://arrow.apache.org/ + logo: /docs/integration-logos/arrow-inverse.png + tags: [apache] + +hooks: + - integration-name: Apache Arrow + python-modules: + - airflow.providers.apache.arrow.hooks.adbc + +connection-types: + - hook-class-name: airflow.providers.apache.arrow.hooks.adbc.AdbcHook + connection-type: adbc diff --git a/providers/apache/arrow/pyproject.toml b/providers/apache/arrow/pyproject.toml new file mode 100644 index 0000000000000..ab87e7d6cd0d0 --- /dev/null +++ b/providers/apache/arrow/pyproject.toml @@ -0,0 +1,143 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! 
+ +# IF YOU WANT TO MODIFY THIS FILE EXCEPT DEPENDENCIES, YOU SHOULD MODIFY THE TEMPLATE +# `pyproject_TEMPLATE.toml.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY +[build-system] +requires = ["flit_core==3.12.0"] +build-backend = "flit_core.buildapi" + +[project] +name = "apache-airflow-providers-apache-arrow" +version = "0.1.0" +description = "Provider package apache-airflow-providers-apache-arrow for Apache Airflow" +readme = "README.rst" +license = "Apache-2.0" +license-files = ['LICENSE', 'NOTICE'] +authors = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +maintainers = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +keywords = [ "airflow-provider", "apache.arrow", "airflow", "integration" ] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "Environment :: Web Environment", + "Intended Audience :: Developers", + "Intended Audience :: System Administrators", + "Framework :: Apache Airflow", + "Framework :: Apache Airflow :: Provider", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", + "Topic :: System :: Monitoring", +] +requires-python = ">=3.10" + +# The dependencies should be modified in place in the generated file. 
+# Any change in the dependencies is preserved when the file is regenerated +# Make sure to run ``prek update-providers-dependencies --all-files`` +# After you modify the dependencies, and rebuild your Breeze CI image with ``breeze ci-image build`` +dependencies = [ + "apache-airflow>=2.11.0", + "apache-airflow-providers-common-sql>=1.28.2", + "adbc-driver-manager>=1.7.0", + "importlib-resources>=1.3", + "pyarrow>=16.1.0; python_version < '3.13'", + "pyarrow>=18.0.0; python_version >= '3.13'", +] + +# The optional dependencies should be modified in place in the generated file +# Any change in the dependencies is preserved when the file is regenerated +[project.optional-dependencies] +"sqlite" = [ + "adbc-driver-sqlite>=1.7.0" +] +"snowflake" = [ + "adbc-driver-snowflake>=1.7.0" +] +"bigquery" = [ + "adbc-driver-bigquery>=1.7.0" +] +"flightsql" = [ + "adbc-driver-flightsql>=1.7.0" +] +"postgresql" = [ + "adbc-driver-postgresql>=1.7.0" +] +"all" = [ + "adbc-driver-sqlite>=1.7.0", + "adbc-driver-snowflake>=1.7.0", + "adbc-driver-bigquery>=1.7.0", + "adbc-driver-flightsql>=1.7.0", + "adbc-driver-postgresql>=1.7.0" +] + +[dependency-groups] +dev = [ + "apache-airflow", + "apache-airflow-task-sdk", + "apache-airflow-devel-common", + "apache-airflow-providers-common-sql", + # Additional devel dependencies (do not remove this line and add extra development dependencies) +] + +# To build docs: +# +# uv run --group docs build-docs +# +# To enable auto-refreshing build with server: +# +# uv run --group docs build-docs --autobuild +# +# To see more options: +# +# uv run --group docs build-docs --help +# +docs = [ + "apache-airflow-devel-common[docs]" +] + +[tool.uv.sources] +# These names must match the names as defined in the pyproject.toml of the workspace items, +# *not* the workspace folder paths +apache-airflow = {workspace = true} +apache-airflow-devel-common = {workspace = true} +apache-airflow-task-sdk = {workspace = true} +apache-airflow-providers-common-sql = 
{workspace = true} +apache-airflow-providers-standard = {workspace = true} + +[project.urls] +"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-apache-arrow/0.1.0" +"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-apache-arrow/0.1.0/changelog.html" +"Bug Tracker" = "https://github.com/apache/airflow/issues" +"Source Code" = "https://github.com/apache/airflow" +"Slack Chat" = "https://s.apache.org/airflow-slack" +"Mastodon" = "https://fosstodon.org/@airflow" +"YouTube" = "https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/" + +[project.entry-points."apache_airflow_provider"] +provider_info = "airflow.providers.apache.arrow.get_provider_info:get_provider_info" + +[tool.flit.module] +name = "airflow.providers.apache.arrow" diff --git a/providers/apache/arrow/src/airflow/__init__.py b/providers/apache/arrow/src/airflow/__init__.py new file mode 100644 index 0000000000000..e8fd22856438c --- /dev/null +++ b/providers/apache/arrow/src/airflow/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/providers/apache/arrow/src/airflow/providers/__init__.py b/providers/apache/arrow/src/airflow/providers/__init__.py new file mode 100644 index 0000000000000..e8fd22856438c --- /dev/null +++ b/providers/apache/arrow/src/airflow/providers/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/providers/apache/arrow/src/airflow/providers/apache/__init__.py b/providers/apache/arrow/src/airflow/providers/apache/__init__.py new file mode 100644 index 0000000000000..e8fd22856438c --- /dev/null +++ b/providers/apache/arrow/src/airflow/providers/apache/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/providers/apache/arrow/src/airflow/providers/apache/arrow/LICENSE b/providers/apache/arrow/src/airflow/providers/apache/arrow/LICENSE new file mode 100644 index 0000000000000..11069edd79019 --- /dev/null +++ b/providers/apache/arrow/src/airflow/providers/apache/arrow/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/providers/apache/arrow/src/airflow/providers/apache/arrow/__init__.py b/providers/apache/arrow/src/airflow/providers/apache/arrow/__init__.py new file mode 100644 index 0000000000000..e76bded9f7ffd --- /dev/null +++ b/providers/apache/arrow/src/airflow/providers/apache/arrow/__init__.py @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE +# OVERWRITTEN WHEN PREPARING DOCUMENTATION FOR THE PACKAGES. 
+# +# IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE +# `PROVIDER__INIT__PY_TEMPLATE.py.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY +# +from __future__ import annotations + +import packaging.version + +from airflow import __version__ as airflow_version + +__all__ = ["__version__"] + +__version__ = "0.1.0" + +if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse( + "2.11.0" +): + raise RuntimeError( + f"The package `apache-airflow-providers-apache-arrow:{__version__}` needs Apache Airflow 2.11.0+" + ) diff --git a/providers/apache/arrow/src/airflow/providers/apache/arrow/get_provider_info.py b/providers/apache/arrow/src/airflow/providers/apache/arrow/get_provider_info.py new file mode 100644 index 0000000000000..aebef83cf39a3 --- /dev/null +++ b/providers/apache/arrow/src/airflow/providers/apache/arrow/get_provider_info.py @@ -0,0 +1,49 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! 
+#
+# IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE
+# `get_provider_info_TEMPLATE.py.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY
+
+
+def get_provider_info():
+    return {
+        "package-name": "apache-airflow-providers-apache-arrow",
+        "name": "Apache Arrow",
+        "description": "`Apache Arrow <https://arrow.apache.org/>`__\n",
+        "integrations": [
+            {
+                "integration-name": "Apache Arrow",
+                "external-doc-url": "https://arrow.apache.org/",
+                "logo": "/docs/integration-logos/arrow-inverse.png",
+                "tags": ["apache"],
+            }
+        ],
+        "hooks": [
+            {
+                "integration-name": "Apache Arrow",
+                "python-modules": ["airflow.providers.apache.arrow.hooks.adbc"],
+            }
+        ],
+        "connection-types": [
+            {
+                "hook-class-name": "airflow.providers.apache.arrow.hooks.adbc.AdbcHook",
+                "connection-type": "adbc",
+            }
+        ],
+    }
diff --git a/providers/apache/arrow/src/airflow/providers/apache/arrow/hooks/__init__.py b/providers/apache/arrow/src/airflow/providers/apache/arrow/hooks/__init__.py
new file mode 100644
index 0000000000000..13a83393a9124
--- /dev/null
+++ b/providers/apache/arrow/src/airflow/providers/apache/arrow/hooks/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/providers/apache/arrow/src/airflow/providers/apache/arrow/hooks/adbc.py b/providers/apache/arrow/src/airflow/providers/apache/arrow/hooks/adbc.py
new file mode 100644
index 0000000000000..ebaaf3487ec60
--- /dev/null
+++ b/providers/apache/arrow/src/airflow/providers/apache/arrow/hooks/adbc.py
@@ -0,0 +1,364 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import contextlib
+import itertools
+import re
+from collections.abc import Iterable, Mapping
+from contextlib import closing
+from functools import cached_property
+from typing import Any
+
+from adbc_driver_manager.dbapi import Connection, connect
+from more_itertools import chunked
+from pyarrow import RecordBatch, Schema, array, schema
+
+from airflow.providers.common.sql.dialects.dialect import Dialect
+from airflow.providers.common.sql.hooks.sql import DbApiHook
+
+
+def fetch_all_handler(cursor) -> list[tuple] | None:
+    """
+    Return all rows of the cursor's current result set, for use as a ``DbApiHook.run()`` handler.
+
+    The result is fetched as an Arrow table and transposed into a list of row tuples.
+
+    :param cursor: Cursor on which a statement has already been executed.
+    :return: List of row tuples, or ``None`` when ``cursor.description`` is ``None``.
+    :raises RuntimeError: If the cursor has no ``description`` attribute.
+    """
+    if not hasattr(cursor, "description"):
+        raise RuntimeError(
+            "The database we interact with does not support DBAPI 2.0. Use operator and "
+            "handlers that are specifically designed for your database."
+        )
+    if cursor.description is not None:
+        # to_pydict() maps column name -> list of values; zip(*columns) turns columns into rows.
+        return list(zip(*cursor.fetch_arrow_table().to_pydict().values()))
+    return None
+
+
+def replace_placeholders(sql: str, placeholder: str) -> str:
+    """
+    Replace each match of ``placeholder`` in ``sql`` with ``$1``, ``$2``, ``$3`` ... in order.
+
+    :param sql: SQL statement containing positional placeholders.
+    :param placeholder: Regular expression matching a single placeholder; pass a literal
+        placeholder such as ``?`` or ``%s`` through ``re.escape`` first.
+    :return: The statement with every match replaced by a numbered placeholder.
+    """
+    numbers = itertools.count(1)
+    return re.sub(placeholder, lambda _match: f"${next(numbers)}", sql)
+
+
+# https://arrow.apache.org/adbc/current/python/api/adbc_driver_manager.html
+# https://arrow.apache.org/docs/python/
+class AdbcHook(DbApiHook):
+    """
+    General-purpose Airflow hook for interacting with databases via the Arrow Database Connectivity (ADBC) standard.
+
+    This hook enables connections to any database supported by an ADBC driver, using the Python ADBC driver manager.
+    It provides methods for executing SQL queries, inserting rows in bulk, and handling Arrow-native data transfers.
+
+    Key Features:
+        - Supports chunked and batched inserts using Apache Arrow RecordBatches for efficient data transfer.
+        - Discovers and loads ADBC drivers dynamically based on connection extras or naming conventions.
+        - Handles dialect-specific connection URIs and driver entrypoints.
+        - Integrates with Airflow's connection system (conn_id, extras, etc.).
+        - Provides custom placeholder replacement for parameterized SQL queries.
+        - Supports both native Arrow binding and DBAPI executemany for inserts.
+        - Exposes configuration via connection extras: driver, entrypoint, db_kwargs, conn_kwargs, dialect.
+
+    Connection Extras:
+        - driver: Name of the ADBC driver to use (e.g., "adbc_driver_postgresql").
+        - entrypoint: Optional Python entrypoint for the driver.
+        - db_kwargs: Dict of keyword arguments passed to the database connection.
+        - conn_kwargs: Dict of keyword arguments passed to the driver connect function.
+        - dialect: SQL dialect name (default: "default").
+
+    Example usage:
+        hook = AdbcHook(adbc_conn_id="my_adbc_conn")
+        records = hook.get_records("SELECT * FROM my_table")
+
+    For more details, see:
+        - Apache Arrow ADBC Python API: https://arrow.apache.org/adbc/current/python/api/adbc_driver_manager.html
+        - Airflow SQL hooks: https://airflow.apache.org/docs/apache-airflow/stable/howto/custom-operator.html#hooks
+    """
+
+    conn_name_attr = "adbc_conn_id"
+    default_conn_name = "adbc_default"
+    conn_type = "adbc"
+    hook_name = "ADBC Connection"
+    supports_autocommit = True
+
+    @classmethod
+    def get_ui_field_behaviour(cls) -> dict[str, Any]:
+        """Get custom field behaviour."""
+        return {
+            "hidden_fields": ["port", "schema"],
+            "relabeling": {"host": "Connection URL"},
+        }
+
+    @cached_property
+    def _driver_path(self) -> str:
+        """
+        Locate the shared library of the configured ADBC driver.
+
+        Lookup order: the installed driver package (wheel), ``sys.prefix/lib``, ``sys.prefix/bin``.
+        When none of them contains the library, the bare driver name is returned.
+        """
+        import pathlib
+        import sys
+        from importlib import resources
+
+        try:
+            # Wheels bundle the shared library
+            package_root = resources.files(self.driver)
+        except (ImportError, TypeError):
+            # The driver is not installed as a Python package (for example a system-wide or Conda
+            # install, or a driver without a wheel): continue with the remaining lookups instead of
+            # failing before they are tried.
+            package_root = None
+        if package_root is not None:
+            # The filename is always the same regardless of platform
+            entrypoint = package_root.joinpath(f"lib{self.driver}.so")
+            if entrypoint.is_file():
+                return str(entrypoint)
+
+        # Search sys.prefix + '/lib' (Unix, Conda on Unix)
+        root = pathlib.Path(sys.prefix)
+        for filename in (f"lib{self.driver}.so", f"lib{self.driver}.dylib"):
+            entrypoint = root.joinpath("lib", filename)
+            if entrypoint.is_file():
+                return str(entrypoint)
+
+        # Conda on Windows
+        entrypoint = root.joinpath("bin", f"{self.driver}.dll")
+        if entrypoint.is_file():
+            return str(entrypoint)
+
+        # Let the driver manager fall back to (DY)LD_LIBRARY_PATH/PATH
+        # (It will insert 'lib', 'so', etc. as needed)
+        return self.driver
+
+    @cached_property
+    def uri(self) -> str:
+        """
+        Return the URI handed to the driver.
+
+        A host containing ``::`` is returned verbatim; otherwise the ``<conn_type>://`` prefix of
+        ``get_uri()`` is replaced with ``<dialect_name>://`` (lower-cased, ``_`` turned into ``-``).
+        """
+        host = self.connection.host
+        if host and "::" in str(host):
+            return str(host)
+        uri = self.get_uri()
+        return uri.replace(
+            f"{self.conn_type.lower().replace('_', '-')}://",
+            f"{self.dialect_name.lower().replace('_', '-')}://",
+        )
+
+    @cached_property
+    def driver(self) -> str:
+        """Return the ``driver`` extra, defaulting to ``adbc_driver_<dialect name>``."""
+        return self.connection_extra_lower.get("driver") or f"adbc_driver_{self.dialect_name}"
+
+    @cached_property
+    def entrypoint(self) -> str | None:
+        """Return the ``entrypoint`` extra, or ``None`` when it is not set."""
+        return self.connection_extra_lower.get("entrypoint")
+
+    @cached_property
+    def db_kwargs(self) -> dict:
+        """Return the ``db_kwargs`` extra merged over a default ``uri`` entry."""
+        return {"uri": self.uri, **self.connection_extra_lower.get("db_kwargs", {})}
+
+    @cached_property
+    def conn_kwargs(self) -> dict:
+        """Return the ``conn_kwargs`` extra, or an empty dict when it is not set."""
+        return self.connection_extra_lower.get("conn_kwargs", {})
+
+    @cached_property
+    def dialect_name(self) -> str:
+        """Return the ``dialect`` extra, defaulting to ``"default"``."""
+        return self.connection_extra_lower.get("dialect", "default")
+
+    def get_conn(self) -> Connection:
+        """Open a new ADBC DBAPI connection with autocommit disabled."""
+        return connect(
+            driver=self._driver_path,
+            entrypoint=self.entrypoint,
+            db_kwargs=self.db_kwargs,
+            conn_kwargs=self.conn_kwargs,
+            autocommit=False,
+        )
+
+    def get_records(
+        self,
+        sql: str | list[str],
+        parameters: Iterable | Mapping[str, Any] | None = None,
+    ) -> Any:
+        """
+        Execute the sql and return a set of records.
+
+        :param sql: the sql statement to be executed (str) or a list of sql statements to execute
+        :param parameters: The parameters to render the SQL query with.
+        """
+        return self.run(sql=sql, parameters=parameters, handler=fetch_all_handler)
+
+    def _run_command(self, cur, sql_statement, parameters):
+        """Run a statement using an already open cursor."""
+        if parameters:
+            sql_statement = replace_placeholders(sql_statement, re.escape(self.dialect.placeholder))
+
+        super()._run_command(cur, sql_statement, parameters)
+
+    def _generate_insert_sql(self, table, values, target_fields=None, replace: bool = False, **kwargs) -> str:
+        """Build the insert statement via the parent class and number its placeholders (``$1``, ...)."""
+        sql_statement = super()._generate_insert_sql(
+            table, values, target_fields=target_fields, replace=replace, **kwargs
+        )
+        sql_statement = replace_placeholders(sql_statement, re.escape(self.dialect.placeholder))
+
+        if self.log_sql:
+            self.log.info("Running statement: %s", sql_statement)
+
+        return sql_statement
+
+    @classmethod
+    def _to_record_batch(cls, rows, schema: Schema) -> RecordBatch:
+        """Build a RecordBatch from row tuples; value ``i`` of each row fills field ``i`` of ``schema``."""
+        return RecordBatch.from_arrays(
+            [array([row[index] for row in rows], type=field.type) for index, field in enumerate(schema)],
+            schema=schema,
+        )
+
+    @classmethod
+    def _execute_native_bind(cls, cursor, statement: str, record_batch: RecordBatch) -> None:
+        """Execute a statement using native Arrow bind on the cursor."""
+        cursor.bind(record_batch)
+        cursor.execute(statement)
+
+    @classmethod
+    def _execute_executemany(cls, cursor, statement: str, record_batch: RecordBatch) -> None:
+        """Execute a statement using cursor.executemany."""
+        cursor.executemany(statement, record_batch)
+
+    def insert_rows(
+        self,
+        table,
+        rows,
+        target_fields=None,
+        commit_every=1000,
+        replace=False,
+        *,
+        executemany=False,
+        fast_executemany=False,
+        autocommit=False,
+        **kwargs,
+    ):
+        """
+        Insert a collection of tuples into a table.
+
+        Rows are inserted in chunks, each chunk (of size ``commit_every``) is
+        done in a new transaction.
+
+        :param table: Name of the target table
+        :param rows: The rows to insert into the table
+        :param target_fields: The names of the columns to fill in the table
+        :param commit_every: The maximum number of rows to insert in one
+            transaction. Set to 0 to insert all rows in one transaction.
+        :param replace: Whether to replace instead of insert
+        :param executemany: If True, all rows are inserted at once in
+            chunks defined by the commit_every parameter. This only works if all rows
+            have same number of column names, but leads to better performance.
+        :param fast_executemany: If True, the `fast_executemany` parameter will be set on the
+            cursor used by `executemany` which leads to better performance, if supported by driver.
+        :param autocommit: What to set the connection's autocommit setting to
+            before executing the query.
+        :raises KeyError: If a name in ``target_fields`` is not a column of ``table``.
+        """
+        nb_rows = 0
+
+        with self._create_autocommit_connection(autocommit) as conn:
+            table_name, schema_name = Dialect.extract_schema_from_table(table)
+
+            table_schema = conn.adbc_get_table_schema(
+                table_name=table_name,
+                db_schema_filter=schema_name,
+            )
+
+            if not target_fields:
+                target_fields = table_schema.names
+            else:
+                # Keep the caller's column order: row values and the generated INSERT statement are
+                # positional, so the schema must list the fields exactly as target_fields does.
+                table_schema = schema([table_schema.field(name) for name in target_fields])
+
+            self.log.info("target fields: %s", target_fields)
+            self.log.info("table_schema: %s", table_schema)
+
+            sql = self._generate_insert_sql(
+                table,
+                target_fields,  # values not needed — parameters will come from RecordBatch
+                target_fields,
+                replace,
+                **kwargs,
+            )
+
+            if commit_every:
+                row_chunks = chunked(rows, commit_every)
+            else:
+                # chunked() yields nothing for a chunk size of 0, so build the documented
+                # "all rows in one transaction" chunk explicitly.
+                all_rows = list(rows)
+                row_chunks = [all_rows] if all_rows else []
+
+            with closing(conn.cursor()) as cur:
+                use_native_bind = hasattr(cur, "bind")
+
+                # If native bind is not available, consider executemany path and
+                # try to enable fast_executemany if requested and supported.
+                if not use_native_bind and (self.supports_executemany or executemany):
+                    if fast_executemany:
+                        with contextlib.suppress(AttributeError):
+                            # Try to set the fast_executemany attribute
+                            cur.fast_executemany = True
+                            self.log.info(
+                                "Fast_executemany is enabled for conn_id '%s'!",
+                                self.get_conn_id(),
+                            )
+
+                # Choose the execution callable once based on cursor capability
+                if use_native_bind:
+                    self.log.info("Native Arrow bind supported!")
+                    execute_batch = self._execute_native_bind
+                else:
+                    execute_batch = self._execute_executemany
+
+                for chunked_rows in row_chunks:
+                    batch = self._to_record_batch(rows=chunked_rows, schema=table_schema)
+                    execute_batch(cur, sql, batch)
+
+                    conn.commit()
+
+                    nb_rows += batch.num_rows
+                    self.log.info("Loaded %s rows into %s so far", nb_rows, table)
+
+        self.log.info("Done loading. Loaded a total of %s rows into %s", nb_rows, table)
diff --git a/providers/apache/arrow/tests/conftest.py b/providers/apache/arrow/tests/conftest.py
new file mode 100644
index 0000000000000..f56ccce0a3f69
--- /dev/null
+++ b/providers/apache/arrow/tests/conftest.py
@@ -0,0 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations + +pytest_plugins = "tests_common.pytest_plugin" diff --git a/providers/apache/arrow/tests/system/__init__.py b/providers/apache/arrow/tests/system/__init__.py new file mode 100644 index 0000000000000..e8fd22856438c --- /dev/null +++ b/providers/apache/arrow/tests/system/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/providers/apache/arrow/tests/system/apache/__init__.py b/providers/apache/arrow/tests/system/apache/__init__.py new file mode 100644 index 0000000000000..e8fd22856438c --- /dev/null +++ b/providers/apache/arrow/tests/system/apache/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/providers/apache/arrow/tests/system/apache/arrow/__init__.py b/providers/apache/arrow/tests/system/apache/arrow/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/providers/apache/arrow/tests/system/apache/arrow/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/providers/apache/arrow/tests/unit/__init__.py b/providers/apache/arrow/tests/unit/__init__.py new file mode 100644 index 0000000000000..e8fd22856438c --- /dev/null +++ b/providers/apache/arrow/tests/unit/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/providers/apache/arrow/tests/unit/apache/__init__.py b/providers/apache/arrow/tests/unit/apache/__init__.py new file mode 100644 index 0000000000000..e8fd22856438c --- /dev/null +++ b/providers/apache/arrow/tests/unit/apache/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/providers/apache/arrow/tests/unit/apache/arrow/__init__.py b/providers/apache/arrow/tests/unit/apache/arrow/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/providers/apache/arrow/tests/unit/apache/arrow/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/providers/apache/arrow/tests/unit/apache/arrow/hooks/__init__.py b/providers/apache/arrow/tests/unit/apache/arrow/hooks/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/providers/apache/arrow/tests/unit/apache/arrow/hooks/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/providers/apache/arrow/tests/unit/apache/arrow/hooks/test_adbc.py b/providers/apache/arrow/tests/unit/apache/arrow/hooks/test_adbc.py new file mode 100644 index 0000000000000..179d08687c82c --- /dev/null +++ b/providers/apache/arrow/tests/unit/apache/arrow/hooks/test_adbc.py @@ -0,0 +1,165 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from __future__ import annotations + +import importlib +import json +import logging +from unittest import mock + +import pytest +from adbc_driver_manager import dbapi +from adbc_driver_manager.dbapi import Cursor +from pyarrow import field, schema, string + +from airflow.models import Connection +from airflow.providers.apache.arrow.hooks.adbc import AdbcHook +from airflow.providers.common.sql.dialects.dialect import Dialect + + +class TestAdbcHook: + def setup_method(self): + # Create a MagicMock cursor similar to DbApiHook tests + self.cur = mock.MagicMock(rowcount=0, fast_executemany=False) + self.conn = mock.MagicMock() + self.conn.cursor.return_value = self.cur + # Schema and extras that the hook might read + # Provide a real pyarrow Schema so _to_record_batch can build RecordBatch + self.conn.adbc_get_table_schema.return_value = schema([field("col", string())]) + self.conn.extra_dejson = {} + conn = self.conn + + logging.root.disabled = True + + # Instantiate the hook under test + self.hook = self.make_hook_for_conn(conn) + + # Ensure the cursor used in unit tests has a native bind method + # to simulate native Arrow bind support. + self.cur.bind = mock.MagicMock() + + # Make fetch_arrow_table().to_pydict() return a simple column mapping + arrow_table = mock.MagicMock() + arrow_table.to_pydict.return_value = {"col": [1, 2]} + self.cur.fetch_arrow_table.return_value = arrow_table + + def make_hook_for_conn(self, conn): + """ + Return an AdbcHook subclass instance bound to the provided conn. + + Tests previously redefined this subclass locally in multiple places. + This helper centralizes that logic so tests can simply call + `self.make_hook_for_conn(conn)`. 
+ """ + + class AdbcHookMock(AdbcHook): + conn_name_attr = "adbc_default" + + @classmethod + def get_connection(cls, conn_id: str) -> Connection: + return conn + + def get_conn(self): + return conn + + @property + def dialect(self): + return Dialect(self) + + def get_db_log_messages(self, _conn) -> None: + return _conn.get_messages() + + return AdbcHookMock() + + def test_get_records_fetch_all_handler(self): + result = self.hook.get_records("SELECT 1") + assert result == [(1,), (2,)] + + def test_insert_rows_native_bind(self): + table = "table" + rows = [("a",), ("b",)] + + # Native bind supported (cursor has bind attribute) + self.hook.insert_rows(table, rows) + + assert self.cur.bind.called + assert self.cur.execute.called + assert self.conn.commit.call_count >= 1 + + def test_insert_rows_fast_executemany_not_supported(self): + # Cursor without native bind that doesn't support setting fast_executemany + class NoFastExecCursor(mock.MagicMock): + def __setattr__(self, name, value): + if name == "fast_executemany": + raise AttributeError("fast_executemany not supported") + super().__setattr__(name, value) + + cur = NoFastExecCursor(spec=Cursor) + delattr(cur, "bind") # Remove bind to simulate no native bind support + conn = mock.MagicMock() + conn.cursor.return_value = cur + conn.adbc_get_table_schema.return_value = schema([field("col", string())]) + conn.extra_dejson = {} + hook = self.make_hook_for_conn(conn) + + table = "table" + rows = [("x",), ("y",)] + + hook.insert_rows(table, rows, executemany=True, fast_executemany=True) + + assert cur.executemany.called + assert conn.commit.call_count >= 1 + + def test_insert_rows_fast_executemany_supported(self): + # Cursor without native bind but supports setting fast_executemany + cur = mock.MagicMock(spec=Cursor) + delattr(cur, "bind") # Remove bind to simulate no native bind support + conn = mock.MagicMock() + conn.cursor.return_value = cur + conn.adbc_get_table_schema.return_value = schema([field("col", string())]) + 
conn.extra_dejson = {} + hook = self.make_hook_for_conn(conn) + + table = "table" + rows = [("x",), ("y",)] + + hook.insert_rows(table, rows, executemany=True, fast_executemany=True) + + assert cur.fast_executemany + assert cur.executemany.called + assert conn.commit.call_count >= 1 + + @pytest.mark.skipif( + importlib.util.find_spec("adbc_driver_sqlite") is None, + reason="adbc_driver_sqlite not installed", + ) + def test_dbapi_connection(self, create_connection_without_db): + create_connection_without_db( + Connection( + conn_id="adbc_default", + conn_type="adbc", + host="file::memory:?cache=shared", + extra=json.dumps( + { + "driver": "adbc_driver_sqlite", + } + ), + ) + ) + + with AdbcHook()._create_autocommit_connection() as adbc_conn: + assert isinstance(adbc_conn, dbapi.Connection) diff --git a/pyproject.toml b/pyproject.toml index b356a0e3cd3f6..b5d5d9ce64d88 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -103,6 +103,9 @@ packages = [] "amazon" = [ "apache-airflow-providers-amazon>=9.0.0" ] +"apache.arrow" = [ + "apache-airflow-providers-apache-arrow>=0.1.0" # Set from local provider pyproject.toml +] "apache.cassandra" = [ "apache-airflow-providers-apache-cassandra>=3.7.0; python_version !=\"3.14\"" ] @@ -394,6 +397,7 @@ packages = [] "apache-airflow-providers-airbyte>=5.0.0", "apache-airflow-providers-alibaba>=3.0.0", "apache-airflow-providers-amazon>=9.0.0", + "apache-airflow-providers-apache-arrow>=0.1.0", # Set from local provider pyproject.toml "apache-airflow-providers-apache-cassandra>=3.7.0; python_version !=\"3.14\"", "apache-airflow-providers-apache-drill>=2.8.1", "apache-airflow-providers-apache-druid>=3.12.0", @@ -1052,6 +1056,8 @@ mypy_path = [ "$MYPY_CONFIG_FILE_DIR/providers/alibaba/tests", "$MYPY_CONFIG_FILE_DIR/providers/amazon/src", "$MYPY_CONFIG_FILE_DIR/providers/amazon/tests", + "$MYPY_CONFIG_FILE_DIR/providers/apache/arrow/src", + "$MYPY_CONFIG_FILE_DIR/providers/apache/arrow/tests", 
"$MYPY_CONFIG_FILE_DIR/providers/apache/cassandra/src", "$MYPY_CONFIG_FILE_DIR/providers/apache/cassandra/tests", "$MYPY_CONFIG_FILE_DIR/providers/apache/drill/src", @@ -1381,6 +1387,7 @@ apache-airflow-shared-timezones = { workspace = true } apache-airflow-providers-airbyte = { workspace = true } apache-airflow-providers-alibaba = { workspace = true } apache-airflow-providers-amazon = { workspace = true } +apache-airflow-providers-apache-arrow = { workspace = true } apache-airflow-providers-apache-cassandra = { workspace = true } apache-airflow-providers-apache-drill = { workspace = true } apache-airflow-providers-apache-druid = { workspace = true } @@ -1514,6 +1521,7 @@ members = [ "providers/airbyte", "providers/alibaba", "providers/amazon", + "providers/apache/arrow", "providers/apache/cassandra", "providers/apache/drill", "providers/apache/druid", diff --git a/scripts/ci/docker-compose/remove-sources.yml b/scripts/ci/docker-compose/remove-sources.yml index bd08c89116b23..41d866cf97b27 100644 --- a/scripts/ci/docker-compose/remove-sources.yml +++ b/scripts/ci/docker-compose/remove-sources.yml @@ -29,6 +29,7 @@ services: - ../../../empty:/opt/airflow/providers/airbyte/src - ../../../empty:/opt/airflow/providers/alibaba/src - ../../../empty:/opt/airflow/providers/amazon/src + - ../../../empty:/opt/airflow/providers/apache/arrow/src - ../../../empty:/opt/airflow/providers/apache/beam/src - ../../../empty:/opt/airflow/providers/apache/cassandra/src - ../../../empty:/opt/airflow/providers/apache/drill/src diff --git a/scripts/ci/docker-compose/tests-sources.yml b/scripts/ci/docker-compose/tests-sources.yml index eb9da3cacebf5..fe4a7e98a8084 100644 --- a/scripts/ci/docker-compose/tests-sources.yml +++ b/scripts/ci/docker-compose/tests-sources.yml @@ -42,6 +42,7 @@ services: - ../../../providers/airbyte/tests:/opt/airflow/providers/airbyte/tests - ../../../providers/alibaba/tests:/opt/airflow/providers/alibaba/tests - 
../../../providers/amazon/tests:/opt/airflow/providers/amazon/tests + - ../../../providers/apache/arrow/tests:/opt/airflow/providers/apache/arrow/tests - ../../../providers/apache/beam/tests:/opt/airflow/providers/apache/beam/tests - ../../../providers/apache/cassandra/tests:/opt/airflow/providers/apache/cassandra/tests - ../../../providers/apache/drill/tests:/opt/airflow/providers/apache/drill/tests diff --git a/uv.lock b/uv.lock index 648d507a80727..80488d12c80ea 100644 --- a/uv.lock +++ b/uv.lock @@ -12,7 +12,7 @@ resolution-markers = [ ] [options] -exclude-newer = "2026-03-23T15:05:34.630541Z" +exclude-newer = "2026-03-24T13:08:45.235091259Z" exclude-newer-span = "P4D" [manifest] @@ -32,6 +32,7 @@ members = [ "apache-airflow-providers-airbyte", "apache-airflow-providers-alibaba", "apache-airflow-providers-amazon", + "apache-airflow-providers-apache-arrow", "apache-airflow-providers-apache-cassandra", "apache-airflow-providers-apache-drill", "apache-airflow-providers-apache-druid", @@ -175,12 +176,46 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/49/8d/58008a9a86075827f99aa8bb75d8db515bb9c34654f95e647cda31987db7/adal-1.2.7-py2.py3-none-any.whl", hash = "sha256:2a7451ed7441ddbc57703042204a3e30ef747478eea022c70f789fc7f084bc3d", size = 55539, upload-time = "2021-04-05T16:33:39.544Z" }, ] +[[package]] +name = "adbc-driver-bigquery" +version = "1.10.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "adbc-driver-manager" }, + { name = "importlib-resources" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/75/1d/2a1cf2fa0c305de32f78224cfccfc7f59e9c6644e79115732bbcdbe52396/adbc_driver_bigquery-1.10.0.tar.gz", hash = "sha256:97bd6dd233fc61c4484c3a0c0d0dedef57ef9fde22c326a9beec675c2040005f", size = 19243, upload-time = "2026-01-09T07:13:43.862Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/4b/a5/a25ae1c62445b1d57381d59485336599861e7776a94405e3092d4cd2e11d/adbc_driver_bigquery-1.10.0-py3-none-macosx_10_15_x86_64.whl", hash = "sha256:a7499e520eaec2f8aaafdac30f919e98e96e59323e85f6fd13d64f96853afac9", size = 9630746, upload-time = "2026-01-09T07:11:07.952Z" }, + { url = "https://files.pythonhosted.org/packages/9a/c3/bc030190729cd017f17256bef626ed9dc1f0f8eb44c8383f6bc26a4cf13e/adbc_driver_bigquery-1.10.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:bbb96def91b1d1fd8992c2e5ee17e7a7aa1e9443603892db7bcc611ad645b927", size = 8967713, upload-time = "2026-01-09T07:11:13.976Z" }, + { url = "https://files.pythonhosted.org/packages/08/af/b84575649675e32cba0008472eee23459b5c984b7ff2aa8be9be783b9fb7/adbc_driver_bigquery-1.10.0-py3-none-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6df2841a64b130f968f7dfceb4813a82d2667716d0218613bf791ea0a54c7372", size = 17724655, upload-time = "2026-01-09T07:11:19.746Z" }, + { url = "https://files.pythonhosted.org/packages/b7/13/59244c6118babec724214e06e9b770e32135d57491960b0803c4dd5d7599/adbc_driver_bigquery-1.10.0-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:986fae9fbc742fa30868e6eee1434a1bbdaf489b4dad9676688d474ef172927c", size = 16269317, upload-time = "2026-01-09T07:11:25.482Z" }, + { url = "https://files.pythonhosted.org/packages/3e/58/ca7ef83bdb482bd8e53a94912afd7ced0933c6172166252f798f180ead25/adbc_driver_bigquery-1.10.0-py3-none-win_amd64.whl", hash = "sha256:b0ea1781537c96719837acc81df8f253f462d66663612749412f5f73e7c7db8c", size = 17402610, upload-time = "2026-01-09T07:11:30.738Z" }, +] + +[[package]] +name = "adbc-driver-flightsql" +version = "1.10.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "adbc-driver-manager" }, + { name = "importlib-resources" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/55/c3/a97fba4960b76b4b3a3055d8c33f6915e7d40a9c67f065fe760d9a17514a/adbc_driver_flightsql-1.10.0.tar.gz", hash = "sha256:aab737ee7c16d0ec89928ef2297c92f815756e91773085d55cc5eabbebcb9338", size = 24516, upload-time = "2026-01-09T07:13:44.793Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/fb/c0d48ded0e75b61bbaff24ef52c89b97ba9b2fbc5caaeb9a102ab17f8f1d/adbc_driver_flightsql-1.10.0-py3-none-macosx_10_15_x86_64.whl", hash = "sha256:a520579be3194e315f35c749afc9cb2ae9b9b7b852c8c2ac5fb9cafa31cdc0c6", size = 7922165, upload-time = "2026-01-09T07:11:35.077Z" }, + { url = "https://files.pythonhosted.org/packages/c8/55/c8bc08ea1e0ba3a35f6307528efa23f11745c5acb90981d2632f5d416659/adbc_driver_flightsql-1.10.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:c6d6f5e93adcc87f41e70adc07a470f865f36f8dd1e6e9ab2b05855bc44274ca", size = 7366098, upload-time = "2026-01-09T07:11:37.799Z" }, + { url = "https://files.pythonhosted.org/packages/50/ed/2cc8683b1f59d5c9c82aaf8f5992b41d19e5abc90393af6b882eab072773/adbc_driver_flightsql-1.10.0-py3-none-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9dfee1c767281c9add95fcf29eac84338107a7610b2d9b19d1169e67083a3eaa", size = 14446811, upload-time = "2026-01-09T07:11:41.532Z" }, + { url = "https://files.pythonhosted.org/packages/b3/5d/1d4c235a04b349d8d5c89e6ca42e11a6b21e6959f48b5847523e87926b4b/adbc_driver_flightsql-1.10.0-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e6212fba5a2a59d7a2a71db1b036730908eb85457df3cc3c90563e4ddadaa923", size = 13257586, upload-time = "2026-01-09T07:11:45.63Z" }, + { url = "https://files.pythonhosted.org/packages/e7/4d/55b96339f18c3932c61bd503c477fabfb7f6c95d4fc4ceb31a1e4275d4fd/adbc_driver_flightsql-1.10.0-py3-none-win_amd64.whl", hash = "sha256:6750c1def8c782469cc33dd883f5c9598086a875432a257adf0522bb1e3b95ca", size = 14231290, upload-time = "2026-01-09T07:11:49.683Z" }, +] + 
[[package]] name = "adbc-driver-manager" version = "1.10.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "typing-extensions" }, ] sdist = { url = "https://files.pythonhosted.org/packages/9e/77/b6ffd112a67d133810d0027e9de4408a6e63e0e1c438f5866cc28eb3c213/adbc_driver_manager-1.10.0.tar.gz", hash = "sha256:f04407cf2f99bfde13dea0e136d87219c8a16678d43e322744dbd84cdd8eaac2", size = 208204, upload-time = "2026-01-09T07:13:45.803Z" } wheels = [ @@ -225,8 +260,8 @@ name = "adbc-driver-postgresql" version = "1.10.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "adbc-driver-manager", marker = "python_full_version < '3.13'" }, - { name = "importlib-resources", marker = "python_full_version < '3.13'" }, + { name = "adbc-driver-manager" }, + { name = "importlib-resources" }, ] sdist = { url = "https://files.pythonhosted.org/packages/14/15/bb1d0458fb79bec75d6392d695f3dd58e3100789e6605a69e2b0e30ba1b5/adbc_driver_postgresql-1.10.0.tar.gz", hash = "sha256:4687d41180a31f6e6081dfb85cbe4b1611498d6b5c462f34ed3b7e68d20ed90d", size = 20747, upload-time = "2026-01-09T07:13:47.118Z" } wheels = [ @@ -237,13 +272,30 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ce/a8/17129f60d0de09947f12dbeb25a107bef7cc265ea4f5fd5d88ed02ba58ce/adbc_driver_postgresql-1.10.0-py3-none-win_amd64.whl", hash = "sha256:81c285c08cf28faae36791f5c790a16b1d87cdee9037af874e4dcf747c21a8e2", size = 3020799, upload-time = "2026-01-09T07:13:06.104Z" }, ] +[[package]] +name = "adbc-driver-snowflake" +version = "1.10.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "adbc-driver-manager" }, + { name = "importlib-resources" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/70/f3/46727eead86527ae9bb05ca9b830cf18481dbe6ac1973d7abb8e81ca0cfb/adbc_driver_snowflake-1.10.0.tar.gz", hash = 
"sha256:56d5d6e3be86684068f86d49a66f85336b8faa9a4b520b98a122c585d007ce83", size = 20168, upload-time = "2026-01-09T07:13:47.87Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ff/cb/f2606741776efc4621607ff6fe565aa073b048174ef0f41dfb7d147dd7c4/adbc_driver_snowflake-1.10.0-py3-none-macosx_10_15_x86_64.whl", hash = "sha256:30237689e6930710b79dcd91fa949f713ead1fd2bb9745fc0ca2b31b876b5ec3", size = 14931557, upload-time = "2026-01-09T07:13:09.198Z" }, + { url = "https://files.pythonhosted.org/packages/34/3b/c7e9aa0da008e321450a0c8c89875dbaa45ee12b277faa29ff7bf7e78d24/adbc_driver_snowflake-1.10.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:5d96885c7e2e291c0e8cbb98fade45d326cf08681773cce9c2f68bca8ab3dd75", size = 13395954, upload-time = "2026-01-09T07:13:12.679Z" }, + { url = "https://files.pythonhosted.org/packages/65/d4/3323a56d0bf8fd0b78294c389d6fe441bc135e32d080892aff341b15af59/adbc_driver_snowflake-1.10.0-py3-none-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:d2822989e137c773c4d7fecafeb0031428379bad28e55020d01fd0ad9534daf2", size = 27112164, upload-time = "2026-01-09T07:13:16.659Z" }, + { url = "https://files.pythonhosted.org/packages/7b/30/1fd5424b0eae94112dd81993eccf23e95a5a72d425832afbb201f68a0014/adbc_driver_snowflake-1.10.0-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:13d47c5e8715001496095201083f6e3b8f69a79305a9e86bfe9a767d13b02916", size = 24220654, upload-time = "2026-01-09T07:13:24.533Z" }, + { url = "https://files.pythonhosted.org/packages/3d/4a/f29fd0d5df8b9070adabf24129ee79f7aa6c5e4a1ed23120e2d68f2afb8b/adbc_driver_snowflake-1.10.0-py3-none-win_amd64.whl", hash = "sha256:dd67757909dc3aa9cdfca32c62b490d90a486ccc3f0ed8676d7b29b98dc3af2e", size = 26781446, upload-time = "2026-01-09T07:13:31.667Z" }, +] + [[package]] name = "adbc-driver-sqlite" version = "1.10.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = 
"adbc-driver-manager", marker = "python_full_version < '3.13'" }, - { name = "importlib-resources", marker = "python_full_version < '3.13'" }, + { name = "adbc-driver-manager" }, + { name = "importlib-resources" }, ] sdist = { url = "https://files.pythonhosted.org/packages/72/34/f4924d98a340835988f135303c7e562f7a388308a3fbb949f357bcb94b04/adbc_driver_sqlite-1.10.0.tar.gz", hash = "sha256:064ed5f1372578d2063cd920514cd2597c6a6db3b0163c2116e373faeedffec6", size = 18298, upload-time = "2026-01-09T07:13:49.001Z" } wheels = [ @@ -869,6 +921,7 @@ all = [ { name = "apache-airflow-providers-airbyte" }, { name = "apache-airflow-providers-alibaba" }, { name = "apache-airflow-providers-amazon", extra = ["aiobotocore", "python3-saml", "s3fs"] }, + { name = "apache-airflow-providers-apache-arrow" }, { name = "apache-airflow-providers-apache-cassandra", marker = "python_full_version != '3.14.*'" }, { name = "apache-airflow-providers-apache-drill" }, { name = "apache-airflow-providers-apache-druid" }, @@ -984,6 +1037,9 @@ amazon = [ amazon-aws-auth = [ { name = "apache-airflow-providers-amazon", extra = ["python3-saml"] }, ] +apache-arrow = [ + { name = "apache-airflow-providers-apache-arrow" }, +] apache-atlas = [ { name = "atlasclient" }, ] @@ -1391,6 +1447,8 @@ requires-dist = [ { name = "apache-airflow-providers-amazon", extras = ["aiobotocore"], marker = "extra == 'aiobotocore'", editable = "providers/amazon" }, { name = "apache-airflow-providers-amazon", extras = ["python3-saml"], marker = "extra == 'amazon-aws-auth'", editable = "providers/amazon" }, { name = "apache-airflow-providers-amazon", extras = ["s3fs"], marker = "extra == 's3fs'", editable = "providers/amazon" }, + { name = "apache-airflow-providers-apache-arrow", marker = "extra == 'all'", editable = "providers/apache/arrow" }, + { name = "apache-airflow-providers-apache-arrow", marker = "extra == 'apache-arrow'", editable = "providers/apache/arrow" }, { name = "apache-airflow-providers-apache-cassandra", marker = 
"python_full_version != '3.14.*' and extra == 'all'", editable = "providers/apache/cassandra" }, { name = "apache-airflow-providers-apache-cassandra", marker = "python_full_version != '3.14.*' and extra == 'apache-cassandra'", editable = "providers/apache/cassandra" }, { name = "apache-airflow-providers-apache-drill", marker = "extra == 'all'", editable = "providers/apache/drill" }, @@ -1596,7 +1654,7 @@ requires-dist = [ { name = "sentry-sdk", marker = "extra == 'sentry'", specifier = ">=2.30.0" }, { name = "uv", marker = "extra == 'uv'", specifier = ">=0.10.12" }, ] -provides-extras = ["all-core", "async", "graphviz", "gunicorn", "kerberos", "memray", "otel", "statsd", "all-task-sdk", "airbyte", "alibaba", "amazon", "apache-cassandra", "apache-drill", "apache-druid", "apache-flink", "apache-hdfs", "apache-hive", "apache-iceberg", "apache-impala", "apache-kafka", "apache-kylin", "apache-livy", "apache-pig", "apache-pinot", "apache-spark", "apache-tinkerpop", "apprise", "arangodb", "asana", "atlassian-jira", "celery", "cloudant", "cncf-kubernetes", "cohere", "common-ai", "common-compat", "common-io", "common-messaging", "common-sql", "databricks", "datadog", "dbt-cloud", "dingding", "discord", "docker", "edge3", "elasticsearch", "exasol", "fab", "facebook", "ftp", "git", "github", "google", "grpc", "hashicorp", "http", "imap", "influxdb", "informatica", "jdbc", "jenkins", "keycloak", "microsoft-azure", "microsoft-mssql", "microsoft-psrp", "microsoft-winrm", "mongo", "mysql", "neo4j", "odbc", "openai", "openfaas", "openlineage", "opensearch", "opsgenie", "oracle", "pagerduty", "papermill", "pgvector", "pinecone", "postgres", "presto", "qdrant", "redis", "salesforce", "samba", "segment", "sendgrid", "sftp", "singularity", "slack", "smtp", "snowflake", "sqlite", "ssh", "standard", "tableau", "telegram", "teradata", "trino", "vertica", "weaviate", "yandex", "ydb", "zendesk", "all", "aiobotocore", "apache-atlas", "apache-webhdfs", "amazon-aws-auth", "cloudpickle", 
"github-enterprise", "google-auth", "ldap", "pandas", "polars", "rabbitmq", "sentry", "s3fs", "uv"] +provides-extras = ["all-core", "async", "graphviz", "gunicorn", "kerberos", "memray", "otel", "statsd", "all-task-sdk", "airbyte", "alibaba", "amazon", "apache-arrow", "apache-cassandra", "apache-drill", "apache-druid", "apache-flink", "apache-hdfs", "apache-hive", "apache-iceberg", "apache-impala", "apache-kafka", "apache-kylin", "apache-livy", "apache-pig", "apache-pinot", "apache-spark", "apache-tinkerpop", "apprise", "arangodb", "asana", "atlassian-jira", "celery", "cloudant", "cncf-kubernetes", "cohere", "common-ai", "common-compat", "common-io", "common-messaging", "common-sql", "databricks", "datadog", "dbt-cloud", "dingding", "discord", "docker", "edge3", "elasticsearch", "exasol", "fab", "facebook", "ftp", "git", "github", "google", "grpc", "hashicorp", "http", "imap", "influxdb", "informatica", "jdbc", "jenkins", "keycloak", "microsoft-azure", "microsoft-mssql", "microsoft-psrp", "microsoft-winrm", "mongo", "mysql", "neo4j", "odbc", "openai", "openfaas", "openlineage", "opensearch", "opsgenie", "oracle", "pagerduty", "papermill", "pgvector", "pinecone", "postgres", "presto", "qdrant", "redis", "salesforce", "samba", "segment", "sendgrid", "sftp", "singularity", "slack", "smtp", "snowflake", "sqlite", "ssh", "standard", "tableau", "telegram", "teradata", "trino", "vertica", "weaviate", "yandex", "ydb", "zendesk", "all", "aiobotocore", "apache-atlas", "apache-webhdfs", "amazon-aws-auth", "cloudpickle", "github-enterprise", "google-auth", "ldap", "pandas", "polars", "rabbitmq", "sentry", "s3fs", "uv"] [package.metadata.requires-dev] dev = [ @@ -1685,7 +1743,7 @@ requires-dist = [ { name = "jinja2", specifier = ">=3.1.5" }, { name = "jsonschema", specifier = ">=4.19.1" }, { name = "packaging", specifier = ">=25.0" }, - { name = "prek", specifier = ">=0.3.6" }, + { name = "prek", specifier = ">=0.3.8" }, { name = "psutil", specifier = ">=5.9.6" }, { name = 
"pygithub", specifier = ">=2.1.1" }, { name = "pytest", specifier = ">=9.0.0" }, @@ -1809,6 +1867,30 @@ statsd = [ { name = "statsd" }, ] +[package.optional-dependencies] +all = [ + { name = "adbc-driver-bigquery" }, + { name = "adbc-driver-flightsql" }, + { name = "adbc-driver-postgresql" }, + { name = "adbc-driver-snowflake" }, + { name = "adbc-driver-sqlite" }, +] +bigquery = [ + { name = "adbc-driver-bigquery" }, +] +flightsql = [ + { name = "adbc-driver-flightsql" }, +] +postgresql = [ + { name = "adbc-driver-postgresql" }, +] +snowflake = [ + { name = "adbc-driver-snowflake" }, +] +sqlite = [ + { name = "adbc-driver-sqlite" }, +] + [package.dev-dependencies] dev = [ { name = "apache-airflow", extra = ["pandas", "polars"] }, @@ -2856,6 +2938,59 @@ dev = [ ] docs = [{ name = "apache-airflow-devel-common", extras = ["docs"], editable = "devel-common" }] +[[package]] +name = "apache-airflow-providers-apache-arrow" +version = "0.1.0" +source = { editable = "providers/apache/arrow" } +dependencies = [ + { name = "adbc-driver-manager" }, + { name = "apache-airflow" }, + { name = "apache-airflow-providers-common-sql" }, + { name = "importlib-resources" }, + { name = "pyarrow" }, +] + +[package.dev-dependencies] +dev = [ + { name = "apache-airflow" }, + { name = "apache-airflow-devel-common" }, + { name = "apache-airflow-providers-common-sql" }, + { name = "apache-airflow-task-sdk" }, +] +docs = [ + { name = "apache-airflow-devel-common", extra = ["docs"] }, +] + +[package.metadata] +requires-dist = [ + { name = "adbc-driver-bigquery", marker = "extra == 'all'", specifier = ">=1.7.0" }, + { name = "adbc-driver-bigquery", marker = "extra == 'bigquery'", specifier = ">=1.7.0" }, + { name = "adbc-driver-flightsql", marker = "extra == 'all'", specifier = ">=1.7.0" }, + { name = "adbc-driver-flightsql", marker = "extra == 'flightsql'", specifier = ">=1.7.0" }, + { name = "adbc-driver-manager", specifier = ">=1.7.0" }, + { name = "adbc-driver-postgresql", marker = "extra == 
'all'", specifier = ">=1.7.0" }, + { name = "adbc-driver-postgresql", marker = "extra == 'postgresql'", specifier = ">=1.7.0" }, + { name = "adbc-driver-snowflake", marker = "extra == 'all'", specifier = ">=1.7.0" }, + { name = "adbc-driver-snowflake", marker = "extra == 'snowflake'", specifier = ">=1.7.0" }, + { name = "adbc-driver-sqlite", marker = "extra == 'all'", specifier = ">=1.7.0" }, + { name = "adbc-driver-sqlite", marker = "extra == 'sqlite'", specifier = ">=1.7.0" }, + { name = "apache-airflow", editable = "." }, + { name = "apache-airflow-providers-common-sql", editable = "providers/common/sql" }, + { name = "importlib-resources", specifier = ">=1.3" }, + { name = "pyarrow", marker = "python_full_version < '3.13'", specifier = ">=16.1.0" }, + { name = "pyarrow", marker = "python_full_version >= '3.13'", specifier = ">=18.0.0" }, +] +provides-extras = ["sqlite", "snowflake", "bigquery", "flightsql", "postgresql", "all"] + +[package.metadata.requires-dev] +dev = [ + { name = "apache-airflow", editable = "." }, + { name = "apache-airflow-devel-common", editable = "devel-common" }, + { name = "apache-airflow-providers-common-sql", editable = "providers/common/sql" }, + { name = "apache-airflow-task-sdk", editable = "task-sdk" }, +] +docs = [{ name = "apache-airflow-devel-common", extras = ["docs"], editable = "devel-common" }] + [[package]] name = "apache-airflow-providers-apache-cassandra" version = "3.9.3"