diff --git a/.github/boring-cyborg.yml b/.github/boring-cyborg.yml index 360de1e96236d..3789452f87371 100644 --- a/.github/boring-cyborg.yml +++ b/.github/boring-cyborg.yml @@ -37,16 +37,10 @@ labelPRBasedOnFilePath: - providers/apache/cassandra/** provider:apache-drill: - - providers/src/airflow/providers/apache/drill/**/* - - docs/apache-airflow-providers-apache-drill/**/* - - providers/tests/apache/drill/**/* - - providers/tests/system/apache/drill/**/* + - providers/apache/drill/** provider:apache-druid: - - providers/src/airflow/providers/apache/druid/**/* - - docs/apache-airflow-providers-apache-druid/**/* - - providers/tests/apache/druid/**/* - - providers/tests/system/apache/druid/**/* + - providers/apache/druid/** provider:apache-flink: - providers/src/airflow/providers/apache/flink/**/* @@ -94,9 +88,7 @@ labelPRBasedOnFilePath: - providers/apprise/** provider:arangodb: - - providers/src/airflow/providers/arangodb/**/* - - docs/apache-airflow-providers-arangodb/**/* - - providers/tests/arangodb/**/* + - providers/arangodb/** provider:asana: - providers/asana/** @@ -147,10 +139,7 @@ labelPRBasedOnFilePath: - providers/datadog/** provider:dbt-cloud: - - providers/src/airflow/providers/dbt/cloud/**/* - - docs/apache-airflow-providers-dbt-cloud/**/* - - providers/tests/dbt/cloud/**/* - - providers/tests/system/dbt/cloud/**/* + - providers/dbt/cloud/** provider:dingding: - providers/src/airflow/providers/dingding/**/* @@ -168,10 +157,7 @@ labelPRBasedOnFilePath: - providers/edge/** provider:elasticsearch: - - providers/src/airflow/providers/elasticsearch/**/* - - docs/apache-airflow-providers-elasticsearch/**/* - - providers/tests/elasticsearch/**/* - - providers/tests/system/elasticsearch/**/* + - providers/elasticsearch/** provider:exasol: - providers/exasol/** @@ -188,10 +174,7 @@ labelPRBasedOnFilePath: - providers/ftp/** provider:github: - - providers/src/airflow/providers/github/**/* - - docs/apache-airflow-providers-github/**/* - - providers/tests/github/**/* - - providers/tests/system/github/**/* + - providers/github/** provider:google: - providers/src/airflow/providers/google/**/* @@ -270,10 +253,7 @@ labelPRBasedOnFilePath: - providers/openlineage/** provider:opensearch: - - providers/src/airflow/providers/opensearch/**/* - - docs/apache-airflow-providers-opensearch/**/* - - providers/tests/opensearch/**/* - - providers/tests/system/opensearch/**/* + - providers/opensearch/** provider:opsgenie: - providers/opsgenie/** @@ -322,7 +302,7 @@ labelPRBasedOnFilePath: - providers/segment/** provider:sendgrid: - - providers/segment/** + - providers/sendgrid/** provider:sftp: - providers/sftp/** diff --git a/.github/workflows/ci-image-checks.yml b/.github/workflows/ci-image-checks.yml index 0448ed40a6f61..faa386a2e981f 100644 --- a/.github/workflows/ci-image-checks.yml +++ b/.github/workflows/ci-image-checks.yml @@ -289,7 +289,9 @@ jobs: key: cache-docs-inventory-v1-${{ hashFiles('pyproject.toml') }} if-no-files-found: 'error' retention-days: '2' - if: steps.restore-docs-inventory-cache != 'true' + # If we upload from multiple matrix jobs we could end up with a race condition. so just pick one job + # to be responsible for updating it. 
https://github.com/actions/upload-artifact/issues/506 + if: steps.restore-docs-inventory-cache != 'true' && matrix.flag == '--docs-only' - name: "Upload build docs" uses: actions/upload-artifact@v4 with: diff --git a/airflow/api_connexion/schemas/task_instance_schema.py b/airflow/api_connexion/schemas/task_instance_schema.py index 360ecdf277e76..b3aa88d96f589 100644 --- a/airflow/api_connexion/schemas/task_instance_schema.py +++ b/airflow/api_connexion/schemas/task_instance_schema.py @@ -60,6 +60,7 @@ class Meta: priority_weight = auto_field() operator = auto_field() queued_dttm = auto_field(data_key="queued_when") + scheduled_dttm = auto_field(data_key="scheduled_when") pid = auto_field() executor = auto_field() executor_config = auto_field() @@ -102,6 +103,7 @@ class Meta: priority_weight = auto_field() operator = auto_field() queued_dttm = auto_field(data_key="queued_when") + scheduled_dttm = auto_field(data_key="scheduled_when") pid = auto_field() executor = auto_field() executor_config = auto_field() diff --git a/airflow/api_fastapi/core_api/datamodels/task_instances.py b/airflow/api_fastapi/core_api/datamodels/task_instances.py index d9c87b972ba9c..eaebe589613f8 100644 --- a/airflow/api_fastapi/core_api/datamodels/task_instances.py +++ b/airflow/api_fastapi/core_api/datamodels/task_instances.py @@ -64,6 +64,7 @@ class TaskInstanceResponse(BaseModel): priority_weight: int | None operator: str | None queued_dttm: datetime | None = Field(alias="queued_when") + scheduled_dttm: datetime | None = Field(alias="scheduled_when") pid: int | None executor: str | None executor_config: Annotated[str, BeforeValidator(str)] @@ -147,6 +148,7 @@ class TaskInstanceHistoryResponse(BaseModel): priority_weight: int | None operator: str | None queued_dttm: datetime | None = Field(alias="queued_when") + scheduled_dttm: datetime | None = Field(alias="scheduled_when") pid: int | None executor: str | None executor_config: Annotated[str, BeforeValidator(str)] diff --git a/airflow/api_fastapi/core_api/openapi/v1-generated.yaml b/airflow/api_fastapi/core_api/openapi/v1-generated.yaml index b4cccbab0aea4..8bf3b1540fc55 100644 --- a/airflow/api_fastapi/core_api/openapi/v1-generated.yaml +++ b/airflow/api_fastapi/core_api/openapi/v1-generated.yaml @@ -9643,6 +9643,12 @@ components: format: date-time - type: 'null' title: Queued When + scheduled_when: + anyOf: + - type: string + format: date-time + - type: 'null' + title: Scheduled When pid: anyOf: - type: integer @@ -9677,6 +9683,7 @@ components: - priority_weight - operator - queued_when + - scheduled_when - pid - executor - executor_config @@ -9770,6 +9777,12 @@ components: format: date-time - type: 'null' title: Queued When + scheduled_when: + anyOf: + - type: string + format: date-time + - type: 'null' + title: Scheduled When pid: anyOf: - type: integer @@ -9828,6 +9841,7 @@ components: - priority_weight - operator - queued_when + - scheduled_when - pid - executor - executor_config diff --git a/airflow/api_fastapi/execution_api/routes/xcoms.py b/airflow/api_fastapi/execution_api/routes/xcoms.py index faacd543fca2b..f330744536b33 100644 --- a/airflow/api_fastapi/execution_api/routes/xcoms.py +++ b/airflow/api_fastapi/execution_api/routes/xcoms.py @@ -28,6 +28,7 @@ from airflow.api_fastapi.execution_api import deps from airflow.api_fastapi.execution_api.datamodels.token import TIToken from airflow.api_fastapi.execution_api.datamodels.xcom import XComResponse +from airflow.models.taskmap import TaskMap from airflow.models.xcom import BaseXCom # TODO: Add 
dependency on JWT token @@ -55,7 +56,7 @@ def get_xcom( map_index: Annotated[int, Query()] = -1, ) -> XComResponse: """Get an Airflow XCom from database - not other XCom Backends.""" - if not has_xcom_access(key, token): + if not has_xcom_access(dag_id, run_id, task_id, key, token): raise HTTPException( status_code=status.HTTP_403_FORBIDDEN, detail={ @@ -104,6 +105,8 @@ def get_xcom( return XComResponse(key=key, value=xcom_value) +# TODO: once we have JWT tokens, then remove dag_id/run_id/task_id from the URL and just use the info in +# the token @router.post( "/{dag_id}/{run_id}/{task_id}/{key}", status_code=status.HTTP_201_CREATED, @@ -139,8 +142,23 @@ def set_xcom( token: deps.TokenDep, session: SessionDep, map_index: Annotated[int, Query()] = -1, + mapped_length: Annotated[ + int | None, Query(description="Number of mapped tasks this value expands into") + ] = None, ): """Set an Airflow XCom.""" + from airflow.configuration import conf + + if not has_xcom_access(dag_id, run_id, task_id, key, token, write=True): + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail={ + "reason": "access_denied", + "message": f"Task does not have access to set XCom key '{key}'", + }, + ) + + # TODO: This is in-efficient. We json.loads it here for BaseXCom.set to then json.dump it! try: json.loads(value) except json.JSONDecodeError: @@ -152,14 +170,30 @@ def set_xcom( }, ) - if not has_xcom_access(key, token): - raise HTTPException( - status_code=status.HTTP_403_FORBIDDEN, - detail={ - "reason": "access_denied", - "message": f"Task does not have access to set XCom key '{key}'", - }, + if mapped_length is not None: + task_map = TaskMap( + dag_id=dag_id, + task_id=task_id, + run_id=run_id, + map_index=map_index, + length=mapped_length, + keys=None, ) + max_map_length = conf.getint("core", "max_map_length", fallback=1024) + if task_map.length > max_map_length: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail={ + "reason": "unmappable_return_value_length", + "message": "pushed value is too large to map as a downstream's dependency", + }, + ) + session.add(task_map) + + # else: + # TODO: Can/should we check if a client _hasn't_ provided this for an upstream of a mapped task? That + # means loading the serialized dag and that seems like a relatively costly operation for minimal benefit + # (the mapped task would fail in a moment as it can't be expanded anyway.) # We use `BaseXCom.set` to set XComs directly to the database, bypassing the XCom Backend. 
try: @@ -184,13 +218,16 @@ def set_xcom( return {"message": "XCom successfully set"} -def has_xcom_access(xcom_key: str, token: TIToken) -> bool: +def has_xcom_access( + dag_id: str, run_id: str, task_id: str, xcom_key: str, token: TIToken, write: bool = False +) -> bool: """Check if the task has access to the XCom.""" # TODO: Placeholder for actual implementation ti_key = token.ti_key log.debug( - "Checking access for task instance with key '%s' to XCom '%s'", + "Checking %s XCom access for xcom from TaskInstance with key '%s' to XCom '%s'", + "write" if write else "read", ti_key, xcom_key, ) diff --git a/airflow/cli/commands/remote_commands/task_command.py b/airflow/cli/commands/remote_commands/task_command.py index eb93a2525b868..011f629d13f56 100644 --- a/airflow/cli/commands/remote_commands/task_command.py +++ b/airflow/cli/commands/remote_commands/task_command.py @@ -46,8 +46,8 @@ from airflow.models import TaskInstance from airflow.models.dag import DAG, _run_inline_trigger from airflow.models.dagrun import DagRun -from airflow.models.param import ParamsDict from airflow.models.taskinstance import TaskReturnCode +from airflow.sdk.definitions.param import ParamsDict from airflow.settings import IS_EXECUTOR_CONTAINER, IS_K8S_EXECUTOR_POD from airflow.ti_deps.dep_context import DepContext from airflow.ti_deps.dependencies_deps import SCHEDULER_QUEUED_DEPS diff --git a/airflow/example_dags/example_params_trigger_ui.py b/airflow/example_dags/example_params_trigger_ui.py index e47ceae556501..ece4056764567 100644 --- a/airflow/example_dags/example_params_trigger_ui.py +++ b/airflow/example_dags/example_params_trigger_ui.py @@ -27,7 +27,7 @@ from airflow.decorators import task from airflow.models.dag import DAG -from airflow.models.param import Param, ParamsDict +from airflow.sdk import Param, ParamsDict from airflow.utils.trigger_rule import TriggerRule # [START params_trigger] diff --git a/airflow/example_dags/example_params_ui_tutorial.py b/airflow/example_dags/example_params_ui_tutorial.py index b64e777bed144..0bf9994c95c70 100644 --- a/airflow/example_dags/example_params_ui_tutorial.py +++ b/airflow/example_dags/example_params_ui_tutorial.py @@ -29,7 +29,7 @@ from airflow.decorators import task from airflow.models.dag import DAG -from airflow.models.param import Param, ParamsDict +from airflow.sdk import Param, ParamsDict with ( DAG( diff --git a/airflow/executors/base_executor.py b/airflow/executors/base_executor.py index a0f48c74b1356..765623d8c9427 100644 --- a/airflow/executors/base_executor.py +++ b/airflow/executors/base_executor.py @@ -59,7 +59,7 @@ # Command to execute - list of strings # the first element is always "airflow". # It should be result of TaskInstance.generate_command method. - CommandType = list[str] + CommandType = Sequence[str] # Task that is queued. It contains all the information that is # needed to run the task. @@ -223,7 +223,12 @@ def has_task(self, task_instance: TaskInstance) -> bool: :param task_instance: TaskInstance :return: True if the task is known to this executor """ - return task_instance.key in self.queued_tasks or task_instance.key in self.running + return ( + task_instance.id in self.queued_tasks + or task_instance.id in self.running + or task_instance.key in self.queued_tasks + or task_instance.key in self.running + ) def sync(self) -> None: """ @@ -319,6 +324,20 @@ def order_queued_tasks_by_priority(self) -> list[tuple[TaskInstanceKey, QueuedTa :return: List of tuples from the queued_tasks according to the priority. 
""" + from airflow.executors import workloads + + if not self.queued_tasks: + return [] + + kind = next(iter(self.queued_tasks.values())) + if isinstance(kind, workloads.BaseWorkload): + # V3 + new executor that supports workloads + return sorted( + self.queued_tasks.items(), + key=lambda x: x[1].ti.priority_weight, + reverse=True, + ) + return sorted( self.queued_tasks.items(), key=lambda x: x[1][1], @@ -332,12 +351,12 @@ def trigger_tasks(self, open_slots: int) -> None: :param open_slots: Number of open slots """ - span = Trace.get_current_span() sorted_queue = self.order_queued_tasks_by_priority() task_tuples = [] + workloads = [] for _ in range(min((open_slots, len(self.queued_tasks)))): - key, (command, _, queue, ti) = sorted_queue.pop(0) + key, item = sorted_queue.pop(0) # If a task makes it here but is still understood by the executor # to be running, it generally means that the task has been killed @@ -375,15 +394,19 @@ def trigger_tasks(self, open_slots: int) -> None: else: if key in self.attempts: del self.attempts[key] - task_tuples.append((key, command, queue, ti.executor_config)) - if span.is_recording(): - span.add_event( - name="task to trigger", - attributes={"command": str(command), "conf": str(ti.executor_config)}, - ) + # TODO: TaskSDK: Compat, remove when KubeExecutor is fully moved over to TaskSDK too. + # TODO: TaskSDK: We need to minimum version requirements on executors with Airflow 3. + # How/where do we do that? Executor loader? + if hasattr(self, "_process_workloads"): + workloads.append(item) + else: + (command, _, queue, ti) = item + task_tuples.append((key, command, queue, getattr(ti, "executor_config", None))) if task_tuples: self._process_tasks(task_tuples) + elif workloads: + self._process_workloads(workloads) # type: ignore[attr-defined] @add_span def _process_tasks(self, task_tuples: list[TaskTuple]) -> None: @@ -625,7 +648,7 @@ def slots_occupied(self): return len(self.running) + len(self.queued_tasks) @staticmethod - def validate_airflow_tasks_run_command(command: list[str]) -> tuple[str | None, str | None]: + def validate_airflow_tasks_run_command(command: Sequence[str]) -> tuple[str | None, str | None]: """ Check if the command to execute is airflow command. 
diff --git a/airflow/executors/workloads.py b/airflow/executors/workloads.py index 4c3eebe6811b9..f3288e2ae219c 100644 --- a/airflow/executors/workloads.py +++ b/airflow/executors/workloads.py @@ -34,7 +34,7 @@ ] -class BaseActivity(BaseModel): +class BaseWorkload(BaseModel): token: str """The identity token for this workload""" @@ -75,7 +75,7 @@ def key(self) -> TaskInstanceKey: ) -class ExecuteTask(BaseActivity): +class ExecuteTask(BaseWorkload): """Execute the given Task.""" ti: TaskInstance diff --git a/airflow/jobs/scheduler_job_runner.py b/airflow/jobs/scheduler_job_runner.py index 92b7c2b0010ed..dd7c6f1f65550 100644 --- a/airflow/jobs/scheduler_job_runner.py +++ b/airflow/jobs/scheduler_job_runner.py @@ -775,7 +775,7 @@ def process_executor_events( "TaskInstance Finished: dag_id=%s, task_id=%s, run_id=%s, map_index=%s, " "run_start_date=%s, run_end_date=%s, " "run_duration=%s, state=%s, executor=%s, executor_state=%s, try_number=%s, max_tries=%s, " - "pool=%s, queue=%s, priority_weight=%d, operator=%s, queued_dttm=%s, " + "pool=%s, queue=%s, priority_weight=%d, operator=%s, queued_dttm=%s, scheduled_dttm=%s," "queued_by_job_id=%s, pid=%s" ) cls.logger().info( @@ -797,6 +797,7 @@ def process_executor_events( ti.priority_weight, ti.operator, ti.queued_dttm, + ti.scheduled_dttm, ti.queued_by_job_id, ti.pid, ) @@ -836,7 +837,6 @@ def process_executor_events( ) if info is not None: msg += " Extra info: %s" % info # noqa: RUF100, UP031, flynt - cls.logger().error(msg) session.add(Log(event="state mismatch", extra=msg, task_instance=ti.key)) # Get task from the Serialized DAG @@ -849,6 +849,9 @@ def process_executor_events( continue ti.task = task if task.on_retry_callback or task.on_failure_callback: + # Only log the error/extra info here, since the `ti.handle_failure()` path will log it + # too, which would lead to double logging + cls.logger().error(msg) request = TaskCallbackRequest( full_filepath=ti.dag_model.fileloc, ti=ti, @@ -1808,6 +1811,7 @@ def _reschedule_stuck_task(self, ti: TaskInstance, session: Session): .values( state=TaskInstanceState.SCHEDULED, queued_dttm=None, + scheduled_dttm=timezone.utcnow(), ) .execution_options(synchronize_session=False) ) @@ -1962,6 +1966,7 @@ def check_trigger_timeouts( state=TaskInstanceState.SCHEDULED, next_method=TRIGGER_FAIL_REPR, next_kwargs={"error": TriggerFailureReason.TRIGGER_TIMEOUT}, + scheduled_dttm=timezone.utcnow(), trigger_id=None, ) ).rowcount diff --git a/airflow/migrations/versions/0057_3_0_0_add_new_task_instance_field_scheduled_.py b/airflow/migrations/versions/0057_3_0_0_add_new_task_instance_field_scheduled_.py new file mode 100644 index 0000000000000..3f464e45d7c60 --- /dev/null +++ b/airflow/migrations/versions/0057_3_0_0_add_new_task_instance_field_scheduled_.py @@ -0,0 +1,64 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +add new task_instance field scheduled_dttm. + +Revision ID: 33b04e4bfa19 +Revises: 8ea135928435 +Create Date: 2025-01-22 11:22:01.272681 + +""" + +from __future__ import annotations + +import sqlalchemy as sa +from alembic import op + +from airflow.utils.sqlalchemy import UtcDateTime + +# revision identifiers, used by Alembic. +revision = "33b04e4bfa19" +down_revision = "8ea135928435" +branch_labels = None +depends_on = None +airflow_version = "3.0.0" + + +def upgrade(): + """Apply add new task_instance field scheduled_dttm.""" + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table("task_instance", schema=None) as batch_op: + batch_op.add_column(sa.Column("scheduled_dttm", UtcDateTime(timezone=True), nullable=True)) + + with op.batch_alter_table("task_instance_history", schema=None) as batch_op: + batch_op.add_column(sa.Column("scheduled_dttm", UtcDateTime(timezone=True), nullable=True)) + + # ### end Alembic commands ### + + +def downgrade(): + """Unapply add new task_instance field scheduled_dttm.""" + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table("task_instance_history", schema=None) as batch_op: + batch_op.drop_column("scheduled_dttm") + + with op.batch_alter_table("task_instance", schema=None) as batch_op: + batch_op.drop_column("scheduled_dttm") + + # ### end Alembic commands ### diff --git a/airflow/models/__init__.py b/airflow/models/__init__.py index ae0fa3040e181..6bd3883b139af 100644 --- a/airflow/models/__init__.py +++ b/airflow/models/__init__.py @@ -99,7 +99,7 @@ def __getattr__(name): "Log": "airflow.models.log", "MappedOperator": "airflow.models.mappedoperator", "Operator": "airflow.models.operator", - "Param": "airflow.models.param", + "Param": "airflow.sdk.definitions.param", "Pool": "airflow.models.pool", "RenderedTaskInstanceFields": "airflow.models.renderedtifields", "SkipMixin": "airflow.models.skipmixin", @@ -128,7 +128,6 @@ def __getattr__(name): from airflow.models.log import Log from airflow.models.mappedoperator import MappedOperator from airflow.models.operator import Operator - from airflow.models.param import Param from airflow.models.pool import Pool from airflow.models.renderedtifields import RenderedTaskInstanceFields from airflow.models.skipmixin import SkipMixin @@ -138,3 +137,4 @@ def __getattr__(name): from airflow.models.trigger import Trigger from airflow.models.variable import Variable from airflow.models.xcom import XCom + from airflow.sdk.definitions.param import Param diff --git a/airflow/models/dag.py b/airflow/models/dag.py index 964efab4b848b..a22452a748cec 100644 --- a/airflow/models/dag.py +++ b/airflow/models/dag.py @@ -1666,6 +1666,7 @@ def add_logger_if_needed(ti: TaskInstance): if s.state != TaskInstanceState.UP_FOR_RESCHEDULE: s.try_number += 1 s.state = TaskInstanceState.SCHEDULED + s.scheduled_dttm = timezone.utcnow() session.commit() # triggerer may mark tasks scheduled so we read from DB all_tis = set(dr.get_task_instances(session=session)) diff --git a/airflow/models/dagrun.py b/airflow/models/dagrun.py index 35d8af4322c49..727746b9b0333 100644 --- a/airflow/models/dagrun.py +++ b/airflow/models/dagrun.py @@ -1662,6 +1662,7 @@ def schedule_tis( ) .values( state=TaskInstanceState.SCHEDULED, + scheduled_dttm=timezone.utcnow(), try_number=case( ( or_(TI.state.is_(None), TI.state != TaskInstanceState.UP_FOR_RESCHEDULE), diff --git 
a/airflow/models/param.py b/airflow/models/param.py index cd3ccec26a48a..01886f6e585ab 100644 --- a/airflow/models/param.py +++ b/airflow/models/param.py @@ -14,340 +14,11 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -from __future__ import annotations - -import contextlib -import copy -import json -import logging -from collections.abc import ItemsView, Iterable, MutableMapping, ValuesView -from typing import TYPE_CHECKING, Any, ClassVar - -from airflow.exceptions import AirflowException, ParamValidationError -from airflow.sdk.definitions._internal.mixins import ResolveMixin -from airflow.utils.types import NOTSET, ArgNotSet - -if TYPE_CHECKING: - from airflow.sdk.definitions.context import Context - from airflow.sdk.definitions.dag import DAG - from airflow.sdk.types import Operator - -logger = logging.getLogger(__name__) - - -class Param: - """ - Class to hold the default value of a Param and rule set to do the validations. - - Without the rule set it always validates and returns the default value. - - :param default: The value this Param object holds - :param description: Optional help text for the Param - :param schema: The validation schema of the Param, if not given then all kwargs except - default & description will form the schema - """ - - __version__: ClassVar[int] = 1 - - CLASS_IDENTIFIER = "__class" - - def __init__(self, default: Any = NOTSET, description: str | None = None, **kwargs): - if default is not NOTSET: - self._check_json(default) - self.value = default - self.description = description - self.schema = kwargs.pop("schema") if "schema" in kwargs else kwargs - - def __copy__(self) -> Param: - return Param(self.value, self.description, schema=self.schema) - - @staticmethod - def _check_json(value): - try: - json.dumps(value) - except Exception: - raise ParamValidationError( - "All provided parameters must be json-serializable. " - f"The value '{value}' is not serializable." - ) - - def resolve(self, value: Any = NOTSET, suppress_exception: bool = False) -> Any: - """ - Run the validations and returns the Param's final value. - - May raise ValueError on failed validations, or TypeError - if no value is passed and no value already exists. - We first check that value is json-serializable; if not, warn. - In future release we will require the value to be json-serializable. - - :param value: The value to be updated for the Param - :param suppress_exception: To raise an exception or not when the validations fails. - If true and validations fails, the return value would be None. 
- """ - import jsonschema - from jsonschema import FormatChecker - from jsonschema.exceptions import ValidationError - - if value is not NOTSET: - self._check_json(value) - final_val = self.value if value is NOTSET else value - if isinstance(final_val, ArgNotSet): - if suppress_exception: - return None - raise ParamValidationError("No value passed and Param has no default value") - try: - jsonschema.validate(final_val, self.schema, format_checker=FormatChecker()) - except ValidationError as err: - if suppress_exception: - return None - raise ParamValidationError(err) from None - self.value = final_val - return final_val - - def dump(self) -> dict: - """Dump the Param as a dictionary.""" - out_dict: dict[str, str | None] = { - self.CLASS_IDENTIFIER: f"{self.__module__}.{self.__class__.__name__}" - } - out_dict.update(self.__dict__) - # Ensure that not set is translated to None - if self.value is NOTSET: - out_dict["value"] = None - return out_dict - - @property - def has_value(self) -> bool: - return self.value is not NOTSET and self.value is not None - - def serialize(self) -> dict: - return {"value": self.value, "description": self.description, "schema": self.schema} - - @staticmethod - def deserialize(data: dict[str, Any], version: int) -> Param: - if version > Param.__version__: - raise TypeError("serialized version > class version") - - return Param(default=data["value"], description=data["description"], schema=data["schema"]) - - -class ParamsDict(MutableMapping[str, Any]): - """ - Class to hold all params for dags or tasks. - - All the keys are strictly string and values are converted into Param's object - if they are not already. This class is to replace param's dictionary implicitly - and ideally not needed to be used directly. - - - :param dict_obj: A dict or dict like object to init ParamsDict - :param suppress_exception: Flag to suppress value exceptions while initializing the ParamsDict - """ - - __version__: ClassVar[int] = 1 - __slots__ = ["__dict", "suppress_exception"] - - def __init__(self, dict_obj: MutableMapping | None = None, suppress_exception: bool = False): - params_dict: dict[str, Param] = {} - dict_obj = dict_obj or {} - for k, v in dict_obj.items(): - if not isinstance(v, Param): - params_dict[k] = Param(v) - else: - params_dict[k] = v - self.__dict = params_dict - self.suppress_exception = suppress_exception - - def __bool__(self) -> bool: - return bool(self.__dict) - - def __eq__(self, other: Any) -> bool: - if isinstance(other, ParamsDict): - return self.dump() == other.dump() - if isinstance(other, dict): - return self.dump() == other - return NotImplemented - - def __copy__(self) -> ParamsDict: - return ParamsDict(self.__dict, self.suppress_exception) - - def __deepcopy__(self, memo: dict[int, Any] | None) -> ParamsDict: - return ParamsDict(copy.deepcopy(self.__dict, memo), self.suppress_exception) - - def __contains__(self, o: object) -> bool: - return o in self.__dict - def __len__(self) -> int: - return len(self.__dict) +"""Re exporting the new param module from Task SDK for backward compatibility.""" - def __delitem__(self, v: str) -> None: - del self.__dict[v] - - def __iter__(self): - return iter(self.__dict) - - def __repr__(self): - return repr(self.dump()) - - def __setitem__(self, key: str, value: Any) -> None: - """ - Override for dictionary's ``setitem`` method to ensure all values are of Param's type only. - - :param key: A key which needs to be inserted or updated in the dict - :param value: A value which needs to be set against the key. 
It could be of any - type but will be converted and stored as a Param object eventually. - """ - if isinstance(value, Param): - param = value - elif key in self.__dict: - param = self.__dict[key] - try: - param.resolve(value=value, suppress_exception=self.suppress_exception) - except ParamValidationError as ve: - raise ParamValidationError(f"Invalid input for param {key}: {ve}") from None - else: - # if the key isn't there already and if the value isn't of Param type create a new Param object - param = Param(value) - - self.__dict[key] = param - - def __getitem__(self, key: str) -> Any: - """ - Override for dictionary's ``getitem`` method to call the resolve method after fetching the key. - - :param key: The key to fetch - """ - param = self.__dict[key] - return param.resolve(suppress_exception=self.suppress_exception) - - def get_param(self, key: str) -> Param: - """Get the internal :class:`.Param` object for this key.""" - return self.__dict[key] - - def items(self): - return ItemsView(self.__dict) - - def values(self): - return ValuesView(self.__dict) - - def update(self, *args, **kwargs) -> None: - if len(args) == 1 and not kwargs and isinstance(args[0], ParamsDict): - return super().update(args[0].__dict) - super().update(*args, **kwargs) - - def dump(self) -> dict[str, Any]: - """Dump the ParamsDict object as a dictionary, while suppressing exceptions.""" - return {k: v.resolve(suppress_exception=True) for k, v in self.items()} - - def validate(self) -> dict[str, Any]: - """Validate & returns all the Params object stored in the dictionary.""" - resolved_dict = {} - try: - for k, v in self.items(): - resolved_dict[k] = v.resolve(suppress_exception=self.suppress_exception) - except ParamValidationError as ve: - raise ParamValidationError(f"Invalid input for param {k}: {ve}") from None - - return resolved_dict - - def serialize(self) -> dict[str, Any]: - return self.dump() - - @staticmethod - def deserialize(data: dict, version: int) -> ParamsDict: - if version > ParamsDict.__version__: - raise TypeError("serialized version > class version") - - return ParamsDict(data) - - -class DagParam(ResolveMixin): - """ - DAG run parameter reference. - - This binds a simple Param object to a name within a DAG instance, so that it - can be resolved during the runtime via the ``{{ context }}`` dictionary. The - ideal use case of this class is to implicitly convert args passed to a - method decorated by ``@dag``. - - It can be used to parameterize a DAG. You can overwrite its value by setting - it on conf when you trigger your DagRun. - - This can also be used in templates by accessing ``{{ context.params }}``. - - **Example**: - - with DAG(...) as dag: - EmailOperator(subject=dag.param('subject', 'Hi from Airflow!')) - - :param current_dag: Dag being used for parameter. - :param name: key value which is used to set the parameter - :param default: Default value used if no parameter was set. - """ - - def __init__(self, current_dag: DAG, name: str, default: Any = NOTSET): - if default is not NOTSET: - current_dag.params[name] = default - self._name = name - self._default = default - self.current_dag = current_dag - - def iter_references(self) -> Iterable[tuple[Operator, str]]: - return () - - def resolve(self, context: Context, *, include_xcom: bool = True) -> Any: - """Pull DagParam value from DagRun context. 
This method is run during ``op.execute()``.""" - with contextlib.suppress(KeyError): - if context["dag_run"].conf: - return context["dag_run"].conf[self._name] - if self._default is not NOTSET: - return self._default - with contextlib.suppress(KeyError): - return context["params"][self._name] - raise AirflowException(f"No value could be resolved for parameter {self._name}") - - def serialize(self) -> dict: - """Serialize the DagParam object into a dictionary.""" - return { - "dag_id": self.current_dag.dag_id, - "name": self._name, - "default": self._default, - } - - @classmethod - def deserialize(cls, data: dict, dags: dict) -> DagParam: - """ - Deserializes the dictionary back into a DagParam object. - - :param data: The serialized representation of the DagParam. - :param dags: A dictionary of available DAGs to look up the DAG. - """ - dag_id = data["dag_id"] - # Retrieve the current DAG from the provided DAGs dictionary - current_dag = dags.get(dag_id) - if not current_dag: - raise ValueError(f"DAG with id {dag_id} not found.") - - return cls(current_dag=current_dag, name=data["name"], default=data["default"]) - - -def process_params( - dag: DAG, - task: Operator, - dagrun_conf: dict[str, Any] | None, - *, - suppress_exception: bool, -) -> dict[str, Any]: - """Merge, validate params, and convert them into a simple dict.""" - from airflow.configuration import conf +from __future__ import annotations - dagrun_conf = dagrun_conf or {} +from airflow.sdk.definitions.param import Param, ParamsDict - params = ParamsDict(suppress_exception=suppress_exception) - with contextlib.suppress(AttributeError): - params.update(dag.params) - if task.params: - params.update(task.params) - if conf.getboolean("core", "dag_run_conf_overrides_params") and dagrun_conf: - logger.debug("Updating task params (%s) with DagRun.conf (%s)", params, dagrun_conf) - params.update(dagrun_conf) - return params.validate() +__all__ = ["Param", "ParamsDict"] diff --git a/airflow/models/taskinstance.py b/airflow/models/taskinstance.py index 093b96c2d7699..7106366b67ef8 100644 --- a/airflow/models/taskinstance.py +++ b/airflow/models/taskinstance.py @@ -17,7 +17,6 @@ # under the License. 
from __future__ import annotations -import collections.abc import contextlib import hashlib import itertools @@ -99,7 +98,6 @@ from airflow.models.base import Base, StringID, TaskInstanceDependencies, _sentinel from airflow.models.dagbag import DagBag from airflow.models.log import Log -from airflow.models.param import process_params from airflow.models.renderedtifields import get_serialized_template_fields from airflow.models.taskinstancekey import TaskInstanceKey from airflow.models.taskmap import TaskMap @@ -109,6 +107,7 @@ from airflow.sdk.api.datamodels._generated import AssetProfile from airflow.sdk.definitions._internal.templater import SandboxedEnvironment from airflow.sdk.definitions.asset import Asset, AssetAlias, AssetNameRef, AssetUniqueKey, AssetUriRef +from airflow.sdk.definitions.param import process_params from airflow.sdk.definitions.taskgroup import MappedTaskGroup from airflow.sentry import Sentry from airflow.settings import task_instance_mutation_hook @@ -163,7 +162,7 @@ from airflow.sdk.definitions._internal.abstractoperator import Operator from airflow.sdk.definitions.dag import DAG from airflow.sdk.types import RuntimeTaskInstanceProtocol - from airflow.typing_compat import Literal, TypeGuard + from airflow.typing_compat import Literal from airflow.utils.task_group import TaskGroup @@ -668,20 +667,6 @@ def _xcom_pull( ) -def _is_mappable_value(value: Any) -> TypeGuard[Collection]: - """ - Whether a value can be used for task mapping. - - We only allow collections with guaranteed ordering, but exclude character - sequences since that's usually not what users would expect to be mappable. - """ - if not isinstance(value, (collections.abc.Sequence, dict)): - return False - if isinstance(value, (bytearray, bytes, str)): - return False - return True - - def _creator_note(val): """Creator the ``note`` association proxy.""" if isinstance(val, str): @@ -824,6 +809,7 @@ def _set_ti_attrs(target, source, include_dag_run=False): target.operator = source.operator target.custom_operator_name = source.custom_operator_name target.queued_dttm = source.queued_dttm + target.scheduled_dttm = source.scheduled_dttm target.queued_by_job_id = source.queued_by_job_id target.last_heartbeat_at = source.last_heartbeat_at target.pid = source.pid @@ -1216,7 +1202,7 @@ def _record_task_map_for_downstreams( :meta private: """ - from airflow.sdk.definitions.mappedoperator import MappedOperator + from airflow.sdk.definitions.mappedoperator import MappedOperator, is_mappable_value if next(task.iter_mapped_dependants(), None) is None: # No mapped dependants, no need to validate. 
return @@ -1228,7 +1214,7 @@ def _record_task_map_for_downstreams( return if value is None: raise XComForMappingNotPushed() - if not _is_mappable_value(value): + if not is_mappable_value(value): raise UnmappableXComTypePushed(value) task_map = TaskMap.from_task_instance_xcom(task_instance, value) max_map_length = conf.getint("core", "max_map_length", fallback=1024) @@ -1712,6 +1698,7 @@ class TaskInstance(Base, LoggingMixin): operator = Column(String(1000)) custom_operator_name = Column(String(1000)) queued_dttm = Column(UtcDateTime) + scheduled_dttm = Column(UtcDateTime) queued_by_job_id = Column(Integer) last_heartbeat_at = Column(UtcDateTime) @@ -2705,23 +2692,24 @@ def emit_state_change_metric(self, new_state: TaskInstanceState) -> None: timing = timezone.utcnow() - self.queued_dttm elif new_state == TaskInstanceState.QUEUED: metric_name = "scheduled_duration" - if self.start_date is None: - # This check does not work correctly before fields like `scheduled_dttm` are implemented. - # TODO: Change the level to WARNING once it's viable. - # see #30612 #34493 and #34771 for more details - self.log.debug( + if self.scheduled_dttm is None: + self.log.warning( "cannot record %s for task %s because previous state change time has not been saved", metric_name, self.task_id, ) return - timing = timezone.utcnow() - self.start_date + timing = timezone.utcnow() - self.scheduled_dttm else: raise NotImplementedError("no metric emission setup for state %s", new_state) # send metric twice, once (legacy) with tags in the name and once with tags as tags Stats.timing(f"dag.{self.dag_id}.{self.task_id}.{metric_name}", timing) - Stats.timing(f"task.{metric_name}", timing, tags={"task_id": self.task_id, "dag_id": self.dag_id}) + Stats.timing( + f"task.{metric_name}", + timing, + tags={"task_id": self.task_id, "dag_id": self.dag_id, "queue": self.queue}, + ) def clear_next_method_args(self) -> None: """Ensure we unset next_method and next_kwargs to ensure that any retries don't reuse them.""" diff --git a/airflow/models/taskinstancehistory.py b/airflow/models/taskinstancehistory.py index 9ac11cad7dba5..e97e6de22ec9a 100644 --- a/airflow/models/taskinstancehistory.py +++ b/airflow/models/taskinstancehistory.py @@ -77,6 +77,7 @@ class TaskInstanceHistory(Base): operator = Column(String(1000)) custom_operator_name = Column(String(1000)) queued_dttm = Column(UtcDateTime) + scheduled_dttm = Column(UtcDateTime) queued_by_job_id = Column(Integer) pid = Column(Integer) executor = Column(String(1000)) diff --git a/airflow/models/trigger.py b/airflow/models/trigger.py index 2e0fe9f7f2bbe..ce139c3134135 100644 --- a/airflow/models/trigger.py +++ b/airflow/models/trigger.py @@ -281,6 +281,7 @@ def submit_failure(cls, trigger_id, exc=None, session: Session = NEW_SESSION) -> task_instance.trigger_id = None # Finally, mark it as scheduled so it gets re-queued task_instance.state = TaskInstanceState.SCHEDULED + task_instance.scheduled_dttm = timezone.utcnow() @classmethod @provide_session diff --git a/airflow/serialization/serialized_objects.py b/airflow/serialization/serialized_objects.py index b7e08a45aed74..9c5f43c0c0b5e 100644 --- a/airflow/serialization/serialized_objects.py +++ b/airflow/serialization/serialized_objects.py @@ -47,7 +47,6 @@ create_expand_input, get_map_type_key, ) -from airflow.models.param import Param, ParamsDict from airflow.models.taskinstance import SimpleTaskInstance from airflow.models.taskinstancekey import TaskInstanceKey from airflow.providers_manager import ProvidersManager @@ -64,6 +63,7 @@ 
) from airflow.sdk.definitions.baseoperator import BaseOperator as TaskSDKBaseOperator from airflow.sdk.definitions.mappedoperator import MappedOperator +from airflow.sdk.definitions.param import Param, ParamsDict from airflow.sdk.definitions.taskgroup import MappedTaskGroup, TaskGroup from airflow.sdk.definitions.xcom_arg import XComArg, deserialize_xcom_arg, serialize_xcom_arg from airflow.sdk.execution_time.context import OutletEventAccessor, OutletEventAccessors @@ -985,7 +985,7 @@ def _serialize_params_dict(cls, params: ParamsDict | dict) -> list[tuple[str, di class_identity = f"{v.__module__}.{v.__class__.__name__}" except AttributeError: class_identity = "" - if class_identity == "airflow.models.param.Param": + if class_identity == "airflow.sdk.definitions.param.Param": serialized_params.append((k, cls._serialize_param(v))) else: # Auto-box other values into Params object like it is done by DAG parsing as well diff --git a/airflow/triggers/base.py b/airflow/triggers/base.py index cf71f1a426ddd..4e88465d533a7 100644 --- a/airflow/triggers/base.py +++ b/airflow/triggers/base.py @@ -25,6 +25,7 @@ from airflow.callbacks.callback_requests import TaskCallbackRequest from airflow.callbacks.database_callback_sink import DatabaseCallbackSink +from airflow.utils import timezone from airflow.utils.log.logging_mixin import LoggingMixin from airflow.utils.session import NEW_SESSION, provide_session from airflow.utils.state import TaskInstanceState @@ -172,6 +173,7 @@ def handle_submit(self, *, task_instance: TaskInstance, session: Session = NEW_S # Set the state of the task instance to scheduled task_instance.state = TaskInstanceState.SCHEDULED + task_instance.scheduled_dttm = timezone.utcnow() class BaseTaskEndEvent(TriggerEvent): diff --git a/airflow/ui/openapi-gen/requests/schemas.gen.ts b/airflow/ui/openapi-gen/requests/schemas.gen.ts index 8b30ecc9c1ec0..d30aacd6be03e 100644 --- a/airflow/ui/openapi-gen/requests/schemas.gen.ts +++ b/airflow/ui/openapi-gen/requests/schemas.gen.ts @@ -4667,6 +4667,18 @@ export const $TaskInstanceHistoryResponse = { ], title: "Queued When", }, + scheduled_when: { + anyOf: [ + { + type: "string", + format: "date-time", + }, + { + type: "null", + }, + ], + title: "Scheduled When", + }, pid: { anyOf: [ { @@ -4715,6 +4727,7 @@ export const $TaskInstanceHistoryResponse = { "priority_weight", "operator", "queued_when", + "scheduled_when", "pid", "executor", "executor_config", @@ -4882,6 +4895,18 @@ export const $TaskInstanceResponse = { ], title: "Queued When", }, + scheduled_when: { + anyOf: [ + { + type: "string", + format: "date-time", + }, + { + type: "null", + }, + ], + title: "Scheduled When", + }, pid: { anyOf: [ { @@ -4979,6 +5004,7 @@ export const $TaskInstanceResponse = { "priority_weight", "operator", "queued_when", + "scheduled_when", "pid", "executor", "executor_config", diff --git a/airflow/ui/openapi-gen/requests/types.gen.ts b/airflow/ui/openapi-gen/requests/types.gen.ts index 7d44b0ede5101..17ef596ce2f07 100644 --- a/airflow/ui/openapi-gen/requests/types.gen.ts +++ b/airflow/ui/openapi-gen/requests/types.gen.ts @@ -1223,6 +1223,7 @@ export type TaskInstanceHistoryResponse = { priority_weight: number | null; operator: string | null; queued_when: string | null; + scheduled_when: string | null; pid: number | null; executor: string | null; executor_config: string; @@ -1253,6 +1254,7 @@ export type TaskInstanceResponse = { priority_weight: number | null; operator: string | null; queued_when: string | null; + scheduled_when: string | null; pid: number | 
null; executor: string | null; executor_config: string; diff --git a/airflow/ui/package.json b/airflow/ui/package.json index 0526f20d4d040..322623e9db68b 100644 --- a/airflow/ui/package.json +++ b/airflow/ui/package.json @@ -39,6 +39,7 @@ "react-chartjs-2": "^5.2.0", "react-dom": "^18.3.1", "react-hook-form": "^7.20.0", + "react-hotkeys-hook": "^4.6.1", "react-icons": "^5.4.0", "react-json-view": "^1.21.3", "react-markdown": "^9.0.1", diff --git a/airflow/ui/pnpm-lock.yaml b/airflow/ui/pnpm-lock.yaml index 5b89d9517c81f..20ae58594252c 100644 --- a/airflow/ui/pnpm-lock.yaml +++ b/airflow/ui/pnpm-lock.yaml @@ -77,6 +77,9 @@ importers: react-hook-form: specifier: ^7.20.0 version: 7.53.1(react@18.3.1) + react-hotkeys-hook: + specifier: ^4.6.1 + version: 4.6.1(react-dom@18.3.1(react@18.3.1))(react@18.3.1) react-icons: specifier: ^5.4.0 version: 5.4.0(react@18.3.1) @@ -3441,6 +3444,12 @@ packages: peerDependencies: react: ^16.8.0 || ^17 || ^18 || ^19 + react-hotkeys-hook@4.6.1: + resolution: {integrity: sha512-XlZpbKUj9tkfgPgT9gA+1p7Ey6vFIZHttUjPqpTdyT5nqQ8mHL7elxvSbaC+dpSiHUSmr21Ya1mDxBZG3aje4Q==} + peerDependencies: + react: '>=16.8.1' + react-dom: '>=16.8.1' + react-icons@5.4.0: resolution: {integrity: sha512-7eltJxgVt7X64oHh6wSWNwwbKTCtMfK35hcjvJS0yxEAhPM8oUKdS3+kqaW1vicIltw+kR2unHaa12S9pPALoQ==} peerDependencies: @@ -8475,6 +8484,11 @@ snapshots: dependencies: react: 18.3.1 + react-hotkeys-hook@4.6.1(react-dom@18.3.1(react@18.3.1))(react@18.3.1): + dependencies: + react: 18.3.1 + react-dom: 18.3.1(react@18.3.1) + react-icons@5.4.0(react@18.3.1): dependencies: react: 18.3.1 diff --git a/airflow/ui/src/components/ErrorAlert.tsx b/airflow/ui/src/components/ErrorAlert.tsx index 538cb230f7da1..f5d9ac15cb48b 100644 --- a/airflow/ui/src/components/ErrorAlert.tsx +++ b/airflow/ui/src/components/ErrorAlert.tsx @@ -23,7 +23,7 @@ import type { HTTPExceptionResponse, HTTPValidationError } from "openapi-gen/req import { Alert } from "./ui"; type ExpandedApiError = { - body: HTTPExceptionResponse | HTTPValidationError; + body: HTTPExceptionResponse | HTTPValidationError | undefined; } & ApiError; type Props = { @@ -37,7 +37,7 @@ export const ErrorAlert = ({ error: err }: Props) => { return undefined; } - const details = error.body.detail; + const details = error.body?.detail; let detailMessage; if (details !== undefined) { diff --git a/airflow/ui/src/components/SearchDags/SearchDagsButton.tsx b/airflow/ui/src/components/SearchDags/SearchDagsButton.tsx index 942e3e5a848dc..83974fd090985 100644 --- a/airflow/ui/src/components/SearchDags/SearchDagsButton.tsx +++ b/airflow/ui/src/components/SearchDags/SearchDagsButton.tsx @@ -16,25 +16,37 @@ * specific language governing permissions and limitations * under the License. 
*/ -import { Button, Box } from "@chakra-ui/react"; +import { Button, Box, Kbd } from "@chakra-ui/react"; import { useState } from "react"; +import { useHotkeys } from "react-hotkeys-hook"; import { MdSearch } from "react-icons/md"; import { Dialog } from "src/components/ui"; +import { getMetaKey } from "src/utils"; import { SearchDags } from "./SearchDags"; export const SearchDagsButton = () => { const [isOpen, setIsOpen] = useState(false); + const metaKey = getMetaKey(); const onOpenChange = () => { setIsOpen(false); }; + useHotkeys( + "mod+k", + () => { + setIsOpen(true); + }, + [isOpen], + { preventDefault: true }, + ); + return ( diff --git a/airflow/ui/src/components/TaskTrySelect.tsx b/airflow/ui/src/components/TaskTrySelect.tsx index f4b4cf6c33c56..e1c5a68ec6996 100644 --- a/airflow/ui/src/components/TaskTrySelect.tsx +++ b/airflow/ui/src/components/TaskTrySelect.tsx @@ -21,8 +21,7 @@ import { Button, createListCollection, HStack, VStack, Heading } from "@chakra-u import { useTaskInstanceServiceGetMappedTaskInstanceTries } from "openapi/queries"; import type { TaskInstanceHistoryResponse, TaskInstanceResponse } from "openapi/requests/types.gen"; import { StateBadge } from "src/components/StateBadge"; -import { useConfig } from "src/queries/useConfig"; -import { isStatePending } from "src/utils/refresh"; +import { isStatePending, useAutoRefresh } from "src/utils"; import TaskInstanceTooltip from "./TaskInstanceTooltip"; import { Select } from "./ui"; @@ -43,7 +42,7 @@ export const TaskTrySelect = ({ onSelectTryNumber, selectedTryNumber, taskInstan try_number: finalTryNumber, } = taskInstance; - const autoRefreshInterval = useConfig("auto_refresh_interval") as number; + const refetchInterval = useAutoRefresh({ dagId }); const { data: tiHistory } = useTaskInstanceServiceGetMappedTaskInstanceTries( { @@ -59,7 +58,7 @@ export const TaskTrySelect = ({ onSelectTryNumber, selectedTryNumber, taskInstan // We actually want to use || here // eslint-disable-next-line @typescript-eslint/prefer-nullish-coalescing query.state.data?.task_instances.some((ti) => isStatePending(ti.state)) || isStatePending(state) - ? autoRefreshInterval * 1000 + ? 
refetchInterval : false, }, ); diff --git a/airflow/ui/src/components/TriggerDag/TriggerDAGForm.tsx b/airflow/ui/src/components/TriggerDag/TriggerDAGForm.tsx index e8e02cc7c1f15..1fe47e8c0c95b 100644 --- a/airflow/ui/src/components/TriggerDag/TriggerDAGForm.tsx +++ b/airflow/ui/src/components/TriggerDag/TriggerDAGForm.tsx @@ -55,7 +55,7 @@ const TriggerDAGForm = ({ dagId, onClose, open }: TriggerDAGFormProps) => { error: errorTrigger, isPending, triggerDagRun, - } = useTrigger({ onSuccessConfirm: onClose }); + } = useTrigger({ dagId, onSuccessConfirm: onClose }); const { conf, setConf } = useParamStore(); const { control, handleSubmit, reset, watch } = useForm({ @@ -85,7 +85,7 @@ const TriggerDAGForm = ({ dagId, onClose, open }: TriggerDAGFormProps) => { const dataIntervalEnd = watch("dataIntervalEnd"); const onSubmit = (data: DagRunTriggerParams) => { - triggerDagRun(dagId, data); + triggerDagRun(data); }; const validateAndPrettifyJson = (value: string) => { diff --git a/airflow/ui/src/constants/sortParams.ts b/airflow/ui/src/constants/sortParams.ts index 21b55730baa35..3ffc36844e82e 100644 --- a/airflow/ui/src/constants/sortParams.ts +++ b/airflow/ui/src/constants/sortParams.ts @@ -24,14 +24,14 @@ export const dagSortOptions = createListCollection({ { label: "Sort by Display Name (Z-A)", value: "-dag_display_name" }, { label: "Sort by Next DAG Run (Earliest-Latest)", value: "next_dagrun" }, { label: "Sort by Next DAG Run (Latest-Earliest)", value: "-next_dagrun" }, - { label: "Sort by Last Run State (A-Z)", value: "last_run_state" }, - { label: "Sort by Last Run State (Z-A)", value: "-last_run_state" }, + { label: "Sort by Latest Run State (A-Z)", value: "last_run_state" }, + { label: "Sort by Latest Run State (Z-A)", value: "-last_run_state" }, { - label: "Sort by Last Run Start Date (Earliest-Latest)", + label: "Sort by Latest Run Start Date (Earliest-Latest)", value: "last_run_start_date", }, { - label: "Sort by Last Run Start Date (Latest-Earliest)", + label: "Sort by Latest Run Start Date (Latest-Earliest)", value: "-last_run_start_date", }, ], diff --git a/airflow/ui/src/layouts/Details/Graph/Graph.tsx b/airflow/ui/src/layouts/Details/Graph/Graph.tsx index 43b914e115b8b..ef58246e39a50 100644 --- a/airflow/ui/src/layouts/Details/Graph/Graph.tsx +++ b/airflow/ui/src/layouts/Details/Graph/Graph.tsx @@ -24,6 +24,7 @@ import { useParams } from "react-router-dom"; import { useGridServiceGridData, useStructureServiceStructureData } from "openapi/queries"; import { useColorMode } from "src/context/colorMode"; import { useOpenGroups } from "src/context/openGroups"; +import { isStatePending, useAutoRefresh } from "src/utils"; import Edge from "./Edge"; import { JoinNode } from "./JoinNode"; @@ -87,6 +88,8 @@ export const Graph = () => { openGroupIds, }); + const refetchInterval = useAutoRefresh({ dagId }); + const { data: gridData } = useGridServiceGridData( { dagId, @@ -97,6 +100,8 @@ export const Graph = () => { undefined, { enabled: Boolean(runId), + refetchInterval: (query) => + query.state.data?.dag_runs.some((dr) => isStatePending(dr.state)) && refetchInterval, }, ); diff --git a/airflow/ui/src/layouts/Details/Grid/Grid.tsx b/airflow/ui/src/layouts/Details/Grid/Grid.tsx index 1e36c2a763e8d..f66a7b4a5de79 100644 --- a/airflow/ui/src/layouts/Details/Grid/Grid.tsx +++ b/airflow/ui/src/layouts/Details/Grid/Grid.tsx @@ -27,6 +27,7 @@ import { useParams, useSearchParams } from "react-router-dom"; import { useGridServiceGridData, useStructureServiceStructureData } from "openapi/queries"; 
import type { GridResponse } from "openapi/requests/types.gen"; import { useOpenGroups } from "src/context/openGroups"; +import { isStatePending, useAutoRefresh } from "src/utils"; import { Bar } from "./Bar"; import { DurationAxis } from "./DurationAxis"; @@ -47,6 +48,7 @@ export const Grid = () => { }); const [searchParams, setSearchParams] = useSearchParams(); + const refetchInterval = useAutoRefresh({ dagId }); const offset = parseInt(searchParams.get("offset") ?? "0", 10); @@ -62,6 +64,8 @@ export const Grid = () => { undefined, { placeholderData: keepPreviousData, + refetchInterval: (query) => + query.state.data?.dag_runs.some((dr) => isStatePending(dr.state)) && refetchInterval, }, ); diff --git a/airflow/ui/src/layouts/Nav/AdminButton.tsx b/airflow/ui/src/layouts/Nav/AdminButton.tsx index 7889f4ede56cd..53ad5c9d4d752 100644 --- a/airflow/ui/src/layouts/Nav/AdminButton.tsx +++ b/airflow/ui/src/layouts/Nav/AdminButton.tsx @@ -32,6 +32,10 @@ const links = [ href: "/pools", title: "Pools", }, + { + href: "/providers", + title: "Providers", + }, ]; export const AdminButton = () => ( diff --git a/airflow/ui/src/pages/Dag/Dag.tsx b/airflow/ui/src/pages/Dag/Dag.tsx index 89aa32eda7944..c7107f98fff71 100644 --- a/airflow/ui/src/pages/Dag/Dag.tsx +++ b/airflow/ui/src/pages/Dag/Dag.tsx @@ -20,6 +20,7 @@ import { useParams } from "react-router-dom"; import { useDagServiceGetDagDetails, useDagsServiceRecentDagRuns } from "openapi/queries"; import { DetailsLayout } from "src/layouts/Details/DetailsLayout"; +import { isStatePending, useAutoRefresh } from "src/utils"; import { Header } from "./Header"; @@ -42,6 +43,8 @@ export const Dag = () => { dagId, }); + const refetchInterval = useAutoRefresh({ dagId }); + // TODO: replace with with a list dag runs by dag id request const { data: runsData, @@ -49,13 +52,25 @@ export const Dag = () => { isLoading: isLoadingRuns, } = useDagsServiceRecentDagRuns({ dagIds: [dagId] }, undefined, { enabled: Boolean(dagId), + refetchInterval: (query) => + query.state.data?.dags + .find((recentDag) => recentDag.dag_id === dagId) + ?.latest_dag_runs.some((run) => isStatePending(run.state)) + ? refetchInterval + : false, }); - const runs = runsData?.dags.find((dagWithRuns) => dagWithRuns.dag_id === dagId)?.latest_dag_runs ?? []; + const dagWithRuns = runsData?.dags.find((recentDag) => recentDag.dag_id === dagId); return ( -
+
isStatePending(dr.state)) && Boolean(refetchInterval), + )} + /> ); }; diff --git a/airflow/ui/src/pages/Dag/Header.tsx b/airflow/ui/src/pages/Dag/Header.tsx index 0c01513adb4a7..bc4b295d64354 100644 --- a/airflow/ui/src/pages/Dag/Header.tsx +++ b/airflow/ui/src/pages/Dag/Header.tsx @@ -16,10 +16,11 @@ * specific language governing permissions and limitations * under the License. */ -import { Box, Flex, Heading, HStack, SimpleGrid, Text } from "@chakra-ui/react"; +import { Box, Flex, Heading, HStack, SimpleGrid, Spinner, Text } from "@chakra-ui/react"; import { FiBookOpen, FiCalendar } from "react-icons/fi"; +import { useParams } from "react-router-dom"; -import type { DAGDetailsResponse, DAGRunResponse } from "openapi/requests/types.gen"; +import type { DAGDetailsResponse, DAGWithLatestDagRunsResponse } from "openapi/requests/types.gen"; import { DagIcon } from "src/assets/DagIcon"; import DagRunInfo from "src/components/DagRunInfo"; import DisplayMarkdownButton from "src/components/DisplayMarkdownButton"; @@ -33,87 +34,98 @@ import { DagTags } from "../DagsList/DagTags"; export const Header = ({ dag, - dagId, - latestRun, + dagWithRuns, + isRefreshing, }: { readonly dag?: DAGDetailsResponse; - readonly dagId?: string; - readonly latestRun?: DAGRunResponse; -}) => ( - - - - - - {dag?.dag_display_name ?? dagId} - {dag !== undefined && ( - - )} - - - {dag ? ( - - {dag.doc_md === null ? undefined : ( - } - mdContent={dag.doc_md} - text="Dag Docs" - /> - )} - - - - ) : undefined} + readonly dagWithRuns?: DAGWithLatestDagRunsResponse; + readonly isRefreshing?: boolean; +}) => { + // We would still like to show the dagId even if the dag object hasn't loaded yet + const { dagId } = useParams(); + const latestRun = dagWithRuns?.latest_dag_runs ? dagWithRuns.latest_dag_runs[0] : undefined; + + return ( + + + + + + {dag?.dag_display_name ?? dagId} + {dag !== undefined && ( + + )} + {isRefreshing ? :
} + + + {dag ? ( + + {dag.doc_md === null ? undefined : ( + } + mdContent={dag.doc_md} + text="Dag Docs" + /> + )} + + + + ) : undefined} + + + + {Boolean(dag?.timetable_summary) ? ( + + + {dag?.timetable_summary} + + + ) : undefined} + + + {Boolean(latestRun) && latestRun !== undefined ? ( + + ) : undefined} + + + {Boolean(dagWithRuns?.next_dagrun) ? ( + + ) : undefined} + +
+
+ + + + Owner: {dag?.owners.join(", ")} + - - - {Boolean(dag?.timetable_summary) ? ( - - - {dag?.timetable_summary} - - - ) : undefined} - - - {Boolean(latestRun) && latestRun !== undefined ? ( - - ) : undefined} - - - {Boolean(dag?.next_dagrun) && dag !== undefined ? ( - - ) : undefined} - -
-
- - - Owner: {dag?.owners.join(", ")} - - - -); + ); +}; diff --git a/airflow/ui/src/pages/Dag/Runs/Runs.tsx b/airflow/ui/src/pages/Dag/Runs/Runs.tsx index a986d5d2130ab..29a1e90fd8616 100644 --- a/airflow/ui/src/pages/Dag/Runs/Runs.tsx +++ b/airflow/ui/src/pages/Dag/Runs/Runs.tsx @@ -40,7 +40,7 @@ import { RunTypeIcon } from "src/components/RunTypeIcon"; import { StateBadge } from "src/components/StateBadge"; import Time from "src/components/Time"; import { Select } from "src/components/ui"; -import { capitalize, getDuration } from "src/utils"; +import { capitalize, getDuration, useAutoRefresh, isStatePending } from "src/utils"; const columns: Array> = [ { @@ -125,11 +125,13 @@ export const Runs = () => { const { setTableURLState, tableURLState } = useTableURLState(); const { pagination, sorting } = tableURLState; const [sort] = sorting; - const orderBy = sort ? `${sort.desc ? "-" : ""}${sort.id}` : "-start_date"; + const orderBy = sort ? `${sort.desc ? "-" : ""}${sort.id}` : "-logical_date"; const filteredState = searchParams.get(STATE_PARAM); - const { data, error, isFetching, isLoading } = useDagRunServiceGetDagRuns( + const refetchInterval = useAutoRefresh({ dagId }); + + const { data, error, isLoading } = useDagRunServiceGetDagRuns( { dagId: dagId ?? "~", limit: pagination.pageSize, @@ -138,7 +140,11 @@ export const Runs = () => { state: filteredState === null ? undefined : [filteredState], }, undefined, - { enabled: !isNaN(pagination.pageSize) }, + { + enabled: !isNaN(pagination.pageSize), + refetchInterval: (query) => + query.state.data?.dag_runs.some((run) => isStatePending(run.state)) ? refetchInterval : false, + }, ); const handleStateChange = useCallback( @@ -197,7 +203,6 @@ export const Runs = () => { data={data?.dag_runs ?? []} errorMessage={} initialState={tableURLState} - isFetching={isFetching} isLoading={isLoading} modelName="Dag Run" onStateChange={setTableURLState} diff --git a/airflow/ui/src/pages/Dag/Tasks/TaskCard.tsx b/airflow/ui/src/pages/Dag/Tasks/TaskCard.tsx index 3476e1850ad5a..e49f563eef52d 100644 --- a/airflow/ui/src/pages/Dag/Tasks/TaskCard.tsx +++ b/airflow/ui/src/pages/Dag/Tasks/TaskCard.tsx @@ -19,58 +19,79 @@ import { Heading, VStack, Box, SimpleGrid, Text, Link } from "@chakra-ui/react"; import { Link as RouterLink } from "react-router-dom"; -import type { TaskResponse, TaskInstanceResponse } from "openapi/requests/types.gen"; +import { useTaskInstanceServiceGetTaskInstances } from "openapi/queries/queries.ts"; +import type { TaskResponse } from "openapi/requests/types.gen"; import { StateBadge } from "src/components/StateBadge"; import TaskInstanceTooltip from "src/components/TaskInstanceTooltip"; import Time from "src/components/Time"; -import { getTaskInstanceLink } from "src/utils/links.ts"; +import { isStatePending, useAutoRefresh } from "src/utils"; +import { getTaskInstanceLink } from "src/utils/links"; import { TaskRecentRuns } from "./TaskRecentRuns.tsx"; type Props = { readonly dagId: string; readonly task: TaskResponse; - readonly taskInstances: Array; }; -export const TaskCard = ({ dagId, task, taskInstances }: Props) => ( - - - - {task.task_display_name ?? task.task_id} - {task.is_mapped ? "[]" : undefined} - - - - - - Operator - - {task.operator_name} - - - - Trigger Rule - - {task.trigger_rule} - - - - Last Instance - - {taskInstances[0] ? 
( - - - - - - - ) : undefined} - - {/* TODO: Handled mapped tasks to not plot each map index as a task instance */} - {!task.is_mapped && } - - -); +export const TaskCard = ({ dagId, task }: Props) => { + const refetchInterval = useAutoRefresh({ dagId }); + + const { data } = useTaskInstanceServiceGetTaskInstances( + { + dagId, + dagRunId: "~", + limit: 14, + orderBy: "-logical_date", + taskId: task.task_id ?? "", + }, + undefined, + { + enabled: Boolean(dagId) && Boolean(task.task_id), + refetchInterval: (query) => + query.state.data?.task_instances.some((ti) => isStatePending(ti.state)) ? refetchInterval : false, + }, + ); + + return ( + + + + {task.task_display_name ?? task.task_id} + {task.is_mapped ? "[]" : undefined} + + + + + + Operator + + {task.operator_name} + + + + Trigger Rule + + {task.trigger_rule} + + + + Last Instance + + {data?.task_instances[0] ? ( + + + + + + + ) : undefined} + + {/* TODO: Handled mapped tasks to not plot each map index as a task instance */} + {!task.is_mapped && } + + + ); +}; diff --git a/airflow/ui/src/pages/Dag/Tasks/TaskRecentRuns.tsx b/airflow/ui/src/pages/Dag/Tasks/TaskRecentRuns.tsx index 3a7fc13d44949..8dd9e70ad8df3 100644 --- a/airflow/ui/src/pages/Dag/Tasks/TaskRecentRuns.tsx +++ b/airflow/ui/src/pages/Dag/Tasks/TaskRecentRuns.tsx @@ -51,23 +51,21 @@ export const TaskRecentRuns = ({ return ( - {taskInstancesWithDuration.map((taskInstance) => - taskInstance.state === null ? undefined : ( - - - - - - - - ), - )} + {taskInstancesWithDuration.map((taskInstance) => ( + + + + + + + + ))} ); }; diff --git a/airflow/ui/src/pages/Dag/Tasks/Tasks.tsx b/airflow/ui/src/pages/Dag/Tasks/Tasks.tsx index 6e005fe82e619..ae33c69f66f00 100644 --- a/airflow/ui/src/pages/Dag/Tasks/Tasks.tsx +++ b/airflow/ui/src/pages/Dag/Tasks/Tasks.tsx @@ -19,12 +19,8 @@ import { Heading, Skeleton, Box } from "@chakra-ui/react"; import { useParams } from "react-router-dom"; -import { - useTaskServiceGetTasks, - useTaskInstanceServiceGetTaskInstances, - useDagsServiceRecentDagRuns, -} from "openapi/queries"; -import type { TaskResponse, TaskInstanceResponse } from "openapi/requests/types.gen"; +import { useTaskServiceGetTasks } from "openapi/queries"; +import type { TaskResponse } from "openapi/requests/types.gen"; import { DataTable } from "src/components/DataTable"; import type { CardDef } from "src/components/DataTable/types"; import { ErrorAlert } from "src/components/ErrorAlert"; @@ -32,18 +28,8 @@ import { pluralize } from "src/utils"; import { TaskCard } from "./TaskCard"; -const cardDef = (dagId: string, taskInstances?: Array): CardDef => ({ - card: ({ row }) => ( - instance.task_id === row.task_id) - : [] - } - /> - ), +const cardDef = (dagId: string): CardDef => ({ + card: ({ row }) => , meta: { customSkeleton: , }, @@ -60,25 +46,6 @@ export const Tasks = () => { dagId, }); - const { data: runsData } = useDagsServiceRecentDagRuns({ dagIds: [dagId], dagRunsLimit: 14 }, undefined, { - enabled: Boolean(dagId), - }); - - const runs = runsData?.dags.find((dagWithRuns) => dagWithRuns.dag_id === dagId)?.latest_dag_runs ?? []; - - // TODO: Revisit this endpoint since only 100 task instances are returned and - // only duration is calculated with other attributes unused. - const { data: taskInstancesResponse } = useTaskInstanceServiceGetTaskInstances( - { - dagId, - dagRunId: "~", - logicalDateGte: runs.at(-1)?.logical_date ?? 
"", - orderBy: "-start_date", - }, - undefined, - { enabled: Boolean(runs[0]?.dag_run_id) }, - ); - return ( @@ -86,14 +53,14 @@ export const Tasks = () => { {pluralize("Task", data ? data.total_entries : 0)} ); diff --git a/airflow/ui/src/pages/DagsList/DagCard.tsx b/airflow/ui/src/pages/DagsList/DagCard.tsx index 42a6d66c3835e..b222b037c7240 100644 --- a/airflow/ui/src/pages/DagsList/DagCard.tsx +++ b/airflow/ui/src/pages/DagsList/DagCard.tsx @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. */ -import { Box, Flex, HStack, SimpleGrid, Link } from "@chakra-ui/react"; +import { Box, Flex, HStack, SimpleGrid, Link, Spinner } from "@chakra-ui/react"; import { Link as RouterLink } from "react-router-dom"; import type { DAGWithLatestDagRunsResponse } from "openapi/requests/types.gen"; @@ -25,6 +25,7 @@ import { Stat } from "src/components/Stat"; import { TogglePause } from "src/components/TogglePause"; import TriggerDAGButton from "src/components/TriggerDag/TriggerDAGButton"; import { Tooltip } from "src/components/ui"; +import { isStatePending, useAutoRefresh } from "src/utils"; import { DagTags } from "./DagTags"; import { RecentRuns } from "./RecentRuns"; @@ -37,6 +38,8 @@ type Props = { export const DagCard = ({ dag }: Props) => { const [latestRun] = dag.latest_dag_runs; + const refetchInterval = useAutoRefresh({ dagId: dag.dag_id }); + return ( @@ -68,6 +71,7 @@ export const DagCard = ({ dag }: Props) => { startDate={latestRun.start_date} state={latestRun.state} /> + {isStatePending(latestRun.state) && Boolean(refetchInterval) ? : undefined} ) : undefined} diff --git a/airflow/ui/src/pages/DagsList/DagsList.tsx b/airflow/ui/src/pages/DagsList/DagsList.tsx index 7829738030d25..c8c32b74e8e22 100644 --- a/airflow/ui/src/pages/DagsList/DagsList.tsx +++ b/airflow/ui/src/pages/DagsList/DagsList.tsx @@ -189,7 +189,7 @@ export const DagsList = () => { paused = false; } - const { data, error, isFetching, isLoading } = useDags({ + const { data, error, isLoading } = useDags({ dagDisplayNamePattern: Boolean(dagDisplayNamePattern) ? `${dagDisplayNamePattern}` : undefined, lastDagRunState, limit: pagination.pageSize, @@ -244,7 +244,6 @@ export const DagsList = () => { displayMode={display} errorMessage={} initialState={tableURLState} - isFetching={isFetching} isLoading={isLoading} modelName="Dag" onStateChange={setTableURLState} diff --git a/airflow/ui/src/pages/Providers.tsx b/airflow/ui/src/pages/Providers.tsx new file mode 100644 index 0000000000000..8c05f434d5d23 --- /dev/null +++ b/airflow/ui/src/pages/Providers.tsx @@ -0,0 +1,83 @@ +/*! + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +import { Box, Heading, Link } from "@chakra-ui/react"; +import type { ColumnDef } from "@tanstack/react-table"; + +import { useProviderServiceGetProviders } from "openapi/queries"; +import type { ProviderResponse } from "openapi/requests/types.gen"; +import { DataTable } from "src/components/DataTable"; +import { ErrorAlert } from "src/components/ErrorAlert"; + +const columns: Array> = [ + { + accessorKey: "package_name", + cell: ({ row: { original } }) => ( + + {original.package_name} + + ), + header: "Package Name", + }, + { + accessorKey: "version", + cell: ({ row: { original } }) => original.version, + header: () => "Version", + }, + { + accessorKey: "description", + cell: ({ row: { original } }) => { + const urlRegex = /http(s)?:\/\/[\w.-]+(\.?:[\w.-]+)*([#/?][\w!#$%&'()*+,./:;=?@[\]~-]*)?/gu; + const urls = original.description.match(urlRegex); + const cleanText = original.description.replaceAll(/\n(?:and)?/gu, " ").split(" "); + + return cleanText.map((part) => + urls?.includes(part) ? ( + + {part} + + ) : ( + `${part} ` + ), + ); + }, + header: "Description", + }, +]; + +export const Providers = () => { + const { data, error } = useProviderServiceGetProviders(); + + return ( + + Providers + } + total={data?.total_entries} + /> + + ); +}; diff --git a/airflow/ui/src/pages/Run/Run.tsx b/airflow/ui/src/pages/Run/Run.tsx index 8e2d8ef6f3b81..6689ca1a7adf1 100644 --- a/airflow/ui/src/pages/Run/Run.tsx +++ b/airflow/ui/src/pages/Run/Run.tsx @@ -22,8 +22,7 @@ import { useParams, Link as RouterLink } from "react-router-dom"; import { useDagRunServiceGetDagRun, useDagServiceGetDagDetails } from "openapi/queries"; import { Breadcrumb } from "src/components/ui"; import { DetailsLayout } from "src/layouts/Details/DetailsLayout"; -import { useConfig } from "src/queries/useConfig"; -import { isStatePending } from "src/utils/refresh"; +import { isStatePending, useAutoRefresh } from "src/utils"; import { Header } from "./Header"; @@ -37,7 +36,15 @@ const tabs = [ export const Run = () => { const { dagId = "", runId = "" } = useParams(); - const autoRefreshInterval = useConfig("auto_refresh_interval") as number; + const refetchInterval = useAutoRefresh({ dagId }); + + const { + data: dag, + error: dagError, + isLoading: isLoadinDag, + } = useDagServiceGetDagDetails({ + dagId, + }); const { data: dagRun, @@ -50,19 +57,10 @@ export const Run = () => { }, undefined, { - refetchInterval: (query) => - isStatePending(query.state.data?.state) ? autoRefreshInterval * 1000 : false, + refetchInterval: (query) => (isStatePending(query.state.data?.state) ? refetchInterval : false), }, ); - const { - data: dag, - error: dagError, - isLoading: isLoadinDag, - } = useDagServiceGetDagDetails({ - dagId, - }); - return ( }> @@ -75,7 +73,10 @@ export const Run = () => { {runId} {dagRun === undefined ? undefined : ( -
+
)} ); diff --git a/airflow/ui/src/pages/Run/TaskInstances.tsx b/airflow/ui/src/pages/Run/TaskInstances.tsx index e7e57375b9e8b..7ba8be6e5bfe1 100644 --- a/airflow/ui/src/pages/Run/TaskInstances.tsx +++ b/airflow/ui/src/pages/Run/TaskInstances.tsx @@ -40,10 +40,8 @@ import { StateBadge } from "src/components/StateBadge"; import Time from "src/components/Time"; import { Select } from "src/components/ui"; import { SearchParamsKeys, type SearchParamsKeysType } from "src/constants/searchParams"; -import { useConfig } from "src/queries/useConfig"; -import { capitalize, getDuration } from "src/utils"; +import { capitalize, getDuration, useAutoRefresh, isStatePending } from "src/utils"; import { getTaskInstanceLink } from "src/utils/links"; -import { isStatePending } from "src/utils/refresh"; const columns: Array> = [ { @@ -180,9 +178,9 @@ export const TaskInstances = () => { setSearchParams(searchParams); }; - const autoRefreshInterval = useConfig("auto_refresh_interval") as number; + const refetchInterval = useAutoRefresh({ dagId }); - const { data, error, isFetching, isLoading } = useTaskInstanceServiceGetTaskInstances( + const { data, error, isLoading } = useTaskInstanceServiceGetTaskInstances( { dagId, dagRunId: runId, @@ -196,9 +194,7 @@ export const TaskInstances = () => { { enabled: !isNaN(pagination.pageSize), refetchInterval: (query) => - query.state.data?.task_instances.some((ti) => isStatePending(ti.state)) - ? autoRefreshInterval * 1000 - : false, + query.state.data?.task_instances.some((ti) => isStatePending(ti.state)) ? refetchInterval : false, }, ); @@ -258,7 +254,6 @@ export const TaskInstances = () => { data={data?.task_instances ?? []} errorMessage={} initialState={tableURLState} - isFetching={isFetching} isLoading={isLoading} modelName="Task Instance" onStateChange={setTableURLState} diff --git a/airflow/ui/src/pages/TaskInstance/Details.tsx b/airflow/ui/src/pages/TaskInstance/Details.tsx index dc7074f16476d..9cc8a756572c7 100644 --- a/airflow/ui/src/pages/TaskInstance/Details.tsx +++ b/airflow/ui/src/pages/TaskInstance/Details.tsx @@ -27,9 +27,7 @@ import { StateBadge } from "src/components/StateBadge"; import { TaskTrySelect } from "src/components/TaskTrySelect"; import Time from "src/components/Time"; import { ClipboardRoot, ClipboardIconButton } from "src/components/ui"; -import { useConfig } from "src/queries/useConfig"; -import { getDuration } from "src/utils"; -import { isStatePending } from "src/utils/refresh"; +import { getDuration, useAutoRefresh, isStatePending } from "src/utils"; export const Details = () => { const { dagId = "", runId = "", taskId = "" } = useParams(); @@ -39,8 +37,6 @@ export const Details = () => { const tryNumberParam = searchParams.get("try_number"); const mapIndex = parseInt(mapIndexParam ?? "-1", 10); - const autoRefreshInterval = useConfig("auto_refresh_interval") as number; - const { data: taskInstance } = useTaskInstanceServiceGetMappedTaskInstance({ dagId, dagRunId: runId, @@ -59,6 +55,8 @@ export const Details = () => { const tryNumber = tryNumberParam === null ? taskInstance?.try_number : parseInt(tryNumberParam, 10); + const refetchInterval = useAutoRefresh({ dagId }); + const { data: tryInstance } = useTaskInstanceServiceGetTaskInstanceTryDetails( { dagId, @@ -69,8 +67,7 @@ export const Details = () => { }, undefined, { - refetchInterval: (query) => - isStatePending(query.state.data?.state) ? autoRefreshInterval * 1000 : false, + refetchInterval: (query) => (isStatePending(query.state.data?.state) ? 
refetchInterval : false), }, ); diff --git a/airflow/ui/src/pages/TaskInstance/TaskInstance.tsx b/airflow/ui/src/pages/TaskInstance/TaskInstance.tsx index fc5b46883ed0f..8e0013a7dfda3 100644 --- a/airflow/ui/src/pages/TaskInstance/TaskInstance.tsx +++ b/airflow/ui/src/pages/TaskInstance/TaskInstance.tsx @@ -22,8 +22,7 @@ import { useParams, Link as RouterLink, useSearchParams } from "react-router-dom import { useDagServiceGetDagDetails, useTaskInstanceServiceGetMappedTaskInstance } from "openapi/queries"; import { Breadcrumb } from "src/components/ui"; import { DetailsLayout } from "src/layouts/Details/DetailsLayout"; -import { useConfig } from "src/queries/useConfig"; -import { isStatePending } from "src/utils/refresh"; +import { isStatePending, useAutoRefresh } from "src/utils"; import { Header } from "./Header"; @@ -42,7 +41,15 @@ export const TaskInstance = () => { const mapIndexParam = searchParams.get("map_index"); const mapIndex = parseInt(mapIndexParam ?? "-1", 10); - const autoRefreshInterval = useConfig("auto_refresh_interval") as number; + const refetchInterval = useAutoRefresh({ dagId }); + + const { + data: dag, + error: dagError, + isLoading: isDagLoading, + } = useDagServiceGetDagDetails({ + dagId, + }); const { data: taskInstance, @@ -57,19 +64,10 @@ export const TaskInstance = () => { }, undefined, { - refetchInterval: (query) => - isStatePending(query.state.data?.state) ? autoRefreshInterval * 1000 : false, + refetchInterval: (query) => (isStatePending(query.state.data?.state) ? refetchInterval : false), }, ); - const { - data: dag, - error: dagError, - isLoading: isDagLoading, - } = useDagServiceGetDagDetails({ - dagId, - }); - const links = [ { label: "Dags", value: "/dags" }, { label: dag?.dag_display_name ?? dagId, value: `/dags/${dagId}` }, @@ -102,7 +100,7 @@ export const TaskInstance = () => { {taskInstance === undefined ? undefined : (
)} diff --git a/airflow/ui/src/queries/useDags.tsx b/airflow/ui/src/queries/useDags.tsx index 1b9cacb05d79b..9cecc48a427ae 100644 --- a/airflow/ui/src/queries/useDags.tsx +++ b/airflow/ui/src/queries/useDags.tsx @@ -18,6 +18,7 @@ */ import { useDagServiceGetDags, useDagsServiceRecentDagRuns } from "openapi/queries"; import type { DagRunState, DAGWithLatestDagRunsResponse } from "openapi/requests/types.gen"; +import { isStatePending, useAutoRefresh } from "src/utils"; export type DagWithLatest = { last_run_start_date: string; @@ -39,16 +40,27 @@ export const useDags = ( ) => { const { data, error, isFetching, isLoading } = useDagServiceGetDags(searchParams); + const refetchInterval = useAutoRefresh({}); + const { orderBy, ...runsParams } = searchParams; const { data: runsData, error: runsError, isFetching: isRunsFetching, isLoading: isRunsLoading, - } = useDagsServiceRecentDagRuns({ - ...runsParams, - dagRunsLimit: 14, - }); + } = useDagsServiceRecentDagRuns( + { + ...runsParams, + dagRunsLimit: 14, + }, + undefined, + { + refetchInterval: (query) => + query.state.data?.dags.some((dag) => dag.latest_dag_runs.some((dr) => isStatePending(dr.state))) + ? refetchInterval + : false, + }, + ); const dags = (data?.dags ?? []).map((dag) => { const dagWithRuns = runsData?.dags.find((runsDag) => runsDag.dag_id === dag.dag_id); diff --git a/airflow/ui/src/queries/useLogs.tsx b/airflow/ui/src/queries/useLogs.tsx index bf7a164dfa52a..c165e13c0b893 100644 --- a/airflow/ui/src/queries/useLogs.tsx +++ b/airflow/ui/src/queries/useLogs.tsx @@ -20,9 +20,7 @@ import dayjs from "dayjs"; import { useTaskInstanceServiceGetLog } from "openapi/queries"; import type { TaskInstanceResponse } from "openapi/requests/types.gen"; -import { isStatePending } from "src/utils/refresh"; - -import { useConfig } from "./useConfig"; +import { isStatePending, useAutoRefresh } from "src/utils"; type Props = { dagId: string; @@ -62,7 +60,8 @@ const parseLogs = ({ data }: ParseLogsProps) => { }; export const useLogs = ({ dagId, taskInstance, tryNumber = 1 }: Props) => { - const autoRefreshInterval = useConfig("auto_refresh_interval") as number; + const refetchInterval = useAutoRefresh({ dagId }); + const { data, ...rest } = useTaskInstanceServiceGetLog( { dagId, @@ -77,8 +76,8 @@ export const useLogs = ({ dagId, taskInstance, tryNumber = 1 }: Props) => { refetchInterval: (query) => isStatePending(taskInstance?.state) || dayjs(query.state.dataUpdatedAt).isBefore(taskInstance?.end_date) - ? autoRefreshInterval * 1000 - : autoRefreshInterval * 10 * 1000, + ? 
refetchInterval + : false, }, ); diff --git a/airflow/ui/src/queries/useTrigger.ts b/airflow/ui/src/queries/useTrigger.ts index f21964cc3c141..0a6e6f492abb9 100644 --- a/airflow/ui/src/queries/useTrigger.ts +++ b/airflow/ui/src/queries/useTrigger.ts @@ -24,24 +24,28 @@ import { useDagRunServiceTriggerDagRun, useDagServiceGetDagsKey, useDagsServiceRecentDagRunsKey, + useTaskInstanceServiceGetTaskInstancesKey, } from "openapi/queries"; import type { DagRunTriggerParams } from "src/components/TriggerDag/TriggerDAGForm"; import { toaster } from "src/components/ui"; +import { doQueryKeysMatch, type PartialQueryKey } from "src/utils"; -export const useTrigger = ({ onSuccessConfirm }: { onSuccessConfirm: () => void }) => { +export const useTrigger = ({ dagId, onSuccessConfirm }: { dagId: string; onSuccessConfirm: () => void }) => { const queryClient = useQueryClient(); const [error, setError] = useState(undefined); const [dateValidationError, setDateValidationError] = useState(undefined); const onSuccess = async () => { - const queryKeys = [ - useDagServiceGetDagsKey, - useDagsServiceRecentDagRunsKey, - useDagRunServiceGetDagRunsKey, + const queryKeys: Array = [ + { baseKey: useDagServiceGetDagsKey }, + { baseKey: useDagsServiceRecentDagRunsKey }, + { baseKey: useDagRunServiceGetDagRunsKey, options: { dagIds: [dagId] } }, + { baseKey: useTaskInstanceServiceGetTaskInstancesKey, options: { dagId, dagRunId: "~" } }, ]; - await Promise.all(queryKeys.map((key) => queryClient.invalidateQueries({ queryKey: [key] }))); + await queryClient.invalidateQueries({ predicate: (query) => doQueryKeysMatch(query, queryKeys) }); + toaster.create({ description: "DAG run has been successfully triggered.", title: "DAG Run Request Submitted", @@ -59,7 +63,7 @@ export const useTrigger = ({ onSuccessConfirm }: { onSuccessConfirm: () => void onSuccess, }); - const triggerDagRun = (dagId: string, dagRunRequestBody: DagRunTriggerParams) => { + const triggerDagRun = (dagRunRequestBody: DagRunTriggerParams) => { const parsedConfig = JSON.parse(dagRunRequestBody.conf) as Record; const DataIntervalStart = dagRunRequestBody.dataIntervalStart diff --git a/airflow/ui/src/router.tsx b/airflow/ui/src/router.tsx index 5e5f1b3a576a7..24a5c7240cb50 100644 --- a/airflow/ui/src/router.tsx +++ b/airflow/ui/src/router.tsx @@ -31,6 +31,7 @@ import { DagsList } from "src/pages/DagsList"; import { Dashboard } from "src/pages/Dashboard"; import { ErrorPage } from "src/pages/Error"; import { Events } from "src/pages/Events"; +import { Providers } from "src/pages/Providers"; import { Run } from "src/pages/Run"; import { Details as DagRunDetails } from "src/pages/Run/Details"; import { TaskInstances } from "src/pages/Run/TaskInstances"; @@ -70,6 +71,10 @@ export const routerConfig = [ element: , path: "pools", }, + { + element: , + path: "providers", + }, { children: [ { element: , index: true }, diff --git a/airflow/ui/src/utils/refresh.ts b/airflow/ui/src/utils/getMetaKey.ts similarity index 69% rename from airflow/ui/src/utils/refresh.ts rename to airflow/ui/src/utils/getMetaKey.ts index d9ecb618c0e95..4e1e867c2c268 100644 --- a/airflow/ui/src/utils/refresh.ts +++ b/airflow/ui/src/utils/getMetaKey.ts @@ -16,14 +16,5 @@ * specific language governing permissions and limitations * under the License. 
*/ -import type { TaskInstanceState } from "openapi/requests/types.gen"; -export const isStatePending = (state?: TaskInstanceState | null) => - state === "deferred" || - state === "scheduled" || - state === "running" || - state === "up_for_reschedule" || - state === "up_for_retry" || - state === "queued" || - state === "restarting" || - !Boolean(state); +export const getMetaKey = () => (navigator.appVersion.includes("Mac") ? "⌘" : "Ctrl"); diff --git a/airflow/ui/src/utils/index.ts b/airflow/ui/src/utils/index.ts index bdfe8ac8d8002..632d24b3f450f 100644 --- a/airflow/ui/src/utils/index.ts +++ b/airflow/ui/src/utils/index.ts @@ -20,3 +20,5 @@ export { capitalize } from "./capitalize"; export { pluralize } from "./pluralize"; export { getDuration } from "./datetime_utils"; +export { getMetaKey } from "./getMetaKey"; +export * from "./query"; diff --git a/airflow/ui/src/utils/query.ts b/airflow/ui/src/utils/query.ts new file mode 100644 index 0000000000000..0db7b895903e2 --- /dev/null +++ b/airflow/ui/src/utils/query.ts @@ -0,0 +1,69 @@ +/*! + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +import type { Query } from "@tanstack/react-query"; + +import { useDagServiceGetDagDetails } from "openapi/queries"; +import type { TaskInstanceState } from "openapi/requests/types.gen"; +import { useConfig } from "src/queries/useConfig"; + +export const isStatePending = (state?: TaskInstanceState | null) => + state === "deferred" || + state === "scheduled" || + state === "running" || + state === "up_for_reschedule" || + state === "up_for_retry" || + state === "queued" || + state === "restarting" || + !Boolean(state); + +export type PartialQueryKey = { baseKey: string; options?: Record }; + +// This allows us to specify what query key values we actually care about and ignore the rest +// ex: match everything with this dagId and dagRunId but ignore anything related to pagination +export const doQueryKeysMatch = (query: Query, queryKeysToMatch: Array) => { + const [baseKey, options] = query.queryKey; + + const matchedKey = queryKeysToMatch.find((qk) => qk.baseKey === baseKey); + + if (!matchedKey) { + return false; + } + + return matchedKey.options + ? Object.entries(matchedKey.options).every( + ([key, value]) => typeof options === "object" && (options as Record)[key] === value, + ) + : true; +}; + +export const useAutoRefresh = ({ dagId }: { dagId?: string }) => { + const autoRefreshInterval = useConfig("auto_refresh_interval") as number | undefined; + const { data: dag } = useDagServiceGetDagDetails( + { + dagId: dagId ?? "", + }, + undefined, + { enabled: dagId !== undefined }, + ); + + const canRefresh = autoRefreshInterval !== undefined && (dagId === undefined ? 
true : !dag?.is_paused); + + // eslint-disable-next-line @typescript-eslint/no-unnecessary-type-assertion + return (canRefresh ? autoRefreshInterval * 1000 : false) as number | false; +}; diff --git a/airflow/utils/context.py b/airflow/utils/context.py index 6ed1399fe63f6..0415542c6ca8c 100644 --- a/airflow/utils/context.py +++ b/airflow/utils/context.py @@ -303,7 +303,7 @@ def context_update_for_unmapped(context: Context, task: BaseOperator) -> None: :meta private: """ - from airflow.models.param import process_params + from airflow.sdk.definitions.param import process_params context["task"] = context["ti"].task = task context["params"] = process_params( diff --git a/airflow/utils/db.py b/airflow/utils/db.py index 9e21885eb3a99..0dc4dbb1b61f4 100644 --- a/airflow/utils/db.py +++ b/airflow/utils/db.py @@ -94,7 +94,7 @@ class MappedClassProtocol(Protocol): "2.9.2": "686269002441", "2.10.0": "22ed7efa9da2", "2.10.3": "5f2621c13b39", - "3.0.0": "8ea135928435", + "3.0.0": "33b04e4bfa19", } diff --git a/dev/breeze/src/airflow_breeze/utils/kubernetes_utils.py b/dev/breeze/src/airflow_breeze/utils/kubernetes_utils.py index 8525d1e9acc50..a7780debb2ff9 100644 --- a/dev/breeze/src/airflow_breeze/utils/kubernetes_utils.py +++ b/dev/breeze/src/airflow_breeze/utils/kubernetes_utils.py @@ -33,7 +33,7 @@ from typing import Any, NamedTuple from urllib import request -from airflow_breeze.branch_defaults import AIRFLOW_BRANCH +from airflow_breeze.branch_defaults import DEFAULT_AIRFLOW_CONSTRAINTS_BRANCH from airflow_breeze.global_constants import ( ALLOWED_ARCHITECTURES, ALLOWED_PYTHON_MAJOR_MINOR_VERSIONS, @@ -336,7 +336,7 @@ def _install_packages_in_k8s_virtualenv(): "--constraint", "https://raw.githubusercontent.com/" f"{APACHE_AIRFLOW_GITHUB_REPOSITORY}/" - f"constraints-{AIRFLOW_BRANCH}/constraints-{python_major_minor_version}.txt", + f"{DEFAULT_AIRFLOW_CONSTRAINTS_BRANCH}/constraints-{python_major_minor_version}.txt", ], ) install_packages_result = run_command( diff --git a/docs/.gitignore b/docs/.gitignore index 1f80c74e825a2..b6b1a0286949a 100644 --- a/docs/.gitignore +++ b/docs/.gitignore @@ -4,6 +4,8 @@ apache-airflow-providers-airbyte apache-airflow-providers-alibaba apache-airflow-providers-apache-beam apache-airflow-providers-apache-cassandra +apache-airflow-providers-apache-drill +apache-airflow-providers-apache-druid apache-airflow-providers-apache-iceberg apache-airflow-providers-apache-kafka apache-airflow-providers-apache-kylin @@ -11,7 +13,9 @@ apache-airflow-providers-apache-livy apache-airflow-providers-apache-pig apache-airflow-providers-apache-pinot apache-airflow-providers-apache-spark +apache-airflow-providers-arangodb apache-airflow-providers-apprise +apache-airflow-providers-arangodb apache-airflow-providers-asana apache-airflow-providers-atlassian-jira apache-airflow-providers-celery @@ -20,12 +24,15 @@ apache-airflow-providers-common-compat apache-airflow-providers-common-io apache-airflow-providers-common-sql apache-airflow-providers-datadog +apache-airflow-providers-dbt-cloud apache-airflow-providers-discord apache-airflow-providers-docker apache-airflow-providers-edge +apache-airflow-providers-elasticsearch apache-airflow-providers-exasol apache-airflow-providers-facebook apache-airflow-providers-ftp +apache-airflow-providers-github apache-airflow-providers-http apache-airflow-providers-influxdb apache-airflow-providers-mongo @@ -35,6 +42,7 @@ apache-airflow-providers-imap apache-airflow-providers-neo4j apache-airflow-providers-openai apache-airflow-providers-openfaas 
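To make the refetch wiring that this PR repeats across Grid, Runs, TaskInstances, DagCard and useDags easier to follow, here is a minimal TypeScript sketch of how a view can combine the new useAutoRefresh hook with a TanStack Query refetchInterval callback. It is only an illustration, not part of the patch: it assumes the isStatePending/useAutoRefresh exports and the generated useDagRunServiceGetDagRuns hook shown in this diff, and the ExampleRunList component is hypothetical.

import { useDagRunServiceGetDagRuns } from "openapi/queries";
import { isStatePending, useAutoRefresh } from "src/utils";

// Hypothetical component illustrating the polling pattern used throughout this PR.
export const ExampleRunList = ({ dagId }: { readonly dagId: string }) => {
  // Resolves to auto_refresh_interval * 1000 when refresh is allowed
  // (interval configured and the dag is not paused), otherwise false.
  const refetchInterval = useAutoRefresh({ dagId });

  const { data } = useDagRunServiceGetDagRuns({ dagId }, undefined, {
    // Keep polling only while at least one run is still in a pending state.
    refetchInterval: (query) =>
      query.state.data?.dag_runs.some((run) => isStatePending(run.state)) ? refetchInterval : false,
  });

  return (
    <ul>
      {data?.dag_runs.map((run) => (
        <li key={run.dag_run_id}>
          {run.dag_run_id}: {run.state}
        </li>
      ))}
    </ul>
  );
};

The shape is the same everywhere in the diff: polling continues only while some run or task instance is pending, and stops entirely when the dag is paused or no auto_refresh_interval is configured.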
+apache-airflow-providers-opensearch apache-airflow-providers-opsgenie apache-airflow-providers-papermill apache-airflow-providers-pgvector @@ -50,6 +58,7 @@ apache-airflow-providers-presto apache-airflow-providers-qdrant apache-airflow-providers-samba apache-airflow-providers-segment +apache-airflow-providers-sendgrid apache-airflow-providers-sftp apache-airflow-providers-singularity apache-airflow-providers-slack diff --git a/docs/apache-airflow-providers-apache-drill/changelog.rst b/docs/apache-airflow-providers-apache-drill/changelog.rst deleted file mode 100644 index 79971613d2f63..0000000000000 --- a/docs/apache-airflow-providers-apache-drill/changelog.rst +++ /dev/null @@ -1,25 +0,0 @@ - - .. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - .. http://www.apache.org/licenses/LICENSE-2.0 - - .. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - - .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE - OVERWRITTEN WHEN PREPARING PACKAGES. - - .. IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE - `PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY - -.. include:: ../../providers/src/airflow/providers/apache/drill/CHANGELOG.rst diff --git a/docs/apache-airflow-providers-apache-druid/changelog.rst b/docs/apache-airflow-providers-apache-druid/changelog.rst deleted file mode 100644 index 652948c8ee8c2..0000000000000 --- a/docs/apache-airflow-providers-apache-druid/changelog.rst +++ /dev/null @@ -1,25 +0,0 @@ - - .. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - .. http://www.apache.org/licenses/LICENSE-2.0 - - .. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - - .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE - OVERWRITTEN WHEN PREPARING PACKAGES. - - .. IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE - `PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY - -.. include:: ../../providers/src/airflow/providers/apache/druid/CHANGELOG.rst diff --git a/docs/apache-airflow-providers-arangodb/changelog.rst b/docs/apache-airflow-providers-arangodb/changelog.rst deleted file mode 100644 index c4229fd8a9c80..0000000000000 --- a/docs/apache-airflow-providers-arangodb/changelog.rst +++ /dev/null @@ -1,25 +0,0 @@ - - .. 
Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - .. http://www.apache.org/licenses/LICENSE-2.0 - - .. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - - .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE - OVERWRITTEN WHEN PREPARING PACKAGES. - - .. IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE - `PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY - -.. include:: ../../providers/src/airflow/providers/arangodb/CHANGELOG.rst diff --git a/docs/apache-airflow-providers-databricks/operators/run_now.rst b/docs/apache-airflow-providers-databricks/operators/run_now.rst index facf47e7d6c56..17259b1a1b462 100644 --- a/docs/apache-airflow-providers-databricks/operators/run_now.rst +++ b/docs/apache-airflow-providers-databricks/operators/run_now.rst @@ -42,7 +42,7 @@ All other parameters are optional and described in documentation for ``Databrick * ``notebook_params`` * ``python_params`` -* ``python_named_parameters`` +* ``python_named_params`` * ``jar_params`` * ``spark_submit_params`` * ``idempotency_token`` diff --git a/docs/apache-airflow-providers-dbt-cloud/changelog.rst b/docs/apache-airflow-providers-dbt-cloud/changelog.rst deleted file mode 100644 index be4203ad0c942..0000000000000 --- a/docs/apache-airflow-providers-dbt-cloud/changelog.rst +++ /dev/null @@ -1,25 +0,0 @@ - - .. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - .. http://www.apache.org/licenses/LICENSE-2.0 - - .. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - - .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE - OVERWRITTEN WHEN PREPARING PACKAGES. - - .. IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE - `PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY - -.. include:: ../../providers/src/airflow/providers/dbt/cloud/CHANGELOG.rst diff --git a/docs/apache-airflow-providers-elasticsearch/changelog.rst b/docs/apache-airflow-providers-elasticsearch/changelog.rst deleted file mode 100644 index 840359e3b0a4e..0000000000000 --- a/docs/apache-airflow-providers-elasticsearch/changelog.rst +++ /dev/null @@ -1,25 +0,0 @@ - - .. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. 
See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - .. http://www.apache.org/licenses/LICENSE-2.0 - - .. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - - .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE - OVERWRITTEN WHEN PREPARING PACKAGES. - - .. IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE - `PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY - -.. include:: ../../providers/src/airflow/providers/elasticsearch/CHANGELOG.rst diff --git a/docs/apache-airflow-providers-github/changelog.rst b/docs/apache-airflow-providers-github/changelog.rst deleted file mode 100644 index 231425db490d2..0000000000000 --- a/docs/apache-airflow-providers-github/changelog.rst +++ /dev/null @@ -1,25 +0,0 @@ - - .. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - .. http://www.apache.org/licenses/LICENSE-2.0 - - .. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - - .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE - OVERWRITTEN WHEN PREPARING PACKAGES. - - .. IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE - `PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY - -.. include:: ../../providers/src/airflow/providers/github/CHANGELOG.rst diff --git a/docs/apache-airflow-providers-google/operators/cloud/automl.rst b/docs/apache-airflow-providers-google/operators/cloud/automl.rst index 4eb461409fa3c..4cb6a9724cadb 100644 --- a/docs/apache-airflow-providers-google/operators/cloud/automl.rst +++ b/docs/apache-airflow-providers-google/operators/cloud/automl.rst @@ -163,25 +163,21 @@ You can find example on how to use VertexAI operators here: :end-before: [END how_to_cloud_vertex_ai_delete_model_operator] .. _howto/operator:AutoMLPredictOperator: -.. _howto/operator:AutoMLBatchPredictOperator: Making Predictions ^^^^^^^^^^^^^^^^^^ To obtain predictions from Google Cloud AutoML model you can use -:class:`~airflow.providers.google.cloud.operators.automl.AutoMLPredictOperator` or -:class:`~airflow.providers.google.cloud.operators.automl.AutoMLBatchPredictOperator`. In the first case +:class:`~airflow.providers.google.cloud.operators.automl.AutoMLPredictOperator`. In the first case the model must be deployed. 
-Th :class:`~airflow.providers.google.cloud.operators.automl.AutoMLBatchPredictOperator` deprecated for tables, -video intelligence, vision and natural language is deprecated and will be removed after 31.03.2024. -Please use +For tables, video intelligence, vision and natural language you can use the following operators: + :class:`airflow.providers.google.cloud.operators.vertex_ai.batch_prediction_job.CreateBatchPredictionJobOperator`, :class:`airflow.providers.google.cloud.operators.vertex_ai.batch_prediction_job.GetBatchPredictionJobOperator`, :class:`airflow.providers.google.cloud.operators.vertex_ai.batch_prediction_job.ListBatchPredictionJobsOperator`, -:class:`airflow.providers.google.cloud.operators.vertex_ai.batch_prediction_job.DeleteBatchPredictionJobOperator`, -instead. +:class:`airflow.providers.google.cloud.operators.vertex_ai.batch_prediction_job.DeleteBatchPredictionJobOperator`. You can find examples on how to use VertexAI operators here: .. exampleinclude:: /../../providers/tests/system/google/cloud/vertex_ai/example_vertex_ai_batch_prediction_job.py diff --git a/docs/apache-airflow-providers-google/operators/cloud/dataflow.rst b/docs/apache-airflow-providers-google/operators/cloud/dataflow.rst index 3213aec60690e..6dd405ce93213 100644 --- a/docs/apache-airflow-providers-google/operators/cloud/dataflow.rst +++ b/docs/apache-airflow-providers-google/operators/cloud/dataflow.rst @@ -54,9 +54,6 @@ There are several ways to run a Dataflow pipeline depending on your environment, command-line tool to build and save the Flex Template spec file in Cloud Storage. See: :ref:`howto/operator:DataflowStartFlexTemplateOperator` -- **SQL pipeline**: Developer can write pipeline as SQL statement and then execute it in Dataflow. See: - :ref:`howto/operator:DataflowStartSqlJobOperator` - It is a good idea to test your pipeline using the non-templated pipeline, and then run the pipeline in production using the templates. @@ -283,29 +280,6 @@ Also for this action you can use the operator in the deferrable mode: :start-after: [START howto_operator_start_flex_template_job_deferrable] :end-before: [END howto_operator_start_flex_template_job_deferrable] -.. _howto/operator:DataflowStartSqlJobOperator: - -Dataflow SQL -"""""""""""" -Dataflow SQL supports a variant of the ZetaSQL query syntax and includes additional streaming -extensions for running Dataflow streaming jobs. - -Here is an example of running Dataflow SQL job with -:class:`~airflow.providers.google.cloud.operators.dataflow.DataflowStartSqlJobOperator`: - -.. exampleinclude:: /../../providers/tests/system/google/cloud/dataflow/example_dataflow_sql.py - :language: python - :dedent: 4 - :start-after: [START howto_operator_start_sql_job] - :end-before: [END howto_operator_start_sql_job] - -.. warning:: - This operator requires ``gcloud`` command (Google Cloud SDK) must be installed on the Airflow worker - `__ - -See the `Dataflow SQL reference -`_. - .. _howto/operator:DataflowStartYamlJobOperator: Dataflow YAML diff --git a/docs/apache-airflow-providers-opensearch/changelog.rst b/docs/apache-airflow-providers-opensearch/changelog.rst deleted file mode 100644 index 21f39a6c1f6ea..0000000000000 --- a/docs/apache-airflow-providers-opensearch/changelog.rst +++ /dev/null @@ -1,25 +0,0 @@ - - .. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. 
The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - .. http://www.apache.org/licenses/LICENSE-2.0 - - .. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - - .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE - OVERWRITTEN WHEN PREPARING PACKAGES. - - .. IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE - `PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY - -.. include:: ../../providers/src/airflow/providers/opensearch/CHANGELOG.rst diff --git a/docs/apache-airflow-providers-sendgrid/changelog.rst b/docs/apache-airflow-providers-sendgrid/changelog.rst deleted file mode 100644 index 913b7477b13a2..0000000000000 --- a/docs/apache-airflow-providers-sendgrid/changelog.rst +++ /dev/null @@ -1,25 +0,0 @@ - - .. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - .. http://www.apache.org/licenses/LICENSE-2.0 - - .. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - - .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE - OVERWRITTEN WHEN PREPARING PACKAGES. - - .. IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE - `PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY - -.. include:: ../../providers/src/airflow/providers/sendgrid/CHANGELOG.rst diff --git a/docs/apache-airflow/core-concepts/params.rst b/docs/apache-airflow/core-concepts/params.rst index b54026ccb22fc..8c1c98cd1724c 100644 --- a/docs/apache-airflow/core-concepts/params.rst +++ b/docs/apache-airflow/core-concepts/params.rst @@ -22,8 +22,8 @@ Params Params enable you to provide runtime configuration to tasks. You can configure default Params in your DAG code and supply additional Params, or overwrite Param values, at runtime when you trigger a DAG. -:class:`~airflow.models.param.Param` values are validated with JSON Schema. For scheduled DAG runs, -default :class:`~airflow.models.param.Param` values are used. +:class:`~airflow.sdk.definitions.param.Param` values are validated with JSON Schema. For scheduled DAG runs, +default :class:`~airflow.sdk.definitions.param.Param` values are used. Also defined Params are used to render a nice UI when triggering manually. When you trigger a DAG manually, you can modify its Params before the dagrun starts. @@ -33,14 +33,14 @@ DAG-level Params ---------------- To add Params to a :class:`~airflow.models.dag.DAG`, initialize it with the ``params`` kwarg. 
-Use a dictionary that maps Param names to either a :class:`~airflow.models.param.Param` or an object indicating the parameter's default value. +Use a dictionary that maps Param names to either a :class:`~airflow.sdk.definitions.param.Param` or an object indicating the parameter's default value. .. code-block:: :emphasize-lines: 7-10 from airflow import DAG from airflow.decorators import task - from airflow.models.param import Param + from airflow.sdk import Param with DAG( "the_dag", @@ -127,7 +127,7 @@ You can change this by setting ``render_template_as_native_obj=True`` while init ): -This way, the :class:`~airflow.models.param.Param`'s type is respected when it's provided to your task: +This way, the :class:`~airflow.sdk.definitions.param.Param`'s type is respected when it's provided to your task: .. code-block:: @@ -160,7 +160,7 @@ Another way to access your param is via a task's ``context`` kwarg. JSON Schema Validation ---------------------- -:class:`~airflow.models.param.Param` makes use of `JSON Schema `_, so you can use the full JSON Schema specifications mentioned at https://json-schema.org/draft/2020-12/json-schema-validation.html to define ``Param`` objects. +:class:`~airflow.sdk.definitions.param.Param` makes use of `JSON Schema `_, so you can use the full JSON Schema specifications mentioned at https://json-schema.org/draft/2020-12/json-schema-validation.html to define ``Param`` objects. .. code-block:: @@ -195,8 +195,8 @@ JSON Schema Validation at time of trigger. .. note:: - As of now, for security reasons, one can not use :class:`~airflow.models.param.Param` objects derived out of custom classes. We are - planning to have a registration system for custom :class:`~airflow.models.param.Param` classes, just like we've for Operator ExtraLinks. + As of now, for security reasons, one can not use :class:`~airflow.sdk.definitions.param.Param` objects derived out of custom classes. We are + planning to have a registration system for custom :class:`~airflow.sdk.definitions.param.Param` classes, just like we've for Operator ExtraLinks. Use Params to Provide a Trigger UI Form --------------------------------------- @@ -207,21 +207,21 @@ Use Params to Provide a Trigger UI Form This form is provided when a user clicks on the "Trigger DAG" button. The Trigger UI Form is rendered based on the pre-defined DAG Params. If the DAG has no params defined, the trigger form is skipped. -The form elements can be defined with the :class:`~airflow.models.param.Param` class and attributes define how a form field is displayed. +The form elements can be defined with the :class:`~airflow.sdk.definitions.param.Param` class and attributes define how a form field is displayed. The following features are supported in the Trigger UI Form: -- Direct scalar values (boolean, int, string, lists, dicts) from top-level DAG params are auto-boxed into :class:`~airflow.models.param.Param` objects. +- Direct scalar values (boolean, int, string, lists, dicts) from top-level DAG params are auto-boxed into :class:`~airflow.sdk.definitions.param.Param` objects. From the native Python data type the ``type`` attribute is auto detected. So these simple types render to a corresponding field type. The name of the parameter is used as label and no further validation is made, all values are treated as optional. 
-- If you use the :class:`~airflow.models.param.Param` class as definition of the parameter value, the following attributes can be added: +- If you use the :class:`~airflow.sdk.definitions.param.Param` class as definition of the parameter value, the following attributes can be added: - - The :class:`~airflow.models.param.Param` attribute ``title`` is used to render the form field label of the entry box. + - The :class:`~airflow.sdk.definitions.param.Param` attribute ``title`` is used to render the form field label of the entry box. If no ``title`` is defined the parameter name/key is used instead. - - The :class:`~airflow.models.param.Param` attribute ``description`` is rendered below an entry field as help text in gray color. + - The :class:`~airflow.sdk.definitions.param.Param` attribute ``description`` is rendered below an entry field as help text in gray color. If you want to provide special formatting or links you need to use the Param attribute ``description_md``. See tutorial DAG :ref:`Params UI example DAG ` for an example. - - The :class:`~airflow.models.param.Param` attribute ``type`` influences how a field is rendered. The following types are supported: + - The :class:`~airflow.sdk.definitions.param.Param` attribute ``type`` influences how a field is rendered. The following types are supported: .. list-table:: :header-rows: 1 diff --git a/docs/apache-airflow/howto/docker-compose/docker-compose.yaml b/docs/apache-airflow/howto/docker-compose/docker-compose.yaml index 761f3782df37e..ba24c0119d4bd 100644 --- a/docs/apache-airflow/howto/docker-compose/docker-compose.yaml +++ b/docs/apache-airflow/howto/docker-compose/docker-compose.yaml @@ -62,6 +62,7 @@ x-airflow-common: AIRFLOW__CORE__LOAD_EXAMPLES: 'true' AIRFLOW__API__AUTH_BACKENDS: >- airflow.providers.fab.auth_manager.api.auth.backend.basic_auth,airflow.api.auth.backend.session + AIRFLOW__WORKERS__EXECUTION_API_SERVER_URL: 'http://airflow-apiserver:9091/execution/' # yamllint disable rule:line-length # Use simple http server on scheduler for health checks # See https://airflow.apache.org/docs/apache-airflow/stable/administration-and-deployment/logging-monitoring/check-health.html#scheduler-health-check-server @@ -133,6 +134,23 @@ services: airflow-init: condition: service_completed_successfully + airflow-apiserver: + <<: *airflow-common + command: fastapi-api + ports: + - "9091:9091" + healthcheck: + test: ["CMD", "curl", "--fail", "http://localhost:9091/public/version"] + interval: 30s + timeout: 10s + retries: 5 + start_period: 30s + restart: always + depends_on: + <<: *airflow-common-depends-on + airflow-init: + condition: service_completed_successfully + airflow-scheduler: <<: *airflow-common command: scheduler @@ -183,6 +201,8 @@ services: restart: always depends_on: <<: *airflow-common-depends-on + airflow-apiserver: + condition: service_healthy airflow-init: condition: service_completed_successfully diff --git a/docs/apache-airflow/img/airflow_erd.sha256 b/docs/apache-airflow/img/airflow_erd.sha256 index 5b584764eb69e..e3f420e62c1d6 100644 --- a/docs/apache-airflow/img/airflow_erd.sha256 +++ b/docs/apache-airflow/img/airflow_erd.sha256 @@ -1 +1 @@ -ff7265e5bc09d6b46d8e95f0c247b3dc5b1262451ab128c711888ffafa21c9db \ No newline at end of file +829be35e333798f7c33c5fe0130ed12fad481c92145abc398ae23b815dd7b6ed \ No newline at end of file diff --git a/docs/apache-airflow/img/airflow_erd.svg b/docs/apache-airflow/img/airflow_erd.svg index 378cfdc0f259c..d20fa37b8ea66 100644 --- a/docs/apache-airflow/img/airflow_erd.svg +++ 
b/docs/apache-airflow/img/airflow_erd.svg
[ERD SVG regenerated. Readable content of the change: the ``task_instance`` and ``task_instance_history`` tables gain a ``scheduled_dttm`` [TIMESTAMP] column; the remaining hunks only re-render the existing tables (dag_run, task_reschedule, task_map, xcom, task_instance_note, rendered_task_instance_fields, deadline, backfill_dag_run, dag_run_note) and their relationships without schema changes.]
diff --git a/docs/apache-airflow/migrations-ref.rst b/docs/apache-airflow/migrations-ref.rst index b145e71d11ca3..633c2338f4064 100644 --- a/docs/apache-airflow/migrations-ref.rst +++ b/docs/apache-airflow/migrations-ref.rst @@ -39,7 +39,9 @@ Here's the list of all the Database Migrations that are executed via when you ru +-------------------------+------------------+-------------------+--------------------------------------------------------------+ | Revision ID | Revises ID | Airflow Version | Description | +=========================+==================+===================+==============================================================+ -| ``8ea135928435`` (head) | ``e39a26ac59f6`` | ``3.0.0`` | Add relative fileloc column. | +| ``33b04e4bfa19`` (head) | ``8ea135928435`` | ``3.0.0`` | add new task_instance field scheduled_dttm. | ++-------------------------+------------------+-------------------+--------------------------------------------------------------+ +| ``8ea135928435`` | ``e39a26ac59f6`` | ``3.0.0`` | Add relative fileloc column. 
| +-------------------------+------------------+-------------------+--------------------------------------------------------------+ | ``e39a26ac59f6`` | ``38770795785f`` | ``3.0.0`` | remove pickled data from dagrun table. | +-------------------------+------------------+-------------------+--------------------------------------------------------------+ diff --git a/docs/apache-airflow/public-airflow-interface.rst b/docs/apache-airflow/public-airflow-interface.rst index 2853c6fbe2e1b..d1ac63eb5b3a9 100644 --- a/docs/apache-airflow/public-airflow-interface.rst +++ b/docs/apache-airflow/public-airflow-interface.rst @@ -62,7 +62,7 @@ DAGs The DAG is Airflow's core entity that represents a recurring workflow. You can create a DAG by instantiating the :class:`~airflow.models.dag.DAG` class in your DAG file. You can also instantiate them via :class:`~airflow.models.dagbag.DagBag` class that reads DAGs from a file or a folder. DAGs -can also have parameters specified via :class:`~airflow.models.param.Param` class. +can also have parameters specified via :class:`~airflow.sdk.definitions.param.Param` class. Airflow has a set of example DAGs that you can use to learn how to write DAGs diff --git a/docs/docker-stack/recipes.rst b/docs/docker-stack/recipes.rst index 3402acb1019ca..7666fa892f747 100644 --- a/docs/docker-stack/recipes.rst +++ b/docs/docker-stack/recipes.rst @@ -26,8 +26,7 @@ Google Cloud SDK installation ----------------------------- Some operators, such as :class:`~airflow.providers.google.cloud.operators.kubernetes_engine.GKEStartPodOperator`, -:class:`~airflow.providers.google.cloud.operators.dataflow.DataflowStartSqlJobOperator`, require -the installation of `Google Cloud SDK `__ (includes ``gcloud``). +require the installation of `Google Cloud SDK `__ (includes ``gcloud``). You can also run these commands with BashOperator. Create a new Dockerfile like the one shown below. 
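The recipe above notes that the same commands can also be run with ``BashOperator`` once the Google Cloud SDK is available in the image. A minimal sketch, assuming the classic ``airflow.operators.bash`` import path and treating the exact ``gcloud`` invocation as a placeholder:

.. code-block:: python

    from airflow import DAG
    from airflow.operators.bash import BashOperator

    # Hypothetical task that exercises the gcloud CLI baked into the customized image.
    with DAG(dag_id="example_gcloud_bash"):
        check_sdk = BashOperator(
            task_id="gcloud_version",
            bash_command="gcloud version && gcloud config list",
        )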
diff --git a/generated/provider_dependencies.json b/generated/provider_dependencies.json index 681b18a0d2fef..848c0d9ac02c8 100644 --- a/generated/provider_dependencies.json +++ b/generated/provider_dependencies.json @@ -75,7 +75,8 @@ "apache.beam": { "deps": [ "apache-airflow>=2.9.0", - "apache-beam>=2.53.0", + "apache-beam>=2.53.0; python_version < \"3.12\"", + "apache-beam>=2.57.0; python_version >= \"3.12\"", "numpy>=1.26.0", "pyarrow>=14.0.1" ], @@ -336,7 +337,7 @@ "celery": { "deps": [ "apache-airflow>=2.9.0", - "celery[redis]>=5.3.0,<6,!=5.3.3,!=5.3.2", + "celery[redis]>=5.4.0,<6", "flower>=1.0.0", "google-re2>=1.0" ], diff --git a/providers/apache/beam/README.rst b/providers/apache/beam/README.rst index 7f61ac4e600bd..d55425f7cddae 100644 --- a/providers/apache/beam/README.rst +++ b/providers/apache/beam/README.rst @@ -51,14 +51,15 @@ The package supports the following python versions: 3.9,3.10,3.11,3.12 Requirements ------------ -================== ================== +================== ====================================== PIP package Version required -================== ================== +================== ====================================== ``apache-airflow`` ``>=2.9.0`` -``apache-beam`` ``>=2.53.0`` +``apache-beam`` ``>=2.53.0; python_version < "3.12"`` +``apache-beam`` ``>=2.57.0; python_version >= "3.12"`` ``pyarrow`` ``>=14.0.1`` ``numpy`` ``>=1.26.0`` -================== ================== +================== ====================================== Cross provider package dependencies ----------------------------------- diff --git a/providers/apache/beam/pyproject.toml b/providers/apache/beam/pyproject.toml index 9425bdfa5d3a1..9e8355d89a74f 100644 --- a/providers/apache/beam/pyproject.toml +++ b/providers/apache/beam/pyproject.toml @@ -57,9 +57,11 @@ requires-python = "~=3.9" dependencies = [ "apache-airflow>=2.9.0", # Apache Beam > 2.53.0 and pyarrow > 14.0.1 fix https://nvd.nist.gov/vuln/detail/CVE-2023-47248. - "apache-beam>=2.53.0", + 'apache-beam>=2.53.0; python_version < "3.12"', + 'apache-beam>=2.57.0; python_version >= "3.12"', "pyarrow>=14.0.1", "numpy>=1.26.0", + ] # The optional dependencies should be modified in place in the generated file diff --git a/providers/apache/beam/src/airflow/providers/apache/beam/get_provider_info.py b/providers/apache/beam/src/airflow/providers/apache/beam/get_provider_info.py index 479cfbeb41ba0..4f9e2055564eb 100644 --- a/providers/apache/beam/src/airflow/providers/apache/beam/get_provider_info.py +++ b/providers/apache/beam/src/airflow/providers/apache/beam/get_provider_info.py @@ -93,7 +93,13 @@ def get_provider_info(): "python-modules": ["airflow.providers.apache.beam.triggers.beam"], } ], - "dependencies": ["apache-airflow>=2.9.0", "apache-beam>=2.53.0", "pyarrow>=14.0.1", "numpy>=1.26.0"], + "dependencies": [ + "apache-airflow>=2.9.0", + 'apache-beam>=2.53.0; python_version < "3.12"', + 'apache-beam>=2.57.0; python_version >= "3.12"', + "pyarrow>=14.0.1", + "numpy>=1.26.0", + ], "optional-dependencies": { "google": ["apache-beam[gcp]"], "common.compat": ["apache-airflow-providers-common-compat"], diff --git a/providers/apache/drill/README.rst b/providers/apache/drill/README.rst new file mode 100644 index 0000000000000..0a6a843e9e19e --- /dev/null +++ b/providers/apache/drill/README.rst @@ -0,0 +1,82 @@ + + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! + + .. IF YOU WANT TO MODIFY TEMPLATE FOR THIS FILE, YOU SHOULD MODIFY THE TEMPLATE + `PROVIDER_README_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + + +Package ``apache-airflow-providers-apache-drill`` + +Release: ``3.0.0`` + + +`Apache Drill `__. + + +Provider package +---------------- + +This is a provider package for ``apache.drill`` provider. All classes for this provider package +are in ``airflow.providers.apache.drill`` python package. + +You can find package information and changelog for the provider +in the `documentation `_. + +Installation +------------ + +You can install this package on top of an existing Airflow 2 installation (see ``Requirements`` below +for the minimum Airflow version supported) via +``pip install apache-airflow-providers-apache-drill`` + +The package supports the following python versions: 3.9,3.10,3.11,3.12 + +Requirements +------------ + +======================================= ================== +PIP package Version required +======================================= ================== +``apache-airflow`` ``>=2.9.0`` +``apache-airflow-providers-common-sql`` ``>=1.20.0`` +``sqlalchemy-drill`` ``>=1.1.0`` +======================================= ================== + +Cross provider package dependencies +----------------------------------- + +Those are dependencies that might be needed in order to use all the features of the package. +You need to install the specified provider packages in order to use them. + +You can install such cross-provider dependencies when installing from PyPI. For example: + +.. code-block:: bash + + pip install apache-airflow-providers-apache-drill[common.sql] + + +============================================================================================================ ============== +Dependent package Extra +============================================================================================================ ============== +`apache-airflow-providers-common-sql `_ ``common.sql`` +============================================================================================================ ============== + +The changelog for the provider package can be found in the +`changelog `_. 
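The ``apache-beam`` pins above now differ per Python version through PEP 508 environment markers. As a sketch of how such markers resolve at install time, here is a small example using the ``packaging`` library (the library choice is mine, not part of this change):

.. code-block:: python

    from packaging.requirements import Requirement

    # The two requirement strings added for apache.beam in provider_dependencies.json.
    reqs = [
        Requirement('apache-beam>=2.53.0; python_version < "3.12"'),
        Requirement('apache-beam>=2.57.0; python_version >= "3.12"'),
    ]

    for req in reqs:
        # marker.evaluate() tests the marker against the running interpreter,
        # so exactly one of the two pins applies in any given environment.
        print(req.name, req.specifier, "applies:", req.marker.evaluate())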
diff --git a/providers/src/airflow/providers/apache/drill/.latest-doc-only-change.txt b/providers/apache/drill/docs/.latest-doc-only-change.txt similarity index 100% rename from providers/src/airflow/providers/apache/drill/.latest-doc-only-change.txt rename to providers/apache/drill/docs/.latest-doc-only-change.txt diff --git a/providers/src/airflow/providers/apache/drill/CHANGELOG.rst b/providers/apache/drill/docs/changelog.rst similarity index 100% rename from providers/src/airflow/providers/apache/drill/CHANGELOG.rst rename to providers/apache/drill/docs/changelog.rst diff --git a/docs/apache-airflow-providers-apache-drill/commits.rst b/providers/apache/drill/docs/commits.rst similarity index 100% rename from docs/apache-airflow-providers-apache-drill/commits.rst rename to providers/apache/drill/docs/commits.rst diff --git a/docs/apache-airflow-providers-apache-drill/connections/drill.rst b/providers/apache/drill/docs/connections/drill.rst similarity index 100% rename from docs/apache-airflow-providers-apache-drill/connections/drill.rst rename to providers/apache/drill/docs/connections/drill.rst diff --git a/docs/apache-airflow-providers-apache-drill/index.rst b/providers/apache/drill/docs/index.rst similarity index 100% rename from docs/apache-airflow-providers-apache-drill/index.rst rename to providers/apache/drill/docs/index.rst diff --git a/docs/apache-airflow-providers-apache-drill/installing-providers-from-sources.rst b/providers/apache/drill/docs/installing-providers-from-sources.rst similarity index 100% rename from docs/apache-airflow-providers-apache-drill/installing-providers-from-sources.rst rename to providers/apache/drill/docs/installing-providers-from-sources.rst diff --git a/docs/integration-logos/apache/drill.png b/providers/apache/drill/docs/integration-logos/drill.png similarity index 100% rename from docs/integration-logos/apache/drill.png rename to providers/apache/drill/docs/integration-logos/drill.png diff --git a/docs/apache-airflow-providers-apache-drill/operators.rst b/providers/apache/drill/docs/operators.rst similarity index 95% rename from docs/apache-airflow-providers-apache-drill/operators.rst rename to providers/apache/drill/docs/operators.rst index 47784b67fedc3..396964ef70e13 100644 --- a/docs/apache-airflow-providers-apache-drill/operators.rst +++ b/providers/apache/drill/docs/operators.rst @@ -39,7 +39,7 @@ The ``sql`` parameter can be templated and be an external ``.sql`` file. Using the operator """""""""""""""""" -.. exampleinclude:: /../../providers/tests/system/apache/drill/example_drill_dag.py +.. 
exampleinclude:: /../../providers/apache/drill/tests/system/apache/drill/example_drill_dag.py :language: python :dedent: 4 :start-after: [START howto_operator_drill] diff --git a/docs/apache-airflow-providers-apache-drill/security.rst b/providers/apache/drill/docs/security.rst similarity index 100% rename from docs/apache-airflow-providers-apache-drill/security.rst rename to providers/apache/drill/docs/security.rst diff --git a/providers/src/airflow/providers/apache/drill/provider.yaml b/providers/apache/drill/provider.yaml similarity index 91% rename from providers/src/airflow/providers/apache/drill/provider.yaml rename to providers/apache/drill/provider.yaml index 3ea76623b1dca..89007829985b3 100644 --- a/providers/src/airflow/providers/apache/drill/provider.yaml +++ b/providers/apache/drill/provider.yaml @@ -53,17 +53,12 @@ versions: - 1.0.1 - 1.0.0 -dependencies: - - apache-airflow>=2.9.0 - - apache-airflow-providers-common-sql>=1.20.0 - - sqlalchemy-drill>=1.1.0 - integrations: - integration-name: Apache Drill external-doc-url: https://drill.apache.org/ how-to-guide: - /docs/apache-airflow-providers-apache-drill/operators.rst - logo: /integration-logos/apache/drill.png + logo: /docs/integration-logos/drill.png tags: [apache] hooks: diff --git a/providers/apache/drill/pyproject.toml b/providers/apache/drill/pyproject.toml new file mode 100644 index 0000000000000..6dfb0c9caab63 --- /dev/null +++ b/providers/apache/drill/pyproject.toml @@ -0,0 +1,79 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! 
+ +# IF YOU WANT TO MODIFY THIS FILE EXCEPT DEPENDENCIES, YOU SHOULD MODIFY THE TEMPLATE +# `pyproject_TEMPLATE.toml.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY +[build-system] +requires = ["flit_core==3.10.1"] +build-backend = "flit_core.buildapi" + +[project] +name = "apache-airflow-providers-apache-drill" +version = "3.0.0" +description = "Provider package apache-airflow-providers-apache-drill for Apache Airflow" +readme = "README.rst" +authors = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +maintainers = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +keywords = [ "airflow-provider", "apache.drill", "airflow", "integration" ] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "Environment :: Web Environment", + "Intended Audience :: Developers", + "Intended Audience :: System Administrators", + "Framework :: Apache Airflow", + "Framework :: Apache Airflow :: Provider", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: System :: Monitoring", +] +requires-python = "~=3.9" + +# The dependencies should be modified in place in the generated file +# Any change in the dependencies is preserved when the file is regenerated +dependencies = [ + "apache-airflow>=2.9.0", + "apache-airflow-providers-common-sql>=1.20.0", + "sqlalchemy-drill>=1.1.0", +] + +[project.urls] +"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-apache-drill/3.0.0" +"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-apache-drill/3.0.0/changelog.html" +"Bug Tracker" = "https://github.com/apache/airflow/issues" +"Source Code" = "https://github.com/apache/airflow" +"Slack Chat" = "https://s.apache.org/airflow-slack" +"Twitter" = "https://x.com/ApacheAirflow" +"YouTube" = "https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/" + +[project.entry-points."apache_airflow_provider"] +provider_info = "airflow.providers.apache.drill.get_provider_info:get_provider_info" + +[tool.flit.module] +name = "airflow.providers.apache.drill" + +[tool.pytest.ini_options] +ignore = "tests/system/" diff --git a/providers/apache/drill/src/airflow/providers/apache/drill/LICENSE b/providers/apache/drill/src/airflow/providers/apache/drill/LICENSE new file mode 100644 index 0000000000000..11069edd79019 --- /dev/null +++ b/providers/apache/drill/src/airflow/providers/apache/drill/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
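The relocated drill ``operators.rst`` above still pulls its snippet from ``example_drill_dag.py``. As a rough illustration of the pattern that example wraps (not a copy of it), a query task built on the ``common.sql`` operator with Drill's default connection id:

.. code-block:: python

    from airflow import DAG
    from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator

    # Hypothetical Drill query task; the SQL and DAG id are illustrative only,
    # "drill_default" is the provider's default connection id.
    with DAG(dag_id="example_drill_query"):
        run_query = SQLExecuteQueryOperator(
            task_id="run_drill_query",
            conn_id="drill_default",
            sql="SELECT 1 FROM (VALUES(1))",
        )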
diff --git a/providers/src/airflow/providers/apache/drill/__init__.py b/providers/apache/drill/src/airflow/providers/apache/drill/__init__.py similarity index 100% rename from providers/src/airflow/providers/apache/drill/__init__.py rename to providers/apache/drill/src/airflow/providers/apache/drill/__init__.py diff --git a/providers/apache/drill/src/airflow/providers/apache/drill/get_provider_info.py b/providers/apache/drill/src/airflow/providers/apache/drill/get_provider_info.py new file mode 100644 index 0000000000000..e0553925d547d --- /dev/null +++ b/providers/apache/drill/src/airflow/providers/apache/drill/get_provider_info.py @@ -0,0 +1,86 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! +# +# IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE +# `get_provider_info_TEMPLATE.py.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + + +def get_provider_info(): + return { + "package-name": "apache-airflow-providers-apache-drill", + "name": "Apache Drill", + "description": "`Apache Drill `__.\n", + "state": "ready", + "source-date-epoch": 1734527755, + "versions": [ + "3.0.0", + "2.8.1", + "2.8.0", + "2.7.3", + "2.7.2", + "2.7.1", + "2.7.0", + "2.6.1", + "2.6.0", + "2.5.0", + "2.4.4", + "2.4.3", + "2.4.2", + "2.4.1", + "2.4.0", + "2.3.2", + "2.3.1", + "2.3.0", + "2.2.1", + "2.2.0", + "2.1.0", + "2.0.0", + "1.0.4", + "1.0.3", + "1.0.2", + "1.0.1", + "1.0.0", + ], + "integrations": [ + { + "integration-name": "Apache Drill", + "external-doc-url": "https://drill.apache.org/", + "how-to-guide": ["/docs/apache-airflow-providers-apache-drill/operators.rst"], + "logo": "/docs/integration-logos/drill.png", + "tags": ["apache"], + } + ], + "hooks": [ + { + "integration-name": "Apache Drill", + "python-modules": ["airflow.providers.apache.drill.hooks.drill"], + } + ], + "connection-types": [ + { + "hook-class-name": "airflow.providers.apache.drill.hooks.drill.DrillHook", + "connection-type": "drill", + } + ], + "dependencies": [ + "apache-airflow>=2.9.0", + "apache-airflow-providers-common-sql>=1.20.0", + "sqlalchemy-drill>=1.1.0", + ], + } diff --git a/providers/src/airflow/providers/apache/drill/hooks/__init__.py b/providers/apache/drill/src/airflow/providers/apache/drill/hooks/__init__.py similarity index 100% rename from providers/src/airflow/providers/apache/drill/hooks/__init__.py rename to providers/apache/drill/src/airflow/providers/apache/drill/hooks/__init__.py diff --git a/providers/src/airflow/providers/apache/drill/hooks/drill.py b/providers/apache/drill/src/airflow/providers/apache/drill/hooks/drill.py similarity index 100% rename from providers/src/airflow/providers/apache/drill/hooks/drill.py rename to 
providers/apache/drill/src/airflow/providers/apache/drill/hooks/drill.py diff --git a/providers/src/airflow/providers/apache/drill/operators/__init__.py b/providers/apache/drill/src/airflow/providers/apache/drill/operators/__init__.py similarity index 100% rename from providers/src/airflow/providers/apache/drill/operators/__init__.py rename to providers/apache/drill/src/airflow/providers/apache/drill/operators/__init__.py diff --git a/providers/apache/drill/tests/conftest.py b/providers/apache/drill/tests/conftest.py new file mode 100644 index 0000000000000..068fe6bbf5ae9 --- /dev/null +++ b/providers/apache/drill/tests/conftest.py @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import pathlib + +import pytest + +pytest_plugins = "tests_common.pytest_plugin" + + +@pytest.hookimpl(tryfirst=True) +def pytest_configure(config: pytest.Config) -> None: + deprecations_ignore_path = pathlib.Path(__file__).parent.joinpath("deprecations_ignore.yml") + dep_path = [deprecations_ignore_path] if deprecations_ignore_path.exists() else [] + config.inicfg["airflow_deprecations_ignore"] = ( + config.inicfg.get("airflow_deprecations_ignore", []) + dep_path # type: ignore[assignment,operator] + ) diff --git a/providers/apache/drill/tests/provider_tests/__init__.py b/providers/apache/drill/tests/provider_tests/__init__.py new file mode 100644 index 0000000000000..e8fd22856438c --- /dev/null +++ b/providers/apache/drill/tests/provider_tests/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/providers/apache/drill/tests/provider_tests/apache/__init__.py b/providers/apache/drill/tests/provider_tests/apache/__init__.py new file mode 100644 index 0000000000000..e8fd22856438c --- /dev/null +++ b/providers/apache/drill/tests/provider_tests/apache/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/providers/src/airflow/providers/apache/druid/hooks/__init__.py b/providers/apache/drill/tests/provider_tests/apache/drill/__init__.py similarity index 100% rename from providers/src/airflow/providers/apache/druid/hooks/__init__.py rename to providers/apache/drill/tests/provider_tests/apache/drill/__init__.py diff --git a/providers/src/airflow/providers/apache/druid/operators/__init__.py b/providers/apache/drill/tests/provider_tests/apache/drill/hooks/__init__.py similarity index 100% rename from providers/src/airflow/providers/apache/druid/operators/__init__.py rename to providers/apache/drill/tests/provider_tests/apache/drill/hooks/__init__.py diff --git a/providers/tests/apache/drill/hooks/test_drill.py b/providers/apache/drill/tests/provider_tests/apache/drill/hooks/test_drill.py similarity index 100% rename from providers/tests/apache/drill/hooks/test_drill.py rename to providers/apache/drill/tests/provider_tests/apache/drill/hooks/test_drill.py diff --git a/providers/src/airflow/providers/apache/druid/transfers/__init__.py b/providers/apache/drill/tests/system/apache/drill/__init__.py similarity index 100% rename from providers/src/airflow/providers/apache/druid/transfers/__init__.py rename to providers/apache/drill/tests/system/apache/drill/__init__.py diff --git a/providers/tests/system/apache/drill/example_drill_dag.py b/providers/apache/drill/tests/system/apache/drill/example_drill_dag.py similarity index 100% rename from providers/tests/system/apache/drill/example_drill_dag.py rename to providers/apache/drill/tests/system/apache/drill/example_drill_dag.py diff --git a/providers/apache/druid/README.rst b/providers/apache/druid/README.rst new file mode 100644 index 0000000000000..ee723b1cf434f --- /dev/null +++ b/providers/apache/druid/README.rst @@ -0,0 +1,83 @@ + + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! + + .. 
IF YOU WANT TO MODIFY TEMPLATE FOR THIS FILE, YOU SHOULD MODIFY THE TEMPLATE + `PROVIDER_README_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + + +Package ``apache-airflow-providers-apache-druid`` + +Release: ``4.0.0`` + + +`Apache Druid `__. + + +Provider package +---------------- + +This is a provider package for ``apache.druid`` provider. All classes for this provider package +are in ``airflow.providers.apache.druid`` python package. + +You can find package information and changelog for the provider +in the `documentation `_. + +Installation +------------ + +You can install this package on top of an existing Airflow 2 installation (see ``Requirements`` below +for the minimum Airflow version supported) via +``pip install apache-airflow-providers-apache-druid`` + +The package supports the following python versions: 3.9,3.10,3.11,3.12 + +Requirements +------------ + +======================================= ================== +PIP package Version required +======================================= ================== +``apache-airflow`` ``>=2.9.0`` +``apache-airflow-providers-common-sql`` ``>=1.20.0`` +``pydruid`` ``>=0.4.1`` +======================================= ================== + +Cross provider package dependencies +----------------------------------- + +Those are dependencies that might be needed in order to use all the features of the package. +You need to install the specified provider packages in order to use them. + +You can install such cross-provider dependencies when installing from PyPI. For example: + +.. code-block:: bash + + pip install apache-airflow-providers-apache-druid[apache.hive] + + +============================================================================================================== =============== +Dependent package Extra +============================================================================================================== =============== +`apache-airflow-providers-apache-hive `_ ``apache.hive`` +`apache-airflow-providers-common-sql `_ ``common.sql`` +============================================================================================================== =============== + +The changelog for the provider package can be found in the +`changelog `_. 
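Both new ``pyproject.toml`` files register ``get_provider_info`` under the ``apache_airflow_provider`` entry-point group (see the drill file above; the druid one follows below). A rough sketch of how such entry points can be discovered with the standard library; only the group name and the ``package-name``/``versions`` keys come from this change, the rest is generic:

.. code-block:: python

    from importlib.metadata import entry_points

    # Enumerate installed Airflow providers via their entry points.
    # The group= keyword needs Python 3.10+; on 3.9 use entry_points()["apache_airflow_provider"].
    for ep in entry_points(group="apache_airflow_provider"):
        get_provider_info = ep.load()
        info = get_provider_info()
        print(info["package-name"], info["versions"][0])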
diff --git a/providers/src/airflow/providers/apache/druid/.latest-doc-only-change.txt b/providers/apache/druid/docs/.latest-doc-only-change.txt similarity index 100% rename from providers/src/airflow/providers/apache/druid/.latest-doc-only-change.txt rename to providers/apache/druid/docs/.latest-doc-only-change.txt diff --git a/providers/src/airflow/providers/apache/druid/CHANGELOG.rst b/providers/apache/druid/docs/changelog.rst similarity index 100% rename from providers/src/airflow/providers/apache/druid/CHANGELOG.rst rename to providers/apache/druid/docs/changelog.rst diff --git a/docs/apache-airflow-providers-apache-druid/commits.rst b/providers/apache/druid/docs/commits.rst similarity index 100% rename from docs/apache-airflow-providers-apache-druid/commits.rst rename to providers/apache/druid/docs/commits.rst diff --git a/docs/apache-airflow-providers-apache-druid/index.rst b/providers/apache/druid/docs/index.rst similarity index 100% rename from docs/apache-airflow-providers-apache-druid/index.rst rename to providers/apache/druid/docs/index.rst diff --git a/docs/apache-airflow-providers-apache-druid/installing-providers-from-sources.rst b/providers/apache/druid/docs/installing-providers-from-sources.rst similarity index 100% rename from docs/apache-airflow-providers-apache-druid/installing-providers-from-sources.rst rename to providers/apache/druid/docs/installing-providers-from-sources.rst diff --git a/docs/integration-logos/apache/druid-1.png b/providers/apache/druid/docs/integration-logos/druid-1.png similarity index 100% rename from docs/integration-logos/apache/druid-1.png rename to providers/apache/druid/docs/integration-logos/druid-1.png diff --git a/docs/apache-airflow-providers-apache-druid/operators.rst b/providers/apache/druid/docs/operators.rst similarity index 95% rename from docs/apache-airflow-providers-apache-druid/operators.rst rename to providers/apache/druid/docs/operators.rst index 758c51c538538..1c0566202c235 100644 --- a/docs/apache-airflow-providers-apache-druid/operators.rst +++ b/providers/apache/druid/docs/operators.rst @@ -38,7 +38,7 @@ For parameter definition take a look at :class:`~airflow.providers.apache.druid. Using the operator """""""""""""""""" -.. exampleinclude:: /../../providers/tests/system/apache/druid/example_druid_dag.py +.. 
exampleinclude:: /../../providers/apache/druid/tests/system/apache/druid/example_druid_dag.py :language: python :dedent: 4 :start-after: [START howto_operator_druid_submit] diff --git a/docs/apache-airflow-providers-apache-druid/security.rst b/providers/apache/druid/docs/security.rst similarity index 100% rename from docs/apache-airflow-providers-apache-druid/security.rst rename to providers/apache/druid/docs/security.rst diff --git a/providers/src/airflow/providers/apache/druid/provider.yaml b/providers/apache/druid/provider.yaml similarity index 93% rename from providers/src/airflow/providers/apache/druid/provider.yaml rename to providers/apache/druid/provider.yaml index 7d71784796f6f..f87180f5820de 100644 --- a/providers/src/airflow/providers/apache/druid/provider.yaml +++ b/providers/apache/druid/provider.yaml @@ -60,15 +60,10 @@ versions: - 1.0.1 - 1.0.0 -dependencies: - - apache-airflow>=2.9.0 - - apache-airflow-providers-common-sql>=1.20.0 - - pydruid>=0.4.1 - integrations: - integration-name: Apache Druid external-doc-url: https://druid.apache.org/ - logo: /integration-logos/apache/druid-1.png + logo: /docs/integration-logos/druid-1.png how-to-guide: - /docs/apache-airflow-providers-apache-druid/operators.rst tags: [apache] diff --git a/providers/apache/druid/pyproject.toml b/providers/apache/druid/pyproject.toml new file mode 100644 index 0000000000000..baa5ede29fed4 --- /dev/null +++ b/providers/apache/druid/pyproject.toml @@ -0,0 +1,86 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! 
+ +# IF YOU WANT TO MODIFY THIS FILE EXCEPT DEPENDENCIES, YOU SHOULD MODIFY THE TEMPLATE +# `pyproject_TEMPLATE.toml.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY +[build-system] +requires = ["flit_core==3.10.1"] +build-backend = "flit_core.buildapi" + +[project] +name = "apache-airflow-providers-apache-druid" +version = "4.0.0" +description = "Provider package apache-airflow-providers-apache-druid for Apache Airflow" +readme = "README.rst" +authors = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +maintainers = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +keywords = [ "airflow-provider", "apache.druid", "airflow", "integration" ] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "Environment :: Web Environment", + "Intended Audience :: Developers", + "Intended Audience :: System Administrators", + "Framework :: Apache Airflow", + "Framework :: Apache Airflow :: Provider", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: System :: Monitoring", +] +requires-python = "~=3.9" + +# The dependencies should be modified in place in the generated file +# Any change in the dependencies is preserved when the file is regenerated +dependencies = [ + "apache-airflow>=2.9.0", + "apache-airflow-providers-common-sql>=1.20.0", + "pydruid>=0.4.1", +] + +# The optional dependencies should be modified in place in the generated file +# Any change in the dependencies is preserved when the file is regenerated +[project.optional-dependencies] +"apache.hive" = [ + "apache-airflow-providers-apache-hive" +] + +[project.urls] +"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-apache-druid/4.0.0" +"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-apache-druid/4.0.0/changelog.html" +"Bug Tracker" = "https://github.com/apache/airflow/issues" +"Source Code" = "https://github.com/apache/airflow" +"Slack Chat" = "https://s.apache.org/airflow-slack" +"Twitter" = "https://x.com/ApacheAirflow" +"YouTube" = "https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/" + +[project.entry-points."apache_airflow_provider"] +provider_info = "airflow.providers.apache.druid.get_provider_info:get_provider_info" + +[tool.flit.module] +name = "airflow.providers.apache.druid" + +[tool.pytest.ini_options] +ignore = "tests/system/" diff --git a/providers/apache/druid/src/airflow/providers/apache/druid/LICENSE b/providers/apache/druid/src/airflow/providers/apache/druid/LICENSE new file mode 100644 index 0000000000000..11069edd79019 --- /dev/null +++ b/providers/apache/druid/src/airflow/providers/apache/druid/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
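The pyproject.toml above wires the provider into Airflow through the ``apache_airflow_provider`` entry point, whose target is the generated ``get_provider_info.py`` added next. As an illustration only (not part of this patch), the same entry point can be enumerated with the standard library; the sketch below assumes Python 3.10+ (``entry_points(group=...)``) and an environment where the provider distribution is installed, and Airflow's own ProvidersManager does the real lookup in practice.

    # Hypothetical discovery sketch, not Airflow code.
    from importlib.metadata import entry_points

    for ep in entry_points(group="apache_airflow_provider"):
        provider_info = ep.load()()  # e.g. airflow.providers.apache.druid.get_provider_info:get_provider_info
        print(provider_info["package-name"], provider_info["versions"][0])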
diff --git a/providers/src/airflow/providers/apache/druid/__init__.py b/providers/apache/druid/src/airflow/providers/apache/druid/__init__.py similarity index 100% rename from providers/src/airflow/providers/apache/druid/__init__.py rename to providers/apache/druid/src/airflow/providers/apache/druid/__init__.py diff --git a/providers/apache/druid/src/airflow/providers/apache/druid/get_provider_info.py b/providers/apache/druid/src/airflow/providers/apache/druid/get_provider_info.py new file mode 100644 index 0000000000000..33088aafa69bf --- /dev/null +++ b/providers/apache/druid/src/airflow/providers/apache/druid/get_provider_info.py @@ -0,0 +1,107 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! +# +# IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE +# `get_provider_info_TEMPLATE.py.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + + +def get_provider_info(): + return { + "package-name": "apache-airflow-providers-apache-druid", + "name": "Apache Druid", + "description": "`Apache Druid `__.\n", + "state": "ready", + "source-date-epoch": 1734527841, + "versions": [ + "4.0.0", + "3.12.1", + "3.12.0", + "3.11.0", + "3.10.2", + "3.10.1", + "3.10.0", + "3.9.0", + "3.8.1", + "3.8.0", + "3.7.0", + "3.6.0", + "3.5.0", + "3.4.2", + "3.4.1", + "3.4.0", + "3.3.1", + "3.3.0", + "3.2.1", + "3.2.0", + "3.1.0", + "3.0.0", + "2.3.3", + "2.3.2", + "2.3.1", + "2.3.0", + "2.2.0", + "2.1.0", + "2.0.2", + "2.0.1", + "2.0.0", + "1.1.0", + "1.0.1", + "1.0.0", + ], + "integrations": [ + { + "integration-name": "Apache Druid", + "external-doc-url": "https://druid.apache.org/", + "logo": "/docs/integration-logos/druid-1.png", + "how-to-guide": ["/docs/apache-airflow-providers-apache-druid/operators.rst"], + "tags": ["apache"], + } + ], + "operators": [ + { + "integration-name": "Apache Druid", + "python-modules": ["airflow.providers.apache.druid.operators.druid"], + } + ], + "hooks": [ + { + "integration-name": "Apache Druid", + "python-modules": ["airflow.providers.apache.druid.hooks.druid"], + } + ], + "connection-types": [ + { + "hook-class-name": "airflow.providers.apache.druid.hooks.druid.DruidDbApiHook", + "connection-type": "druid", + } + ], + "transfers": [ + { + "source-integration-name": "Apache Hive", + "target-integration-name": "Apache Druid", + "python-module": "airflow.providers.apache.druid.transfers.hive_to_druid", + } + ], + "dependencies": [ + "apache-airflow>=2.9.0", + "apache-airflow-providers-common-sql>=1.20.0", + "pydruid>=0.4.1", + ], + "optional-dependencies": {"apache.hive": ["apache-airflow-providers-apache-hive"]}, + } diff --git a/providers/src/airflow/providers/dbt/cloud/sensors/__init__.py 
b/providers/apache/druid/src/airflow/providers/apache/druid/hooks/__init__.py similarity index 100% rename from providers/src/airflow/providers/dbt/cloud/sensors/__init__.py rename to providers/apache/druid/src/airflow/providers/apache/druid/hooks/__init__.py diff --git a/providers/src/airflow/providers/apache/druid/hooks/druid.py b/providers/apache/druid/src/airflow/providers/apache/druid/hooks/druid.py similarity index 100% rename from providers/src/airflow/providers/apache/druid/hooks/druid.py rename to providers/apache/druid/src/airflow/providers/apache/druid/hooks/druid.py diff --git a/providers/src/airflow/providers/elasticsearch/hooks/__init__.py b/providers/apache/druid/src/airflow/providers/apache/druid/operators/__init__.py similarity index 100% rename from providers/src/airflow/providers/elasticsearch/hooks/__init__.py rename to providers/apache/druid/src/airflow/providers/apache/druid/operators/__init__.py diff --git a/providers/src/airflow/providers/apache/druid/operators/druid.py b/providers/apache/druid/src/airflow/providers/apache/druid/operators/druid.py similarity index 100% rename from providers/src/airflow/providers/apache/druid/operators/druid.py rename to providers/apache/druid/src/airflow/providers/apache/druid/operators/druid.py diff --git a/providers/src/airflow/providers/arangodb/example_dags/__init__.py b/providers/apache/druid/src/airflow/providers/apache/druid/transfers/__init__.py similarity index 100% rename from providers/src/airflow/providers/arangodb/example_dags/__init__.py rename to providers/apache/druid/src/airflow/providers/apache/druid/transfers/__init__.py diff --git a/providers/src/airflow/providers/apache/druid/transfers/hive_to_druid.py b/providers/apache/druid/src/airflow/providers/apache/druid/transfers/hive_to_druid.py similarity index 100% rename from providers/src/airflow/providers/apache/druid/transfers/hive_to_druid.py rename to providers/apache/druid/src/airflow/providers/apache/druid/transfers/hive_to_druid.py diff --git a/providers/apache/druid/tests/conftest.py b/providers/apache/druid/tests/conftest.py new file mode 100644 index 0000000000000..068fe6bbf5ae9 --- /dev/null +++ b/providers/apache/druid/tests/conftest.py @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from __future__ import annotations + +import pathlib + +import pytest + +pytest_plugins = "tests_common.pytest_plugin" + + +@pytest.hookimpl(tryfirst=True) +def pytest_configure(config: pytest.Config) -> None: + deprecations_ignore_path = pathlib.Path(__file__).parent.joinpath("deprecations_ignore.yml") + dep_path = [deprecations_ignore_path] if deprecations_ignore_path.exists() else [] + config.inicfg["airflow_deprecations_ignore"] = ( + config.inicfg.get("airflow_deprecations_ignore", []) + dep_path # type: ignore[assignment,operator] + ) diff --git a/providers/apache/druid/tests/provider_tests/__init__.py b/providers/apache/druid/tests/provider_tests/__init__.py new file mode 100644 index 0000000000000..e8fd22856438c --- /dev/null +++ b/providers/apache/druid/tests/provider_tests/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/providers/apache/druid/tests/provider_tests/apache/__init__.py b/providers/apache/druid/tests/provider_tests/apache/__init__.py new file mode 100644 index 0000000000000..e8fd22856438c --- /dev/null +++ b/providers/apache/druid/tests/provider_tests/apache/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/providers/tests/apache/drill/__init__.py b/providers/apache/druid/tests/provider_tests/apache/druid/__init__.py similarity index 100% rename from providers/tests/apache/drill/__init__.py rename to providers/apache/druid/tests/provider_tests/apache/druid/__init__.py diff --git a/providers/tests/apache/drill/hooks/__init__.py b/providers/apache/druid/tests/provider_tests/apache/druid/hooks/__init__.py similarity index 100% rename from providers/tests/apache/drill/hooks/__init__.py rename to providers/apache/druid/tests/provider_tests/apache/druid/hooks/__init__.py diff --git a/providers/tests/apache/druid/hooks/test_druid.py b/providers/apache/druid/tests/provider_tests/apache/druid/hooks/test_druid.py similarity index 100% rename from providers/tests/apache/druid/hooks/test_druid.py rename to providers/apache/druid/tests/provider_tests/apache/druid/hooks/test_druid.py diff --git a/providers/tests/apache/druid/__init__.py b/providers/apache/druid/tests/provider_tests/apache/druid/operators/__init__.py similarity index 100% rename from providers/tests/apache/druid/__init__.py rename to providers/apache/druid/tests/provider_tests/apache/druid/operators/__init__.py diff --git a/providers/tests/apache/druid/operators/test_druid.py b/providers/apache/druid/tests/provider_tests/apache/druid/operators/test_druid.py similarity index 100% rename from providers/tests/apache/druid/operators/test_druid.py rename to providers/apache/druid/tests/provider_tests/apache/druid/operators/test_druid.py diff --git a/providers/src/airflow/providers/arangodb/hooks/__init__.py b/providers/apache/druid/tests/provider_tests/apache/druid/transfers/__init__.py similarity index 100% rename from providers/src/airflow/providers/arangodb/hooks/__init__.py rename to providers/apache/druid/tests/provider_tests/apache/druid/transfers/__init__.py diff --git a/providers/tests/apache/druid/transfers/test_hive_to_druid.py b/providers/apache/druid/tests/provider_tests/apache/druid/transfers/test_hive_to_druid.py similarity index 100% rename from providers/tests/apache/druid/transfers/test_hive_to_druid.py rename to providers/apache/druid/tests/provider_tests/apache/druid/transfers/test_hive_to_druid.py diff --git a/providers/src/airflow/providers/arangodb/operators/__init__.py b/providers/apache/druid/tests/system/apache/druid/__init__.py similarity index 100% rename from providers/src/airflow/providers/arangodb/operators/__init__.py rename to providers/apache/druid/tests/system/apache/druid/__init__.py diff --git a/providers/tests/system/apache/druid/example_druid_dag.py b/providers/apache/druid/tests/system/apache/druid/example_druid_dag.py similarity index 100% rename from providers/tests/system/apache/druid/example_druid_dag.py rename to providers/apache/druid/tests/system/apache/druid/example_druid_dag.py diff --git a/providers/arangodb/README.rst b/providers/arangodb/README.rst new file mode 100644 index 0000000000000..bc7e1d1986a05 --- /dev/null +++ b/providers/arangodb/README.rst @@ -0,0 +1,62 @@ + + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. 
http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! + + .. IF YOU WANT TO MODIFY TEMPLATE FOR THIS FILE, YOU SHOULD MODIFY THE TEMPLATE + `PROVIDER_README_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + + +Package ``apache-airflow-providers-arangodb`` + +Release: ``2.7.0`` + + +`ArangoDB <https://www.arangodb.com/>`__ + + +Provider package +---------------- + +This is a provider package for ``arangodb`` provider. All classes for this provider package +are in ``airflow.providers.arangodb`` python package. + +You can find package information and changelog for the provider +in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-arangodb/2.7.0/>`_. + +Installation +------------ + +You can install this package on top of an existing Airflow 2 installation (see ``Requirements`` below +for the minimum Airflow version supported) via +``pip install apache-airflow-providers-arangodb`` + +The package supports the following python versions: 3.9,3.10,3.11,3.12 + +Requirements +------------ + +================== ================== +PIP package Version required +================== ================== +``apache-airflow`` ``>=2.9.0`` +``python-arango`` ``>=7.3.2`` +================== ================== + +The changelog for the provider package can be found in the +`changelog <https://airflow.apache.org/docs/apache-airflow-providers-arangodb/2.7.0/changelog.html>`_. diff --git a/providers/src/airflow/providers/arangodb/.latest-doc-only-change.txt b/providers/arangodb/docs/.latest-doc-only-change.txt similarity index 100% rename from providers/src/airflow/providers/arangodb/.latest-doc-only-change.txt rename to providers/arangodb/docs/.latest-doc-only-change.txt diff --git a/providers/src/airflow/providers/arangodb/CHANGELOG.rst b/providers/arangodb/docs/changelog.rst similarity index 100% rename from providers/src/airflow/providers/arangodb/CHANGELOG.rst rename to providers/arangodb/docs/changelog.rst diff --git a/docs/apache-airflow-providers-arangodb/commits.rst b/providers/arangodb/docs/commits.rst similarity index 100% rename from docs/apache-airflow-providers-arangodb/commits.rst rename to providers/arangodb/docs/commits.rst diff --git a/docs/apache-airflow-providers-arangodb/connections/arangodb.rst b/providers/arangodb/docs/connections/arangodb.rst similarity index 100% rename from docs/apache-airflow-providers-arangodb/connections/arangodb.rst rename to providers/arangodb/docs/connections/arangodb.rst diff --git a/docs/apache-airflow-providers-arangodb/index.rst b/providers/arangodb/docs/index.rst similarity index 100% rename from docs/apache-airflow-providers-arangodb/index.rst rename to providers/arangodb/docs/index.rst diff --git a/docs/apache-airflow-providers-arangodb/installing-providers-from-sources.rst b/providers/arangodb/docs/installing-providers-from-sources.rst similarity index 100% rename from docs/apache-airflow-providers-arangodb/installing-providers-from-sources.rst rename to providers/arangodb/docs/installing-providers-from-sources.rst diff --git a/docs/apache-airflow-providers-arangodb/operators/index.rst b/providers/arangodb/docs/operators/index.rst similarity index 100% rename from docs/apache-airflow-providers-arangodb/operators/index.rst rename to providers/arangodb/docs/operators/index.rst diff --git
a/docs/apache-airflow-providers-arangodb/security.rst b/providers/arangodb/docs/security.rst similarity index 100% rename from docs/apache-airflow-providers-arangodb/security.rst rename to providers/arangodb/docs/security.rst diff --git a/providers/src/airflow/providers/arangodb/provider.yaml b/providers/arangodb/provider.yaml similarity index 96% rename from providers/src/airflow/providers/arangodb/provider.yaml rename to providers/arangodb/provider.yaml index 46b96e877a14e..a79a9384fc831 100644 --- a/providers/src/airflow/providers/arangodb/provider.yaml +++ b/providers/arangodb/provider.yaml @@ -21,10 +21,6 @@ name: ArangoDB description: | `ArangoDB `__ -dependencies: - - apache-airflow>=2.9.0 - - python-arango>=7.3.2 - state: ready source-date-epoch: 1734528758 # note that those versions are maintained by release manager - do not update them manually diff --git a/providers/arangodb/pyproject.toml b/providers/arangodb/pyproject.toml new file mode 100644 index 0000000000000..6ee68f49407e6 --- /dev/null +++ b/providers/arangodb/pyproject.toml @@ -0,0 +1,78 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! 
+ +# IF YOU WANT TO MODIFY THIS FILE EXCEPT DEPENDENCIES, YOU SHOULD MODIFY THE TEMPLATE +# `pyproject_TEMPLATE.toml.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY +[build-system] +requires = ["flit_core==3.10.1"] +build-backend = "flit_core.buildapi" + +[project] +name = "apache-airflow-providers-arangodb" +version = "2.7.0" +description = "Provider package apache-airflow-providers-arangodb for Apache Airflow" +readme = "README.rst" +authors = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +maintainers = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +keywords = [ "airflow-provider", "arangodb", "airflow", "integration" ] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "Environment :: Web Environment", + "Intended Audience :: Developers", + "Intended Audience :: System Administrators", + "Framework :: Apache Airflow", + "Framework :: Apache Airflow :: Provider", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: System :: Monitoring", +] +requires-python = "~=3.9" + +# The dependencies should be modified in place in the generated file +# Any change in the dependencies is preserved when the file is regenerated +dependencies = [ + "apache-airflow>=2.9.0", + "python-arango>=7.3.2", +] + +[project.urls] +"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-arangodb/2.7.0" +"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-arangodb/2.7.0/changelog.html" +"Bug Tracker" = "https://github.com/apache/airflow/issues" +"Source Code" = "https://github.com/apache/airflow" +"Slack Chat" = "https://s.apache.org/airflow-slack" +"Twitter" = "https://x.com/ApacheAirflow" +"YouTube" = "https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/" + +[project.entry-points."apache_airflow_provider"] +provider_info = "airflow.providers.arangodb.get_provider_info:get_provider_info" + +[tool.flit.module] +name = "airflow.providers.arangodb" + +[tool.pytest.ini_options] +ignore = "tests/system/" diff --git a/providers/arangodb/src/airflow/providers/arangodb/LICENSE b/providers/arangodb/src/airflow/providers/arangodb/LICENSE new file mode 100644 index 0000000000000..11069edd79019 --- /dev/null +++ b/providers/arangodb/src/airflow/providers/arangodb/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
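As with the other providers in this patch, the ``dependencies`` block is removed from the arangodb provider.yaml and the canonical requirement list now lives in the per-provider pyproject.toml above. Purely as an illustration (not part of the patch), the resulting distribution metadata can be inspected at runtime; this assumes the package is installed in the current environment (otherwise ``PackageNotFoundError`` is raised).

    # Hypothetical check against the metadata built from the pyproject.toml above.
    from importlib.metadata import requires, version

    dist = "apache-airflow-providers-arangodb"
    print(version(dist))                       # "2.7.0" per this patch
    for requirement in requires(dist) or []:   # e.g. apache-airflow>=2.9.0, python-arango>=7.3.2
        print(requirement)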
diff --git a/providers/src/airflow/providers/arangodb/__init__.py b/providers/arangodb/src/airflow/providers/arangodb/__init__.py similarity index 100% rename from providers/src/airflow/providers/arangodb/__init__.py rename to providers/arangodb/src/airflow/providers/arangodb/__init__.py diff --git a/providers/src/airflow/providers/arangodb/sensors/__init__.py b/providers/arangodb/src/airflow/providers/arangodb/example_dags/__init__.py similarity index 100% rename from providers/src/airflow/providers/arangodb/sensors/__init__.py rename to providers/arangodb/src/airflow/providers/arangodb/example_dags/__init__.py diff --git a/providers/src/airflow/providers/arangodb/example_dags/example_arangodb.py b/providers/arangodb/src/airflow/providers/arangodb/example_dags/example_arangodb.py similarity index 100% rename from providers/src/airflow/providers/arangodb/example_dags/example_arangodb.py rename to providers/arangodb/src/airflow/providers/arangodb/example_dags/example_arangodb.py diff --git a/providers/arangodb/src/airflow/providers/arangodb/get_provider_info.py b/providers/arangodb/src/airflow/providers/arangodb/get_provider_info.py new file mode 100644 index 0000000000000..2213270caa610 --- /dev/null +++ b/providers/arangodb/src/airflow/providers/arangodb/get_provider_info.py @@ -0,0 +1,76 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! 
+# +# IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE +# `get_provider_info_TEMPLATE.py.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + + +def get_provider_info(): + return { + "package-name": "apache-airflow-providers-arangodb", + "name": "ArangoDB", + "description": "`ArangoDB `__\n", + "state": "ready", + "source-date-epoch": 1734528758, + "versions": [ + "2.7.0", + "2.6.0", + "2.5.1", + "2.5.0", + "2.4.1", + "2.4.0", + "2.3.0", + "2.2.2", + "2.2.1", + "2.2.0", + "2.1.1", + "2.1.0", + "2.0.0", + "1.0.0", + ], + "integrations": [ + { + "integration-name": "ArangoDB", + "external-doc-url": "https://www.arangodb.com/", + "tags": ["software"], + } + ], + "hooks": [ + {"integration-name": "ArangoDB", "python-modules": ["airflow.providers.arangodb.hooks.arangodb"]} + ], + "operators": [ + { + "integration-name": "ArangoDB", + "python-modules": ["airflow.providers.arangodb.operators.arangodb"], + } + ], + "sensors": [ + { + "integration-name": "ArangoDB", + "python-modules": ["airflow.providers.arangodb.sensors.arangodb"], + } + ], + "connection-types": [ + { + "hook-class-name": "airflow.providers.arangodb.hooks.arangodb.ArangoDBHook", + "connection-type": "arangodb", + } + ], + "dependencies": ["apache-airflow>=2.9.0", "python-arango>=7.3.2"], + } diff --git a/providers/src/airflow/providers/dbt/cloud/hooks/__init__.py b/providers/arangodb/src/airflow/providers/arangodb/hooks/__init__.py similarity index 100% rename from providers/src/airflow/providers/dbt/cloud/hooks/__init__.py rename to providers/arangodb/src/airflow/providers/arangodb/hooks/__init__.py diff --git a/providers/src/airflow/providers/arangodb/hooks/arangodb.py b/providers/arangodb/src/airflow/providers/arangodb/hooks/arangodb.py similarity index 100% rename from providers/src/airflow/providers/arangodb/hooks/arangodb.py rename to providers/arangodb/src/airflow/providers/arangodb/hooks/arangodb.py diff --git a/providers/src/airflow/providers/dbt/cloud/operators/__init__.py b/providers/arangodb/src/airflow/providers/arangodb/operators/__init__.py similarity index 100% rename from providers/src/airflow/providers/dbt/cloud/operators/__init__.py rename to providers/arangodb/src/airflow/providers/arangodb/operators/__init__.py diff --git a/providers/src/airflow/providers/arangodb/operators/arangodb.py b/providers/arangodb/src/airflow/providers/arangodb/operators/arangodb.py similarity index 100% rename from providers/src/airflow/providers/arangodb/operators/arangodb.py rename to providers/arangodb/src/airflow/providers/arangodb/operators/arangodb.py diff --git a/providers/src/airflow/providers/dbt/cloud/triggers/__init__.py b/providers/arangodb/src/airflow/providers/arangodb/sensors/__init__.py similarity index 100% rename from providers/src/airflow/providers/dbt/cloud/triggers/__init__.py rename to providers/arangodb/src/airflow/providers/arangodb/sensors/__init__.py diff --git a/providers/src/airflow/providers/arangodb/sensors/arangodb.py b/providers/arangodb/src/airflow/providers/arangodb/sensors/arangodb.py similarity index 100% rename from providers/src/airflow/providers/arangodb/sensors/arangodb.py rename to providers/arangodb/src/airflow/providers/arangodb/sensors/arangodb.py diff --git a/providers/arangodb/tests/conftest.py b/providers/arangodb/tests/conftest.py new file mode 100644 index 0000000000000..068fe6bbf5ae9 --- /dev/null +++ b/providers/arangodb/tests/conftest.py @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license 
agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import pathlib + +import pytest + +pytest_plugins = "tests_common.pytest_plugin" + + +@pytest.hookimpl(tryfirst=True) +def pytest_configure(config: pytest.Config) -> None: + deprecations_ignore_path = pathlib.Path(__file__).parent.joinpath("deprecations_ignore.yml") + dep_path = [deprecations_ignore_path] if deprecations_ignore_path.exists() else [] + config.inicfg["airflow_deprecations_ignore"] = ( + config.inicfg.get("airflow_deprecations_ignore", []) + dep_path # type: ignore[assignment,operator] + ) diff --git a/providers/arangodb/tests/provider_tests/__init__.py b/providers/arangodb/tests/provider_tests/__init__.py new file mode 100644 index 0000000000000..e8fd22856438c --- /dev/null +++ b/providers/arangodb/tests/provider_tests/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/providers/src/airflow/providers/dbt/cloud/utils/__init__.py b/providers/arangodb/tests/provider_tests/arangodb/__init__.py similarity index 100% rename from providers/src/airflow/providers/dbt/cloud/utils/__init__.py rename to providers/arangodb/tests/provider_tests/arangodb/__init__.py diff --git a/providers/src/airflow/providers/elasticsearch/log/__init__.py b/providers/arangodb/tests/provider_tests/arangodb/hooks/__init__.py similarity index 100% rename from providers/src/airflow/providers/elasticsearch/log/__init__.py rename to providers/arangodb/tests/provider_tests/arangodb/hooks/__init__.py diff --git a/providers/tests/arangodb/hooks/test_arangodb.py b/providers/arangodb/tests/provider_tests/arangodb/hooks/test_arangodb.py similarity index 100% rename from providers/tests/arangodb/hooks/test_arangodb.py rename to providers/arangodb/tests/provider_tests/arangodb/hooks/test_arangodb.py diff --git a/providers/src/airflow/providers/github/hooks/__init__.py b/providers/arangodb/tests/provider_tests/arangodb/operators/__init__.py similarity index 100% rename from providers/src/airflow/providers/github/hooks/__init__.py rename to providers/arangodb/tests/provider_tests/arangodb/operators/__init__.py diff --git a/providers/tests/arangodb/operators/test_arangodb.py b/providers/arangodb/tests/provider_tests/arangodb/operators/test_arangodb.py similarity index 100% rename from providers/tests/arangodb/operators/test_arangodb.py rename to providers/arangodb/tests/provider_tests/arangodb/operators/test_arangodb.py diff --git a/providers/src/airflow/providers/github/operators/__init__.py b/providers/arangodb/tests/provider_tests/arangodb/sensors/__init__.py similarity index 100% rename from providers/src/airflow/providers/github/operators/__init__.py rename to providers/arangodb/tests/provider_tests/arangodb/sensors/__init__.py diff --git a/providers/tests/arangodb/sensors/test_arangodb.py b/providers/arangodb/tests/provider_tests/arangodb/sensors/test_arangodb.py similarity index 100% rename from providers/tests/arangodb/sensors/test_arangodb.py rename to providers/arangodb/tests/provider_tests/arangodb/sensors/test_arangodb.py diff --git a/providers/celery/README.rst b/providers/celery/README.rst index f2177d13b6013..001620b728916 100644 --- a/providers/celery/README.rst +++ b/providers/celery/README.rst @@ -51,14 +51,14 @@ The package supports the following python versions: 3.9,3.10,3.11,3.12 Requirements ------------ -================== ============================== +================== ================== PIP package Version required -================== ============================== +================== ================== ``apache-airflow`` ``>=2.9.0`` -``celery[redis]`` ``>=5.3.0,!=5.3.2,!=5.3.3,<6`` +``celery[redis]`` ``>=5.4.0,<6`` ``flower`` ``>=1.0.0`` ``google-re2`` ``>=1.0`` -================== ============================== +================== ================== Cross provider package dependencies ----------------------------------- diff --git a/providers/celery/pyproject.toml b/providers/celery/pyproject.toml index 011a3812c4803..959a219c2ebc3 100644 --- a/providers/celery/pyproject.toml +++ b/providers/celery/pyproject.toml @@ -59,9 +59,7 @@ dependencies = [ # The Celery is known to introduce problems when upgraded to a MAJOR version. 
Airflow Core # Uses Celery for CeleryExecutor, and we also know that Kubernetes Python client follows SemVer # (https://docs.celeryq.dev/en/stable/contributing.html?highlight=semver#versions). - # Make sure that the limit here is synchronized with [celery] extra in the airflow core - # The 5.3.3/5.3.2 limit comes from https://github.com/celery/celery/issues/8470 - "celery[redis]>=5.3.0,<6,!=5.3.3,!=5.3.2", + "celery[redis]>=5.4.0,<6", "flower>=1.0.0", "google-re2>=1.0", ] diff --git a/providers/celery/src/airflow/providers/celery/cli/celery_command.py b/providers/celery/src/airflow/providers/celery/cli/celery_command.py index aaff91bff226c..aa0a0ec2ebe57 100644 --- a/providers/celery/src/airflow/providers/celery/cli/celery_command.py +++ b/providers/celery/src/airflow/providers/celery/cli/celery_command.py @@ -154,6 +154,11 @@ def worker(args): # This needs to be imported locally to not trigger Providers Manager initialization from airflow.providers.celery.executors.celery_executor import app as celery_app + if AIRFLOW_V_3_0_PLUS: + from airflow.sdk.log import configure_logging + + configure_logging(output=sys.stdout.buffer) + # Disable connection pool so that celery worker does not hold an unnecessary db connection settings.reconfigure_orm(disable_connection_pool=True) if not settings.validate_session(): diff --git a/providers/celery/src/airflow/providers/celery/executors/celery_executor.py b/providers/celery/src/airflow/providers/celery/executors/celery_executor.py index d9121dcd7ab32..970921c9081d1 100644 --- a/providers/celery/src/airflow/providers/celery/executors/celery_executor.py +++ b/providers/celery/src/airflow/providers/celery/executors/celery_executor.py @@ -33,7 +33,7 @@ from collections.abc import Sequence from concurrent.futures import ProcessPoolExecutor from multiprocessing import cpu_count -from typing import TYPE_CHECKING, Any, Optional +from typing import TYPE_CHECKING, Any from deprecated import deprecated @@ -53,7 +53,7 @@ from airflow.configuration import conf from airflow.exceptions import AirflowProviderDeprecationWarning, AirflowTaskTimeout from airflow.executors.base_executor import BaseExecutor -from airflow.providers.celery.version_compat import AIRFLOW_V_2_8_PLUS +from airflow.providers.celery.version_compat import AIRFLOW_V_2_8_PLUS, AIRFLOW_V_3_0_PLUS from airflow.stats import Stats from airflow.utils.state import TaskInstanceState from celery import states as celery_states @@ -67,14 +67,13 @@ if TYPE_CHECKING: import argparse - from airflow.executors.base_executor import CommandType, TaskTuple + from sqlalchemy.orm import Session + + from airflow.executors import workloads + from airflow.executors.base_executor import TaskTuple from airflow.models.taskinstance import TaskInstance from airflow.models.taskinstancekey import TaskInstanceKey - from celery import Task - - # Task instance that is sent over Celery queues - # TaskInstanceKey, Command, queue_name, CallableTask - TaskInstanceInCelery = tuple[TaskInstanceKey, CommandType, Optional[str], Task] + from airflow.providers.celery.executors.celery_executor_utils import TaskInstanceInCelery # PEP562 @@ -228,6 +227,11 @@ class CeleryExecutor(BaseExecutor): supports_ad_hoc_ti_run: bool = True supports_sentry: bool = True + if TYPE_CHECKING and AIRFLOW_V_3_0_PLUS: + # In the v3 path, we store workloads, not commands as strings. 
+ # TODO: TaskSDK: move this type change into BaseExecutor + queued_tasks: dict[TaskInstanceKey, workloads.All] # type: ignore[assignment] + def __init__(self): super().__init__() @@ -256,10 +260,22 @@ def _num_tasks_per_send_process(self, to_send_count: int) -> int: return max(1, math.ceil(to_send_count / self._sync_parallelism)) def _process_tasks(self, task_tuples: list[TaskTuple]) -> None: + # Airflow V2 version from airflow.providers.celery.executors.celery_executor_utils import execute_command task_tuples_to_send = [task_tuple[:3] + (execute_command,) for task_tuple in task_tuples] - first_task = next(t[3] for t in task_tuples_to_send) + + self._send_tasks(task_tuples_to_send) + + def _process_workloads(self, workloads: list[workloads.All]) -> None: + # Airflow V3 version + from airflow.providers.celery.executors.celery_executor_utils import execute_workload + + tasks = [(workload.ti.key, workload, workload.ti.queue, execute_workload) for workload in workloads] + self._send_tasks(tasks) + + def _send_tasks(self, task_tuples_to_send: Sequence[TaskInstanceInCelery]): + first_task = next(t[-1] for t in task_tuples_to_send) # Celery state queries will stuck if we do not use one same backend # for all tasks. @@ -280,7 +296,7 @@ def _process_tasks(self, task_tuples: list[TaskTuple]) -> None: "[Try %s of %s] Task Timeout Error for Task: (%s).", self.task_publish_retries[key] + 1, self.task_publish_max_retries, - key, + tuple(key), ) self.task_publish_retries[key] = retries + 1 continue @@ -299,7 +315,7 @@ def _process_tasks(self, task_tuples: list[TaskTuple]) -> None: # which point we don't need the ID anymore anyway self.event_buffer[key] = (TaskInstanceState.QUEUED, result.task_id) - def _send_tasks_to_celery(self, task_tuples_to_send: list[TaskInstanceInCelery]): + def _send_tasks_to_celery(self, task_tuples_to_send: Sequence[TaskInstanceInCelery]): from airflow.providers.celery.executors.celery_executor_utils import send_task_to_executor if len(task_tuples_to_send) == 1 or self._sync_parallelism == 1: @@ -359,7 +375,7 @@ def update_task_state(self, key: TaskInstanceKey, state: str, info: Any) -> None self.success(key, info) elif state in (celery_states.FAILURE, celery_states.REVOKED): self.fail(key, info) - elif state in (celery_states.STARTED, celery_states.PENDING): + elif state in (celery_states.STARTED, celery_states.PENDING, celery_states.RETRY): pass else: self.log.info("Unexpected state for %s: %s", key, state) @@ -416,6 +432,10 @@ def try_adopt_task_instances(self, tis: Sequence[TaskInstance]) -> Sequence[Task for celery_task_id, (state, info) in states_by_celery_task_id.items(): result, ti = celery_tasks[celery_task_id] result.backend = cached_celery_backend + if isinstance(result.result, BaseException): + e = result.result + # Log the exception we got from the remote end + self.log.warning("Task %s failed with error", ti.key, exc_info=e) # Set the correct elements of the state dicts, then update this # like we just queried it. 
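The executor changes above queue a workload object where the Airflow 2 path queued a CLI command, and the celery_executor_utils.py hunks that follow serialize that object with ``model_dump_json()`` before handing it to Celery, then rebuild it in the worker with a pydantic ``TypeAdapter``. The sketch below shows only that round trip; the ``Workload`` model is a stand-in for illustration, not Airflow's ``airflow.executors.workloads`` classes.

    # Stand-alone illustration of the serialize/validate round trip (pydantic v2).
    from typing import Optional

    from pydantic import BaseModel, TypeAdapter

    class Workload(BaseModel):
        dag_rel_path: str
        log_path: Optional[str] = None

    payload = Workload(dag_rel_path="dags/example.py").model_dump_json()  # what goes over the Celery queue
    decoded = TypeAdapter(Workload).validate_json(payload)                # what the worker reconstructs
    assert decoded.dag_rel_path == "dags/example.py"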
@@ -475,6 +495,10 @@ def get_cli_commands() -> list[GroupCommand]: ), ] + def queue_workload(self, workload: workloads.ExecuteTask, session: Session | None) -> None: + ti = workload.ti + self.queued_tasks[ti.key] = workload + def _get_parser() -> argparse.ArgumentParser: """ diff --git a/providers/celery/src/airflow/providers/celery/executors/celery_executor_utils.py b/providers/celery/src/airflow/providers/celery/executors/celery_executor_utils.py index 6d88d9f578d24..38b26ebd79b26 100644 --- a/providers/celery/src/airflow/providers/celery/executors/celery_executor_utils.py +++ b/providers/celery/src/airflow/providers/celery/executors/celery_executor_utils.py @@ -31,7 +31,7 @@ import warnings from collections.abc import Mapping, MutableMapping from concurrent.futures import ProcessPoolExecutor -from typing import TYPE_CHECKING, Any, Optional +from typing import TYPE_CHECKING, Any, Optional, Union from setproctitle import setproctitle from sqlalchemy import select @@ -40,8 +40,8 @@ from airflow.configuration import conf from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning, AirflowTaskTimeout from airflow.executors.base_executor import BaseExecutor +from airflow.providers.celery.version_compat import AIRFLOW_V_3_0_PLUS from airflow.stats import Stats -from airflow.utils.dag_parsing_context import _airflow_parsing_context_manager from airflow.utils.log.logging_mixin import LoggingMixin from airflow.utils.net import get_hostname from airflow.utils.providers_configuration_loader import providers_configuration_loaded @@ -51,14 +51,25 @@ from celery.backends.database import DatabaseBackend, Task as TaskDb, retry, session_cleanup from celery.signals import import_modules as celery_import_modules +try: + from airflow.sdk.definitions._internal.dag_parsing_context import _airflow_parsing_context_manager +except ImportError: + from airflow.utils.dag_parsing_context import _airflow_parsing_context_manager + log = logging.getLogger(__name__) if TYPE_CHECKING: + from airflow.executors import workloads from airflow.executors.base_executor import CommandType, EventBufferValueType from airflow.models.taskinstance import TaskInstanceKey + from airflow.typing_compat import TypeAlias from celery.result import AsyncResult - TaskInstanceInCelery = tuple[TaskInstanceKey, CommandType, Optional[str], Task] + # We can't use `if AIRFLOW_V_3_0_PLUS` conditions in type checks, so unfortunately we just have to define + # the type as the union of both kinds + TaskInstanceInCelery: TypeAlias = tuple[ + TaskInstanceKey, Union[workloads.All, CommandType], Optional[str], Task + ] OPERATION_TIMEOUT = conf.getfloat("celery", "operation_timeout") @@ -125,21 +136,54 @@ def on_celery_import_modules(*args, **kwargs): import kubernetes.client # noqa: F401 -@app.task -def execute_command(command_to_exec: CommandType) -> None: - """Execute command.""" - dag_id, task_id = BaseExecutor.validate_airflow_tasks_run_command(command_to_exec) +# Once Celery 5.5 is out of beta, we can pass `pydantic=True` to the decorator and it will handle the validation +# and deserialization for us +@app.task(name="execute_workload") +def execute_workload(input: str) -> None: + from pydantic import TypeAdapter + + from airflow.configuration import conf + from airflow.executors import workloads + from airflow.sdk.execution_time.supervisor import supervise + + decoder = TypeAdapter(workloads.All) + workload = decoder.validate_json(input) + celery_task_id = app.current_task.request.id - log.info("[%s] Executing command in 
Celery: %s", celery_task_id, command_to_exec) - with _airflow_parsing_context_manager(dag_id=dag_id, task_id=task_id): - try: - if settings.EXECUTE_TASKS_NEW_PYTHON_INTERPRETER: - _execute_in_subprocess(command_to_exec, celery_task_id) - else: - _execute_in_fork(command_to_exec, celery_task_id) - except Exception: - Stats.incr("celery.execute_command.failure") - raise + + if not isinstance(workload, workloads.ExecuteTask): + raise ValueError(f"CeleryExecutor does not now how to handle {type(workload)}") + + log.info("[%s] Executing workload in Celery: %s", celery_task_id, workload) + + supervise( + # This is the "wrong" ti type, but it duck types the same. TODO: Create a protocol for this. + ti=workload.ti, # type: ignore[arg-type] + dag_rel_path=workload.dag_rel_path, + bundle_info=workload.bundle_info, + token=workload.token, + server=conf.get("workers", "execution_api_server_url", fallback="http://localhost:9091/execution/"), + log_path=workload.log_path, + ) + + +if not AIRFLOW_V_3_0_PLUS: + + @app.task + def execute_command(command_to_exec: CommandType) -> None: + """Execute command.""" + dag_id, task_id = BaseExecutor.validate_airflow_tasks_run_command(command_to_exec) + celery_task_id = app.current_task.request.id + log.info("[%s] Executing command in Celery: %s", celery_task_id, command_to_exec) + with _airflow_parsing_context_manager(dag_id=dag_id, task_id=task_id): + try: + if settings.EXECUTE_TASKS_NEW_PYTHON_INTERPRETER: + _execute_in_subprocess(command_to_exec, celery_task_id) + else: + _execute_in_fork(command_to_exec, celery_task_id) + except Exception: + Stats.incr("celery.execute_command.failure") + raise def _execute_in_fork(command_to_exec: CommandType, celery_task_id: str | None = None) -> None: @@ -213,15 +257,19 @@ def send_task_to_executor( task_tuple: TaskInstanceInCelery, ) -> tuple[TaskInstanceKey, CommandType, AsyncResult | ExceptionWithTraceback]: """Send task to executor.""" - key, command, queue, task_to_run = task_tuple + from airflow.executors import workloads + + key, args, queue, task_to_run = task_tuple + if isinstance(args, workloads.BaseWorkload): + args = (args.model_dump_json(),) try: with timeout(seconds=OPERATION_TIMEOUT): - result = task_to_run.apply_async(args=[command], queue=queue) + result = task_to_run.apply_async(args=args, queue=queue) except (Exception, AirflowTaskTimeout) as e: exception_traceback = f"Celery Task ID: {key}\n{traceback.format_exc()}" result = ExceptionWithTraceback(e, exception_traceback) - return key, command, result + return key, args, result def fetch_celery_task_state(async_result: AsyncResult) -> tuple[str, str | ExceptionWithTraceback, Any]: diff --git a/providers/celery/src/airflow/providers/celery/executors/celery_kubernetes_executor.py b/providers/celery/src/airflow/providers/celery/executors/celery_kubernetes_executor.py index 680bcfb3d603e..9caf0a5866890 100644 --- a/providers/celery/src/airflow/providers/celery/executors/celery_kubernetes_executor.py +++ b/providers/celery/src/airflow/providers/celery/executors/celery_kubernetes_executor.py @@ -97,9 +97,9 @@ def _task_event_logs(self, value): def queued_tasks(self) -> dict[TaskInstanceKey, QueuedTaskInstanceType]: """Return queued tasks from celery and kubernetes executor.""" queued_tasks = self.celery_executor.queued_tasks.copy() - queued_tasks.update(self.kubernetes_executor.queued_tasks) + queued_tasks.update(self.kubernetes_executor.queued_tasks) # type: ignore[arg-type] - return queued_tasks + return queued_tasks # type: ignore[return-value] 
@queued_tasks.setter def queued_tasks(self, value) -> None: diff --git a/providers/celery/src/airflow/providers/celery/executors/default_celery.py b/providers/celery/src/airflow/providers/celery/executors/default_celery.py index 20c307a77b04f..9fb4a7e3bbbb6 100644 --- a/providers/celery/src/airflow/providers/celery/executors/default_celery.py +++ b/providers/celery/src/airflow/providers/celery/executors/default_celery.py @@ -27,6 +27,7 @@ from airflow.configuration import conf from airflow.exceptions import AirflowConfigException, AirflowException +from airflow.providers.celery.version_compat import AIRFLOW_V_3_0_PLUS def _broker_supports_visibility_timeout(url): @@ -67,7 +68,7 @@ def _broker_supports_visibility_timeout(url): result_backend = conf.get_mandatory_value("celery", "RESULT_BACKEND") else: log.debug("Value for celery result_backend not found. Using sql_alchemy_conn with db+ prefix.") - result_backend = f'db+{conf.get("database", "SQL_ALCHEMY_CONN")}' + result_backend = f"db+{conf.get('database', 'SQL_ALCHEMY_CONN')}" extra_celery_config = conf.getjson("celery", "extra_celery_config", fallback={}) @@ -81,6 +82,9 @@ def _broker_supports_visibility_timeout(url): "task_track_started": conf.getboolean("celery", "task_track_started", fallback=True), "broker_url": broker_url, "broker_transport_options": broker_transport_options, + "broker_connection_retry_on_startup": conf.getboolean( + "celery", "broker_connection_retry_on_startup", fallback=True + ), "result_backend": result_backend, "database_engine_options": conf.getjson( "celery", "result_backend_sqlalchemy_engine_options", fallback={} @@ -90,6 +94,11 @@ def _broker_supports_visibility_timeout(url): **(extra_celery_config if isinstance(extra_celery_config, dict) else {}), } +# In order to not change anything pre Task Execution API, we leave this setting as it was (unset) in Airflow2 +if AIRFLOW_V_3_0_PLUS: + DEFAULT_CELERY_CONFIG.setdefault("worker_redirect_stdouts", False) + DEFAULT_CELERY_CONFIG.setdefault("worker_hijack_root_logger", False) + def _get_celery_ssl_active() -> bool: try: @@ -126,9 +135,7 @@ def _get_celery_ssl_active() -> bool: DEFAULT_CELERY_CONFIG["broker_use_ssl"] = broker_use_ssl except AirflowConfigException: raise AirflowException( - "AirflowConfigException: SSL_ACTIVE is True, " - "please ensure SSL_KEY, " - "SSL_CERT and SSL_CACERT are set" + "AirflowConfigException: SSL_ACTIVE is True, please ensure SSL_KEY, SSL_CERT and SSL_CACERT are set" ) except Exception as e: raise AirflowException( diff --git a/providers/celery/src/airflow/providers/celery/get_provider_info.py b/providers/celery/src/airflow/providers/celery/get_provider_info.py index 2c0a3d70a5877..0cc999f5baf05 100644 --- a/providers/celery/src/airflow/providers/celery/get_provider_info.py +++ b/providers/celery/src/airflow/providers/celery/get_provider_info.py @@ -304,7 +304,7 @@ def get_provider_info(): }, "dependencies": [ "apache-airflow>=2.9.0", - "celery[redis]>=5.3.0,<6,!=5.3.3,!=5.3.2", + "celery[redis]>=5.4.0,<6", "flower>=1.0.0", "google-re2>=1.0", ], diff --git a/providers/celery/tests/provider_tests/celery/executors/test_celery_executor.py b/providers/celery/tests/provider_tests/celery/executors/test_celery_executor.py index 7a33e0cfbc17c..22dbd59a914c1 100644 --- a/providers/celery/tests/provider_tests/celery/executors/test_celery_executor.py +++ b/providers/celery/tests/provider_tests/celery/executors/test_celery_executor.py @@ -44,7 +44,7 @@ from tests_common.test_utils import db from tests_common.test_utils.config import conf_vars 
-from tests_common.test_utils.version_compat import AIRFLOW_V_2_10_PLUS +from tests_common.test_utils.version_compat import AIRFLOW_V_2_10_PLUS, AIRFLOW_V_3_0_PLUS pytestmark = pytest.mark.db_test @@ -71,21 +71,24 @@ def task_id(self): def _prepare_app(broker_url=None, execute=None): broker_url = broker_url or conf.get("celery", "BROKER_URL") - execute = execute or celery_executor_utils.execute_command.__wrapped__ + if AIRFLOW_V_3_0_PLUS: + execute_name = "execute_workload" + execute = execute or celery_executor_utils.execute_workload.__wrapped__ + else: + execute_name = "execute_command" + execute = execute or celery_executor_utils.execute_command.__wrapped__ test_config = dict(celery_executor_utils.celery_configuration) test_config.update({"broker_url": broker_url}) test_app = Celery(broker_url, config_source=test_config) test_execute = test_app.task(execute) - patch_app = mock.patch("airflow.providers.celery.executors.celery_executor_utils.app", test_app) - patch_execute = mock.patch( - "airflow.providers.celery.executors.celery_executor_utils.execute_command", test_execute - ) + patch_app = mock.patch.object(celery_executor_utils, "app", test_app) + patch_execute = mock.patch.object(celery_executor_utils, execute_name, test_execute) backend = test_app.backend if hasattr(backend, "ResultSession"): - # Pre-create the database tables now, otherwise SQLA vis Celery has a + # Pre-create the database tables now, otherwise SQLA via Celery has a # race condition where it one of the subprocesses can die with "Table # already exists" error, because SQLA checks for which tables exist, # then issues a CREATE TABLE, rather than doing CREATE TABLE IF NOT @@ -147,6 +150,7 @@ def test_gauge_executor_metrics(self, mock_stats_gauge, mock_trigger_tasks, mock ] mock_stats_gauge.assert_has_calls(calls) + @pytest.mark.skipif(AIRFLOW_V_3_0_PLUS, reason="Airflow 3 doesn't have execute_command anymore") @pytest.mark.parametrize( "command, raise_exception", [ diff --git a/providers/dbt/cloud/README.rst b/providers/dbt/cloud/README.rst new file mode 100644 index 0000000000000..4c85eebd9f0c9 --- /dev/null +++ b/providers/dbt/cloud/README.rst @@ -0,0 +1,84 @@ + + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! + + .. IF YOU WANT TO MODIFY TEMPLATE FOR THIS FILE, YOU SHOULD MODIFY THE TEMPLATE + `PROVIDER_README_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + + +Package ``apache-airflow-providers-dbt-cloud`` + +Release: ``4.0.0`` + + +`dbt Cloud `__ + + +Provider package +---------------- + +This is a provider package for ``dbt.cloud`` provider. All classes for this provider package +are in ``airflow.providers.dbt.cloud`` python package. 
+ +You can find package information and changelog for the provider +in the `documentation `_. + +Installation +------------ + +You can install this package on top of an existing Airflow 2 installation (see ``Requirements`` below +for the minimum Airflow version supported) via +``pip install apache-airflow-providers-dbt-cloud`` + +The package supports the following python versions: 3.9,3.10,3.11,3.12 + +Requirements +------------ + +================================= ================== +PIP package Version required +================================= ================== +``apache-airflow`` ``>=2.9.0`` +``apache-airflow-providers-http`` +``asgiref`` ``>=2.3.0`` +``aiohttp`` ``>=3.9.2`` +================================= ================== + +Cross provider package dependencies +----------------------------------- + +Those are dependencies that might be needed in order to use all the features of the package. +You need to install the specified provider packages in order to use them. + +You can install such cross-provider dependencies when installing from PyPI. For example: + +.. code-block:: bash + + pip install apache-airflow-providers-dbt-cloud[http] + + +============================================================================================================== =============== +Dependent package Extra +============================================================================================================== =============== +`apache-airflow-providers-http `_ ``http`` +`apache-airflow-providers-openlineage `_ ``openlineage`` +============================================================================================================== =============== + +The changelog for the provider package can be found in the +`changelog `_. diff --git a/providers/src/airflow/providers/dbt/cloud/.latest-doc-only-change.txt b/providers/dbt/cloud/docs/.latest-doc-only-change.txt similarity index 100% rename from providers/src/airflow/providers/dbt/cloud/.latest-doc-only-change.txt rename to providers/dbt/cloud/docs/.latest-doc-only-change.txt diff --git a/providers/src/airflow/providers/dbt/cloud/CHANGELOG.rst b/providers/dbt/cloud/docs/changelog.rst similarity index 100% rename from providers/src/airflow/providers/dbt/cloud/CHANGELOG.rst rename to providers/dbt/cloud/docs/changelog.rst diff --git a/docs/apache-airflow-providers-dbt-cloud/commits.rst b/providers/dbt/cloud/docs/commits.rst similarity index 100% rename from docs/apache-airflow-providers-dbt-cloud/commits.rst rename to providers/dbt/cloud/docs/commits.rst diff --git a/docs/apache-airflow-providers-dbt-cloud/connections.rst b/providers/dbt/cloud/docs/connections.rst similarity index 100% rename from docs/apache-airflow-providers-dbt-cloud/connections.rst rename to providers/dbt/cloud/docs/connections.rst diff --git a/docs/apache-airflow-providers-dbt-cloud/index.rst b/providers/dbt/cloud/docs/index.rst similarity index 100% rename from docs/apache-airflow-providers-dbt-cloud/index.rst rename to providers/dbt/cloud/docs/index.rst diff --git a/docs/apache-airflow-providers-dbt-cloud/installing-providers-from-sources.rst b/providers/dbt/cloud/docs/installing-providers-from-sources.rst similarity index 100% rename from docs/apache-airflow-providers-dbt-cloud/installing-providers-from-sources.rst rename to providers/dbt/cloud/docs/installing-providers-from-sources.rst diff --git a/docs/integration-logos/dbt/dbt.png b/providers/dbt/cloud/docs/integration-logos/dbt.png similarity index 100% rename from docs/integration-logos/dbt/dbt.png rename to 
providers/dbt/cloud/docs/integration-logos/dbt.png diff --git a/docs/apache-airflow-providers-dbt-cloud/operators.rst b/providers/dbt/cloud/docs/operators.rst similarity index 93% rename from docs/apache-airflow-providers-dbt-cloud/operators.rst rename to providers/dbt/cloud/docs/operators.rst index eaa285f6d4082..3eeb5b04dbb59 100644 --- a/docs/apache-airflow-providers-dbt-cloud/operators.rst +++ b/providers/dbt/cloud/docs/operators.rst @@ -67,7 +67,7 @@ The below examples demonstrate how to instantiate DbtCloudRunJobOperator tasks w asynchronous waiting for run termination, respectively. To note, the ``account_id`` for the operators is referenced within the ``default_args`` of the example DAG. -.. exampleinclude:: /../../providers/tests/system/dbt/cloud/example_dbt_cloud.py +.. exampleinclude:: /../../providers/dbt/cloud/tests/system/dbt/cloud/example_dbt_cloud.py :language: python :dedent: 4 :start-after: [START howto_operator_dbt_cloud_run_job] @@ -76,7 +76,7 @@ referenced within the ``default_args`` of the example DAG. This next example also shows how to pass in custom runtime configuration (in this case for ``threads_override``) via the ``additional_run_config`` dictionary. -.. exampleinclude:: /../../providers/tests/system/dbt/cloud/example_dbt_cloud.py +.. exampleinclude:: /../../providers/dbt/cloud/tests/system/dbt/cloud/example_dbt_cloud.py :language: python :dedent: 4 :start-after: [START howto_operator_dbt_cloud_run_job_async] @@ -95,7 +95,7 @@ In the example below, the ``run_id`` value in the example below comes from the o DbtCloudRunJobOperator task by utilizing the ``.output`` property exposed for all operators. Also, to note, the ``account_id`` for the task is referenced within the ``default_args`` of the example DAG. -.. exampleinclude:: /../../providers/tests/system/dbt/cloud/example_dbt_cloud.py +.. exampleinclude:: /../../providers/dbt/cloud/tests/system/dbt/cloud/example_dbt_cloud.py :language: python :dedent: 4 :start-after: [START howto_operator_dbt_cloud_run_job_sensor] @@ -104,7 +104,7 @@ the ``account_id`` for the task is referenced within the ``default_args`` of the Also, you can poll for status of the job run asynchronously using ``deferrable`` mode. In this mode, worker slots are freed up while the sensor is running. -.. exampleinclude:: /../../providers/tests/system/dbt/cloud/example_dbt_cloud.py +.. exampleinclude:: /../../providers/dbt/cloud/tests/system/dbt/cloud/example_dbt_cloud.py :language: python :dedent: 4 :start-after: [START howto_operator_dbt_cloud_run_job_sensor_deferred] @@ -125,7 +125,7 @@ downloaded. For more information on dbt Cloud artifacts, reference `this documentation `__. -.. exampleinclude:: /../../providers/tests/system/dbt/cloud/example_dbt_cloud.py +.. exampleinclude:: /../../providers/dbt/cloud/tests/system/dbt/cloud/example_dbt_cloud.py :language: python :dedent: 4 :start-after: [START howto_operator_dbt_cloud_get_artifact] @@ -146,7 +146,7 @@ If a ``project_id`` is supplied, only jobs pertaining to this project id will be For more information on dbt Cloud list jobs, reference `this documentation `__. -.. exampleinclude:: /../../providers/tests/system/dbt/cloud/example_dbt_cloud.py +.. 
exampleinclude:: /../../providers/dbt/cloud/tests/system/dbt/cloud/example_dbt_cloud.py :language: python :dedent: 4 :start-after: [START howto_operator_dbt_cloud_list_jobs] diff --git a/docs/apache-airflow-providers-dbt-cloud/security.rst b/providers/dbt/cloud/docs/security.rst similarity index 100% rename from docs/apache-airflow-providers-dbt-cloud/security.rst rename to providers/dbt/cloud/docs/security.rst diff --git a/providers/src/airflow/providers/dbt/cloud/provider.yaml b/providers/dbt/cloud/provider.yaml similarity index 85% rename from providers/src/airflow/providers/dbt/cloud/provider.yaml rename to providers/dbt/cloud/provider.yaml index dd6dc42b3b75f..99dc44927ff97 100644 --- a/providers/src/airflow/providers/dbt/cloud/provider.yaml +++ b/providers/dbt/cloud/provider.yaml @@ -59,23 +59,10 @@ versions: - 1.0.2 - 1.0.1 -dependencies: - - apache-airflow>=2.9.0 - - apache-airflow-providers-http - - asgiref>=2.3.0 - - aiohttp>=3.9.2 - -additional-extras: - # pip install apache-airflow-providers-dbt-cloud[openlineage] - - name: openlineage - description: Install compatible OpenLineage dependencies - dependencies: - - apache-airflow-providers-openlineage>=1.7.0 - integrations: - integration-name: dbt Cloud external-doc-url: https://docs.getdbt.com/docs/dbt-cloud/cloud-overview - logo: /integration-logos/dbt/dbt.png + logo: /docs/integration-logos/dbt.png how-to-guide: - /docs/apache-airflow-providers-dbt-cloud/operators.rst tags: [dbt] diff --git a/providers/dbt/cloud/pyproject.toml b/providers/dbt/cloud/pyproject.toml new file mode 100644 index 0000000000000..a74cf457016cc --- /dev/null +++ b/providers/dbt/cloud/pyproject.toml @@ -0,0 +1,88 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! 
+ +# IF YOU WANT TO MODIFY THIS FILE EXCEPT DEPENDENCIES, YOU SHOULD MODIFY THE TEMPLATE +# `pyproject_TEMPLATE.toml.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY +[build-system] +requires = ["flit_core==3.10.1"] +build-backend = "flit_core.buildapi" + +[project] +name = "apache-airflow-providers-dbt-cloud" +version = "4.0.0" +description = "Provider package apache-airflow-providers-dbt-cloud for Apache Airflow" +readme = "README.rst" +authors = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +maintainers = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +keywords = [ "airflow-provider", "dbt.cloud", "airflow", "integration" ] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "Environment :: Web Environment", + "Intended Audience :: Developers", + "Intended Audience :: System Administrators", + "Framework :: Apache Airflow", + "Framework :: Apache Airflow :: Provider", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: System :: Monitoring", +] +requires-python = "~=3.9" + +# The dependencies should be modified in place in the generated file +# Any change in the dependencies is preserved when the file is regenerated +dependencies = [ + "apache-airflow>=2.9.0", + "apache-airflow-providers-http", + "asgiref>=2.3.0", + "aiohttp>=3.9.2", +] + +# The optional dependencies should be modified in place in the generated file +# Any change in the dependencies is preserved when the file is regenerated +[project.optional-dependencies] +# pip install apache-airflow-providers-dbt-cloud[openlineage] +"openlineage" = [ + "apache-airflow-providers-openlineage>=1.7.0", +] + +[project.urls] +"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/4.0.0" +"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-dbt-cloud/4.0.0/changelog.html" +"Bug Tracker" = "https://github.com/apache/airflow/issues" +"Source Code" = "https://github.com/apache/airflow" +"Slack Chat" = "https://s.apache.org/airflow-slack" +"Twitter" = "https://x.com/ApacheAirflow" +"YouTube" = "https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/" + +[project.entry-points."apache_airflow_provider"] +provider_info = "airflow.providers.dbt.cloud.get_provider_info:get_provider_info" + +[tool.flit.module] +name = "airflow.providers.dbt.cloud" + +[tool.pytest.ini_options] +ignore = "tests/system/" diff --git a/providers/dbt/cloud/src/airflow/providers/dbt/cloud/LICENSE b/providers/dbt/cloud/src/airflow/providers/dbt/cloud/LICENSE new file mode 100644 index 0000000000000..11069edd79019 --- /dev/null +++ b/providers/dbt/cloud/src/airflow/providers/dbt/cloud/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
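The generated ``pyproject.toml`` above wires the package into Airflow through the ``apache_airflow_provider`` entry-point group, which points at ``get_provider_info``. A hedged sketch of how such an entry point can be discovered with the standard library alone (Python 3.10+ assumed for the ``group=`` keyword; the helper below is illustrative, not Airflow's actual providers manager):

.. code-block:: python

    from __future__ import annotations

    from importlib.metadata import entry_points


    def discover_provider_info() -> list[dict]:
        """Collect provider metadata from every installed distribution registering the group."""
        infos = []
        # Python 3.10+: entry_points() accepts a group= filter directly.
        for ep in entry_points(group="apache_airflow_provider"):
            get_provider_info = ep.load()   # e.g. the get_provider_info() callable declared in pyproject.toml
            infos.append(get_provider_info())
        return infos


    if __name__ == "__main__":
        for info in discover_provider_info():
            print(info.get("package-name"), (info.get("versions") or ["?"])[0])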
diff --git a/providers/src/airflow/providers/dbt/cloud/__init__.py b/providers/dbt/cloud/src/airflow/providers/dbt/cloud/__init__.py similarity index 100% rename from providers/src/airflow/providers/dbt/cloud/__init__.py rename to providers/dbt/cloud/src/airflow/providers/dbt/cloud/__init__.py diff --git a/providers/dbt/cloud/src/airflow/providers/dbt/cloud/get_provider_info.py b/providers/dbt/cloud/src/airflow/providers/dbt/cloud/get_provider_info.py new file mode 100644 index 0000000000000..de75fc08c4c6b --- /dev/null +++ b/providers/dbt/cloud/src/airflow/providers/dbt/cloud/get_provider_info.py @@ -0,0 +1,101 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! +# +# IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE +# `get_provider_info_TEMPLATE.py.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + + +def get_provider_info(): + return { + "package-name": "apache-airflow-providers-dbt-cloud", + "name": "dbt Cloud", + "description": "`dbt Cloud `__\n", + "state": "ready", + "source-date-epoch": 1734533324, + "versions": [ + "4.0.0", + "3.11.2", + "3.11.1", + "3.11.0", + "3.10.1", + "3.10.0", + "3.9.0", + "3.8.1", + "3.8.0", + "3.7.1", + "3.7.0", + "3.6.1", + "3.6.0", + "3.5.1", + "3.5.0", + "3.4.1", + "3.4.0", + "3.3.0", + "3.2.3", + "3.2.2", + "3.2.1", + "3.2.0", + "3.1.1", + "3.1.0", + "3.0.0", + "2.3.1", + "2.3.0", + "2.2.0", + "2.1.0", + "2.0.1", + "2.0.0", + "1.0.2", + "1.0.1", + ], + "integrations": [ + { + "integration-name": "dbt Cloud", + "external-doc-url": "https://docs.getdbt.com/docs/dbt-cloud/cloud-overview", + "logo": "/docs/integration-logos/dbt.png", + "how-to-guide": ["/docs/apache-airflow-providers-dbt-cloud/operators.rst"], + "tags": ["dbt"], + } + ], + "operators": [ + {"integration-name": "dbt Cloud", "python-modules": ["airflow.providers.dbt.cloud.operators.dbt"]} + ], + "sensors": [ + {"integration-name": "dbt Cloud", "python-modules": ["airflow.providers.dbt.cloud.sensors.dbt"]} + ], + "hooks": [ + {"integration-name": "dbt Cloud", "python-modules": ["airflow.providers.dbt.cloud.hooks.dbt"]} + ], + "triggers": [ + {"integration-name": "dbt Cloud", "python-modules": ["airflow.providers.dbt.cloud.triggers.dbt"]} + ], + "connection-types": [ + { + "hook-class-name": "airflow.providers.dbt.cloud.hooks.dbt.DbtCloudHook", + "connection-type": "dbt_cloud", + } + ], + "extra-links": ["airflow.providers.dbt.cloud.operators.dbt.DbtCloudRunJobOperatorLink"], + "dependencies": [ + "apache-airflow>=2.9.0", + "apache-airflow-providers-http", + "asgiref>=2.3.0", + "aiohttp>=3.9.2", + ], + "optional-dependencies": {"openlineage": ["apache-airflow-providers-openlineage>=1.7.0"]}, + } diff --git 
a/providers/src/airflow/providers/github/sensors/__init__.py b/providers/dbt/cloud/src/airflow/providers/dbt/cloud/hooks/__init__.py similarity index 100% rename from providers/src/airflow/providers/github/sensors/__init__.py rename to providers/dbt/cloud/src/airflow/providers/dbt/cloud/hooks/__init__.py diff --git a/providers/src/airflow/providers/dbt/cloud/hooks/dbt.py b/providers/dbt/cloud/src/airflow/providers/dbt/cloud/hooks/dbt.py similarity index 100% rename from providers/src/airflow/providers/dbt/cloud/hooks/dbt.py rename to providers/dbt/cloud/src/airflow/providers/dbt/cloud/hooks/dbt.py diff --git a/providers/src/airflow/providers/opensearch/hooks/__init__.py b/providers/dbt/cloud/src/airflow/providers/dbt/cloud/operators/__init__.py similarity index 100% rename from providers/src/airflow/providers/opensearch/hooks/__init__.py rename to providers/dbt/cloud/src/airflow/providers/dbt/cloud/operators/__init__.py diff --git a/providers/src/airflow/providers/dbt/cloud/operators/dbt.py b/providers/dbt/cloud/src/airflow/providers/dbt/cloud/operators/dbt.py similarity index 100% rename from providers/src/airflow/providers/dbt/cloud/operators/dbt.py rename to providers/dbt/cloud/src/airflow/providers/dbt/cloud/operators/dbt.py diff --git a/providers/tests/apache/druid/hooks/__init__.py b/providers/dbt/cloud/src/airflow/providers/dbt/cloud/sensors/__init__.py similarity index 100% rename from providers/tests/apache/druid/hooks/__init__.py rename to providers/dbt/cloud/src/airflow/providers/dbt/cloud/sensors/__init__.py diff --git a/providers/src/airflow/providers/dbt/cloud/sensors/dbt.py b/providers/dbt/cloud/src/airflow/providers/dbt/cloud/sensors/dbt.py similarity index 100% rename from providers/src/airflow/providers/dbt/cloud/sensors/dbt.py rename to providers/dbt/cloud/src/airflow/providers/dbt/cloud/sensors/dbt.py diff --git a/providers/src/airflow/providers/opensearch/log/__init__.py b/providers/dbt/cloud/src/airflow/providers/dbt/cloud/triggers/__init__.py similarity index 100% rename from providers/src/airflow/providers/opensearch/log/__init__.py rename to providers/dbt/cloud/src/airflow/providers/dbt/cloud/triggers/__init__.py diff --git a/providers/src/airflow/providers/dbt/cloud/triggers/dbt.py b/providers/dbt/cloud/src/airflow/providers/dbt/cloud/triggers/dbt.py similarity index 100% rename from providers/src/airflow/providers/dbt/cloud/triggers/dbt.py rename to providers/dbt/cloud/src/airflow/providers/dbt/cloud/triggers/dbt.py diff --git a/providers/src/airflow/providers/opensearch/operators/__init__.py b/providers/dbt/cloud/src/airflow/providers/dbt/cloud/utils/__init__.py similarity index 100% rename from providers/src/airflow/providers/opensearch/operators/__init__.py rename to providers/dbt/cloud/src/airflow/providers/dbt/cloud/utils/__init__.py diff --git a/providers/src/airflow/providers/dbt/cloud/utils/openlineage.py b/providers/dbt/cloud/src/airflow/providers/dbt/cloud/utils/openlineage.py similarity index 100% rename from providers/src/airflow/providers/dbt/cloud/utils/openlineage.py rename to providers/dbt/cloud/src/airflow/providers/dbt/cloud/utils/openlineage.py diff --git a/providers/dbt/cloud/tests/conftest.py b/providers/dbt/cloud/tests/conftest.py new file mode 100644 index 0000000000000..068fe6bbf5ae9 --- /dev/null +++ b/providers/dbt/cloud/tests/conftest.py @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import pathlib + +import pytest + +pytest_plugins = "tests_common.pytest_plugin" + + +@pytest.hookimpl(tryfirst=True) +def pytest_configure(config: pytest.Config) -> None: + deprecations_ignore_path = pathlib.Path(__file__).parent.joinpath("deprecations_ignore.yml") + dep_path = [deprecations_ignore_path] if deprecations_ignore_path.exists() else [] + config.inicfg["airflow_deprecations_ignore"] = ( + config.inicfg.get("airflow_deprecations_ignore", []) + dep_path # type: ignore[assignment,operator] + ) diff --git a/providers/dbt/cloud/tests/provider_tests/__init__.py b/providers/dbt/cloud/tests/provider_tests/__init__.py new file mode 100644 index 0000000000000..e8fd22856438c --- /dev/null +++ b/providers/dbt/cloud/tests/provider_tests/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/providers/dbt/cloud/tests/provider_tests/dbt/__init__.py b/providers/dbt/cloud/tests/provider_tests/dbt/__init__.py new file mode 100644 index 0000000000000..e8fd22856438c --- /dev/null +++ b/providers/dbt/cloud/tests/provider_tests/dbt/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/providers/src/airflow/providers/sendgrid/utils/__init__.py b/providers/dbt/cloud/tests/provider_tests/dbt/cloud/__init__.py similarity index 100% rename from providers/src/airflow/providers/sendgrid/utils/__init__.py rename to providers/dbt/cloud/tests/provider_tests/dbt/cloud/__init__.py diff --git a/providers/tests/apache/druid/transfers/__init__.py b/providers/dbt/cloud/tests/provider_tests/dbt/cloud/hooks/__init__.py similarity index 100% rename from providers/tests/apache/druid/transfers/__init__.py rename to providers/dbt/cloud/tests/provider_tests/dbt/cloud/hooks/__init__.py diff --git a/providers/tests/dbt/cloud/hooks/test_dbt.py b/providers/dbt/cloud/tests/provider_tests/dbt/cloud/hooks/test_dbt.py similarity index 100% rename from providers/tests/dbt/cloud/hooks/test_dbt.py rename to providers/dbt/cloud/tests/provider_tests/dbt/cloud/hooks/test_dbt.py diff --git a/providers/tests/arangodb/__init__.py b/providers/dbt/cloud/tests/provider_tests/dbt/cloud/operators/__init__.py similarity index 100% rename from providers/tests/arangodb/__init__.py rename to providers/dbt/cloud/tests/provider_tests/dbt/cloud/operators/__init__.py diff --git a/providers/tests/dbt/cloud/operators/test_dbt.py b/providers/dbt/cloud/tests/provider_tests/dbt/cloud/operators/test_dbt.py similarity index 100% rename from providers/tests/dbt/cloud/operators/test_dbt.py rename to providers/dbt/cloud/tests/provider_tests/dbt/cloud/operators/test_dbt.py diff --git a/providers/tests/arangodb/hooks/__init__.py b/providers/dbt/cloud/tests/provider_tests/dbt/cloud/sensors/__init__.py similarity index 100% rename from providers/tests/arangodb/hooks/__init__.py rename to providers/dbt/cloud/tests/provider_tests/dbt/cloud/sensors/__init__.py diff --git a/providers/tests/dbt/cloud/sensors/test_dbt.py b/providers/dbt/cloud/tests/provider_tests/dbt/cloud/sensors/test_dbt.py similarity index 100% rename from providers/tests/dbt/cloud/sensors/test_dbt.py rename to providers/dbt/cloud/tests/provider_tests/dbt/cloud/sensors/test_dbt.py diff --git a/providers/tests/arangodb/operators/__init__.py b/providers/dbt/cloud/tests/provider_tests/dbt/cloud/test_data/__init__.py similarity index 100% rename from providers/tests/arangodb/operators/__init__.py rename to providers/dbt/cloud/tests/provider_tests/dbt/cloud/test_data/__init__.py diff --git a/providers/tests/dbt/cloud/test_data/catalog.json b/providers/dbt/cloud/tests/provider_tests/dbt/cloud/test_data/catalog.json similarity index 100% rename from providers/tests/dbt/cloud/test_data/catalog.json rename to providers/dbt/cloud/tests/provider_tests/dbt/cloud/test_data/catalog.json diff --git a/providers/tests/dbt/cloud/test_data/job_run.json b/providers/dbt/cloud/tests/provider_tests/dbt/cloud/test_data/job_run.json similarity index 100% rename from providers/tests/dbt/cloud/test_data/job_run.json rename to providers/dbt/cloud/tests/provider_tests/dbt/cloud/test_data/job_run.json diff --git a/providers/tests/dbt/cloud/test_data/manifest.json b/providers/dbt/cloud/tests/provider_tests/dbt/cloud/test_data/manifest.json similarity index 100% rename from providers/tests/dbt/cloud/test_data/manifest.json rename to providers/dbt/cloud/tests/provider_tests/dbt/cloud/test_data/manifest.json diff --git a/providers/tests/dbt/cloud/test_data/run_results.json b/providers/dbt/cloud/tests/provider_tests/dbt/cloud/test_data/run_results.json similarity index 100% rename from 
providers/tests/dbt/cloud/test_data/run_results.json rename to providers/dbt/cloud/tests/provider_tests/dbt/cloud/test_data/run_results.json diff --git a/providers/tests/arangodb/sensors/__init__.py b/providers/dbt/cloud/tests/provider_tests/dbt/cloud/triggers/__init__.py similarity index 100% rename from providers/tests/arangodb/sensors/__init__.py rename to providers/dbt/cloud/tests/provider_tests/dbt/cloud/triggers/__init__.py diff --git a/providers/tests/dbt/cloud/triggers/test_dbt.py b/providers/dbt/cloud/tests/provider_tests/dbt/cloud/triggers/test_dbt.py similarity index 100% rename from providers/tests/dbt/cloud/triggers/test_dbt.py rename to providers/dbt/cloud/tests/provider_tests/dbt/cloud/triggers/test_dbt.py diff --git a/providers/tests/dbt/cloud/__init__.py b/providers/dbt/cloud/tests/provider_tests/dbt/cloud/utils/__init__.py similarity index 100% rename from providers/tests/dbt/cloud/__init__.py rename to providers/dbt/cloud/tests/provider_tests/dbt/cloud/utils/__init__.py diff --git a/providers/tests/dbt/cloud/utils/test_openlineage.py b/providers/dbt/cloud/tests/provider_tests/dbt/cloud/utils/test_openlineage.py similarity index 100% rename from providers/tests/dbt/cloud/utils/test_openlineage.py rename to providers/dbt/cloud/tests/provider_tests/dbt/cloud/utils/test_openlineage.py diff --git a/providers/tests/dbt/cloud/hooks/__init__.py b/providers/dbt/cloud/tests/system/dbt/cloud/__init__.py similarity index 100% rename from providers/tests/dbt/cloud/hooks/__init__.py rename to providers/dbt/cloud/tests/system/dbt/cloud/__init__.py diff --git a/providers/tests/system/dbt/cloud/example_dbt_cloud.py b/providers/dbt/cloud/tests/system/dbt/cloud/example_dbt_cloud.py similarity index 100% rename from providers/tests/system/dbt/cloud/example_dbt_cloud.py rename to providers/dbt/cloud/tests/system/dbt/cloud/example_dbt_cloud.py diff --git a/providers/edge/src/airflow/providers/edge/example_dags/integration_test.py b/providers/edge/src/airflow/providers/edge/example_dags/integration_test.py index 418164832576d..777a85ef2dd99 100644 --- a/providers/edge/src/airflow/providers/edge/example_dags/integration_test.py +++ b/providers/edge/src/airflow/providers/edge/example_dags/integration_test.py @@ -30,10 +30,10 @@ from airflow.exceptions import AirflowNotFoundException from airflow.hooks.base import BaseHook from airflow.models.dag import DAG -from airflow.models.param import Param from airflow.models.variable import Variable from airflow.operators.empty import EmptyOperator from airflow.providers.common.compat.standard.operators import PythonOperator +from airflow.sdk import Param from airflow.utils.trigger_rule import TriggerRule try: diff --git a/providers/edge/src/airflow/providers/edge/example_dags/win_notepad.py b/providers/edge/src/airflow/providers/edge/example_dags/win_notepad.py index da50fff8b96ee..a3b229a28f722 100644 --- a/providers/edge/src/airflow/providers/edge/example_dags/win_notepad.py +++ b/providers/edge/src/airflow/providers/edge/example_dags/win_notepad.py @@ -34,7 +34,7 @@ from airflow.models import BaseOperator from airflow.models.dag import DAG -from airflow.models.param import Param +from airflow.sdk import Param if TYPE_CHECKING: from airflow.utils.context import Context diff --git a/providers/edge/src/airflow/providers/edge/example_dags/win_test.py b/providers/edge/src/airflow/providers/edge/example_dags/win_test.py index 3a730009d50c3..630092180b590 100644 --- a/providers/edge/src/airflow/providers/edge/example_dags/win_test.py +++ 
b/providers/edge/src/airflow/providers/edge/example_dags/win_test.py @@ -37,9 +37,9 @@ from airflow.hooks.base import BaseHook from airflow.models import BaseOperator from airflow.models.dag import DAG -from airflow.models.param import Param from airflow.models.variable import Variable from airflow.operators.empty import EmptyOperator +from airflow.sdk import Param from airflow.utils.operator_helpers import context_to_airflow_vars from airflow.utils.trigger_rule import TriggerRule from airflow.utils.types import ArgNotSet diff --git a/providers/elasticsearch/README.rst b/providers/elasticsearch/README.rst new file mode 100644 index 0000000000000..65252da3eb32c --- /dev/null +++ b/providers/elasticsearch/README.rst @@ -0,0 +1,82 @@ + + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! + + .. IF YOU WANT TO MODIFY TEMPLATE FOR THIS FILE, YOU SHOULD MODIFY THE TEMPLATE + `PROVIDER_README_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + + +Package ``apache-airflow-providers-elasticsearch`` + +Release: ``6.0.0`` + + +`Elasticsearch `__ + + +Provider package +---------------- + +This is a provider package for ``elasticsearch`` provider. All classes for this provider package +are in ``airflow.providers.elasticsearch`` python package. + +You can find package information and changelog for the provider +in the `documentation `_. + +Installation +------------ + +You can install this package on top of an existing Airflow 2 installation (see ``Requirements`` below +for the minimum Airflow version supported) via +``pip install apache-airflow-providers-elasticsearch`` + +The package supports the following python versions: 3.9,3.10,3.11,3.12 + +Requirements +------------ + +======================================= ================== +PIP package Version required +======================================= ================== +``apache-airflow`` ``>=2.9.0`` +``apache-airflow-providers-common-sql`` ``>=1.20.0`` +``elasticsearch`` ``>=8.10,<9`` +======================================= ================== + +Cross provider package dependencies +----------------------------------- + +Those are dependencies that might be needed in order to use all the features of the package. +You need to install the specified provider packages in order to use them. + +You can install such cross-provider dependencies when installing from PyPI. For example: + +.. 
code-block:: bash + + pip install apache-airflow-providers-elasticsearch[common.sql] + + +============================================================================================================ ============== +Dependent package Extra +============================================================================================================ ============== +`apache-airflow-providers-common-sql `_ ``common.sql`` +============================================================================================================ ============== + +The changelog for the provider package can be found in the +`changelog `_. diff --git a/providers/src/airflow/providers/elasticsearch/.latest-doc-only-change.txt b/providers/elasticsearch/docs/.latest-doc-only-change.txt similarity index 100% rename from providers/src/airflow/providers/elasticsearch/.latest-doc-only-change.txt rename to providers/elasticsearch/docs/.latest-doc-only-change.txt diff --git a/providers/src/airflow/providers/elasticsearch/CHANGELOG.rst b/providers/elasticsearch/docs/changelog.rst similarity index 99% rename from providers/src/airflow/providers/elasticsearch/CHANGELOG.rst rename to providers/elasticsearch/docs/changelog.rst index 1c9dc79d348ac..6ee3bfc59b6df 100644 --- a/providers/src/airflow/providers/elasticsearch/CHANGELOG.rst +++ b/providers/elasticsearch/docs/changelog.rst @@ -104,7 +104,7 @@ Misc ~~~~ * ``Generalize caching of connection in DbApiHook to improve performance (#40751)`` -* ``filename template arg in providers file task handlers backward compitability support (#41633)`` +* ``filename template arg in providers file task handlers backward compatibility support (#41633)`` * ``Remove deprecated log handler argument filename_template (#41552)`` diff --git a/docs/apache-airflow-providers-elasticsearch/commits.rst b/providers/elasticsearch/docs/commits.rst similarity index 100% rename from docs/apache-airflow-providers-elasticsearch/commits.rst rename to providers/elasticsearch/docs/commits.rst diff --git a/docs/apache-airflow-providers-elasticsearch/configurations-ref.rst b/providers/elasticsearch/docs/configurations-ref.rst similarity index 100% rename from docs/apache-airflow-providers-elasticsearch/configurations-ref.rst rename to providers/elasticsearch/docs/configurations-ref.rst diff --git a/docs/apache-airflow-providers-elasticsearch/connections/elasticsearch.rst b/providers/elasticsearch/docs/connections/elasticsearch.rst similarity index 95% rename from docs/apache-airflow-providers-elasticsearch/connections/elasticsearch.rst rename to providers/elasticsearch/docs/connections/elasticsearch.rst index 8097b8bd61c4f..7d52bbcfb7f90 100644 --- a/docs/apache-airflow-providers-elasticsearch/connections/elasticsearch.rst +++ b/providers/elasticsearch/docs/connections/elasticsearch.rst @@ -72,7 +72,7 @@ For example: export AIRFLOW_CONN_ELASTICSEARCH_DEFAULT='elasticsearch://elasticsearchlogin:elasticsearchpassword@elastic.co:80/http' -.. exampleinclude:: /../../providers/tests/system/elasticsearch/example_elasticsearch_query.py +.. 
exampleinclude:: /../../providers/elasticsearch/tests/system/elasticsearch/example_elasticsearch_query.py :language: python :dedent: 4 :start-after: [START howto_elasticsearch_query] diff --git a/docs/apache-airflow-providers-elasticsearch/hooks/elasticsearch_python_hook.rst b/providers/elasticsearch/docs/hooks/elasticsearch_python_hook.rst similarity index 93% rename from docs/apache-airflow-providers-elasticsearch/hooks/elasticsearch_python_hook.rst rename to providers/elasticsearch/docs/hooks/elasticsearch_python_hook.rst index 537b4973b41cf..d1a9e5300fe13 100644 --- a/docs/apache-airflow-providers-elasticsearch/hooks/elasticsearch_python_hook.rst +++ b/providers/elasticsearch/docs/hooks/elasticsearch_python_hook.rst @@ -36,7 +36,7 @@ es_conn_args Usage Example --------------------- -.. exampleinclude:: /../../providers/tests/system/elasticsearch/example_elasticsearch_query.py +.. exampleinclude:: /../../providers/elasticsearch/tests/system/elasticsearch/example_elasticsearch_query.py :language: python :start-after: [START howto_elasticsearch_python_hook] :end-before: [END howto_elasticsearch_python_hook] diff --git a/docs/apache-airflow-providers-elasticsearch/hooks/elasticsearch_sql_hook.rst b/providers/elasticsearch/docs/hooks/elasticsearch_sql_hook.rst similarity index 91% rename from docs/apache-airflow-providers-elasticsearch/hooks/elasticsearch_sql_hook.rst rename to providers/elasticsearch/docs/hooks/elasticsearch_sql_hook.rst index 084d445cb0bad..658a5f59ae3c7 100644 --- a/docs/apache-airflow-providers-elasticsearch/hooks/elasticsearch_sql_hook.rst +++ b/providers/elasticsearch/docs/hooks/elasticsearch_sql_hook.rst @@ -26,7 +26,7 @@ Elasticsearch Hook that interact with Elasticsearch through the elasticsearch-db Usage Example --------------------- -.. exampleinclude:: /../../providers/tests/system/elasticsearch/example_elasticsearch_query.py +.. 
exampleinclude:: /../../providers/elasticsearch/tests/system/elasticsearch/example_elasticsearch_query.py :language: python :start-after: [START howto_elasticsearch_query] :end-before: [END howto_elasticsearch_query] diff --git a/docs/apache-airflow-providers-elasticsearch/hooks/index.rst b/providers/elasticsearch/docs/hooks/index.rst similarity index 100% rename from docs/apache-airflow-providers-elasticsearch/hooks/index.rst rename to providers/elasticsearch/docs/hooks/index.rst diff --git a/docs/apache-airflow-providers-elasticsearch/index.rst b/providers/elasticsearch/docs/index.rst similarity index 100% rename from docs/apache-airflow-providers-elasticsearch/index.rst rename to providers/elasticsearch/docs/index.rst diff --git a/docs/apache-airflow-providers-elasticsearch/installing-providers-from-sources.rst b/providers/elasticsearch/docs/installing-providers-from-sources.rst similarity index 100% rename from docs/apache-airflow-providers-elasticsearch/installing-providers-from-sources.rst rename to providers/elasticsearch/docs/installing-providers-from-sources.rst diff --git a/docs/integration-logos/elasticsearch/Elasticsearch.png b/providers/elasticsearch/docs/integration-logos/Elasticsearch.png similarity index 100% rename from docs/integration-logos/elasticsearch/Elasticsearch.png rename to providers/elasticsearch/docs/integration-logos/Elasticsearch.png diff --git a/docs/apache-airflow-providers-elasticsearch/logging/index.rst b/providers/elasticsearch/docs/logging/index.rst similarity index 100% rename from docs/apache-airflow-providers-elasticsearch/logging/index.rst rename to providers/elasticsearch/docs/logging/index.rst diff --git a/docs/apache-airflow-providers-elasticsearch/redirects.txt b/providers/elasticsearch/docs/redirects.txt similarity index 100% rename from docs/apache-airflow-providers-elasticsearch/redirects.txt rename to providers/elasticsearch/docs/redirects.txt diff --git a/docs/apache-airflow-providers-elasticsearch/security.rst b/providers/elasticsearch/docs/security.rst similarity index 100% rename from docs/apache-airflow-providers-elasticsearch/security.rst rename to providers/elasticsearch/docs/security.rst diff --git a/providers/src/airflow/providers/elasticsearch/provider.yaml b/providers/elasticsearch/provider.yaml similarity index 97% rename from providers/src/airflow/providers/elasticsearch/provider.yaml rename to providers/elasticsearch/provider.yaml index 88ebba2a510c4..3e76e6ff76d8a 100644 --- a/providers/src/airflow/providers/elasticsearch/provider.yaml +++ b/providers/elasticsearch/provider.yaml @@ -70,15 +70,10 @@ versions: - 1.0.1 - 1.0.0 -dependencies: - - apache-airflow>=2.9.0 - - apache-airflow-providers-common-sql>=1.20.0 - - elasticsearch>=8.10,<9 - integrations: - integration-name: Elasticsearch external-doc-url: https://www.elastic.co/elasticsearch - logo: /integration-logos/elasticsearch/Elasticsearch.png + logo: /docs/integration-logos/Elasticsearch.png tags: [software] hooks: diff --git a/providers/elasticsearch/pyproject.toml b/providers/elasticsearch/pyproject.toml new file mode 100644 index 0000000000000..63f28a026ee10 --- /dev/null +++ b/providers/elasticsearch/pyproject.toml @@ -0,0 +1,79 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! + +# IF YOU WANT TO MODIFY THIS FILE EXCEPT DEPENDENCIES, YOU SHOULD MODIFY THE TEMPLATE +# `pyproject_TEMPLATE.toml.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY +[build-system] +requires = ["flit_core==3.10.1"] +build-backend = "flit_core.buildapi" + +[project] +name = "apache-airflow-providers-elasticsearch" +version = "6.0.0" +description = "Provider package apache-airflow-providers-elasticsearch for Apache Airflow" +readme = "README.rst" +authors = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +maintainers = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +keywords = [ "airflow-provider", "elasticsearch", "airflow", "integration" ] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "Environment :: Web Environment", + "Intended Audience :: Developers", + "Intended Audience :: System Administrators", + "Framework :: Apache Airflow", + "Framework :: Apache Airflow :: Provider", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: System :: Monitoring", +] +requires-python = "~=3.9" + +# The dependencies should be modified in place in the generated file +# Any change in the dependencies is preserved when the file is regenerated +dependencies = [ + "apache-airflow>=2.9.0", + "apache-airflow-providers-common-sql>=1.20.0", + "elasticsearch>=8.10,<9", +] + +[project.urls] +"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-elasticsearch/6.0.0" +"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-elasticsearch/6.0.0/changelog.html" +"Bug Tracker" = "https://github.com/apache/airflow/issues" +"Source Code" = "https://github.com/apache/airflow" +"Slack Chat" = "https://s.apache.org/airflow-slack" +"Twitter" = "https://x.com/ApacheAirflow" +"YouTube" = "https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/" + +[project.entry-points."apache_airflow_provider"] +provider_info = "airflow.providers.elasticsearch.get_provider_info:get_provider_info" + +[tool.flit.module] +name = "airflow.providers.elasticsearch" + +[tool.pytest.ini_options] +ignore = "tests/system/" diff --git a/providers/elasticsearch/src/airflow/providers/elasticsearch/LICENSE b/providers/elasticsearch/src/airflow/providers/elasticsearch/LICENSE new file mode 100644 index 0000000000000..11069edd79019 --- /dev/null +++ b/providers/elasticsearch/src/airflow/providers/elasticsearch/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
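The new ``providers/elasticsearch/pyproject.toml`` shown above registers the package through the ``apache_airflow_provider`` entry point, pointing at ``airflow.providers.elasticsearch.get_provider_info:get_provider_info``. As a rough, hedged illustration of what that wiring enables (this snippet is not part of the PR; it assumes Python 3.10+ and an environment where the provider distribution is installed), the provider metadata can be discovered through the standard-library ``importlib.metadata`` API:

.. code-block:: python

    # Illustrative sketch only -- not part of this change. Assumes Python 3.10+
    # and that apache-airflow-providers-elasticsearch is installed, so the
    # "apache_airflow_provider" entry point declared in the pyproject.toml above
    # is visible to importlib.metadata.
    from importlib.metadata import entry_points

    for ep in entry_points(group="apache_airflow_provider"):
        # ep.load() returns the get_provider_info callable; calling it yields the
        # dict generated in get_provider_info.py (package-name, versions, hooks, ...).
        provider_info = ep.load()()
        print(provider_info["package-name"], provider_info["versions"][0])

The keys read here (``package-name``, ``versions``) are taken directly from the generated ``get_provider_info.py`` in this diff; everything else in the sketch is an assumption for illustration.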
diff --git a/providers/src/airflow/providers/elasticsearch/__init__.py b/providers/elasticsearch/src/airflow/providers/elasticsearch/__init__.py similarity index 100% rename from providers/src/airflow/providers/elasticsearch/__init__.py rename to providers/elasticsearch/src/airflow/providers/elasticsearch/__init__.py diff --git a/providers/elasticsearch/src/airflow/providers/elasticsearch/get_provider_info.py b/providers/elasticsearch/src/airflow/providers/elasticsearch/get_provider_info.py new file mode 100644 index 0000000000000..8b1ef52d8ea50 --- /dev/null +++ b/providers/elasticsearch/src/airflow/providers/elasticsearch/get_provider_info.py @@ -0,0 +1,221 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! +# +# IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE +# `get_provider_info_TEMPLATE.py.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + + +def get_provider_info(): + return { + "package-name": "apache-airflow-providers-elasticsearch", + "name": "Elasticsearch", + "description": "`Elasticsearch `__\n", + "state": "ready", + "source-date-epoch": 1734533734, + "versions": [ + "6.0.0", + "5.5.3", + "5.5.2", + "5.5.1", + "5.5.0", + "5.4.2", + "5.4.1", + "5.4.0", + "5.3.4", + "5.3.3", + "5.3.2", + "5.3.1", + "5.3.0", + "5.2.0", + "5.1.1", + "5.1.0", + "5.0.2", + "5.0.1", + "5.0.0", + "4.5.1", + "4.5.0", + "4.4.0", + "4.3.3", + "4.3.2", + "4.3.1", + "4.3.0", + "4.2.1", + "4.2.0", + "4.1.0", + "4.0.0", + "3.0.3", + "3.0.2", + "3.0.1", + "3.0.0", + "2.2.0", + "2.1.0", + "2.0.3", + "2.0.2", + "2.0.1", + "1.0.4", + "1.0.3", + "1.0.2", + "1.0.1", + "1.0.0", + ], + "integrations": [ + { + "integration-name": "Elasticsearch", + "external-doc-url": "https://www.elastic.co/elasticsearch", + "logo": "/docs/integration-logos/Elasticsearch.png", + "tags": ["software"], + } + ], + "hooks": [ + { + "integration-name": "Elasticsearch", + "python-modules": ["airflow.providers.elasticsearch.hooks.elasticsearch"], + } + ], + "connection-types": [ + { + "hook-class-name": "airflow.providers.elasticsearch.hooks.elasticsearch.ElasticsearchSQLHook", + "connection-type": "elasticsearch", + } + ], + "logging": ["airflow.providers.elasticsearch.log.es_task_handler.ElasticsearchTaskHandler"], + "config": { + "elasticsearch": { + "description": None, + "options": { + "host": { + "description": "Elasticsearch host\n", + "version_added": "1.10.4", + "type": "string", + "example": None, + "default": "", + }, + "log_id_template": { + "description": "Format of the log_id, which is used to query for a given tasks logs\n", + "version_added": "1.10.4", + "type": "string", + "example": None, + "is_template": True, + "default": "{dag_id}-{task_id}-{run_id}-{map_index}-{try_number}", + }, 
+ "end_of_log_mark": { + "description": "Used to mark the end of a log stream for a task\n", + "version_added": "1.10.4", + "type": "string", + "example": None, + "default": "end_of_log", + }, + "frontend": { + "description": "Qualified URL for an elasticsearch frontend (like Kibana) with a template argument for log_id\nCode will construct log_id using the log_id template from the argument above.\nNOTE: scheme will default to https if one is not provided\n", + "version_added": "1.10.4", + "type": "string", + "example": "http://localhost:5601/app/kibana#/discover?_a=(columns:!(message),query:(language:kuery,query:'log_id: \"{log_id}\"'),sort:!(log.offset,asc))", + "default": "", + }, + "write_stdout": { + "description": "Write the task logs to the stdout of the worker, rather than the default files\n", + "version_added": "1.10.4", + "type": "string", + "example": None, + "default": "False", + }, + "write_to_es": { + "description": "Write the task logs to the ElasticSearch\n", + "version_added": "5.5.4", + "type": "string", + "example": None, + "default": "False", + }, + "target_index": { + "description": "Name of the index to write to, when enabling writing the task logs to the ElasticSearch\n", + "version_added": "5.5.4", + "type": "string", + "example": None, + "default": "airflow-logs", + }, + "json_format": { + "description": "Instead of the default log formatter, write the log lines as JSON\n", + "version_added": "1.10.4", + "type": "string", + "example": None, + "default": "False", + }, + "json_fields": { + "description": "Log fields to also attach to the json output, if enabled\n", + "version_added": "1.10.4", + "type": "string", + "example": None, + "default": "asctime, filename, lineno, levelname, message", + }, + "host_field": { + "description": "The field where host name is stored (normally either `host` or `host.name`)\n", + "version_added": "2.1.1", + "type": "string", + "example": None, + "default": "host", + }, + "offset_field": { + "description": "The field where offset is stored (normally either `offset` or `log.offset`)\n", + "version_added": "2.1.1", + "type": "string", + "example": None, + "default": "offset", + }, + "index_patterns": { + "description": "Comma separated list of index patterns to use when searching for logs (default: `_all`).\nThe index_patterns_callable takes precedence over this.\n", + "version_added": "2.6.0", + "type": "string", + "example": "something-*", + "default": "_all", + }, + "index_patterns_callable": { + "description": "A string representing the full path to the Python callable path which accept TI object and\nreturn comma separated list of index patterns. 
This will takes precedence over index_patterns.\n", + "version_added": "5.5.0", + "type": "string", + "example": "module.callable", + "default": "", + }, + }, + }, + "elasticsearch_configs": { + "description": None, + "options": { + "http_compress": { + "description": None, + "version_added": "1.10.5", + "type": "string", + "example": None, + "default": "False", + }, + "verify_certs": { + "description": None, + "version_added": "1.10.5", + "type": "string", + "example": None, + "default": "True", + }, + }, + }, + }, + "dependencies": [ + "apache-airflow>=2.9.0", + "apache-airflow-providers-common-sql>=1.20.0", + "elasticsearch>=8.10,<9", + ], + } diff --git a/providers/tests/apache/druid/operators/__init__.py b/providers/elasticsearch/src/airflow/providers/elasticsearch/hooks/__init__.py similarity index 100% rename from providers/tests/apache/druid/operators/__init__.py rename to providers/elasticsearch/src/airflow/providers/elasticsearch/hooks/__init__.py diff --git a/providers/src/airflow/providers/elasticsearch/hooks/elasticsearch.py b/providers/elasticsearch/src/airflow/providers/elasticsearch/hooks/elasticsearch.py similarity index 99% rename from providers/src/airflow/providers/elasticsearch/hooks/elasticsearch.py rename to providers/elasticsearch/src/airflow/providers/elasticsearch/hooks/elasticsearch.py index 70c60d78f9a1d..ab1bc433d94a4 100644 --- a/providers/src/airflow/providers/elasticsearch/hooks/elasticsearch.py +++ b/providers/elasticsearch/src/airflow/providers/elasticsearch/hooks/elasticsearch.py @@ -21,10 +21,9 @@ from typing import TYPE_CHECKING, Any from urllib import parse -from elasticsearch import Elasticsearch - from airflow.hooks.base import BaseHook from airflow.providers.common.sql.hooks.sql import DbApiHook +from elasticsearch import Elasticsearch if TYPE_CHECKING: from elastic_transport import ObjectApiResponse diff --git a/providers/tests/dbt/cloud/operators/__init__.py b/providers/elasticsearch/src/airflow/providers/elasticsearch/log/__init__.py similarity index 100% rename from providers/tests/dbt/cloud/operators/__init__.py rename to providers/elasticsearch/src/airflow/providers/elasticsearch/log/__init__.py diff --git a/providers/src/airflow/providers/elasticsearch/log/es_json_formatter.py b/providers/elasticsearch/src/airflow/providers/elasticsearch/log/es_json_formatter.py similarity index 100% rename from providers/src/airflow/providers/elasticsearch/log/es_json_formatter.py rename to providers/elasticsearch/src/airflow/providers/elasticsearch/log/es_json_formatter.py diff --git a/providers/src/airflow/providers/elasticsearch/log/es_response.py b/providers/elasticsearch/src/airflow/providers/elasticsearch/log/es_response.py similarity index 100% rename from providers/src/airflow/providers/elasticsearch/log/es_response.py rename to providers/elasticsearch/src/airflow/providers/elasticsearch/log/es_response.py diff --git a/providers/src/airflow/providers/elasticsearch/log/es_task_handler.py b/providers/elasticsearch/src/airflow/providers/elasticsearch/log/es_task_handler.py similarity index 100% rename from providers/src/airflow/providers/elasticsearch/log/es_task_handler.py rename to providers/elasticsearch/src/airflow/providers/elasticsearch/log/es_task_handler.py index 15904e7ebf3b4..5343ba46618ea 100644 --- a/providers/src/airflow/providers/elasticsearch/log/es_task_handler.py +++ b/providers/elasticsearch/src/airflow/providers/elasticsearch/log/es_task_handler.py @@ -31,12 +31,10 @@ from typing import TYPE_CHECKING, Any, Callable, Literal from 
urllib.parse import quote, urlparse -# Using `from elasticsearch import *` would break elasticsearch mocking used in unit test. -import elasticsearch import pendulum -from elasticsearch import helpers -from elasticsearch.exceptions import NotFoundError +# Using `from elasticsearch import *` would break elasticsearch mocking used in unit test. +import elasticsearch from airflow.configuration import conf from airflow.exceptions import AirflowException from airflow.models.dagrun import DagRun @@ -48,6 +46,8 @@ from airflow.utils.log.logging_mixin import ExternalLoggingMixin, LoggingMixin from airflow.utils.module_loading import import_string from airflow.utils.session import create_session +from elasticsearch import helpers +from elasticsearch.exceptions import NotFoundError if TYPE_CHECKING: from datetime import datetime diff --git a/providers/src/airflow/providers/elasticsearch/version_compat.py b/providers/elasticsearch/src/airflow/providers/elasticsearch/version_compat.py similarity index 100% rename from providers/src/airflow/providers/elasticsearch/version_compat.py rename to providers/elasticsearch/src/airflow/providers/elasticsearch/version_compat.py diff --git a/providers/elasticsearch/tests/conftest.py b/providers/elasticsearch/tests/conftest.py new file mode 100644 index 0000000000000..068fe6bbf5ae9 --- /dev/null +++ b/providers/elasticsearch/tests/conftest.py @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import pathlib + +import pytest + +pytest_plugins = "tests_common.pytest_plugin" + + +@pytest.hookimpl(tryfirst=True) +def pytest_configure(config: pytest.Config) -> None: + deprecations_ignore_path = pathlib.Path(__file__).parent.joinpath("deprecations_ignore.yml") + dep_path = [deprecations_ignore_path] if deprecations_ignore_path.exists() else [] + config.inicfg["airflow_deprecations_ignore"] = ( + config.inicfg.get("airflow_deprecations_ignore", []) + dep_path # type: ignore[assignment,operator] + ) diff --git a/providers/elasticsearch/tests/provider_tests/__init__.py b/providers/elasticsearch/tests/provider_tests/__init__.py new file mode 100644 index 0000000000000..e8fd22856438c --- /dev/null +++ b/providers/elasticsearch/tests/provider_tests/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/providers/tests/elasticsearch/__init__.py b/providers/elasticsearch/tests/provider_tests/elasticsearch/__init__.py similarity index 100% rename from providers/tests/elasticsearch/__init__.py rename to providers/elasticsearch/tests/provider_tests/elasticsearch/__init__.py diff --git a/providers/tests/elasticsearch/hooks/__init__.py b/providers/elasticsearch/tests/provider_tests/elasticsearch/hooks/__init__.py similarity index 100% rename from providers/tests/elasticsearch/hooks/__init__.py rename to providers/elasticsearch/tests/provider_tests/elasticsearch/hooks/__init__.py diff --git a/providers/tests/elasticsearch/hooks/test_elasticsearch.py b/providers/elasticsearch/tests/provider_tests/elasticsearch/hooks/test_elasticsearch.py similarity index 100% rename from providers/tests/elasticsearch/hooks/test_elasticsearch.py rename to providers/elasticsearch/tests/provider_tests/elasticsearch/hooks/test_elasticsearch.py diff --git a/providers/tests/dbt/cloud/sensors/__init__.py b/providers/elasticsearch/tests/provider_tests/elasticsearch/log/__init__.py similarity index 100% rename from providers/tests/dbt/cloud/sensors/__init__.py rename to providers/elasticsearch/tests/provider_tests/elasticsearch/log/__init__.py diff --git a/providers/tests/elasticsearch/log/elasticmock/__init__.py b/providers/elasticsearch/tests/provider_tests/elasticsearch/log/elasticmock/__init__.py similarity index 97% rename from providers/tests/elasticsearch/log/elasticmock/__init__.py rename to providers/elasticsearch/tests/provider_tests/elasticsearch/log/elasticmock/__init__.py index 44e242d114574..912d754a966c7 100644 --- a/providers/tests/elasticsearch/log/elasticmock/__init__.py +++ b/providers/elasticsearch/tests/provider_tests/elasticsearch/log/elasticmock/__init__.py @@ -1,3 +1,5 @@ +"""Elastic mock module used for testing""" + # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -38,12 +40,11 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-"""Elastic mock module used for testing""" from functools import wraps from unittest.mock import patch from urllib.parse import unquote, urlparse -from providers.tests.elasticsearch.log.elasticmock.fake_elasticsearch import FakeElasticsearch +from provider_tests.elasticsearch.log.elasticmock.fake_elasticsearch import FakeElasticsearch ELASTIC_INSTANCES: dict[str, FakeElasticsearch] = {} diff --git a/providers/tests/elasticsearch/log/elasticmock/fake_elasticsearch.py b/providers/elasticsearch/tests/provider_tests/elasticsearch/log/elasticmock/fake_elasticsearch.py similarity index 99% rename from providers/tests/elasticsearch/log/elasticmock/fake_elasticsearch.py rename to providers/elasticsearch/tests/provider_tests/elasticsearch/log/elasticmock/fake_elasticsearch.py index 1d975ee718c3f..39aa0fc66082c 100644 --- a/providers/tests/elasticsearch/log/elasticmock/fake_elasticsearch.py +++ b/providers/elasticsearch/tests/provider_tests/elasticsearch/log/elasticmock/fake_elasticsearch.py @@ -22,7 +22,7 @@ from elasticsearch import Elasticsearch from elasticsearch.exceptions import NotFoundError -from providers.tests.elasticsearch.log.elasticmock.utilities import ( +from provider_tests.elasticsearch.log.elasticmock.utilities import ( MissingIndexException, get_random_id, query_params, diff --git a/providers/tests/elasticsearch/log/elasticmock/utilities/__init__.py b/providers/elasticsearch/tests/provider_tests/elasticsearch/log/elasticmock/utilities/__init__.py similarity index 99% rename from providers/tests/elasticsearch/log/elasticmock/utilities/__init__.py rename to providers/elasticsearch/tests/provider_tests/elasticsearch/log/elasticmock/utilities/__init__.py index f5a6c14dba2aa..62fef03473aa6 100644 --- a/providers/tests/elasticsearch/log/elasticmock/utilities/__init__.py +++ b/providers/elasticsearch/tests/provider_tests/elasticsearch/log/elasticmock/utilities/__init__.py @@ -1,3 +1,5 @@ +"""Utilities for Elastic mock""" + # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -38,7 +40,6 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-"""Utilities for Elastic mock""" import base64 import random import string diff --git a/providers/tests/elasticsearch/log/test_es_json_formatter.py b/providers/elasticsearch/tests/provider_tests/elasticsearch/log/test_es_json_formatter.py similarity index 100% rename from providers/tests/elasticsearch/log/test_es_json_formatter.py rename to providers/elasticsearch/tests/provider_tests/elasticsearch/log/test_es_json_formatter.py diff --git a/providers/tests/elasticsearch/log/test_es_response.py b/providers/elasticsearch/tests/provider_tests/elasticsearch/log/test_es_response.py similarity index 100% rename from providers/tests/elasticsearch/log/test_es_response.py rename to providers/elasticsearch/tests/provider_tests/elasticsearch/log/test_es_response.py diff --git a/providers/tests/elasticsearch/log/test_es_task_handler.py b/providers/elasticsearch/tests/provider_tests/elasticsearch/log/test_es_task_handler.py similarity index 99% rename from providers/tests/elasticsearch/log/test_es_task_handler.py rename to providers/elasticsearch/tests/provider_tests/elasticsearch/log/test_es_task_handler.py index f6b3f79395009..af17f151b9be0 100644 --- a/providers/tests/elasticsearch/log/test_es_task_handler.py +++ b/providers/elasticsearch/tests/provider_tests/elasticsearch/log/test_es_task_handler.py @@ -43,9 +43,9 @@ from airflow.utils import timezone from airflow.utils.state import DagRunState, TaskInstanceState from airflow.utils.timezone import datetime +from provider_tests.elasticsearch.log.elasticmock import elasticmock +from provider_tests.elasticsearch.log.elasticmock.utilities import SearchFailedException -from providers.tests.elasticsearch.log.elasticmock import elasticmock -from providers.tests.elasticsearch.log.elasticmock.utilities import SearchFailedException from tests_common.test_utils.config import conf_vars from tests_common.test_utils.db import clear_db_dags, clear_db_runs from tests_common.test_utils.version_compat import AIRFLOW_V_3_0_PLUS diff --git a/providers/tests/dbt/cloud/test_data/__init__.py b/providers/elasticsearch/tests/system/elasticsearch/__init__.py similarity index 100% rename from providers/tests/dbt/cloud/test_data/__init__.py rename to providers/elasticsearch/tests/system/elasticsearch/__init__.py diff --git a/providers/tests/system/elasticsearch/example_elasticsearch_query.py b/providers/elasticsearch/tests/system/elasticsearch/example_elasticsearch_query.py similarity index 100% rename from providers/tests/system/elasticsearch/example_elasticsearch_query.py rename to providers/elasticsearch/tests/system/elasticsearch/example_elasticsearch_query.py diff --git a/providers/github/README.rst b/providers/github/README.rst new file mode 100644 index 0000000000000..fae79eec7dc6d --- /dev/null +++ b/providers/github/README.rst @@ -0,0 +1,62 @@ + + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. 
See the License for the + specific language governing permissions and limitations + under the License. + + .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! + + .. IF YOU WANT TO MODIFY TEMPLATE FOR THIS FILE, YOU SHOULD MODIFY THE TEMPLATE + `PROVIDER_README_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + + +Package ``apache-airflow-providers-github`` + +Release: ``2.8.0`` + + +`GitHub `__ + + +Provider package +---------------- + +This is a provider package for ``github`` provider. All classes for this provider package +are in ``airflow.providers.github`` python package. + +You can find package information and changelog for the provider +in the `documentation `_. + +Installation +------------ + +You can install this package on top of an existing Airflow 2 installation (see ``Requirements`` below +for the minimum Airflow version supported) via +``pip install apache-airflow-providers-github`` + +The package supports the following python versions: 3.9,3.10,3.11,3.12 + +Requirements +------------ + +================== ================== +PIP package Version required +================== ================== +``apache-airflow`` ``>=2.9.0`` +``PyGithub`` ``>=2.1.1`` +================== ================== + +The changelog for the provider package can be found in the +`changelog `_. diff --git a/providers/src/airflow/providers/github/.latest-doc-only-change.txt b/providers/github/docs/.latest-doc-only-change.txt similarity index 100% rename from providers/src/airflow/providers/github/.latest-doc-only-change.txt rename to providers/github/docs/.latest-doc-only-change.txt diff --git a/providers/src/airflow/providers/github/CHANGELOG.rst b/providers/github/docs/changelog.rst similarity index 100% rename from providers/src/airflow/providers/github/CHANGELOG.rst rename to providers/github/docs/changelog.rst diff --git a/docs/apache-airflow-providers-github/commits.rst b/providers/github/docs/commits.rst similarity index 100% rename from docs/apache-airflow-providers-github/commits.rst rename to providers/github/docs/commits.rst diff --git a/docs/apache-airflow-providers-github/connections/github.rst b/providers/github/docs/connections/github.rst similarity index 100% rename from docs/apache-airflow-providers-github/connections/github.rst rename to providers/github/docs/connections/github.rst diff --git a/docs/apache-airflow-providers-github/index.rst b/providers/github/docs/index.rst similarity index 100% rename from docs/apache-airflow-providers-github/index.rst rename to providers/github/docs/index.rst diff --git a/docs/apache-airflow-providers-github/installing-providers-from-sources.rst b/providers/github/docs/installing-providers-from-sources.rst similarity index 100% rename from docs/apache-airflow-providers-github/installing-providers-from-sources.rst rename to providers/github/docs/installing-providers-from-sources.rst diff --git a/docs/apache-airflow-providers-github/operators/index.rst b/providers/github/docs/operators/index.rst similarity index 90% rename from docs/apache-airflow-providers-github/operators/index.rst rename to providers/github/docs/operators/index.rst index 448fb8dc3dc20..e8d3126f90bbc 100644 --- a/docs/apache-airflow-providers-github/operators/index.rst +++ b/providers/github/docs/operators/index.rst @@ -33,7 +33,7 @@ You can further process the result using An example of Listing all Repositories owned by a user, **client.get_user().get_repos()** can be implemented as following: -.. 
exampleinclude:: /../../providers/tests/system/github/example_github.py +.. exampleinclude:: /../../providers/github/tests/system/github/example_github.py :language: python :dedent: 4 :start-after: [START howto_operator_list_repos_github] @@ -43,7 +43,7 @@ An example of Listing all Repositories owned by a user, **client.get_user().get_ An example of Listing Tags in a Repository, **client.get_repo(full_name_or_id='apache/airflow').get_tags()** can be implemented as following: -.. exampleinclude:: /../../providers/tests/system/github/example_github.py +.. exampleinclude:: /../../providers/github/tests/system/github/example_github.py :language: python :dedent: 4 :start-after: [START howto_operator_list_tags_github] @@ -64,7 +64,7 @@ a Tag in `GitHub `__. An example for tag **v1.0**: -.. exampleinclude:: /../../providers/tests/system/github/example_github.py +.. exampleinclude:: /../../providers/github/tests/system/github/example_github.py :language: python :dedent: 4 :start-after: [START howto_tag_sensor_github] @@ -73,7 +73,7 @@ An example for tag **v1.0**: Similar Functionality can be achieved by directly using :class:`~from airflow.providers.github.sensors.github.GithubSensor`. -.. exampleinclude:: /../../providers/tests/system/github/example_github.py +.. exampleinclude:: /../../providers/github/tests/system/github/example_github.py :language: python :dedent: 4 :start-after: [START howto_sensor_github] diff --git a/docs/apache-airflow-providers-github/security.rst b/providers/github/docs/security.rst similarity index 100% rename from docs/apache-airflow-providers-github/security.rst rename to providers/github/docs/security.rst diff --git a/providers/src/airflow/providers/github/provider.yaml b/providers/github/provider.yaml similarity index 96% rename from providers/src/airflow/providers/github/provider.yaml rename to providers/github/provider.yaml index f154c68f47df5..6087dc01f3304 100644 --- a/providers/src/airflow/providers/github/provider.yaml +++ b/providers/github/provider.yaml @@ -22,10 +22,6 @@ name: Github description: | `GitHub `__ -dependencies: - - apache-airflow>=2.9.0 - - PyGithub>=2.1.1 - state: ready source-date-epoch: 1734533986 # note that those versions are maintained by release manager - do not update them manually diff --git a/providers/github/pyproject.toml b/providers/github/pyproject.toml new file mode 100644 index 0000000000000..b0e9ff3648b52 --- /dev/null +++ b/providers/github/pyproject.toml @@ -0,0 +1,78 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! 
+ +# IF YOU WANT TO MODIFY THIS FILE EXCEPT DEPENDENCIES, YOU SHOULD MODIFY THE TEMPLATE +# `pyproject_TEMPLATE.toml.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY +[build-system] +requires = ["flit_core==3.10.1"] +build-backend = "flit_core.buildapi" + +[project] +name = "apache-airflow-providers-github" +version = "2.8.0" +description = "Provider package apache-airflow-providers-github for Apache Airflow" +readme = "README.rst" +authors = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +maintainers = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +keywords = [ "airflow-provider", "github", "airflow", "integration" ] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "Environment :: Web Environment", + "Intended Audience :: Developers", + "Intended Audience :: System Administrators", + "Framework :: Apache Airflow", + "Framework :: Apache Airflow :: Provider", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: System :: Monitoring", +] +requires-python = "~=3.9" + +# The dependencies should be modified in place in the generated file +# Any change in the dependencies is preserved when the file is regenerated +dependencies = [ + "apache-airflow>=2.9.0", + "PyGithub>=2.1.1", +] + +[project.urls] +"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-github/2.8.0" +"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-github/2.8.0/changelog.html" +"Bug Tracker" = "https://github.com/apache/airflow/issues" +"Source Code" = "https://github.com/apache/airflow" +"Slack Chat" = "https://s.apache.org/airflow-slack" +"Twitter" = "https://x.com/ApacheAirflow" +"YouTube" = "https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/" + +[project.entry-points."apache_airflow_provider"] +provider_info = "airflow.providers.github.get_provider_info:get_provider_info" + +[tool.flit.module] +name = "airflow.providers.github" + +[tool.pytest.ini_options] +ignore = "tests/system/" diff --git a/providers/github/src/airflow/providers/github/LICENSE b/providers/github/src/airflow/providers/github/LICENSE new file mode 100644 index 0000000000000..11069edd79019 --- /dev/null +++ b/providers/github/src/airflow/providers/github/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
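
Note (illustration only, not part of this change): the github provider's pyproject.toml above registers a `provider_info` entry point in the `apache_airflow_provider` group pointing at `airflow.providers.github.get_provider_info:get_provider_info`. The sketch below shows how such an entry point can be discovered and called with the standard library; the group name, callable path, and the `package-name`/`versions` keys come from the files in this diff, while the loop itself is only a minimal example using the Python 3.10+ selection API.

# Illustrative sketch only. Discovers provider-info entry points declared under
# the "apache_airflow_provider" group (as in the pyproject.toml above) and calls
# the referenced get_provider_info() callable.
from importlib.metadata import entry_points

for ep in entry_points(group="apache_airflow_provider"):  # group= requires Python 3.10+
    provider_info = ep.load()()  # e.g. airflow.providers.github.get_provider_info:get_provider_info
    print(ep.value, provider_info["package-name"], provider_info["versions"][0])
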
diff --git a/providers/src/airflow/providers/github/__init__.py b/providers/github/src/airflow/providers/github/__init__.py similarity index 100% rename from providers/src/airflow/providers/github/__init__.py rename to providers/github/src/airflow/providers/github/__init__.py diff --git a/providers/github/src/airflow/providers/github/get_provider_info.py b/providers/github/src/airflow/providers/github/get_provider_info.py new file mode 100644 index 0000000000000..37ca8ed6e950c --- /dev/null +++ b/providers/github/src/airflow/providers/github/get_provider_info.py @@ -0,0 +1,75 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! +# +# IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE +# `get_provider_info_TEMPLATE.py.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + + +def get_provider_info(): + return { + "package-name": "apache-airflow-providers-github", + "name": "Github", + "description": "`GitHub `__\n", + "state": "ready", + "source-date-epoch": 1734533986, + "versions": [ + "2.8.0", + "2.7.0", + "2.6.2", + "2.6.1", + "2.6.0", + "2.5.1", + "2.5.0", + "2.4.0", + "2.3.2", + "2.3.1", + "2.3.0", + "2.2.1", + "2.2.0", + "2.1.0", + "2.0.0", + "1.0.3", + "1.0.2", + "1.0.1", + "1.0.0", + ], + "integrations": [ + { + "integration-name": "Github", + "external-doc-url": "https://www.github.com/", + "tags": ["software"], + } + ], + "hooks": [ + {"integration-name": "Github", "python-modules": ["airflow.providers.github.hooks.github"]} + ], + "operators": [ + {"integration-name": "Github", "python-modules": ["airflow.providers.github.operators.github"]} + ], + "sensors": [ + {"integration-name": "Github", "python-modules": ["airflow.providers.github.sensors.github"]} + ], + "connection-types": [ + { + "hook-class-name": "airflow.providers.github.hooks.github.GithubHook", + "connection-type": "github", + } + ], + "dependencies": ["apache-airflow>=2.9.0", "PyGithub>=2.1.1"], + } diff --git a/providers/tests/dbt/cloud/triggers/__init__.py b/providers/github/src/airflow/providers/github/hooks/__init__.py similarity index 100% rename from providers/tests/dbt/cloud/triggers/__init__.py rename to providers/github/src/airflow/providers/github/hooks/__init__.py diff --git a/providers/src/airflow/providers/github/hooks/github.py b/providers/github/src/airflow/providers/github/hooks/github.py similarity index 99% rename from providers/src/airflow/providers/github/hooks/github.py rename to providers/github/src/airflow/providers/github/hooks/github.py index 6be50fd31fec4..fb2b2b0416008 100644 --- a/providers/src/airflow/providers/github/hooks/github.py +++ b/providers/github/src/airflow/providers/github/hooks/github.py @@ -21,10 +21,9 @@ from typing import TYPE_CHECKING -from 
github import Github as GithubClient - from airflow.exceptions import AirflowException from airflow.hooks.base import BaseHook +from github import Github as GithubClient class GithubHook(BaseHook): diff --git a/providers/tests/dbt/cloud/utils/__init__.py b/providers/github/src/airflow/providers/github/operators/__init__.py similarity index 100% rename from providers/tests/dbt/cloud/utils/__init__.py rename to providers/github/src/airflow/providers/github/operators/__init__.py diff --git a/providers/src/airflow/providers/github/operators/github.py b/providers/github/src/airflow/providers/github/operators/github.py similarity index 99% rename from providers/src/airflow/providers/github/operators/github.py rename to providers/github/src/airflow/providers/github/operators/github.py index 82996d3ecedb4..3889335628d03 100644 --- a/providers/src/airflow/providers/github/operators/github.py +++ b/providers/github/src/airflow/providers/github/operators/github.py @@ -19,11 +19,10 @@ from typing import TYPE_CHECKING, Any, Callable -from github import GithubException - from airflow.exceptions import AirflowException from airflow.models import BaseOperator from airflow.providers.github.hooks.github import GithubHook +from github import GithubException if TYPE_CHECKING: try: diff --git a/providers/tests/elasticsearch/log/__init__.py b/providers/github/src/airflow/providers/github/sensors/__init__.py similarity index 100% rename from providers/tests/elasticsearch/log/__init__.py rename to providers/github/src/airflow/providers/github/sensors/__init__.py diff --git a/providers/src/airflow/providers/github/sensors/github.py b/providers/github/src/airflow/providers/github/sensors/github.py similarity index 99% rename from providers/src/airflow/providers/github/sensors/github.py rename to providers/github/src/airflow/providers/github/sensors/github.py index cacaef9e32fb8..b40420ce207ab 100644 --- a/providers/src/airflow/providers/github/sensors/github.py +++ b/providers/github/src/airflow/providers/github/sensors/github.py @@ -19,11 +19,10 @@ from typing import TYPE_CHECKING, Any, Callable -from github import GithubException - from airflow.exceptions import AirflowException from airflow.providers.github.hooks.github import GithubHook from airflow.sensors.base import BaseSensorOperator +from github import GithubException if TYPE_CHECKING: try: diff --git a/providers/github/tests/conftest.py b/providers/github/tests/conftest.py new file mode 100644 index 0000000000000..068fe6bbf5ae9 --- /dev/null +++ b/providers/github/tests/conftest.py @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from __future__ import annotations + +import pathlib + +import pytest + +pytest_plugins = "tests_common.pytest_plugin" + + +@pytest.hookimpl(tryfirst=True) +def pytest_configure(config: pytest.Config) -> None: + deprecations_ignore_path = pathlib.Path(__file__).parent.joinpath("deprecations_ignore.yml") + dep_path = [deprecations_ignore_path] if deprecations_ignore_path.exists() else [] + config.inicfg["airflow_deprecations_ignore"] = ( + config.inicfg.get("airflow_deprecations_ignore", []) + dep_path # type: ignore[assignment,operator] + ) diff --git a/providers/github/tests/provider_tests/__init__.py b/providers/github/tests/provider_tests/__init__.py new file mode 100644 index 0000000000000..e8fd22856438c --- /dev/null +++ b/providers/github/tests/provider_tests/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/providers/tests/github/__init__.py b/providers/github/tests/provider_tests/github/__init__.py similarity index 100% rename from providers/tests/github/__init__.py rename to providers/github/tests/provider_tests/github/__init__.py diff --git a/providers/tests/github/hooks/__init__.py b/providers/github/tests/provider_tests/github/hooks/__init__.py similarity index 100% rename from providers/tests/github/hooks/__init__.py rename to providers/github/tests/provider_tests/github/hooks/__init__.py diff --git a/providers/tests/github/hooks/test_github.py b/providers/github/tests/provider_tests/github/hooks/test_github.py similarity index 100% rename from providers/tests/github/hooks/test_github.py rename to providers/github/tests/provider_tests/github/hooks/test_github.py diff --git a/providers/tests/github/operators/__init__.py b/providers/github/tests/provider_tests/github/operators/__init__.py similarity index 100% rename from providers/tests/github/operators/__init__.py rename to providers/github/tests/provider_tests/github/operators/__init__.py diff --git a/providers/tests/github/operators/test_github.py b/providers/github/tests/provider_tests/github/operators/test_github.py similarity index 100% rename from providers/tests/github/operators/test_github.py rename to providers/github/tests/provider_tests/github/operators/test_github.py diff --git a/providers/tests/github/sensors/__init__.py b/providers/github/tests/provider_tests/github/sensors/__init__.py similarity index 100% rename from providers/tests/github/sensors/__init__.py rename to providers/github/tests/provider_tests/github/sensors/__init__.py diff --git a/providers/tests/github/sensors/test_github.py b/providers/github/tests/provider_tests/github/sensors/test_github.py similarity index 100% rename from providers/tests/github/sensors/test_github.py rename to 
providers/github/tests/provider_tests/github/sensors/test_github.py diff --git a/providers/tests/opensearch/__init__.py b/providers/github/tests/system/github/__init__.py similarity index 100% rename from providers/tests/opensearch/__init__.py rename to providers/github/tests/system/github/__init__.py diff --git a/providers/tests/system/github/example_github.py b/providers/github/tests/system/github/example_github.py similarity index 99% rename from providers/tests/system/github/example_github.py rename to providers/github/tests/system/github/example_github.py index b076647177cc9..c3d1d3a4c7e4e 100644 --- a/providers/tests/system/github/example_github.py +++ b/providers/github/tests/system/github/example_github.py @@ -21,12 +21,11 @@ from datetime import datetime from typing import Any -from github import GithubException - from airflow.exceptions import AirflowException from airflow.models.dag import DAG from airflow.providers.github.operators.github import GithubOperator from airflow.providers.github.sensors.github import GithubSensor, GithubTagSensor +from github import GithubException ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID") DAG_ID = "example_github_operator" diff --git a/providers/opensearch/README.rst b/providers/opensearch/README.rst new file mode 100644 index 0000000000000..640015fdeacbb --- /dev/null +++ b/providers/opensearch/README.rst @@ -0,0 +1,62 @@ + + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! + + .. IF YOU WANT TO MODIFY TEMPLATE FOR THIS FILE, YOU SHOULD MODIFY THE TEMPLATE + `PROVIDER_README_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + + +Package ``apache-airflow-providers-opensearch`` + +Release: ``1.6.0`` + + +`OpenSearch `__ + + +Provider package +---------------- + +This is a provider package for ``opensearch`` provider. All classes for this provider package +are in ``airflow.providers.opensearch`` python package. + +You can find package information and changelog for the provider +in the `documentation `_. + +Installation +------------ + +You can install this package on top of an existing Airflow 2 installation (see ``Requirements`` below +for the minimum Airflow version supported) via +``pip install apache-airflow-providers-opensearch`` + +The package supports the following python versions: 3.9,3.10,3.11,3.12 + +Requirements +------------ + +================== ================== +PIP package Version required +================== ================== +``apache-airflow`` ``>=2.9.0`` +``opensearch-py`` ``>=2.2.0`` +================== ================== + +The changelog for the provider package can be found in the +`changelog `_. 
diff --git a/providers/src/airflow/providers/opensearch/.latest-doc-only-change.txt b/providers/opensearch/docs/.latest-doc-only-change.txt similarity index 100% rename from providers/src/airflow/providers/opensearch/.latest-doc-only-change.txt rename to providers/opensearch/docs/.latest-doc-only-change.txt diff --git a/providers/src/airflow/providers/opensearch/CHANGELOG.rst b/providers/opensearch/docs/changelog.rst similarity index 100% rename from providers/src/airflow/providers/opensearch/CHANGELOG.rst rename to providers/opensearch/docs/changelog.rst diff --git a/docs/apache-airflow-providers-opensearch/commits.rst b/providers/opensearch/docs/commits.rst similarity index 100% rename from docs/apache-airflow-providers-opensearch/commits.rst rename to providers/opensearch/docs/commits.rst diff --git a/docs/apache-airflow-providers-opensearch/configurations-ref.rst b/providers/opensearch/docs/configurations-ref.rst similarity index 100% rename from docs/apache-airflow-providers-opensearch/configurations-ref.rst rename to providers/opensearch/docs/configurations-ref.rst diff --git a/docs/apache-airflow-providers-opensearch/connections/index.rst b/providers/opensearch/docs/connections/index.rst similarity index 100% rename from docs/apache-airflow-providers-opensearch/connections/index.rst rename to providers/opensearch/docs/connections/index.rst diff --git a/docs/apache-airflow-providers-opensearch/connections/opensearch.rst b/providers/opensearch/docs/connections/opensearch.rst similarity index 100% rename from docs/apache-airflow-providers-opensearch/connections/opensearch.rst rename to providers/opensearch/docs/connections/opensearch.rst diff --git a/docs/apache-airflow-providers-opensearch/index.rst b/providers/opensearch/docs/index.rst similarity index 100% rename from docs/apache-airflow-providers-opensearch/index.rst rename to providers/opensearch/docs/index.rst diff --git a/docs/apache-airflow-providers-opensearch/installing-providers-from-sources.rst b/providers/opensearch/docs/installing-providers-from-sources.rst similarity index 100% rename from docs/apache-airflow-providers-opensearch/installing-providers-from-sources.rst rename to providers/opensearch/docs/installing-providers-from-sources.rst diff --git a/docs/integration-logos/opensearch/opensearch.png b/providers/opensearch/docs/integration-logos/opensearch.png similarity index 100% rename from docs/integration-logos/opensearch/opensearch.png rename to providers/opensearch/docs/integration-logos/opensearch.png diff --git a/docs/apache-airflow-providers-opensearch/logging/index.rst b/providers/opensearch/docs/logging/index.rst similarity index 100% rename from docs/apache-airflow-providers-opensearch/logging/index.rst rename to providers/opensearch/docs/logging/index.rst diff --git a/docs/apache-airflow-providers-opensearch/operators/index.rst b/providers/opensearch/docs/operators/index.rst similarity index 100% rename from docs/apache-airflow-providers-opensearch/operators/index.rst rename to providers/opensearch/docs/operators/index.rst diff --git a/docs/apache-airflow-providers-opensearch/operators/opensearch.rst b/providers/opensearch/docs/operators/opensearch.rst similarity index 88% rename from docs/apache-airflow-providers-opensearch/operators/opensearch.rst rename to providers/opensearch/docs/operators/opensearch.rst index b85a014ebefee..fc66fa0548842 100644 --- a/docs/apache-airflow-providers-opensearch/operators/opensearch.rst +++ b/providers/opensearch/docs/operators/opensearch.rst @@ -35,7 +35,7 @@ to create a 
new index in an OpenSearch domain. -.. exampleinclude:: /../../providers/tests/system/opensearch/example_opensearch.py +.. exampleinclude:: /../../providers/opensearch/tests/system/opensearch/example_opensearch.py :language: python :start-after: [START howto_operator_opensearch_create_index] :dedent: 4 @@ -50,7 +50,7 @@ Add a Document to an Index on OpenSearch Use :class:`~airflow.providers.opensearch.operators.opensearch.OpenSearchAddDocumentOperator` to add single documents to an OpenSearch Index -.. exampleinclude:: /../../providers/tests/system/opensearch/example_opensearch.py +.. exampleinclude:: /../../providers/opensearch/tests/system/opensearch/example_opensearch.py :language: python :start-after: [START howto_operator_opensearch_add_document] :dedent: 4 @@ -65,7 +65,7 @@ Run a query against an OpenSearch Index Use :class:`~airflow.providers.opensearch.operators.opensearch.OpenSearchQueryOperator` to run a query against an OpenSearch index. -.. exampleinclude:: /../../providers/tests/system/opensearch/example_opensearch.py +.. exampleinclude:: /../../providers/opensearch/tests/system/opensearch/example_opensearch.py :language: python :start-after: [START howto_operator_opensearch_query] :dedent: 4 diff --git a/docs/apache-airflow-providers-opensearch/security.rst b/providers/opensearch/docs/security.rst similarity index 100% rename from docs/apache-airflow-providers-opensearch/security.rst rename to providers/opensearch/docs/security.rst diff --git a/providers/src/airflow/providers/opensearch/provider.yaml b/providers/opensearch/provider.yaml similarity index 97% rename from providers/src/airflow/providers/opensearch/provider.yaml rename to providers/opensearch/provider.yaml index 04ce301b905a6..288d39dca4a56 100644 --- a/providers/src/airflow/providers/opensearch/provider.yaml +++ b/providers/opensearch/provider.yaml @@ -36,16 +36,12 @@ versions: - 1.1.0 - 1.0.0 -dependencies: - - apache-airflow>=2.9.0 - - opensearch-py>=2.2.0 - integrations: - integration-name: OpenSearch external-doc-url: https://opensearch.org/ how-to-guide: - /docs/apache-airflow-providers-opensearch/operators/opensearch.rst - logo: /integration-logos/opensearch/opensearch.png + logo: /docs/integration-logos/opensearch.png tags: [software] hooks: diff --git a/providers/opensearch/pyproject.toml b/providers/opensearch/pyproject.toml new file mode 100644 index 0000000000000..3015634ad4293 --- /dev/null +++ b/providers/opensearch/pyproject.toml @@ -0,0 +1,78 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! 
+ +# IF YOU WANT TO MODIFY THIS FILE EXCEPT DEPENDENCIES, YOU SHOULD MODIFY THE TEMPLATE +# `pyproject_TEMPLATE.toml.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY +[build-system] +requires = ["flit_core==3.10.1"] +build-backend = "flit_core.buildapi" + +[project] +name = "apache-airflow-providers-opensearch" +version = "1.6.0" +description = "Provider package apache-airflow-providers-opensearch for Apache Airflow" +readme = "README.rst" +authors = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +maintainers = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +keywords = [ "airflow-provider", "opensearch", "airflow", "integration" ] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "Environment :: Web Environment", + "Intended Audience :: Developers", + "Intended Audience :: System Administrators", + "Framework :: Apache Airflow", + "Framework :: Apache Airflow :: Provider", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: System :: Monitoring", +] +requires-python = "~=3.9" + +# The dependencies should be modified in place in the generated file +# Any change in the dependencies is preserved when the file is regenerated +dependencies = [ + "apache-airflow>=2.9.0", + "opensearch-py>=2.2.0", +] + +[project.urls] +"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-opensearch/1.6.0" +"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-opensearch/1.6.0/changelog.html" +"Bug Tracker" = "https://github.com/apache/airflow/issues" +"Source Code" = "https://github.com/apache/airflow" +"Slack Chat" = "https://s.apache.org/airflow-slack" +"Twitter" = "https://x.com/ApacheAirflow" +"YouTube" = "https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/" + +[project.entry-points."apache_airflow_provider"] +provider_info = "airflow.providers.opensearch.get_provider_info:get_provider_info" + +[tool.flit.module] +name = "airflow.providers.opensearch" + +[tool.pytest.ini_options] +ignore = "tests/system/" diff --git a/providers/opensearch/src/airflow/providers/opensearch/LICENSE b/providers/opensearch/src/airflow/providers/opensearch/LICENSE new file mode 100644 index 0000000000000..11069edd79019 --- /dev/null +++ b/providers/opensearch/src/airflow/providers/opensearch/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
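
Note (illustration only, not part of this change): the relocated operator guide above (providers/opensearch/docs/operators/opensearch.rst) points at `OpenSearchQueryOperator` for running a query against an index. A minimal DAG sketch follows for orientation; the class path is taken from that guide, while the parameter names (`index_name`, `query`, `opensearch_conn_id`) and the connection id are assumptions not confirmed by this diff.

# Minimal sketch under stated assumptions; verify parameter names against the
# relocated operator sources before relying on this.
from datetime import datetime

from airflow.models.dag import DAG
from airflow.providers.opensearch.operators.opensearch import OpenSearchQueryOperator

with DAG(dag_id="opensearch_query_sketch", start_date=datetime(2025, 1, 1), schedule=None) as dag:
    OpenSearchQueryOperator(
        task_id="search_logs",
        index_name="example-index",               # assumed parameter name
        query={"query": {"match_all": {}}},       # assumed parameter name
        opensearch_conn_id="opensearch_default",  # assumed default connection id
    )
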
diff --git a/providers/src/airflow/providers/opensearch/__init__.py b/providers/opensearch/src/airflow/providers/opensearch/__init__.py similarity index 100% rename from providers/src/airflow/providers/opensearch/__init__.py rename to providers/opensearch/src/airflow/providers/opensearch/__init__.py diff --git a/providers/opensearch/src/airflow/providers/opensearch/get_provider_info.py b/providers/opensearch/src/airflow/providers/opensearch/get_provider_info.py new file mode 100644 index 0000000000000..feb4f5639507a --- /dev/null +++ b/providers/opensearch/src/airflow/providers/opensearch/get_provider_info.py @@ -0,0 +1,220 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! +# +# IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE +# `get_provider_info_TEMPLATE.py.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + + +def get_provider_info(): + return { + "package-name": "apache-airflow-providers-opensearch", + "name": "OpenSearch", + "description": "`OpenSearch `__\n", + "state": "ready", + "source-date-epoch": 1734536033, + "versions": [ + "1.6.0", + "1.5.0", + "1.4.0", + "1.3.0", + "1.2.1", + "1.2.0", + "1.1.2", + "1.1.1", + "1.1.0", + "1.0.0", + ], + "integrations": [ + { + "integration-name": "OpenSearch", + "external-doc-url": "https://opensearch.org/", + "how-to-guide": ["/docs/apache-airflow-providers-opensearch/operators/opensearch.rst"], + "logo": "/docs/integration-logos/opensearch.png", + "tags": ["software"], + } + ], + "hooks": [ + { + "integration-name": "OpenSearch", + "python-modules": ["airflow.providers.opensearch.hooks.opensearch"], + } + ], + "operators": [ + { + "integration-name": "OpenSearch", + "python-modules": ["airflow.providers.opensearch.operators.opensearch"], + } + ], + "connection-types": [ + { + "hook-class-name": "airflow.providers.opensearch.hooks.opensearch.OpenSearchHook", + "connection-type": "opensearch", + } + ], + "logging": ["airflow.providers.opensearch.log.os_task_handler.OpensearchTaskHandler"], + "config": { + "opensearch": { + "description": None, + "options": { + "host": { + "description": "Opensearch host\n", + "version_added": "1.5.0", + "type": "string", + "example": None, + "default": "", + }, + "port": { + "description": "The port number of Opensearch host\n", + "version_added": "1.5.0", + "type": "integer", + "example": None, + "default": "", + }, + "username": { + "description": "The username for connecting to Opensearch\n", + "version_added": "1.5.0", + "type": "string", + "sensitive": True, + "example": None, + "default": "", + }, + "password": { + "description": "The password for connecting to Opensearch\n", + "version_added": "1.5.0", + "type": "string", + "sensitive": True, + "example": None, + 
"default": "", + }, + "log_id_template": { + "description": "Format of the log_id, which is used to query for a given tasks logs\n", + "version_added": "1.5.0", + "type": "string", + "example": None, + "is_template": True, + "default": "{dag_id}-{task_id}-{run_id}-{map_index}-{try_number}", + }, + "end_of_log_mark": { + "description": "Used to mark the end of a log stream for a task\n", + "version_added": "1.5.0", + "type": "string", + "example": None, + "default": "end_of_log", + }, + "write_stdout": { + "description": "Write the task logs to the stdout of the worker, rather than the default files\n", + "version_added": "1.5.0", + "type": "string", + "example": None, + "default": "False", + }, + "json_format": { + "description": "Instead of the default log formatter, write the log lines as JSON\n", + "version_added": "1.5.0", + "type": "string", + "example": None, + "default": "False", + }, + "json_fields": { + "description": "Log fields to also attach to the json output, if enabled\n", + "version_added": "1.5.0", + "type": "string", + "example": None, + "default": "asctime, filename, lineno, levelname, message", + }, + "host_field": { + "description": "The field where host name is stored (normally either `host` or `host.name`)\n", + "version_added": "1.5.0", + "type": "string", + "example": None, + "default": "host", + }, + "offset_field": { + "description": "The field where offset is stored (normally either `offset` or `log.offset`)\n", + "version_added": "1.5.0", + "type": "string", + "example": None, + "default": "offset", + }, + "index_patterns": { + "description": "Comma separated list of index patterns to use when searching for logs (default: `_all`).\nThe index_patterns_callable takes precedence over this.\n", + "version_added": "1.5.0", + "type": "string", + "example": "something-*", + "default": "_all", + }, + "index_patterns_callable": { + "description": "A string representing the full path to the Python callable path which accept TI object and\nreturn comma separated list of index patterns. 
This will takes precedence over index_patterns.\n", + "version_added": "1.5.0", + "type": "string", + "example": "module.callable", + "default": "", + }, + }, + }, + "opensearch_configs": { + "description": None, + "options": { + "http_compress": { + "description": None, + "version_added": "1.5.0", + "type": "string", + "example": None, + "default": "False", + }, + "use_ssl": { + "description": None, + "version_added": "1.5.0", + "type": "string", + "example": None, + "default": "False", + }, + "verify_certs": { + "description": None, + "version_added": "1.5.0", + "type": "string", + "example": None, + "default": "False", + }, + "ssl_assert_hostname": { + "description": None, + "version_added": "1.5.0", + "type": "string", + "example": None, + "default": "False", + }, + "ssl_show_warn": { + "description": None, + "version_added": "1.5.0", + "type": "string", + "example": None, + "default": "False", + }, + "ca_certs": { + "description": None, + "version_added": "1.5.0", + "type": "string", + "example": None, + "default": "", + }, + }, + }, + }, + "dependencies": ["apache-airflow>=2.9.0", "opensearch-py>=2.2.0"], + } diff --git a/providers/tests/opensearch/hooks/__init__.py b/providers/opensearch/src/airflow/providers/opensearch/hooks/__init__.py similarity index 100% rename from providers/tests/opensearch/hooks/__init__.py rename to providers/opensearch/src/airflow/providers/opensearch/hooks/__init__.py diff --git a/providers/src/airflow/providers/opensearch/hooks/opensearch.py b/providers/opensearch/src/airflow/providers/opensearch/hooks/opensearch.py similarity index 100% rename from providers/src/airflow/providers/opensearch/hooks/opensearch.py rename to providers/opensearch/src/airflow/providers/opensearch/hooks/opensearch.py diff --git a/providers/tests/opensearch/log/__init__.py b/providers/opensearch/src/airflow/providers/opensearch/log/__init__.py similarity index 100% rename from providers/tests/opensearch/log/__init__.py rename to providers/opensearch/src/airflow/providers/opensearch/log/__init__.py diff --git a/providers/src/airflow/providers/opensearch/log/os_json_formatter.py b/providers/opensearch/src/airflow/providers/opensearch/log/os_json_formatter.py similarity index 100% rename from providers/src/airflow/providers/opensearch/log/os_json_formatter.py rename to providers/opensearch/src/airflow/providers/opensearch/log/os_json_formatter.py diff --git a/providers/src/airflow/providers/opensearch/log/os_response.py b/providers/opensearch/src/airflow/providers/opensearch/log/os_response.py similarity index 100% rename from providers/src/airflow/providers/opensearch/log/os_response.py rename to providers/opensearch/src/airflow/providers/opensearch/log/os_response.py diff --git a/providers/src/airflow/providers/opensearch/log/os_task_handler.py b/providers/opensearch/src/airflow/providers/opensearch/log/os_task_handler.py similarity index 100% rename from providers/src/airflow/providers/opensearch/log/os_task_handler.py rename to providers/opensearch/src/airflow/providers/opensearch/log/os_task_handler.py diff --git a/providers/tests/opensearch/operators/__init__.py b/providers/opensearch/src/airflow/providers/opensearch/operators/__init__.py similarity index 100% rename from providers/tests/opensearch/operators/__init__.py rename to providers/opensearch/src/airflow/providers/opensearch/operators/__init__.py diff --git a/providers/src/airflow/providers/opensearch/operators/opensearch.py b/providers/opensearch/src/airflow/providers/opensearch/operators/opensearch.py similarity 
index 100% rename from providers/src/airflow/providers/opensearch/operators/opensearch.py rename to providers/opensearch/src/airflow/providers/opensearch/operators/opensearch.py diff --git a/providers/src/airflow/providers/opensearch/version_compat.py b/providers/opensearch/src/airflow/providers/opensearch/version_compat.py similarity index 100% rename from providers/src/airflow/providers/opensearch/version_compat.py rename to providers/opensearch/src/airflow/providers/opensearch/version_compat.py diff --git a/providers/opensearch/tests/conftest.py b/providers/opensearch/tests/conftest.py new file mode 100644 index 0000000000000..068fe6bbf5ae9 --- /dev/null +++ b/providers/opensearch/tests/conftest.py @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import pathlib + +import pytest + +pytest_plugins = "tests_common.pytest_plugin" + + +@pytest.hookimpl(tryfirst=True) +def pytest_configure(config: pytest.Config) -> None: + deprecations_ignore_path = pathlib.Path(__file__).parent.joinpath("deprecations_ignore.yml") + dep_path = [deprecations_ignore_path] if deprecations_ignore_path.exists() else [] + config.inicfg["airflow_deprecations_ignore"] = ( + config.inicfg.get("airflow_deprecations_ignore", []) + dep_path # type: ignore[assignment,operator] + ) diff --git a/providers/opensearch/tests/provider_tests/__init__.py b/providers/opensearch/tests/provider_tests/__init__.py new file mode 100644 index 0000000000000..e8fd22856438c --- /dev/null +++ b/providers/opensearch/tests/provider_tests/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/providers/tests/sendgrid/__init__.py b/providers/opensearch/tests/provider_tests/opensearch/__init__.py similarity index 100% rename from providers/tests/sendgrid/__init__.py rename to providers/opensearch/tests/provider_tests/opensearch/__init__.py diff --git a/providers/tests/opensearch/conftest.py b/providers/opensearch/tests/provider_tests/opensearch/conftest.py similarity index 100% rename from providers/tests/opensearch/conftest.py rename to providers/opensearch/tests/provider_tests/opensearch/conftest.py diff --git a/providers/tests/sendgrid/utils/__init__.py b/providers/opensearch/tests/provider_tests/opensearch/hooks/__init__.py similarity index 100% rename from providers/tests/sendgrid/utils/__init__.py rename to providers/opensearch/tests/provider_tests/opensearch/hooks/__init__.py diff --git a/providers/tests/opensearch/hooks/test_opensearch.py b/providers/opensearch/tests/provider_tests/opensearch/hooks/test_opensearch.py similarity index 99% rename from providers/tests/opensearch/hooks/test_opensearch.py rename to providers/opensearch/tests/provider_tests/opensearch/hooks/test_opensearch.py index 53364a50072f7..79ca0a34a363d 100644 --- a/providers/tests/opensearch/hooks/test_opensearch.py +++ b/providers/opensearch/tests/provider_tests/opensearch/hooks/test_opensearch.py @@ -20,14 +20,13 @@ from unittest import mock import pytest - -opensearchpy = pytest.importorskip("opensearchpy") from opensearchpy import Urllib3HttpConnection from airflow.exceptions import AirflowException from airflow.models import Connection from airflow.providers.opensearch.hooks.opensearch import OpenSearchHook +opensearchpy = pytest.importorskip("opensearchpy") pytestmark = pytest.mark.db_test diff --git a/providers/tests/system/apache/drill/__init__.py b/providers/opensearch/tests/provider_tests/opensearch/log/__init__.py similarity index 100% rename from providers/tests/system/apache/drill/__init__.py rename to providers/opensearch/tests/provider_tests/opensearch/log/__init__.py diff --git a/providers/tests/opensearch/log/test_os_json_formatter.py b/providers/opensearch/tests/provider_tests/opensearch/log/test_os_json_formatter.py similarity index 99% rename from providers/tests/opensearch/log/test_os_json_formatter.py rename to providers/opensearch/tests/provider_tests/opensearch/log/test_os_json_formatter.py index bae039e199bdb..e85f02b56beea 100644 --- a/providers/tests/opensearch/log/test_os_json_formatter.py +++ b/providers/opensearch/tests/provider_tests/opensearch/log/test_os_json_formatter.py @@ -24,12 +24,11 @@ import pendulum import pytest -opensearchpy = pytest.importorskip("opensearchpy") - from airflow.providers.opensearch.log.os_task_handler import ( OpensearchJSONFormatter, ) +opensearchpy = pytest.importorskip("opensearchpy") pytestmark = pytest.mark.db_test diff --git a/providers/tests/opensearch/log/test_os_response.py b/providers/opensearch/tests/provider_tests/opensearch/log/test_os_response.py similarity index 99% rename from providers/tests/opensearch/log/test_os_response.py rename to providers/opensearch/tests/provider_tests/opensearch/log/test_os_response.py index 5b2f36d3c21b9..31af433754ff4 100644 --- a/providers/tests/opensearch/log/test_os_response.py +++ b/providers/opensearch/tests/provider_tests/opensearch/log/test_os_response.py @@ -22,8 +22,6 @@ import pytest -opensearchpy = pytest.importorskip("opensearchpy") - from airflow.providers.opensearch.log.os_response import ( 
AttributeList, Hit, @@ -32,6 +30,7 @@ ) from airflow.providers.opensearch.log.os_task_handler import OpensearchTaskHandler +opensearchpy = pytest.importorskip("opensearchpy") pytestmark = pytest.mark.db_test diff --git a/providers/tests/opensearch/log/test_os_task_handler.py b/providers/opensearch/tests/provider_tests/opensearch/log/test_os_task_handler.py similarity index 99% rename from providers/tests/opensearch/log/test_os_task_handler.py rename to providers/opensearch/tests/provider_tests/opensearch/log/test_os_task_handler.py index 4d7c9eb53a790..cbe9f086950cc 100644 --- a/providers/tests/opensearch/log/test_os_task_handler.py +++ b/providers/opensearch/tests/provider_tests/opensearch/log/test_os_task_handler.py @@ -29,8 +29,6 @@ import pendulum import pytest - -opensearchpy = pytest.importorskip("opensearchpy") from opensearchpy.exceptions import NotFoundError from airflow.configuration import conf @@ -43,12 +41,13 @@ from airflow.utils import timezone from airflow.utils.state import DagRunState, TaskInstanceState from airflow.utils.timezone import datetime +from provider_tests.opensearch.conftest import MockClient -from providers.tests.opensearch.conftest import MockClient from tests_common.test_utils.config import conf_vars from tests_common.test_utils.db import clear_db_dags, clear_db_runs from tests_common.test_utils.version_compat import AIRFLOW_V_3_0_PLUS +opensearchpy = pytest.importorskip("opensearchpy") pytestmark = pytest.mark.db_test AIRFLOW_SOURCES_ROOT_DIR = Path(__file__).parents[4].resolve() diff --git a/providers/tests/system/apache/druid/__init__.py b/providers/opensearch/tests/provider_tests/opensearch/operators/__init__.py similarity index 100% rename from providers/tests/system/apache/druid/__init__.py rename to providers/opensearch/tests/provider_tests/opensearch/operators/__init__.py diff --git a/providers/tests/opensearch/operators/test_opensearch.py b/providers/opensearch/tests/provider_tests/opensearch/operators/test_opensearch.py similarity index 99% rename from providers/tests/opensearch/operators/test_opensearch.py rename to providers/opensearch/tests/provider_tests/opensearch/operators/test_opensearch.py index 63ad7eafe48de..fd42a91a71dd6 100644 --- a/providers/tests/opensearch/operators/test_opensearch.py +++ b/providers/opensearch/tests/provider_tests/opensearch/operators/test_opensearch.py @@ -17,9 +17,6 @@ from __future__ import annotations import pytest - -opensearchpy = pytest.importorskip("opensearchpy") - from opensearchpy import Document, Keyword, Text from airflow.models import DAG @@ -30,6 +27,7 @@ ) from airflow.utils.timezone import datetime +opensearchpy = pytest.importorskip("opensearchpy") pytestmark = pytest.mark.db_test diff --git a/providers/tests/system/dbt/cloud/__init__.py b/providers/opensearch/tests/system/opensearch/__init__.py similarity index 100% rename from providers/tests/system/dbt/cloud/__init__.py rename to providers/opensearch/tests/system/opensearch/__init__.py diff --git a/providers/tests/system/opensearch/example_opensearch.py b/providers/opensearch/tests/system/opensearch/example_opensearch.py similarity index 100% rename from providers/tests/system/opensearch/example_opensearch.py rename to providers/opensearch/tests/system/opensearch/example_opensearch.py diff --git a/providers/sendgrid/README.rst b/providers/sendgrid/README.rst new file mode 100644 index 0000000000000..18776ce3bf72c --- /dev/null +++ b/providers/sendgrid/README.rst @@ -0,0 +1,62 @@ + + .. 
Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! + + .. IF YOU WANT TO MODIFY TEMPLATE FOR THIS FILE, YOU SHOULD MODIFY THE TEMPLATE + `PROVIDER_README_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + + +Package ``apache-airflow-providers-sendgrid`` + +Release: ``4.0.0`` + + +`Sendgrid <https://sendgrid.com/>`__ + + +Provider package +---------------- + +This is a provider package for the ``sendgrid`` provider. All classes for this provider package +are in ``airflow.providers.sendgrid`` python package. + +You can find package information and changelog for the provider +in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-sendgrid/4.0.0/>`_. + +Installation +------------ + +You can install this package on top of an existing Airflow 2 installation (see ``Requirements`` below +for the minimum Airflow version supported) via +``pip install apache-airflow-providers-sendgrid`` + +The package supports the following python versions: 3.9,3.10,3.11,3.12 + +Requirements +------------ + +================== ================== +PIP package Version required +================== ================== +``apache-airflow`` ``>=2.9.0`` +``sendgrid`` ``>=6.0.0`` +================== ================== + +The changelog for the provider package can be found in the +`changelog <https://airflow.apache.org/docs/apache-airflow-providers-sendgrid/4.0.0/changelog.html>`_.
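As a quick, hedged illustration of what the relocated ``sendgrid`` package is used for (the README above only covers installation), the sketch below sends a message through the provider's emailer module. The ``send_email`` function and the ``SENDGRID_API_KEY``/``SENDGRID_MAIL_FROM`` environment variables follow the provider's documented usage, but treat the exact names and values here as assumptions rather than part of this change.

# Usage sketch (not part of the diff): send a test e-mail via the sendgrid provider.
# Assumes `pip install apache-airflow-providers-sendgrid` has been run and that the
# API key / sender environment variables below are what your installed version expects.
import os

from airflow.providers.sendgrid.utils.emailer import send_email

os.environ.setdefault("SENDGRID_API_KEY", "SG.your-api-key")        # hypothetical key
os.environ.setdefault("SENDGRID_MAIL_FROM", "noreply@example.com")  # hypothetical sender

send_email(
    to="alerts@example.com",
    subject="[airflow] DAG run finished",
    html_content="<b>All tasks succeeded.</b>",
)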
diff --git a/providers/src/airflow/providers/sendgrid/.latest-doc-only-change.txt b/providers/sendgrid/docs/.latest-doc-only-change.txt similarity index 100% rename from providers/src/airflow/providers/sendgrid/.latest-doc-only-change.txt rename to providers/sendgrid/docs/.latest-doc-only-change.txt diff --git a/providers/src/airflow/providers/sendgrid/CHANGELOG.rst b/providers/sendgrid/docs/changelog.rst similarity index 100% rename from providers/src/airflow/providers/sendgrid/CHANGELOG.rst rename to providers/sendgrid/docs/changelog.rst diff --git a/docs/apache-airflow-providers-sendgrid/commits.rst b/providers/sendgrid/docs/commits.rst similarity index 100% rename from docs/apache-airflow-providers-sendgrid/commits.rst rename to providers/sendgrid/docs/commits.rst diff --git a/docs/apache-airflow-providers-sendgrid/index.rst b/providers/sendgrid/docs/index.rst similarity index 100% rename from docs/apache-airflow-providers-sendgrid/index.rst rename to providers/sendgrid/docs/index.rst diff --git a/docs/apache-airflow-providers-sendgrid/installing-providers-from-sources.rst b/providers/sendgrid/docs/installing-providers-from-sources.rst similarity index 100% rename from docs/apache-airflow-providers-sendgrid/installing-providers-from-sources.rst rename to providers/sendgrid/docs/installing-providers-from-sources.rst diff --git a/docs/apache-airflow-providers-sendgrid/security.rst b/providers/sendgrid/docs/security.rst similarity index 100% rename from docs/apache-airflow-providers-sendgrid/security.rst rename to providers/sendgrid/docs/security.rst diff --git a/providers/src/airflow/providers/sendgrid/provider.yaml b/providers/sendgrid/provider.yaml similarity index 95% rename from providers/src/airflow/providers/sendgrid/provider.yaml rename to providers/sendgrid/provider.yaml index a04d9fb69fa71..6e250ef099572 100644 --- a/providers/src/airflow/providers/sendgrid/provider.yaml +++ b/providers/sendgrid/provider.yaml @@ -21,10 +21,6 @@ name: Sendgrid description: | `Sendgrid `__ -dependencies: - - apache-airflow>=2.9.0 - - sendgrid>=6.0.0 - state: ready source-date-epoch: 1734536500 # note that those versions are maintained by release manager - do not update them manually diff --git a/providers/sendgrid/pyproject.toml b/providers/sendgrid/pyproject.toml new file mode 100644 index 0000000000000..e3b0577b89f8e --- /dev/null +++ b/providers/sendgrid/pyproject.toml @@ -0,0 +1,78 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! 
+ +# IF YOU WANT TO MODIFY THIS FILE EXCEPT DEPENDENCIES, YOU SHOULD MODIFY THE TEMPLATE +# `pyproject_TEMPLATE.toml.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY +[build-system] +requires = ["flit_core==3.10.1"] +build-backend = "flit_core.buildapi" + +[project] +name = "apache-airflow-providers-sendgrid" +version = "4.0.0" +description = "Provider package apache-airflow-providers-sendgrid for Apache Airflow" +readme = "README.rst" +authors = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +maintainers = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +keywords = [ "airflow-provider", "sendgrid", "airflow", "integration" ] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "Environment :: Web Environment", + "Intended Audience :: Developers", + "Intended Audience :: System Administrators", + "Framework :: Apache Airflow", + "Framework :: Apache Airflow :: Provider", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: System :: Monitoring", +] +requires-python = "~=3.9" + +# The dependencies should be modified in place in the generated file +# Any change in the dependencies is preserved when the file is regenerated +dependencies = [ + "apache-airflow>=2.9.0", + "sendgrid>=6.0.0", +] + +[project.urls] +"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-sendgrid/4.0.0" +"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-sendgrid/4.0.0/changelog.html" +"Bug Tracker" = "https://github.com/apache/airflow/issues" +"Source Code" = "https://github.com/apache/airflow" +"Slack Chat" = "https://s.apache.org/airflow-slack" +"Twitter" = "https://x.com/ApacheAirflow" +"YouTube" = "https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/" + +[project.entry-points."apache_airflow_provider"] +provider_info = "airflow.providers.sendgrid.get_provider_info:get_provider_info" + +[tool.flit.module] +name = "airflow.providers.sendgrid" + +[tool.pytest.ini_options] +ignore = "tests/system/" diff --git a/providers/sendgrid/src/airflow/providers/sendgrid/LICENSE b/providers/sendgrid/src/airflow/providers/sendgrid/LICENSE new file mode 100644 index 0000000000000..11069edd79019 --- /dev/null +++ b/providers/sendgrid/src/airflow/providers/sendgrid/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
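Before the next hunks: the ``apache_airflow_provider`` entry point declared in the new ``pyproject.toml`` above points at the ``get_provider_info`` module added below, which is how Airflow discovers the package at runtime. The following sketch shows that discovery mechanism with the standard library only (Python 3.10+ selectable entry points); it is an illustration, not Airflow's actual ProvidersManager code.

# Illustrative sketch (not part of the diff): resolve "apache_airflow_provider"
# entry points and read the metadata returned by get_provider_info().
from importlib.metadata import entry_points

for ep in entry_points(group="apache_airflow_provider"):
    get_info = ep.load()   # e.g. airflow.providers.sendgrid.get_provider_info:get_provider_info
    info = get_info()      # dict with "package-name", "versions", "dependencies", ...
    print(info["package-name"], "latest:", info["versions"][0])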
diff --git a/providers/src/airflow/providers/sendgrid/__init__.py b/providers/sendgrid/src/airflow/providers/sendgrid/__init__.py similarity index 100% rename from providers/src/airflow/providers/sendgrid/__init__.py rename to providers/sendgrid/src/airflow/providers/sendgrid/__init__.py diff --git a/providers/sendgrid/src/airflow/providers/sendgrid/get_provider_info.py b/providers/sendgrid/src/airflow/providers/sendgrid/get_provider_info.py new file mode 100644 index 0000000000000..7b6da38bc2cb7 --- /dev/null +++ b/providers/sendgrid/src/airflow/providers/sendgrid/get_provider_info.py @@ -0,0 +1,53 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! +# +# IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE +# `get_provider_info_TEMPLATE.py.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + + +def get_provider_info(): + return { + "package-name": "apache-airflow-providers-sendgrid", + "name": "Sendgrid", + "description": "`Sendgrid `__\n", + "state": "ready", + "source-date-epoch": 1734536500, + "versions": [ + "4.0.0", + "3.6.0", + "3.5.1", + "3.5.0", + "3.4.0", + "3.3.0", + "3.2.2", + "3.2.1", + "3.2.0", + "3.1.0", + "3.0.0", + "2.0.4", + "2.0.3", + "2.0.2", + "2.0.1", + "2.0.0", + "1.0.2", + "1.0.1", + "1.0.0", + ], + "dependencies": ["apache-airflow>=2.9.0", "sendgrid>=6.0.0"], + } diff --git a/providers/tests/system/elasticsearch/__init__.py b/providers/sendgrid/src/airflow/providers/sendgrid/utils/__init__.py similarity index 100% rename from providers/tests/system/elasticsearch/__init__.py rename to providers/sendgrid/src/airflow/providers/sendgrid/utils/__init__.py diff --git a/providers/src/airflow/providers/sendgrid/utils/emailer.py b/providers/sendgrid/src/airflow/providers/sendgrid/utils/emailer.py similarity index 99% rename from providers/src/airflow/providers/sendgrid/utils/emailer.py rename to providers/sendgrid/src/airflow/providers/sendgrid/utils/emailer.py index f22a080deba78..7a637e868ed51 100644 --- a/providers/src/airflow/providers/sendgrid/utils/emailer.py +++ b/providers/sendgrid/src/airflow/providers/sendgrid/utils/emailer.py @@ -27,6 +27,8 @@ from typing import Union import sendgrid +from airflow.hooks.base import BaseHook +from airflow.utils.email import get_email_address_list from sendgrid.helpers.mail import ( Attachment, Category, @@ -39,9 +41,6 @@ SandBoxMode, ) -from airflow.hooks.base import BaseHook -from airflow.utils.email import get_email_address_list - log = logging.getLogger(__name__) AddressesType = Union[str, Iterable[str]] diff --git a/providers/sendgrid/tests/conftest.py b/providers/sendgrid/tests/conftest.py new file mode 100644 index 0000000000000..068fe6bbf5ae9 --- /dev/null +++ 
b/providers/sendgrid/tests/conftest.py @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import pathlib + +import pytest + +pytest_plugins = "tests_common.pytest_plugin" + + +@pytest.hookimpl(tryfirst=True) +def pytest_configure(config: pytest.Config) -> None: + deprecations_ignore_path = pathlib.Path(__file__).parent.joinpath("deprecations_ignore.yml") + dep_path = [deprecations_ignore_path] if deprecations_ignore_path.exists() else [] + config.inicfg["airflow_deprecations_ignore"] = ( + config.inicfg.get("airflow_deprecations_ignore", []) + dep_path # type: ignore[assignment,operator] + ) diff --git a/providers/sendgrid/tests/provider_tests/__init__.py b/providers/sendgrid/tests/provider_tests/__init__.py new file mode 100644 index 0000000000000..e8fd22856438c --- /dev/null +++ b/providers/sendgrid/tests/provider_tests/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/providers/tests/system/github/__init__.py b/providers/sendgrid/tests/provider_tests/sendgrid/__init__.py similarity index 100% rename from providers/tests/system/github/__init__.py rename to providers/sendgrid/tests/provider_tests/sendgrid/__init__.py diff --git a/providers/tests/system/opensearch/__init__.py b/providers/sendgrid/tests/provider_tests/sendgrid/utils/__init__.py similarity index 100% rename from providers/tests/system/opensearch/__init__.py rename to providers/sendgrid/tests/provider_tests/sendgrid/utils/__init__.py diff --git a/providers/tests/sendgrid/utils/test_emailer.py b/providers/sendgrid/tests/provider_tests/sendgrid/utils/test_emailer.py similarity index 100% rename from providers/tests/sendgrid/utils/test_emailer.py rename to providers/sendgrid/tests/provider_tests/sendgrid/utils/test_emailer.py diff --git a/providers/src/airflow/providers/amazon/aws/executors/batch/batch_executor.py b/providers/src/airflow/providers/amazon/aws/executors/batch/batch_executor.py index f7226fbed7158..4a4d1f9fb6d45 100644 --- a/providers/src/airflow/providers/amazon/aws/executors/batch/batch_executor.py +++ b/providers/src/airflow/providers/amazon/aws/executors/batch/batch_executor.py @@ -56,7 +56,7 @@ ) from airflow.utils.state import State -CommandType = list[str] +CommandType = Sequence[str] ExecutorConfigType = dict[str, Any] INVALID_CREDENTIALS_EXCEPTIONS = [ @@ -350,7 +350,7 @@ def execute_async(self, key: TaskInstanceKey, command: CommandType, queue=None, self.pending_jobs.append( BatchQueuedJob( key=key, - command=command, + command=list(command), queue=queue, executor_config=executor_config or {}, attempt_number=1, diff --git a/providers/src/airflow/providers/amazon/aws/executors/ecs/utils.py b/providers/src/airflow/providers/amazon/aws/executors/ecs/utils.py index 39b266253ce05..8024e6181db45 100644 --- a/providers/src/airflow/providers/amazon/aws/executors/ecs/utils.py +++ b/providers/src/airflow/providers/amazon/aws/executors/ecs/utils.py @@ -25,6 +25,7 @@ import datetime from collections import defaultdict +from collections.abc import Sequence from dataclasses import dataclass from typing import TYPE_CHECKING, Any, Callable @@ -36,7 +37,7 @@ if TYPE_CHECKING: from airflow.models.taskinstance import TaskInstanceKey -CommandType = list[str] +CommandType = Sequence[str] ExecutorConfigFunctionType = Callable[[CommandType], dict] ExecutorConfigType = dict[str, Any] diff --git a/providers/src/airflow/providers/amazon/aws/links/datasync.py b/providers/src/airflow/providers/amazon/aws/links/datasync.py new file mode 100644 index 0000000000000..f9a643aa99e18 --- /dev/null +++ b/providers/src/airflow/providers/amazon/aws/links/datasync.py @@ -0,0 +1,37 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +from airflow.providers.amazon.aws.links.base_aws import BASE_AWS_CONSOLE_LINK, BaseAwsLink + + +class DataSyncTaskLink(BaseAwsLink): + """Helper class for constructing AWS DataSync Task console link.""" + + name = "DataSync Task" + key = "datasync_task" + format_str = BASE_AWS_CONSOLE_LINK + "/datasync/home?region={region_name}#" + "/tasks/{task_id}" + + +class DataSyncTaskExecutionLink(BaseAwsLink): + """Helper class for constructing AWS DataSync TaskExecution console link.""" + + name = "DataSync Task Execution" + key = "datasync_task_execution" + format_str = ( + BASE_AWS_CONSOLE_LINK + "/datasync/home?region={region_name}#/history/{task_id}/{task_execution_id}" + ) diff --git a/providers/src/airflow/providers/amazon/aws/operators/datasync.py b/providers/src/airflow/providers/amazon/aws/operators/datasync.py index d5c97843f16fb..7b2b7282efca7 100644 --- a/providers/src/airflow/providers/amazon/aws/operators/datasync.py +++ b/providers/src/airflow/providers/amazon/aws/operators/datasync.py @@ -25,6 +25,7 @@ from airflow.exceptions import AirflowException, AirflowTaskTimeout from airflow.providers.amazon.aws.hooks.datasync import DataSyncHook +from airflow.providers.amazon.aws.links.datasync import DataSyncTaskExecutionLink, DataSyncTaskLink from airflow.providers.amazon.aws.operators.base_aws import AwsBaseOperator from airflow.providers.amazon.aws.utils.mixins import aws_template_fields @@ -130,6 +131,8 @@ class DataSyncOperator(AwsBaseOperator[DataSyncHook]): } ui_color = "#44b5e2" + operator_extra_links = (DataSyncTaskLink(), DataSyncTaskExecutionLink()) + def __init__( self, *, @@ -215,6 +218,23 @@ def execute(self, context: Context): if not self.task_arn: raise AirflowException("DataSync TaskArn could not be identified or created.") + task_id = self.task_arn.split("/")[-1] + + task_url = DataSyncTaskLink.format_str.format( + aws_domain=DataSyncTaskLink.get_aws_domain(self.hook.conn_partition), + region_name=self.hook.conn_region_name, + task_id=task_id, + ) + + DataSyncTaskLink.persist( + context=context, + operator=self, + region_name=self.hook.conn_region_name, + aws_partition=self.hook.conn_partition, + task_id=task_id, + ) + self.log.info("You can view this DataSync task at %s", task_url) + self.log.info("Using DataSync TaskArn %s", self.task_arn) # Update the DataSync Task @@ -222,7 +242,7 @@ def execute(self, context: Context): self._update_datasync_task() # Execute the DataSync Task - self._execute_datasync_task() + self._execute_datasync_task(context=context) if not self.task_execution_arn: raise AirflowException("Nothing was executed") @@ -327,7 +347,7 @@ def _update_datasync_task(self) -> None: self.hook.update_task(self.task_arn, **self.update_task_kwargs) self.log.info("Updated TaskArn %s", self.task_arn) - def _execute_datasync_task(self) -> None: + def _execute_datasync_task(self, context: Context) -> None: """Create and monitor an AWS DataSync TaskExecution for a Task.""" if not self.task_arn: raise AirflowException("Missing TaskArn") @@ -337,6 +357,24 @@ def _execute_datasync_task(self) -> None: self.task_execution_arn = self.hook.start_task_execution(self.task_arn, **self.task_execution_kwargs) self.log.info("Started TaskExecutionArn %s", self.task_execution_arn) + # Create the execution extra link + execution_url = DataSyncTaskExecutionLink.format_str.format( + 
aws_domain=DataSyncTaskExecutionLink.get_aws_domain(self.hook.conn_partition), + region_name=self.hook.conn_region_name, + task_id=self.task_arn.split("/")[-1], + task_execution_id=self.task_execution_arn.split("/")[-1], + ) + DataSyncTaskExecutionLink.persist( + context=context, + operator=self, + region_name=self.hook.conn_region_name, + aws_partition=self.hook.conn_partition, + task_id=self.task_arn.split("/")[-1], + task_execution_id=self.task_execution_arn.split("/")[-1], + ) + + self.log.info("You can view this DataSync task execution at %s", execution_url) + + if not self.wait_for_completion: + return diff --git a/providers/src/airflow/providers/amazon/provider.yaml b/providers/src/airflow/providers/amazon/provider.yaml index 824c9b08dee66..43569a28827ab 100644 --- a/providers/src/airflow/providers/amazon/provider.yaml +++ b/providers/src/airflow/providers/amazon/provider.yaml @@ -889,6 +889,8 @@ extra-links: - airflow.providers.amazon.aws.links.step_function.StateMachineExecutionsDetailsLink - airflow.providers.amazon.aws.links.comprehend.ComprehendPiiEntitiesDetectionLink - airflow.providers.amazon.aws.links.comprehend.ComprehendDocumentClassifierLink + - airflow.providers.amazon.aws.links.datasync.DataSyncTaskLink + - airflow.providers.amazon.aws.links.datasync.DataSyncTaskExecutionLink connection-types: diff --git a/providers/src/airflow/providers/cncf/kubernetes/executors/kubernetes_executor_utils.py b/providers/src/airflow/providers/cncf/kubernetes/executors/kubernetes_executor_utils.py index 702703b2142e0..15fa954439a9d 100644 --- a/providers/src/airflow/providers/cncf/kubernetes/executors/kubernetes_executor_utils.py +++ b/providers/src/airflow/providers/cncf/kubernetes/executors/kubernetes_executor_utils.py @@ -411,7 +411,7 @@ def run_next(self, next_job: KubernetesJobType) -> None: map_index=map_index, date=None, run_id=run_id, - args=command, + args=list(command), pod_override_object=kube_executor_config, base_worker_pod=base_worker_pod, with_mutation_hook=True, diff --git a/providers/src/airflow/providers/google/CHANGELOG.rst b/providers/src/airflow/providers/google/CHANGELOG.rst index 8b8a8bc83ff9d..de464fb4849da 100644 --- a/providers/src/airflow/providers/google/CHANGELOG.rst +++ b/providers/src/airflow/providers/google/CHANGELOG.rst @@ -27,6 +27,37 @@ Changelog --------- +13.0.0 +...... + +.. note:: + This release of the provider is only available for Airflow 2.9+ as explained in the + `Apache Airflow providers support policy `_. + +Breaking changes +~~~~~~~~~~~~~~~~ + +.. warning:: + Deprecated classes, parameters and features have been removed from the Google provider package. + The following breaking changes were introduced: + + * Operators + + * Removed ``AutoMLBatchPredictOperator``. Please use the operators from ``airflow.providers.google.cloud.operators.vertex_ai.batch_prediction_job`` instead + * Removed ``DataflowStartSqlJobOperator``. Please use ``DataflowStartYamlJobOperator`` instead + * Removed ``PromptLanguageModelOperator``. Please use ``TextGenerationModelPredictOperator`` instead + * Removed ``GenerateTextEmbeddingsOperator``. Please use ``TextEmbeddingModelGetEmbeddingsOperator`` instead + * Removed ``PromptMultimodalModelOperator``. Please use ``GenerativeModelGenerateContentOperator`` instead + * Removed ``PromptMultimodalModelWithMediaOperator``. Please use ``GenerativeModelGenerateContentOperator`` instead + + * Hooks + + * Removed ``GenerativeModelHook.prompt_multimodal_model_with_media()``.
Please use ``GenerativeModelHook.generative_model_generate_content()`` instead + * Removed ``GenerativeModelHook.prompt_multimodal_model()``. Please use ``GenerativeModelHook.generative_model_generate_content()`` instead + * Removed ``GenerativeModelHook.get_generative_model_part()``. Please use ``GenerativeModelHook.generative_model_generate_content()`` instead + * Removed ``GenerativeModelHook.prompt_language_model()``. Please use ``GenerativeModelHook.text_generation_model_predict()`` instead + * Removed ``GenerativeModelHook.generate_text_embeddings()``. Please use ``GenerativeModelHook.text_generation_model_predict()`` instead + 12.0.0 ...... diff --git a/providers/src/airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py b/providers/src/airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py index 7e506641484b3..8f06d4974e137 100644 --- a/providers/src/airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +++ b/providers/src/airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py @@ -24,7 +24,7 @@ from typing import TYPE_CHECKING import vertexai -from vertexai.generative_models import GenerativeModel, Part +from vertexai.generative_models import GenerativeModel from vertexai.language_models import TextEmbeddingModel, TextGenerationModel from vertexai.preview.caching import CachedContent from vertexai.preview.evaluation import EvalResult, EvalTask @@ -100,186 +100,6 @@ def get_cached_context_model( cached_context_model = preview_generative_model.from_cached_content(cached_content) return cached_context_model - @deprecated( - planned_removal_date="January 01, 2025", - use_instead="Part objects included in contents parameter of " - "airflow.providers.google.cloud.hooks.generative_model." - "GenerativeModelHook.generative_model_generate_content", - category=AirflowProviderDeprecationWarning, - ) - def get_generative_model_part(self, content_gcs_path: str, content_mime_type: str | None = None) -> Part: - """Return a Generative Model Part object.""" - part = Part.from_uri(content_gcs_path, mime_type=content_mime_type) - return part - - @deprecated( - planned_removal_date="January 01, 2025", - use_instead="airflow.providers.google.cloud.hooks.generative_model." - "GenerativeModelHook.text_generation_model_predict", - category=AirflowProviderDeprecationWarning, - ) - @GoogleBaseHook.fallback_to_default_project_id - def prompt_language_model( - self, - prompt: str, - pretrained_model: str, - temperature: float, - max_output_tokens: int, - top_p: float, - top_k: int, - location: str, - project_id: str = PROVIDE_PROJECT_ID, - ) -> str: - """ - Use the Vertex AI PaLM API to generate natural language text. - - :param project_id: Required. The ID of the Google Cloud project that the service belongs to. - :param location: Required. The ID of the Google Cloud location that the service belongs to. - :param prompt: Required. Inputs or queries that a user or a program gives - to the Vertex AI PaLM API, in order to elicit a specific response. - :param pretrained_model: A pre-trained model optimized for performing natural - language tasks such as classification, summarization, extraction, content - creation, and ideation. - :param temperature: Temperature controls the degree of randomness in token - selection. - :param max_output_tokens: Token limit determines the maximum amount of text - output. - :param top_p: Tokens are selected from most probable to least until the sum - of their probabilities equals the top_p value. Defaults to 0.8. 
- :param top_k: A top_k of 1 means the selected token is the most probable - among all tokens. - """ - vertexai.init(project=project_id, location=location, credentials=self.get_credentials()) - - parameters = { - "temperature": temperature, - "max_output_tokens": max_output_tokens, - "top_p": top_p, - "top_k": top_k, - } - - model = self.get_text_generation_model(pretrained_model) - - response = model.predict( - prompt=prompt, - **parameters, - ) - return response.text - - @deprecated( - planned_removal_date="January 01, 2025", - use_instead="airflow.providers.google.cloud.hooks.generative_model." - "GenerativeModelHook.text_embedding_model_get_embeddings", - category=AirflowProviderDeprecationWarning, - ) - @GoogleBaseHook.fallback_to_default_project_id - def generate_text_embeddings( - self, - prompt: str, - pretrained_model: str, - location: str, - project_id: str = PROVIDE_PROJECT_ID, - ) -> list: - """ - Use the Vertex AI PaLM API to generate text embeddings. - - :param project_id: Required. The ID of the Google Cloud project that the service belongs to. - :param location: Required. The ID of the Google Cloud location that the service belongs to. - :param prompt: Required. Inputs or queries that a user or a program gives - to the Vertex AI PaLM API, in order to elicit a specific response. - :param pretrained_model: A pre-trained model optimized for generating text embeddings. - """ - vertexai.init(project=project_id, location=location, credentials=self.get_credentials()) - model = self.get_text_embedding_model(pretrained_model) - - response = model.get_embeddings([prompt])[0] # single prompt - - return response.values - - @deprecated( - planned_removal_date="January 01, 2025", - use_instead="airflow.providers.google.cloud.hooks.generative_model." - "GenerativeModelHook.generative_model_generate_content", - category=AirflowProviderDeprecationWarning, - ) - @GoogleBaseHook.fallback_to_default_project_id - def prompt_multimodal_model( - self, - prompt: str, - location: str, - generation_config: dict | None = None, - safety_settings: dict | None = None, - pretrained_model: str = "gemini-pro", - project_id: str = PROVIDE_PROJECT_ID, - ) -> str: - """ - Use the Vertex AI Gemini Pro foundation model to generate natural language text. - - :param project_id: Required. The ID of the Google Cloud project that the service belongs to. - :param location: Required. The ID of the Google Cloud location that the service belongs to. - :param prompt: Required. Inputs or queries that a user or a program gives - to the Multi-modal model, in order to elicit a specific response. - :param generation_config: Optional. Generation configuration settings. - :param safety_settings: Optional. Per request settings for blocking unsafe content. - :param pretrained_model: By default uses the pre-trained model `gemini-pro`, - supporting prompts with text-only input, including natural language - tasks, multi-turn text and code chat, and code generation. It can - output text and code. - """ - vertexai.init(project=project_id, location=location, credentials=self.get_credentials()) - - model = self.get_generative_model(pretrained_model) - response = model.generate_content( - contents=[prompt], generation_config=generation_config, safety_settings=safety_settings - ) - - return response.text - - @deprecated( - planned_removal_date="January 01, 2025", - use_instead="airflow.providers.google.cloud.hooks.generative_model." 
- "GenerativeModelHook.generative_model_generate_content", - category=AirflowProviderDeprecationWarning, - ) - @GoogleBaseHook.fallback_to_default_project_id - def prompt_multimodal_model_with_media( - self, - prompt: str, - location: str, - media_gcs_path: str, - mime_type: str, - generation_config: dict | None = None, - safety_settings: dict | None = None, - pretrained_model: str = "gemini-pro-vision", - project_id: str = PROVIDE_PROJECT_ID, - ) -> str: - """ - Use the Vertex AI Gemini Pro foundation model to generate natural language text. - - :param project_id: Required. The ID of the Google Cloud project that the service belongs to. - :param location: Required. The ID of the Google Cloud location that the service belongs to. - :param prompt: Required. Inputs or queries that a user or a program gives - to the Multi-modal model, in order to elicit a specific response. - :param generation_config: Optional. Generation configuration settings. - :param safety_settings: Optional. Per request settings for blocking unsafe content. - :param pretrained_model: By default uses the pre-trained model `gemini-pro-vision`, - supporting prompts with text-only input, including natural language - tasks, multi-turn text and code chat, and code generation. It can - output text and code. - :param media_gcs_path: A GCS path to a content file such as an image or a video. - Can be passed to the multi-modal model as part of the prompt. Used with vision models. - :param mime_type: Validates the media type presented by the file in the media_gcs_path. - """ - vertexai.init(project=project_id, location=location, credentials=self.get_credentials()) - - model = self.get_generative_model(pretrained_model) - part = self.get_generative_model_part(media_gcs_path, mime_type) - response = model.generate_content( - contents=[prompt, part], generation_config=generation_config, safety_settings=safety_settings - ) - - return response.text - @deprecated( planned_removal_date="April 09, 2025", use_instead="GenerativeModelHook.generative_model_generate_content", diff --git a/providers/src/airflow/providers/google/cloud/operators/automl.py b/providers/src/airflow/providers/google/cloud/operators/automl.py index 7ef0716615126..2a683938ed9ac 100644 --- a/providers/src/airflow/providers/google/cloud/operators/automl.py +++ b/providers/src/airflow/providers/google/cloud/operators/automl.py @@ -26,7 +26,6 @@ from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault from google.cloud.automl_v1beta1 import ( - BatchPredictResult, ColumnSpec, Dataset, Model, @@ -322,145 +321,6 @@ def execute(self, context: Context): return PredictResponse.to_dict(result) -@deprecated( - planned_removal_date="January 01, 2025", - use_instead="airflow.providers.google.cloud.operators.vertex_ai.batch_prediction_job", - category=AirflowProviderDeprecationWarning, -) -class AutoMLBatchPredictOperator(GoogleCloudBaseOperator): - """ - Perform a batch prediction on Google Cloud AutoML. - - .. warning:: - AutoMLBatchPredictOperator for tables, video intelligence, vision and natural language has been deprecated - and no longer available. 
Please use - :class:`airflow.providers.google.cloud.operators.vertex_ai.batch_prediction_job.CreateBatchPredictionJobOperator`, - :class:`airflow.providers.google.cloud.operators.vertex_ai.batch_prediction_job.GetBatchPredictionJobOperator`, - :class:`airflow.providers.google.cloud.operators.vertex_ai.batch_prediction_job.ListBatchPredictionJobsOperator`, - :class:`airflow.providers.google.cloud.operators.vertex_ai.batch_prediction_job.DeleteBatchPredictionJobOperator`, - instead. - - .. seealso:: - For more information on how to use this operator, take a look at the guide: - :ref:`howto/operator:AutoMLBatchPredictOperator` - - :param project_id: ID of the Google Cloud project where model will be created if None then - default project_id is used. - :param location: The location of the project. - :param model_id: Name of the model_id requested to serve the batch prediction. - :param input_config: Required. The input configuration for batch prediction. - If a dict is provided, it must be of the same form as the protobuf message - `google.cloud.automl_v1beta1.types.BatchPredictInputConfig` - :param output_config: Required. The Configuration specifying where output predictions should be - written. If a dict is provided, it must be of the same form as the protobuf message - `google.cloud.automl_v1beta1.types.BatchPredictOutputConfig` - :param prediction_params: Additional domain-specific parameters for the predictions, - any string must be up to 25000 characters long. - :param project_id: ID of the Google Cloud project where model is located if None then - default project_id is used. - :param location: The location of the project. - :param retry: A retry object used to retry requests. If `None` is specified, requests will not be - retried. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - `retry` is specified, the timeout applies to each individual attempt. - :param metadata: Additional metadata that is provided to the method. - :param gcp_conn_id: The connection ID to use to connect to Google Cloud. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). 
- """ - - template_fields: Sequence[str] = ( - "model_id", - "input_config", - "output_config", - "location", - "project_id", - "impersonation_chain", - ) - operator_extra_links = (TranslationLegacyModelPredictLink(),) - - def __init__( - self, - *, - model_id: str, - input_config: dict, - output_config: dict, - location: str, - project_id: str = PROVIDE_PROJECT_ID, - prediction_params: dict[str, str] | None = None, - metadata: MetaData = (), - timeout: float | None = None, - retry: Retry | _MethodDefault = DEFAULT, - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - - self.model_id = model_id - self.location = location - self.project_id = project_id - self.prediction_params = prediction_params - self.metadata = metadata - self.timeout = timeout - self.retry = retry - self.gcp_conn_id = gcp_conn_id - self.impersonation_chain = impersonation_chain - self.input_config = input_config - self.output_config = output_config - - @cached_property - def hook(self) -> CloudAutoMLHook: - return CloudAutoMLHook( - gcp_conn_id=self.gcp_conn_id, - impersonation_chain=self.impersonation_chain, - ) - - @cached_property - def model(self) -> Model: - return self.hook.get_model( - model_id=self.model_id, - location=self.location, - project_id=self.project_id, - retry=self.retry, - timeout=self.timeout, - metadata=self.metadata, - ) - - def execute(self, context: Context): - self.log.info("Fetch batch prediction.") - operation = self.hook.batch_predict( - model_id=self.model_id, - input_config=self.input_config, - output_config=self.output_config, - project_id=self.project_id, - location=self.location, - params=self.prediction_params, - retry=self.retry, - timeout=self.timeout, - metadata=self.metadata, - ) - operation_result = self.hook.wait_for_operation(timeout=self.timeout, operation=operation) - result = BatchPredictResult.to_dict(operation_result) - self.log.info("Batch prediction is ready.") - project_id = self.project_id or self.hook.project_id - if project_id: - TranslationLegacyModelPredictLink.persist( - context=context, - task_instance=self, - model_id=self.model_id, - project_id=project_id, - dataset_id=self.model.dataset_id, - ) - return result - - @deprecated( planned_removal_date="September 30, 2025", use_instead="airflow.providers.google.cloud.operators.vertex_ai.dataset.CreateDatasetOperator, " diff --git a/providers/src/airflow/providers/google/cloud/operators/dataflow.py b/providers/src/airflow/providers/google/cloud/operators/dataflow.py index 3fcbc7f67b784..c881853374ead 100644 --- a/providers/src/airflow/providers/google/cloud/operators/dataflow.py +++ b/providers/src/airflow/providers/google/cloud/operators/dataflow.py @@ -28,7 +28,7 @@ from googleapiclient.errors import HttpError from airflow.configuration import conf -from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning +from airflow.exceptions import AirflowException from airflow.providers.google.cloud.hooks.dataflow import ( DEFAULT_DATAFLOW_LOCATION, DataflowHook, @@ -40,7 +40,6 @@ TemplateJobStartTrigger, ) from airflow.providers.google.common.consts import GOOGLE_DEFAULT_DEFERRABLE_METHOD_NAME -from airflow.providers.google.common.deprecated import deprecated from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID if TYPE_CHECKING: @@ -654,116 +653,6 @@ def on_kill(self) -> None: ) -@deprecated( - planned_removal_date="January 31, 2025", - 
use_instead="DataflowStartYamlJobOperator", - category=AirflowProviderDeprecationWarning, -) -class DataflowStartSqlJobOperator(GoogleCloudBaseOperator): - """ - Starts Dataflow SQL query. - - .. seealso:: - For more information on how to use this operator, take a look at the guide: - :ref:`howto/operator:DataflowStartSqlJobOperator` - - .. warning:: - This operator requires ``gcloud`` command (Google Cloud SDK) must be installed on the Airflow worker - `__ - - :param job_name: The unique name to assign to the Cloud Dataflow job. - :param query: The SQL query to execute. - :param options: Job parameters to be executed. It can be a dictionary with the following keys. - - For more information, look at: - `https://cloud.google.com/sdk/gcloud/reference/beta/dataflow/sql/query - `__ - command reference - - :param location: The location of the Dataflow job (for example europe-west1) - :param project_id: The ID of the GCP project that owns the job. - If set to ``None`` or missing, the default project_id from the GCP connection is used. - :param gcp_conn_id: The connection ID to use connecting to Google Cloud - Platform. - :param drain_pipeline: Optional, set to True if want to stop streaming job by draining it - instead of canceling during killing task instance. See: - https://cloud.google.com/dataflow/docs/guides/stopping-a-pipeline - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). 
- """ - - template_fields: Sequence[str] = ( - "job_name", - "query", - "options", - "location", - "project_id", - "gcp_conn_id", - ) - template_fields_renderers = {"query": "sql"} - - def __init__( - self, - job_name: str, - query: str, - options: dict[str, Any], - location: str = DEFAULT_DATAFLOW_LOCATION, - project_id: str = PROVIDE_PROJECT_ID, - gcp_conn_id: str = "google_cloud_default", - drain_pipeline: bool = False, - impersonation_chain: str | Sequence[str] | None = None, - *args, - **kwargs, - ) -> None: - super().__init__(*args, **kwargs) - self.job_name = job_name - self.query = query - self.options = options - self.location = location - self.project_id = project_id - self.gcp_conn_id = gcp_conn_id - self.drain_pipeline = drain_pipeline - self.impersonation_chain = impersonation_chain - self.job = None - self.hook: DataflowHook | None = None - - def execute(self, context: Context): - self.hook = DataflowHook( - gcp_conn_id=self.gcp_conn_id, - drain_pipeline=self.drain_pipeline, - impersonation_chain=self.impersonation_chain, - ) - - def set_current_job(current_job): - self.job = current_job - - job = self.hook.start_sql_job( - job_name=self.job_name, - query=self.query, - options=self.options, - location=self.location, - project_id=self.project_id, - on_new_job_callback=set_current_job, - ) - - return job - - def on_kill(self) -> None: - self.log.info("On kill.") - if self.job: - self.hook.cancel_job( - job_id=self.job.get("id"), - project_id=self.job.get("projectId"), - location=self.job.get("location"), - ) - - class DataflowStartYamlJobOperator(GoogleCloudBaseOperator): """ Launch a Dataflow YAML job and return the result. diff --git a/providers/src/airflow/providers/google/cloud/operators/vertex_ai/generative_model.py b/providers/src/airflow/providers/google/cloud/operators/vertex_ai/generative_model.py index 42e4fdc588e43..71af5659552e2 100644 --- a/providers/src/airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +++ b/providers/src/airflow/providers/google/cloud/operators/vertex_ai/generative_model.py @@ -31,328 +31,6 @@ from airflow.utils.context import Context -@deprecated( - planned_removal_date="January 01, 2025", - use_instead="TextGenerationModelPredictOperator", - category=AirflowProviderDeprecationWarning, -) -class PromptLanguageModelOperator(GoogleCloudBaseOperator): - """ - Uses the Vertex AI PaLM API to generate natural language text. - - :param project_id: Required. The ID of the Google Cloud project that the - service belongs to (templated). - :param location: Required. The ID of the Google Cloud location that the - service belongs to (templated). - :param prompt: Required. Inputs or queries that a user or a program gives - to the Vertex AI PaLM API, in order to elicit a specific response (templated). - :param pretrained_model: By default uses the pre-trained model `text-bison`, - optimized for performing natural language tasks such as classification, - summarization, extraction, content creation, and ideation. - :param temperature: Temperature controls the degree of randomness in token - selection. Defaults to 0.0. - :param max_output_tokens: Token limit determines the maximum amount of text - output. Defaults to 256. - :param top_p: Tokens are selected from most probable to least until the sum - of their probabilities equals the top_p value. Defaults to 0.8. - :param top_k: A top_k of 1 means the selected token is the most probable - among all tokens. Defaults to 0.4. 
- :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). - """ - - template_fields = ("location", "project_id", "impersonation_chain", "prompt") - - def __init__( - self, - *, - project_id: str, - location: str, - prompt: str, - pretrained_model: str = "text-bison", - temperature: float = 0.0, - max_output_tokens: int = 256, - top_p: float = 0.8, - top_k: int = 40, - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.project_id = project_id - self.location = location - self.prompt = prompt - self.pretrained_model = pretrained_model - self.temperature = temperature - self.max_output_tokens = max_output_tokens - self.top_p = top_p - self.top_k = top_k - self.gcp_conn_id = gcp_conn_id - self.impersonation_chain = impersonation_chain - - def execute(self, context: Context): - self.hook = GenerativeModelHook( - gcp_conn_id=self.gcp_conn_id, - impersonation_chain=self.impersonation_chain, - ) - - self.log.info("Submitting prompt") - response = self.hook.prompt_language_model( - project_id=self.project_id, - location=self.location, - prompt=self.prompt, - pretrained_model=self.pretrained_model, - temperature=self.temperature, - max_output_tokens=self.max_output_tokens, - top_p=self.top_p, - top_k=self.top_k, - ) - - self.log.info("Model response: %s", response) - self.xcom_push(context, key="prompt_response", value=response) - - return response - - -@deprecated( - planned_removal_date="January 01, 2025", - use_instead="TextEmbeddingModelGetEmbeddingsOperator", - category=AirflowProviderDeprecationWarning, -) -class GenerateTextEmbeddingsOperator(GoogleCloudBaseOperator): - """ - Uses the Vertex AI PaLM API to generate natural language text. - - :param project_id: Required. The ID of the Google Cloud project that the - service belongs to (templated). - :param location: Required. The ID of the Google Cloud location that the - service belongs to (templated). - :param prompt: Required. Inputs or queries that a user or a program gives - to the Vertex AI PaLM API, in order to elicit a specific response (templated). - :param pretrained_model: By default uses the pre-trained model `textembedding-gecko`, - optimized for performing text embeddings. - :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. 
- If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). - """ - - template_fields = ("location", "project_id", "impersonation_chain", "prompt") - - def __init__( - self, - *, - project_id: str, - location: str, - prompt: str, - pretrained_model: str = "textembedding-gecko", - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.project_id = project_id - self.location = location - self.prompt = prompt - self.pretrained_model = pretrained_model - self.gcp_conn_id = gcp_conn_id - self.impersonation_chain = impersonation_chain - - def execute(self, context: Context): - self.hook = GenerativeModelHook( - gcp_conn_id=self.gcp_conn_id, - impersonation_chain=self.impersonation_chain, - ) - - self.log.info("Generating text embeddings") - response = self.hook.generate_text_embeddings( - project_id=self.project_id, - location=self.location, - prompt=self.prompt, - pretrained_model=self.pretrained_model, - ) - - self.log.info("Model response: %s", response) - self.xcom_push(context, key="prompt_response", value=response) - - return response - - -@deprecated( - planned_removal_date="January 01, 2025", - use_instead="GenerativeModelGenerateContentOperator", - category=AirflowProviderDeprecationWarning, -) -class PromptMultimodalModelOperator(GoogleCloudBaseOperator): - """ - Use the Vertex AI Gemini Pro foundation model to generate natural language text. - - :param project_id: Required. The ID of the Google Cloud project that the - service belongs to (templated). - :param location: Required. The ID of the Google Cloud location that the - service belongs to (templated). - :param prompt: Required. Inputs or queries that a user or a program gives - to the Multi-modal model, in order to elicit a specific response (templated). - :param generation_config: Optional. Generation configuration settings. - :param safety_settings: Optional. Per request settings for blocking unsafe content. - :param pretrained_model: By default uses the pre-trained model `gemini-pro`, - supporting prompts with text-only input, including natural language - tasks, multi-turn text and code chat, and code generation. It can - output text and code. - :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). 
- """ - - template_fields = ("location", "project_id", "impersonation_chain", "prompt") - - def __init__( - self, - *, - project_id: str, - location: str, - prompt: str, - generation_config: dict | None = None, - safety_settings: dict | None = None, - pretrained_model: str = "gemini-pro", - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.project_id = project_id - self.location = location - self.prompt = prompt - self.generation_config = generation_config - self.safety_settings = safety_settings - self.pretrained_model = pretrained_model - self.gcp_conn_id = gcp_conn_id - self.impersonation_chain = impersonation_chain - - def execute(self, context: Context): - self.hook = GenerativeModelHook( - gcp_conn_id=self.gcp_conn_id, - impersonation_chain=self.impersonation_chain, - ) - response = self.hook.prompt_multimodal_model( - project_id=self.project_id, - location=self.location, - prompt=self.prompt, - generation_config=self.generation_config, - safety_settings=self.safety_settings, - pretrained_model=self.pretrained_model, - ) - - self.log.info("Model response: %s", response) - self.xcom_push(context, key="prompt_response", value=response) - - return response - - -@deprecated( - planned_removal_date="January 01, 2025", - use_instead="GenerativeModelGenerateContentOperator", - category=AirflowProviderDeprecationWarning, -) -class PromptMultimodalModelWithMediaOperator(GoogleCloudBaseOperator): - """ - Use the Vertex AI Gemini Pro foundation model to generate natural language text. - - :param project_id: Required. The ID of the Google Cloud project that the - service belongs to (templated). - :param location: Required. The ID of the Google Cloud location that the - service belongs to (templated). - :param prompt: Required. Inputs or queries that a user or a program gives - to the Multi-modal model, in order to elicit a specific response (templated). - :param generation_config: Optional. Generation configuration settings. - :param safety_settings: Optional. Per request settings for blocking unsafe content. - :param pretrained_model: By default uses the pre-trained model `gemini-pro-vision`, - supporting prompts with text-only input, including natural language - tasks, multi-turn text and code chat, and code generation. It can - output text and code. - :param media_gcs_path: A GCS path to a media file such as an image or a video. - Can be passed to the multi-modal model as part of the prompt. Used with vision models. - :param mime_type: Validates the media type presented by the file in the media_gcs_path. - :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). 
- """ - - template_fields = ("location", "project_id", "impersonation_chain", "prompt") - - def __init__( - self, - *, - project_id: str, - location: str, - prompt: str, - media_gcs_path: str, - mime_type: str, - generation_config: dict | None = None, - safety_settings: dict | None = None, - pretrained_model: str = "gemini-pro-vision", - gcp_conn_id: str = "google_cloud_default", - impersonation_chain: str | Sequence[str] | None = None, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.project_id = project_id - self.location = location - self.prompt = prompt - self.generation_config = generation_config - self.safety_settings = safety_settings - self.pretrained_model = pretrained_model - self.media_gcs_path = media_gcs_path - self.mime_type = mime_type - self.gcp_conn_id = gcp_conn_id - self.impersonation_chain = impersonation_chain - - def execute(self, context: Context): - self.hook = GenerativeModelHook( - gcp_conn_id=self.gcp_conn_id, - impersonation_chain=self.impersonation_chain, - ) - response = self.hook.prompt_multimodal_model_with_media( - project_id=self.project_id, - location=self.location, - prompt=self.prompt, - generation_config=self.generation_config, - safety_settings=self.safety_settings, - pretrained_model=self.pretrained_model, - media_gcs_path=self.media_gcs_path, - mime_type=self.mime_type, - ) - - self.log.info("Model response: %s", response) - self.xcom_push(context, key="prompt_response", value=response) - - return response - - @deprecated( planned_removal_date="April 09, 2025", use_instead="GenerativeModelGenerateContentOperator", diff --git a/providers/tests/amazon/aws/executors/batch/test_batch_executor.py b/providers/tests/amazon/aws/executors/batch/test_batch_executor.py index 3b02d11250125..809187cc9cf1a 100644 --- a/providers/tests/amazon/aws/executors/batch/test_batch_executor.py +++ b/providers/tests/amazon/aws/executors/batch/test_batch_executor.py @@ -189,7 +189,7 @@ class TestAwsBatchExecutor: def test_execute(self, mock_executor): """Test execution from end-to-end""" airflow_key = mock.Mock(spec=tuple) - airflow_cmd = mock.Mock(spec=list) + airflow_cmd = ["1", "2"] mock_executor.batch.submit_job.return_value = {"jobId": MOCK_JOB_ID, "jobName": "some-job-name"} @@ -209,8 +209,8 @@ def test_attempt_all_jobs_when_some_jobs_fail(self, _, mock_executor): failed jobs are added back to the pending_jobs queue to be run in the next iteration. """ airflow_key = TaskInstanceKey("a", "b", "c", 1, -1) - airflow_cmd1 = mock.Mock(spec=list) - airflow_cmd2 = mock.Mock(spec=list) + airflow_cmd1 = ["1", "2"] + airflow_cmd2 = ["3", "4"] airflow_commands = [airflow_cmd1, airflow_cmd2] responses = [Exception("Failure 1"), {"jobId": "job-2"}] @@ -238,8 +238,8 @@ def test_attempt_all_jobs_when_some_jobs_fail(self, _, mock_executor): assert len(mock_executor.active_workers.get_all_jobs()) == 1 # Add more tasks to pending_jobs. This simulates tasks being scheduled by Airflow - airflow_cmd3 = mock.Mock(spec=list) - airflow_cmd4 = mock.Mock(spec=list) + airflow_cmd3 = ["5", "6"] + airflow_cmd4 = ["7", "8"] airflow_commands.extend([airflow_cmd1, airflow_cmd3, airflow_cmd4]) responses.extend([Exception("Failure 1"), {"jobId": "job-3"}, {"jobId": "job-4"}]) mock_executor.execute_async(airflow_key, airflow_cmd3) @@ -277,8 +277,8 @@ def test_attempt_all_jobs_when_jobs_fail(self, _, mock_executor): until all the tasks have been attempted the maximum number of times. 
""" airflow_key = TaskInstanceKey("a", "b", "c", 1, -1) - airflow_cmd1 = mock.Mock(spec=list) - airflow_cmd2 = mock.Mock(spec=list) + airflow_cmd1 = ["1", "2"] + airflow_cmd2 = ["3", "4"] commands = [airflow_cmd1, airflow_cmd2] failures = [Exception("Failure 1"), Exception("Failure 2")] submit_job_args = { @@ -339,7 +339,7 @@ def test_attempt_submit_jobs_failure(self, mock_executor): def test_task_retry_on_api_failure(self, _, mock_executor, caplog): """Test API failure retries""" airflow_keys = ["TaskInstanceKey1", "TaskInstanceKey2"] - airflow_cmds = [mock.Mock(spec=list), mock.Mock(spec=list)] + airflow_cmds = [["1", "2"], ["3", "4"]] mock_executor.execute_async(airflow_keys[0], airflow_cmds[0]) mock_executor.execute_async(airflow_keys[1], airflow_cmds[1]) diff --git a/providers/tests/amazon/aws/links/test_datasync.py b/providers/tests/amazon/aws/links/test_datasync.py new file mode 100644 index 0000000000000..9ff1610ac30c8 --- /dev/null +++ b/providers/tests/amazon/aws/links/test_datasync.py @@ -0,0 +1,52 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from __future__ import annotations + +from airflow.providers.amazon.aws.links.datasync import DataSyncTaskExecutionLink, DataSyncTaskLink + +from providers.tests.amazon.aws.links.test_base_aws import BaseAwsLinksTestCase + +TASK_ID = "task-0b36221bf94ad2bdd" +EXECUTION_ID = "exec-00000000000000004" + + +class TestDataSyncTaskLink(BaseAwsLinksTestCase): + link_class = DataSyncTaskLink + + def test_extra_link(self): + task_id = TASK_ID + self.assert_extra_link_url( + expected_url=(f"https://console.aws.amazon.com/datasync/home?region=us-east-1#/tasks/{TASK_ID}"), + region_name="us-east-1", + aws_partition="aws", + task_id=task_id, + ) + + +class TestDataSyncTaskExecutionLink(BaseAwsLinksTestCase): + link_class = DataSyncTaskExecutionLink + + def test_extra_link(self): + self.assert_extra_link_url( + expected_url=( + f"https://console.aws.amazon.com/datasync/home?region=us-east-1#/history/{TASK_ID}/{EXECUTION_ID}" + ), + region_name="us-east-1", + aws_partition="aws", + task_id=TASK_ID, + task_execution_id=EXECUTION_ID, + ) diff --git a/providers/tests/amazon/aws/operators/test_datasync.py b/providers/tests/amazon/aws/operators/test_datasync.py index 6b6b64caa130e..81c8ea7445dbf 100644 --- a/providers/tests/amazon/aws/operators/test_datasync.py +++ b/providers/tests/amazon/aws/operators/test_datasync.py @@ -25,6 +25,7 @@ from airflow.exceptions import AirflowException from airflow.models import DAG, DagRun, TaskInstance from airflow.providers.amazon.aws.hooks.datasync import DataSyncHook +from airflow.providers.amazon.aws.links.datasync import DataSyncTaskLink from airflow.providers.amazon.aws.operators.datasync import DataSyncOperator from airflow.utils import timezone from airflow.utils.state import DagRunState @@ -748,6 +749,27 @@ def test_init_fails(self, mock_get_conn): # ### Check mocks: mock_get_conn.assert_not_called() + def test_task_extra_links(self, mock_get_conn): + mock_get_conn.return_value = self.client + self.set_up_operator() + + region = "us-east-1" + aws_domain = DataSyncTaskLink.get_aws_domain("aws") + task_id = self.task_arn.split("/")[-1] + + base_url = f"https://console.{aws_domain}/datasync/home?region={region}#" + task_url = f"{base_url}/tasks/{task_id}" + + with mock.patch.object(self.datasync.log, "info") as mock_logging: + result = self.datasync.execute(None) + task_execution_arn = result["TaskExecutionArn"] + execution_id = task_execution_arn.split("/")[-1] + execution_url = f"{base_url}/history/{task_id}/{execution_id}" + + assert self.datasync.task_arn == self.task_arn + mock_logging.assert_any_call("You can view this DataSync task at %s", task_url) + mock_logging.assert_any_call("You can view this DataSync task execution at %s", execution_url) + def test_execute_task(self, mock_get_conn): # ### Set up mocks: mock_get_conn.return_value = self.client diff --git a/providers/tests/fab/auth_manager/api_endpoints/test_dag_run_endpoint.py b/providers/tests/fab/auth_manager/api_endpoints/test_dag_run_endpoint.py index e745d3d655bdc..d0aecf20f92a6 100644 --- a/providers/tests/fab/auth_manager/api_endpoints/test_dag_run_endpoint.py +++ b/providers/tests/fab/auth_manager/api_endpoints/test_dag_run_endpoint.py @@ -22,8 +22,8 @@ from airflow.models.dag import DagModel from airflow.models.dagrun import DagRun -from airflow.models.param import Param from airflow.providers.fab.www.security import permissions +from airflow.sdk.definitions.param import Param from airflow.utils import timezone from airflow.utils.session import create_session from airflow.utils.state import 
DagRunState diff --git a/providers/tests/google/cloud/hooks/vertex_ai/test_generative_model.py b/providers/tests/google/cloud/hooks/vertex_ai/test_generative_model.py index 21741a617ea92..762958d621a58 100644 --- a/providers/tests/google/cloud/hooks/vertex_ai/test_generative_model.py +++ b/providers/tests/google/cloud/hooks/vertex_ai/test_generative_model.py @@ -148,61 +148,6 @@ def setup_method(self): self.hook = GenerativeModelHook(gcp_conn_id=TEST_GCP_CONN_ID) self.hook.get_credentials = self.dummy_get_credentials - @mock.patch(GENERATIVE_MODEL_STRING.format("GenerativeModelHook.get_text_generation_model")) - def test_prompt_language_model(self, mock_model) -> None: - with pytest.warns(AirflowProviderDeprecationWarning) as warnings: - self.hook.prompt_language_model( - project_id=GCP_PROJECT, - location=GCP_LOCATION, - prompt=TEST_PROMPT, - pretrained_model=TEST_LANGUAGE_PRETRAINED_MODEL, - temperature=TEST_TEMPERATURE, - max_output_tokens=TEST_MAX_OUTPUT_TOKENS, - top_p=TEST_TOP_P, - top_k=TEST_TOP_K, - ) - assert_warning("text_generation_model_predict", warnings) - - @mock.patch(GENERATIVE_MODEL_STRING.format("GenerativeModelHook.get_text_embedding_model")) - def test_generate_text_embeddings(self, mock_model) -> None: - with pytest.warns(AirflowProviderDeprecationWarning) as warnings: - self.hook.generate_text_embeddings( - project_id=GCP_PROJECT, - location=GCP_LOCATION, - prompt=TEST_PROMPT, - pretrained_model=TEST_TEXT_EMBEDDING_MODEL, - ) - assert_warning("text_embedding_model_get_embeddings", warnings) - - @mock.patch(GENERATIVE_MODEL_STRING.format("GenerativeModelHook.get_generative_model")) - def test_prompt_multimodal_model(self, mock_model) -> None: - with pytest.warns(AirflowProviderDeprecationWarning) as warnings: - self.hook.prompt_multimodal_model( - project_id=GCP_PROJECT, - location=GCP_LOCATION, - prompt=TEST_PROMPT, - generation_config=TEST_GENERATION_CONFIG, - safety_settings=TEST_SAFETY_SETTINGS, - pretrained_model=TEST_MULTIMODAL_PRETRAINED_MODEL, - ) - assert_warning("generative_model_generate_content", warnings) - - @mock.patch(GENERATIVE_MODEL_STRING.format("GenerativeModelHook.get_generative_model_part")) - @mock.patch(GENERATIVE_MODEL_STRING.format("GenerativeModelHook.get_generative_model")) - def test_prompt_multimodal_model_with_media(self, mock_model, mock_part) -> None: - with pytest.warns(AirflowProviderDeprecationWarning) as warnings: - self.hook.prompt_multimodal_model_with_media( - project_id=GCP_PROJECT, - location=GCP_LOCATION, - prompt=TEST_VISION_PROMPT, - generation_config=TEST_GENERATION_CONFIG, - safety_settings=TEST_SAFETY_SETTINGS, - pretrained_model=TEST_MULTIMODAL_VISION_MODEL, - media_gcs_path=TEST_MEDIA_GCS_PATH, - mime_type=TEST_MIME_TYPE, - ) - assert_warning("generative_model_generate_content", warnings) - @mock.patch(GENERATIVE_MODEL_STRING.format("GenerativeModelHook.get_text_generation_model")) def test_text_generation_model_predict(self, mock_model) -> None: with pytest.warns(AirflowProviderDeprecationWarning) as warnings: diff --git a/providers/tests/google/cloud/links/test_translate.py b/providers/tests/google/cloud/links/test_translate.py index 69c860a8c53fb..1d3822ad32d3e 100644 --- a/providers/tests/google/cloud/links/test_translate.py +++ b/providers/tests/google/cloud/links/test_translate.py @@ -22,19 +22,14 @@ # For no Pydantic environment, we need to skip the tests pytest.importorskip("google.cloud.aiplatform_v1") -from google.cloud.automl_v1beta1 import Model - -from airflow.exceptions import 
AirflowProviderDeprecationWarning from airflow.providers.google.cloud.links.translate import ( TRANSLATION_BASE_LINK, TranslationDatasetListLink, TranslationLegacyDatasetLink, TranslationLegacyModelLink, - TranslationLegacyModelPredictLink, TranslationLegacyModelTrainLink, ) from airflow.providers.google.cloud.operators.automl import ( - AutoMLBatchPredictOperator, AutoMLCreateDatasetOperator, AutoMLListDatasetOperator, AutoMLTrainModelOperator, @@ -137,36 +132,3 @@ def test_get_link(self, create_task_instance_of_operator, session): ) actual_url = link.get_link(operator=ti.task, ti_key=ti.key) assert actual_url == expected_url - - -class TestTranslationLegacyModelPredictLink: - @pytest.mark.db_test - def test_get_link(self, create_task_instance_of_operator, session): - expected_url = ( - f"{TRANSLATION_BASE_LINK}/locations/{GCP_LOCATION}/datasets/{DATASET}/" - f"predict;modelId={MODEL}?project={GCP_PROJECT_ID}" - ) - link = TranslationLegacyModelPredictLink() - with pytest.warns(AirflowProviderDeprecationWarning): - ti = create_task_instance_of_operator( - AutoMLBatchPredictOperator, - dag_id="test_legacy_model_predict_link_dag", - task_id="test_legacy_model_predict_link_task", - model_id=MODEL, - project_id=GCP_PROJECT_ID, - location=GCP_LOCATION, - input_config="input_config", - output_config="input_config", - ) - ti.task.model = Model(dataset_id=DATASET, display_name=MODEL) - session.add(ti) - session.commit() - link.persist( - context={"ti": ti}, - task_instance=ti.task, - model_id=MODEL, - project_id=GCP_PROJECT_ID, - dataset_id=DATASET, - ) - actual_url = link.get_link(operator=ti.task, ti_key=ti.key) - assert actual_url == expected_url diff --git a/providers/tests/google/cloud/operators/test_automl.py b/providers/tests/google/cloud/operators/test_automl.py index 94dca98be917b..7ae70c83c9ed3 100644 --- a/providers/tests/google/cloud/operators/test_automl.py +++ b/providers/tests/google/cloud/operators/test_automl.py @@ -26,13 +26,12 @@ pytest.importorskip("google.cloud.aiplatform_v1") from google.api_core.gapic_v1.method import DEFAULT -from google.cloud.automl_v1beta1 import BatchPredictResult, Dataset, Model, PredictResponse +from google.cloud.automl_v1beta1 import Dataset, Model, PredictResponse from airflow.exceptions import AirflowProviderDeprecationWarning from airflow.providers.google.cloud.hooks.automl import CloudAutoMLHook from airflow.providers.google.cloud.hooks.vertex_ai.prediction_service import PredictionServiceHook from airflow.providers.google.cloud.operators.automl import ( - AutoMLBatchPredictOperator, AutoMLCreateDatasetOperator, AutoMLDeleteDatasetOperator, AutoMLDeleteModelOperator, @@ -125,73 +124,6 @@ def test_templating(self, create_task_instance_of_operator, session): assert task.impersonation_chain == "impersonation_chain" -class TestAutoMLBatchPredictOperator: - @mock.patch("airflow.providers.google.cloud.links.translate.TranslationLegacyModelPredictLink.persist") - @mock.patch("airflow.providers.google.cloud.operators.automl.CloudAutoMLHook") - def test_execute(self, mock_hook, mock_link_persist): - mock_hook.return_value.batch_predict.return_value.result.return_value = BatchPredictResult() - mock_hook.return_value.extract_object_id = extract_object_id - mock_hook.return_value.wait_for_operation.return_value = BatchPredictResult() - mock_hook.return_value.get_model.return_value = mock.MagicMock(**MODEL) - mock_context = {"ti": mock.MagicMock()} - with pytest.warns(AirflowProviderDeprecationWarning): - op = AutoMLBatchPredictOperator( - model_id=MODEL_ID, 
- location=GCP_LOCATION, - project_id=GCP_PROJECT_ID, - input_config=INPUT_CONFIG, - output_config=OUTPUT_CONFIG, - task_id=TASK_ID, - prediction_params={}, - ) - op.execute(context=mock_context) - mock_hook.return_value.batch_predict.assert_called_once_with( - input_config=INPUT_CONFIG, - location=GCP_LOCATION, - metadata=(), - model_id=MODEL_ID, - output_config=OUTPUT_CONFIG, - params={}, - project_id=GCP_PROJECT_ID, - retry=DEFAULT, - timeout=None, - ) - mock_link_persist.assert_called_once_with( - context=mock_context, - task_instance=op, - model_id=MODEL_ID, - project_id=GCP_PROJECT_ID, - dataset_id=DATASET_ID, - ) - - @pytest.mark.db_test - def test_templating(self, create_task_instance_of_operator, session): - with pytest.warns(AirflowProviderDeprecationWarning): - ti = create_task_instance_of_operator( - AutoMLBatchPredictOperator, - # Templated fields - model_id="{{ 'model' }}", - input_config="{{ 'input-config' }}", - output_config="{{ 'output-config' }}", - location="{{ 'location' }}", - project_id="{{ 'project-id' }}", - impersonation_chain="{{ 'impersonation-chain' }}", - # Other parameters - dag_id="test_template_body_templating_dag", - task_id="test_template_body_templating_task", - ) - session.add(ti) - session.commit() - ti.render_templates() - task: AutoMLBatchPredictOperator = ti.task - assert task.model_id == "model" - assert task.input_config == "input-config" - assert task.output_config == "output-config" - assert task.location == "location" - assert task.project_id == "project-id" - assert task.impersonation_chain == "impersonation-chain" - - class TestAutoMLPredictOperator: @mock.patch("airflow.providers.google.cloud.links.translate.TranslationLegacyModelPredictLink.persist") @mock.patch("airflow.providers.google.cloud.operators.automl.CloudAutoMLHook") diff --git a/providers/tests/google/cloud/operators/test_dataflow.py b/providers/tests/google/cloud/operators/test_dataflow.py index 83b33eaccf001..89b5f9180838f 100644 --- a/providers/tests/google/cloud/operators/test_dataflow.py +++ b/providers/tests/google/cloud/operators/test_dataflow.py @@ -17,14 +17,13 @@ # under the License. 
from __future__ import annotations -from copy import deepcopy from unittest import mock import httplib2 import pytest from googleapiclient.errors import HttpError -from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning +from airflow.exceptions import AirflowException from airflow.providers.google.cloud.hooks.dataflow import ( DEFAULT_DATAFLOW_LOCATION, DataflowJobStatus, @@ -34,7 +33,6 @@ DataflowDeletePipelineOperator, DataflowRunPipelineOperator, DataflowStartFlexTemplateOperator, - DataflowStartSqlJobOperator, DataflowStartYamlJobOperator, DataflowStopJobOperator, DataflowTemplatedJobStartOperator, @@ -348,40 +346,6 @@ def test_execute_with_deferrable_mode(self, mock_hook, mock_defer_method, deferr mock_defer_method.assert_called_once() -class TestDataflowStartSqlJobOperator: - @mock.patch("airflow.providers.google.cloud.operators.dataflow.DataflowHook") - def test_execute(self, mock_hook): - with pytest.warns(AirflowProviderDeprecationWarning): - start_sql = DataflowStartSqlJobOperator( - task_id="start_sql_query", - job_name=TEST_SQL_JOB_NAME, - query=TEST_SQL_QUERY, - options=deepcopy(TEST_SQL_OPTIONS), - location=TEST_LOCATION, - do_xcom_push=True, - ) - start_sql.execute(mock.MagicMock()) - - mock_hook.assert_called_once_with( - gcp_conn_id="google_cloud_default", - drain_pipeline=False, - impersonation_chain=None, - ) - mock_hook.return_value.start_sql_job.assert_called_once_with( - job_name=TEST_SQL_JOB_NAME, - query=TEST_SQL_QUERY, - options=TEST_SQL_OPTIONS, - location=TEST_LOCATION, - project_id=None, - on_new_job_callback=mock.ANY, - ) - start_sql.job = TEST_SQL_JOB - start_sql.on_kill() - mock_hook.return_value.cancel_job.assert_called_once_with( - job_id="test-job-id", project_id=None, location=None - ) - - class TestDataflowStartYamlJobOperator: @pytest.fixture def sync_operator(self): diff --git a/providers/tests/google/cloud/operators/vertex_ai/test_generative_model.py b/providers/tests/google/cloud/operators/vertex_ai/test_generative_model.py index 709e5d1f78402..8712830c6eee3 100644 --- a/providers/tests/google/cloud/operators/vertex_ai/test_generative_model.py +++ b/providers/tests/google/cloud/operators/vertex_ai/test_generative_model.py @@ -35,11 +35,7 @@ CountTokensOperator, CreateCachedContentOperator, GenerateFromCachedContentOperator, - GenerateTextEmbeddingsOperator, GenerativeModelGenerateContentOperator, - PromptLanguageModelOperator, - PromptMultimodalModelOperator, - PromptMultimodalModelWithMediaOperator, RunEvaluationOperator, SupervisedFineTuningTrainOperator, TextEmbeddingModelGetEmbeddingsOperator, @@ -59,224 +55,6 @@ def assert_warning(msg: str, warnings): assert any(msg in str(w) for w in warnings) -class TestVertexAIPromptLanguageModelOperator: - prompt = "In 10 words or less, what is Apache Airflow?" 
- pretrained_model = "text-bison" - temperature = 0.0 - max_output_tokens = 256 - top_p = 0.8 - top_k = 40 - - def test_deprecation_warning(self): - with pytest.warns(AirflowProviderDeprecationWarning) as warnings: - PromptLanguageModelOperator( - task_id=TASK_ID, - project_id=GCP_PROJECT, - location=GCP_LOCATION, - prompt=self.prompt, - pretrained_model=self.pretrained_model, - temperature=self.temperature, - max_output_tokens=self.max_output_tokens, - top_p=self.top_p, - top_k=self.top_k, - gcp_conn_id=GCP_CONN_ID, - impersonation_chain=IMPERSONATION_CHAIN, - ) - assert_warning("TextGenerationModelPredictOperator", warnings) - - @mock.patch(VERTEX_AI_PATH.format("generative_model.GenerativeModelHook")) - def test_execute(self, mock_hook): - with pytest.warns(AirflowProviderDeprecationWarning): - op = PromptLanguageModelOperator( - task_id=TASK_ID, - project_id=GCP_PROJECT, - location=GCP_LOCATION, - prompt=self.prompt, - pretrained_model=self.pretrained_model, - temperature=self.temperature, - max_output_tokens=self.max_output_tokens, - top_p=self.top_p, - top_k=self.top_k, - gcp_conn_id=GCP_CONN_ID, - impersonation_chain=IMPERSONATION_CHAIN, - ) - op.execute(context={"ti": mock.MagicMock()}) - mock_hook.assert_called_once_with( - gcp_conn_id=GCP_CONN_ID, - impersonation_chain=IMPERSONATION_CHAIN, - ) - mock_hook.return_value.prompt_language_model.assert_called_once_with( - project_id=GCP_PROJECT, - location=GCP_LOCATION, - prompt=self.prompt, - pretrained_model=self.pretrained_model, - temperature=self.temperature, - max_output_tokens=self.max_output_tokens, - top_p=self.top_p, - top_k=self.top_k, - ) - - -class TestVertexAIGenerateTextEmbeddingsOperator: - prompt = "In 10 words or less, what is Apache Airflow?" - pretrained_model = "textembedding-gecko" - - def test_deprecation_warning(self): - with pytest.warns(AirflowProviderDeprecationWarning) as warnings: - GenerateTextEmbeddingsOperator( - task_id=TASK_ID, - project_id=GCP_PROJECT, - location=GCP_LOCATION, - prompt=self.prompt, - pretrained_model=self.pretrained_model, - gcp_conn_id=GCP_CONN_ID, - impersonation_chain=IMPERSONATION_CHAIN, - ) - assert_warning("TextEmbeddingModelGetEmbeddingsOperator", warnings) - - @mock.patch(VERTEX_AI_PATH.format("generative_model.GenerativeModelHook")) - def test_execute(self, mock_hook): - with pytest.warns(AirflowProviderDeprecationWarning): - op = GenerateTextEmbeddingsOperator( - task_id=TASK_ID, - project_id=GCP_PROJECT, - location=GCP_LOCATION, - prompt=self.prompt, - pretrained_model=self.pretrained_model, - gcp_conn_id=GCP_CONN_ID, - impersonation_chain=IMPERSONATION_CHAIN, - ) - op.execute(context={"ti": mock.MagicMock()}) - mock_hook.assert_called_once_with( - gcp_conn_id=GCP_CONN_ID, - impersonation_chain=IMPERSONATION_CHAIN, - ) - mock_hook.return_value.generate_text_embeddings.assert_called_once_with( - project_id=GCP_PROJECT, - location=GCP_LOCATION, - prompt=self.prompt, - pretrained_model=self.pretrained_model, - ) - - -class TestVertexAIPromptMultimodalModelOperator: - prompt = "In 10 words or less, what is Apache Airflow?" 
- pretrained_model = "gemini-pro" - safety_settings = { - HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_ONLY_HIGH, - HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_ONLY_HIGH, - HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_ONLY_HIGH, - HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_ONLY_HIGH, - } - generation_config = {"max_output_tokens": 256, "top_p": 0.8, "temperature": 0.0} - - def test_deprecation_warning(self): - with pytest.warns(AirflowProviderDeprecationWarning) as warnings: - PromptMultimodalModelOperator( - task_id=TASK_ID, - project_id=GCP_PROJECT, - location=GCP_LOCATION, - prompt=self.prompt, - generation_config=self.generation_config, - safety_settings=self.safety_settings, - pretrained_model=self.pretrained_model, - gcp_conn_id=GCP_CONN_ID, - impersonation_chain=IMPERSONATION_CHAIN, - ) - assert_warning("GenerativeModelGenerateContentOperator", warnings) - - @mock.patch(VERTEX_AI_PATH.format("generative_model.GenerativeModelHook")) - def test_execute(self, mock_hook): - with pytest.warns(AirflowProviderDeprecationWarning): - op = PromptMultimodalModelOperator( - task_id=TASK_ID, - project_id=GCP_PROJECT, - location=GCP_LOCATION, - prompt=self.prompt, - generation_config=self.generation_config, - safety_settings=self.safety_settings, - pretrained_model=self.pretrained_model, - gcp_conn_id=GCP_CONN_ID, - impersonation_chain=IMPERSONATION_CHAIN, - ) - op.execute(context={"ti": mock.MagicMock()}) - mock_hook.assert_called_once_with( - gcp_conn_id=GCP_CONN_ID, - impersonation_chain=IMPERSONATION_CHAIN, - ) - mock_hook.return_value.prompt_multimodal_model.assert_called_once_with( - project_id=GCP_PROJECT, - location=GCP_LOCATION, - prompt=self.prompt, - generation_config=self.generation_config, - safety_settings=self.safety_settings, - pretrained_model=self.pretrained_model, - ) - - -class TestVertexAIPromptMultimodalModelWithMediaOperator: - pretrained_model = "gemini-pro-vision" - vision_prompt = "In 10 words or less, describe this content." 
- media_gcs_path = "gs://download.tensorflow.org/example_images/320px-Felis_catus-cat_on_snow.jpg" - mime_type = "image/jpeg" - safety_settings = { - HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_ONLY_HIGH, - HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_ONLY_HIGH, - HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_ONLY_HIGH, - HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_ONLY_HIGH, - } - generation_config = {"max_output_tokens": 256, "top_p": 0.8, "temperature": 0.0} - - def test_deprecation_warning(self): - with pytest.warns(AirflowProviderDeprecationWarning) as warnings: - PromptMultimodalModelWithMediaOperator( - task_id=TASK_ID, - project_id=GCP_PROJECT, - location=GCP_LOCATION, - prompt=self.vision_prompt, - generation_config=self.generation_config, - safety_settings=self.safety_settings, - pretrained_model=self.pretrained_model, - media_gcs_path=self.media_gcs_path, - mime_type=self.mime_type, - gcp_conn_id=GCP_CONN_ID, - impersonation_chain=IMPERSONATION_CHAIN, - ) - assert_warning("GenerativeModelGenerateContentOperator", warnings) - - @mock.patch(VERTEX_AI_PATH.format("generative_model.GenerativeModelHook")) - def test_execute(self, mock_hook): - with pytest.warns(AirflowProviderDeprecationWarning): - op = PromptMultimodalModelWithMediaOperator( - task_id=TASK_ID, - project_id=GCP_PROJECT, - location=GCP_LOCATION, - prompt=self.vision_prompt, - generation_config=self.generation_config, - safety_settings=self.safety_settings, - pretrained_model=self.pretrained_model, - media_gcs_path=self.media_gcs_path, - mime_type=self.mime_type, - gcp_conn_id=GCP_CONN_ID, - impersonation_chain=IMPERSONATION_CHAIN, - ) - op.execute(context={"ti": mock.MagicMock()}) - mock_hook.assert_called_once_with( - gcp_conn_id=GCP_CONN_ID, - impersonation_chain=IMPERSONATION_CHAIN, - ) - mock_hook.return_value.prompt_multimodal_model_with_media.assert_called_once_with( - project_id=GCP_PROJECT, - location=GCP_LOCATION, - prompt=self.vision_prompt, - generation_config=self.generation_config, - safety_settings=self.safety_settings, - pretrained_model=self.pretrained_model, - media_gcs_path=self.media_gcs_path, - mime_type=self.mime_type, - ) - - class TestVertexAITextGenerationModelPredictOperator: prompt = "In 10 words or less, what is Apache Airflow?" pretrained_model = "text-bison" diff --git a/providers/tests/system/google/cloud/dataflow/example_dataflow_sql.py b/providers/tests/system/google/cloud/dataflow/example_dataflow_sql.py deleted file mode 100644 index 2ba0bf0534c59..0000000000000 --- a/providers/tests/system/google/cloud/dataflow/example_dataflow_sql.py +++ /dev/null @@ -1,149 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-""" -Example Airflow DAG for Google Cloud Dataflow service -""" - -from __future__ import annotations - -import os -from datetime import datetime - -from airflow.models.dag import DAG -from airflow.providers.google.cloud.operators.bigquery import ( - BigQueryCreateEmptyDatasetOperator, - BigQueryCreateEmptyTableOperator, - BigQueryDeleteDatasetOperator, - BigQueryDeleteTableOperator, - BigQueryInsertJobOperator, -) -from airflow.providers.google.cloud.operators.dataflow import DataflowStartSqlJobOperator -from airflow.utils.trigger_rule import TriggerRule - -from providers.tests.system.google import DEFAULT_GCP_SYSTEM_TEST_PROJECT_ID - -PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT") or DEFAULT_GCP_SYSTEM_TEST_PROJECT_ID -ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID", "default") -DAG_ID = "dataflow_sql" -LOCATION = "europe-west3" -DATAFLOW_SQL_JOB_NAME = f"{DAG_ID}_{ENV_ID}".replace("_", "-") -BQ_SQL_DATASET = f"{DAG_ID}_{ENV_ID}".replace("-", "_") -BQ_SQL_TABLE_INPUT = f"input_{ENV_ID}".replace("-", "_") -BQ_SQL_TABLE_OUTPUT = f"output_{ENV_ID}".replace("-", "_") -INSERT_ROWS_QUERY = ( - f"INSERT {BQ_SQL_DATASET}.{BQ_SQL_TABLE_INPUT} VALUES " - "('John Doe', 900), " - "('Alice Storm', 1200)," - "('Bob Max', 1000)," - "('Peter Jackson', 800)," - "('Mia Smith', 1100);" -) - - -with DAG( - dag_id=DAG_ID, - start_date=datetime(2021, 1, 1), - schedule="@once", - catchup=False, - tags=["example", "dataflow-sql"], -) as dag: - create_bq_dataset = BigQueryCreateEmptyDatasetOperator( - task_id="create_bq_dataset", - dataset_id=BQ_SQL_DATASET, - location=LOCATION, - ) - - create_bq_table = BigQueryCreateEmptyTableOperator( - task_id="create_bq_table", - dataset_id=BQ_SQL_DATASET, - table_id=BQ_SQL_TABLE_INPUT, - schema_fields=[ - {"name": "emp_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "salary", "type": "INTEGER", "mode": "NULLABLE"}, - ], - ) - - insert_query_job = BigQueryInsertJobOperator( - task_id="insert_query_job", - configuration={ - "query": { - "query": INSERT_ROWS_QUERY, - "useLegacySql": False, - "priority": "BATCH", - } - }, - location=LOCATION, - ) - - # [START howto_operator_start_sql_job] - start_sql = DataflowStartSqlJobOperator( - task_id="start_sql_query", - job_name=DATAFLOW_SQL_JOB_NAME, - query=f""" - SELECT - emp_name as employee, - salary as employee_salary - FROM - bigquery.table.`{PROJECT_ID}`.`{BQ_SQL_DATASET}`.`{BQ_SQL_TABLE_INPUT}` - WHERE salary >= 1000; - """, - options={ - "bigquery-project": PROJECT_ID, - "bigquery-dataset": BQ_SQL_DATASET, - "bigquery-table": BQ_SQL_TABLE_OUTPUT, - }, - location=LOCATION, - do_xcom_push=True, - ) - # [END howto_operator_start_sql_job] - - delete_bq_table = BigQueryDeleteTableOperator( - task_id="delete_bq_table", - deletion_dataset_table=f"{PROJECT_ID}.{BQ_SQL_DATASET}.{BQ_SQL_TABLE_INPUT}", - trigger_rule=TriggerRule.ALL_DONE, - ) - - delete_bq_dataset = BigQueryDeleteDatasetOperator( - task_id="delete_bq_dataset", - dataset_id=BQ_SQL_DATASET, - delete_contents=True, - trigger_rule=TriggerRule.ALL_DONE, - ) - - ( - # TEST SETUP - create_bq_dataset - >> create_bq_table - >> insert_query_job - # TEST BODY - >> start_sql - # TEST TEARDOWN - >> delete_bq_table - >> delete_bq_dataset - ) - - from tests_common.test_utils.watcher import watcher - - # This test needs watcher in order to properly mark success/failure - # when "tearDown" task with trigger rule is part of the DAG - list(dag.tasks) >> watcher() - -from tests_common.test_utils.system_tests import get_test_run # noqa: E402 - -# Needed to run the example 
DAG with pytest (see: tests/system/README.md#run_via_pytest) -test_run = get_test_run(dag) diff --git a/pyproject.toml b/pyproject.toml index 8f3418fc5a409..e725d8d02dd9f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -625,6 +625,8 @@ dev = [ "apache-airflow-providers-alibaba", "apache-airflow-providers-apache-beam", "apache-airflow-providers-apache-cassandra", + "apache-airflow-providers-apache-drill", + "apache-airflow-providers-apache-druid", "apache-airflow-providers-apache-iceberg", "apache-airflow-providers-apache-kafka", "apache-airflow-providers-apache-kylin", @@ -632,7 +634,9 @@ dev = [ "apache-airflow-providers-apache-pig", "apache-airflow-providers-apache-pinot", "apache-airflow-providers-apache-spark", + "apache-airflow-providers-arangodb", "apache-airflow-providers-apprise", + "apache-airflow-providers-arangodb", "apache-airflow-providers-asana", "apache-airflow-providers-atlassian-jira", "apache-airflow-providers-celery", @@ -642,11 +646,14 @@ dev = [ "apache-airflow-providers-common-sql", "apache-airflow-providers-docker", "apache-airflow-providers-datadog", + "apache-airflow-providers-dbt-cloud", "apache-airflow-providers-discord", "apache-airflow-providers-edge", + "apache-airflow-providers-elasticsearch", "apache-airflow-providers-exasol", "apache-airflow-providers-facebook", "apache-airflow-providers-ftp", + "apache-airflow-providers-github", "apache-airflow-providers-http", "apache-airflow-providers-influxdb", "apache-airflow-providers-mongo", @@ -656,6 +663,7 @@ dev = [ "apache-airflow-providers-neo4j", "apache-airflow-providers-openai", "apache-airflow-providers-openfaas", + "apache-airflow-providers-opensearch", "apache-airflow-providers-opsgenie", "apache-airflow-providers-papermill", "apache-airflow-providers-pgvector", @@ -663,6 +671,7 @@ dev = [ "apache-airflow-providers-odbc", "apache-airflow-providers-jenkins", "apache-airflow-providers-pagerduty", + "apache-airflow-providers-sendgrid", "apache-airflow-providers-sftp", "apache-airflow-providers-slack", "apache-airflow-providers-snowflake", @@ -694,6 +703,8 @@ apache-airflow-providers-airbyte = {workspace = true} apache-airflow-providers-alibaba = { workspace = true } apache-airflow-providers-apache-beam = { workspace = true } apache-airflow-providers-apache-cassandra = { workspace = true } +apache-airflow-providers-apache-drill = { workspace = true } +apache-airflow-providers-apache-druid = { workspace = true } apache-airflow-providers-apache-iceberg = {workspace = true} apache-airflow-providers-apache-kafka = { workspace = true } apache-airflow-providers-apache-kylin = { workspace = true } @@ -701,7 +712,9 @@ apache-airflow-providers-apache-livy = { workspace = true } apache-airflow-providers-apache-pig = { workspace = true } apache-airflow-providers-apache-pinot = { workspace = true } apache-airflow-providers-apache-spark = { workspace = true } +apache-airflow-providers-arangodb = { workspace = true } apache-airflow-providers-apprise = { workspace = true } +apache-airflow-providers-arangodb = { workspace = true } apache-airflow-providers-asana = { workspace = true } apache-airflow-providers-atlassian-jira = { workspace = true } apache-airflow-providers-celery = {workspace = true} @@ -710,12 +723,15 @@ apache-airflow-providers-common-compat = { workspace = true } apache-airflow-providers-common-io = { workspace = true } apache-airflow-providers-common-sql = { workspace = true } apache-airflow-providers-datadog = { workspace = true } +apache-airflow-providers-dbt-cloud = { workspace = true } 
apache-airflow-providers-discord = { workspace = true } apache-airflow-providers-docker = { workspace = true } apache-airflow-providers-edge = {workspace = true} +apache-airflow-providers-elasticsearch = { workspace = true } apache-airflow-providers-exasol = { workspace = true } apache-airflow-providers-facebook = { workspace = true } apache-airflow-providers-ftp = { workspace = true } +apache-airflow-providers-github = { workspace = true } apache-airflow-providers-http = { workspace = true } apache-airflow-providers-influxdb = { workspace = true } apache-airflow-providers-mongo = { workspace = true } @@ -727,6 +743,7 @@ apache-airflow-providers-openai = { workspace = true } apache-airflow-providers-jenkins = { workspace = true } apache-airflow-providers-mysql = { workspace = true } apache-airflow-providers-odbc = { workspace = true } +apache-airflow-providers-opensearch = { workspace = true } apache-airflow-providers-pagerduty = { workspace = true } apache-airflow-providers-openfaas = { workspace = true } apache-airflow-providers-opsgenie = { workspace = true } @@ -738,6 +755,7 @@ apache-airflow-providers-presto = { workspace = true } apache-airflow-providers-qdrant = { workspace = true } apache-airflow-providers-samba = { workspace = true } apache-airflow-providers-segment = { workspace = true } +apache-airflow-providers-sendgrid = { workspace = true } apache-airflow-providers-sftp = { workspace = true } apache-airflow-providers-singularity = { workspace = true } apache-airflow-providers-slack = { workspace = true } @@ -761,6 +779,8 @@ members = [ "providers/alibaba", "providers/apache/beam", "providers/apache/cassandra", + "providers/apache/drill", + "providers/apache/druid", "providers/apache/iceberg", "providers/apache/kafka", "providers/apache/kylin", @@ -768,7 +788,9 @@ members = [ "providers/apache/pig", "providers/apache/pinot", "providers/apache/spark", + "providers/arangodb", "providers/apprise", + "providers/arangodb", "providers/asana", "providers/atlassian/jira", "providers/celery", @@ -777,12 +799,15 @@ members = [ "providers/common/io", "providers/common/sql", "providers/datadog", + "providers/dbt/cloud", "providers/discord", "providers/docker", "providers/edge", + "providers/elasticsearch", "providers/exasol", "providers/facebook", "providers/ftp", + "providers/github", "providers/hashicorp", "providers/http", "providers/imap", @@ -795,6 +820,7 @@ members = [ "providers/openai", "providers/openfaas", "providers/openlineage", + "providers/opensearch", "providers/opsgenie", "providers/pagerduty", "providers/papermill", @@ -805,6 +831,7 @@ members = [ "providers/qdrant", "providers/samba", "providers/segment", + "providers/sendgrid", "providers/sftp", "providers/singularity", "providers/slack", diff --git a/scripts/ci/docker-compose/remove-sources.yml b/scripts/ci/docker-compose/remove-sources.yml index 3e72fa832f890..8149609ddc592 100644 --- a/scripts/ci/docker-compose/remove-sources.yml +++ b/scripts/ci/docker-compose/remove-sources.yml @@ -36,6 +36,8 @@ services: - ../../../empty:/opt/airflow/providers/alibaba/src - ../../../empty:/opt/airflow/providers/apache/beam/src - ../../../empty:/opt/airflow/providers/apache/cassandra/src + - ../../../empty:/opt/airflow/providers/apache/drill/src + - ../../../empty:/opt/airflow/providers/apache/druid/src - ../../../empty:/opt/airflow/providers/apache/iceberg/src - ../../../empty:/opt/airflow/providers/apache/kafka/src - ../../../empty:/opt/airflow/providers/apache/kylin/src @@ -43,7 +45,9 @@ services: - 
../../../empty:/opt/airflow/providers/apache/pig/src - ../../../empty:/opt/airflow/providers/apache/pinot/src - ../../../empty:/opt/airflow/providers/apache/spark/src + - ../../../empty:/opt/airflow/providers/arangodb/src - ../../../empty:/opt/airflow/providers/apprise/src + - ../../../empty:/opt/airflow/providers/arangodb/src - ../../../empty:/opt/airflow/providers/asana/src - ../../../empty:/opt/airflow/providers/atlassian/jira/src - ../../../empty:/opt/airflow/providers/celery/src @@ -52,12 +56,15 @@ services: - ../../../empty:/opt/airflow/providers/common/io/src - ../../../empty:/opt/airflow/providers/common/sql/src - ../../../empty:/opt/airflow/providers/datadog/src + - ../../../empty:/opt/airflow/providers/dbt/cloud/src - ../../../empty:/opt/airflow/providers/discord/src - ../../../empty:/opt/airflow/providers/docker/src - ../../../empty:/opt/airflow/providers/edge/src + - ../../../empty:/opt/airflow/providers/elasticsearch/src - ../../../empty:/opt/airflow/providers/exasol/src - ../../../empty:/opt/airflow/providers/facebook/src - ../../../empty:/opt/airflow/providers/ftp/src + - ../../../empty:/opt/airflow/providers/github/src - ../../../empty:/opt/airflow/providers/hashicorp/src - ../../../empty:/opt/airflow/providers/http/src - ../../../empty:/opt/airflow/providers/imap/src @@ -70,6 +77,7 @@ services: - ../../../empty:/opt/airflow/providers/openai/src - ../../../empty:/opt/airflow/providers/openfaas/src - ../../../empty:/opt/airflow/providers/openlineage/src + - ../../../empty:/opt/airflow/providers/opensearch/src - ../../../empty:/opt/airflow/providers/opsgenie/src - ../../../empty:/opt/airflow/providers/pagerduty/src - ../../../empty:/opt/airflow/providers/papermill/src @@ -80,6 +88,7 @@ services: - ../../../empty:/opt/airflow/providers/qdrant/src - ../../../empty:/opt/airflow/providers/samba/src - ../../../empty:/opt/airflow/providers/segment/src + - ../../../empty:/opt/airflow/providers/sendgrid/src - ../../../empty:/opt/airflow/providers/sftp/src - ../../../empty:/opt/airflow/providers/singularity/src - ../../../empty:/opt/airflow/providers/slack/src diff --git a/scripts/ci/docker-compose/tests-sources.yml b/scripts/ci/docker-compose/tests-sources.yml index 4864c3f5b51e8..0d0e6dccf460b 100644 --- a/scripts/ci/docker-compose/tests-sources.yml +++ b/scripts/ci/docker-compose/tests-sources.yml @@ -43,6 +43,8 @@ services: - ../../../providers/alibaba/tests:/opt/airflow/providers/alibaba/tests - ../../../providers/apache/beam/tests:/opt/airflow/providers/apache/beam/tests - ../../../providers/apache/cassandra/tests:/opt/airflow/providers/apache/cassandra/tests + - ../../../providers/apache/drill/tests:/opt/airflow/providers/apache/drill/tests + - ../../../providers/apache/druid/tests:/opt/airflow/providers/apache/druid/tests - ../../../providers/apache/iceberg/tests:/opt/airflow/providers/apache/iceberg/tests - ../../../providers/apache/kafka/tests:/opt/airflow/providers/apache/kafka/tests - ../../../providers/apache/kylin/tests:/opt/airflow/providers/apache/kylin/tests @@ -50,7 +52,9 @@ services: - ../../../providers/apache/pig/tests:/opt/airflow/providers/apache/pig/tests - ../../../providers/apache/pinot/tests:/opt/airflow/providers/apache/pinot/tests - ../../../providers/apache/spark/tests:/opt/airflow/providers/apache/spark/tests + - ../../../providers/arangodb/tests:/opt/airflow/providers/arangodb/tests - ../../../providers/apprise/tests:/opt/airflow/providers/apprise/tests + - ../../../providers/arangodb/tests:/opt/airflow/providers/arangodb/tests - 
../../../providers/asana/tests:/opt/airflow/providers/asana/tests - ../../../providers/atlassian/jira/tests:/opt/airflow/providers/atlassian/jira/tests - ../../../providers/celery/tests:/opt/airflow/providers/celery/tests @@ -59,12 +63,15 @@ services: - ../../../providers/common/io/tests:/opt/airflow/providers/common/io/tests - ../../../providers/common/sql/tests:/opt/airflow/providers/common/sql/tests - ../../../providers/datadog/tests:/opt/airflow/providers/datadog/tests + - ../../../providers/dbt/cloud/tests:/opt/airflow/providers/dbt/cloud/tests - ../../../providers/discord/tests:/opt/airflow/providers/discord/tests - ../../../providers/docker/tests:/opt/airflow/providers/docker/tests - ../../../providers/edge/tests:/opt/airflow/providers/edge/tests + - ../../../providers/elasticsearch/tests:/opt/airflow/providers/elasticsearch/tests - ../../../providers/exasol/tests:/opt/airflow/providers/exasol/tests - ../../../providers/facebook/tests:/opt/airflow/providers/facebook/tests - ../../../providers/ftp/tests:/opt/airflow/providers/ftp/tests + - ../../../providers/github/tests:/opt/airflow/providers/github/tests - ../../../providers/hashicorp/tests:/opt/airflow/providers/hashicorp/tests - ../../../providers/http/tests:/opt/airflow/providers/http/tests - ../../../providers/imap/tests:/opt/airflow/providers/imap/tests @@ -77,6 +84,7 @@ services: - ../../../providers/openai/tests:/opt/airflow/providers/openai/tests - ../../../providers/openfaas/tests:/opt/airflow/providers/openfaas/tests - ../../../providers/openlineage/tests:/opt/airflow/providers/openlineage/tests + - ../../../providers/opensearch/tests:/opt/airflow/providers/opensearch/tests - ../../../providers/opsgenie/tests:/opt/airflow/providers/opsgenie/tests - ../../../providers/pagerduty/tests:/opt/airflow/providers/pagerduty/tests - ../../../providers/papermill/tests:/opt/airflow/providers/papermill/tests @@ -87,6 +95,7 @@ services: - ../../../providers/qdrant/tests:/opt/airflow/providers/qdrant/tests - ../../../providers/samba/tests:/opt/airflow/providers/samba/tests - ../../../providers/segment/tests:/opt/airflow/providers/segment/tests + - ../../../providers/sendgrid/tests:/opt/airflow/providers/sendgrid/tests - ../../../providers/sftp/tests:/opt/airflow/providers/sftp/tests - ../../../providers/singularity/tests:/opt/airflow/providers/singularity/tests - ../../../providers/slack/tests:/opt/airflow/providers/slack/tests diff --git a/task_sdk/src/airflow/sdk/__init__.py b/task_sdk/src/airflow/sdk/__init__.py index b8d6b6609dba7..6762f43ef8a30 100644 --- a/task_sdk/src/airflow/sdk/__init__.py +++ b/task_sdk/src/airflow/sdk/__init__.py @@ -33,7 +33,7 @@ "get_parsing_context", ] -__version__ = "1.0.0.dev1" +__version__ = "1.0.0.alpha1" if TYPE_CHECKING: from airflow.sdk.definitions.baseoperator import BaseOperator @@ -48,6 +48,8 @@ __lazy_imports: dict[str, str] = { "BaseOperator": ".definitions.baseoperator", "Connection": ".definitions.connection", + "Param": ".definitions.param", + "ParamsDict": ".definitions.param", "DAG": ".definitions.dag", "EdgeModifier": ".definitions.edges", "Label": ".definitions.edges", diff --git a/task_sdk/src/airflow/sdk/definitions/asset/decorators.py b/task_sdk/src/airflow/sdk/definitions/asset/decorators.py index 1f1d90883240b..579cc94b3ce34 100644 --- a/task_sdk/src/airflow/sdk/definitions/asset/decorators.py +++ b/task_sdk/src/airflow/sdk/definitions/asset/decorators.py @@ -31,9 +31,9 @@ from sqlalchemy.orm import Session from airflow.io.path import ObjectStoragePath - from airflow.models.param 
import ParamsDict from airflow.sdk.definitions.asset import AssetAlias, AssetUniqueKey from airflow.sdk.definitions.dag import DAG, DagStateChangeCallback, ScheduleArg + from airflow.sdk.definitions.param import ParamsDict from airflow.serialization.dag_dependency import DagDependency from airflow.triggers.base import BaseTrigger from airflow.typing_compat import Self diff --git a/task_sdk/src/airflow/sdk/definitions/baseoperator.py b/task_sdk/src/airflow/sdk/definitions/baseoperator.py index e7ecec69411ba..14d67656008e5 100644 --- a/task_sdk/src/airflow/sdk/definitions/baseoperator.py +++ b/task_sdk/src/airflow/sdk/definitions/baseoperator.py @@ -33,7 +33,6 @@ import attrs -from airflow.models.param import ParamsDict from airflow.sdk.definitions._internal.abstractoperator import ( DEFAULT_IGNORE_FIRST_DEPENDS_ON_PAST, DEFAULT_OWNER, @@ -54,6 +53,7 @@ from airflow.sdk.definitions._internal.node import validate_key from airflow.sdk.definitions._internal.types import NOTSET, ArgNotSet, validate_instance_args from airflow.sdk.definitions.mappedoperator import OperatorPartial, validate_mapping_kwargs +from airflow.sdk.definitions.param import ParamsDict from airflow.task.priority_strategy import ( PriorityWeightStrategy, airflow_priority_weight_strategies, diff --git a/task_sdk/src/airflow/sdk/definitions/dag.py b/task_sdk/src/airflow/sdk/definitions/dag.py index 1c37a57e17bcd..5662d542859f7 100644 --- a/task_sdk/src/airflow/sdk/definitions/dag.py +++ b/task_sdk/src/airflow/sdk/definitions/dag.py @@ -51,12 +51,12 @@ ParamValidationError, TaskNotFound, ) -from airflow.models.param import DagParam, ParamsDict from airflow.sdk.definitions._internal.abstractoperator import AbstractOperator from airflow.sdk.definitions._internal.types import NOTSET from airflow.sdk.definitions.asset import AssetAll, BaseAsset from airflow.sdk.definitions.baseoperator import BaseOperator from airflow.sdk.definitions.context import Context +from airflow.sdk.definitions.param import DagParam, ParamsDict from airflow.timetables.base import Timetable from airflow.timetables.simple import ( AssetTriggeredTimetable, diff --git a/task_sdk/src/airflow/sdk/definitions/mappedoperator.py b/task_sdk/src/airflow/sdk/definitions/mappedoperator.py index 0fc0a7fa1896a..00bd2ab8ab2f9 100644 --- a/task_sdk/src/airflow/sdk/definitions/mappedoperator.py +++ b/task_sdk/src/airflow/sdk/definitions/mappedoperator.py @@ -72,12 +72,13 @@ OperatorExpandArgument, OperatorExpandKwargsArgument, ) - from airflow.models.param import ParamsDict from airflow.models.xcom_arg import XComArg from airflow.sdk.definitions.baseoperator import BaseOperator from airflow.sdk.definitions.dag import DAG + from airflow.sdk.definitions.param import ParamsDict from airflow.sdk.types import Operator from airflow.ti_deps.deps.base_ti_dep import BaseTIDep + from airflow.typing_compat import TypeGuard from airflow.utils.context import Context from airflow.utils.operator_resources import Resources from airflow.utils.task_group import TaskGroup @@ -136,6 +137,22 @@ def ensure_xcomarg_return_value(arg: Any) -> None: ensure_xcomarg_return_value(v) +def is_mappable_value(value: Any) -> TypeGuard[Collection]: + """ + Whether a value can be used for task mapping. + + We only allow collections with guaranteed ordering, but exclude character + sequences since that's usually not what users would expect to be mappable. 
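The rule stated here means ordered collections qualify for task mapping while character and byte sequences do not; a few illustrative calls against the implementation that follows:

is_mappable_value([1, 2, 3])   # True  - list (ordered Sequence)
is_mappable_value(("a", "b"))  # True  - tuple
is_mappable_value({"k": "v"})  # True  - dict
is_mappable_value("abc")       # False - str is excluded even though it is a Sequence
is_mappable_value(b"abc")      # False - bytes excluded for the same reason
is_mappable_value({1, 2, 3})   # False - sets have no guaranteed ordering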
+ + :meta private: + """ + if not isinstance(value, (Sequence, dict)): + return False + if isinstance(value, (bytearray, bytes, str)): + return False + return True + + @attrs.define(kw_only=True, repr=False) class OperatorPartial: """ diff --git a/task_sdk/src/airflow/sdk/definitions/param.py b/task_sdk/src/airflow/sdk/definitions/param.py new file mode 100644 index 0000000000000..cd3ccec26a48a --- /dev/null +++ b/task_sdk/src/airflow/sdk/definitions/param.py @@ -0,0 +1,353 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import contextlib +import copy +import json +import logging +from collections.abc import ItemsView, Iterable, MutableMapping, ValuesView +from typing import TYPE_CHECKING, Any, ClassVar + +from airflow.exceptions import AirflowException, ParamValidationError +from airflow.sdk.definitions._internal.mixins import ResolveMixin +from airflow.utils.types import NOTSET, ArgNotSet + +if TYPE_CHECKING: + from airflow.sdk.definitions.context import Context + from airflow.sdk.definitions.dag import DAG + from airflow.sdk.types import Operator + +logger = logging.getLogger(__name__) + + +class Param: + """ + Class to hold the default value of a Param and rule set to do the validations. + + Without the rule set it always validates and returns the default value. + + :param default: The value this Param object holds + :param description: Optional help text for the Param + :param schema: The validation schema of the Param, if not given then all kwargs except + default & description will form the schema + """ + + __version__: ClassVar[int] = 1 + + CLASS_IDENTIFIER = "__class" + + def __init__(self, default: Any = NOTSET, description: str | None = None, **kwargs): + if default is not NOTSET: + self._check_json(default) + self.value = default + self.description = description + self.schema = kwargs.pop("schema") if "schema" in kwargs else kwargs + + def __copy__(self) -> Param: + return Param(self.value, self.description, schema=self.schema) + + @staticmethod + def _check_json(value): + try: + json.dumps(value) + except Exception: + raise ParamValidationError( + "All provided parameters must be json-serializable. " + f"The value '{value}' is not serializable." + ) + + def resolve(self, value: Any = NOTSET, suppress_exception: bool = False) -> Any: + """ + Run the validations and returns the Param's final value. + + May raise ValueError on failed validations, or TypeError + if no value is passed and no value already exists. + We first check that value is json-serializable; if not, warn. + In future release we will require the value to be json-serializable. + + :param value: The value to be updated for the Param + :param suppress_exception: To raise an exception or not when the validations fails. 
+ If true and validations fails, the return value would be None. + """ + import jsonschema + from jsonschema import FormatChecker + from jsonschema.exceptions import ValidationError + + if value is not NOTSET: + self._check_json(value) + final_val = self.value if value is NOTSET else value + if isinstance(final_val, ArgNotSet): + if suppress_exception: + return None + raise ParamValidationError("No value passed and Param has no default value") + try: + jsonschema.validate(final_val, self.schema, format_checker=FormatChecker()) + except ValidationError as err: + if suppress_exception: + return None + raise ParamValidationError(err) from None + self.value = final_val + return final_val + + def dump(self) -> dict: + """Dump the Param as a dictionary.""" + out_dict: dict[str, str | None] = { + self.CLASS_IDENTIFIER: f"{self.__module__}.{self.__class__.__name__}" + } + out_dict.update(self.__dict__) + # Ensure that not set is translated to None + if self.value is NOTSET: + out_dict["value"] = None + return out_dict + + @property + def has_value(self) -> bool: + return self.value is not NOTSET and self.value is not None + + def serialize(self) -> dict: + return {"value": self.value, "description": self.description, "schema": self.schema} + + @staticmethod + def deserialize(data: dict[str, Any], version: int) -> Param: + if version > Param.__version__: + raise TypeError("serialized version > class version") + + return Param(default=data["value"], description=data["description"], schema=data["schema"]) + + +class ParamsDict(MutableMapping[str, Any]): + """ + Class to hold all params for dags or tasks. + + All the keys are strictly string and values are converted into Param's object + if they are not already. This class is to replace param's dictionary implicitly + and ideally not needed to be used directly. + + + :param dict_obj: A dict or dict like object to init ParamsDict + :param suppress_exception: Flag to suppress value exceptions while initializing the ParamsDict + """ + + __version__: ClassVar[int] = 1 + __slots__ = ["__dict", "suppress_exception"] + + def __init__(self, dict_obj: MutableMapping | None = None, suppress_exception: bool = False): + params_dict: dict[str, Param] = {} + dict_obj = dict_obj or {} + for k, v in dict_obj.items(): + if not isinstance(v, Param): + params_dict[k] = Param(v) + else: + params_dict[k] = v + self.__dict = params_dict + self.suppress_exception = suppress_exception + + def __bool__(self) -> bool: + return bool(self.__dict) + + def __eq__(self, other: Any) -> bool: + if isinstance(other, ParamsDict): + return self.dump() == other.dump() + if isinstance(other, dict): + return self.dump() == other + return NotImplemented + + def __copy__(self) -> ParamsDict: + return ParamsDict(self.__dict, self.suppress_exception) + + def __deepcopy__(self, memo: dict[int, Any] | None) -> ParamsDict: + return ParamsDict(copy.deepcopy(self.__dict, memo), self.suppress_exception) + + def __contains__(self, o: object) -> bool: + return o in self.__dict + + def __len__(self) -> int: + return len(self.__dict) + + def __delitem__(self, v: str) -> None: + del self.__dict[v] + + def __iter__(self): + return iter(self.__dict) + + def __repr__(self): + return repr(self.dump()) + + def __setitem__(self, key: str, value: Any) -> None: + """ + Override for dictionary's ``setitem`` method to ensure all values are of Param's type only. + + :param key: A key which needs to be inserted or updated in the dict + :param value: A value which needs to be set against the key. 
It could be of any + type but will be converted and stored as a Param object eventually. + """ + if isinstance(value, Param): + param = value + elif key in self.__dict: + param = self.__dict[key] + try: + param.resolve(value=value, suppress_exception=self.suppress_exception) + except ParamValidationError as ve: + raise ParamValidationError(f"Invalid input for param {key}: {ve}") from None + else: + # if the key isn't there already and if the value isn't of Param type create a new Param object + param = Param(value) + + self.__dict[key] = param + + def __getitem__(self, key: str) -> Any: + """ + Override for dictionary's ``getitem`` method to call the resolve method after fetching the key. + + :param key: The key to fetch + """ + param = self.__dict[key] + return param.resolve(suppress_exception=self.suppress_exception) + + def get_param(self, key: str) -> Param: + """Get the internal :class:`.Param` object for this key.""" + return self.__dict[key] + + def items(self): + return ItemsView(self.__dict) + + def values(self): + return ValuesView(self.__dict) + + def update(self, *args, **kwargs) -> None: + if len(args) == 1 and not kwargs and isinstance(args[0], ParamsDict): + return super().update(args[0].__dict) + super().update(*args, **kwargs) + + def dump(self) -> dict[str, Any]: + """Dump the ParamsDict object as a dictionary, while suppressing exceptions.""" + return {k: v.resolve(suppress_exception=True) for k, v in self.items()} + + def validate(self) -> dict[str, Any]: + """Validate & returns all the Params object stored in the dictionary.""" + resolved_dict = {} + try: + for k, v in self.items(): + resolved_dict[k] = v.resolve(suppress_exception=self.suppress_exception) + except ParamValidationError as ve: + raise ParamValidationError(f"Invalid input for param {k}: {ve}") from None + + return resolved_dict + + def serialize(self) -> dict[str, Any]: + return self.dump() + + @staticmethod + def deserialize(data: dict, version: int) -> ParamsDict: + if version > ParamsDict.__version__: + raise TypeError("serialized version > class version") + + return ParamsDict(data) + + +class DagParam(ResolveMixin): + """ + DAG run parameter reference. + + This binds a simple Param object to a name within a DAG instance, so that it + can be resolved during the runtime via the ``{{ context }}`` dictionary. The + ideal use case of this class is to implicitly convert args passed to a + method decorated by ``@dag``. + + It can be used to parameterize a DAG. You can overwrite its value by setting + it on conf when you trigger your DagRun. + + This can also be used in templates by accessing ``{{ context.params }}``. + + **Example**: + + with DAG(...) as dag: + EmailOperator(subject=dag.param('subject', 'Hi from Airflow!')) + + :param current_dag: Dag being used for parameter. + :param name: key value which is used to set the parameter + :param default: Default value used if no parameter was set. + """ + + def __init__(self, current_dag: DAG, name: str, default: Any = NOTSET): + if default is not NOTSET: + current_dag.params[name] = default + self._name = name + self._default = default + self.current_dag = current_dag + + def iter_references(self) -> Iterable[tuple[Operator, str]]: + return () + + def resolve(self, context: Context, *, include_xcom: bool = True) -> Any: + """Pull DagParam value from DagRun context. 
This method is run during ``op.execute()``.""" + with contextlib.suppress(KeyError): + if context["dag_run"].conf: + return context["dag_run"].conf[self._name] + if self._default is not NOTSET: + return self._default + with contextlib.suppress(KeyError): + return context["params"][self._name] + raise AirflowException(f"No value could be resolved for parameter {self._name}") + + def serialize(self) -> dict: + """Serialize the DagParam object into a dictionary.""" + return { + "dag_id": self.current_dag.dag_id, + "name": self._name, + "default": self._default, + } + + @classmethod + def deserialize(cls, data: dict, dags: dict) -> DagParam: + """ + Deserializes the dictionary back into a DagParam object. + + :param data: The serialized representation of the DagParam. + :param dags: A dictionary of available DAGs to look up the DAG. + """ + dag_id = data["dag_id"] + # Retrieve the current DAG from the provided DAGs dictionary + current_dag = dags.get(dag_id) + if not current_dag: + raise ValueError(f"DAG with id {dag_id} not found.") + + return cls(current_dag=current_dag, name=data["name"], default=data["default"]) + + +def process_params( + dag: DAG, + task: Operator, + dagrun_conf: dict[str, Any] | None, + *, + suppress_exception: bool, +) -> dict[str, Any]: + """Merge, validate params, and convert them into a simple dict.""" + from airflow.configuration import conf + + dagrun_conf = dagrun_conf or {} + + params = ParamsDict(suppress_exception=suppress_exception) + with contextlib.suppress(AttributeError): + params.update(dag.params) + if task.params: + params.update(task.params) + if conf.getboolean("core", "dag_run_conf_overrides_params") and dagrun_conf: + logger.debug("Updating task params (%s) with DagRun.conf (%s)", params, dagrun_conf) + params.update(dagrun_conf) + return params.validate() diff --git a/task_sdk/src/airflow/sdk/exceptions.py b/task_sdk/src/airflow/sdk/exceptions.py index c713f38eef861..4dd4ff5910a06 100644 --- a/task_sdk/src/airflow/sdk/exceptions.py +++ b/task_sdk/src/airflow/sdk/exceptions.py @@ -18,7 +18,7 @@ from __future__ import annotations import enum -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any if TYPE_CHECKING: from airflow.sdk.execution_time.comms import ErrorResponse @@ -35,3 +35,23 @@ class ErrorType(enum.Enum): VARIABLE_NOT_FOUND = "VARIABLE_NOT_FOUND" XCOM_NOT_FOUND = "XCOM_NOT_FOUND" GENERIC_ERROR = "GENERIC_ERROR" + + +class XComForMappingNotPushed(TypeError): + """Raise when a mapped downstream's dependency fails to push XCom for task mapping.""" + + def __str__(self) -> str: + return "did not push XCom for task mapping" + + +class UnmappableXComTypePushed(TypeError): + """Raise when an unmappable type is pushed as a mapped downstream's dependency.""" + + def __init__(self, value: Any, *values: Any) -> None: + super().__init__(value, *values) + + def __str__(self) -> str: + typename = type(self.args[0]).__qualname__ + for arg in self.args[1:]: + typename = f"{typename}[{type(arg).__qualname__}]" + return f"unmappable return type {typename!r}" diff --git a/task_sdk/src/airflow/sdk/execution_time/comms.py b/task_sdk/src/airflow/sdk/execution_time/comms.py index 93ea133f4895f..1f398cb8b6009 100644 --- a/task_sdk/src/airflow/sdk/execution_time/comms.py +++ b/task_sdk/src/airflow/sdk/execution_time/comms.py @@ -268,6 +268,7 @@ class SetXCom(BaseModel): run_id: str task_id: str map_index: int | None = None + mapped_length: int | None = None type: Literal["SetXCom"] = "SetXCom" diff --git 
a/task_sdk/src/airflow/sdk/execution_time/supervisor.py b/task_sdk/src/airflow/sdk/execution_time/supervisor.py index 30050c0b95519..3a65247e0ee40 100644 --- a/task_sdk/src/airflow/sdk/execution_time/supervisor.py +++ b/task_sdk/src/airflow/sdk/execution_time/supervisor.py @@ -166,38 +166,27 @@ def _configure_logs_over_json_channel(log_fd: int): from airflow.sdk.log import configure_logging log_io = os.fdopen(log_fd, "wb", buffering=0) - configure_logging(enable_pretty_log=False, output=log_io) + configure_logging(enable_pretty_log=False, output=log_io, sending_to_supervisor=True) def _reopen_std_io_handles(child_stdin, child_stdout, child_stderr): - if "PYTEST_CURRENT_TEST" in os.environ: - # When we are running in pytest, it's output capturing messes us up. This works around it - sys.stdout = sys.__stdout__ - sys.stderr = sys.__stderr__ - # Ensure that sys.stdout et al (and the underlying filehandles for C libraries etc) are connected to the # pipes from the supervisor - for handle_name, sock, mode in ( - ("stdin", child_stdin, "r"), - ("stdout", child_stdout, "w"), - ("stderr", child_stderr, "w"), + for handle_name, fd, sock, mode in ( + ("stdin", 0, child_stdin, "r"), + ("stdout", 1, child_stdout, "w"), + ("stderr", 2, child_stderr, "w"), ): handle = getattr(sys, handle_name) - try: - fd = handle.fileno() - os.dup2(sock.fileno(), fd) - # dup2 creates another open copy of the fd, we can close the "socket" copy of it. - sock.close() - except io.UnsupportedOperation: - if "PYTEST_CURRENT_TEST" in os.environ: - # When we're running under pytest, the stdin is not a real filehandle with an fd, so we need - # to handle that differently - fd = sock.fileno() - else: - raise - # We can't open text mode fully unbuffered (python throws an exception if we try), but we can make it line buffered with `buffering=1` - handle = os.fdopen(fd, mode, buffering=1) + handle.close() + os.dup2(sock.fileno(), fd) + del sock + + # We open the socket/fd as binary, and then pass it to a TextIOWrapper so that it looks more like a + # normal sys.stdout etc. + binary = os.fdopen(fd, mode + "b") + handle = io.TextIOWrapper(binary, line_buffering=True) setattr(sys, handle_name, handle) @@ -352,8 +341,19 @@ def start( del constructor_kwargs del logger - # Run the child entrypoint - _fork_main(child_stdin, child_stdout, child_stderr, child_logs.fileno(), target) + try: + # Run the child entrypoint + _fork_main(child_stdin, child_stdout, child_stderr, child_logs.fileno(), target) + except BaseException as e: + try: + # We can't use log here, as if we except out of _fork_main something _weird_ went on. + print("Exception in _fork_main, exiting with code 124", e, file=sys.stderr) + except BaseException as e: + pass + + # It's really super super important we never exit this block. We are in the forked child, and if we + # do then _THINGS GET WEIRD_.. 
(Normally `_fork_main` itself will `_exit()` so we never get here) + os._exit(124) requests_fd = child_comms.fileno() diff --git a/task_sdk/src/airflow/sdk/execution_time/task_runner.py b/task_sdk/src/airflow/sdk/execution_time/task_runner.py index 9e0bb13972a5b..a0f6189b89b2d 100644 --- a/task_sdk/src/airflow/sdk/execution_time/task_runner.py +++ b/task_sdk/src/airflow/sdk/execution_time/task_runner.py @@ -37,6 +37,7 @@ from airflow.sdk.definitions._internal.dag_parsing_context import _airflow_parsing_context_manager from airflow.sdk.definitions.asset import Asset, AssetAlias, AssetNameRef, AssetUriRef from airflow.sdk.definitions.baseoperator import BaseOperator +from airflow.sdk.definitions.param import process_params from airflow.sdk.execution_time.comms import ( DeferTask, GetXCom, @@ -86,6 +87,16 @@ def get_template_context(self) -> Context: # TODO: Move this to `airflow.sdk.execution_time.context` # once we port the entire context logic from airflow/utils/context.py ? + dag_run_conf = None + if ( + self._ti_context_from_server + and self._ti_context_from_server.dag_run + and self._ti_context_from_server.dag_run.conf + ): + dag_run_conf = self._ti_context_from_server.dag_run.conf + + validated_params = process_params(self.task.dag, self.task, dag_run_conf, suppress_exception=False) + # TODO: Assess if we need to it through airflow.utils.timezone.coerce_datetime() context: Context = { # From the Task Execution interface @@ -102,7 +113,7 @@ def get_template_context(self) -> Context: "outlet_events": OutletEventAccessors(), # "inlet_events": InletEventsAccessors(task.inlets, session=session), "macros": MacrosAccessor(), - # "params": validated_params, + "params": validated_params, # TODO: Make this go through Public API longer term. # "test_mode": task_instance.test_mode, # "triggering_asset_events": lazy_object_proxy.Proxy(get_triggering_events), @@ -284,25 +295,9 @@ def xcom_push(self, key: str, value: Any): Make an XCom available for tasks to pull. :param key: Key to store the value under. - :param value: Value to store. Only be JSON-serializable may be used otherwise. + :param value: Value to store. Only be JSON-serializable values may be used. 
""" - from airflow.models.xcom import XCom - - # TODO: Move XCom serialization & deserialization to Task SDK - # https://github.com/apache/airflow/issues/45231 - value = XCom.serialize_value(value) - - log = structlog.get_logger(logger_name="task") - SUPERVISOR_COMMS.send_request( - log=log, - msg=SetXCom( - key=key, - value=value, - dag_id=self.dag_id, - task_id=self.task_id, - run_id=self.run_id, - ), - ) + _xcom_push(self, key, value) def get_relevant_upstream_map_indexes( self, upstream: BaseOperator, ti_count: int | None, session: Any @@ -311,6 +306,30 @@ def get_relevant_upstream_map_indexes( return None +def _xcom_push(ti: RuntimeTaskInstance, key: str, value: Any, mapped_length: int | None = None) -> None: + # Private function, as we don't want to expose the ability to manually set `mapped_length` to SDK + # consumers + from airflow.models.xcom import XCom + + # TODO: Move XCom serialization & deserialization to Task SDK + # https://github.com/apache/airflow/issues/45231 + value = XCom.serialize_value(value) + + log = structlog.get_logger(logger_name="task") + SUPERVISOR_COMMS.send_request( + log=log, + msg=SetXCom( + key=key, + value=value, + dag_id=ti.dag_id, + task_id=ti.task_id, + run_id=ti.run_id, + map_index=ti.map_index, + mapped_length=mapped_length, + ), + ) + + def parse(what: StartupDetails) -> RuntimeTaskInstance: # TODO: Task-SDK: # Using DagBag here is about 98% wrong, but it'll do for now @@ -508,32 +527,31 @@ def run(ti: RuntimeTaskInstance, log: Logger): inlets = [asset.asprofile() for asset in ti.task.inlets if isinstance(asset, Asset)] outlets = [asset.asprofile() for asset in ti.task.outlets if isinstance(asset, Asset)] SUPERVISOR_COMMS.send_request(msg=RuntimeCheckOnTask(inlets=inlets, outlets=outlets), log=log) # type: ignore - msg = SUPERVISOR_COMMS.get_message() # type: ignore - - if isinstance(msg, OKResponse) and not msg.ok: - log.info("Runtime checks failed for task, marking task as failed..") - msg = TaskState( - state=TerminalTIState.FAILED, - end_date=datetime.now(tz=timezone.utc), - ) - else: - context = ti.get_template_context() - with set_current_context(context): - jinja_env = ti.task.dag.get_template_env() - ti.task = ti.render_templates(context=context, jinja_env=jinja_env) - # TODO: Get things from _execute_task_with_callbacks - # - Pre Execute - # etc - result = _execute_task(context, ti.task) - - _push_xcom_if_needed(result, ti) - - task_outlets, outlet_events = _process_outlets(context, ti.task.outlets) - msg = SucceedTask( - end_date=datetime.now(tz=timezone.utc), - task_outlets=task_outlets, - outlet_events=outlet_events, - ) + ok_response = SUPERVISOR_COMMS.get_message() # type: ignore + if not isinstance(ok_response, OKResponse) or not ok_response.ok: + log.info("Runtime checks failed for task, marking task as failed..") + msg = TaskState( + state=TerminalTIState.FAILED, + end_date=datetime.now(tz=timezone.utc), + ) + return + context = ti.get_template_context() + with set_current_context(context): + jinja_env = ti.task.dag.get_template_env() + ti.task = ti.render_templates(context=context, jinja_env=jinja_env) + # TODO: Get things from _execute_task_with_callbacks + # - Pre Execute + # etc + result = _execute_task(context, ti.task) + + _push_xcom_if_needed(result, ti) + + task_outlets, outlet_events = _process_outlets(context, ti.task.outlets) + msg = SucceedTask( + end_date=datetime.now(tz=timezone.utc), + task_outlets=task_outlets, + outlet_events=outlet_events, + ) except TaskDeferred as defer: # TODO: Should we use 
structlog.bind_contextvars here for dag_id, task_id & run_id? log.info("Pausing task as DEFERRED. ", dag_id=ti.dag_id, task_id=ti.task_id, run_id=ti.run_id) @@ -599,8 +617,9 @@ def run(ti: RuntimeTaskInstance, log: Logger): log.exception("Task failed with exception") # TODO: Run task failure callbacks here msg = TaskState(state=TerminalTIState.FAILED, end_date=datetime.now(tz=timezone.utc)) - if msg: - SUPERVISOR_COMMS.send_request(msg=msg, log=log) + finally: + if msg: + SUPERVISOR_COMMS.send_request(msg=msg, log=log) def _execute_task(context: Context, task: BaseOperator): @@ -634,10 +653,25 @@ def _push_xcom_if_needed(result: Any, ti: RuntimeTaskInstance): else: xcom_value = None - # If the task returns a result, push an XCom containing it. + is_mapped = next(ti.task.iter_mapped_dependants(), None) is not None or ti.task.is_mapped + if xcom_value is None: + if is_mapped: + # Uhoh, a downstream mapped task depends on us to push something to map over + from airflow.sdk.exceptions import XComForMappingNotPushed + + raise XComForMappingNotPushed() return + mapped_length: int | None = None + if is_mapped: + from airflow.sdk.definitions.mappedoperator import is_mappable_value + from airflow.sdk.exceptions import UnmappableXComTypePushed + + if not is_mappable_value(xcom_value): + raise UnmappableXComTypePushed(xcom_value) + mapped_length = len(xcom_value) + # If the task has multiple outputs, push each output as a separate XCom. if ti.task.multiple_outputs: if not isinstance(xcom_value, Mapping): @@ -654,7 +688,7 @@ def _push_xcom_if_needed(result: Any, ti: RuntimeTaskInstance): ti.xcom_push(k, v) # TODO: Use constant for XCom return key & use serialize_value from Task SDK - ti.xcom_push("return_value", result) + _xcom_push(ti, "return_value", result, mapped_length=mapped_length) def finalize(log: Logger): ... diff --git a/task_sdk/src/airflow/sdk/log.py b/task_sdk/src/airflow/sdk/log.py index fa5b113588bf5..8549518e205b2 100644 --- a/task_sdk/src/airflow/sdk/log.py +++ b/task_sdk/src/airflow/sdk/log.py @@ -17,6 +17,7 @@ # under the License. from __future__ import annotations +import io import itertools import logging.config import os @@ -39,7 +40,9 @@ ] -def exception_group_tracebacks(format_exception: Callable[[ExcInfo], list[dict[str, Any]]]) -> Processor: +def exception_group_tracebacks( + format_exception: Callable[[ExcInfo], list[dict[str, Any]]], +) -> Processor: # Make mypy happy if not hasattr(__builtins__, "BaseExceptionGroup"): T = TypeVar("T") @@ -178,13 +181,6 @@ def logging_processors( "console": console, } else: - # Imports to suppress showing code from these modules - import contextlib - - import click - import httpcore - import httpx - dict_exc_formatter = structlog.tracebacks.ExceptionDictTransformer( use_rich=False, show_locals=False, suppress=suppress ) @@ -197,9 +193,19 @@ def logging_processors( exc_group_processor = None def json_dumps(msg, default): + # Note: this is likely an "expensive" step, but lets massage the dict order for nice + # viewing of the raw JSON logs. 
+ # Maybe we don't need this once the UI renders the JSON instead of displaying the raw text + msg = { + "timestamp": msg.pop("timestamp"), + "level": msg.pop("level"), + "event": msg.pop("event"), + **msg, + } return msgspec.json.encode(msg, enc_hook=default) def json_processor(logger: Any, method_name: Any, event_dict: EventDict) -> str: + # Stdlib logging doesn't need the re-ordering, it's fine as it is return msgspec.json.encode(event_dict).decode("utf-8") json = structlog.processors.JSONRenderer(serializer=json_dumps) @@ -224,13 +230,11 @@ def json_processor(logger: Any, method_name: Any, event_dict: EventDict) -> str: def configure_logging( enable_pretty_log: bool = True, log_level: str = "DEBUG", - output: BinaryIO | None = None, + output: BinaryIO | TextIO | None = None, cache_logger_on_first_use: bool = True, + sending_to_supervisor: bool = False, ): """Set up struct logging and stdlib logging config.""" - if enable_pretty_log and output is not None: - raise ValueError("output can only be set if enable_pretty_log is not") - lvl = structlog.stdlib.NAME_TO_LEVEL[log_level.lower()] if enable_pretty_log: @@ -263,13 +267,30 @@ def configure_logging( wrapper_class = structlog.make_filtering_bound_logger(lvl) if enable_pretty_log: + if output is not None and not isinstance(output, TextIO): + wrapper = io.TextIOWrapper(output, line_buffering=True) + logger_factory = structlog.WriteLoggerFactory(wrapper) + else: + logger_factory = structlog.WriteLoggerFactory(output) structlog.configure( processors=processors, cache_logger_on_first_use=cache_logger_on_first_use, wrapper_class=wrapper_class, + logger_factory=logger_factory, ) color_formatter.append(named["console"]) else: + if output is not None and "b" not in output.mode: + if not hasattr(output, "buffer"): + raise ValueError( + f"output needed to be a binary stream, but it didn't have a buffer attribute ({output=})" + ) + else: + output = output.buffer + if TYPE_CHECKING: + # Not all binary streams are isinstance of BinaryIO, so we check via looking at `mode` at + # runtime. 
mypy doesn't grok that though + assert isinstance(output, BinaryIO) structlog.configure( processors=processors, cache_logger_on_first_use=cache_logger_on_first_use, @@ -324,7 +345,7 @@ def configure_logging( "loggers": { # Set Airflow logging to the level requested, but most everything else at "INFO" "": { - "handlers": ["to_supervisor" if output else "default"], + "handlers": ["to_supervisor" if sending_to_supervisor else "default"], "level": "INFO", "propagate": True, }, @@ -413,10 +434,12 @@ def init_log_file(local_relative_path: str) -> Path: from airflow.configuration import conf new_file_permissions = int( - conf.get("logging", "file_task_handler_new_file_permissions", fallback="0o664"), 8 + conf.get("logging", "file_task_handler_new_file_permissions", fallback="0o664"), + 8, ) new_folder_permissions = int( - conf.get("logging", "file_task_handler_new_folder_permissions", fallback="0o775"), 8 + conf.get("logging", "file_task_handler_new_folder_permissions", fallback="0o775"), + 8, ) base_log_folder = conf.get("logging", "base_log_folder") diff --git a/task_sdk/tests/conftest.py b/task_sdk/tests/conftest.py index e24f6e397d3e5..cc4bc4f96148a 100644 --- a/task_sdk/tests/conftest.py +++ b/task_sdk/tests/conftest.py @@ -184,6 +184,7 @@ def _make_context( data_interval_end: str | datetime = "2024-12-01T01:00:00Z", start_date: str | datetime = "2024-12-01T01:00:00Z", run_type: str = "manual", + conf=None, ) -> TIRunContext: return TIRunContext( dag_run=DagRun( @@ -194,6 +195,7 @@ def _make_context( data_interval_end=data_interval_end, # type: ignore start_date=start_date, # type: ignore run_type=run_type, # type: ignore + conf=conf, ), max_tries=0, ) diff --git a/task_sdk/tests/definitions/test_dag.py b/task_sdk/tests/definitions/test_dag.py index f0e634f19b667..e6baeabe98dee 100644 --- a/task_sdk/tests/definitions/test_dag.py +++ b/task_sdk/tests/definitions/test_dag.py @@ -23,9 +23,9 @@ import pytest from airflow.exceptions import DuplicateTaskIdFound -from airflow.models.param import Param, ParamsDict from airflow.sdk.definitions.baseoperator import BaseOperator from airflow.sdk.definitions.dag import DAG, dag as dag_decorator +from airflow.sdk.definitions.param import Param, ParamsDict DEFAULT_DATE = datetime(2016, 1, 1, tzinfo=timezone.utc) diff --git a/task_sdk/tests/definitions/test_mappedoperator.py b/task_sdk/tests/definitions/test_mappedoperator.py index aba7523b5ad39..eeb79f31b4d47 100644 --- a/task_sdk/tests/definitions/test_mappedoperator.py +++ b/task_sdk/tests/definitions/test_mappedoperator.py @@ -22,10 +22,10 @@ import pendulum import pytest -from airflow.models.param import ParamsDict from airflow.sdk.definitions.baseoperator import BaseOperator from airflow.sdk.definitions.dag import DAG from airflow.sdk.definitions.mappedoperator import MappedOperator +from airflow.sdk.definitions.param import ParamsDict from airflow.sdk.definitions.xcom_arg import XComArg from airflow.utils.trigger_rule import TriggerRule diff --git a/task_sdk/tests/definitions/test_param.py b/task_sdk/tests/definitions/test_param.py new file mode 100644 index 0000000000000..93e863222ef87 --- /dev/null +++ b/task_sdk/tests/definitions/test_param.py @@ -0,0 +1,308 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +from contextlib import nullcontext + +import pytest + +from airflow.exceptions import ParamValidationError +from airflow.sdk.definitions.param import Param, ParamsDict +from airflow.serialization.serialized_objects import BaseSerialization + + +class TestParam: + def test_param_without_schema(self): + p = Param("test") + assert p.resolve() == "test" + + p.value = 10 + assert p.resolve() == 10 + + def test_null_param(self): + p = Param() + with pytest.raises(ParamValidationError, match="No value passed and Param has no default value"): + p.resolve() + assert p.resolve(None) is None + assert p.dump()["value"] is None + assert not p.has_value + + p = Param(None) + assert p.resolve() is None + assert p.resolve(None) is None + assert p.dump()["value"] is None + assert not p.has_value + + p = Param(None, type="null") + assert p.resolve() is None + assert p.resolve(None) is None + assert p.dump()["value"] is None + assert not p.has_value + with pytest.raises(ParamValidationError): + p.resolve("test") + + def test_string_param(self): + p = Param("test", type="string") + assert p.resolve() == "test" + + p = Param("test") + assert p.resolve() == "test" + + p = Param("10.0.0.0", type="string", format="ipv4") + assert p.resolve() == "10.0.0.0" + + p = Param(type="string") + with pytest.raises(ParamValidationError): + p.resolve(None) + with pytest.raises(ParamValidationError, match="No value passed and Param has no default value"): + p.resolve() + + @pytest.mark.parametrize( + "dt", + [ + pytest.param("2022-01-02T03:04:05.678901Z", id="microseconds-zed-timezone"), + pytest.param("2022-01-02T03:04:05.678Z", id="milliseconds-zed-timezone"), + pytest.param("2022-01-02T03:04:05+00:00", id="seconds-00-00-timezone"), + pytest.param("2022-01-02T03:04:05+04:00", id="seconds-custom-timezone"), + ], + ) + def test_string_rfc3339_datetime_format(self, dt): + """Test valid rfc3339 datetime.""" + assert Param(dt, type="string", format="date-time").resolve() == dt + + @pytest.mark.parametrize( + "dt", + [ + pytest.param("2022-01-02", id="date"), + pytest.param("03:04:05", id="time"), + pytest.param("Thu, 04 Mar 2021 05:06:07 GMT", id="rfc2822-datetime"), + ], + ) + def test_string_datetime_invalid_format(self, dt): + """Test invalid iso8601 and rfc3339 datetime format.""" + with pytest.raises(ParamValidationError, match="is not a 'date-time'"): + Param(dt, type="string", format="date-time").resolve() + + def test_string_time_format(self): + """Test string time format.""" + assert Param("03:04:05", type="string", format="time").resolve() == "03:04:05" + + error_pattern = "is not a 'time'" + with pytest.raises(ParamValidationError, match=error_pattern): + Param("03:04:05.06", type="string", format="time").resolve() + + with pytest.raises(ParamValidationError, match=error_pattern): + Param("03:04", type="string", format="time").resolve() + + with pytest.raises(ParamValidationError, match=error_pattern): + 
Param("24:00:00", type="string", format="time").resolve() + + @pytest.mark.parametrize( + "date_string", + [ + "2021-01-01", + ], + ) + def test_string_date_format(self, date_string): + """Test string date format.""" + assert Param(date_string, type="string", format="date").resolve() == date_string + + # Note that 20120503 behaved differently in 3.11.3 Official python image. It was validated as a date + # there but it started to fail again in 3.11.4 released on 2023-07-05. + @pytest.mark.parametrize( + "date_string", + [ + "01/01/2021", + "21 May 1975", + "20120503", + ], + ) + def test_string_date_format_error(self, date_string): + """Test string date format failures.""" + with pytest.raises(ParamValidationError, match="is not a 'date'"): + Param(date_string, type="string", format="date").resolve() + + def test_int_param(self): + p = Param(5) + assert p.resolve() == 5 + + p = Param(type="integer", minimum=0, maximum=10) + assert p.resolve(value=5) == 5 + + with pytest.raises(ParamValidationError): + p.resolve(value=20) + + def test_number_param(self): + p = Param(42, type="number") + assert p.resolve() == 42 + + p = Param(1.2, type="number") + assert p.resolve() == 1.2 + + p = Param("42", type="number") + with pytest.raises(ParamValidationError): + p.resolve() + + def test_list_param(self): + p = Param([1, 2], type="array") + assert p.resolve() == [1, 2] + + def test_dict_param(self): + p = Param({"a": 1, "b": 2}, type="object") + assert p.resolve() == {"a": 1, "b": 2} + + def test_composite_param(self): + p = Param(type=["string", "number"]) + assert p.resolve(value="abc") == "abc" + assert p.resolve(value=5.0) == 5.0 + + def test_param_with_description(self): + p = Param(10, description="Sample description") + assert p.description == "Sample description" + + def test_suppress_exception(self): + p = Param("abc", type="string", minLength=2, maxLength=4) + assert p.resolve() == "abc" + + p.value = "long_string" + assert p.resolve(suppress_exception=True) is None + + def test_explicit_schema(self): + p = Param("abc", schema={type: "string"}) + assert p.resolve() == "abc" + + def test_custom_param(self): + class S3Param(Param): + def __init__(self, path: str): + schema = {"type": "string", "pattern": r"s3:\/\/(.+?)\/(.+)"} + super().__init__(default=path, schema=schema) + + p = S3Param("s3://my_bucket/my_path") + assert p.resolve() == "s3://my_bucket/my_path" + + p = S3Param("file://not_valid/s3_path") + with pytest.raises(ParamValidationError): + p.resolve() + + def test_value_saved(self): + p = Param("hello", type="string") + assert p.resolve("world") == "world" + assert p.resolve() == "world" + + def test_dump(self): + p = Param("hello", description="world", type="string", minLength=2) + dump = p.dump() + assert dump["__class"] == "airflow.sdk.definitions.param.Param" + assert dump["value"] == "hello" + assert dump["description"] == "world" + assert dump["schema"] == {"type": "string", "minLength": 2} + + @pytest.mark.parametrize( + "param", + [ + Param("my value", description="hello", schema={"type": "string"}), + Param("my value", description="hello"), + Param(None, description=None), + Param([True], type="array", items={"type": "boolean"}), + Param(), + ], + ) + def test_param_serialization(self, param: Param): + """ + Test to make sure that native Param objects can be correctly serialized + """ + + serializer = BaseSerialization() + serialized_param = serializer.serialize(param) + restored_param: Param = serializer.deserialize(serialized_param) + + assert restored_param.value == 
param.value + assert isinstance(restored_param, Param) + assert restored_param.description == param.description + assert restored_param.schema == param.schema + + @pytest.mark.parametrize( + "default, should_raise", + [ + pytest.param({0, 1, 2}, True, id="default-non-JSON-serializable"), + pytest.param(None, False, id="default-None"), # Param init should not warn + pytest.param({"b": 1}, False, id="default-JSON-serializable"), # Param init should not warn + ], + ) + def test_param_json_validation(self, default, should_raise): + exception_msg = "All provided parameters must be json-serializable" + cm = pytest.raises(ParamValidationError, match=exception_msg) if should_raise else nullcontext() + with cm: + p = Param(default=default) + if not should_raise: + p.resolve() # when resolved with NOTSET, should not warn. + p.resolve(value={"a": 1}) # when resolved with JSON-serializable, should not warn. + with pytest.raises(ParamValidationError, match=exception_msg): + p.resolve(value={1, 2, 3}) # when resolved with not JSON-serializable, should warn. + + +class TestParamsDict: + def test_params_dict(self): + # Init with a simple dictionary + pd = ParamsDict(dict_obj={"key": "value"}) + assert isinstance(pd.get_param("key"), Param) + assert pd["key"] == "value" + assert pd.suppress_exception is False + + # Init with a dict which contains Param objects + pd2 = ParamsDict({"key": Param("value", type="string")}, suppress_exception=True) + assert isinstance(pd2.get_param("key"), Param) + assert pd2["key"] == "value" + assert pd2.suppress_exception is True + + # Init with another object of another ParamsDict + pd3 = ParamsDict(pd2) + assert isinstance(pd3.get_param("key"), Param) + assert pd3["key"] == "value" + assert pd3.suppress_exception is False # as it's not a deepcopy of pd2 + + # Dump the ParamsDict + assert pd.dump() == {"key": "value"} + assert pd2.dump() == {"key": "value"} + assert pd3.dump() == {"key": "value"} + + # Validate the ParamsDict + plain_dict = pd.validate() + assert isinstance(plain_dict, dict) + pd2.validate() + pd3.validate() + + # Update the ParamsDict + with pytest.raises(ParamValidationError, match=r"Invalid input for param key: 1 is not"): + pd3["key"] = 1 + + # Should not raise an error as suppress_exception is True + pd2["key"] = 1 + pd2.validate() + + def test_update(self): + pd = ParamsDict({"key": Param("value", type="string")}) + + pd.update({"key": "a"}) + internal_value = pd.get_param("key") + assert isinstance(internal_value, Param) + with pytest.raises(ParamValidationError, match=r"Invalid input for param key: 1 is not"): + pd.update({"key": 1}) + + def test_repr(self): + pd = ParamsDict({"key": Param("value", type="string")}) + assert repr(pd) == "{'key': 'value'}" diff --git a/task_sdk/tests/execution_time/conftest.py b/task_sdk/tests/execution_time/conftest.py index 832f2b60ca351..ac0c21246c1ce 100644 --- a/task_sdk/tests/execution_time/conftest.py +++ b/task_sdk/tests/execution_time/conftest.py @@ -71,6 +71,8 @@ def set_dag(what: StartupDetails, dag_id: str, task: BaseOperator) -> RuntimeTas from airflow.utils import timezone dag = DAG(dag_id=dag_id, start_date=timezone.datetime(2024, 12, 3)) + if what.ti_context.dag_run.conf: + dag.params = what.ti_context.dag_run.conf # type: ignore[assignment] task.dag = dag t = dag.task_dict[task.task_id] ti = RuntimeTaskInstance.model_construct( @@ -120,6 +122,7 @@ def _create_task_instance( start_date: str | datetime = "2024-12-01T01:00:00Z", run_type: str = "manual", try_number: int = 1, + conf=None, ti_id=None, ) -> 
RuntimeTaskInstance: if not ti_id: @@ -133,6 +136,7 @@ def _create_task_instance( data_interval_end=data_interval_end, start_date=start_date, run_type=run_type, + conf=conf, ) startup_details = StartupDetails( diff --git a/task_sdk/tests/execution_time/test_task_runner.py b/task_sdk/tests/execution_time/test_task_runner.py index d9aa675242cd0..99e380754d4d0 100644 --- a/task_sdk/tests/execution_time/test_task_runner.py +++ b/task_sdk/tests/execution_time/test_task_runner.py @@ -67,6 +67,7 @@ CommsDecoder, RuntimeTaskInstance, _push_xcom_if_needed, + _xcom_push, parse, run, startup, @@ -647,14 +648,58 @@ def test_run_with_asset_outlets( ti = create_runtime_ti(task=task, dag_id="dag_with_asset_outlet_task") instant = timezone.datetime(2024, 12, 3, 10, 0) time_machine.move_to(instant, tick=False) + mock_supervisor_comms.get_message.return_value = OKResponse( + ok=True, + ) run(ti, log=mock.MagicMock()) mock_supervisor_comms.send_request.assert_any_call(msg=expected_msg, log=mock.ANY) -def test_run_with_inlets_and_outlets(create_runtime_ti, mock_supervisor_comms): +@pytest.mark.parametrize( + ["ok", "last_expected_msg"], + [ + pytest.param( + True, + SucceedTask( + end_date=timezone.datetime(2024, 12, 3, 10, 0), + task_outlets=[ + AssetProfile(name="name", uri="s3://bucket/my-task", asset_type="Asset"), + AssetProfile(name="new-name", uri="s3://bucket/my-task", asset_type="Asset"), + ], + outlet_events=[ + { + "asset_alias_events": [], + "extra": {}, + "key": {"name": "name", "uri": "s3://bucket/my-task"}, + }, + { + "asset_alias_events": [], + "extra": {}, + "key": {"name": "new-name", "uri": "s3://bucket/my-task"}, + }, + ], + ), + id="runtime_checks_pass", + ), + pytest.param( + False, + TaskState( + state=TerminalTIState.FAILED, + end_date=timezone.datetime(2024, 12, 3, 10, 0), + ), + id="runtime_checks_fail", + ), + ], +) +def test_run_with_inlets_and_outlets( + create_runtime_ti, mock_supervisor_comms, time_machine, ok, last_expected_msg +): """Test running a basic tasks with inlets and outlets.""" + instant = timezone.datetime(2024, 12, 3, 10, 0) + time_machine.move_to(instant, tick=False) + from airflow.providers.standard.operators.bash import BashOperator task = BashOperator( @@ -672,7 +717,7 @@ def test_run_with_inlets_and_outlets(create_runtime_ti, mock_supervisor_comms): ti = create_runtime_ti(task=task, dag_id="dag_with_inlets_and_outlets") mock_supervisor_comms.get_message.return_value = OKResponse( - ok=True, + ok=ok, ) run(ti, log=mock.MagicMock()) @@ -688,6 +733,7 @@ def test_run_with_inlets_and_outlets(create_runtime_ti, mock_supervisor_comms): ], ) mock_supervisor_comms.send_request.assert_any_call(msg=expected, log=mock.ANY) + mock_supervisor_comms.send_request.assert_any_call(msg=last_expected_msg, log=mock.ANY) class TestRuntimeTaskInstance: @@ -711,6 +757,7 @@ def test_get_context_without_ti_context_from_server(self, mocked_parse, make_ti_ # Verify the context keys and values assert context == { + "params": {}, "var": { "json": VariableAccessor(deserialize_json=True), "value": VariableAccessor(deserialize_json=False), @@ -751,6 +798,7 @@ def test_get_context_with_ti_context_from_server(self, create_runtime_ti, mock_s context = runtime_ti.get_template_context() assert context == { + "params": {}, "var": { "json": VariableAccessor(deserialize_json=True), "value": VariableAccessor(deserialize_json=False), @@ -983,6 +1031,36 @@ def execute(self, context): ), ) + def test_get_param_from_context( + self, mocked_parse, make_ti_context, mock_supervisor_comms, create_runtime_ti + 
): + """Test that a params can be retrieved from context.""" + + class CustomOperator(BaseOperator): + def execute(self, context): + value = context["params"] + print("The dag params are", value) + + task = CustomOperator(task_id="print-params") + runtime_ti = create_runtime_ti( + dag_id="basic_param_dag", + task=task, + conf={ + "x": 3, + "text": "Hello World!", + "flag": False, + "a_simple_list": ["one", "two", "three", "actually one value is made per line"], + }, + ) + run(runtime_ti, log=mock.MagicMock()) + + assert runtime_ti.task.dag.params == { + "x": 3, + "text": "Hello World!", + "flag": False, + "a_simple_list": ["one", "two", "three", "actually one value is made per line"], + } + class TestXComAfterTaskExecution: @pytest.mark.parametrize( @@ -1012,16 +1090,16 @@ def execute(self, context): runtime_ti = create_runtime_ti(task=task) spy_agency.spy_on(_push_xcom_if_needed, call_original=True) - spy_agency.spy_on(runtime_ti.xcom_push, call_original=False) + spy_agency.spy_on(_xcom_push, call_original=False) run(runtime_ti, log=mock.MagicMock()) spy_agency.assert_spy_called(_push_xcom_if_needed) if should_push_xcom: - spy_agency.assert_spy_called_with(runtime_ti.xcom_push, "return_value", expected_xcom_value) + spy_agency.assert_spy_called_with(_xcom_push, runtime_ti, "return_value", expected_xcom_value) else: - spy_agency.assert_spy_not_called(runtime_ti.xcom_push) + spy_agency.assert_spy_not_called(_xcom_push) def test_xcom_with_multiple_outputs(self, create_runtime_ti, spy_agency): """Test that the task pushes to XCom when multiple outputs are returned.""" @@ -1037,7 +1115,7 @@ def execute(self, context): runtime_ti = create_runtime_ti(task=task) - spy_agency.spy_on(runtime_ti.xcom_push, call_original=False) + spy_agency.spy_on(_xcom_push, call_original=False) _push_xcom_if_needed(result=result, ti=runtime_ti) expected_calls = [ @@ -1045,9 +1123,9 @@ def execute(self, context): ("key2", "value2"), ("return_value", result), ] - spy_agency.assert_spy_call_count(runtime_ti.xcom_push, len(expected_calls)) + spy_agency.assert_spy_call_count(_xcom_push, len(expected_calls)) for key, value in expected_calls: - spy_agency.assert_spy_called_with(runtime_ti.xcom_push, key, value) + spy_agency.assert_spy_called_with(_xcom_push, runtime_ti, key, value, mapped_length=None) def test_xcom_with_multiple_outputs_and_no_mapping_result(self, create_runtime_ti, spy_agency): """Test that error is raised when multiple outputs are returned without mapping.""" @@ -1092,3 +1170,85 @@ def execute(self, context): assert str(exc_info.value) == ( f"Returned dictionary keys must be strings when using multiple_outputs, found 2 ({int}) instead" ) + + +class TestDagParamRuntime: + def test_dag_param_resolves_from_task(self, create_runtime_ti, mock_supervisor_comms, time_machine): + """Test dagparam resolves on operator execution""" + instant = timezone.datetime(2024, 12, 3, 10, 0) + time_machine.move_to(instant, tick=False) + + dag = DAG(dag_id="dag_with_dag_params", start_date=timezone.datetime(2024, 12, 3)) + dag.param("value", default="NOTSET") + + class CustomOperator(BaseOperator): + def execute(self, context): + assert dag.params["value"] == "NOTSET" + + task = CustomOperator(task_id="task_with_dag_params") + runtime_ti = create_runtime_ti(task=task, dag_id="dag_with_dag_params") + + run(runtime_ti, log=mock.MagicMock()) + + mock_supervisor_comms.send_request.assert_called_once_with( + msg=SucceedTask( + state=TerminalTIState.SUCCESS, end_date=instant, task_outlets=[], outlet_events=[] + ), + log=mock.ANY, 
+ ) + + def test_dag_param_dag_overwrite(self, create_runtime_ti, mock_supervisor_comms, time_machine): + """Test dag param is overwritten from dagrun config""" + instant = timezone.datetime(2024, 12, 3, 10, 0) + time_machine.move_to(instant, tick=False) + + dag = DAG(dag_id="dag_with_dag_params_overwrite", start_date=timezone.datetime(2024, 12, 3)) + dag.param("value", default="NOTSET") + + class CustomOperator(BaseOperator): + def execute(self, context): + # important to use self.dag here + assert self.dag.params["value"] == "new_value" + + # asserting on the default value when not set in dag run + assert dag.params["value"] == "NOTSET" + task = CustomOperator(task_id="task_with_dag_params_overwrite") + + # we reparse the dag here, and if conf passed, added as params + runtime_ti = create_runtime_ti( + task=task, dag_id="dag_with_dag_params_overwrite", conf={"value": "new_value"} + ) + run(runtime_ti, log=mock.MagicMock()) + mock_supervisor_comms.send_request.assert_called_once_with( + msg=SucceedTask( + state=TerminalTIState.SUCCESS, end_date=instant, task_outlets=[], outlet_events=[] + ), + log=mock.ANY, + ) + + def test_dag_param_dag_default(self, create_runtime_ti, mock_supervisor_comms, time_machine): + """ "Test dag param is retrieved from default config""" + instant = timezone.datetime(2024, 12, 3, 10, 0) + time_machine.move_to(instant, tick=False) + + dag = DAG( + dag_id="dag_with_dag_params_default", + start_date=timezone.datetime(2024, 12, 3), + params={"value": "test"}, + ) + + class CustomOperator(BaseOperator): + def execute(self, context): + assert dag.params["value"] == "test" + + assert dag.params["value"] == "test" + task = CustomOperator(task_id="task_with_dag_params_default") + runtime_ti = create_runtime_ti(task=task, dag_id="dag_with_dag_params_default") + + run(runtime_ti, log=mock.MagicMock()) + mock_supervisor_comms.send_request.assert_called_once_with( + msg=SucceedTask( + state=TerminalTIState.SUCCESS, end_date=instant, task_outlets=[], outlet_events=[] + ), + log=mock.ANY, + ) diff --git a/tests/always/test_project_structure.py b/tests/always/test_project_structure.py index 3d609c3048e60..2548193c2fba3 100644 --- a/tests/always/test_project_structure.py +++ b/tests/always/test_project_structure.py @@ -381,7 +381,6 @@ class TestGoogleProviderProjectStructure(ExampleCoverageTest, AssetsCoverageTest "airflow.providers.google.cloud.operators.automl.AutoMLTablesListTableSpecsOperator", "airflow.providers.google.cloud.operators.automl.AutoMLTablesUpdateDatasetOperator", "airflow.providers.google.cloud.operators.automl.AutoMLDeployModelOperator", - "airflow.providers.google.cloud.operators.automl.AutoMLBatchPredictOperator", "airflow.providers.google.cloud.operators.automl.AutoMLTrainModelOperator", "airflow.providers.google.cloud.operators.automl.AutoMLPredictOperator", "airflow.providers.google.cloud.operators.automl.AutoMLCreateDatasetOperator", @@ -405,10 +404,6 @@ class TestGoogleProviderProjectStructure(ExampleCoverageTest, AssetsCoverageTest "airflow.providers.google.cloud.operators.mlengine.MLEngineStartBatchPredictionJobOperator", "airflow.providers.google.cloud.operators.mlengine.MLEngineStartTrainingJobOperator", "airflow.providers.google.cloud.operators.mlengine.MLEngineTrainingCancelJobOperator", - "airflow.providers.google.cloud.operators.vertex_ai.generative_model.PromptLanguageModelOperator", - "airflow.providers.google.cloud.operators.vertex_ai.generative_model.GenerateTextEmbeddingsOperator", - 
"airflow.providers.google.cloud.operators.vertex_ai.generative_model.PromptMultimodalModelOperator", - "airflow.providers.google.cloud.operators.vertex_ai.generative_model.PromptMultimodalModelWithMediaOperator", "airflow.providers.google.cloud.operators.vertex_ai.generative_model.TextGenerationModelPredictOperator", "airflow.providers.google.marketing_platform.operators.GoogleDisplayVideo360CreateQueryOperator", "airflow.providers.google.marketing_platform.operators.GoogleDisplayVideo360RunQueryOperator", diff --git a/tests/api_connexion/endpoints/test_dag_endpoint.py b/tests/api_connexion/endpoints/test_dag_endpoint.py index 93bdb600cb80a..79190a3664709 100644 --- a/tests/api_connexion/endpoints/test_dag_endpoint.py +++ b/tests/api_connexion/endpoints/test_dag_endpoint.py @@ -320,7 +320,7 @@ def test_should_respond_200(self, url_safe_serializer): "owners": [], "params": { "foo": { - "__class": "airflow.models.param.Param", + "__class": "airflow.sdk.definitions.param.Param", "description": None, "schema": {}, "value": 1, @@ -380,7 +380,7 @@ def test_should_respond_200_with_asset_expression(self, url_safe_serializer): "owners": [], "params": { "foo": { - "__class": "airflow.models.param.Param", + "__class": "airflow.sdk.definitions.param.Param", "description": None, "schema": {}, "value": 1, @@ -533,7 +533,7 @@ def test_should_respond_200_serialized(self, url_safe_serializer, testing_dag_bu "owners": [], "params": { "foo": { - "__class": "airflow.models.param.Param", + "__class": "airflow.sdk.definitions.param.Param", "description": None, "schema": {}, "value": 1, @@ -591,7 +591,7 @@ def test_should_respond_200_serialized(self, url_safe_serializer, testing_dag_bu "owners": [], "params": { "foo": { - "__class": "airflow.models.param.Param", + "__class": "airflow.sdk.definitions.param.Param", "description": None, "schema": {}, "value": 1, diff --git a/tests/api_connexion/endpoints/test_dag_run_endpoint.py b/tests/api_connexion/endpoints/test_dag_run_endpoint.py index 9558dd4fd256a..dc3073a475c3d 100644 --- a/tests/api_connexion/endpoints/test_dag_run_endpoint.py +++ b/tests/api_connexion/endpoints/test_dag_run_endpoint.py @@ -30,9 +30,9 @@ from airflow.models.asset import AssetEvent, AssetModel from airflow.models.dag import DAG, DagModel from airflow.models.dagrun import DagRun -from airflow.models.param import Param from airflow.operators.empty import EmptyOperator from airflow.sdk.definitions.asset import Asset +from airflow.sdk.definitions.param import Param from airflow.utils import timezone from airflow.utils.session import create_session, provide_session from airflow.utils.state import DagRunState, State diff --git a/tests/api_connexion/endpoints/test_mapped_task_instance_endpoint.py b/tests/api_connexion/endpoints/test_mapped_task_instance_endpoint.py index 79dcde6cbd2a5..68ba4395cfc0a 100644 --- a/tests/api_connexion/endpoints/test_mapped_task_instance_endpoint.py +++ b/tests/api_connexion/endpoints/test_mapped_task_instance_endpoint.py @@ -226,6 +226,7 @@ def test_mapped_task_instances(self, one_task_with_mapped_tis, session): "priority_weight": 1, "queue": "default", "queued_when": None, + "scheduled_when": None, "rendered_fields": {}, "rendered_map_index": None, "start_date": "2020-01-01T00:00:00+00:00", diff --git a/tests/api_connexion/endpoints/test_task_endpoint.py b/tests/api_connexion/endpoints/test_task_endpoint.py index 826b912ddc989..874c5d0508547 100644 --- a/tests/api_connexion/endpoints/test_task_endpoint.py +++ b/tests/api_connexion/endpoints/test_task_endpoint.py @@ 
-124,7 +124,7 @@ def test_should_respond_200(self): "owner": "airflow", "params": { "foo": { - "__class": "airflow.models.param.Param", + "__class": "airflow.sdk.definitions.param.Param", "value": "bar", "description": None, "schema": {}, @@ -207,7 +207,7 @@ def test_unscheduled_task(self): "owner": "airflow", "params": { "is_unscheduled": { - "__class": "airflow.models.param.Param", + "__class": "airflow.sdk.definitions.param.Param", "value": True, "description": None, "schema": {}, @@ -271,7 +271,7 @@ def test_should_respond_200_serialized(self, testing_dag_bundle): "owner": "airflow", "params": { "foo": { - "__class": "airflow.models.param.Param", + "__class": "airflow.sdk.definitions.param.Param", "value": "bar", "description": None, "schema": {}, @@ -348,7 +348,7 @@ def test_should_respond_200(self): "owner": "airflow", "params": { "foo": { - "__class": "airflow.models.param.Param", + "__class": "airflow.sdk.definitions.param.Param", "value": "bar", "description": None, "schema": {}, @@ -508,7 +508,7 @@ def test_get_unscheduled_tasks(self): "owner": "airflow", "params": { "is_unscheduled": { - "__class": "airflow.models.param.Param", + "__class": "airflow.sdk.definitions.param.Param", "value": True, "description": None, "schema": {}, diff --git a/tests/api_connexion/endpoints/test_task_instance_endpoint.py b/tests/api_connexion/endpoints/test_task_instance_endpoint.py index b5079c47aa17e..c51116c368ded 100644 --- a/tests/api_connexion/endpoints/test_task_instance_endpoint.py +++ b/tests/api_connexion/endpoints/test_task_instance_endpoint.py @@ -196,6 +196,7 @@ def test_should_respond_200(self, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00+00:00", "state": "running", "task_id": "print_the_context", @@ -254,6 +255,7 @@ def test_should_respond_200_with_task_state_in_deferred(self, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00+00:00", "state": "deferred", "task_id": "print_the_context", @@ -301,6 +303,7 @@ def test_should_respond_200_with_task_state_in_removed(self, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00+00:00", "state": "removed", "task_id": "print_the_context", @@ -344,6 +347,7 @@ def test_should_respond_200_task_instance_with_rendered(self, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00+00:00", "state": "running", "task_id": "print_the_context", @@ -396,6 +400,7 @@ def test_should_respond_200_mapped_task_instance_with_rtif(self, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00+00:00", "state": "running", "task_id": "print_the_context", @@ -2305,6 +2310,7 @@ def test_should_respond_200(self, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00+00:00", "state": "running", "task_id": "print_the_context", @@ -2364,6 +2370,7 @@ def test_should_respond_200_mapped_task_instance_with_rtif(self, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00+00:00", "state": "running", "task_id": "print_the_context", @@ -2586,6 +2593,7 @@ def 
test_should_respond_200(self, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00+00:00", "state": "success", "task_id": "print_the_context", @@ -2621,6 +2629,7 @@ def test_should_respond_200_with_different_try_numbers(self, try_number, session "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00+00:00", "state": "success" if try_number == 1 else None, "task_id": "print_the_context", @@ -2683,6 +2692,7 @@ def test_should_respond_200_with_mapped_task_at_different_try_numbers(self, try_ "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00+00:00", "state": "failed" if try_number == 1 else None, "task_id": "print_the_context", @@ -2745,6 +2755,7 @@ def test_should_respond_200_with_task_state_in_deferred(self, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00+00:00", "state": "failed", "task_id": "print_the_context", @@ -2780,6 +2791,7 @@ def test_should_respond_200_with_task_state_in_removed(self, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00+00:00", "state": "removed", "task_id": "print_the_context", diff --git a/tests/api_connexion/schemas/test_dag_schema.py b/tests/api_connexion/schemas/test_dag_schema.py index d6438045249aa..800b512f993bc 100644 --- a/tests/api_connexion/schemas/test_dag_schema.py +++ b/tests/api_connexion/schemas/test_dag_schema.py @@ -167,7 +167,7 @@ def test_serialize_test_dag_detail_schema(url_safe_serializer): "owners": [], "params": { "foo": { - "__class": "airflow.models.param.Param", + "__class": "airflow.sdk.definitions.param.Param", "value": 1, "description": None, "schema": {}, @@ -229,7 +229,7 @@ def test_serialize_test_dag_with_asset_schedule_detail_schema(url_safe_serialize "owners": [], "params": { "foo": { - "__class": "airflow.models.param.Param", + "__class": "airflow.sdk.definitions.param.Param", "value": 1, "description": None, "schema": {}, diff --git a/tests/api_connexion/schemas/test_task_instance_schema.py b/tests/api_connexion/schemas/test_task_instance_schema.py index 3f130517dc3ed..a14cd7dbbd1ec 100644 --- a/tests/api_connexion/schemas/test_task_instance_schema.py +++ b/tests/api_connexion/schemas/test_task_instance_schema.py @@ -87,6 +87,7 @@ def test_task_instance_schema_without_rendered(self, session): "priority_weight": 1, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00+00:00", "state": "running", "task_id": "TEST_TASK_ID", diff --git a/tests/api_connexion/schemas/test_task_schema.py b/tests/api_connexion/schemas/test_task_schema.py index 5748529b864af..eee51c3aac73a 100644 --- a/tests/api_connexion/schemas/test_task_schema.py +++ b/tests/api_connexion/schemas/test_task_schema.py @@ -86,7 +86,7 @@ def test_serialize(self): "owner": "airflow", "params": { "foo": { - "__class": "airflow.models.param.Param", + "__class": "airflow.sdk.definitions.param.Param", "value": "bar", "description": None, "schema": {}, diff --git a/tests/api_fastapi/core_api/routes/public/test_dag_run.py b/tests/api_fastapi/core_api/routes/public/test_dag_run.py index b316b0119dd19..3d70f4dbf29f8 100644 --- a/tests/api_fastapi/core_api/routes/public/test_dag_run.py +++ 
b/tests/api_fastapi/core_api/routes/public/test_dag_run.py @@ -27,9 +27,9 @@ from airflow.listeners.listener import get_listener_manager from airflow.models import DagModel, DagRun from airflow.models.asset import AssetEvent, AssetModel -from airflow.models.param import Param from airflow.operators.empty import EmptyOperator from airflow.sdk.definitions.asset import Asset +from airflow.sdk.definitions.param import Param from airflow.utils import timezone from airflow.utils.session import provide_session from airflow.utils.state import DagRunState, State diff --git a/tests/api_fastapi/core_api/routes/public/test_dags.py b/tests/api_fastapi/core_api/routes/public/test_dags.py index 748baae71a413..8ef4a82613775 100644 --- a/tests/api_fastapi/core_api/routes/public/test_dags.py +++ b/tests/api_fastapi/core_api/routes/public/test_dags.py @@ -377,7 +377,7 @@ def test_dag_details( "owners": ["airflow"], "params": { "foo": { - "__class": "airflow.models.param.Param", + "__class": "airflow.sdk.definitions.param.Param", "description": None, "schema": {}, "value": 1, diff --git a/tests/api_fastapi/core_api/routes/public/test_task_instances.py b/tests/api_fastapi/core_api/routes/public/test_task_instances.py index 80cf61dc684cf..857ca1ffa65db 100644 --- a/tests/api_fastapi/core_api/routes/public/test_task_instances.py +++ b/tests/api_fastapi/core_api/routes/public/test_task_instances.py @@ -189,6 +189,7 @@ def test_should_respond_200(self, test_client, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "running", "task_id": "print_the_context", @@ -247,6 +248,7 @@ def test_should_respond_200_with_task_state_in_deferred(self, test_client, sessi "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "deferred", "task_id": "print_the_context", @@ -294,6 +296,7 @@ def test_should_respond_200_with_task_state_in_removed(self, test_client, sessio "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "removed", "task_id": "print_the_context", @@ -337,6 +340,7 @@ def test_should_respond_200_task_instance_with_rendered(self, test_client, sessi "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "running", "task_id": "print_the_context", @@ -437,6 +441,7 @@ def test_should_respond_200_mapped_task_instance_with_rtif(self, test_client, se "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "running", "task_id": "print_the_context", @@ -1509,6 +1514,7 @@ def test_should_respond_200(self, test_client, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "success", "task_id": "print_the_context", @@ -1542,6 +1548,7 @@ def test_should_respond_200_with_different_try_numbers(self, test_client, try_nu "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "success" if try_number == 1 else None, "task_id": "print_the_context", @@ -1604,6 +1611,7 @@ def test_should_respond_200_with_mapped_task_at_different_try_numbers( "priority_weight": 9, "queue": "default_queue", "queued_when": None, + 
"scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "failed" if try_number == 1 else None, "task_id": "print_the_context", @@ -1664,6 +1672,7 @@ def test_should_respond_200_with_task_state_in_deferred(self, test_client, sessi "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "failed", "task_id": "print_the_context", @@ -1698,6 +1707,7 @@ def test_should_respond_200_with_task_state_in_removed(self, test_client, sessio "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "removed", "task_id": "print_the_context", @@ -2146,6 +2156,7 @@ def test_should_respond_200_with_dag_run_id(self, test_client, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "rendered_fields": {}, "rendered_map_index": None, "start_date": "2020-01-02T00:00:00Z", @@ -2485,6 +2496,7 @@ def test_should_respond_200(self, test_client, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "success", "task_id": "print_the_context", @@ -2509,6 +2521,7 @@ def test_should_respond_200(self, test_client, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": None, "task_id": "print_the_context", @@ -2554,6 +2567,7 @@ def test_ti_in_retry_state_not_returned(self, test_client, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "success", "task_id": "print_the_context", @@ -2620,6 +2634,7 @@ def test_mapped_task_should_respond_200(self, test_client, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "failed", "task_id": "print_the_context", @@ -2644,6 +2659,7 @@ def test_mapped_task_should_respond_200(self, test_client, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": None, "task_id": "print_the_context", @@ -2718,6 +2734,7 @@ def test_should_call_mocked_api(self, mock_set_ti_state, test_client, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "running", "task_display_name": self.TASK_ID, @@ -2912,6 +2929,7 @@ def test_should_raise_422_for_invalid_task_instance_state(self, payload, expecte "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "running", "task_display_name": "print_the_context", @@ -3009,6 +3027,7 @@ def test_update_mask_set_note_should_respond_200(self, test_client, session, new "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "running", "task_id": self.TASK_ID, @@ -3049,6 +3068,7 @@ def test_set_note_should_respond_200(self, test_client, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "running", "task_id": self.TASK_ID, @@ -3103,6 +3123,7 @@ def 
test_set_note_should_respond_200_mapped_task_instance_with_rtif(self, test_c "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "running", "task_id": self.TASK_ID, @@ -3202,6 +3223,7 @@ def test_should_call_mocked_api(self, mock_set_ti_state, test_client, session): "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "running", "task_display_name": self.TASK_ID, @@ -3423,6 +3445,7 @@ def test_should_raise_422_for_invalid_task_instance_state(self, payload, expecte "priority_weight": 9, "queue": "default_queue", "queued_when": None, + "scheduled_when": None, "start_date": "2020-01-02T00:00:00Z", "state": "running", "task_display_name": "print_the_context", diff --git a/tests/api_fastapi/core_api/routes/public/test_tasks.py b/tests/api_fastapi/core_api/routes/public/test_tasks.py index 2c00a9e96a7b5..b2e7671365690 100644 --- a/tests/api_fastapi/core_api/routes/public/test_tasks.py +++ b/tests/api_fastapi/core_api/routes/public/test_tasks.py @@ -103,7 +103,7 @@ def test_should_respond_200(self, test_client): "owner": "airflow", "params": { "foo": { - "__class": "airflow.models.param.Param", + "__class": "airflow.sdk.definitions.param.Param", "value": "bar", "description": None, "schema": {}, @@ -185,7 +185,7 @@ def test_unscheduled_task(self, test_client): "owner": "airflow", "params": { "is_unscheduled": { - "__class": "airflow.models.param.Param", + "__class": "airflow.sdk.definitions.param.Param", "value": True, "description": None, "schema": {}, @@ -248,7 +248,7 @@ def test_should_respond_200_serialized(self, test_client, testing_dag_bundle): "owner": "airflow", "params": { "foo": { - "__class": "airflow.models.param.Param", + "__class": "airflow.sdk.definitions.param.Param", "value": "bar", "description": None, "schema": {}, @@ -313,7 +313,7 @@ def test_should_respond_200(self, test_client): "owner": "airflow", "params": { "foo": { - "__class": "airflow.models.param.Param", + "__class": "airflow.sdk.definitions.param.Param", "value": "bar", "description": None, "schema": {}, @@ -469,7 +469,7 @@ def test_get_unscheduled_tasks(self, test_client): "owner": "airflow", "params": { "is_unscheduled": { - "__class": "airflow.models.param.Param", + "__class": "airflow.sdk.definitions.param.Param", "value": True, "description": None, "schema": {}, diff --git a/tests/api_fastapi/execution_api/routes/test_xcoms.py b/tests/api_fastapi/execution_api/routes/test_xcoms.py index 6347db9b6db28..3232622b3ac1f 100644 --- a/tests/api_fastapi/execution_api/routes/test_xcoms.py +++ b/tests/api_fastapi/execution_api/routes/test_xcoms.py @@ -17,12 +17,15 @@ from __future__ import annotations +import contextlib from unittest import mock +import httpx import pytest from airflow.api_fastapi.execution_api.datamodels.xcom import XComResponse from airflow.models.dagrun import DagRun +from airflow.models.taskmap import TaskMap from airflow.models.xcom import XCom from airflow.utils.session import create_session @@ -114,12 +117,45 @@ def test_xcom_set(self, client, create_task_instance, session, value, expected_v xcom = session.query(XCom).filter_by(task_id=ti.task_id, dag_id=ti.dag_id, key="xcom_1").first() assert xcom.value == expected_value + task_map = session.query(TaskMap).filter_by(task_id=ti.task_id, dag_id=ti.dag_id).one_or_none() + assert task_map is None, "Should not be mapped" @pytest.mark.parametrize( - "value", - ["value1", 
{"key2": "value2"}, ["value1"]], + ("length", "err_context"), + [ + pytest.param( + 20, + contextlib.nullcontext(), + id="20-success", + ), + pytest.param( + 2000, + pytest.raises(httpx.HTTPStatusError), + id="2000-too-long", + ), + ], ) - def test_xcom_set_invalid_json(self, client, create_task_instance, value): + def test_xcom_set_downstream_of_mapped(self, client, create_task_instance, session, length, err_context): + """ + Test that XCom value is set correctly. The value is passed as a JSON string in the request body. + XCom.set then uses json.dumps to serialize it and store the value in the database. + This is done so that Task SDK in multiple languages can use the same API to set XCom values. + """ + ti = create_task_instance() + session.commit() + + with err_context: + response = client.post( + f"/execution/xcoms/{ti.dag_id}/{ti.run_id}/{ti.task_id}/xcom_1", + json='"valid json"', + params={"mapped_length": length}, + ) + response.raise_for_status() + + task_map = session.query(TaskMap).filter_by(task_id=ti.task_id, dag_id=ti.dag_id).one_or_none() + assert task_map.length == length + + def test_xcom_set_invalid_json(self, client): response = client.post( "/execution/xcoms/dag/runid/task/xcom_1", json="invalid_json", diff --git a/tests/dags/test_invalid_param.py b/tests/dags/test_invalid_param.py index fb0d3c854d12d..547fc7c11253d 100644 --- a/tests/dags/test_invalid_param.py +++ b/tests/dags/test_invalid_param.py @@ -19,8 +19,8 @@ from datetime import datetime from airflow.models.dag import DAG -from airflow.models.param import Param from airflow.providers.standard.operators.python import PythonOperator +from airflow.sdk.definitions.param import Param with DAG( "test_invalid_param", diff --git a/tests/dags/test_invalid_param2.py b/tests/dags/test_invalid_param2.py index 69ffda442301d..5678f46090c89 100644 --- a/tests/dags/test_invalid_param2.py +++ b/tests/dags/test_invalid_param2.py @@ -19,8 +19,8 @@ from datetime import datetime from airflow import DAG -from airflow.models.param import Param from airflow.providers.standard.operators.python import PythonOperator +from airflow.sdk.definitions.param import Param with DAG( "test_invalid_param2", diff --git a/tests/dags/test_invalid_param3.py b/tests/dags/test_invalid_param3.py index a8017a3402b66..ea3bfa202a319 100644 --- a/tests/dags/test_invalid_param3.py +++ b/tests/dags/test_invalid_param3.py @@ -19,8 +19,8 @@ from datetime import datetime from airflow import DAG -from airflow.models.param import Param from airflow.providers.standard.operators.python import PythonOperator +from airflow.sdk.definitions.param import Param with DAG( "test_invalid_param3", diff --git a/tests/dags/test_invalid_param4.py b/tests/dags/test_invalid_param4.py index bbfc7e970c51c..0156072ba11cf 100644 --- a/tests/dags/test_invalid_param4.py +++ b/tests/dags/test_invalid_param4.py @@ -19,8 +19,8 @@ from datetime import datetime from airflow import DAG -from airflow.models.param import Param from airflow.providers.standard.operators.python import PythonOperator +from airflow.sdk.definitions.param import Param with DAG( "test_invalid_param4", diff --git a/tests/dags/test_valid_param.py b/tests/dags/test_valid_param.py index afa0f98ce21d5..ddb858a9acc1e 100644 --- a/tests/dags/test_valid_param.py +++ b/tests/dags/test_valid_param.py @@ -19,8 +19,8 @@ from datetime import datetime from airflow import DAG -from airflow.models.param import Param from airflow.providers.standard.operators.python import PythonOperator +from airflow.sdk.definitions.param import Param 
with DAG( "test_valid_param", diff --git a/tests/dags/test_valid_param2.py b/tests/dags/test_valid_param2.py index d59d6278c3a71..ee6920bd92ee7 100644 --- a/tests/dags/test_valid_param2.py +++ b/tests/dags/test_valid_param2.py @@ -19,8 +19,8 @@ from datetime import datetime from airflow import DAG -from airflow.models.param import Param from airflow.providers.standard.operators.python import PythonOperator +from airflow.sdk.definitions.param import Param with DAG( "test_valid_param2", diff --git a/tests/integration/executors/test_celery_executor.py b/tests/integration/executors/test_celery_executor.py index 0f9f0b45ae9c1..e1ece9b387bba 100644 --- a/tests/integration/executors/test_celery_executor.py +++ b/tests/integration/executors/test_celery_executor.py @@ -39,14 +39,15 @@ from airflow.configuration import conf from airflow.exceptions import AirflowException, AirflowTaskTimeout -from airflow.executors import base_executor +from airflow.executors import base_executor, workloads from airflow.models.dag import DAG -from airflow.models.taskinstance import SimpleTaskInstance, TaskInstance +from airflow.models.taskinstance import TaskInstance from airflow.models.taskinstancekey import TaskInstanceKey from airflow.providers.standard.operators.bash import BashOperator from airflow.utils.state import State, TaskInstanceState from tests_common.test_utils import db +from tests_common.test_utils.version_compat import AIRFLOW_V_3_0_PLUS logger = logging.getLogger(__name__) @@ -69,16 +70,19 @@ def _prepare_app(broker_url=None, execute=None): from airflow.providers.celery.executors import celery_executor_utils broker_url = broker_url or conf.get("celery", "BROKER_URL") - execute = execute or celery_executor_utils.execute_command.__wrapped__ + if AIRFLOW_V_3_0_PLUS: + execute_name = "execute_workload" + execute = execute or celery_executor_utils.execute_workload.__wrapped__ + else: + execute_name = "execute_command" + execute = execute or celery_executor_utils.execute_command.__wrapped__ test_config = dict(celery_executor_utils.celery_configuration) test_config.update({"broker_url": broker_url}) test_app = Celery(broker_url, config_source=test_config) test_execute = test_app.task(execute) - patch_app = mock.patch("airflow.providers.celery.executors.celery_executor.app", test_app) - patch_execute = mock.patch( - "airflow.providers.celery.executors.celery_executor_utils.execute_command", test_execute - ) + patch_app = mock.patch.object(celery_executor_utils, "app", test_app) + patch_execute = mock.patch.object(celery_executor_utils, execute_name, test_execute) backend = test_app.backend @@ -136,42 +140,35 @@ def _change_state(self, key: TaskInstanceKey, state: TaskInstanceState, info=Non def test_celery_integration(self, broker_url): from airflow.providers.celery.executors import celery_executor, celery_executor_utils - success_command = ["airflow", "tasks", "run", "true", "some_parameter"] - fail_command = ["airflow", "version"] - - def fake_execute_command(command): - if command != success_command: + def fake_execute_workload(command): + if "fail" in command: raise AirflowException("fail") - with _prepare_app(broker_url, execute=fake_execute_command) as app: + with _prepare_app(broker_url, execute=fake_execute_workload) as app: executor = celery_executor.CeleryExecutor() assert executor.tasks == {} executor.start() with start_worker(app=app, logfile=sys.stdout, loglevel="info"): - execute_date = datetime.now() - - task_tuples_to_send = [ - ( - ("success", "fake_simple_ti", execute_date, 0), - 
success_command, - celery_executor_utils.celery_configuration["task_default_queue"], - celery_executor_utils.execute_command, - ), - ( - ("fail", "fake_simple_ti", execute_date, 0), - fail_command, - celery_executor_utils.celery_configuration["task_default_queue"], - celery_executor_utils.execute_command, - ), + ti = workloads.TaskInstance.model_construct( + task_id="success", + dag_id="id", + run_id="abc", + try_number=0, + priority_weight=1, + queue=celery_executor_utils.celery_configuration["task_default_queue"], + ) + keys = [ + TaskInstanceKey("id", "success", "abc", 0, -1), + TaskInstanceKey("id", "fail", "abc", 0, -1), + ] + for w in ( + workloads.ExecuteTask.model_construct(ti=ti), + workloads.ExecuteTask.model_construct(ti=ti.model_copy(update={"task_id": "fail"})), + ): + executor.queue_workload(w, session=None) - # "Enqueue" them. We don't have a real SimpleTaskInstance, so directly edit the dict - for key, command, queue, _ in task_tuples_to_send: - executor.queued_tasks[key] = (command, 1, queue, None) - executor.task_publish_retries[key] = 1 - - executor._process_tasks(task_tuples_to_send) + executor.trigger_tasks(open_slots=10) for _ in range(20): num_tasks = len(executor.tasks.keys()) if num_tasks == 2: @@ -181,54 +178,47 @@ def fake_execute_command(command): num_tasks, ) sleep(0.4) - assert list(executor.tasks.keys()) == [ - ("success", "fake_simple_ti", execute_date, 0), - ("fail", "fake_simple_ti", execute_date, 0), - ] - assert ( - executor.event_buffer[("success", "fake_simple_ti", execute_date, 0)][0] == State.QUEUED - ) - assert executor.event_buffer[("fail", "fake_simple_ti", execute_date, 0)][0] == State.QUEUED + assert list(executor.tasks.keys()) == keys + assert executor.event_buffer[keys[0]][0] == State.QUEUED + assert executor.event_buffer[keys[1]][0] == State.QUEUED executor.end(synchronous=True) - assert executor.event_buffer[("success", "fake_simple_ti", execute_date, 0)][0] == State.SUCCESS - assert executor.event_buffer[("fail", "fake_simple_ti", execute_date, 0)][0] == State.FAILED + assert executor.event_buffer[keys[0]][0] == State.SUCCESS + assert executor.event_buffer[keys[1]][0] == State.FAILED - assert "success" not in executor.tasks - assert "fail" not in executor.tasks + assert keys[0] not in executor.tasks + assert keys[1] not in executor.tasks assert executor.queued_tasks == {} def test_error_sending_task(self): from airflow.providers.celery.executors import celery_executor - def fake_execute_command(): + def fake_task(): pass - with _prepare_app(execute=fake_execute_command): - # fake_execute_command takes no arguments while execute_command takes 1, + with _prepare_app(execute=fake_task): + # fake_task takes no arguments while execute_workload takes 1, # which will cause TypeError when calling task.apply_async() executor = celery_executor.CeleryExecutor() task = BashOperator( task_id="test", bash_command="true", - dag=DAG(dag_id="id", schedule=None), + dag=DAG(dag_id="dag_id"), start_date=datetime.now(), ) - when = datetime.now() - value_tuple = ( - "command", - 1, - None, - SimpleTaskInstance.from_ti(ti=TaskInstance(task=task, run_id=None)), + ti = TaskInstance(task=task, run_id="abc") + workload = workloads.ExecuteTask.model_construct( + ti=workloads.TaskInstance.model_validate(ti, from_attributes=True), ) - key = ("fail", "fake_simple_ti", when, 0) - executor.queued_tasks[key] = value_tuple + + key = (task.dag.dag_id, task.task_id, ti.run_id, 0, -1) + executor.queued_tasks[key] = workload executor.task_publish_retries[key] = 1
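# NOTE (editor): hedged aside, not part of the patch. The keys used by the reworked assertions above
# follow TaskInstanceKey's five fields (dag_id, task_id, run_id, try_number, map_index); the trailing
# -1 is the map_index of an unmapped task instance. That is why event_buffer and queued_tasks are now
# indexed with these tuples instead of the ad-hoc ("success", "fake_simple_ti", execute_date, 0) keys
# used before.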
executor.heartbeat() assert len(executor.queued_tasks) == 0, "Task should no longer be queued" - assert executor.event_buffer[("fail", "fake_simple_ti", when, 0)][0] == State.FAILED + assert executor.event_buffer[key][0] == State.FAILED def test_retry_on_error_sending_task(self, caplog): """Test that Airflow retries publishing tasks to Celery Broker at least 3 times""" @@ -251,18 +241,16 @@ def test_retry_on_error_sending_task(self, caplog): task = BashOperator( task_id="test", bash_command="true", - dag=DAG(dag_id="id", schedule=None), + dag=DAG(dag_id="id"), start_date=datetime.now(), ) - when = datetime.now() - value_tuple = ( - "command", - 1, - None, - SimpleTaskInstance.from_ti(ti=TaskInstance(task=task, run_id=None)), + ti = TaskInstance(task=task, run_id="abc") + workload = workloads.ExecuteTask.model_construct( + ti=workloads.TaskInstance.model_validate(ti, from_attributes=True), ) - key = ("fail", "fake_simple_ti", when, 0) - executor.queued_tasks[key] = value_tuple + + key = (task.dag.dag_id, task.task_id, ti.run_id, 0, -1) + executor.queued_tasks[key] = workload # Test that when heartbeat is called again, task is published again to Celery Queue executor.heartbeat() @@ -286,7 +274,7 @@ def test_retry_on_error_sending_task(self, caplog): executor.heartbeat() assert dict(executor.task_publish_retries) == {} assert len(executor.queued_tasks) == 0, "Task should no longer be in queue" - assert executor.event_buffer[("fail", "fake_simple_ti", when, 0)][0] == State.FAILED + assert executor.event_buffer[key][0] == State.FAILED class ClassWithCustomAttributes: diff --git a/tests/models/test_dag.py b/tests/models/test_dag.py index e8ce71a714c21..8766a89a8102d 100644 --- a/tests/models/test_dag.py +++ b/tests/models/test_dag.py @@ -64,7 +64,6 @@ ) from airflow.models.dag_version import DagVersion from airflow.models.dagrun import DagRun -from airflow.models.param import DagParam, Param from airflow.models.serialized_dag import SerializedDagModel from airflow.models.taskinstance import TaskInstance as TI from airflow.operators.empty import EmptyOperator @@ -74,6 +73,7 @@ from airflow.sdk.definitions._internal.contextmanager import TaskGroupContext from airflow.sdk.definitions._internal.templater import NativeEnvironment, SandboxedEnvironment from airflow.sdk.definitions.asset import Asset, AssetAlias, AssetAll, AssetAny +from airflow.sdk.definitions.param import DagParam, Param from airflow.security import permissions from airflow.timetables.base import DagRunInfo, DataInterval, TimeRestriction, Timetable from airflow.timetables.simple import ( @@ -159,7 +159,7 @@ def _create_dagrun( triggered_by_kwargs: dict = {"triggered_by": DagRunTriggeredByType.TEST} if AIRFLOW_V_3_0_PLUS else {} run_id = dag.timetable.generate_run_id( run_type=run_type, - logical_date=logical_date, + logical_date=logical_date, # type: ignore data_interval=data_interval, ) return dag.create_dagrun( diff --git a/tests/models/test_param.py b/tests/models/test_param.py index 152419db2fa4d..77cf96eda2226 100644 --- a/tests/models/test_param.py +++ b/tests/models/test_param.py @@ -22,278 +22,13 @@ from airflow.decorators import task from airflow.exceptions import ParamValidationError -from airflow.models.param import Param, ParamsDict -from airflow.serialization.serialized_objects import BaseSerialization +from airflow.sdk.definitions.param import Param from airflow.utils import timezone from airflow.utils.types import DagRunType from tests_common.test_utils.db import clear_db_dags, clear_db_runs, clear_db_xcom -class 
TestParam: - def test_param_without_schema(self): - p = Param("test") - assert p.resolve() == "test" - - p.value = 10 - assert p.resolve() == 10 - - def test_null_param(self): - p = Param() - with pytest.raises(ParamValidationError, match="No value passed and Param has no default value"): - p.resolve() - assert p.resolve(None) is None - assert p.dump()["value"] is None - assert not p.has_value - - p = Param(None) - assert p.resolve() is None - assert p.resolve(None) is None - assert p.dump()["value"] is None - assert not p.has_value - - p = Param(None, type="null") - assert p.resolve() is None - assert p.resolve(None) is None - assert p.dump()["value"] is None - assert not p.has_value - with pytest.raises(ParamValidationError): - p.resolve("test") - - def test_string_param(self): - p = Param("test", type="string") - assert p.resolve() == "test" - - p = Param("test") - assert p.resolve() == "test" - - p = Param("10.0.0.0", type="string", format="ipv4") - assert p.resolve() == "10.0.0.0" - - p = Param(type="string") - with pytest.raises(ParamValidationError): - p.resolve(None) - with pytest.raises(ParamValidationError, match="No value passed and Param has no default value"): - p.resolve() - - @pytest.mark.parametrize( - "dt", - [ - pytest.param("2022-01-02T03:04:05.678901Z", id="microseconds-zed-timezone"), - pytest.param("2022-01-02T03:04:05.678Z", id="milliseconds-zed-timezone"), - pytest.param("2022-01-02T03:04:05+00:00", id="seconds-00-00-timezone"), - pytest.param("2022-01-02T03:04:05+04:00", id="seconds-custom-timezone"), - ], - ) - def test_string_rfc3339_datetime_format(self, dt): - """Test valid rfc3339 datetime.""" - assert Param(dt, type="string", format="date-time").resolve() == dt - - @pytest.mark.parametrize( - "dt", - [ - pytest.param("2022-01-02", id="date"), - pytest.param("03:04:05", id="time"), - pytest.param("Thu, 04 Mar 2021 05:06:07 GMT", id="rfc2822-datetime"), - ], - ) - def test_string_datetime_invalid_format(self, dt): - """Test invalid iso8601 and rfc3339 datetime format.""" - with pytest.raises(ParamValidationError, match="is not a 'date-time'"): - Param(dt, type="string", format="date-time").resolve() - - def test_string_time_format(self): - """Test string time format.""" - assert Param("03:04:05", type="string", format="time").resolve() == "03:04:05" - - error_pattern = "is not a 'time'" - with pytest.raises(ParamValidationError, match=error_pattern): - Param("03:04:05.06", type="string", format="time").resolve() - - with pytest.raises(ParamValidationError, match=error_pattern): - Param("03:04", type="string", format="time").resolve() - - with pytest.raises(ParamValidationError, match=error_pattern): - Param("24:00:00", type="string", format="time").resolve() - - @pytest.mark.parametrize( - "date_string", - [ - "2021-01-01", - ], - ) - def test_string_date_format(self, date_string): - """Test string date format.""" - assert Param(date_string, type="string", format="date").resolve() == date_string - - # Note that 20120503 behaved differently in 3.11.3 Official python image. It was validated as a date - # there but it started to fail again in 3.11.4 released on 2023-07-05. 
- @pytest.mark.parametrize( - "date_string", - [ - "01/01/2021", - "21 May 1975", - "20120503", - ], - ) - def test_string_date_format_error(self, date_string): - """Test string date format failures.""" - with pytest.raises(ParamValidationError, match="is not a 'date'"): - Param(date_string, type="string", format="date").resolve() - - def test_int_param(self): - p = Param(5) - assert p.resolve() == 5 - - p = Param(type="integer", minimum=0, maximum=10) - assert p.resolve(value=5) == 5 - - with pytest.raises(ParamValidationError): - p.resolve(value=20) - - def test_number_param(self): - p = Param(42, type="number") - assert p.resolve() == 42 - - p = Param(1.2, type="number") - assert p.resolve() == 1.2 - - p = Param("42", type="number") - with pytest.raises(ParamValidationError): - p.resolve() - - def test_list_param(self): - p = Param([1, 2], type="array") - assert p.resolve() == [1, 2] - - def test_dict_param(self): - p = Param({"a": 1, "b": 2}, type="object") - assert p.resolve() == {"a": 1, "b": 2} - - def test_composite_param(self): - p = Param(type=["string", "number"]) - assert p.resolve(value="abc") == "abc" - assert p.resolve(value=5.0) == 5.0 - - def test_param_with_description(self): - p = Param(10, description="Sample description") - assert p.description == "Sample description" - - def test_suppress_exception(self): - p = Param("abc", type="string", minLength=2, maxLength=4) - assert p.resolve() == "abc" - - p.value = "long_string" - assert p.resolve(suppress_exception=True) is None - - def test_explicit_schema(self): - p = Param("abc", schema={type: "string"}) - assert p.resolve() == "abc" - - def test_custom_param(self): - class S3Param(Param): - def __init__(self, path: str): - schema = {"type": "string", "pattern": r"s3:\/\/(.+?)\/(.+)"} - super().__init__(default=path, schema=schema) - - p = S3Param("s3://my_bucket/my_path") - assert p.resolve() == "s3://my_bucket/my_path" - - p = S3Param("file://not_valid/s3_path") - with pytest.raises(ParamValidationError): - p.resolve() - - def test_value_saved(self): - p = Param("hello", type="string") - assert p.resolve("world") == "world" - assert p.resolve() == "world" - - def test_dump(self): - p = Param("hello", description="world", type="string", minLength=2) - dump = p.dump() - assert dump["__class"] == "airflow.models.param.Param" - assert dump["value"] == "hello" - assert dump["description"] == "world" - assert dump["schema"] == {"type": "string", "minLength": 2} - - @pytest.mark.parametrize( - "param", - [ - Param("my value", description="hello", schema={"type": "string"}), - Param("my value", description="hello"), - Param(None, description=None), - Param([True], type="array", items={"type": "boolean"}), - Param(), - ], - ) - def test_param_serialization(self, param: Param): - """ - Test to make sure that native Param objects can be correctly serialized - """ - - serializer = BaseSerialization() - serialized_param = serializer.serialize(param) - restored_param: Param = serializer.deserialize(serialized_param) - - assert restored_param.value == param.value - assert isinstance(restored_param, Param) - assert restored_param.description == param.description - assert restored_param.schema == param.schema - - -class TestParamsDict: - def test_params_dict(self): - # Init with a simple dictionary - pd = ParamsDict(dict_obj={"key": "value"}) - assert isinstance(pd.get_param("key"), Param) - assert pd["key"] == "value" - assert pd.suppress_exception is False - - # Init with a dict which contains Param objects - pd2 = ParamsDict({"key": 
Param("value", type="string")}, suppress_exception=True) - assert isinstance(pd2.get_param("key"), Param) - assert pd2["key"] == "value" - assert pd2.suppress_exception is True - - # Init with another object of another ParamsDict - pd3 = ParamsDict(pd2) - assert isinstance(pd3.get_param("key"), Param) - assert pd3["key"] == "value" - assert pd3.suppress_exception is False # as it's not a deepcopy of pd2 - - # Dump the ParamsDict - assert pd.dump() == {"key": "value"} - assert pd2.dump() == {"key": "value"} - assert pd3.dump() == {"key": "value"} - - # Validate the ParamsDict - plain_dict = pd.validate() - assert isinstance(plain_dict, dict) - pd2.validate() - pd3.validate() - - # Update the ParamsDict - with pytest.raises(ParamValidationError, match=r"Invalid input for param key: 1 is not"): - pd3["key"] = 1 - - # Should not raise an error as suppress_exception is True - pd2["key"] = 1 - pd2.validate() - - def test_update(self): - pd = ParamsDict({"key": Param("value", type="string")}) - - pd.update({"key": "a"}) - internal_value = pd.get_param("key") - assert isinstance(internal_value, Param) - with pytest.raises(ParamValidationError, match=r"Invalid input for param key: 1 is not"): - pd.update({"key": 1}) - - def test_repr(self): - pd = ParamsDict({"key": Param("value", type="string")}) - assert repr(pd) == "{'key': 'value'}" - - class TestDagParamRuntime: VALUE = 42 DEFAULT_DATE = timezone.datetime(2016, 1, 1) diff --git a/tests/models/test_taskinstance.py b/tests/models/test_taskinstance.py index f819784b3e1d1..628945ebf2aaa 100644 --- a/tests/models/test_taskinstance.py +++ b/tests/models/test_taskinstance.py @@ -60,7 +60,6 @@ from airflow.models.dagbag import DagBag from airflow.models.dagrun import DagRun from airflow.models.expandinput import EXPAND_INPUT_EMPTY, NotFullyPopulated -from airflow.models.param import process_params from airflow.models.pool import Pool from airflow.models.renderedtifields import RenderedTaskInstanceFields from airflow.models.serialized_dag import SerializedDagModel @@ -81,6 +80,7 @@ from airflow.providers.standard.operators.python import PythonOperator from airflow.providers.standard.sensors.python import PythonSensor from airflow.sdk.definitions.asset import Asset, AssetAlias +from airflow.sdk.definitions.param import process_params from airflow.sensors.base import BaseSensorOperator from airflow.serialization.serialized_objects import SerializedBaseOperator, SerializedDAG from airflow.stats import Stats @@ -4007,6 +4007,7 @@ def test_refresh_from_db(self, create_task_instance): "operator": "some_custom_operator", "custom_operator_name": "some_custom_operator", "queued_dttm": run_date + datetime.timedelta(hours=1), + "scheduled_dttm": run_date + datetime.timedelta(hours=1), "rendered_map_index": None, "queued_by_job_id": 321, "pid": 123, diff --git a/tests/serialization/serializers/test_serializers.py b/tests/serialization/serializers/test_serializers.py index 5936a95b23d6d..f3afdbbf769cc 100644 --- a/tests/serialization/serializers/test_serializers.py +++ b/tests/serialization/serializers/test_serializers.py @@ -31,7 +31,7 @@ from pendulum import DateTime from pendulum.tz.timezone import FixedTimezone, Timezone -from airflow.models.param import Param, ParamsDict +from airflow.sdk.definitions.param import Param, ParamsDict from airflow.serialization.serde import DATA, deserialize, serialize PENDULUM3 = version.parse(metadata.version("pendulum")).major == 3 diff --git a/tests/serialization/test_dag_serialization.py 
b/tests/serialization/test_dag_serialization.py index 563e132aa0f92..2cd1ce14a5073 100644 --- a/tests/serialization/test_dag_serialization.py +++ b/tests/serialization/test_dag_serialization.py @@ -63,13 +63,13 @@ from airflow.models.dagbag import DagBag from airflow.models.expandinput import EXPAND_INPUT_EMPTY from airflow.models.mappedoperator import MappedOperator -from airflow.models.param import Param, ParamsDict from airflow.models.xcom import XCom from airflow.operators.empty import EmptyOperator from airflow.providers.cncf.kubernetes.pod_generator import PodGenerator from airflow.providers.standard.operators.bash import BashOperator from airflow.providers.standard.sensors.bash import BashSensor from airflow.sdk.definitions.asset import Asset +from airflow.sdk.definitions.param import Param, ParamsDict from airflow.security import permissions from airflow.serialization.enums import Encoding from airflow.serialization.json_schema import load_dag_schema_dict diff --git a/tests/serialization/test_serialized_objects.py b/tests/serialization/test_serialized_objects.py index 06bb477becdf4..ca6cb78a62794 100644 --- a/tests/serialization/test_serialized_objects.py +++ b/tests/serialization/test_serialized_objects.py @@ -37,12 +37,12 @@ from airflow.models.connection import Connection from airflow.models.dag import DAG from airflow.models.dagrun import DagRun -from airflow.models.param import Param from airflow.models.taskinstance import SimpleTaskInstance, TaskInstance from airflow.models.xcom_arg import XComArg from airflow.operators.empty import EmptyOperator from airflow.providers.standard.operators.python import PythonOperator from airflow.sdk.definitions.asset import Asset, AssetAlias, AssetAliasEvent, AssetUniqueKey +from airflow.sdk.definitions.param import Param from airflow.sdk.execution_time.context import OutletEventAccessor, OutletEventAccessors from airflow.serialization.enums import DagAttributeTypes as DAT, Encoding from airflow.serialization.serialized_objects import BaseSerialization diff --git a/tests/www/views/test_views_tasks.py b/tests/www/views/test_views_tasks.py index 44c4316058171..3efb0a13189cc 100644 --- a/tests/www/views/test_views_tasks.py +++ b/tests/www/views/test_views_tasks.py @@ -1094,6 +1094,7 @@ def test_task_instances(admin_client): "queue": "default", "queued_by_job_id": None, "queued_dttm": None, + "scheduled_dttm": None, "rendered_map_index": None, "run_id": "TEST_DAGRUN", "start_date": None, @@ -1131,6 +1132,7 @@ def test_task_instances(admin_client): "queue": "default", "queued_by_job_id": None, "queued_dttm": None, + "scheduled_dttm": None, "rendered_map_index": None, "run_id": "TEST_DAGRUN", "start_date": None, @@ -1168,6 +1170,7 @@ def test_task_instances(admin_client): "queue": "default", "queued_by_job_id": None, "queued_dttm": None, + "scheduled_dttm": None, "rendered_map_index": None, "run_id": "TEST_DAGRUN", "start_date": None, @@ -1205,6 +1208,7 @@ def test_task_instances(admin_client): "queue": "default", "queued_by_job_id": None, "queued_dttm": None, + "scheduled_dttm": None, "rendered_map_index": None, "run_id": "TEST_DAGRUN", "start_date": None, @@ -1242,6 +1246,7 @@ def test_task_instances(admin_client): "queue": "default", "queued_by_job_id": None, "queued_dttm": None, + "scheduled_dttm": None, "rendered_map_index": None, "run_id": "TEST_DAGRUN", "start_date": None, @@ -1279,6 +1284,7 @@ def test_task_instances(admin_client): "queue": "default", "queued_by_job_id": None, "queued_dttm": None, + "scheduled_dttm": None, 
"rendered_map_index": None, "run_id": "TEST_DAGRUN", "start_date": None, @@ -1316,6 +1322,7 @@ def test_task_instances(admin_client): "queue": "default", "queued_by_job_id": None, "queued_dttm": None, + "scheduled_dttm": None, "rendered_map_index": None, "run_id": "TEST_DAGRUN", "start_date": None, diff --git a/tests/www/views/test_views_trigger_dag.py b/tests/www/views/test_views_trigger_dag.py index 17d0b687b8572..c4136520d7f9b 100644 --- a/tests/www/views/test_views_trigger_dag.py +++ b/tests/www/views/test_views_trigger_dag.py @@ -25,8 +25,8 @@ import pytest from airflow.models import DagBag, DagRun -from airflow.models.param import Param from airflow.operators.empty import EmptyOperator +from airflow.sdk.definitions.param import Param from airflow.security import permissions from airflow.utils import timezone from airflow.utils.json import WebEncoder