diff --git a/.github/actions/checkout_target_commit/action.yml b/.github/actions/checkout_target_commit/action.yml deleted file mode 100644 index e95e8b86254a0..0000000000000 --- a/.github/actions/checkout_target_commit/action.yml +++ /dev/null @@ -1,81 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# ---- -name: 'Checkout target commit' -description: > - Checks out target commit with the exception of .github scripts directories that come from the target branch -inputs: - target-commit-sha: - description: 'SHA of the target commit to checkout' - required: true - pull-request-target: - description: 'Whether the workflow is a pull request target workflow' - required: true - is-committer-build: - description: 'Whether the build is done by a committer' - required: true -runs: - using: "composite" - steps: - - name: "Checkout target commit" - uses: actions/checkout@v4 - with: - ref: ${{ inputs.target-commit-sha }} - persist-credentials: false - #################################################################################################### - # BE VERY CAREFUL HERE! THIS LINE AND THE END OF THE WARNING. 
IN PULL REQUEST TARGET WORKFLOW - # WE CHECK OUT THE TARGET COMMIT ABOVE TO BE ABLE TO BUILD THE IMAGE FROM SOURCES FROM THE - # INCOMING PR, RATHER THAN FROM TARGET BRANCH. THIS IS A SECURITY RISK, BECAUSE THE PR - # CAN CONTAIN ANY CODE AND WE EXECUTE IT HERE. THEREFORE, WE NEED TO BE VERY CAREFUL WHAT WE - # DO HERE. WE SHOULD NOT EXECUTE ANY CODE THAT COMES FROM THE PR. WE SHOULD NOT RUN ANY BREEZE - # COMMAND NOR SCRIPTS NOR COMPOSITE ACTIONS. WE SHOULD ONLY RUN CODE THAT IS EMBEDDED DIRECTLY IN - # THIS WORKFLOW - BECAUSE THIS IS THE ONLY CODE THAT WE CAN TRUST. - #################################################################################################### - - name: Checkout target branch to 'target-airflow' folder to use ci/scripts and breeze from there. - uses: actions/checkout@v4 - with: - path: "target-airflow" - ref: ${{ github.base_ref }} - persist-credentials: false - if: inputs.pull-request-target == 'true' && inputs.is-committer-build != 'true' - - name: > - Replace "scripts/ci", "dev", ".github/actions" and ".github/workflows" with the target branch - so that the those directories are not coming from the PR - shell: bash - run: | - echo - echo -e "\033[33m Replace scripts, dev, actions with target branch for non-committer builds!\033[0m" - echo - rm -rfv "scripts/ci" - rm -rfv "dev" - rm -rfv ".github/actions" - rm -rfv ".github/workflows" - rm -v ".dockerignore" || true - mv -v "target-airflow/scripts/ci" "scripts" - mv -v "target-airflow/dev" "." - mv -v "target-airflow/.github/actions" "target-airflow/.github/workflows" ".github" - mv -v "target-airflow/.dockerignore" ".dockerignore" || true - if: inputs.pull-request-target == 'true' && inputs.is-committer-build != 'true' - #################################################################################################### - # AFTER IT'S SAFE. THE `dev`, `scripts/ci` AND `.github/actions` and `.dockerignore` ARE NOW COMING - # FROM THE BASE_REF - WHICH IS THE TARGET BRANCH OF THE PR. 
WE CAN TRUST THAT THOSE SCRIPTS ARE - # SAFE TO RUN AND CODE AVAILABLE IN THE DOCKER BUILD PHASE IS CONTROLLED BY THE `.dockerignore`. - # ALL THE REST OF THE CODE COMES FROM THE PR, AND FOR EXAMPLE THE CODE IN THE `Dockerfile.ci` CAN - # BE RUN SAFELY AS PART OF DOCKER BUILD. BECAUSE IT RUNS INSIDE THE DOCKER CONTAINER AND IT IS - # ISOLATED FROM THE RUNNER. - #################################################################################################### diff --git a/.github/actions/post_tests_success/action.yml b/.github/actions/post_tests_success/action.yml index 37b51154d3e13..b7b00a6fc0df3 100644 --- a/.github/actions/post_tests_success/action.yml +++ b/.github/actions/post_tests_success/action.yml @@ -33,7 +33,7 @@ runs: - name: "Upload artifact for warnings" uses: actions/upload-artifact@v4 with: - name: test-warnings-${{env.JOB_ID}} + name: test-warnings-${{ env.JOB_ID }} path: ./files/warnings-*.txt retention-days: 7 if-no-files-found: ignore @@ -50,5 +50,5 @@ runs: if: env.ENABLE_COVERAGE == 'true' && env.TEST_TYPES != 'Helm' && inputs.python-version != '3.12' with: name: coverage-${{env.JOB_ID}} - flags: python-${{env.PYTHON_MAJOR_MINOR_VERSION}},${{env.BACKEND}}-${{env.BACKEND_VERSION}} + flags: python-${{ env.PYTHON_MAJOR_MINOR_VERSION }},${{ env.BACKEND }}-${{ env.BACKEND_VERSION }} directory: "./files/coverage-reports/" diff --git a/.github/actions/prepare_all_ci_images/action.yml b/.github/actions/prepare_all_ci_images/action.yml new file mode 100644 index 0000000000000..f815c27228928 --- /dev/null +++ b/.github/actions/prepare_all_ci_images/action.yml @@ -0,0 +1,72 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +--- +name: 'Prepare all CI images' +description: 'Recreates current python CI images from artifacts for all python versions' +inputs: + python-versions-list-as-string: + description: 'Stringified array of all Python versions to test - separated by spaces.' + required: true + platform: + description: 'Platform for the build - linux/amd64 or linux/arm64' + required: true +outputs: + host-python-version: + description: Python version used in host + value: ${{ steps.breeze.outputs.host-python-version }} +runs: + using: "composite" + steps: + - name: "Cleanup docker" + run: ./scripts/ci/cleanup_docker.sh + shell: bash + # TODO: Currently we cannot loop through the list of python versions and have dynamic list of + # tasks. Instead we hardcode all possible python versions and skip those not in the list - but + # this should be implemented in stash action as list of keys to download.
+ # That includes 3.8 - 3.12 as we are backporting it to v2-10-test branch + # This is captured in https://github.com/apache/airflow/issues/45268 + - name: "Restore CI docker image ${{ inputs.platform }}:3.8" + uses: ./.github/actions/prepare_single_ci_image + with: + platform: ${{ inputs.platform }} + python: "3.8" + python-versions-list-as-string: ${{ inputs.python-versions-list-as-string }} + - name: "Restore CI docker image ${{ inputs.platform }}:3.9" + uses: ./.github/actions/prepare_single_ci_image + with: + platform: ${{ inputs.platform }} + python: "3.9" + python-versions-list-as-string: ${{ inputs.python-versions-list-as-string }} + - name: "Restore CI docker image ${{ inputs.platform }}:3.10" + uses: ./.github/actions/prepare_single_ci_image + with: + platform: ${{ inputs.platform }} + python: "3.10" + python-versions-list-as-string: ${{ inputs.python-versions-list-as-string }} + - name: "Restore CI docker image ${{ inputs.platform }}:3.11" + uses: ./.github/actions/prepare_single_ci_image + with: + platform: ${{ inputs.platform }} + python: "3.11" + python-versions-list-as-string: ${{ inputs.python-versions-list-as-string }} + - name: "Restore CI docker image ${{ inputs.platform }}:3.12" + uses: ./.github/actions/prepare_single_ci_image + with: + platform: ${{ inputs.platform }} + python: "3.12" + python-versions-list-as-string: ${{ inputs.python-versions-list-as-string }} diff --git a/.github/actions/prepare_breeze_and_image/action.yml b/.github/actions/prepare_breeze_and_image/action.yml index 41aa17092d589..2f8757c14cddd 100644 --- a/.github/actions/prepare_breeze_and_image/action.yml +++ b/.github/actions/prepare_breeze_and_image/action.yml @@ -16,12 +16,18 @@ # under the License. 
# --- -name: 'Prepare breeze && current python image' -description: 'Installs breeze and pulls current python image' +name: 'Prepare breeze && current image (CI or PROD)' +description: 'Installs breeze and recreates current python image from artifact' inputs: - pull-image-type: - description: 'Which image to pull' - default: CI + python: + description: 'Python version for image to prepare' + required: true + image-type: + description: 'Which image type to prepare (ci/prod)' + default: "ci" + platform: + description: 'Platform for the build - linux/amd64 or linux/arm64' + required: true outputs: host-python-version: description: Python version used in host @@ -29,17 +35,19 @@ outputs: runs: using: "composite" steps: + - name: "Cleanup docker" + run: ./scripts/ci/cleanup_docker.sh + shell: bash - name: "Install Breeze" uses: ./.github/actions/breeze id: breeze - - name: Login to ghcr.io - shell: bash - run: echo "${{ env.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin - - name: Pull CI image ${{ env.PYTHON_MAJOR_MINOR_VERSION }}:${{ env.IMAGE_TAG }} - shell: bash - run: breeze ci-image pull --tag-as-latest - if: inputs.pull-image-type == 'CI' - - name: Pull PROD image ${{ env.PYTHON_MAJOR_MINOR_VERSION }}:${{ env.IMAGE_TAG }} + - name: "Restore ${{ inputs.image-type }} docker image ${{ inputs.platform }}:${{ inputs.python }}" + uses: apache/infrastructure-actions/stash/restore@c94b890bbedc2fc61466d28e6bd9966bc6c6643c + with: + key: "${{ inputs.image-type }}-image-save-${{ inputs.platform }}-${{ inputs.python }}" + path: "/tmp/" + - name: "Load ${{ inputs.image-type }} image ${{ inputs.platform }}:${{ inputs.python }}" + run: > + breeze ${{ inputs.image-type }}-image load + --platform ${{ inputs.platform }} --python ${{ inputs.python }} shell: bash - run: breeze prod-image pull --tag-as-latest - if: inputs.pull-image-type == 'PROD' diff --git a/.github/actions/prepare_single_ci_image/action.yml 
b/.github/actions/prepare_single_ci_image/action.yml new file mode 100644 index 0000000000000..e44a2e62321cc --- /dev/null +++ b/.github/actions/prepare_single_ci_image/action.yml @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +--- +name: 'Prepare single CI image' +description: > + Recreates current python image from artifacts (needed for the hard-coded actions calling all + possible Python versions in "prepare_all_ci_images" action). Hopefully we can get rid of it when + the https://github.com/apache/airflow/issues/45268 is resolved and we contribute capability of + downloading multiple keys to the stash action. +inputs: + python: + description: 'Python version for image to prepare' + required: true + python-versions-list-as-string: + description: 'Stringified array of all Python versions to prepare - separated by spaces.'
+ required: true + platform: + description: 'Platform for the build - linux/amd64 or linux/arm64' + required: true +runs: + using: "composite" + steps: + - name: "Restore CI docker images ${{ inputs.platform }}:${{ inputs.python }}" + uses: apache/infrastructure-actions/stash/restore@c94b890bbedc2fc61466d28e6bd9966bc6c6643c + with: + key: "ci-image-save-${{ inputs.platform }}-${{ inputs.python }}" + path: "/tmp/" + if: contains(inputs.python-versions-list-as-string, inputs.python) + - name: "Load CI image ${{ inputs.platform }}:${{ inputs.python }}" + run: breeze ci-image load --platform "${{ inputs.platform }}" --python "${{ inputs.python }}" + shell: bash + if: contains(inputs.python-versions-list-as-string, inputs.python) diff --git a/.github/workflows/additional-ci-image-checks.yml b/.github/workflows/additional-ci-image-checks.yml index 8a3b46e70d37d..40196a6e04296 100644 --- a/.github/workflows/additional-ci-image-checks.yml +++ b/.github/workflows/additional-ci-image-checks.yml @@ -32,10 +32,6 @@ on: # yamllint disable-line rule:truthy description: "The array of labels (in json form) determining self-hosted runners." required: true type: string - image-tag: - description: "Tag to set for the image" - required: true - type: string python-versions: description: "The list of python versions (stringified JSON array) to run the tests on." required: true @@ -103,8 +99,6 @@ jobs: contents: read # This write is only given here for `push` events from "apache/airflow" repo. It is not given for PRs # from forks. This is to prevent malicious PRs from creating images in the "apache/airflow" repo. - # For regular build for PRS this "build-prod-images" workflow will be skipped anyway by the - # "in-workflow-build" condition packages: write secrets: inherit with: @@ -159,7 +153,7 @@ jobs: # # There is no point in running this one in "canary" run, because the above step is doing the # # same build anyway. 
# build-ci-arm-images: -# name: Build CI ARM images (in-workflow) +# name: Build CI ARM images # uses: ./.github/workflows/ci-image-build.yml # permissions: # contents: read @@ -169,7 +163,6 @@ jobs: # push-image: "false" # runs-on-as-json-public: ${{ inputs.runs-on-as-json-public }} # runs-on-as-json-self-hosted: ${{ inputs.runs-on-as-json-self-hosted }} -# image-tag: ${{ inputs.image-tag }} # python-versions: ${{ inputs.python-versions }} # platform: "linux/arm64" # branch: ${{ inputs.branch }} diff --git a/.github/workflows/additional-prod-image-tests.yml b/.github/workflows/additional-prod-image-tests.yml index 5ffd2001e0e26..62dd723999a8c 100644 --- a/.github/workflows/additional-prod-image-tests.yml +++ b/.github/workflows/additional-prod-image-tests.yml @@ -32,10 +32,6 @@ on: # yamllint disable-line rule:truthy description: "Branch used to construct constraints URL from." required: true type: string - image-tag: - description: "Tag to set for the image" - required: true - type: string upgrade-to-newer-dependencies: description: "Whether to upgrade to newer dependencies (true/false)" required: true @@ -70,7 +66,6 @@ jobs: default-python-version: ${{ inputs.default-python-version }} branch: ${{ inputs.default-branch }} use-uv: "false" - image-tag: ${{ inputs.image-tag }} build-provider-packages: ${{ inputs.default-branch == 'main' }} upgrade-to-newer-dependencies: ${{ inputs.upgrade-to-newer-dependencies }} chicken-egg-providers: ${{ inputs.chicken-egg-providers }} @@ -88,7 +83,6 @@ jobs: default-python-version: ${{ inputs.default-python-version }} branch: ${{ inputs.default-branch }} use-uv: "false" - image-tag: ${{ inputs.image-tag }} build-provider-packages: ${{ inputs.default-branch == 'main' }} upgrade-to-newer-dependencies: ${{ inputs.upgrade-to-newer-dependencies }} chicken-egg-providers: ${{ inputs.chicken-egg-providers }} @@ -117,36 +111,25 @@ jobs: persist-credentials: false - name: "Cleanup docker" run: ./scripts/ci/cleanup_docker.sh - - name: 
"Install Breeze" - uses: ./.github/actions/breeze - - name: Login to ghcr.io - shell: bash - run: echo "${{ env.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin - - name: Pull PROD image ${{ inputs.default-python-version}}:${{ inputs.image-tag }} - run: breeze prod-image pull --tag-as-latest - env: - PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}" - IMAGE_TAG: "${{ inputs.image-tag }}" - - name: "Setup python" - uses: actions/setup-python@v5 + - name: "Prepare breeze & PROD image: ${{ inputs.default-python-version }}" + uses: ./.github/actions/prepare_breeze_and_image with: - python-version: ${{ inputs.default-python-version }} - cache: 'pip' - cache-dependency-path: ./dev/requirements.txt + platform: "linux/amd64" + image-type: "prod" + python: ${{ inputs.default-python-version }} - name: "Test examples of PROD image building" run: " cd ./docker_tests && \ python -m pip install -r requirements.txt && \ TEST_IMAGE=\"ghcr.io/${{ github.repository }}/${{ inputs.default-branch }}\ - /prod/python${{ inputs.default-python-version }}:${{ inputs.image-tag }}\" \ + /prod/python${{ inputs.default-python-version }}\" \ python -m pytest test_examples_of_prod_image_building.py -n auto --color=yes" test-docker-compose-quick-start: timeout-minutes: 60 - name: "Docker-compose quick start with PROD image verifying" + name: "Docker Compose quick start with PROD image verifying" runs-on: ${{ fromJSON(inputs.runs-on-as-json-public) }} env: - IMAGE_TAG: "${{ inputs.image-tag }}" PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}" GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -161,14 +144,12 @@ jobs: with: fetch-depth: 2 persist-credentials: false - - name: "Cleanup docker" - run: ./scripts/ci/cleanup_docker.sh - - name: "Install Breeze" - uses: ./.github/actions/breeze - - name: Login to ghcr.io - shell: bash - run: echo "${{ env.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ 
github.actor }} --password-stdin - - name: "Pull image ${{ inputs.default-python-version}}:${{ inputs.image-tag }}" - run: breeze prod-image pull --tag-as-latest + - name: "Prepare breeze & PROD image: ${{ env.PYTHON_MAJOR_MINOR_VERSION }}" + uses: ./.github/actions/prepare_breeze_and_image + with: + platform: "linux/amd64" + image-type: "prod" + python: ${{ env.PYTHON_MAJOR_MINOR_VERSION }} + id: breeze - name: "Test docker-compose quick start" run: breeze testing docker-compose-tests diff --git a/.github/workflows/basic-tests.yml b/.github/workflows/basic-tests.yml index c8ba85969f5e3..47f80f05b7ac7 100644 --- a/.github/workflows/basic-tests.yml +++ b/.github/workflows/basic-tests.yml @@ -288,7 +288,6 @@ jobs: runs-on: ${{ fromJSON(inputs.runs-on-as-json-public) }} env: PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}" - IMAGE_TAG: ${{ inputs.image-tag }} GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_USERNAME: ${{ github.actor }} diff --git a/.github/workflows/build-images.yml b/.github/workflows/build-images.yml deleted file mode 100644 index 9135dcb9d9e94..0000000000000 --- a/.github/workflows/build-images.yml +++ /dev/null @@ -1,264 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. -# ---- -name: Build Images -run-name: > - Build images for ${{ github.event.pull_request.title }} ${{ github.event.pull_request._links.html.href }} -on: # yamllint disable-line rule:truthy - pull_request_target: - branches: - - main - - v2-10-stable - - v2-10-test - - providers-[a-z]+-?[a-z]*/v[0-9]+-[0-9]+ -permissions: - # all other permissions are set to none - contents: read - pull-requests: read - packages: read -env: - ANSWER: "yes" - # You can override CONSTRAINTS_GITHUB_REPOSITORY by setting secret in your repo but by default the - # Airflow one is going to be used - CONSTRAINTS_GITHUB_REPOSITORY: >- - ${{ secrets.CONSTRAINTS_GITHUB_REPOSITORY != '' && - secrets.CONSTRAINTS_GITHUB_REPOSITORY || 'apache/airflow' }} - # This token is WRITE one - pull_request_target type of events always have the WRITE token - DB_RESET: "true" - GITHUB_REPOSITORY: ${{ github.repository }} - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - GITHUB_USERNAME: ${{ github.actor }} - IMAGE_TAG: "${{ github.event.pull_request.head.sha || github.sha }}" - INCLUDE_SUCCESS_OUTPUTS: "true" - USE_SUDO: "true" - VERBOSE: "true" - -concurrency: - group: build-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true - -jobs: - build-info: - timeout-minutes: 10 - name: Build Info - # At build-info stage we do not yet have outputs so we need to hard-code the runs-on to public runners - runs-on: ["ubuntu-22.04"] - env: - TARGET_BRANCH: ${{ github.event.pull_request.base.ref }} - outputs: - image-tag: ${{ github.event.pull_request.head.sha || github.sha }} - python-versions: ${{ steps.selective-checks.outputs.python-versions }} - python-versions-list-as-string: ${{ steps.selective-checks.outputs.python-versions-list-as-string }} - default-python-version: ${{ steps.selective-checks.outputs.default-python-version }} - upgrade-to-newer-dependencies: ${{ 
steps.selective-checks.outputs.upgrade-to-newer-dependencies }} - run-tests: ${{ steps.selective-checks.outputs.run-tests }} - run-kubernetes-tests: ${{ steps.selective-checks.outputs.run-kubernetes-tests }} - ci-image-build: ${{ steps.selective-checks.outputs.ci-image-build }} - prod-image-build: ${{ steps.selective-checks.outputs.prod-image-build }} - docker-cache: ${{ steps.selective-checks.outputs.docker-cache }} - default-branch: ${{ steps.selective-checks.outputs.default-branch }} - disable-airflow-repo-cache: ${{ steps.selective-checks.outputs.disable-airflow-repo-cache }} - force-pip: ${{ steps.selective-checks.outputs.force-pip }} - constraints-branch: ${{ steps.selective-checks.outputs.default-constraints-branch }} - runs-on-as-json-default: ${{ steps.selective-checks.outputs.runs-on-as-json-default }} - runs-on-as-json-public: ${{ steps.selective-checks.outputs.runs-on-as-json-public }} - runs-on-as-json-self-hosted: ${{ steps.selective-checks.outputs.runs-on-as-json-self-hosted }} - is-self-hosted-runner: ${{ steps.selective-checks.outputs.is-self-hosted-runner }} - is-committer-build: ${{ steps.selective-checks.outputs.is-committer-build }} - is-airflow-runner: ${{ steps.selective-checks.outputs.is-airflow-runner }} - is-amd-runner: ${{ steps.selective-checks.outputs.is-amd-runner }} - is-arm-runner: ${{ steps.selective-checks.outputs.is-arm-runner }} - is-vm-runner: ${{ steps.selective-checks.outputs.is-vm-runner }} - is-k8s-runner: ${{ steps.selective-checks.outputs.is-k8s-runner }} - chicken-egg-providers: ${{ steps.selective-checks.outputs.chicken-egg-providers }} - target-commit-sha: "${{steps.discover-pr-merge-commit.outputs.target-commit-sha || - github.event.pull_request.head.sha || - github.sha - }}" - if: github.repository == 'apache/airflow' - steps: - - name: Cleanup repo - shell: bash - run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - - name: Discover PR merge commit - id: discover-pr-merge-commit 
- run: | - # Sometimes target-commit-sha cannot be - TARGET_COMMIT_SHA="$(gh api '${{ github.event.pull_request.url }}' --jq .merge_commit_sha)" - if [[ ${TARGET_COMMIT_SHA} == "" ]]; then - # Sometimes retrieving the merge commit SHA from PR fails. We retry it once. Otherwise we - # fall-back to github.event.pull_request.head.sha - echo - echo "Could not retrieve merge commit SHA from PR, waiting for 3 seconds and retrying." - echo - sleep 3 - TARGET_COMMIT_SHA="$(gh api '${{ github.event.pull_request.url }}' --jq .merge_commit_sha)" - if [[ ${TARGET_COMMIT_SHA} == "" ]]; then - echo - echo "Could not retrieve merge commit SHA from PR, falling back to PR head SHA." - echo - TARGET_COMMIT_SHA="${{ github.event.pull_request.head.sha }}" - fi - fi - echo "TARGET_COMMIT_SHA=${TARGET_COMMIT_SHA}" - echo "TARGET_COMMIT_SHA=${TARGET_COMMIT_SHA}" >> ${GITHUB_ENV} - echo "target-commit-sha=${TARGET_COMMIT_SHA}" >> ${GITHUB_OUTPUT} - if: github.event_name == 'pull_request_target' - # The labels in the event aren't updated when re-triggering the job, So lets hit the API to get - # up-to-date values - - name: Get latest PR labels - id: get-latest-pr-labels - run: | - echo -n "pull-request-labels=" >> ${GITHUB_OUTPUT} - gh api graphql --paginate -F node_id=${{github.event.pull_request.node_id}} -f query=' - query($node_id: ID!, $endCursor: String) { - node(id:$node_id) { - ... on PullRequest { - labels(first: 100, after: $endCursor) { - nodes { name } - pageInfo { hasNextPage endCursor } - } - } - } - }' --jq '.data.node.labels.nodes[]' | jq --slurp -c '[.[].name]' >> ${GITHUB_OUTPUT} - if: github.event_name == 'pull_request_target' - - uses: actions/checkout@v4 - with: - ref: ${{ env.TARGET_COMMIT_SHA }} - persist-credentials: false - fetch-depth: 2 - #################################################################################################### - # WE ONLY DO THAT CHECKOUT ABOVE TO RETRIEVE THE TARGET COMMIT AND IT'S PARENT. 
DO NOT RUN ANY CODE - # RIGHT AFTER THAT AS WE ARE GOING TO RESTORE THE TARGET BRANCH CODE IN THE NEXT STEP. - #################################################################################################### - - name: Checkout target branch to use ci/scripts and breeze from there. - uses: actions/checkout@v4 - with: - ref: ${{ github.base_ref }} - persist-credentials: false - #################################################################################################### - # HERE EVERYTHING IS PERFECTLY SAFE TO RUN. AT THIS POINT WE HAVE THE TARGET BRANCH CHECKED OUT - # AND WE CAN RUN ANY CODE FROM IT. WE CAN RUN BREEZE COMMANDS, WE CAN RUN SCRIPTS, WE CAN RUN - # COMPOSITE ACTIONS. WE CAN RUN ANYTHING THAT IS IN THE TARGET BRANCH AND THERE IS NO RISK THAT - # CODE WILL BE RUN FROM THE PR. - #################################################################################################### - - name: Cleanup docker - run: ./scripts/ci/cleanup_docker.sh - - name: Setup python - uses: actions/setup-python@v5 - with: - python-version: "3.9" - - name: Install Breeze - uses: ./.github/actions/breeze - #################################################################################################### - # WE RUN SELECTIVE CHECKS HERE USING THE TARGET COMMIT AND ITS PARENT TO BE ABLE TO COMPARE THEM - # AND SEE WHAT HAS CHANGED IN THE PR. 
THE CODE IS STILL RUN FROM THE TARGET BRANCH, SO IT IS SAFE - # TO RUN IT, WE ONLY PASS TARGET_COMMIT_SHA SO THAT SELECTIVE CHECKS CAN SEE WHAT'S COMING IN THE PR - #################################################################################################### - - name: Selective checks - id: selective-checks - env: - PR_LABELS: "${{ steps.get-latest-pr-labels.outputs.pull-request-labels }}" - COMMIT_REF: "${{ env.TARGET_COMMIT_SHA }}" - VERBOSE: "false" - AIRFLOW_SOURCES_ROOT: "${{ github.workspace }}" - run: breeze ci selective-check 2>> ${GITHUB_OUTPUT} - - name: env - run: printenv - env: - PR_LABELS: ${{ steps.get-latest-pr-labels.outputs.pull-request-labels }} - GITHUB_CONTEXT: ${{ toJson(github) }} - - - build-ci-images: - name: Build CI images - permissions: - contents: read - packages: write - secrets: inherit - needs: [build-info] - uses: ./.github/workflows/ci-image-build.yml - # Only run this it if the PR comes from fork, otherwise build will be done "in-PR-workflow" - if: | - needs.build-info.outputs.ci-image-build == 'true' && - github.event.pull_request.head.repo.full_name != 'apache/airflow' - with: - runs-on-as-json-public: ${{ needs.build-info.outputs.runs-on-as-json-public }} - runs-on-as-json-self-hosted: ${{ needs.build-info.outputs.runs-on-as-json-self-hosted }} - do-build: ${{ needs.build-info.outputs.ci-image-build }} - target-commit-sha: ${{ needs.build-info.outputs.target-commit-sha }} - pull-request-target: "true" - is-committer-build: ${{ needs.build-info.outputs.is-committer-build }} - push-image: "true" - use-uv: ${{ needs.build-info.outputs.force-pip == 'true' && 'false' || 'true' }} - image-tag: ${{ needs.build-info.outputs.image-tag }} - platform: "linux/amd64" - python-versions: ${{ needs.build-info.outputs.python-versions }} - branch: ${{ needs.build-info.outputs.default-branch }} - constraints-branch: ${{ needs.build-info.outputs.constraints-branch }} - upgrade-to-newer-dependencies: ${{ 
needs.build-info.outputs.upgrade-to-newer-dependencies }} - docker-cache: ${{ needs.build-info.outputs.docker-cache }} - disable-airflow-repo-cache: ${{ needs.build-info.outputs.disable-airflow-repo-cache }} - - - generate-constraints: - name: Generate constraints - needs: [build-info, build-ci-images] - uses: ./.github/workflows/generate-constraints.yml - with: - runs-on-as-json-public: ${{ needs.build-info.outputs.runs-on-as-json-public }} - python-versions-list-as-string: ${{ needs.build-info.outputs.python-versions-list-as-string }} - # For regular PRs we do not need "no providers" constraints - they are only needed in canary builds - generate-no-providers-constraints: "false" - image-tag: ${{ needs.build-info.outputs.image-tag }} - chicken-egg-providers: ${{ needs.build-info.outputs.chicken-egg-providers }} - debug-resources: ${{ needs.build-info.outputs.debug-resources }} - - build-prod-images: - name: Build PROD images - permissions: - contents: read - packages: write - secrets: inherit - needs: [build-info, generate-constraints] - uses: ./.github/workflows/prod-image-build.yml - # Only run this it if the PR comes from fork, otherwise build will be done "in-PR-workflow" - if: | - needs.build-info.outputs.prod-image-build == 'true' && - github.event.pull_request.head.repo.full_name != 'apache/airflow' - with: - runs-on-as-json-public: ${{ needs.build-info.outputs.runs-on-as-json-public }} - build-type: "Regular" - do-build: ${{ needs.build-info.outputs.ci-image-build }} - upload-package-artifact: "true" - target-commit-sha: ${{ needs.build-info.outputs.target-commit-sha }} - pull-request-target: "true" - is-committer-build: ${{ needs.build-info.outputs.is-committer-build }} - push-image: "true" - use-uv: ${{ needs.build-info.outputs.force-pip == 'true' && 'false' || 'true' }} - image-tag: ${{ needs.build-info.outputs.image-tag }} - platform: linux/amd64 - python-versions: ${{ needs.build-info.outputs.python-versions }} - default-python-version: ${{ 
needs.build-info.outputs.default-python-version }} - branch: ${{ needs.build-info.outputs.default-branch }} - constraints-branch: ${{ needs.build-info.outputs.constraints-branch }} - build-provider-packages: ${{ needs.build-info.outputs.default-branch == 'main' }} - upgrade-to-newer-dependencies: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} - chicken-egg-providers: ${{ needs.build-info.outputs.chicken-egg-providers }} - docker-cache: ${{ needs.build-info.outputs.docker-cache }} - disable-airflow-repo-cache: ${{ needs.build-info.outputs.disable-airflow-repo-cache }} diff --git a/.github/workflows/ci-image-build.yml b/.github/workflows/ci-image-build.yml index b8e2feac1755f..bed2233dfcb07 100644 --- a/.github/workflows/ci-image-build.yml +++ b/.github/workflows/ci-image-build.yml @@ -28,13 +28,6 @@ on: # yamllint disable-line rule:truthy description: "The array of labels (in json form) determining self-hosted runners." required: true type: string - do-build: - description: > - Whether to actually do the build (true/false). If set to false, the build is done - already in pull-request-target workflow, so we skip it here. 
- required: false - default: "true" - type: string target-commit-sha: description: "The commit SHA to checkout for the build" required: false @@ -59,6 +52,10 @@ on: # yamllint disable-line rule:truthy required: false default: "true" type: string + upload-image-artifact: + description: "Whether to upload docker image artifact" + required: true + type: string debian-version: description: "Base Debian distribution to use for the build (bookworm)" type: string @@ -71,10 +68,6 @@ on: # yamllint disable-line rule:truthy description: "Whether to use uv to build the image (true/false)" required: true type: string - image-tag: - description: "Tag to set for the image" - required: true - type: string python-versions: description: "JSON-formatted array of Python versions to build images from" required: true @@ -104,20 +97,9 @@ jobs: strategy: fail-fast: true matrix: - # yamllint disable-line rule:line-length - python-version: ${{ inputs.do-build == 'true' && fromJSON(inputs.python-versions) || fromJSON('[""]') }} + python-version: ${{ fromJSON(inputs.python-versions) || fromJSON('[""]') }} timeout-minutes: 110 - name: "\ -${{ inputs.do-build == 'true' && 'Build' || 'Skip building' }} \ -CI ${{ inputs.platform }} image\ -${{ matrix.python-version }}${{ inputs.do-build == 'true' && ':' || '' }}\ -${{ inputs.do-build == 'true' && inputs.image-tag || '' }}" - # The ARM images need to be built using self-hosted runners as ARM macos public runners - # do not yet allow us to run docker effectively and fast. - # https://github.com/actions/runner-images/issues/9254#issuecomment-1917916016 - # https://github.com/abiosoft/colima/issues/970 - # https://github.com/actions/runner/issues/1456 - # See https://github.com/apache/airflow/pull/38640 + name: "Build CI ${{ inputs.platform }} image ${{ matrix.python-version }}" # NOTE!!!!! This has to be put in one line for runs-on to recognize the "fromJSON" properly !!!! 
# adding space before (with >) apparently turns the `runs-on` processed line into a string "Array" # instead of an array of strings. @@ -125,6 +107,7 @@ ${{ inputs.do-build == 'true' && inputs.image-tag || '' }}" runs-on: ${{ (inputs.platform == 'linux/amd64') && fromJSON(inputs.runs-on-as-json-public) || fromJSON(inputs.runs-on-as-json-self-hosted) }} env: BACKEND: sqlite + PYTHON_MAJOR_MINOR_VERSION: ${{ matrix.python-version }} DEFAULT_BRANCH: ${{ inputs.branch }} DEFAULT_CONSTRAINTS_BRANCH: ${{ inputs.constraints-branch }} VERSION_SUFFIX_FOR_PYPI: "dev0" @@ -137,42 +120,23 @@ ${{ inputs.do-build == 'true' && inputs.image-tag || '' }}" - name: "Cleanup repo" shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - if: inputs.do-build == 'true' - name: "Checkout target branch" uses: actions/checkout@v4 with: persist-credentials: false - - name: "Checkout target commit" - uses: ./.github/actions/checkout_target_commit - if: inputs.do-build == 'true' - with: - target-commit-sha: ${{ inputs.target-commit-sha }} - pull-request-target: ${{ inputs.pull-request-target }} - is-committer-build: ${{ inputs.is-committer-build }} - name: "Cleanup docker" run: ./scripts/ci/cleanup_docker.sh - if: inputs.do-build == 'true' - name: "Install Breeze" uses: ./.github/actions/breeze - if: inputs.do-build == 'true' - - name: "Regenerate dependencies in case they were modified manually so that we can build an image" - shell: bash - run: | - pip install rich>=12.4.4 pyyaml - python scripts/ci/pre_commit/update_providers_dependencies.py - if: inputs.do-build == 'true' && inputs.upgrade-to-newer-dependencies != 'false' - - name: "Start ARM instance" - run: ./scripts/ci/images/ci_start_arm_instance_and_connect_to_docker.sh - if: inputs.do-build == 'true' && inputs.platform == 'linux/arm64' - - name: Login to ghcr.io + - name: "Login to ghcr.io" run: echo "${{ env.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin 
- if: inputs.do-build == 'true' - name: > Build ${{ inputs.push-image == 'true' && ' & push ' || '' }} - ${{ inputs.platform }}:${{ matrix.python-version }}:${{ inputs.image-tag }} + ${{ inputs.platform }}:${{ env.PYTHON_MAJOR_MINOR_VERSION }} image run: > - breeze ci-image build --builder airflow_cache --tag-as-latest --image-tag "${{ inputs.image-tag }}" - --python "${{ matrix.python-version }}" --platform "${{ inputs.platform }}" + breeze ci-image build + --builder airflow_cache + --platform "${{ inputs.platform }}" env: DOCKER_CACHE: ${{ inputs.docker-cache }} DISABLE_AIRFLOW_REPO_CACHE: ${{ inputs.disable-airflow-repo-cache }} @@ -189,7 +153,14 @@ ${{ inputs.do-build == 'true' && inputs.image-tag || '' }}" GITHUB_USERNAME: ${{ github.actor }} PUSH: ${{ inputs.push-image }} VERBOSE: "true" - if: inputs.do-build == 'true' - - name: "Stop ARM instance" - run: ./scripts/ci/images/ci_stop_arm_instance.sh - if: always() && inputs.do-build == 'true' && inputs.platform == 'linux/arm64' + - name: "Export CI docker image ${{ env.PYTHON_MAJOR_MINOR_VERSION }}" + run: breeze ci-image save --platform "${{ inputs.platform }}" + if: inputs.upload-image-artifact == 'true' + - name: "Stash CI docker image ${{ env.PYTHON_MAJOR_MINOR_VERSION }}" + uses: apache/infrastructure-actions/stash/save@c94b890bbedc2fc61466d28e6bd9966bc6c6643c + with: + key: "ci-image-save-${{ inputs.platform }}-${{ env.PYTHON_MAJOR_MINOR_VERSION }}" + path: "/tmp/ci-image-save-*-${{ env.PYTHON_MAJOR_MINOR_VERSION }}.tar" + if-no-files-found: 'error' + retention-days: 2 + if: inputs.upload-image-artifact == 'true' diff --git a/.github/workflows/ci-image-checks.yml b/.github/workflows/ci-image-checks.yml index 63598755c32d0..8e0e5e71dd00a 100644 --- a/.github/workflows/ci-image-checks.yml +++ b/.github/workflows/ci-image-checks.yml @@ -28,10 +28,6 @@ on: # yamllint disable-line rule:truthy description: "The array of labels (in json form) determining the labels used for docs build." 
required: true type: string - image-tag: - description: "Tag to set for the image" - required: true - type: string needs-mypy: description: "Whether to run mypy checks (true/false)" required: true @@ -117,7 +113,6 @@ jobs: env: PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}" UPGRADE_TO_NEWER_DEPENDENCIES: "${{ inputs.upgrade-to-newer-dependencies }}" - IMAGE_TAG: ${{ inputs.image-tag }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} if: inputs.basic-checks-only == 'false' && inputs.latest-versions-only != 'true' steps: @@ -134,10 +129,11 @@ jobs: python-version: ${{ inputs.default-python-version }} cache: 'pip' cache-dependency-path: ./dev/breeze/pyproject.toml - - name: "Cleanup docker" - run: ./scripts/ci/cleanup_docker.sh - - name: "Prepare breeze & CI image: ${{ inputs.default-python-version}}:${{ inputs.image-tag }}" + - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}" uses: ./.github/actions/prepare_breeze_and_image + with: + platform: "linux/amd64" + python: ${{ inputs.default-python-version }} id: breeze - name: "Install pre-commit" uses: ./.github/actions/install-pre-commit @@ -165,7 +161,6 @@ jobs: mypy-check: ${{ fromJSON(inputs.mypy-checks) }} env: PYTHON_MAJOR_MINOR_VERSION: "${{inputs.default-python-version}}" - IMAGE_TAG: "${{ inputs.image-tag }}" GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: - name: "Cleanup repo" @@ -175,10 +170,11 @@ jobs: uses: actions/checkout@v4 with: persist-credentials: false - - name: "Cleanup docker" - run: ./scripts/ci/cleanup_docker.sh - - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}:${{ inputs.image-tag }}" + - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}" uses: ./.github/actions/prepare_breeze_and_image + with: + platform: "linux/amd64" + python: ${{ inputs.default-python-version }} id: breeze - name: "Install pre-commit" uses: ./.github/actions/install-pre-commit @@ -208,7 +204,6 @@ jobs: GITHUB_REPOSITORY: ${{ github.repository 
}} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_USERNAME: ${{ github.actor }} - IMAGE_TAG: "${{ inputs.image-tag }}" INCLUDE_NOT_READY_PROVIDERS: "true" INCLUDE_SUCCESS_OUTPUTS: "${{ inputs.include-success-outputs }}" PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}" @@ -221,10 +216,11 @@ jobs: uses: actions/checkout@v4 with: persist-credentials: false - - name: "Cleanup docker" - run: ./scripts/ci/cleanup_docker.sh - - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}:${{ inputs.image-tag }}" + - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}" uses: ./.github/actions/prepare_breeze_and_image + with: + platform: "linux/amd64" + python: ${{ inputs.default-python-version }} - uses: actions/cache@v4 id: cache-doc-inventories with: @@ -254,7 +250,6 @@ jobs: GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_USERNAME: ${{ github.actor }} - IMAGE_TAG: "${{ inputs.image-tag }}" INCLUDE_NOT_READY_PROVIDERS: "true" INCLUDE_SUCCESS_OUTPUTS: "${{ inputs.include-success-outputs }}" PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}" @@ -283,8 +278,11 @@ jobs: run: > git clone https://github.com/apache/airflow-site.git /mnt/airflow-site/airflow-site && echo "AIRFLOW_SITE_DIRECTORY=/mnt/airflow-site/airflow-site" >> "$GITHUB_ENV" - - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}:${{ inputs.image-tag }}" + - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}" uses: ./.github/actions/prepare_breeze_and_image + with: + platform: "linux/amd64" + python: ${{ inputs.default-python-version }} - name: "Publish docs" run: > breeze release-management publish-docs --override-versioned --run-in-parallel @@ -331,7 +329,6 @@ jobs: GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_USERNAME: ${{ github.actor }} - IMAGE_TAG: "${{ inputs.image-tag }}" JOB_ID: "python-api-client-tests" 
PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}" VERBOSE: "true" @@ -353,8 +350,11 @@ jobs: fetch-depth: 1 persist-credentials: false path: ./airflow-client-python - - name: "Prepare breeze & CI image: ${{inputs.default-python-version}}:${{inputs.image-tag}}" + - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}" uses: ./.github/actions/prepare_breeze_and_image + with: + platform: "linux/amd64" + python: ${{ inputs.default-python-version }} - name: "Generate airflow python client" run: > breeze release-management prepare-python-client --package-format both diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 09cc3328dd8a7..3ee18ec1a6ece 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -32,7 +32,7 @@ on: # yamllint disable-line rule:truthy - providers-[a-z]+-?[a-z]*/v[0-9]+-[0-9]+ workflow_dispatch: permissions: - # All other permissions are set to none + # All other permissions are set to none by default contents: read # Technically read access while waiting for images should be more than enough. 
However, # there is a bug in GitHub Actions/Packages and in case private repositories are used, you get a permission @@ -44,7 +44,6 @@ env: GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_USERNAME: ${{ github.actor }} - IMAGE_TAG: "${{ github.event.pull_request.head.sha || github.sha }}" SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} VERBOSE: "true" @@ -64,7 +63,6 @@ jobs: all-python-versions-list-as-string: >- ${{ steps.selective-checks.outputs.all-python-versions-list-as-string }} basic-checks-only: ${{ steps.selective-checks.outputs.basic-checks-only }} - build-job-description: ${{ steps.source-run-info.outputs.build-job-description }} canary-run: ${{ steps.source-run-info.outputs.canary-run }} chicken-egg-providers: ${{ steps.selective-checks.outputs.chicken-egg-providers }} ci-image-build: ${{ steps.selective-checks.outputs.ci-image-build }} @@ -88,8 +86,6 @@ jobs: full-tests-needed: ${{ steps.selective-checks.outputs.full-tests-needed }} has-migrations: ${{ steps.selective-checks.outputs.has-migrations }} helm-test-packages: ${{ steps.selective-checks.outputs.helm-test-packages }} - image-tag: ${{ github.event.pull_request.head.sha || github.sha }} - in-workflow-build: ${{ steps.source-run-info.outputs.in-workflow-build }} include-success-outputs: ${{ steps.selective-checks.outputs.include-success-outputs }} individual-providers-test-types-list-as-string: >- ${{ steps.selective-checks.outputs.individual-providers-test-types-list-as-string }} @@ -99,6 +95,7 @@ jobs: is-k8s-runner: ${{ steps.selective-checks.outputs.is-k8s-runner }} is-self-hosted-runner: ${{ steps.selective-checks.outputs.is-self-hosted-runner }} is-vm-runner: ${{ steps.selective-checks.outputs.is-vm-runner }} + kubernetes-combos: ${{ steps.selective-checks.outputs.kubernetes-combos }} kubernetes-combos-list-as-string: >- ${{ steps.selective-checks.outputs.kubernetes-combos-list-as-string }} kubernetes-versions-list-as-string: >- @@ -197,25 +194,21 
@@ jobs: canary-run: ${{needs.build-info.outputs.canary-run}} latest-versions-only: ${{needs.build-info.outputs.latest-versions-only}} build-ci-images: - name: > - ${{ needs.build-info.outputs.in-workflow-build == 'true' && 'Build' || 'Skip building' }} - CI images in-workflow + name: Build CI images needs: [build-info] uses: ./.github/workflows/ci-image-build.yml permissions: contents: read # This write is only given here for `push` events from "apache/airflow" repo. It is not given for PRs # from forks. This is to prevent malicious PRs from creating images in the "apache/airflow" repo. - # For regular build for PRS this "build-prod-images" workflow will be skipped anyway by the - # "in-workflow-build" condition packages: write secrets: inherit with: runs-on-as-json-public: ${{ needs.build-info.outputs.runs-on-as-json-public }} runs-on-as-json-self-hosted: ${{ needs.build-info.outputs.runs-on-as-json-self-hosted }} - do-build: ${{ needs.build-info.outputs.in-workflow-build }} - image-tag: ${{ needs.build-info.outputs.image-tag }} platform: "linux/amd64" + push-image: "false" + upload-image-artifact: "true" python-versions: ${{ needs.build-info.outputs.python-versions }} branch: ${{ needs.build-info.outputs.default-branch }} use-uv: ${{ needs.build-info.outputs.force-pip == 'true' && 'false' || 'true' }} @@ -224,54 +217,15 @@ jobs: docker-cache: ${{ needs.build-info.outputs.docker-cache }} disable-airflow-repo-cache: ${{ needs.build-info.outputs.disable-airflow-repo-cache }} - wait-for-ci-images: - timeout-minutes: 120 - name: "Wait for CI images" - runs-on: ${{ fromJSON(needs.build-info.outputs.runs-on-as-json-public) }} - needs: [build-info, build-ci-images] - if: needs.build-info.outputs.ci-image-build == 'true' - env: - BACKEND: sqlite - # Force more parallelism for pull even on public images - PARALLELISM: 6 - INCLUDE_SUCCESS_OUTPUTS: "${{needs.build-info.outputs.include-success-outputs}}" - steps: - - name: "Cleanup repo" - shell: bash - run: docker run -v 
"${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - if: needs.build-info.outputs.in-workflow-build == 'false' - - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 - with: - persist-credentials: false - if: needs.build-info.outputs.in-workflow-build == 'false' - - name: "Cleanup docker" - run: ./scripts/ci/cleanup_docker.sh - if: needs.build-info.outputs.in-workflow-build == 'false' - - name: "Install Breeze" - uses: ./.github/actions/breeze - if: needs.build-info.outputs.in-workflow-build == 'false' - - name: Login to ghcr.io - run: echo "${{ env.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin - if: needs.build-info.outputs.in-workflow-build == 'false' - - name: Wait for CI images ${{ env.PYTHON_VERSIONS }}:${{ needs.build-info.outputs.image-tag }} - id: wait-for-images - run: breeze ci-image pull --run-in-parallel --wait-for-image --tag-as-latest - env: - PYTHON_VERSIONS: ${{ needs.build-info.outputs.python-versions-list-as-string }} - DEBUG_RESOURCES: ${{needs.build-info.outputs.debug-resources}} - if: needs.build-info.outputs.in-workflow-build == 'false' - additional-ci-image-checks: name: "Additional CI image checks" - needs: [build-info, wait-for-ci-images] + needs: [build-info, build-ci-images] uses: ./.github/workflows/additional-ci-image-checks.yml if: needs.build-info.outputs.canary-run == 'true' with: runs-on-as-json-default: ${{ needs.build-info.outputs.runs-on-as-json-default }} runs-on-as-json-public: ${{ needs.build-info.outputs.runs-on-as-json-public }} runs-on-as-json-self-hosted: ${{ needs.build-info.outputs.runs-on-as-json-self-hosted }} - image-tag: ${{ needs.build-info.outputs.image-tag }} python-versions: ${{ needs.build-info.outputs.python-versions }} branch: ${{ needs.build-info.outputs.default-branch }} constraints-branch: ${{ needs.build-info.outputs.default-constraints-branch }} @@ -289,7 +243,7 @@ jobs: generate-constraints: name: "Generate 
constraints" - needs: [build-info, wait-for-ci-images] + needs: [build-info, build-ci-images] uses: ./.github/workflows/generate-constraints.yml if: > needs.build-info.outputs.ci-image-build == 'true' && @@ -300,19 +254,17 @@ jobs: # generate no providers constraints only in canary builds - they take quite some time to generate # they are not needed for regular builds, they are only needed to update constraints in canaries generate-no-providers-constraints: ${{ needs.build-info.outputs.canary-run }} - image-tag: ${{ needs.build-info.outputs.image-tag }} chicken-egg-providers: ${{ needs.build-info.outputs.chicken-egg-providers }} debug-resources: ${{ needs.build-info.outputs.debug-resources }} ci-image-checks: name: "CI image checks" - needs: [build-info, wait-for-ci-images] + needs: [build-info, build-ci-images] uses: ./.github/workflows/ci-image-checks.yml secrets: inherit with: runs-on-as-json-default: ${{ needs.build-info.outputs.runs-on-as-json-default }} runs-on-as-json-docs-build: ${{ needs.build-info.outputs.runs-on-as-json-docs-build }} - image-tag: ${{ needs.build-info.outputs.image-tag }} needs-mypy: ${{ needs.build-info.outputs.needs-mypy }} mypy-checks: ${{ needs.build-info.outputs.mypy-checks }} python-versions-list-as-string: ${{ needs.build-info.outputs.python-versions-list-as-string }} @@ -336,7 +288,7 @@ jobs: providers: name: "Provider packages tests" uses: ./.github/workflows/test-provider-packages.yml - needs: [build-info, wait-for-ci-images] + needs: [build-info, build-ci-images] permissions: contents: read packages: read @@ -346,7 +298,6 @@ jobs: needs.build-info.outputs.latest-versions-only != 'true' with: runs-on-as-json-default: ${{ needs.build-info.outputs.runs-on-as-json-default }} - image-tag: ${{ needs.build-info.outputs.image-tag }} canary-run: ${{ needs.build-info.outputs.canary-run }} default-python-version: ${{ needs.build-info.outputs.default-python-version }} upgrade-to-newer-dependencies: ${{ 
needs.build-info.outputs.upgrade-to-newer-dependencies }} @@ -360,7 +311,7 @@ jobs: tests-helm: name: "Helm tests" uses: ./.github/workflows/helm-tests.yml - needs: [build-info, wait-for-ci-images] + needs: [build-info, build-ci-images] permissions: contents: read packages: read @@ -369,7 +320,6 @@ jobs: runs-on-as-json-default: ${{ needs.build-info.outputs.runs-on-as-json-default }} runs-on-as-json-public: ${{ needs.build-info.outputs.runs-on-as-json-public }} helm-test-packages: ${{ needs.build-info.outputs.helm-test-packages }} - image-tag: ${{ needs.build-info.outputs.image-tag }} default-python-version: ${{ needs.build-info.outputs.default-python-version }} if: > needs.build-info.outputs.needs-helm-tests == 'true' && @@ -379,7 +329,7 @@ jobs: tests-postgres: name: "Postgres tests" uses: ./.github/workflows/run-unit-tests.yml - needs: [build-info, wait-for-ci-images] + needs: [build-info, build-ci-images] permissions: contents: read packages: read @@ -390,7 +340,6 @@ jobs: test-name: "Postgres" test-scope: "DB" test-groups: ${{ needs.build-info.outputs.test-groups }} - image-tag: ${{ needs.build-info.outputs.image-tag }} python-versions: ${{ needs.build-info.outputs.python-versions }} backend-versions: ${{ needs.build-info.outputs.postgres-versions }} excluded-providers-as-string: ${{ needs.build-info.outputs.excluded-providers-as-string }} @@ -406,7 +355,7 @@ jobs: tests-mysql: name: "MySQL tests" uses: ./.github/workflows/run-unit-tests.yml - needs: [build-info, wait-for-ci-images] + needs: [build-info, build-ci-images] permissions: contents: read packages: read @@ -417,7 +366,6 @@ jobs: test-name: "MySQL" test-scope: "DB" test-groups: ${{ needs.build-info.outputs.test-groups }} - image-tag: ${{ needs.build-info.outputs.image-tag }} python-versions: ${{ needs.build-info.outputs.python-versions }} backend-versions: ${{ needs.build-info.outputs.mysql-versions }} excluded-providers-as-string: ${{ needs.build-info.outputs.excluded-providers-as-string }} @@ -433,7 
+381,7 @@ jobs: tests-sqlite: name: "Sqlite tests" uses: ./.github/workflows/run-unit-tests.yml - needs: [build-info, wait-for-ci-images] + needs: [build-info, build-ci-images] permissions: contents: read packages: read @@ -445,7 +393,6 @@ jobs: test-name-separator: "" test-scope: "DB" test-groups: ${{ needs.build-info.outputs.test-groups }} - image-tag: ${{ needs.build-info.outputs.image-tag }} python-versions: ${{ needs.build-info.outputs.python-versions }} # No versions for sqlite backend-versions: "['']" @@ -462,7 +409,7 @@ jobs: tests-non-db: name: "Non-DB tests" uses: ./.github/workflows/run-unit-tests.yml - needs: [build-info, wait-for-ci-images] + needs: [build-info, build-ci-images] permissions: contents: read packages: read @@ -474,7 +421,6 @@ jobs: test-name-separator: "" test-scope: "Non-DB" test-groups: ${{ needs.build-info.outputs.test-groups }} - image-tag: ${{ needs.build-info.outputs.image-tag }} python-versions: ${{ needs.build-info.outputs.python-versions }} # No versions for non-db backend-versions: "['']" @@ -490,7 +436,7 @@ jobs: tests-special: name: "Special tests" uses: ./.github/workflows/special-tests.yml - needs: [build-info, wait-for-ci-images] + needs: [build-info, build-ci-images] permissions: contents: read packages: read @@ -504,7 +450,6 @@ jobs: test-groups: ${{ needs.build-info.outputs.test-groups }} default-branch: ${{ needs.build-info.outputs.default-branch }} runs-on-as-json-default: ${{ needs.build-info.outputs.runs-on-as-json-default }} - image-tag: ${{ needs.build-info.outputs.image-tag }} core-test-types-list-as-string: ${{ needs.build-info.outputs.core-test-types-list-as-string }} providers-test-types-list-as-string: ${{ needs.build-info.outputs.providers-test-types-list-as-string }} run-coverage: ${{ needs.build-info.outputs.run-coverage }} @@ -519,7 +464,7 @@ jobs: tests-integration-system: name: Integration and System Tests - needs: [build-info, wait-for-ci-images] + needs: [build-info, build-ci-images] uses: 
./.github/workflows/integration-system-tests.yml permissions: contents: read @@ -527,7 +472,6 @@ jobs: secrets: inherit with: runs-on-as-json-public: ${{ needs.build-info.outputs.runs-on-as-json-public }} - image-tag: ${{ needs.build-info.outputs.image-tag }} testable-core-integrations: ${{ needs.build-info.outputs.testable-core-integrations }} testable-providers-integrations: ${{ needs.build-info.outputs.testable-providers-integrations }} run-system-tests: ${{ needs.build-info.outputs.run-tests }} @@ -541,7 +485,7 @@ jobs: tests-with-lowest-direct-resolution: name: "Lowest direct dependency providers tests" - needs: [build-info, wait-for-ci-images] + needs: [build-info, build-ci-images] uses: ./.github/workflows/run-unit-tests.yml permissions: contents: read @@ -556,7 +500,6 @@ jobs: test-scope: "All" test-groups: ${{ needs.build-info.outputs.test-groups }} backend: "postgres" - image-tag: ${{ needs.build-info.outputs.image-tag }} python-versions: ${{ needs.build-info.outputs.python-versions }} backend-versions: "['${{ needs.build-info.outputs.default-postgres-version }}']" excluded-providers-as-string: ${{ needs.build-info.outputs.excluded-providers-as-string }} @@ -570,30 +513,25 @@ jobs: monitor-delay-time-in-seconds: 120 build-prod-images: - name: > - ${{ needs.build-info.outputs.in-workflow-build == 'true' && 'Build' || 'Skip building' }} - PROD images in-workflow + name: Build PROD images needs: [build-info, build-ci-images, generate-constraints] uses: ./.github/workflows/prod-image-build.yml permissions: contents: read # This write is only given here for `push` events from "apache/airflow" repo. It is not given for PRs # from forks. This is to prevent malicious PRs from creating images in the "apache/airflow" repo. 
- # For regular build for PRS this "build-prod-images" workflow will be skipped anyway by the - # "in-workflow-build" condition packages: write secrets: inherit with: runs-on-as-json-public: ${{ needs.build-info.outputs.runs-on-as-json-public }} build-type: "Regular" - do-build: ${{ needs.build-info.outputs.in-workflow-build }} - upload-package-artifact: "true" - image-tag: ${{ needs.build-info.outputs.image-tag }} platform: "linux/amd64" + push-image: "false" + upload-image-artifact: "true" + upload-package-artifact: "true" python-versions: ${{ needs.build-info.outputs.python-versions }} default-python-version: ${{ needs.build-info.outputs.default-python-version }} branch: ${{ needs.build-info.outputs.default-branch }} - push-image: "true" use-uv: ${{ needs.build-info.outputs.force-pip == 'true' && 'false' || 'true' }} build-provider-packages: ${{ needs.build-info.outputs.default-branch == 'main' }} upgrade-to-newer-dependencies: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} @@ -602,58 +540,14 @@ jobs: docker-cache: ${{ needs.build-info.outputs.docker-cache }} disable-airflow-repo-cache: ${{ needs.build-info.outputs.disable-airflow-repo-cache }} - wait-for-prod-images: - timeout-minutes: 80 - name: "Wait for PROD images" - runs-on: ${{ fromJSON(needs.build-info.outputs.runs-on-as-json-public) }} - needs: [build-info, wait-for-ci-images, build-prod-images] - if: needs.build-info.outputs.prod-image-build == 'true' - env: - BACKEND: sqlite - PYTHON_MAJOR_MINOR_VERSION: "${{needs.build-info.outputs.default-python-version}}" - # Force more parallelism for pull on public images - PARALLELISM: 6 - INCLUDE_SUCCESS_OUTPUTS: "${{needs.build-info.outputs.include-success-outputs}}" - IMAGE_TAG: ${{ needs.build-info.outputs.image-tag }} - steps: - - name: "Cleanup repo" - shell: bash - run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - if: needs.build-info.outputs.in-workflow-build == 'false' - - name: "Checkout ${{ 
github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 - with: - persist-credentials: false - if: needs.build-info.outputs.in-workflow-build == 'false' - - name: "Cleanup docker" - run: ./scripts/ci/cleanup_docker.sh - if: needs.build-info.outputs.in-workflow-build == 'false' - - name: "Install Breeze" - uses: ./.github/actions/breeze - if: needs.build-info.outputs.in-workflow-build == 'false' - - name: Login to ghcr.io - run: echo "${{ env.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin - if: needs.build-info.outputs.in-workflow-build == 'false' - - name: Wait for PROD images ${{ env.PYTHON_VERSIONS }}:${{ needs.build-info.outputs.image-tag }} - # We wait for the images to be available either from "build-images.yml' run as pull_request_target - # or from build-prod-images (or build-prod-images-release-branch) above. - # We are utilising single job to wait for all images because this job merely waits - # For the images to be available. - run: breeze prod-image pull --wait-for-image --run-in-parallel - env: - PYTHON_VERSIONS: ${{ needs.build-info.outputs.python-versions-list-as-string }} - DEBUG_RESOURCES: ${{ needs.build-info.outputs.debug-resources }} - if: needs.build-info.outputs.in-workflow-build == 'false' - additional-prod-image-tests: name: "Additional PROD image tests" - needs: [build-info, wait-for-prod-images, generate-constraints] + needs: [build-info, build-prod-images, generate-constraints] uses: ./.github/workflows/additional-prod-image-tests.yml with: runs-on-as-json-public: ${{ needs.build-info.outputs.runs-on-as-json-public }} default-branch: ${{ needs.build-info.outputs.default-branch }} constraints-branch: ${{ needs.build-info.outputs.default-constraints-branch }} - image-tag: ${{ needs.build-info.outputs.image-tag }} upgrade-to-newer-dependencies: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} chicken-egg-providers: ${{ needs.build-info.outputs.chicken-egg-providers }} docker-cache: ${{ 
needs.build-info.outputs.docker-cache }} @@ -665,20 +559,19 @@ jobs: tests-kubernetes: name: "Kubernetes tests" uses: ./.github/workflows/k8s-tests.yml - needs: [build-info, wait-for-prod-images] + needs: [build-info, build-prod-images] permissions: contents: read packages: read secrets: inherit with: + platform: "linux/amd64" runs-on-as-json-default: ${{ needs.build-info.outputs.runs-on-as-json-default }} - image-tag: ${{ needs.build-info.outputs.image-tag }} python-versions-list-as-string: ${{ needs.build-info.outputs.python-versions-list-as-string }} - kubernetes-versions-list-as-string: ${{ needs.build-info.outputs.kubernetes-versions-list-as-string }} - kubernetes-combos-list-as-string: ${{ needs.build-info.outputs.kubernetes-combos-list-as-string }} include-success-outputs: ${{ needs.build-info.outputs.include-success-outputs }} use-uv: ${{ needs.build-info.outputs.force-pip == 'true' && 'false' || 'true' }} debug-resources: ${{ needs.build-info.outputs.debug-resources }} + kubernetes-combos: ${{ needs.build-info.outputs.kubernetes-combos }} if: > ( needs.build-info.outputs.run-kubernetes-tests == 'true' || needs.build-info.outputs.needs-helm-tests == 'true') @@ -686,14 +579,13 @@ jobs: tests-task-sdk: name: "Task SDK tests" uses: ./.github/workflows/task-sdk-tests.yml - needs: [build-info, wait-for-ci-images] + needs: [build-info, build-ci-images] permissions: contents: read packages: read secrets: inherit with: runs-on-as-json-default: ${{ needs.build-info.outputs.runs-on-as-json-default }} - image-tag: ${{ needs.build-info.outputs.image-tag }} default-python-version: ${{ needs.build-info.outputs.default-python-version }} python-versions: ${{ needs.build-info.outputs.python-versions }} run-task-sdk-tests: ${{ needs.build-info.outputs.run-task-sdk-tests }} @@ -711,8 +603,6 @@ jobs: needs: - build-info - generate-constraints - - wait-for-ci-images - - wait-for-prod-images - ci-image-checks - tests-sqlite - tests-mysql @@ -723,13 +613,11 @@ jobs: with: 
runs-on-as-json-public: ${{ needs.build-info.outputs.runs-on-as-json-public }} runs-on-as-json-self-hosted: ${{ needs.build-info.outputs.runs-on-as-json-self-hosted }} - image-tag: ${{ needs.build-info.outputs.image-tag }} python-versions: ${{ needs.build-info.outputs.python-versions }} python-versions-list-as-string: ${{ needs.build-info.outputs.python-versions-list-as-string }} branch: ${{ needs.build-info.outputs.default-branch }} constraints-branch: ${{ needs.build-info.outputs.default-constraints-branch }} default-python-version: ${{ needs.build-info.outputs.default-python-version }} - in-workflow-build: ${{ needs.build-info.outputs.in-workflow-build }} upgrade-to-newer-dependencies: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} include-success-outputs: ${{ needs.build-info.outputs.include-success-outputs }} docker-cache: ${{ needs.build-info.outputs.docker-cache }} diff --git a/.github/workflows/finalize-tests.yml b/.github/workflows/finalize-tests.yml index 6f9bc74168b42..b8fd240235f10 100644 --- a/.github/workflows/finalize-tests.yml +++ b/.github/workflows/finalize-tests.yml @@ -28,10 +28,6 @@ on: # yamllint disable-line rule:truthy description: "The array of labels (in json form) determining self-hosted runners." 
required: true type: string - image-tag: - description: "Tag to set for the image" - required: true - type: string python-versions: description: "JSON-formatted array of Python versions to test" required: true @@ -52,10 +48,6 @@ on: # yamllint disable-line rule:truthy description: "Which version of python should be used by default" required: true type: string - in-workflow-build: - description: "Whether the build is executed as part of the workflow (true/false)" - required: true - type: string upgrade-to-newer-dependencies: description: "Whether to upgrade to newer dependencies (true/false)" required: true @@ -87,7 +79,6 @@ jobs: env: DEBUG_RESOURCES: ${{ inputs.debug-resources}} PYTHON_VERSIONS: ${{ inputs.python-versions-list-as-string }} - IMAGE_TAG: ${{ inputs.image-tag }} GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_USERNAME: ${{ github.actor }} diff --git a/.github/workflows/generate-constraints.yml b/.github/workflows/generate-constraints.yml index d6e536dfd091a..332d32e3160d2 100644 --- a/.github/workflows/generate-constraints.yml +++ b/.github/workflows/generate-constraints.yml @@ -32,10 +32,6 @@ on: # yamllint disable-line rule:truthy description: "Whether to generate constraints without providers (true/false)" required: true type: string - image-tag: - description: "Tag to set for the image" - required: true - type: string chicken-egg-providers: description: "Space-separated list of providers that should be installed from context files" required: true @@ -57,7 +53,6 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_USERNAME: ${{ github.actor }} INCLUDE_SUCCESS_OUTPUTS: "true" - IMAGE_TAG: ${{ inputs.image-tag }} PYTHON_VERSIONS: ${{ inputs.python-versions-list-as-string }} VERBOSE: "true" VERSION_SUFFIX_FOR_PYPI: "dev0" @@ -69,21 +64,15 @@ jobs: uses: actions/checkout@v4 with: persist-credentials: false - - name: "Cleanup docker" - run: ./scripts/ci/cleanup_docker.sh - name: "Install Breeze" uses: 
./.github/actions/breeze - - name: Login to ghcr.io - run: echo "${{ env.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin - - name: "\ - Pull CI images \ - ${{ inputs.python-versions-list-as-string }}:\ - ${{ inputs.image-tag }}" - run: breeze ci-image pull --run-in-parallel --tag-as-latest - - name: " - Verify CI images \ - ${{ inputs.python-versions-list-as-string }}:\ - ${{ inputs.image-tag }}" + id: breeze + - name: "Prepare all CI images: ${{ inputs.python-versions-list-as-string}}" + uses: ./.github/actions/prepare_all_ci_images + with: + platform: "linux/amd64" + python-versions-list-as-string: ${{ inputs.python-versions-list-as-string }} + - name: "Verify all CI images ${{ inputs.python-versions-list-as-string }}" run: breeze ci-image verify --run-in-parallel - name: "Source constraints" shell: bash diff --git a/.github/workflows/helm-tests.yml b/.github/workflows/helm-tests.yml index 4c1ec1023fc90..d7f3f0b4d5bf0 100644 --- a/.github/workflows/helm-tests.yml +++ b/.github/workflows/helm-tests.yml @@ -32,10 +32,6 @@ on: # yamllint disable-line rule:truthy description: "Stringified JSON array of helm test packages to test" required: true type: string - image-tag: - description: "Tag to set for the image" - required: true - type: string default-python-version: description: "Which version of python should be used by default" required: true @@ -57,7 +53,6 @@ jobs: DB_RESET: "false" JOB_ID: "helm-tests" USE_XDIST: "true" - IMAGE_TAG: "${{ inputs.image-tag }}" GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_USERNAME: ${{ github.actor }} @@ -70,10 +65,11 @@ jobs: uses: actions/checkout@v4 with: persist-credentials: false - - name: "Cleanup docker" - run: ./scripts/ci/cleanup_docker.sh - - name: "Prepare breeze & CI image: ${{inputs.default-python-version}}:${{inputs.image-tag}}" + - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}" uses: 
./.github/actions/prepare_breeze_and_image + with: + platform: "linux/amd64" + python: ${{ inputs.default-python-version }} - name: "Helm Unit Tests: ${{ matrix.helm-test-package }}" run: breeze testing helm-tests --test-type "${{ matrix.helm-test-package }}" diff --git a/.github/workflows/integration-system-tests.yml b/.github/workflows/integration-system-tests.yml index 7fde2ae968363..0a6fd1fb406c7 100644 --- a/.github/workflows/integration-system-tests.yml +++ b/.github/workflows/integration-system-tests.yml @@ -24,10 +24,6 @@ on: # yamllint disable-line rule:truthy description: "The array of labels (in json form) determining public runners." required: true type: string - image-tag: - description: "Tag to set for the image" - required: true - type: string testable-core-integrations: description: "The list of testable core integrations as JSON array." required: true @@ -75,7 +71,6 @@ jobs: matrix: integration: ${{ fromJSON(inputs.testable-core-integrations) }} env: - IMAGE_TAG: "${{ inputs.image-tag }}" BACKEND: "postgres" BACKEND_VERSION: ${{ inputs.default-postgres-version }}" PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}" @@ -95,10 +90,11 @@ jobs: uses: actions/checkout@v4 with: persist-credentials: false - - name: "Cleanup docker" - run: ./scripts/ci/cleanup_docker.sh - - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}:${{ inputs.image-tag }}" + - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}" uses: ./.github/actions/prepare_breeze_and_image + with: + platform: "linux/amd64" + python: ${{ inputs.default-python-version }} - name: "Integration: core ${{ matrix.integration }}" # yamllint disable rule:line-length run: ./scripts/ci/testing/run_integration_tests_with_retry.sh core "${{ matrix.integration }}" @@ -121,7 +117,6 @@ jobs: matrix: integration: ${{ fromJSON(inputs.testable-providers-integrations) }} env: - IMAGE_TAG: "${{ inputs.image-tag }}" BACKEND: "postgres" BACKEND_VERSION: ${{ 
inputs.default-postgres-version }}" PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}" @@ -141,10 +136,11 @@ jobs: uses: actions/checkout@v4 with: persist-credentials: false - - name: "Cleanup docker" - run: ./scripts/ci/cleanup_docker.sh - - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}:${{ inputs.image-tag }}" + - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}" uses: ./.github/actions/prepare_breeze_and_image + with: + platform: "linux/amd64" + python: ${{ inputs.default-python-version }} - name: "Integration: providers ${{ matrix.integration }}" run: ./scripts/ci/testing/run_integration_tests_with_retry.sh providers "${{ matrix.integration }}" - name: "Post Tests success" @@ -162,7 +158,6 @@ jobs: name: "System Tests" runs-on: ${{ fromJSON(inputs.runs-on-as-json-public) }} env: - IMAGE_TAG: "${{ inputs.image-tag }}" BACKEND: "postgres" BACKEND_VERSION: ${{ inputs.default-postgres-version }}" PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}" @@ -182,10 +177,11 @@ jobs: uses: actions/checkout@v4 with: persist-credentials: false - - name: "Cleanup docker" - run: ./scripts/ci/cleanup_docker.sh - - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}:${{ inputs.image-tag }}" + - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}" uses: ./.github/actions/prepare_breeze_and_image + with: + platform: "linux/amd64" + python: ${{ inputs.default-python-version }} - name: "System Tests" run: > ./scripts/ci/testing/run_system_tests.sh diff --git a/.github/workflows/k8s-tests.yml b/.github/workflows/k8s-tests.yml index 3b3e067038db9..e83cf68fb0e6f 100644 --- a/.github/workflows/k8s-tests.yml +++ b/.github/workflows/k8s-tests.yml @@ -20,24 +20,20 @@ name: K8s tests on: # yamllint disable-line rule:truthy workflow_call: inputs: - runs-on-as-json-default: - description: "The array of labels (in json form) determining default runner used for the build." 
+ platform: + description: "Platform for the build - 'linux/amd64' or 'linux/arm64'" required: true type: string - image-tag: - description: "Tag to set for the image" + runs-on-as-json-default: + description: "The array of labels (in json form) determining default runner used for the build." required: true type: string python-versions-list-as-string: description: "List of Python versions to test: space separated string" required: true type: string - kubernetes-versions-list-as-string: - description: "List of Kubernetes versions to test" - required: true - type: string - kubernetes-combos-list-as-string: - description: "List of combinations of Kubernetes and Python versions to test: space separated string" + kubernetes-combos: + description: "Array of combinations of Kubernetes and Python versions to test" required: true type: string include-success-outputs: @@ -55,19 +51,17 @@ on: # yamllint disable-line rule:truthy jobs: tests-kubernetes: timeout-minutes: 240 - name: "\ - K8S System:${{ matrix.executor }} - ${{ matrix.use-standard-naming }} - \ - ${{ inputs.kubernetes-versions-list-as-string }}" + name: "K8S System:${{ matrix.executor }}-${{ matrix.kubernetes-combo }}-${{ matrix.use-standard-naming }}" runs-on: ${{ fromJSON(inputs.runs-on-as-json-default) }} strategy: matrix: executor: [KubernetesExecutor, CeleryExecutor, LocalExecutor] use-standard-naming: [true, false] + kubernetes-combo: ${{ fromJSON(inputs.kubernetes-combos) }} fail-fast: false env: DEBUG_RESOURCES: ${{ inputs.debug-resources }} INCLUDE_SUCCESS_OUTPUTS: ${{ inputs.include-success-outputs }} - IMAGE_TAG: ${{ inputs.image-tag }} GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_USERNAME: ${{ github.actor }} @@ -76,23 +70,21 @@ jobs: - name: "Cleanup repo" shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" + - name: "Prepare PYTHON_MAJOR_MINOR_VERSION and KUBERNETES_VERSION" + id: prepare-versions + run: 
| + echo "PYTHON_MAJOR_MINOR_VERSION=${{ matrix.kubernetes-combo }}" | sed 's/-.*//' >> $GITHUB_ENV + echo "KUBERNETES_VERSION=${{ matrix.kubernetes-combo }}" | sed 's/=[^-]*-/=/' >> $GITHUB_ENV - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: persist-credentials: false - - name: "Cleanup docker" - run: ./scripts/ci/cleanup_docker.sh - - name: "Install Breeze" - uses: ./.github/actions/breeze - id: breeze - - name: Login to ghcr.io - run: echo "${{ env.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin - - name: Pull PROD images ${{ inputs.python-versions-list-as-string }}:${{ inputs.image-tag }} - run: breeze prod-image pull --run-in-parallel --tag-as-latest - env: - PYTHON_VERSIONS: ${{ inputs.python-versions-list-as-string }} - # Force more parallelism for pull even on public images - PARALLELISM: 6 + - name: "Prepare breeze & PROD image: ${{ env.PYTHON_MAJOR_MINOR_VERSION }}" + uses: ./.github/actions/prepare_breeze_and_image + with: + platform: ${{ inputs.platform }} + image-type: "prod" + python: ${{ env.PYTHON_MAJOR_MINOR_VERSION }} - name: "Cache bin folder with tools for kubernetes testing" uses: actions/cache@v4 with: @@ -100,29 +92,34 @@ jobs: key: "\ k8s-env-${{ steps.breeze.outputs.host-python-version }}-\ ${{ hashFiles('scripts/ci/kubernetes/k8s_requirements.txt','hatch_build.py') }}" - - name: "Switch breeze to use uv" - run: breeze setup config --use-uv - if: inputs.use-uv == 'true' - - name: Run complete K8S tests ${{ inputs.kubernetes-combos-list-as-string }} - run: breeze k8s run-complete-tests --run-in-parallel --upgrade --no-copy-local-sources + - name: "\ + Run complete K8S tests ${{ matrix.executor }}-${{ env.PYTHON_MAJOR_MINOR_VERSION }}-\ + ${{env.KUBERNETES_VERSION}}-${{ matrix.use-standard-naming }}" + run: breeze k8s run-complete-tests --upgrade --no-copy-local-sources env: - PYTHON_VERSIONS: ${{ inputs.python-versions-list-as-string }} - KUBERNETES_VERSIONS: ${{ 
inputs.kubernetes-versions-list-as-string }} EXECUTOR: ${{ matrix.executor }} USE_STANDARD_NAMING: ${{ matrix.use-standard-naming }} VERBOSE: "false" - - name: Upload KinD logs on failure ${{ inputs.kubernetes-combos-list-as-string }} + - name: "\ + Upload KinD logs on failure ${{ matrix.executor }}-${{ matrix.kubernetes-combo }}-\ + ${{ matrix.use-standard-naming }}" uses: actions/upload-artifact@v4 if: failure() || cancelled() with: - name: kind-logs-${{ matrix.executor }}-${{ matrix.use-standard-naming }} + name: "\ + kind-logs-${{ matrix.kubernetes-combo }}-${{ matrix.executor }}-\ + ${{ matrix.use-standard-naming }}" path: /tmp/kind_logs_* retention-days: 7 - - name: Upload test resource logs on failure ${{ inputs.kubernetes-combos-list-as-string }} + - name: "\ + Upload test resource logs on failure ${{ matrix.executor }}-${{ matrix.kubernetes-combo }}-\ + ${{ matrix.use-standard-naming }}" uses: actions/upload-artifact@v4 if: failure() || cancelled() with: - name: k8s-test-resources-${{ matrix.executor }}-${{ matrix.use-standard-naming }} + name: "\ + k8s-test-resources-${{ matrix.kubernetes-combo }}-${{ matrix.executor }}-\ + ${{ matrix.use-standard-naming }}" path: /tmp/k8s_test_resources_* retention-days: 7 - name: "Delete clusters just in case they are left" diff --git a/.github/workflows/prod-image-build.yml b/.github/workflows/prod-image-build.yml index df4f24981ff30..4ee3538429222 100644 --- a/.github/workflows/prod-image-build.yml +++ b/.github/workflows/prod-image-build.yml @@ -30,13 +30,6 @@ on: # yamllint disable-line rule:truthy variations. required: true type: string - do-build: - description: > - Whether to actually do the build (true/false). If set to false, the build is done - already in pull-request-target workflow, so we skip it here. - required: false - default: "true" - type: string upload-package-artifact: description: > Whether to upload package artifacts (true/false). 
If false, the job will rely on artifacts prepared @@ -62,6 +55,11 @@ on: # yamllint disable-line rule:truthy description: "Whether to push image to the registry (true/false)" required: true type: string + upload-image-artifact: + description: "Whether to upload docker image artifact" + required: false + default: "false" + type: string debian-version: description: "Base Debian distribution to use for the build (bookworm)" type: string @@ -74,10 +72,6 @@ on: # yamllint disable-line rule:truthy description: "Whether to use uv to build the image (true/false)" required: true type: string - image-tag: - description: "Tag to set for the image" - required: true - type: string python-versions: description: "JSON-formatted array of Python versions to build images from" required: true @@ -121,7 +115,7 @@ on: # yamllint disable-line rule:truthy jobs: build-prod-packages: - name: "${{ inputs.do-build == 'true' && 'Build' || 'Skip building' }} Airflow and provider packages" + name: "Build Airflow and provider packages" timeout-minutes: 10 runs-on: ${{ fromJSON(inputs.runs-on-as-json-public) }} env: @@ -131,32 +125,25 @@ jobs: - name: "Cleanup repo" shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - if: inputs.do-build == 'true' && inputs.upload-package-artifact == 'true' + if: inputs.upload-package-artifact == 'true' - name: "Checkout target branch" uses: actions/checkout@v4 with: persist-credentials: false - - name: "Checkout target commit" - uses: ./.github/actions/checkout_target_commit - with: - target-commit-sha: ${{ inputs.target-commit-sha }} - pull-request-target: ${{ inputs.pull-request-target }} - is-committer-build: ${{ inputs.is-committer-build }} - if: inputs.do-build == 'true' && inputs.upload-package-artifact == 'true' - name: "Cleanup docker" run: ./scripts/ci/cleanup_docker.sh - if: inputs.do-build == 'true' && inputs.upload-package-artifact == 'true' + if: inputs.upload-package-artifact == 'true' - uses: 
actions/setup-python@v5 with: python-version: "${{ inputs.default-python-version }}" - if: inputs.do-build == 'true' && inputs.upload-package-artifact == 'true' + if: inputs.upload-package-artifact == 'true' - name: "Cleanup dist and context file" shell: bash run: rm -fv ./dist/* ./docker-context-files/* - if: inputs.do-build == 'true' && inputs.upload-package-artifact == 'true' + if: inputs.upload-package-artifact == 'true' - name: "Install Breeze" uses: ./.github/actions/breeze - if: inputs.do-build == 'true' && inputs.upload-package-artifact == 'true' + if: inputs.upload-package-artifact == 'true' - name: "Prepare providers packages" shell: bash run: > @@ -164,7 +151,6 @@ jobs: --package-list-file ./prod_image_installed_providers.txt --package-format wheel if: > - inputs.do-build == 'true' && inputs.upload-package-artifact == 'true' && inputs.build-provider-packages == 'true' - name: "Prepare chicken-eggs provider packages" @@ -173,19 +159,18 @@ jobs: breeze release-management prepare-provider-packages --package-format wheel ${{ inputs.chicken-egg-providers }} if: > - inputs.do-build == 'true' && inputs.upload-package-artifact == 'true' && inputs.chicken-egg-providers != '' - name: "Prepare airflow package" shell: bash run: > breeze release-management prepare-airflow-package --package-format wheel - if: inputs.do-build == 'true' && inputs.upload-package-artifact == 'true' + if: inputs.upload-package-artifact == 'true' - name: "Prepare task-sdk package" shell: bash run: > breeze release-management prepare-task-sdk-package --package-format wheel - if: inputs.do-build == 'true' && inputs.upload-package-artifact == 'true' + if: inputs.upload-package-artifact == 'true' - name: "Upload prepared packages as artifacts" uses: actions/upload-artifact@v4 with: @@ -193,25 +178,21 @@ jobs: path: ./dist retention-days: 7 if-no-files-found: error - if: inputs.do-build == 'true' && inputs.upload-package-artifact == 'true' + if: inputs.upload-package-artifact == 'true' 
build-prod-images: strategy: fail-fast: false matrix: - # yamllint disable-line rule:line-length - python-version: ${{ inputs.do-build == 'true' && fromJSON(inputs.python-versions) || fromJSON('[""]') }} + python-version: ${{ fromJSON(inputs.python-versions) || fromJSON('[""]') }} timeout-minutes: 80 - name: "\ -${{ inputs.do-build == 'true' && 'Build' || 'Skip building' }} \ -PROD ${{ inputs.build-type }} image\ -${{ matrix.python-version }}${{ inputs.do-build == 'true' && ':' || '' }}\ -${{ inputs.do-build == 'true' && inputs.image-tag || '' }}" + name: "Build PROD ${{ inputs.build-type }} image ${{ matrix.python-version }}" runs-on: ${{ fromJSON(inputs.runs-on-as-json-public) }} needs: - build-prod-packages env: BACKEND: sqlite + PYTHON_MAJOR_MINOR_VERSION: "${{ matrix.python-version }}" DEFAULT_BRANCH: ${{ inputs.branch }} DEFAULT_CONSTRAINTS_BRANCH: ${{ inputs.constraints-branch }} VERSION_SUFFIX_FOR_PYPI: ${{ inputs.branch == 'main' && 'dev0' || '' }} @@ -231,57 +212,38 @@ ${{ inputs.do-build == 'true' && inputs.image-tag || '' }}" - name: "Cleanup repo" shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - if: inputs.do-build == 'true' - name: "Checkout target branch" uses: actions/checkout@v4 with: persist-credentials: false - - name: "Checkout target commit" - uses: ./.github/actions/checkout_target_commit - with: - target-commit-sha: ${{ inputs.target-commit-sha }} - pull-request-target: ${{ inputs.pull-request-target }} - is-committer-build: ${{ inputs.is-committer-build }} - if: inputs.do-build == 'true' - name: "Cleanup docker" run: ./scripts/ci/cleanup_docker.sh - if: inputs.do-build == 'true' - name: "Install Breeze" uses: ./.github/actions/breeze - if: inputs.do-build == 'true' - - name: "Regenerate dependencies in case they was modified manually so that we can build an image" - shell: bash - run: | - pip install rich>=12.4.4 pyyaml - python scripts/ci/pre_commit/update_providers_dependencies.py - if: 
inputs.do-build == 'true' && inputs.upgrade-to-newer-dependencies != 'false' - name: "Cleanup dist and context file" shell: bash run: rm -fv ./dist/* ./docker-context-files/* - if: inputs.do-build == 'true' - name: "Download packages prepared as artifacts" uses: actions/download-artifact@v4 with: name: prod-packages path: ./docker-context-files - if: inputs.do-build == 'true' - name: "Download constraints" uses: actions/download-artifact@v4 with: name: constraints path: ./docker-context-files - if: inputs.do-build == 'true' - - name: Login to ghcr.io - shell: bash + - name: "Login to ghcr.io" run: echo "${{ env.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin - if: inputs.do-build == 'true' - - name: "Build PROD images w/ source providers ${{ matrix.python-version }}:${{ inputs.image-tag }}" + - name: "Build PROD images w/ source providers ${{ env.PYTHON_MAJOR_MINOR_VERSION }}" shell: bash run: > - breeze prod-image build --tag-as-latest --image-tag "${{ inputs.image-tag }}" + breeze prod-image build + --builder airflow_cache --commit-sha "${{ github.sha }}" - --install-packages-from-context --airflow-constraints-mode constraints-source-providers - --use-constraints-for-context-packages --python "${{ matrix.python-version }}" + --install-packages-from-context + --airflow-constraints-mode constraints-source-providers + --use-constraints-for-context-packages env: PUSH: ${{ inputs.push-image }} DOCKER_CACHE: ${{ inputs.docker-cache }} @@ -290,14 +252,16 @@ ${{ inputs.do-build == 'true' && inputs.image-tag || '' }}" INSTALL_MYSQL_CLIENT_TYPE: ${{ inputs.install-mysql-client-type }} UPGRADE_TO_NEWER_DEPENDENCIES: ${{ inputs.upgrade-to-newer-dependencies }} INCLUDE_NOT_READY_PROVIDERS: "true" - if: inputs.do-build == 'true' && inputs.build-provider-packages == 'true' - - name: "Build PROD images with PyPi providers ${{ matrix.python-version }}:${{ inputs.image-tag }}" + if: inputs.build-provider-packages == 'true' + - name: "Build PROD 
images with PyPi providers ${{ env.PYTHON_MAJOR_MINOR_VERSION }}" shell: bash run: > - breeze prod-image build --builder airflow_cache --tag-as-latest - --image-tag "${{ inputs.image-tag }}" --commit-sha "${{ github.sha }}" - --install-packages-from-context --airflow-constraints-mode constraints - --use-constraints-for-context-packages --python "${{ matrix.python-version }}" + breeze prod-image build + --builder airflow_cache + --commit-sha "${{ github.sha }}" + --install-packages-from-context + --airflow-constraints-mode constraints + --use-constraints-for-context-packages env: PUSH: ${{ inputs.push-image }} DOCKER_CACHE: ${{ inputs.docker-cache }} @@ -306,9 +270,18 @@ ${{ inputs.do-build == 'true' && inputs.image-tag || '' }}" INSTALL_MYSQL_CLIENT_TYPE: ${{ inputs.install-mysql-client-type }} UPGRADE_TO_NEWER_DEPENDENCIES: ${{ inputs.upgrade-to-newer-dependencies }} INCLUDE_NOT_READY_PROVIDERS: "true" - if: inputs.do-build == 'true' && inputs.build-provider-packages != 'true' - - name: Verify PROD image ${{ matrix.python-version }}:${{ inputs.image-tag }} + if: inputs.build-provider-packages != 'true' + - name: "Verify PROD image ${{ env.PYTHON_MAJOR_MINOR_VERSION }}" + run: breeze prod-image verify + - name: "Export PROD docker image ${{ env.PYTHON_MAJOR_MINOR_VERSION }}" run: > - breeze prod-image verify --image-tag "${{ inputs.image-tag }}" - --python "${{ matrix.python-version }}" - if: inputs.do-build == 'true' + breeze prod-image save --platform "${{ inputs.platform }}" + if: inputs.upload-image-artifact == 'true' + - name: "Stash PROD docker image ${{ env.PYTHON_MAJOR_MINOR_VERSION }}" + uses: apache/infrastructure-actions/stash/save@c94b890bbedc2fc61466d28e6bd9966bc6c6643c + with: + key: "prod-image-save-${{ inputs.platform }}-${{ env.PYTHON_MAJOR_MINOR_VERSION }}" + path: "/tmp/prod-image-save-*-${{ env.PYTHON_MAJOR_MINOR_VERSION }}.tar" + if-no-files-found: 'error' + retention-days: 2 + if: inputs.upload-image-artifact == 'true' diff --git 
a/.github/workflows/prod-image-extra-checks.yml b/.github/workflows/prod-image-extra-checks.yml index bb63faef7b243..d8d51d0d4fc74 100644 --- a/.github/workflows/prod-image-extra-checks.yml +++ b/.github/workflows/prod-image-extra-checks.yml @@ -40,9 +40,6 @@ on: # yamllint disable-line rule:truthy description: "Whether to use uv to build the image (true/false)" required: true type: string - image-tag: - required: true - type: string build-provider-packages: description: "Whether to build provider packages (true/false). If false providers are from PyPI" required: true @@ -73,8 +70,8 @@ jobs: with: runs-on-as-json-public: ${{ inputs.runs-on-as-json-public }} build-type: "MySQL Client" + upload-image-artifact: "false" upload-package-artifact: "false" - image-tag: mysql-${{ inputs.image-tag }} install-mysql-client-type: "mysql" python-versions: ${{ inputs.python-versions }} default-python-version: ${{ inputs.default-python-version }} @@ -97,8 +94,8 @@ jobs: with: runs-on-as-json-public: ${{ inputs.runs-on-as-json-public }} build-type: "pip" + upload-image-artifact: "false" upload-package-artifact: "false" - image-tag: mysql-${{ inputs.image-tag }} install-mysql-client-type: "mysql" python-versions: ${{ inputs.python-versions }} default-python-version: ${{ inputs.default-python-version }} diff --git a/.github/workflows/push-image-cache.yml b/.github/workflows/push-image-cache.yml index 10a33275ad3f3..004c97a647bd9 100644 --- a/.github/workflows/push-image-cache.yml +++ b/.github/workflows/push-image-cache.yml @@ -110,6 +110,7 @@ jobs: GITHUB_USERNAME: ${{ github.actor }} INCLUDE_SUCCESS_OUTPUTS: "${{ inputs.include-success-outputs }}" INSTALL_MYSQL_CLIENT_TYPE: ${{ inputs.install-mysql-client-type }} + PYTHON_MAJOR_MINOR_VERSION: "${{ matrix.python }}" USE_UV: ${{ inputs.use-uv }} UPGRADE_TO_NEWER_DEPENDENCIES: "false" VERBOSE: "true" @@ -126,23 +127,24 @@ jobs: run: ./scripts/ci/cleanup_docker.sh - name: "Install Breeze" uses: ./.github/actions/breeze - - name: "Start 
ARM instance" - run: ./scripts/ci/images/ci_start_arm_instance_and_connect_to_docker.sh - if: inputs.platform == 'linux/arm64' - name: Login to ghcr.io run: echo "${{ env.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin - - name: "Push CI ${{ inputs.cache-type }} cache: ${{ matrix.python }} ${{ inputs.platform }}" + - name: "Push CI latest images: ${{ env.PYTHON_MAJOR_MINOR_VERSION }} (linux/amd64 only)" run: > - breeze ci-image build --builder airflow_cache --prepare-buildx-cache - --platform "${{ inputs.platform }}" --python ${{ matrix.python }} - - name: "Stop ARM instance" - run: ./scripts/ci/images/ci_stop_arm_instance.sh - if: always() && inputs.platform == 'linux/arm64' - - name: "Push CI latest images: ${{ matrix.python }} (linux/amd64 only)" - run: > - breeze ci-image build --builder airflow_cache --push - --python "${{ matrix.python }}" --platform "${{ inputs.platform }}" + breeze + ci-image build + --builder airflow_cache + --platform "${{ inputs.platform }}" + --push if: inputs.push-latest-images == 'true' && inputs.platform == 'linux/amd64' + # yamllint disable-line rule:line-length + - name: "Push CI ${{ inputs.cache-type }} cache:${{ env.PYTHON_MAJOR_MINOR_VERSION }}:${{ inputs.platform }}" + run: > + breeze ci-image build + --builder airflow_cache + --prepare-buildx-cache + --platform "${{ inputs.platform }}" + --push push-prod-image-cache: name: "Push PROD ${{ inputs.cache-type }}:${{ matrix.python }} image cache" @@ -172,6 +174,7 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_USERNAME: ${{ github.actor }} INSTALL_MYSQL_CLIENT_TYPE: ${{ inputs.install-mysql-client-type }} + PYTHON_MAJOR_MINOR_VERSION: "${{ matrix.python }}" UPGRADE_TO_NEWER_DEPENDENCIES: "false" USE_UV: ${{ inputs.branch == 'main' && inputs.use-uv || 'false' }} VERBOSE: "true" @@ -196,25 +199,26 @@ jobs: with: name: prod-packages path: ./docker-context-files - - name: "Start ARM instance" - run: 
./scripts/ci/images/ci_start_arm_instance_and_connect_to_docker.sh - if: inputs.platform == 'linux/arm64' - name: Login to ghcr.io run: echo "${{ env.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin - - name: "Push PROD ${{ inputs.cache-type }} cache: ${{ matrix.python-version }} ${{ inputs.platform }}" - run: > - breeze prod-image build --builder airflow_cache - --prepare-buildx-cache --platform "${{ inputs.platform }}" - --install-packages-from-context --airflow-constraints-mode constraints-source-providers - --python ${{ matrix.python }} - - name: "Stop ARM instance" - run: ./scripts/ci/images/ci_stop_arm_instance.sh - if: always() && inputs.platform == 'linux/arm64' # We only push "AMD" images as it is really only needed for any kind of automated builds in CI # and currently there is not an easy way to make multi-platform image from two separate builds # and we can do it after we stopped the ARM instance as it is not needed anymore - - name: "Push PROD latest image: ${{ matrix.python }} (linux/amd64 ONLY)" + - name: "Push PROD latest image: ${{ env.PYTHON_MAJOR_MINOR_VERSION }} (linux/amd64 ONLY)" run: > - breeze prod-image build --builder airflow_cache --install-packages-from-context - --push --platform "${{ inputs.platform }}" + breeze prod-image build + --builder airflow_cache + --install-packages-from-context + --platform "${{ inputs.platform }}" + --airflow-constraints-mode constraints-source-providers --push if: inputs.push-latest-images == 'true' && inputs.platform == 'linux/amd64' + # yamllint disable-line rule:line-length + - name: "Push PROD ${{ inputs.cache-type }} cache: ${{ env.PYTHON_MAJOR_MINOR_VERSION }} ${{ inputs.platform }}" + run: > + breeze prod-image build + --builder airflow_cache + --prepare-buildx-cache + --install-packages-from-context + --platform "${{ inputs.platform }}" + --airflow-constraints-mode constraints-source-providers + --push diff --git a/.github/workflows/run-unit-tests.yml 
b/.github/workflows/run-unit-tests.yml index 6b491f6bff4ab..3a9440e528eaf 100644 --- a/.github/workflows/run-unit-tests.yml +++ b/.github/workflows/run-unit-tests.yml @@ -45,10 +45,6 @@ on: # yamllint disable-line rule:truthy required: false default: ":" type: string - image-tag: - description: "Tag to set for the image" - required: true - type: string python-versions: description: "The list of python versions (stringified JSON array) to run the tests on." required: true @@ -144,7 +140,6 @@ jobs: GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_USERNAME: ${{ github.actor }} - IMAGE_TAG: "${{ inputs.image-tag }}" INCLUDE_SUCCESS_OUTPUTS: ${{ inputs.include-success-outputs }} # yamllint disable rule:line-length JOB_ID: "${{ matrix.test-group }}-${{ inputs.test-scope }}-${{ inputs.test-name }}-${{inputs.backend}}-${{ matrix.backend-version }}-${{ matrix.python-version }}" @@ -163,10 +158,11 @@ jobs: uses: actions/checkout@v4 with: persist-credentials: false - - name: "Cleanup docker" - run: ./scripts/ci/cleanup_docker.sh - - name: "Prepare breeze & CI image: ${{matrix.python-version}}:${{ inputs.image-tag }}" + - name: "Prepare breeze & CI image: ${{ matrix.python-version }}" uses: ./.github/actions/prepare_breeze_and_image + with: + platform: "linux/amd64" + python: ${{ matrix.python-version }} - name: > Migration Tests: ${{ matrix.python-version }}:${{ env.PARALLEL_TEST_TYPES }} uses: ./.github/actions/migration_tests diff --git a/.github/workflows/special-tests.yml b/.github/workflows/special-tests.yml index decc7271b728b..d416d55575fb9 100644 --- a/.github/workflows/special-tests.yml +++ b/.github/workflows/special-tests.yml @@ -32,10 +32,6 @@ on: # yamllint disable-line rule:truthy description: "The json representing list of test test groups to run" required: true type: string - image-tag: - description: "Tag to set for the image" - required: true - type: string core-test-types-list-as-string: description: "The list of 
core test types to run separated by spaces" required: true @@ -96,7 +92,6 @@ jobs: test-scope: "DB" test-groups: ${{ inputs.test-groups }} backend: "postgres" - image-tag: ${{ inputs.image-tag }} python-versions: "['${{ inputs.default-python-version }}']" backend-versions: "['${{ inputs.default-postgres-version }}']" excluded-providers-as-string: ${{ inputs.excluded-providers-as-string }} @@ -120,7 +115,6 @@ jobs: test-scope: "All" test-groups: ${{ inputs.test-groups }} backend: "postgres" - image-tag: ${{ inputs.image-tag }} python-versions: "['${{ inputs.default-python-version }}']" backend-versions: "['${{ inputs.default-postgres-version }}']" excluded-providers-as-string: ${{ inputs.excluded-providers-as-string }} @@ -145,7 +139,6 @@ jobs: test-scope: "All" test-groups: ${{ inputs.test-groups }} backend: "postgres" - image-tag: ${{ inputs.image-tag }} python-versions: "['${{ inputs.default-python-version }}']" backend-versions: "['${{ inputs.default-postgres-version }}']" excluded-providers-as-string: ${{ inputs.excluded-providers-as-string }} @@ -169,7 +162,6 @@ jobs: test-scope: "Quarantined" test-groups: ${{ inputs.test-groups }} backend: "postgres" - image-tag: ${{ inputs.image-tag }} python-versions: "['${{ inputs.default-python-version }}']" backend-versions: "['${{ inputs.default-postgres-version }}']" excluded-providers-as-string: ${{ inputs.excluded-providers-as-string }} @@ -193,7 +185,6 @@ jobs: test-scope: "ARM collection" test-groups: ${{ inputs.test-groups }} backend: "postgres" - image-tag: ${{ inputs.image-tag }} python-versions: "['${{ inputs.default-python-version }}']" backend-versions: "['${{ inputs.default-postgres-version }}']" excluded-providers-as-string: ${{ inputs.excluded-providers-as-string }} @@ -218,7 +209,6 @@ jobs: test-scope: "System" test-groups: ${{ inputs.test-groups }} backend: "postgres" - image-tag: ${{ inputs.image-tag }} python-versions: "['${{ inputs.default-python-version }}']" backend-versions: "['${{ 
inputs.default-postgres-version }}']" excluded-providers-as-string: ${{ inputs.excluded-providers-as-string }} diff --git a/.github/workflows/task-sdk-tests.yml b/.github/workflows/task-sdk-tests.yml index acc9872e6ed96..756a66e546479 100644 --- a/.github/workflows/task-sdk-tests.yml +++ b/.github/workflows/task-sdk-tests.yml @@ -24,10 +24,6 @@ on: # yamllint disable-line rule:truthy description: "The array of labels (in json form) determining default runner used for the build." required: true type: string - image-tag: - description: "Tag to set for the image" - required: true - type: string default-python-version: description: "Which version of python should be used by default" required: true @@ -53,7 +49,6 @@ jobs: GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_USERNAME: ${{ github.actor }} - IMAGE_TAG: "${{ inputs.image-tag }}" INCLUDE_NOT_READY_PROVIDERS: "true" PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}" VERBOSE: "true" @@ -66,10 +61,11 @@ jobs: uses: actions/checkout@v4 with: persist-credentials: false - - name: "Cleanup docker" - run: ./scripts/ci/cleanup_docker.sh - - name: "Prepare breeze & CI image: ${{ matrix.python-version }}:${{ inputs.image-tag }}" + - name: "Prepare breeze & CI image: ${{ matrix.python-version }}" uses: ./.github/actions/prepare_breeze_and_image + with: + platform: "linux/amd64" + python: ${{ matrix.python-version }} - name: "Cleanup dist files" run: rm -fv ./dist/* - name: "Prepare Task SDK packages: wheel" diff --git a/.github/workflows/test-provider-packages.yml b/.github/workflows/test-provider-packages.yml index 08715af6b58ba..7b2bc485528c4 100644 --- a/.github/workflows/test-provider-packages.yml +++ b/.github/workflows/test-provider-packages.yml @@ -24,10 +24,6 @@ on: # yamllint disable-line rule:truthy description: "The array of labels (in json form) determining default runner used for the build." 
required: true type: string - image-tag: - description: "Tag to set for the image" - required: true - type: string canary-run: description: "Whether this is a canary run" required: true @@ -75,7 +71,6 @@ jobs: GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_USERNAME: ${{ github.actor }} - IMAGE_TAG: "${{ inputs.image-tag }}" INCLUDE_NOT_READY_PROVIDERS: "true" PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}" VERBOSE: "true" @@ -87,11 +82,11 @@ jobs: uses: actions/checkout@v4 with: persist-credentials: false - - name: "Cleanup docker" - run: ./scripts/ci/cleanup_docker.sh - - name: > - Prepare breeze & CI image: ${{ inputs.default-python-version }}:${{ inputs.image-tag }} + - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}" uses: ./.github/actions/prepare_breeze_and_image + with: + platform: "linux/amd64" + python: ${{ inputs.default-python-version }} - name: "Cleanup dist files" run: rm -fv ./dist/* - name: "Prepare provider documentation" @@ -161,9 +156,8 @@ jobs: GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_USERNAME: ${{ github.actor }} - IMAGE_TAG: "${{ inputs.image-tag }}" INCLUDE_NOT_READY_PROVIDERS: "true" - PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}" + PYTHON_MAJOR_MINOR_VERSION: "${{ matrix.python-version }}" VERSION_SUFFIX_FOR_PYPI: "dev0" VERBOSE: "true" CLEAN_AIRFLOW_INSTALLATION: "${{ inputs.canary-run }}" @@ -176,10 +170,11 @@ jobs: uses: actions/checkout@v4 with: persist-credentials: false - - name: "Cleanup docker" - run: ./scripts/ci/cleanup_docker.sh - - name: "Prepare breeze & CI image: ${{ matrix.python-version }}:${{ inputs.image-tag }}" + - name: "Prepare breeze & CI image: ${{ matrix.python-version }}" uses: ./.github/actions/prepare_breeze_and_image + with: + platform: "linux/amd64" + python: ${{ matrix.python-version }} - name: "Cleanup dist files" run: rm -fv ./dist/* - name: "Prepare 
provider packages: wheel" @@ -212,6 +207,9 @@ jobs: --airflow-constraints-reference constraints-${{matrix.airflow-version}} --providers-skip-constraints --install-airflow-with-constraints + - name: Check amount of disk space available + run: df -H + shell: bash - name: > Run provider unit tests on Airflow ${{ matrix.airflow-version }}:Python ${{ matrix.python-version }} diff --git a/Dockerfile b/Dockerfile index fe49db186479d..f32fbef633bc4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -422,85 +422,6 @@ common::show_packaging_tool_version_and_location common::install_packaging_tools EOF -# The content below is automatically copied from scripts/docker/install_airflow_dependencies_from_branch_tip.sh -COPY <<"EOF" /install_airflow_dependencies_from_branch_tip.sh -#!/usr/bin/env bash - -. "$( dirname "${BASH_SOURCE[0]}" )/common.sh" - -: "${AIRFLOW_REPO:?Should be set}" -: "${AIRFLOW_BRANCH:?Should be set}" -: "${INSTALL_MYSQL_CLIENT:?Should be true or false}" -: "${INSTALL_POSTGRES_CLIENT:?Should be true or false}" - -function install_airflow_dependencies_from_branch_tip() { - echo - echo "${COLOR_BLUE}Installing airflow from ${AIRFLOW_BRANCH}. It is used to cache dependencies${COLOR_RESET}" - echo - if [[ ${INSTALL_MYSQL_CLIENT} != "true" ]]; then - AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/mysql,} - fi - if [[ ${INSTALL_POSTGRES_CLIENT} != "true" ]]; then - AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/postgres,} - fi - local TEMP_AIRFLOW_DIR - TEMP_AIRFLOW_DIR=$(mktemp -d) - # Install latest set of dependencies - without constraints. This is to download a "base" set of - # dependencies that we can cache and reuse when installing airflow using constraints and latest - # pyproject.toml in the next step (when we install regular airflow). 
- set -x - curl -fsSL "https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz" | \ - tar xz -C "${TEMP_AIRFLOW_DIR}" --strip 1 - # Make sure editable dependencies are calculated when devel-ci dependencies are installed - ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} ${ADDITIONAL_PIP_INSTALL_FLAGS} \ - --editable "${TEMP_AIRFLOW_DIR}[${AIRFLOW_EXTRAS}]" - set +x - common::install_packaging_tools - set -x - echo "${COLOR_BLUE}Uninstalling providers. Dependencies remain${COLOR_RESET}" - # Uninstall airflow and providers to keep only the dependencies. In the future when - # planned https://github.com/pypa/pip/issues/11440 is implemented in pip we might be able to use this - # flag and skip the remove step. - pip freeze | grep apache-airflow-providers | xargs ${PACKAGING_TOOL_CMD} uninstall ${EXTRA_UNINSTALL_FLAGS} || true - set +x - echo - echo "${COLOR_BLUE}Uninstalling just airflow. Dependencies remain. Now target airflow can be reinstalled using mostly cached dependencies${COLOR_RESET}" - echo - set +x - ${PACKAGING_TOOL_CMD} uninstall ${EXTRA_UNINSTALL_FLAGS} apache-airflow - rm -rf "${TEMP_AIRFLOW_DIR}" - set -x - # If you want to make sure dependency is removed from cache in your PR when you removed it from - # pyproject.toml - please add your dependency here as a list of strings - # for example: - # DEPENDENCIES_TO_REMOVE=("package_a" "package_b") - # Once your PR is merged, you should make a follow-up PR to remove it from this list - # and increase the AIRFLOW_CI_BUILD_EPOCH in Dockerfile.ci to make sure your cache is rebuilt. - local DEPENDENCIES_TO_REMOVE - # IMPORTANT!! 
Make sure to increase AIRFLOW_CI_BUILD_EPOCH in Dockerfile.ci when you remove a dependency from that list - DEPENDENCIES_TO_REMOVE=() - if [[ "${DEPENDENCIES_TO_REMOVE[*]}" != "" ]]; then - echo - echo "${COLOR_BLUE}Uninstalling just removed dependencies (temporary until cache refreshes)${COLOR_RESET}" - echo "${COLOR_BLUE}Dependencies to uninstall: ${DEPENDENCIES_TO_REMOVE[*]}${COLOR_RESET}" - echo - set +x - ${PACKAGING_TOOL_CMD} uninstall "${DEPENDENCIES_TO_REMOVE[@]}" || true - set -x - # make sure that the dependency is not needed by something else - pip check - fi -} - -common::get_colors -common::get_packaging_tool -common::get_airflow_version_specification -common::get_constraints_location -common::show_packaging_tool_version_and_location - -install_airflow_dependencies_from_branch_tip -EOF - # The content below is automatically copied from scripts/docker/common.sh COPY <<"EOF" /common.sh #!/usr/bin/env bash @@ -524,8 +445,6 @@ function common::get_packaging_tool() { ## IMPORTANT: IF YOU MODIFY THIS FUNCTION YOU SHOULD ALSO MODIFY CORRESPONDING FUNCTION IN ## `scripts/in_container/_in_container_utils.sh` - local PYTHON_BIN - PYTHON_BIN=$(which python) if [[ ${AIRFLOW_USE_UV} == "true" ]]; then echo echo "${COLOR_BLUE}Using 'uv' to install Airflow${COLOR_RESET}" @@ -533,8 +452,8 @@ function common::get_packaging_tool() { export PACKAGING_TOOL="uv" export PACKAGING_TOOL_CMD="uv pip" if [[ -z ${VIRTUAL_ENV=} ]]; then - export EXTRA_INSTALL_FLAGS="--python ${PYTHON_BIN}" - export EXTRA_UNINSTALL_FLAGS="--python ${PYTHON_BIN}" + export EXTRA_INSTALL_FLAGS="--system" + export EXTRA_UNINSTALL_FLAGS="--system" else export EXTRA_INSTALL_FLAGS="" export EXTRA_UNINSTALL_FLAGS="" @@ -900,18 +819,12 @@ function install_airflow() { # Determine the installation_command_flags based on AIRFLOW_INSTALLATION_METHOD method local installation_command_flags if [[ ${AIRFLOW_INSTALLATION_METHOD} == "." 
]]; then - # We need _a_ file in there otherwise the editable install doesn't include anything in the .pth file - mkdir -p ./providers/src/airflow/providers/ - touch ./providers/src/airflow/providers/__init__.py - - # Similarly we need _a_ file for task_sdk too - mkdir -p ./task_sdk/src/airflow/sdk/ - echo '__version__ = "0.0.0dev0"' > ./task_sdk/src/airflow/sdk/__init__.py - - trap 'rm -f ./providers/src/airflow/providers/__init__.py ./task_sdk/src/airflow/__init__.py 2>/dev/null' EXIT - # When installing from sources - we always use `--editable` mode - installation_command_flags="--editable .[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION} --editable ./providers --editable ./task_sdk" + installation_command_flags="--editable .[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION} --editable ./task_sdk" + while IFS= read -r -d '' pyproject_toml_file; do + project_folder=$(dirname ${pyproject_toml_file}) + installation_command_flags="${installation_command_flags} --editable ${project_folder}" + done < <(find "providers" -name "pyproject.toml" -print0) elif [[ ${AIRFLOW_INSTALLATION_METHOD} == "apache-airflow" ]]; then installation_command_flags="apache-airflow[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" elif [[ ${AIRFLOW_INSTALLATION_METHOD} == apache-airflow\ @\ * ]]; then @@ -1407,7 +1320,8 @@ ARG PYTHON_BASE_IMAGE ENV PYTHON_BASE_IMAGE=${PYTHON_BASE_IMAGE} \ DEBIAN_FRONTEND=noninteractive LANGUAGE=C.UTF-8 LANG=C.UTF-8 LC_ALL=C.UTF-8 \ LC_CTYPE=C.UTF-8 LC_MESSAGES=C.UTF-8 \ - PIP_CACHE_DIR=/tmp/.cache/pip + PIP_CACHE_DIR=/tmp/.cache/pip \ + UV_CACHE_DIR=/tmp/.cache/uv ARG DEV_APT_DEPS="" ARG ADDITIONAL_DEV_APT_DEPS="" @@ -1473,9 +1387,6 @@ ARG DEFAULT_CONSTRAINTS_BRANCH="constraints-main" # By default PIP has progress bar but you can disable it. ARG PIP_PROGRESS_BAR -# By default we do not use pre-cached packages, but in CI/Breeze environment we override this to speed up -# builds in case pyproject.toml changed. 
This is pure optimisation of CI/Breeze builds. -ARG AIRFLOW_PRE_CACHED_PIP_PACKAGES="false" # This is airflow version that is put in the label of the image build ARG AIRFLOW_VERSION # By default latest released version of airflow is installed (when empty) but this value can be overridden @@ -1513,7 +1424,6 @@ ENV AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} \ AIRFLOW_UV_VERSION=${AIRFLOW_UV_VERSION} \ UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT} \ AIRFLOW_USE_UV=${AIRFLOW_USE_UV} \ - AIRFLOW_PRE_CACHED_PIP_PACKAGES=${AIRFLOW_PRE_CACHED_PIP_PACKAGES} \ AIRFLOW_VERSION=${AIRFLOW_VERSION} \ AIRFLOW_INSTALLATION_METHOD=${AIRFLOW_INSTALLATION_METHOD} \ AIRFLOW_VERSION_SPECIFICATION=${AIRFLOW_VERSION_SPECIFICATION} \ @@ -1538,8 +1448,7 @@ ENV AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} \ # Copy all scripts required for installation - changing any of those should lead to # rebuilding from here -COPY --from=scripts common.sh install_packaging_tools.sh \ - install_airflow_dependencies_from_branch_tip.sh create_prod_venv.sh /scripts/docker/ +COPY --from=scripts common.sh install_packaging_tools.sh create_prod_venv.sh /scripts/docker/ # We can set this value to true in case we want to install .whl/.tar.gz packages placed in the # docker-context-files folder. 
This can be done for both additional packages you want to install @@ -1569,13 +1478,7 @@ ENV AIRFLOW_CI_BUILD_EPOCH=${AIRFLOW_CI_BUILD_EPOCH} # By default PIP installs everything to ~/.local and it's also treated as VIRTUALENV ENV VIRTUAL_ENV="${AIRFLOW_USER_HOME_DIR}/.local" -RUN bash /scripts/docker/install_packaging_tools.sh; \ - bash /scripts/docker/create_prod_venv.sh; \ - if [[ ${AIRFLOW_PRE_CACHED_PIP_PACKAGES} == "true" && \ - ${INSTALL_PACKAGES_FROM_CONTEXT} == "false" && \ - ${UPGRADE_INVALIDATION_STRING} == "" ]]; then \ - bash /scripts/docker/install_airflow_dependencies_from_branch_tip.sh; \ - fi +RUN bash /scripts/docker/install_packaging_tools.sh; bash /scripts/docker/create_prod_venv.sh COPY --chown=airflow:0 ${AIRFLOW_SOURCES_FROM} ${AIRFLOW_SOURCES_TO} @@ -1599,10 +1502,10 @@ COPY --from=scripts install_from_docker_context_files.sh install_airflow.sh \ # an incorrect architecture. ARG TARGETARCH # Value to be able to easily change cache id and therefore use a bare new cache -ARG PIP_CACHE_EPOCH="9" +ARG DEPENDENCY_CACHE_EPOCH="9" # hadolint ignore=SC2086, SC2010, DL3042 -RUN --mount=type=cache,id=$PYTHON_BASE_IMAGE-$AIRFLOW_PIP_VERSION-$TARGETARCH-$PIP_CACHE_EPOCH,target=/tmp/.cache/pip,uid=${AIRFLOW_UID} \ +RUN --mount=type=cache,id=prod-$TARGETARCH-$DEPENDENCY_CACHE_EPOCH,target=/tmp/.cache/,uid=${AIRFLOW_UID} \ if [[ ${INSTALL_PACKAGES_FROM_CONTEXT} == "true" ]]; then \ bash /scripts/docker/install_from_docker_context_files.sh; \ fi; \ @@ -1622,7 +1525,7 @@ RUN --mount=type=cache,id=$PYTHON_BASE_IMAGE-$AIRFLOW_PIP_VERSION-$TARGETARCH-$P # during the build additionally to whatever has been installed so far. 
It is recommended that # the requirements.txt contains only dependencies with == version specification # hadolint ignore=DL3042 -RUN --mount=type=cache,id=additional-requirements-$PYTHON_BASE_IMAGE-$AIRFLOW_PIP_VERSION-$TARGETARCH-$PIP_CACHE_EPOCH,target=/tmp/.cache/pip,uid=${AIRFLOW_UID} \ +RUN --mount=type=cache,id=prod-$TARGETARCH-$DEPENDENCY_CACHE_EPOCH,target=/tmp/.cache/,uid=${AIRFLOW_UID} \ if [[ -f /docker-context-files/requirements.txt ]]; then \ pip install -r /docker-context-files/requirements.txt; \ fi @@ -1650,7 +1553,9 @@ ARG PYTHON_BASE_IMAGE ENV PYTHON_BASE_IMAGE=${PYTHON_BASE_IMAGE} \ # Make sure noninteractive debian install is used and language variables set DEBIAN_FRONTEND=noninteractive LANGUAGE=C.UTF-8 LANG=C.UTF-8 LC_ALL=C.UTF-8 \ - LC_CTYPE=C.UTF-8 LC_MESSAGES=C.UTF-8 LD_LIBRARY_PATH=/usr/local/lib + LC_CTYPE=C.UTF-8 LC_MESSAGES=C.UTF-8 LD_LIBRARY_PATH=/usr/local/lib \ + PIP_CACHE_DIR=/tmp/.cache/pip \ + UV_CACHE_DIR=/tmp/.cache/uv ARG RUNTIME_APT_DEPS="" ARG ADDITIONAL_RUNTIME_APT_DEPS="" diff --git a/Dockerfile.ci b/Dockerfile.ci index 7c0b529d4711f..6499a5be76c33 100644 --- a/Dockerfile.ci +++ b/Dockerfile.ci @@ -363,85 +363,6 @@ common::show_packaging_tool_version_and_location common::install_packaging_tools EOF -# The content below is automatically copied from scripts/docker/install_airflow_dependencies_from_branch_tip.sh -COPY <<"EOF" /install_airflow_dependencies_from_branch_tip.sh -#!/usr/bin/env bash - -. "$( dirname "${BASH_SOURCE[0]}" )/common.sh" - -: "${AIRFLOW_REPO:?Should be set}" -: "${AIRFLOW_BRANCH:?Should be set}" -: "${INSTALL_MYSQL_CLIENT:?Should be true or false}" -: "${INSTALL_POSTGRES_CLIENT:?Should be true or false}" - -function install_airflow_dependencies_from_branch_tip() { - echo - echo "${COLOR_BLUE}Installing airflow from ${AIRFLOW_BRANCH}. 
It is used to cache dependencies${COLOR_RESET}" - echo - if [[ ${INSTALL_MYSQL_CLIENT} != "true" ]]; then - AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/mysql,} - fi - if [[ ${INSTALL_POSTGRES_CLIENT} != "true" ]]; then - AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/postgres,} - fi - local TEMP_AIRFLOW_DIR - TEMP_AIRFLOW_DIR=$(mktemp -d) - # Install latest set of dependencies - without constraints. This is to download a "base" set of - # dependencies that we can cache and reuse when installing airflow using constraints and latest - # pyproject.toml in the next step (when we install regular airflow). - set -x - curl -fsSL "https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz" | \ - tar xz -C "${TEMP_AIRFLOW_DIR}" --strip 1 - # Make sure editable dependencies are calculated when devel-ci dependencies are installed - ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} ${ADDITIONAL_PIP_INSTALL_FLAGS} \ - --editable "${TEMP_AIRFLOW_DIR}[${AIRFLOW_EXTRAS}]" - set +x - common::install_packaging_tools - set -x - echo "${COLOR_BLUE}Uninstalling providers. Dependencies remain${COLOR_RESET}" - # Uninstall airflow and providers to keep only the dependencies. In the future when - # planned https://github.com/pypa/pip/issues/11440 is implemented in pip we might be able to use this - # flag and skip the remove step. - pip freeze | grep apache-airflow-providers | xargs ${PACKAGING_TOOL_CMD} uninstall ${EXTRA_UNINSTALL_FLAGS} || true - set +x - echo - echo "${COLOR_BLUE}Uninstalling just airflow. Dependencies remain. 
Now target airflow can be reinstalled using mostly cached dependencies${COLOR_RESET}" - echo - set +x - ${PACKAGING_TOOL_CMD} uninstall ${EXTRA_UNINSTALL_FLAGS} apache-airflow - rm -rf "${TEMP_AIRFLOW_DIR}" - set -x - # If you want to make sure dependency is removed from cache in your PR when you removed it from - # pyproject.toml - please add your dependency here as a list of strings - # for example: - # DEPENDENCIES_TO_REMOVE=("package_a" "package_b") - # Once your PR is merged, you should make a follow-up PR to remove it from this list - # and increase the AIRFLOW_CI_BUILD_EPOCH in Dockerfile.ci to make sure your cache is rebuilt. - local DEPENDENCIES_TO_REMOVE - # IMPORTANT!! Make sure to increase AIRFLOW_CI_BUILD_EPOCH in Dockerfile.ci when you remove a dependency from that list - DEPENDENCIES_TO_REMOVE=() - if [[ "${DEPENDENCIES_TO_REMOVE[*]}" != "" ]]; then - echo - echo "${COLOR_BLUE}Uninstalling just removed dependencies (temporary until cache refreshes)${COLOR_RESET}" - echo "${COLOR_BLUE}Dependencies to uninstall: ${DEPENDENCIES_TO_REMOVE[*]}${COLOR_RESET}" - echo - set +x - ${PACKAGING_TOOL_CMD} uninstall "${DEPENDENCIES_TO_REMOVE[@]}" || true - set -x - # make sure that the dependency is not needed by something else - pip check - fi -} - -common::get_colors -common::get_packaging_tool -common::get_airflow_version_specification -common::get_constraints_location -common::show_packaging_tool_version_and_location - -install_airflow_dependencies_from_branch_tip -EOF - # The content below is automatically copied from scripts/docker/common.sh COPY <<"EOF" /common.sh #!/usr/bin/env bash @@ -465,8 +386,6 @@ function common::get_packaging_tool() { ## IMPORTANT: IF YOU MODIFY THIS FUNCTION YOU SHOULD ALSO MODIFY CORRESPONDING FUNCTION IN ## `scripts/in_container/_in_container_utils.sh` - local PYTHON_BIN - PYTHON_BIN=$(which python) if [[ ${AIRFLOW_USE_UV} == "true" ]]; then echo echo "${COLOR_BLUE}Using 'uv' to install Airflow${COLOR_RESET}" @@ -474,8 +393,8 @@ 
function common::get_packaging_tool() { export PACKAGING_TOOL="uv" export PACKAGING_TOOL_CMD="uv pip" if [[ -z ${VIRTUAL_ENV=} ]]; then - export EXTRA_INSTALL_FLAGS="--python ${PYTHON_BIN}" - export EXTRA_UNINSTALL_FLAGS="--python ${PYTHON_BIN}" + export EXTRA_INSTALL_FLAGS="--system" + export EXTRA_UNINSTALL_FLAGS="--system" else export EXTRA_INSTALL_FLAGS="" export EXTRA_UNINSTALL_FLAGS="" @@ -670,18 +589,12 @@ function install_airflow() { # Determine the installation_command_flags based on AIRFLOW_INSTALLATION_METHOD method local installation_command_flags if [[ ${AIRFLOW_INSTALLATION_METHOD} == "." ]]; then - # We need _a_ file in there otherwise the editable install doesn't include anything in the .pth file - mkdir -p ./providers/src/airflow/providers/ - touch ./providers/src/airflow/providers/__init__.py - - # Similarly we need _a_ file for task_sdk too - mkdir -p ./task_sdk/src/airflow/sdk/ - echo '__version__ = "0.0.0dev0"' > ./task_sdk/src/airflow/sdk/__init__.py - - trap 'rm -f ./providers/src/airflow/providers/__init__.py ./task_sdk/src/airflow/__init__.py 2>/dev/null' EXIT - # When installing from sources - we always use `--editable` mode - installation_command_flags="--editable .[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION} --editable ./providers --editable ./task_sdk" + installation_command_flags="--editable .[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION} --editable ./task_sdk" + while IFS= read -r -d '' pyproject_toml_file; do + project_folder=$(dirname ${pyproject_toml_file}) + installation_command_flags="${installation_command_flags} --editable ${project_folder}" + done < <(find "providers" -name "pyproject.toml" -print0) elif [[ ${AIRFLOW_INSTALLATION_METHOD} == "apache-airflow" ]]; then installation_command_flags="apache-airflow[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" elif [[ ${AIRFLOW_INSTALLATION_METHOD} == apache-airflow\ @\ * ]]; then @@ -980,9 +893,12 @@ function determine_airflow_to_use() { echo echo 
"${COLOR_BLUE}Uninstalling all packages first${COLOR_RESET}" echo - pip freeze | grep -ve "^-e" | grep -ve "^#" | grep -ve "^uv" | xargs pip uninstall -y --root-user-action ignore + # shellcheck disable=SC2086 + ${PACKAGING_TOOL_CMD} freeze | grep -ve "^-e" | grep -ve "^#" | grep -ve "^uv" | \ + xargs ${PACKAGING_TOOL_CMD} uninstall ${EXTRA_UNINSTALL_FLAGS} # Now install rich ad click first to use the installation script - uv pip install rich rich-click click --python "/usr/local/bin/python" \ + # shellcheck disable=SC2086 + ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} rich rich-click click --python "/usr/local/bin/python" \ --constraint https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-${PYTHON_MAJOR_MINOR_VERSION}.txt fi python "${IN_CONTAINER_DIR}/install_airflow_and_providers.py" @@ -992,7 +908,8 @@ function determine_airflow_to_use() { python "${IN_CONTAINER_DIR}/install_devel_deps.py" \ --constraint https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-${PYTHON_MAJOR_MINOR_VERSION}.txt # Some packages might leave legacy typing module which causes test issues - pip uninstall -y typing || true + # shellcheck disable=SC2086 + ${PACKAGING_TOOL_CMD} uninstall ${EXTRA_UNINSTALL_FLAGS} typing || true if [[ ${LINK_PROVIDERS_TO_AIRFLOW_PACKAGE=} == "true" ]]; then echo echo "${COLOR_BLUE}Linking providers to airflow package as we are using them from mounted sources.${COLOR_RESET}" @@ -1202,7 +1119,10 @@ ENV PYTHON_BASE_IMAGE=${PYTHON_BASE_IMAGE} \ DEPENDENCIES_EPOCH_NUMBER=${DEPENDENCIES_EPOCH_NUMBER} \ INSTALL_MYSQL_CLIENT="true" \ INSTALL_MSSQL_CLIENT="true" \ - INSTALL_POSTGRES_CLIENT="true" + INSTALL_POSTGRES_CLIENT="true" \ + PIP_CACHE_DIR=/root/.cache/pip \ + UV_CACHE_DIR=/root/.cache/uv + RUN echo "Base image version: ${PYTHON_BASE_IMAGE}" @@ -1282,12 +1202,7 @@ ARG DEFAULT_CONSTRAINTS_BRANCH="constraints-main" # By changing the epoch we can force reinstalling Airflow and pip all dependencies # It can 
also be overwritten manually by setting the AIRFLOW_CI_BUILD_EPOCH environment variable. ARG AIRFLOW_CI_BUILD_EPOCH="10" -ARG AIRFLOW_PRE_CACHED_PIP_PACKAGES="true" # Setup PIP -# By default PIP install run without cache to make image smaller -ARG PIP_NO_CACHE_DIR="true" -# By default UV install run without cache to make image smaller -ARG UV_NO_CACHE="true" ARG UV_HTTP_TIMEOUT="300" # By default PIP has progress bar but you can disable it. ARG PIP_PROGRESS_BAR="on" @@ -1315,7 +1230,6 @@ ENV AIRFLOW_REPO=${AIRFLOW_REPO}\ AIRFLOW_CONSTRAINTS_LOCATION=${AIRFLOW_CONSTRAINTS_LOCATION} \ DEFAULT_CONSTRAINTS_BRANCH=${DEFAULT_CONSTRAINTS_BRANCH} \ AIRFLOW_CI_BUILD_EPOCH=${AIRFLOW_CI_BUILD_EPOCH} \ - AIRFLOW_PRE_CACHED_PIP_PACKAGES=${AIRFLOW_PRE_CACHED_PIP_PACKAGES} \ AIRFLOW_VERSION=${AIRFLOW_VERSION} \ AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} \ AIRFLOW_UV_VERSION=${AIRFLOW_UV_VERSION} \ @@ -1327,9 +1241,7 @@ ENV AIRFLOW_REPO=${AIRFLOW_REPO}\ INSTALL_POSTGRES_CLIENT="true" \ AIRFLOW_INSTALLATION_METHOD="." \ AIRFLOW_VERSION_SPECIFICATION="" \ - PIP_NO_CACHE_DIR=${PIP_NO_CACHE_DIR} \ PIP_PROGRESS_BAR=${PIP_PROGRESS_BAR} \ - UV_NO_CACHE=${UV_NO_CACHE} \ ADDITIONAL_PIP_INSTALL_FLAGS=${ADDITIONAL_PIP_INSTALL_FLAGS} \ CASS_DRIVER_BUILD_CONCURRENCY=${CASS_DRIVER_BUILD_CONCURRENCY} \ CASS_DRIVER_NO_CYTHON=${CASS_DRIVER_NO_CYTHON} @@ -1338,25 +1250,10 @@ RUN echo "Airflow version: ${AIRFLOW_VERSION}" # Copy all scripts required for installation - changing any of those should lead to # rebuilding from here -COPY --from=scripts install_packaging_tools.sh install_airflow_dependencies_from_branch_tip.sh \ - common.sh /scripts/docker/ +COPY --from=scripts common.sh install_packaging_tools.sh install_additional_dependencies.sh /scripts/docker/ # We are first creating a venv where all python packages and .so binaries needed by those are # installed. 
-# In case of CI builds we want to pre-install main version of airflow dependencies so that -# We do not have to always reinstall it from the scratch. -# And is automatically reinstalled from the scratch every time patch release of python gets released -# The Airflow and providers are uninstalled, only dependencies remain. -# the cache is only used when "upgrade to newer dependencies" is not set to automatically -# account for removed dependencies (we do not install them in the first place) -# -# We are installing from branch tip without fixing UV or PIP version - in order to avoid rebuilding the -# base cache layer every time the UV or PIP version changes. -RUN bash /scripts/docker/install_packaging_tools.sh; \ - if [[ ${AIRFLOW_PRE_CACHED_PIP_PACKAGES} == "true" ]]; then \ - bash /scripts/docker/install_airflow_dependencies_from_branch_tip.sh; \ - fi - # Here we fix the versions so all subsequent commands will use the versions # from the sources @@ -1372,31 +1269,33 @@ ARG AIRFLOW_PRE_COMMIT_UV_VERSION="4.1.4" ENV AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} \ AIRFLOW_UV_VERSION=${AIRFLOW_UV_VERSION} \ + # This is needed since we are using cache mounted from the host + UV_LINK_MODE=copy \ AIRFLOW_PRE_COMMIT_VERSION=${AIRFLOW_PRE_COMMIT_VERSION} # The PATH is needed for PIPX to find the tools installed ENV PATH="/root/.local/bin:${PATH}" +# Useful for creating a cache id based on the underlying architecture, preventing the use of cached python packages from +# an incorrect architecture. 
+ARG TARGETARCH +# Value to be able to easily change cache id and therefore use a bare new cache +ARG DEPENDENCY_CACHE_EPOCH="0" + # Install useful command line tools in their own virtualenv so that they do not clash with # dependencies installed in Airflow also reinstall PIP and UV to make sure they are installed # in the version specified above -RUN bash /scripts/docker/install_packaging_tools.sh - -# Airflow sources change frequently but dependency configuration won't change that often -# We copy pyproject.toml and other files needed to perform setup of dependencies -# So in case pyproject.toml changes we can install latest dependencies required. -COPY pyproject.toml ${AIRFLOW_SOURCES}/pyproject.toml -COPY providers/pyproject.toml ${AIRFLOW_SOURCES}/providers/pyproject.toml -COPY task_sdk/pyproject.toml ${AIRFLOW_SOURCES}/task_sdk/pyproject.toml -COPY task_sdk/README.md ${AIRFLOW_SOURCES}/task_sdk/README.md -COPY airflow/__init__.py ${AIRFLOW_SOURCES}/airflow/ -COPY tests_common/ ${AIRFLOW_SOURCES}/tests_common/ -COPY generated/* ${AIRFLOW_SOURCES}/generated/ -COPY constraints/* ${AIRFLOW_SOURCES}/constraints/ -COPY LICENSE ${AIRFLOW_SOURCES}/LICENSE -COPY hatch_build.py ${AIRFLOW_SOURCES}/ +RUN --mount=type=cache,id=ci-$TARGETARCH-$DEPENDENCY_CACHE_EPOCH,target=/root/.cache/ \ + bash /scripts/docker/install_packaging_tools.sh + COPY --from=scripts install_airflow.sh /scripts/docker/ +# We can copy everything here. The Context is filtered by dockerignore. This makes sure we are not +# copying over stuff that is accidentally generated or that we do not need (such as egg-info) +# if you want to add something that is missing and you expect to see it in the image you can +# add it with ! in .dockerignore next to the airflow, test etc. directories there +COPY . ${AIRFLOW_SOURCES}/ + # Those are additional constraints that are needed for some extras but we do not want to # force them on the main Airflow package. 
Currently we need no extra limits as PIP 23.1+ has much better # dependency resolution and we do not need to limit the versions of the dependencies @@ -1415,36 +1314,30 @@ ENV EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS=${EAGER_UPGRADE_ADDITIONAL_REQUIREMENT # Usually we will install versions based on the dependencies in pyproject.toml and upgraded only if needed. # But in cron job we will install latest versions matching pyproject.toml to see if there is no breaking change # and push the constraints if everything is successful -RUN bash /scripts/docker/install_airflow.sh - -COPY --from=scripts entrypoint_ci.sh /entrypoint -COPY --from=scripts entrypoint_exec.sh /entrypoint-exec -RUN chmod a+x /entrypoint /entrypoint-exec +RUN --mount=type=cache,id=ci-$TARGETARCH-$DEPENDENCY_CACHE_EPOCH,target=/root/.cache/ bash /scripts/docker/install_airflow.sh COPY --from=scripts install_packaging_tools.sh install_additional_dependencies.sh /scripts/docker/ -# Additional python deps to install ARG ADDITIONAL_PYTHON_DEPS="" -RUN bash /scripts/docker/install_packaging_tools.sh; \ +ENV ADDITIONAL_PYTHON_DEPS=${ADDITIONAL_PYTHON_DEPS} + +RUN --mount=type=cache,id=ci-$TARGETARCH-$DEPENDENCY_CACHE_EPOCH,target=/root/.cache/ \ + bash /scripts/docker/install_packaging_tools.sh; \ if [[ -n "${ADDITIONAL_PYTHON_DEPS}" ]]; then \ bash /scripts/docker/install_additional_dependencies.sh; \ fi -# Install autocomplete for airflow -RUN if command -v airflow; then \ - register-python-argcomplete airflow >> ~/.bashrc ; \ - fi - -# Install autocomplete for Kubectl -RUN echo "source /etc/bash_completion" >> ~/.bashrc +COPY --from=scripts entrypoint_ci.sh /entrypoint +COPY --from=scripts entrypoint_exec.sh /entrypoint-exec +RUN chmod a+x /entrypoint /entrypoint-exec -# We can copy everything here. The Context is filtered by dockerignore. 
This makes sure we are not -# copying over stuff that is accidentally generated or that we do not need (such as egg-info) -# if you want to add something that is missing and you expect to see it in the image you can -# add it with ! in .dockerignore next to the airflow, test etc. directories there -COPY . ${AIRFLOW_SOURCES}/ +# Install autocomplete for airflow and kubectl +RUN if command -v airflow; then \ + register-python-argcomplete airflow >> ~/.bashrc ; \ + fi; \ + echo "source /etc/bash_completion" >> ~/.bashrc WORKDIR ${AIRFLOW_SOURCES} @@ -1455,7 +1348,13 @@ ARG AIRFLOW_IMAGE_DATE_CREATED ENV PATH="/files/bin/:/opt/airflow/scripts/in_container/bin/:${PATH}" \ GUNICORN_CMD_ARGS="--worker-tmp-dir /dev/shm/" \ BUILD_ID=${BUILD_ID} \ - COMMIT_SHA=${COMMIT_SHA} + COMMIT_SHA=${COMMIT_SHA} \ + # When we enter the image, the /root/.cache is not mounted from temporary mount cache. + # We do not want to share the cache from host to avoid all kinds of problems where cache + # is different with different platforms / python versions. We want to have a clean cache + # in the image - and in this case /root/.cache is on the same filesystem as the installed packages. + # so we can go back to the default link mode being hardlink. + UV_LINK_MODE=hardlink # Link dumb-init for backwards compatibility (so that older images also work) RUN ln -sf /usr/bin/dumb-init /usr/local/bin/dumb-init diff --git a/dev/breeze/doc/06_managing_docker_images.rst b/dev/breeze/doc/06_managing_docker_images.rst index bb4c4f9e06f62..84e4e77a010f1 100644 --- a/dev/breeze/doc/06_managing_docker_images.rst +++ b/dev/breeze/doc/06_managing_docker_images.rst @@ -76,7 +76,7 @@ These are all available flags of ``pull`` command: Verifying CI image .................. -Finally, you can verify CI image by running tests - either with the pulled/built images or +You can verify CI image by running tests - either with the pulled/built images or with an arbitrary image. 
These are all available flags of ``verify`` command: @@ -86,6 +86,41 @@ These are all available flags of ``verify`` command: :width: 100% :alt: Breeze ci-image verify +Loading and saving CI image +........................... + +You can load and save PROD image - for example to transfer it to another machine or to load an image +that has been built in our CI. + +These are all available flags of ``save`` command: + +.. image:: ./images/output_ci-image_save.svg + :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/images/output_ci-image_save.svg + :width: 100% + :alt: Breeze ci-image save + +These are all available flags of ``load`` command: + +.. image:: ./images/output_ci-image_load.svg + :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/images/output_ci-image_load.svg + :width: 100% + :alt: Breeze ci-image load + +Images for every build from our CI are uploaded as artifacts to the +GitHub Action run (in summary) and can be downloaded from there for 2 days in order to reproduce the complete +environment used during the tests and loaded to the local Docker registry (note that you have +to use the same platform as the CI run). + +You will find the artifacts for each image in the summary of the CI run. The artifacts are named +``ci-image-docker-export---_merge``. Those are compressed zip files that +contain the ".tar" image that should be used with ``--image-file`` flag of the load method. Make sure to +use the same ``--python`` version as the image was built with. + +.. image:: ./images/image_artifacts.png + :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/images/output_ci-image_load.svg + :width: 100% + :alt: Breeze image artifacts + PROD Image tasks ---------------- @@ -170,7 +205,7 @@ These are all available flags of ``pull-prod-image`` command: Verifying PROD image .................... 
-Finally, you can verify PROD image by running tests - either with the pulled/built images or
+You can verify PROD image by running tests - either with the pulled/built images or
 with an arbitrary image.

 These are all available flags of ``verify-prod-image`` command:

@@ -180,6 +215,31 @@ These are all available flags of ``verify-prod-image`` command:
   :width: 100%
   :alt: Breeze prod-image verify

+Loading and saving PROD image
+.............................
+
+You can load and save PROD image - for example to transfer it to another machine or to load an image
+that has been built in our CI.
+
+These are all available flags of ``save`` command:
+
+.. image:: ./images/output_prod-image_save.svg
+  :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/images/output_prod-image_save.svg
+  :width: 100%
+  :alt: Breeze prod-image save
+
+These are all available flags of ``load`` command:
+
+.. image:: ./images/output_prod-image_load.svg
+  :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/images/output_prod-image_load.svg
+  :width: 100%
+  :alt: Breeze prod-image load
+
+Similarly as in case of CI images, images for every build from our CI are uploaded as artifacts to the
+GitHub Action run (in summary) and can be downloaded from there for 2 days in order to reproduce the complete
+environment used during the tests and loaded to the local Docker registry (note that you have
+to use the same platform as the CI run).
+
 ------

 Next step: Follow the `Breeze maintenance tasks <07_breeze_maintenance_tasks.rst>`_ to learn about tasks that
diff --git a/dev/breeze/doc/ci/01_ci_environment.md b/dev/breeze/doc/ci/01_ci_environment.md
index c9501a13b208a..c1cc51c325417 100644
--- a/dev/breeze/doc/ci/01_ci_environment.md
+++ b/dev/breeze/doc/ci/01_ci_environment.md
@@ -23,8 +23,9 @@

 - [CI Environment](#ci-environment)
   - [GitHub Actions workflows](#github-actions-workflows)
-  - [Container Registry used as cache](#container-registry-used-as-cache)
+  - [GitHub Registry used as cache](#github-registry-used-as-cache)
   - [Authentication in GitHub Registry](#authentication-in-github-registry)
+  - [GitHub Artifacts used to store built images](#github-artifacts-used-to-store-built-images)



@@ -32,7 +33,8 @@

 Continuous Integration is an important component of making Apache Airflow
 robust and stable. We run a lot of tests for every pull request,
-for main and v2-\*-test branches and regularly as scheduled jobs.
+for `canary` runs (from `main` and `v*-*-test` branches) and
+regularly as scheduled jobs.

 Our execution environment for CI is [GitHub
 Actions](https://github.com/features/actions). GitHub Actions.

@@ -60,57 +62,22 @@ To run the tests, we need to ensure that the images are built using the
 latest sources and that the build process is efficient. A full rebuild
 of such an image from scratch might take approximately 15 minutes.
 Therefore, we've implemented optimization techniques that efficiently
-use the cache from the GitHub Docker registry. In most cases, this
-reduces the time needed to rebuild the image to about 4 minutes.
-However, when dependencies change, it can take around 6-7 minutes, and
-if the base image of Python releases a new patch-level, it can take
-approximately 12 minutes.
-
-## Container Registry used as cache
-
-We are using GitHub Container Registry to store the results of the
-`Build Images` workflow which is used in the `Tests` workflow.
- -Currently in main version of Airflow we run tests in all versions of -Python supported, which means that we have to build multiple images (one -CI and one PROD for each Python version). Yet we run many jobs (\>15) - -for each of the CI images. That is a lot of time to just build the -environment to run. Therefore we are utilising the `pull_request_target` -feature of GitHub Actions. - -This feature allows us to run a separate, independent workflow, when the -main workflow is run -this separate workflow is different than the main -one, because by default it runs using `main` version of the sources but -also - and most of all - that it has WRITE access to the GitHub -Container Image registry. - -This is especially important in our case where Pull Requests to Airflow -might come from any repository, and it would be a huge security issue if -anyone from outside could utilise the WRITE access to the Container -Image Registry via external Pull Request. - -Thanks to the WRITE access and fact that the `pull_request_target` workflow named -`Build Imaages` which - by default - uses the `main` version of the sources. -There we can safely run some code there as it has been reviewed and merged. -The workflow checks-out the incoming Pull Request, builds -the container image from the sources from the incoming PR (which happens in an -isolated Docker build step for security) and pushes such image to the -GitHub Docker Registry - so that this image can be built only once and -used by all the jobs running tests. The image is tagged with unique -`COMMIT_SHA` of the incoming Pull Request and the tests run in the `pull` workflow -can simply pull such image rather than build it from the scratch. -Pulling such image takes ~ 1 minute, thanks to that we are saving a -lot of precious time for jobs. - -We use [GitHub Container Registry](https://docs.github.com/en/packages/guides/about-github-container-registry). -A `GITHUB_TOKEN` is needed to push to the registry. 
We configured
-scopes of the tokens in our jobs to be able to write to the registry,
-but only for the jobs that need it.
-
-The latest cache is kept as `:cache-linux-amd64` and `:cache-linux-arm64`
-tagged cache of our CI images (suitable for `--cache-from` directive of
-buildx). It contains metadata and cache for all segments in the image,
-and cache is kept separately for different platform.
+use the cache from GitHub Actions Artifacts.
+
+## GitHub Registry used as cache
+
+We are using GitHub Registry to store the last image built in canary run
+to build images in CI and local docker container.
+This is done to speed up the build process and to ensure that the
+first - time-consuming-to-build layers of the image are
+reused between the builds. The cache is stored in the GitHub Registry
+by the `canary` runs and then used in the subsequent runs.
+
+The latest GitHub registry cache is kept as `:cache-linux-amd64` and
+`:cache-linux-arm64` tagged cache of our CI images (suitable for
+`--cache-from` directive of buildx). It contains
+metadata and cache for all segments in the image,
+and cache is kept separately for different platforms.

 The `latest` images of CI and PROD are `amd64` only images for CI,
 because there is no easy way to push multiplatform images without
@@ -118,11 +85,25 @@ merging the manifests, and it is not really needed nor used for cache.

 ## Authentication in GitHub Registry

-We are using GitHub Container Registry as cache for our images.
-Authentication uses GITHUB_TOKEN mechanism. Authentication is needed for
-pushing the images (WRITE) only in `push`, `pull_request_target`
-workflows. When you are running the CI jobs in GitHub Actions,
-GITHUB_TOKEN is set automatically by the actions.
+Authentication to GitHub Registry in CI uses GITHUB_TOKEN mechanism.
+The Authentication is needed for pushing the images (WRITE) in the `canary` runs.
+When you are running the CI jobs in GitHub Actions, GITHUB_TOKEN is set automatically
+by the actions. 
This is used only in the `canary` runs that have "write" access
+to the repository.
+
+No `write` access is needed (nor possible) by Pull Requests coming from the forks,
+since we are only using "GitHub Artifacts" for cache source in those runs.
+
+## GitHub Artifacts used to store built images
+
+We are running most tests in reproducible CI image for all the jobs and
+instead of building the image multiple times we build the image for each Python
+version only once (one CI and one PROD). Those images are then used by
+all jobs that need them in the same build. The images - after building -
+are exported to a file and stored in the GitHub Artifacts.
+The export files are then downloaded from artifacts and image is
+loaded from the file in all jobs in the same workflow after they are
+built and uploaded in the build image job.

 ----

diff --git a/dev/breeze/doc/ci/02_images.md b/dev/breeze/doc/ci/02_images.md
index eb3af6ae6ce87..bfc434427d174 100644
--- a/dev/breeze/doc/ci/02_images.md
+++ b/dev/breeze/doc/ci/02_images.md
@@ -215,10 +215,11 @@ in `docker-context-files` folder.

 # Using docker cache during builds

-Default mechanism used in Breeze for building CI images uses images
-pulled from GitHub Container Registry. This is done to speed up local
+Default mechanism used in Breeze for building CI images locally uses images
+pulled from GitHub Container Registry combined with locally mounted cache
+folders where `uv` cache is stored. This is done to speed up local
 builds and building images for CI runs - instead of \> 12 minutes for
-rebuild of CI images, it takes usually about 1 minute when cache is
+rebuild of CI images, it takes usually less than a minute when cache is
 used. For CI images this is usually the best strategy - to use default
 "pull" cache. This is default strategy when
 [Breeze](../README.rst) builds are performed. 
@@ -227,7 +228,8 @@ For Production Image - which is far smaller and faster to build, it's better to use local build cache (the standard mechanism that docker uses. This is the default strategy for production images when [Breeze](../README.rst) builds are -performed. The first time you run it, it will take considerably longer +performed. The local `uv` cache is used from mounted sources. +The first time you run it, it will take considerably longer time than if you use the pull mechanism, but then when you do small, incremental changes to local sources, Dockerfile image and scripts, further rebuilds with local build cache will be considerably faster. @@ -293,19 +295,12 @@ See Naming convention for the GitHub packages. -Images with a commit SHA (built for pull requests and pushes). Those are -images that are snapshot of the currently run build. They are built once -per each build and pulled by each test job. - ``` bash -ghcr.io/apache/airflow//ci/python: - for CI images -ghcr.io/apache/airflow//prod/python: - for production images +ghcr.io/apache/airflow//ci/python - for CI images +ghcr.io/apache/airflow//prod/python - for production images ``` -Thoe image contain inlined cache. - -You can see all the current GitHub images at - +You can see all the current GitHub images at Note that you need to be committer and have the right to refresh the images in the GitHub Registry with latest sources from main via @@ -314,12 +309,23 @@ need to login with your Personal Access Token with "packages" write scope to be able to push to those repositories or pull from them in case of GitHub Packages. -GitHub Container Registry +You need to login to GitHub Container Registry with your API token +if you want to interact with the GitHub Registry for writing (only +committers). ``` bash docker login ghcr.io ``` +Note that when your token is expired and you are still +logged in, you are not able to interact even with read-only operations +like pulling images. 
You need to logout and login again to refresh the +token. + +``` bash +docker logout ghcr.io +``` + Since there are different naming conventions used for Airflow images and there are multiple images used, [Breeze](../README.rst) provides easy to use management interface for the images. The CI @@ -329,22 +335,14 @@ new version of base Python is released. However, occasionally, you might need to rebuild images locally and push them directly to the registries to refresh them. -Every developer can also pull and run images being result of a specific +Every contributor can also pull and run images being result of a specific CI run in GitHub Actions. This is a powerful tool that allows to reproduce CI failures locally, enter the images and fix them much -faster. It is enough to pass `--image-tag` and the registry and Breeze -will download and execute commands using the same image that was used -during the CI tests. +faster. It is enough to download and uncompress the artifact that stores the +image and run ``breeze ci-image load -i `` to load the +image and mark the image as refreshed in the local cache. -For example this command will run the same Python 3.9 image as was used -in build identified with 9a621eaa394c0a0a336f8e1b31b35eff4e4ee86e commit -SHA with enabled rabbitmq integration. 
-
-``` bash
-breeze --image-tag 9a621eaa394c0a0a336f8e1b31b35eff4e4ee86e --python 3.9 --integration rabbitmq
-```
-
-You can see more details and examples in[Breeze](../README.rst)
+You can see more details and examples in [Breeze](../06_managing_docker_images.rst)

 # Customizing the CI image

@@ -427,8 +425,6 @@ can be used for CI images:
 | `PYTHON_MAJOR_MINOR_VERSION` | `3.9` | major/minor version of Python (should match base image) |
 | `DEPENDENCIES_EPOCH_NUMBER` | `2` | increasing this number will reinstall all apt dependencies |
 | `ADDITIONAL_PIP_INSTALL_FLAGS` | | additional `pip` flags passed to the installation commands (except when reinstalling `pip` itself) |
-| `PIP_NO_CACHE_DIR` | `true` | if true, then no pip cache will be stored |
-| `UV_NO_CACHE` | `true` | if true, then no uv cache will be stored |
 | `HOME` | `/root` | Home directory of the root user (CI image has root user as default) |
 | `AIRFLOW_HOME` | `/root/airflow` | Airflow's HOME (that's where logs and sqlite databases are stored) |
 | `AIRFLOW_SOURCES` | `/opt/airflow` | Mounted sources of Airflow |
@@ -439,7 +435,6 @@ can be used for CI images:
 | `AIRFLOW_CONSTRAINTS_REFERENCE` | | reference (branch or tag) from GitHub repository from which constraints are used. By default it is set to `constraints-main` but can be `constraints-2-X`. |
 | `AIRFLOW_EXTRAS` | `all` | extras to install |
 | `UPGRADE_INVALIDATION_STRING` | | If set to any random value the dependencies are upgraded to newer versions. In CI it is set to build id. |
-| `AIRFLOW_PRE_CACHED_PIP_PACKAGES` | `true` | Allows to pre-cache airflow PIP packages from the GitHub of Apache Airflow This allows to optimize iterations for Image builds and speeds up CI jobs. 
| | `ADDITIONAL_AIRFLOW_EXTRAS` | | additional extras to install | | `ADDITIONAL_PYTHON_DEPS` | | additional Python dependencies to install | | `DEV_APT_COMMAND` | | Dev apt command executed before dev deps are installed in the first part of image | @@ -553,10 +548,6 @@ The images produced during the `Build Images` workflow of CI jobs are stored in the [GitHub Container Registry](https://github.com/orgs/apache/packages?repo_name=airflow) -The images are stored with both "latest" tag (for last main push image -that passes all the tests as well with the COMMIT_SHA id for images that -were used in particular build. - The image names follow the patterns (except the Python image, all the images are stored in in `apache` organization. @@ -567,21 +558,15 @@ percent-encoded when you access them via UI (/ = %2F) `https://github.com/apache/airflow/pkgs/container/` -| Image | Name:tag (both cases latest version and per-build) | Description | -|--------------------------|----------------------------------------------------|---------------------------------------------------------------| -| Python image (DockerHub) | python:\-slim-bookworm | Base Python image used by both production and CI image. | -| CI image | airflow/\/ci/python\:\ | CI image - this is the image used for most of the tests. | -| PROD image | airflow/\/prod/python\:\ | faster to build or pull. Production image optimized for size. | +| Image | Name | Description | +|--------------------------|----------------------------------------|---------------------------------------------------------------| +| Python image (DockerHub) | python:\-slim-bookworm | Base Python image used by both production and CI image. | +| CI image | airflow/\/ci/python\ | CI image - this is the image used for most of the tests. | +| PROD image | airflow/\/prod/python\ | faster to build or pull. Production image optimized for size. 
| - \ might be either "main" or "v2-\*-test" - \ - Python version (Major + Minor).Should be one of \["3.9", "3.10", "3.11", "3.12" \]. -- \ - full-length SHA of commit either from the tip of the - branch (for pushes/schedule) or commit from the tip of the branch used - for the PR. -- \ - tag of the image. It is either "latest" or \ - (full-length SHA of commit either from the tip of the branch (for - pushes/schedule) or commit from the tip of the branch used for the - PR). + ---- diff --git a/dev/breeze/doc/ci/04_selective_checks.md b/dev/breeze/doc/ci/04_selective_checks.md index 08e5906745cbf..91b50bea48fe2 100644 --- a/dev/breeze/doc/ci/04_selective_checks.md +++ b/dev/breeze/doc/ci/04_selective_checks.md @@ -273,20 +273,10 @@ modified. This can be overridden by setting `full tests needed` label in the PR. There is a difference in how the CI jobs are run for committer and non-committer PRs from forks. The main reason is security; we do not want to run untrusted code on our infrastructure for self-hosted runners. -Additionally, we do not want to run unverified code during the `Build imaage` workflow, as that workflow has -access to the `GITHUB_TOKEN`, which can write to our Github Registry (used to cache -images between runs). These images are built on self-hosted runners, and we must ensure that -those runners are not misused, such as for mining cryptocurrencies on behalf of the person who opened the -pull request from their newly created fork of Airflow. - -This is why the `Build Images` workflow checks whether the actor of the PR (`GITHUB_ACTOR`) is one of the committers. -If not, the workflows and scripts used to run image building come only from the ``target`` branch -of the repository, where these scripts have been reviewed and approved by committers before being merged. This is controlled by the selective checks that set `is-committer-build` to `true` in -the build-info job of the workflow to determine if the actor is in the committers' -list. 
This setting can be overridden by the `non-committer build` label in the PR.
-
-Also, for most of the jobs, committer builds use "Self-hosted" runners by default, while non-committer
-builds use "Public" runners. For committers, this can be overridden by setting the
+
+Currently there is no difference because we are not using `self-hosted` runners (until we implement `Action
+Runner Controller`) but for most of the jobs, committer builds will use "Self-hosted" runners by default,
+while non-committer builds will use "Public" runners. For committers, this can be overridden by setting the
 `use public runners` label in the PR.

 ## Changing behaviours of the CI runs by setting labels

diff --git a/dev/breeze/doc/ci/05_workflows.md b/dev/breeze/doc/ci/05_workflows.md
index 130774a730c4b0..0c66505508f02 100644
--- a/dev/breeze/doc/ci/05_workflows.md
+++ b/dev/breeze/doc/ci/05_workflows.md
@@ -24,11 +24,8 @@
 - [CI run types](#ci-run-types)
   - [Pull request run](#pull-request-run)
   - [Canary run](#canary-run)
-  - [Scheduled run](#scheduled-run)
 - [Workflows](#workflows)
-  - [Build Images Workflow](#build-images-workflow)
-  - [Differences for main and release branches](#differences-for-main-and-release-branches)
-  - [Committer vs. Non-committer PRs](#committer-vs-non-committer-prs)
+  - [Differences for `main` and `v*-*-test` branches](#differences-for-main-and-v--test-branches)
   - [Tests Workflow](#tests-workflow)
   - [CodeQL scan](#codeql-scan)
   - [Publishing documentation](#publishing-documentation)
@@ -86,16 +83,16 @@ run in the context of the "apache/airflow" repository and has WRITE
 access to the GitHub Container Registry.

 When the PR changes important files (for example `generated/provider_depdencies.json` or
-`pyproject.toml`), the PR is run in "upgrade to newer dependencies" mode - where instead
-of using constraints to build images, attempt is made to upgrade all dependencies to latest
-versions and build images with them. 
This way we check how Airflow behaves when the +`pyproject.toml` or `hatch_build.py`), the PR is run in "upgrade to newer dependencies" mode - +where instead of using constraints to build images, attempt is made to upgrade +all dependencies to latest versions and build images with them. This way we check how Airflow behaves when the dependencies are upgraded. This can also be forced by setting the `upgrade to newer dependencies` label in the PR if you are a committer and want to force dependency upgrade. ## Canary run -This workflow is triggered when a pull request is merged into the "main" -branch or pushed to any of the "v2-\*-test" branches. The "Canary" run +This workflow is triggered when a pull request is merged into the `main` +branch or pushed to any of the `v*-*-test` branches. The `canary` run aims to upgrade dependencies to their latest versions and promptly pushes a preview of the CI/PROD image cache to the GitHub Registry. This allows pull requests to quickly utilize the new cache, which is @@ -106,84 +103,36 @@ updates the constraint files in the "constraints-main" branch with the latest constraints and pushes both the cache and the latest CI/PROD images to the GitHub Registry. -If the "Canary" build fails, it often indicates that a new version of +If the `canary` build fails, it often indicates that a new version of our dependencies is incompatible with the current tests or Airflow code. Alternatively, it could mean that a breaking change has been merged into -"main". Both scenarios require prompt attention from the maintainers. +`main`. Both scenarios require prompt attention from the maintainers. While a "broken main" due to our code should be fixed quickly, "broken dependencies" may take longer to resolve. Until the tests pass, the constraints will not be updated, meaning that regular PRs will continue using the older version of dependencies that passed one of the previous -"Canary" runs. +`canary` runs. 
-## Scheduled run - -The "scheduled" workflow, which is designed to run regularly (typically -overnight), is triggered when a scheduled run occurs. This workflow is -largely identical to the "Canary" run, with one key difference: the -image is always built from scratch, not from a cache. This approach -ensures that we can verify whether any "system" dependencies in the -Debian base image have changed, and confirm that the build process -remains reproducible. Since the process for a scheduled run mirrors that -of a "Canary" run, no separate diagram is necessary to illustrate it. +The `canary` runs are executed 6 times a day on schedule, you can also +trigger the `canary` run manually via `workflow-dispatch` mechanism. # Workflows -A general note about cancelling duplicated workflows: for the -`Build Images`, `Tests` and `CodeQL` workflows, we use the `concurrency` -feature of GitHub actions to automatically cancel "old" workflow runs of +A general note about cancelling duplicated workflows: for `Tests` and `CodeQL` workflows, +we use the `concurrency` feature of GitHub actions to automatically cancel "old" workflow runs of each type. This means that if you push a new commit to a branch or to a pull request while a workflow is already running, GitHub Actions will automatically cancel the old workflow run. -## Build Images Workflow - -This workflow builds images for the CI Workflow for pull requests coming -from forks. - -The GitHub Actions event that trigger this workflow is `pull_request_target`, which means that -it is triggered when a pull request is opened. This also means that the -workflow has Write permission to push to the GitHub registry the images, which are -used by CI jobs. As a result, the images can be built only once and -reused by all CI jobs (including matrix jobs). We've implemented -it so that the `Tests` workflow waits for the images to be built by the -`Build Images` workflow before running. 
- -Those "Build Image" steps are skipped for pull requests that do not come -from "forks" (i.e. internal PRs for the Apache Airflow repository). -This is because, in case of PRs originating from Apache Airflow (which only -committers can create those) the "pull_request" workflows have enough -permission to push images to GitHub Registry. - -This workflow is not triggered by normal pushes to our "main" branches, -i.e., after a pull request is merged or when a `scheduled` run is -triggered. In these cases, the "CI" workflow has enough permissions -to push the images, so this workflow is simply not run. - -The workflow has the following jobs: - -| Job | Description | -|-------------------|---------------------------------------------| -| Build Info | Prints detailed information about the build | -| Build CI images | Builds all configured CI images | -| Build PROD images | Builds all configured PROD images | - -The images are stored in the [GitHub Container -Registry](https://github.com/orgs/apache/packages?repo_name=airflow), and their names follow the patterns -described in [Images](02_images.md#naming-conventions) - -Image building is configured in "fail-fast" mode. If any image -fails to build, it cancels the other builds and the `Tests` workflow -run that triggered it. - -## Differences for main and release branches +## Differences for `main` and `v*-*-test` branches The type of tests executed varies depending on the version or branch being tested. For the "main" development branch, we run all tests to maintain the quality of Airflow. However, when releasing patch-level updates on older branches, we only run a subset of tests. This is because older branches are used exclusively for releasing Airflow and -its corresponding image, not for releasing providers or Helm charts. +its corresponding image, not for releasing providers or Helm charts, +so all those tests are skipped there by default. 
This behaviour is controlled by `default-branch` output of the build-info job. Whenever we create a branch for an older version, we update @@ -192,90 +141,75 @@ the new branch. In several places, the selection of tests is based on whether this output is `main`. They are marked in the "Release branches" column of the table below. -## Committer vs. Non-committer PRs - -Please refer to the appropriate section in [selective CI checks](04_selective_checks.md#committer-vs-non-committer-prs) docs. - ## Tests Workflow -This workflow is a regular workflow that performs all checks of Airflow -code. - -| Job | Description | PR | Canary | Scheduled | Release branches | -|---------------------------------|----------------------------------------------------------|----------|----------|------------|------------------| -| Build info | Prints detailed information about the build | Yes | Yes | Yes | Yes | -| Push early cache & images | Pushes early cache/images to GitHub Registry | | Yes | | | -| Check that image builds quickly | Checks that image builds quickly | | Yes | | Yes | -| Build CI images | Builds images in-workflow (not in the build images) | | Yes | Yes (1) | Yes (4) | -| Generate constraints/CI verify | Generate constraints for the build and verify CI image | Yes (2) | Yes (2) | Yes (2) | Yes (2) | -| Build PROD images | Builds images in-workflow (not in the build images) | | Yes | Yes (1) | Yes (4) | -| Run breeze tests | Run unit tests for Breeze | Yes | Yes | Yes | Yes | -| Test OpenAPI client gen | Tests if OpenAPIClient continues to generate | Yes | Yes | Yes | Yes | -| React WWW tests | React UI tests for new Airflow UI | Yes | Yes | Yes | Yes | -| Test examples image building | Tests if PROD image build examples work | Yes | Yes | Yes | Yes | -| Test git clone on Windows | Tests if Git clone for for Windows | Yes (5) | Yes (5) | Yes (5) | Yes (5) | -| Waits for CI Images | Waits for and verify CI Images | Yes (2) | Yes (2) | Yes (2) | Yes (2) | -| Upgrade checks 
| Performs checks if there are some pending upgrades | | Yes | Yes | Yes | -| Static checks | Performs full static checks | Yes (6) | Yes | Yes | Yes (7) | -| Basic static checks | Performs basic static checks (no image) | Yes (6) | | | | -| Build docs | Builds and tests publishing of the documentation | Yes | Yes (11) | Yes | Yes | -| Spellcheck docs | Spellcheck docs | Yes | Yes | Yes | Yes | -| Tests wheel provider packages | Tests if provider packages can be built and released | Yes | Yes | Yes | | -| Tests Airflow compatibility | Compatibility of provider packages with older Airflow | Yes | Yes | Yes | | -| Tests dist provider packages | Tests if dist provider packages can be built | | Yes | Yes | | -| Tests airflow release commands | Tests if airflow release command works | | Yes | Yes | | -| Tests (Backend/Python matrix) | Run the Pytest unit DB tests (Backend/Python matrix) | Yes | Yes | Yes | Yes (8) | -| No DB tests | Run the Pytest unit Non-DB tests (with pytest-xdist) | Yes | Yes | Yes | Yes (8) | -| Integration tests | Runs integration tests (Postgres/Mysql) | Yes | Yes | Yes | Yes (9) | -| Quarantined tests | Runs quarantined tests (with flakiness and side-effects) | Yes | Yes | Yes | Yes (8) | -| Test airflow packages | Tests that Airflow package can be built and released | Yes | Yes | Yes | Yes | -| Helm tests | Run the Helm integration tests | Yes | Yes | Yes | | -| Helm release tests | Run the tests for Helm releasing | Yes | Yes | Yes | | -| Summarize warnings | Summarizes warnings from all other tests | Yes | Yes | Yes | Yes | -| Wait for PROD Images | Waits for and verify PROD Images | Yes (2) | Yes (2) | Yes (2) | Yes (2) | -| Docker Compose test/PROD verify | Tests quick-start Docker Compose and verify PROD image | Yes | Yes | Yes | Yes | -| Tests Kubernetes | Run Kubernetes test | Yes | Yes | Yes | | -| Update constraints | Upgrade constraints to latest ones | Yes (3) | Yes (3) | Yes (3) | Yes (3) | -| Push cache & images | Pushes 
cache/images to GitHub Registry (3) | | Yes (3) | | Yes | -| Build CI ARM images | Builds CI images for ARM | Yes (10) | | Yes | | +This workflow is a regular workflow that performs all checks of Airflow code. The `main` and `v*-*-test` +pushes are `canary` runs. + +| Job | Description | PR | main | v*-*-test | +|---------------------------------|----------------------------------------------------------|---------|---------|-----------| +| Build info | Prints detailed information about the build | Yes | Yes | Yes | +| Push early cache & images | Pushes early cache/images to GitHub Registry | | Yes (2) | Yes (2) | +| Check that image builds quickly | Checks that image builds quickly | | Yes | Yes | +| Build CI images | Builds images | Yes | Yes | Yes | +| Generate constraints/CI verify | Generate constraints for the build and verify CI image | Yes | Yes | Yes | +| Build PROD images | Builds images | Yes | Yes | Yes (3) | +| Run breeze tests | Run unit tests for Breeze | Yes | Yes | Yes | +| Test OpenAPI client gen | Tests if OpenAPIClient continues to generate | Yes | Yes | Yes | +| React WWW tests | React UI tests for new Airflow UI | Yes | Yes | Yes | +| Test examples image building | Tests if PROD image build examples work | Yes | Yes | Yes | +| Test git clone on Windows | Tests if Git clone for for Windows | Yes (4) | Yes (4) | Yes (4) | +| Upgrade checks | Performs checks if there are some pending upgrades | | Yes | Yes | +| Static checks | Performs full static checks | Yes (5) | Yes | Yes (6) | +| Basic static checks | Performs basic static checks (no image) | Yes (5) | | | +| Build and publish docs | Builds and tests publishing of the documentation | Yes (8) | Yes (8) | Yes (8) | +| Spellcheck docs | Spellcheck docs | Yes | Yes | Yes (7) | +| Tests wheel provider packages | Tests if provider packages can be built and released | Yes | Yes | | +| Tests Airflow compatibility | Compatibility of provider packages with older Airflow | Yes | Yes | | +| Tests dist 
provider packages | Tests if dist provider packages can be built | | Yes | | +| Tests airflow release commands | Tests if airflow release command works | | Yes | Yes | +| DB tests matrix | Run the Pytest unit DB tests | Yes | Yes | Yes (7) | +| No DB tests | Run the Pytest unit Non-DB tests (with pytest-xdist) | Yes | Yes | Yes (7) | +| Integration tests | Runs integration tests (Postgres/Mysql) | Yes | Yes | Yes (7) | +| Quarantined tests | Runs quarantined tests (with flakiness and side-effects) | Yes | Yes | Yes (7) | +| Test airflow packages | Tests that Airflow package can be built and released | Yes | Yes | Yes | +| Helm tests | Run the Helm integration tests | Yes | Yes | | +| Helm release tests | Run the tests for Helm releasing | Yes | Yes | | +| Summarize warnings | Summarizes warnings from all other tests | Yes | Yes | Yes | +| Docker Compose test/PROD verify | Tests quick-start Docker Compose and verify PROD image | Yes | Yes | Yes | +| Tests Kubernetes | Run Kubernetes test | Yes | Yes | | +| Update constraints | Upgrade constraints to latest ones | Yes | Yes (2) | Yes (2) | +| Push cache & images | Pushes cache/images to GitHub Registry (3) | | Yes (3) | | +| Build CI ARM images | Builds CI images for ARM | Yes (9) | | | `(1)` Scheduled jobs builds images from scratch - to test if everything works properly for clean builds -`(2)` The jobs wait for CI images to be available. It only actually runs when build image is needed (in -case of simpler PRs that do not change dependencies or source code, -images are not build) - -`(3)` PROD and CI cache & images are pushed as "cache" (both AMD and -ARM) and "latest" (only AMD) to GitHub Container registry and +`(2)` PROD and CI cache & images are pushed as "cache" (both AMD and +ARM) and "latest" (only AMD) to GitHub Container Registry and constraints are upgraded only if all tests are successful. The images are rebuilt in this step using constraints pushed in the previous step. 
-Constraints are only actually pushed in the `canary/scheduled` runs. +Constraints are only actually pushed in the `canary` runs. -`(4)` In main, PROD image uses locally build providers using "latest" +`(3)` In main, PROD image uses locally build providers using "latest" version of the provider code. In the non-main version of the build, the latest released providers from PyPI are used. -`(5)` Always run with public runners to test if Git clone works on +`(4)` Always run with public runners to test if Git clone works on Windows. -`(6)` Run full set of static checks when selective-checks determine that +`(5)` Run full set of static checks when selective-checks determine that they are needed (basically, when Python code has been modified). -`(7)` On non-main builds some of the static checks that are related to +`(6)` On non-main builds some of the static checks that are related to Providers are skipped via selective checks (`skip-pre-commits` check). -`(8)` On non-main builds the unit tests for providers are skipped via -selective checks removing the "Providers" test type. - -`(9)` On non-main builds the integration tests for providers are skipped -via `skip-providers-tests` selective check output. +`(7)` On non-main builds the unit tests, docs and integration tests +for providers are skipped via selective checks. -`(10)` Only run the builds in case PR is run by a committer from -"apache" repository and in scheduled build. +`(8)` Docs publishing is only done in Canary run. -`(11)` Docs publishing is only done in Canary run, to handle the case where -cloning whole airflow site on Public Runner cannot complete due to the size of the repository. +`(9)` ARM images are not currently built - until we have ARM runners available. ## CodeQL scan @@ -285,8 +219,7 @@ violations. It is run for JavaScript and Python code. ## Publishing documentation -Documentation from the `main` branch is automatically published on -Amazon S3. 
+Documentation from the `main` branch is automatically published on Amazon S3. To make this possible, GitHub Action has secrets set up with credentials for an Amazon Web Service account - `DOCS_AWS_ACCESS_KEY_ID` and @@ -303,4 +236,4 @@ Website endpoint: ----- -Read next about [Diagrams](06_diagrams.md) +Read next about [Debugging CI builds](06_debugging.md) diff --git a/dev/breeze/doc/ci/07_debugging.md b/dev/breeze/doc/ci/06_debugging.md similarity index 93% rename from dev/breeze/doc/ci/07_debugging.md rename to dev/breeze/doc/ci/06_debugging.md index 9e7173ae84721..8d030034728c7 100644 --- a/dev/breeze/doc/ci/07_debugging.md +++ b/dev/breeze/doc/ci/06_debugging.md @@ -34,10 +34,7 @@ either run in our Self-Hosted runners (with 64 GB RAM 8 CPUs) or in the GitHub Public runners (6 GB of RAM, 2 CPUs) and the results will vastly differ depending on which environment is used. We are utilizing parallelism to make use of all the available CPU/Memory but sometimes -you need to enable debugging and force certain environments. Additional -difficulty is that `Build Images` workflow is `pull-request-target` -type, which means that it will always run using the `main` version - no -matter what is in your Pull Request. +you need to enable debugging and force certain environments. There are several ways how you can debug the CI jobs and modify their behaviour when you are maintainer. @@ -64,4 +61,4 @@ the PR to apply the label to the PR. 
----- -Read next about [Running CI locally](08_running_ci_locally.md) +Read next about [Running CI locally](07_running_ci_locally.md) diff --git a/dev/breeze/doc/ci/06_diagrams.md b/dev/breeze/doc/ci/06_diagrams.md deleted file mode 100644 index afe51a309e8eb..0000000000000 --- a/dev/breeze/doc/ci/06_diagrams.md +++ /dev/null @@ -1,466 +0,0 @@ - - - - -**Table of Contents** *generated with [DocToc](https://github.com/thlorenz/doctoc)* - -- [CI Sequence diagrams](#ci-sequence-diagrams) - - [Pull request flow from fork](#pull-request-flow-from-fork) - - [Pull request flow from "apache/airflow" repo](#pull-request-flow-from-apacheairflow-repo) - - [Merge "Canary" run](#merge-canary-run) - - [Scheduled run](#scheduled-run) - - - -# CI Sequence diagrams - -You can see here the sequence diagrams of the flow happening during the CI Jobs. - -## Pull request flow from fork - -This is the flow that happens when a pull request is created from a fork - which is the most frequent -pull request flow that happens in Airflow. The "pull_request" workflow does not have write access -to the GitHub Registry, so it cannot push the CI/PROD images there. Instead, we push the images -from the "pull_request_target" workflow, which has write access to the GitHub Registry. Note that -this workflow always uses scripts and workflows from the "target" branch of the "apache/airflow" -repository, so the user submitting such pull request cannot override our build scripts and inject malicious -code into the workflow that has potentially write access to the GitHub Registry (and can override cache). - -Security is the main reason why we have two workflows for pull requests and such complex workflows. - -```mermaid -sequenceDiagram - Note over Airflow Repo: pull request - Note over Tests: pull_request
[Read Token] - Note over Build Images: pull_request_target
[Write Token] - activate Airflow Repo - Airflow Repo -->> Tests: Trigger 'pull_request' - activate Tests - Tests -->> Build Images: Trigger 'pull_request_target' - activate Build Images - Note over Tests: Build info - Note over Tests: Selective checks
Decide what to do - Note over Build Images: Build info - Note over Build Images: Selective checks
Decide what to do - Note over Tests: Skip Build
(Runs in 'Build Images')
CI Images - Note over Tests: Skip Build
(Runs in 'Build Images')
PROD Images - par - GitHub Registry ->> Build Images: Use cache from registry - Airflow Repo ->> Build Images: Use constraints from `constraints-BRANCH` - Note over Build Images: Build CI Images
[COMMIT_SHA]
Upgrade to newer dependencies if deps changed - Build Images ->> GitHub Registry: Push CI Images
[COMMIT_SHA] - Build Images ->> Artifacts: Upload source constraints - and - Note over Tests: OpenAPI client gen - and - Note over Tests: React WWW tests - and - Note over Tests: Test git clone on Windows - and - Note over Tests: Helm release tests - and - opt - Note over Tests: Run basic
static checks - end - end - loop Wait for CI images - GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] - end - par - GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] - Note over Tests: Verify CI Images
[COMMIT_SHA] - Note over Tests: Generate constraints
source,pypi,no-providers - Tests ->> Artifacts: Upload source,pypi,no-providers constraints - and - Artifacts ->> Build Images: Download source constraints - GitHub Registry ->> Build Images: Use cache from registry - Note over Build Images: Build PROD Images
[COMMIT_SHA] - Build Images ->> GitHub Registry: Push PROD Images
[COMMIT_SHA] - and - opt - GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] - Note over Tests: Run static checks - end - and - opt - GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] - Note over Tests: Build docs - end - and - opt - GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] - Note over Tests: Spellcheck docs - end - and - opt - GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] - Note over Tests: Unit Tests
Python/DB matrix - end - and - opt - GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] - Note over Tests: Unit Tests
Python/Non-DB matrix - end - and - opt - GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] - Note over Tests: Integration Tests - end - and - opt - GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] - Note over Tests: Quarantined Tests - end - and - opt - GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] - Note over Tests: Build/test provider packages
wheel, sdist, old airflow - end - and - opt - GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] - Note over Tests: Test airflow
release commands - end - and - opt - GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] - Note over Tests: Helm tests - end - end - par - Note over Tests: Summarize Warnings - and - opt - Artifacts ->> Tests: Download source,pypi,no-providers constraints - Note over Tests: Display constraints diff - end - and - opt - loop Wait for PROD images - GitHub Registry ->> Tests: Pull PROD Images
[COMMIT_SHA] - end - end - and - opt - Note over Tests: Build ARM CI images - end - end - par - opt - GitHub Registry ->> Tests: Pull PROD Images
[COMMIT_SHA] - Note over Tests: Test examples
PROD image building - end - and - opt - GitHub Registry ->> Tests: Pull PROD Images
[COMMIT_SHA] - Note over Tests: Run Kubernetes
tests - end - and - opt - GitHub Registry ->> Tests: Pull PROD Images
[COMMIT_SHA] - Note over Tests: Verify PROD Images
[COMMIT_SHA] - Note over Tests: Run docker-compose
tests - end - end - Tests -->> Airflow Repo: Status update - deactivate Airflow Repo - deactivate Tests -``` - -## Pull request flow from "apache/airflow" repo - -The difference between this flow and the previous one is that the CI/PROD images are built in the -CI workflow and pushed to the GitHub Registry from there. This cannot be done in case of fork -pull request, because Pull Request from forks cannot have "write" access to GitHub Registry. All the steps -except "Build Info" from the "Build Images" workflows are skipped in this case. - -THis workflow can be used by maintainers in case they have a Pull Request that changes the scripts and -CI workflows used to build images, because in this case the "Build Images" workflow will use them -from the Pull Request. This is safe, because the Pull Request is from the "apache/airflow" repository -and only maintainers can push to that repository and create Pull Requests from it. - -```mermaid -sequenceDiagram - Note over Airflow Repo: pull request - Note over Tests: pull_request
[Write Token] - Note over Build Images: pull_request_target
[Unused Token] - activate Airflow Repo - Airflow Repo -->> Tests: Trigger 'pull_request' - activate Tests - Tests -->> Build Images: Trigger 'pull_request_target' - activate Build Images - Note over Tests: Build info - Note over Tests: Selective checks
Decide what to do - Note over Build Images: Build info - Note over Build Images: Selective checks
Decide what to do - Note over Build Images: Skip Build
(Runs in 'Tests')
CI Images - Note over Build Images: Skip Build
(Runs in 'Tests')
PROD Images - deactivate Build Images - Note over Tests: Build info - Note over Tests: Selective checks
Decide what to do - par - GitHub Registry ->> Tests: Use cache from registry - Airflow Repo ->> Tests: Use constraints from `constraints-BRANCH` - Note over Tests: Build CI Images
[COMMIT_SHA]
Upgrade to newer dependencies if deps changed - Tests ->> GitHub Registry: Push CI Images
[COMMIT_SHA] - Tests ->> Artifacts: Upload source constraints - and - Note over Tests: OpenAPI client gen - and - Note over Tests: React WWW tests - and - Note over Tests: Test examples
PROD image building - and - Note over Tests: Test git clone on Windows - and - Note over Tests: Helm release tests - and - opt - Note over Tests: Run basic
static checks - end - end - Note over Tests: Skip waiting for CI images - par - GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] - Note over Tests: Verify CI Images
[COMMIT_SHA] - Note over Tests: Generate constraints
source,pypi,no-providers - Tests ->> Artifacts: Upload source,pypi,no-providers constraints - and - Artifacts ->> Tests: Download source constraints - GitHub Registry ->> Tests: Use cache from registry - Note over Tests: Build PROD Images
[COMMIT_SHA] - Tests ->> GitHub Registry: Push PROD Images
[COMMIT_SHA] - and - opt - GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] - Note over Tests: Run static checks - end - and - opt - GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] - Note over Tests: Build docs - end - and - opt - GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] - Note over Tests: Spellcheck docs - end - and - opt - GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] - Note over Tests: Unit Tests
Python/DB matrix - end - and - opt - GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] - Note over Tests: Unit Tests
Python/Non-DB matrix - end - and - opt - GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] - Note over Tests: Integration Tests - end - and - opt - GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] - Note over Tests: Quarantined Tests - end - and - opt - GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] - Note over Tests: Build/test provider packages
wheel, sdist, old airflow - end - and - opt - GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] - Note over Tests: Test airflow
release commands - end - and - opt - GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] - Note over Tests: Helm tests - end - end - Note over Tests: Skip waiting for PROD images - par - Note over Tests: Summarize Warnings - and - opt - Artifacts ->> Tests: Download source,pypi,no-providers constraints - Note over Tests: Display constraints diff - end - and - Note over Tests: Build ARM CI images - and - opt - GitHub Registry ->> Tests: Pull PROD Images
[COMMIT_SHA] - Note over Tests: Run Kubernetes
tests - end - and - opt - GitHub Registry ->> Tests: Pull PROD Images
[COMMIT_SHA] - Note over Tests: Verify PROD Images
[COMMIT_SHA] - Note over Tests: Run docker-compose
tests - end - end - Tests -->> Airflow Repo: Status update - deactivate Airflow Repo - deactivate Tests -``` - -## Merge "Canary" run - -This is the flow that happens when a pull request is merged to the "main" branch or pushed to any of -the "v2-*-test" branches. The "Canary" run attempts to upgrade dependencies to the latest versions -and quickly pushes an early cache the CI/PROD images to the GitHub Registry - so that pull requests -can quickly use the new cache - this is useful when Dockerfile or installation scripts change because such -cache will already have the latest Dockerfile and scripts pushed even if some tests will fail. -When successful, the run updates the constraints files in the "constraints-BRANCH" branch with the latest -constraints and pushes both cache and latest CI/PROD images to the GitHub Registry. - -```mermaid -sequenceDiagram - Note over Airflow Repo: push/merge - Note over Tests: push
[Write Token] - activate Airflow Repo - Airflow Repo -->> Tests: Trigger 'push' - activate Tests - Note over Tests: Build info - Note over Tests: Selective checks
Decide what to do - par - GitHub Registry ->> Tests: Use cache from registry
(Not for scheduled run) - Airflow Repo ->> Tests: Use constraints from `constraints-BRANCH` - Note over Tests: Build CI Images
[COMMIT_SHA]
Always upgrade to newer deps - Tests ->> GitHub Registry: Push CI Images
[COMMIT_SHA] - Tests ->> Artifacts: Upload source constraints - and - GitHub Registry ->> Tests: Use cache from registry
(Not for scheduled run) - Note over Tests: Check that image builds quickly - and - GitHub Registry ->> Tests: Use cache from registry
(Not for scheduled run) - Note over Tests: Push early CI Image cache - Tests ->> GitHub Registry: Push CI cache Images - and - Note over Tests: OpenAPI client gen - and - Note over Tests: React WWW tests - and - Note over Tests: Test git clone on Windows - and - Note over Tests: Run upgrade checks - end - Note over Tests: Skip waiting for CI images - par - GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] - Note over Tests: Verify CI Images
[COMMIT_SHA] - Note over Tests: Generate constraints
source,pypi,no-providers - Tests ->> Artifacts: Upload source,pypi,no-providers constraints - and - Artifacts ->> Tests: Download source constraints - GitHub Registry ->> Tests: Use cache from registry - Note over Tests: Build PROD Images
[COMMIT_SHA] - Tests ->> GitHub Registry: Push PROD Images
[COMMIT_SHA] - and - Artifacts ->> Tests: Download source constraints - and - GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] - Note over Tests: Run static checks - and - GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] - Note over Tests: Build docs - and - GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] - Note over Tests: Spellcheck docs - and - GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] - Note over Tests: Unit Tests
Python/DB matrix - and - GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] - Note over Tests: Unit Tests
Python/Non-DB matrix - and - GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] - Note over Tests: Integration Tests - and - GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] - Note over Tests: Quarantined Tests - and - GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] - Note over Tests: Build/test provider packages
wheel, sdist, old airflow - and - GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] - Note over Tests: Test airflow
release commands - and - GitHub Registry ->> Tests: Pull CI Images
[COMMIT_SHA] - Note over Tests: Helm tests - end - Note over Tests: Skip waiting for PROD images - par - Note over Tests: Summarize Warnings - and - Artifacts ->> Tests: Download source,pypi,no-providers constraints - Note over Tests: Display constraints diff - Tests ->> Airflow Repo: Push constraints if changed to 'constraints-BRANCH' - and - GitHub Registry ->> Tests: Pull PROD Images
[COMMIT_SHA] - Note over Tests: Test examples
PROD image building - and - GitHub Registry ->> Tests: Pull PROD Image
[COMMIT_SHA] - Note over Tests: Run Kubernetes
tests - and - GitHub Registry ->> Tests: Pull PROD Image
[COMMIT_SHA] - Note over Tests: Verify PROD Images
[COMMIT_SHA] - Note over Tests: Run docker-compose
tests - end - par - GitHub Registry ->> Tests: Use cache from registry - Airflow Repo ->> Tests: Get latest constraints from 'constraints-BRANCH' - Note over Tests: Build CI latest images/cache - Tests ->> GitHub Registry: Push CI latest images/cache - GitHub Registry ->> Tests: Use cache from registry - Airflow Repo ->> Tests: Get latest constraints from 'constraints-BRANCH' - Note over Tests: Build PROD latest images/cache - Tests ->> GitHub Registry: Push PROD latest images/cache - and - GitHub Registry ->> Tests: Use cache from registry - Airflow Repo ->> Tests: Get latest constraints from 'constraints-BRANCH' - Note over Tests: Build ARM CI cache - Tests ->> GitHub Registry: Push ARM CI cache - GitHub Registry ->> Tests: Use cache from registry - Airflow Repo ->> Tests: Get latest constraints from 'constraints-BRANCH' - Note over Tests: Build ARM PROD cache - Tests ->> GitHub Registry: Push ARM PROD cache - end - Tests -->> Airflow Repo: Status update - deactivate Airflow Repo - deactivate Tests -``` - -## Scheduled run - -This is the flow that happens when a scheduled run is triggered. The "scheduled" workflow is aimed to -run regularly (overnight) even if no new PRs are merged to "main". Scheduled run is generally the -same as "Canary" run, with the difference that the image used to run the tests is built without using -cache - it's always built from the scratch. This way we can check that no "system" dependencies in debian -base image have changed and that the build is still reproducible. No separate diagram is needed for -scheduled run as it is identical to that of "Canary" run. 
- ------ - -Read next about [Debugging](07_debugging.md) diff --git a/dev/breeze/doc/ci/07_running_ci_locally.md b/dev/breeze/doc/ci/07_running_ci_locally.md new file mode 100644 index 0000000000000..21df374b0e16c --- /dev/null +++ b/dev/breeze/doc/ci/07_running_ci_locally.md @@ -0,0 +1,129 @@ + + + + +**Table of Contents** *generated with [DocToc](https://github.com/thlorenz/doctoc)* + +- [Running the CI Jobs locally](#running-the-ci-jobs-locally) + - [Basic variables](#basic-variables) + - [Host & GIT variables](#host--git-variables) + - [In-container environment initialization](#in-container-environment-initialization) +- [Image build variables](#image-build-variables) +- [Upgrade to newer dependencies](#upgrade-to-newer-dependencies) + + + +# Running the CI Jobs locally + +The main goal of the CI philosophy we have that no matter how complex +the test and integration infrastructure, as a developer you should be +able to reproduce and re-run any of the failed checks locally. One part +of it are pre-commit checks, that allow you to run the same static +checks in CI and locally, but another part is the CI environment which +is replicated locally with Breeze. + +You can read more about Breeze in +[README.rst](../README.rst) but in essence it is a python wrapper around +docker commands that allows you (among others) to re-create CI environment +in your local development instance and interact with it. +In its basic form, when you do development you can run all the same +tests that will be run in CI - but +locally, before you submit them as PR. Another use case where Breeze is +useful is when tests fail on CI. + +All our CI jobs are executed via `breeze` commands. You can replicate +exactly what our CI is doing by running the sequence of corresponding +`breeze` command. 
Make sure however that you look at both: + +- flags passed to `breeze` commands +- environment variables used when `breeze` command is run - this is + useful when we want to set a common flag for all `breeze` commands in + the same job or even the whole workflow. For example `VERBOSE` + variable is set to `true` for all our workflows so that more detailed + information about internal commands executed in CI is printed. + +In the output of the CI jobs, you will find both - the flags passed and +environment variables set. + +Every contributor can also pull and run images being result of a specific +CI run in GitHub Actions. This is a powerful tool that allows to +reproduce CI failures locally, enter the images and fix them much +faster. It is enough to download and uncompress the artifact that stores the +image and run ``breeze ci-image load -i --python python`` +to load the image and mark the image as refreshed in the local cache. + +You can read more about it in [Breeze](../README.rst) and +[Testing](../../../../contributing-docs/09_testing.rst) + +Depending whether the scripts are run locally via +[Breeze](../README.rst) or whether they are run in +`Build Images` or `Tests` workflows they can take different values. + +You can use those variables when you try to reproduce the build locally +(alternatively you can pass those via corresponding command line flags +passed to `breeze shell` command. + +## Basic variables + +| Variable | Local dev | CI | Comment | +|-----------------------------|-----------|------|------------------------------------------------------------------------------| +| PYTHON_MAJOR_MINOR_VERSION | | | Major/Minor version of Python used. | +| DB_RESET | false | true | Determines whether database should be reset at the container entry. | +| ANSWER | | yes | This variable determines if answer to questions should be automatically set. 
| + +## Host & GIT variables + +| Variable | Local dev | CI | Comment | +|-------------------|-----------|------------|-----------------------------------------| +| HOST_USER_ID | Host UID | | User id of the host user. | +| HOST_GROUP_ID | Host GID | | Group id of the host user. | +| HOST_OS | | linux | OS of the Host (darwin/linux/windows). | +| COMMIT_SHA | | GITHUB_SHA | SHA of the commit of the build is run | + +## In-container environment initialization + +| Variable | Local dev | CI | Comment | +|---------------------------------|-----------|-----------|-----------------------------------------------------------------------------| +| SKIP_ENVIRONMENT_INITIALIZATION | false (*) | false (*) | Skip initialization of test environment (*) set to true in pre-commits. | +| SKIP_IMAGE_UPGRADE_CHECK | false (*) | false (*) | Skip checking if image should be upgraded (*) set to true in pre-commits. | +| SKIP_PROVIDERS_TESTS | false | false | Skip running provider integration tests. | +| SKIP_SSH_SETUP | false | false (*) | Skip setting up SSH server for tests. (*) set to true in GitHub CodeSpaces. | +| VERBOSE_COMMANDS | false | false | Whether every command executed in docker should be printed. | + +# Image build variables + +| Variable | Local dev | CI | Comment | +|---------------------------------|-----------|-----------|--------------------------------------------------------------------| +| UPGRADE_TO_NEWER_DEPENDENCIES | false | false (*) | Whether dependencies should be upgraded. (*) set in CI when needed | + +# Upgrade to newer dependencies + +By default, we are using a tested set of dependency constraints stored in separated "orphan" branches of the airflow repository +("constraints-main, "constraints-2-0") but when this flag is set to anything but false (for example random value), +they are not used and "eager" upgrade strategy is used when installing dependencies. 
We set it to true in case of direct +pushes (merges) to main and scheduled builds so that the constraints are tested. In those builds, in case we determine +that the tests pass we automatically push latest set of "tested" constraints to the repository. Setting the value to random +value is best way to assure that constraints are upgraded even if there is no change to pyproject.toml +This way our constraints are automatically tested and updated whenever new versions of libraries are released. +(*) true in case of direct pushes and scheduled builds + +---- + +**Thank you** for reading this far. We hope that you have learned a lot about Airflow's CI. diff --git a/dev/breeze/doc/ci/08_running_ci_locally.md b/dev/breeze/doc/ci/08_running_ci_locally.md deleted file mode 100644 index 4fd0a7c993799..0000000000000 --- a/dev/breeze/doc/ci/08_running_ci_locally.md +++ /dev/null @@ -1,141 +0,0 @@ - - - - -**Table of Contents** *generated with [DocToc](https://github.com/thlorenz/doctoc)* - -- [Running the CI Jobs locally](#running-the-ci-jobs-locally) -- [Upgrade to newer dependencies](#upgrade-to-newer-dependencies) - - - -# Running the CI Jobs locally - -The main goal of the CI philosophy we have that no matter how complex -the test and integration infrastructure, as a developer you should be -able to reproduce and re-run any of the failed checks locally. One part -of it are pre-commit checks, that allow you to run the same static -checks in CI and locally, but another part is the CI environment which -is replicated locally with Breeze. - -You can read more about Breeze in -[README.rst](../README.rst) but in essence it is a script -that allows you to re-create CI environment in your local development -instance and interact with it. In its basic form, when you do -development you can run all the same tests that will be run in CI - but -locally, before you submit them as PR. Another use case where Breeze is -useful is when tests fail on CI. 
You can take the full `COMMIT_SHA` of -the failed build pass it as `--image-tag` parameter of Breeze and it -will download the very same version of image that was used in CI and run -it locally. This way, you can very easily reproduce any failed test that -happens in CI - even if you do not check out the sources connected with -the run. - -All our CI jobs are executed via `breeze` commands. You can replicate -exactly what our CI is doing by running the sequence of corresponding -`breeze` command. Make sure however that you look at both: - -- flags passed to `breeze` commands -- environment variables used when `breeze` command is run - this is - useful when we want to set a common flag for all `breeze` commands in - the same job or even the whole workflow. For example `VERBOSE` - variable is set to `true` for all our workflows so that more detailed - information about internal commands executed in CI is printed. - -In the output of the CI jobs, you will find both - the flags passed and -environment variables set. - -You can read more about it in [Breeze](../README.rst) and -[Testing](../../../../contributing-docs/09_testing.rst) - -Since we store images from every CI run, you should be able easily -reproduce any of the CI tests problems locally. You can do it by pulling -and using the right image and running it with the right docker command, -For example knowing that the CI job was for commit -`cd27124534b46c9688a1d89e75fcd137ab5137e3`: - -``` bash -docker pull ghcr.io/apache/airflow/main/ci/python3.9:cd27124534b46c9688a1d89e75fcd137ab5137e3 - -docker run -it ghcr.io/apache/airflow/main/ci/python3.9:cd27124534b46c9688a1d89e75fcd137ab5137e3 -``` - -But you usually need to pass more variables and complex setup if you -want to connect to a database or enable some integrations. Therefore it -is easiest to use [Breeze](../README.rst) for that. 
For -example if you need to reproduce a MySQL environment in python 3.9 -environment you can run: - -``` bash -breeze --image-tag cd27124534b46c9688a1d89e75fcd137ab5137e3 --python 3.9 --backend mysql -``` - -You will be dropped into a shell with the exact version that was used -during the CI run and you will be able to run pytest tests manually, -easily reproducing the environment that was used in CI. Note that in -this case, you do not need to checkout the sources that were used for -that run - they are already part of the image - but remember that any -changes you make in those sources are lost when you leave the image as -the sources are not mapped from your host machine. - -Depending whether the scripts are run locally via -[Breeze](../README.rst) or whether they are run in -`Build Images` or `Tests` workflows they can take different values. - -You can use those variables when you try to reproduce the build locally -(alternatively you can pass those via corresponding command line flags -passed to `breeze shell` command. - -| Variable | Local development | Build Images workflow | CI Workflow | Comment | -|-----------------------------------------|--------------------|------------------------|--------------|--------------------------------------------------------------------------------| -| Basic variables | | | | | -| PYTHON_MAJOR_MINOR_VERSION | | | | Major/Minor version of Python used. | -| DB_RESET | false | true | true | Determines whether database should be reset at the container entry. | -| Forcing answer | | | | | -| ANSWER | | yes | yes | This variable determines if answer to questions should be automatically given. | -| Host variables | | | | | -| HOST_USER_ID | | | | User id of the host user. | -| HOST_GROUP_ID | | | | Group id of the host user. | -| HOST_OS | | linux | linux | OS of the Host (darwin/linux/windows). 
| -| Git variables | | | | | -| COMMIT_SHA | | GITHUB_SHA | GITHUB_SHA | SHA of the commit of the build is run | -| In container environment initialization | | | | | -| SKIP_ENVIRONMENT_INITIALIZATION | false* | false* | false* | Skip initialization of test environment * set to true in pre-commits | -| SKIP_IMAGE_UPGRADE_CHECK | false* | false* | false* | Skip checking if image should be upgraded * set to true in pre-commits | -| SKIP_PROVIDERS_TESTS | false* | false* | false* | Skip running provider integration tests | -| SKIP_SSH_SETUP | false* | false* | false* | Skip setting up SSH server for tests. * set to true in GitHub CodeSpaces | -| VERBOSE_COMMANDS | false | false | false | Determines whether every command executed in docker should be printed. | -| Image build variables | | | | | -| UPGRADE_TO_NEWER_DEPENDENCIES | false | false | false* | Determines whether the build should attempt to upgrade dependencies. | - -# Upgrade to newer dependencies - -By default we are using a tested set of dependency constraints stored in separated "orphan" branches of the airflow repository -("constraints-main, "constraints-2-0") but when this flag is set to anything but false (for example random value), -they are not used used and "eager" upgrade strategy is used when installing dependencies. We set it to true in case of direct -pushes (merges) to main and scheduled builds so that the constraints are tested. In those builds, in case we determine -that the tests pass we automatically push latest set of "tested" constraints to the repository. Setting the value to random -value is best way to assure that constraints are upgraded even if there is no change to pyproject.toml -This way our constraints are automatically tested and updated whenever new versions of libraries are released. -(*) true in case of direct pushes and scheduled builds - ----- - -**Thank you** for reading this far. We hope that you have learned a lot about Airflow's CI. 
diff --git a/dev/breeze/doc/ci/README.md b/dev/breeze/doc/ci/README.md index f52376e18b125..bf20a3a700923 100644 --- a/dev/breeze/doc/ci/README.md +++ b/dev/breeze/doc/ci/README.md @@ -24,6 +24,5 @@ This directory contains detailed design of the Airflow CI setup. * [GitHub Variables](03_github_variables.md) - contains description of the GitHub variables used in CI * [Selective checks](04_selective_checks.md) - contains description of the selective checks performed in CI * [Workflows](05_workflows.md) - contains description of the workflows used in CI -* [Diagrams](06_diagrams.md) - contains diagrams of the CI workflows -* [Debugging](07_debugging.md) - contains description of debugging CI issues -* [Running CI Locally](08_running_ci_locally.md) - contains description of running CI locally +* [Debugging](06_debugging.md) - contains description of debugging CI issues +* [Running CI Locally](07_running_ci_locally.md) - contains description of running CI locally diff --git a/dev/breeze/doc/images/image_artifacts.png b/dev/breeze/doc/images/image_artifacts.png new file mode 100644 index 0000000000000..485a6a2c9cf10 Binary files /dev/null and b/dev/breeze/doc/images/image_artifacts.png differ diff --git a/dev/breeze/doc/images/output_ci-image.svg b/dev/breeze/doc/images/output_ci-image.svg index 8d7c7893f55c8..6fc5b425c5c7a 100644 --- a/dev/breeze/doc/images/output_ci-image.svg +++ b/dev/breeze/doc/images/output_ci-image.svg @@ -1,4 +1,4 @@ - +