From 9bca819527f7e81da4bfc994bc14a58a3cc46fde Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Tue, 31 Dec 2024 18:06:43 +0100 Subject: [PATCH] Speed up image building in CI by exporting and importing mount cache During the build, cache of ``uv`` and ``pip`` is stored in a separate "cache mount" volume that is mounted during the build. This cache mount volume is preserved between builds and can be exported and imported to speed up the build process in CI - where cache is stored as artifact and can be imported in the next build. This PR implements it: * export and import commands are added to breeze to export/import cache mount content * the cache mount content is stashed as artifact in the build after image is built and it is restored before the image is built --- .../workflows/additional-ci-image-checks.yml | 4 +- .github/workflows/ci-image-build.yml | 28 ++++ .github/workflows/ci.yml | 1 + dev/breeze/doc/06_managing_docker_images.rst | 22 +++ dev/breeze/doc/images/output_ci-image.svg | 22 ++- dev/breeze/doc/images/output_ci-image.txt | 2 +- .../output_ci-image_export-mount-cache.svg | 118 ++++++++++++++++ .../output_ci-image_export-mount-cache.txt | 1 + .../output_ci-image_import-mount-cache.svg | 118 ++++++++++++++++ .../output_ci-image_import-mount-cache.txt | 1 + ...utput_setup_check-all-params-in-groups.svg | 22 +-- ...utput_setup_check-all-params-in-groups.txt | 2 +- ...output_setup_regenerate-command-images.svg | 82 +++++------ ...output_setup_regenerate-command-images.txt | 2 +- .../commands/ci_image_commands.py | 129 ++++++++++++++++++ .../commands/ci_image_commands_config.py | 20 +++ .../src/airflow_breeze/utils/run_utils.py | 4 +- 17 files changed, 519 insertions(+), 59 deletions(-) create mode 100644 dev/breeze/doc/images/output_ci-image_export-mount-cache.svg create mode 100644 dev/breeze/doc/images/output_ci-image_export-mount-cache.txt create mode 100644 dev/breeze/doc/images/output_ci-image_import-mount-cache.svg create mode 100644 
dev/breeze/doc/images/output_ci-image_import-mount-cache.txt diff --git a/.github/workflows/additional-ci-image-checks.yml b/.github/workflows/additional-ci-image-checks.yml index 40196a6e04296..c6afc5a5b83ad 100644 --- a/.github/workflows/additional-ci-image-checks.yml +++ b/.github/workflows/additional-ci-image-checks.yml @@ -160,11 +160,13 @@ jobs: # packages: write # secrets: inherit # with: +# platform: "linux/arm64" # push-image: "false" +# upload-image-artifact: "true" +# upload-mount-cache-artifact: ${{ inputs.canary-run }} # runs-on-as-json-public: ${{ inputs.runs-on-as-json-public }} # runs-on-as-json-self-hosted: ${{ inputs.runs-on-as-json-self-hosted }} # python-versions: ${{ inputs.python-versions }} -# platform: "linux/arm64" # branch: ${{ inputs.branch }} # constraints-branch: ${{ inputs.constraints-branch }} # use-uv: ${{ inputs.use-uv}} diff --git a/.github/workflows/ci-image-build.yml b/.github/workflows/ci-image-build.yml index bed2233dfcb07..9405e6d0ff25d 100644 --- a/.github/workflows/ci-image-build.yml +++ b/.github/workflows/ci-image-build.yml @@ -56,6 +56,10 @@ on: # yamllint disable-line rule:truthy description: "Whether to upload docker image artifact" required: true type: string + upload-mount-cache-artifact: + description: "Whether to upload mount-cache artifact" + required: true + type: string debian-version: description: "Base Debian distribution to use for the build (bookworm)" type: string @@ -128,6 +132,17 @@ jobs: run: ./scripts/ci/cleanup_docker.sh - name: "Install Breeze" uses: ./.github/actions/breeze + - name: "Restore ${{ inputs.image-type }} cache mount image ${{ inputs.platform }}:${{ inputs.python }}" + uses: apache/infrastructure-actions/stash/restore@c94b890bbedc2fc61466d28e6bd9966bc6c6643c + with: + key: "ci-cache-mount-save-${{ inputs.platform }}-${{ env.PYTHON_MAJOR_MINOR_VERSION }}" + path: "/tmp/" + id: restore-cache-mount + - name: "Import mount-cache ${{ inputs.platform }}:${{ env.PYTHON_MAJOR_MINOR_VERSION }}" + 
run: > + breeze ci-image import-mount-cache --cache-file + /tmp/ci-cache-mount-save-${{ env.PYTHON_MAJOR_MINOR_VERSION }}.tar.gz + if: steps.restore-cache-mount.outputs.stash-hit == 'true' - name: "Login to ghcr.io" run: echo "${{ env.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin - name: > @@ -164,3 +179,16 @@ jobs: if-no-files-found: 'error' retention-days: 2 if: inputs.upload-image-artifact == 'true' + - name: "Export mount cache ${{ inputs.platform }}:${{ env.PYTHON_MAJOR_MINOR_VERSION }}" + run: > + breeze ci-image export-mount-cache --cache-file + /tmp/ci-cache-mount-save-${{ env.PYTHON_MAJOR_MINOR_VERSION }}.tar.gz + if: inputs.upload-mount-cache-artifact == 'true' + - name: "Stash cache mount ${{ inputs.platform }}:${{ env.PYTHON_MAJOR_MINOR_VERSION }}" + uses: apache/infrastructure-actions/stash/save@c94b890bbedc2fc61466d28e6bd9966bc6c6643c + with: + key: "ci-cache-mount-save-${{ inputs.platform }}-${{ env.PYTHON_MAJOR_MINOR_VERSION }}" + path: "/tmp/ci-cache-mount-save-${{ env.PYTHON_MAJOR_MINOR_VERSION }}.tar.gz" + if-no-files-found: 'error' + retention-days: 2 + if: inputs.upload-mount-cache-artifact == 'true' diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 04e97f46d75ac..ae38195ea8aa5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -209,6 +209,7 @@ jobs: platform: "linux/amd64" push-image: "false" upload-image-artifact: "true" + upload-mount-cache-artifact: ${{ needs.build-info.outputs.canary-run }} python-versions: ${{ needs.build-info.outputs.python-versions }} branch: ${{ needs.build-info.outputs.default-branch }} use-uv: ${{ needs.build-info.outputs.force-pip == 'true' && 'false' || 'true' }} diff --git a/dev/breeze/doc/06_managing_docker_images.rst b/dev/breeze/doc/06_managing_docker_images.rst index 89c302c12d1b8..2b263486783d2 100644 --- a/dev/breeze/doc/06_managing_docker_images.rst +++ b/dev/breeze/doc/06_managing_docker_images.rst @@ -133,6 +133,28 @@ To load the 
image from specific job run (for example 12538475388), you can use t :width: 100% :alt: Breeze image artifacts +Exporting and importing CI image cache mount +............................................ + +During the build, cache of ``uv`` and ``pip`` is stored in a separate "cache mount" volume that is mounted +during the build. This cache mount volume is preserved between builds and can be exported and imported +to speed up the build process in CI - where cache is stored as artifact and can be imported in the next +build. + +These are all available flags of ``export-mount-cache`` command: + +.. image:: ./images/output_ci-image_export-mount-cache.svg + :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/images/output_ci-image_export-mount-cache.svg + :width: 100% + :alt: Breeze ci-image export-mount-cache + +These are all available flags of ``import-mount-cache`` command: + +.. image:: ./images/output_ci-image_import-mount-cache.svg + :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/images/output_ci-image_import-mount-cache.svg + :width: 100% + :alt: Breeze ci-image import-mount-cache + PROD Image tasks ---------------- diff --git a/dev/breeze/doc/images/output_ci-image.svg b/dev/breeze/doc/images/output_ci-image.svg index 6fc5b425c5c7a..2b8c1414c8105 100644 --- a/dev/breeze/doc/images/output_ci-image.svg +++ b/dev/breeze/doc/images/output_ci-image.svg @@ -1,4 +1,4 @@ - +