From 79752af1557f0e1583e22fc7c89ea7791f9bedbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 17 Sep 2025 11:16:00 +0200 Subject: [PATCH 01/36] GH-47582: [CI][Packaging] Move linux-packaging tasks to apache/arrow repository --- .github/workflows/check_labels.yml | 79 ++++++ .github/workflows/cpp_extra.yml | 104 +------ .github/workflows/linux_packaging.yml | 373 ++++++++++++++++++++++++++ .github/workflows/report_ci.yml | 75 ++++++ dev/archery/archery/ci/core.py | 6 +- dev/tasks/linux-packages/helper.rb | 4 +- 6 files changed, 545 insertions(+), 96 deletions(-) create mode 100644 .github/workflows/check_labels.yml create mode 100644 .github/workflows/linux_packaging.yml create mode 100644 .github/workflows/report_ci.yml diff --git a/.github/workflows/check_labels.yml b/.github/workflows/check_labels.yml new file mode 100644 index 0000000000000..6a4ea2ed1ad24 --- /dev/null +++ b/.github/workflows/check_labels.yml @@ -0,0 +1,79 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +name: Check Labels Reusable + +on: + workflow_call: + inputs: + label: + description: "Label to check for" + required: true + type: string + parent-workflow: + description: "The parent workflow filename (without .yml)" + required: true + type: string + outputs: + ci-extra: + description: "Whether to run the extra CI" + value: ${{ jobs.check-labels.outputs.ci-extra }} + +jobs: + check-labels: + name: Check labels + runs-on: ubuntu-latest + timeout-minutes: 5 + outputs: + ci-extra: ${{ steps.check.outputs.ci-extra }} + steps: + - name: Checkout Arrow + if: github.event_name == 'pull_request' + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Check + id: check + env: + GH_TOKEN: ${{ github.token }} + run: | + set -ex + case "${GITHUB_EVENT_NAME}" in + push|schedule) + ci_extra=true + ;; + pull_request) + n_ci_extra_labels=$( + gh pr view ${{ github.event.number }} \ + --jq '.labels[].name | select(. == "${{ inputs.label }}")' \ + --json labels \ + --repo ${GITHUB_REPOSITORY} | wc -l) + if [ "${n_ci_extra_labels}" -eq 1 ]; then + ci_extra=true + else + git fetch origin ${GITHUB_BASE_REF} + git diff --stat origin/${GITHUB_BASE_REF}.. + if git diff --stat origin/${GITHUB_BASE_REF}.. | \ + grep \ + --fixed-strings ".github/workflows/${{ inputs.parent-workflow }}.yml" \ + --quiet; then + ci_extra=true + else + ci_extra=false + fi + fi + ;; + esac + + echo "ci-extra=${ci_extra}" >> "${GITHUB_OUTPUT}" diff --git a/.github/workflows/cpp_extra.yml b/.github/workflows/cpp_extra.yml index 2907ae3bcd18a..b78372c97506f 100644 --- a/.github/workflows/cpp_extra.yml +++ b/.github/workflows/cpp_extra.yml @@ -24,7 +24,9 @@ on: - '!dependabot/**' paths: - '.dockerignore' + - '.github/workflows/check_labels.yml' - '.github/workflows/cpp_extra.yml' + - '.github/workflows/report_ci.yml' - 'ci/conda_env_*' - 'ci/docker/**' - 'ci/scripts/ccache_setup.sh' @@ -43,7 +45,9 @@ on: pull_request: paths: - '.dockerignore' + - '.github/workflows/check_labels.yml' - '.github/workflows/cpp_extra.yml' + - '.github/workflows/report_ci.yml' - 'ci/conda_env_*' - 'ci/docker/**' - 'ci/scripts/ccache_setup.sh' @@ -75,47 +79,11 @@ permissions: jobs: check-labels: - name: Check labels - runs-on: ubuntu-latest - timeout-minutes: 5 - outputs: - ci-extra: ${{ steps.check.outputs.ci-extra }} - steps: - - name: Checkout Arrow - if: github.event_name == 'pull_request' - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - - name: Check - id: check - env: - GH_TOKEN: ${{ github.token }} - run: | - case "${GITHUB_EVENT_NAME}" in - push|schedule) - ci_extra=true - ;; - pull_request) - n_ci_extra_labels=$( - gh pr view ${{ github.event.number }} \ - --jq '.labels[].name | select(. == "CI: Extra")' \ - --json labels \ - --repo ${GITHUB_REPOSITORY} | wc -l) - if [ "${n_ci_extra_labels}" -eq 1 ]; then - ci_extra=true - else - git fetch origin ${GITHUB_BASE_REF} - if git diff --stat origin/${GITHUB_BASE_REF}.. | \ - grep \ - --fixed-strings ".github/workflows/cpp_extra.yml" \ - --quiet; then - ci_extra=true - else - ci_extra=false - fi - fi - ;; - esac - - echo "ci-extra=${ci_extra}" >> "${GITHUB_OUTPUT}" + uses: ./.github/workflows/check_labels.yml + secrets: inherit + with: + label: "CI: Extra" + parent-workflow: cpp_extra docker: needs: check-labels @@ -280,58 +248,8 @@ jobs: ../minimal_build.build/arrow-example report-extra-cpp: - runs-on: ubuntu-latest needs: - docker - jni-macos - # We don't have the job id as part of the context neither the job name. - # The GitHub API exposes numeric id or job name but not the github.job (report-extra-cpp). - # We match github.job to the name so we can pass it via context in order to be ignored on the report. - # The job is still running. - name: ${{ github.job }} - if: github.event_name == 'schedule' && always() - steps: - - name: Checkout Arrow - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - with: - fetch-depth: 0 - - name: Setup Python - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 - with: - python-version: 3 - - name: Setup Archery - run: python3 -m pip install -e dev/archery[crossbow] - - name: Prepare common options - run: | - if [ "${GITHUB_REPOSITORY}" = "apache/arrow" ]; then - echo "COMMON_OPTIONS=--send" >> "${GITHUB_ENV}" - else - echo "COMMON_OPTIONS=--dry-run" >> "${GITHUB_ENV}" - fi - - name: Send email - env: - GH_TOKEN: ${{ github.token }} - SMTP_PASSWORD: ${{ secrets.ARROW_SMTP_PASSWORD }} - run: | - archery ci report-email \ - --ignore ${{ github.job }} \ - --recipient-email 'builds@arrow.apache.org' \ - --repository ${{ github.repository }} \ - --sender-email 'arrow@commit-email.info' \ - --sender-name Arrow \ - --smtp-port 587 \ - --smtp-server 'commit-email.info' \ - --smtp-user arrow \ - ${COMMON_OPTIONS} \ - ${{ github.run_id }} - - name: Send chat message - if: always() - env: - GH_TOKEN: ${{ github.token }} - CHAT_WEBHOOK: ${{ secrets.ARROW_ZULIP_WEBHOOK }} - run: | - archery ci report-chat \ - --ignore ${{ github.job }} \ - --repository ${{ github.repository }} \ - ${COMMON_OPTIONS} \ - ${{ github.run_id }} + uses: ./.github/workflows/report_ci.yml + secrets: inherit diff --git a/.github/workflows/linux_packaging.yml b/.github/workflows/linux_packaging.yml new file mode 100644 index 0000000000000..3d44b23a329e2 --- /dev/null +++ b/.github/workflows/linux_packaging.yml @@ -0,0 +1,373 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: Linux Packaging Extra + +on: + push: + branches: + - '**' + - '!dependabot/**' + paths: + - '.dockerignore' + - '.github/workflows/check_labels.yml' + - '.github/workflows/linux_packaging.yml' + - '.github/workflows/report_ci.yml' + - 'cpp/**' + - 'c_glib/**' + - 'dev/tasks/linux-packages/**' + - 'format/Flight.proto' + - 'testing' + tags: + - '**' + pull_request: + paths: + - '.dockerignore' + - '.github/workflows/check_labels.yml' + - '.github/workflows/linux_packaging.yml' + - '.github/workflows/report_ci.yml' + - 'cpp/**' + - 'c_glib/**' + - 'dev/tasks/linux-packages/**' + - 'format/Flight.proto' + - 'testing' + types: + - labeled + - opened + - reopened + - synchronize + schedule: + - cron: "0 2 * * *" + workflow_dispatch: + inputs: + version: + description: "The Arrow version" + type: string + required: true + no_rc_version: + description: "The Arrow version without RC" + type: string + required: true + +concurrency: + group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + check-labels: + uses: ./.github/workflows/check_labels.yml + secrets: inherit + with: + label: "CI: Extra: Linux Packaging" + parent-workflow: linux_packaging + + package: + needs: check-labels + name: ${{ matrix.title }} + runs-on: ${{ matrix.runs-on }} + if: needs.check-labels.outputs.ci-extra == 'true' + timeout-minutes: 75 + strategy: + fail-fast: false + matrix: + include: + - architecture: amd64 + runs-on: ubuntu-latest + target: almalinux-8 + title: AlmaLinux 8 AMD64 + task-namespace: yum + upload-extensions: + - rpm + env: + DOCKER_VOLUME_PREFIX: ".docker/" + ARROW_VERSION: ${{ inputs.version || ''}} + NO_RC_VERSION: ${{ inputs.no_rc_version || ''}} + steps: + - name: Checkout Arrow + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + fetch-depth: 0 + submodules: recursive + - name: Get Arrow Version + id: get-arrow-version + run: | + if [ -z "${ARROW_VERSION}" ]; then + ARROW_VERSION=$(git describe --tags --abbrev=0 --match "apache-arrow-[0-9]*.*" | sed 's/^apache-arrow-//') + ARROW_VERSION="${ARROW_VERSION}$(date +%Y%m%d)" + echo "ARROW_VERSION=${ARROW_VERSION}" + echo "ARROW_VERSION=${ARROW_VERSION}" >> $GITHUB_ENV + # In case of dev (no workflow_dispatch input) use the same version for NO_RC_VERSION + echo "NO_RC_VERSION=${ARROW_VERSION}" >> $GITHUB_ENV + else + echo "Using provided ARROW_VERSION=${ARROW_VERSION}" + echo "Using provided NO_RC_VERSION=${NO_RC_VERSION}" + fi + - name: Free up disk space + if: runner.os == 'Linux' && runner.arch == 'X64' + shell: bash + run: | + ci/scripts/util_free_space.sh + - name: Cache Docker Volumes + uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 + with: + path: .docker + key: extra-${{ matrix.target }}-${{ hashFiles('cpp/**') }} + restore-keys: extra-${{ matrix.target }}- + - name: Set up Ruby + run: | + sudo apt update + sudo apt install -y \ + rake \ + ruby \ + ruby-dev + - name: Build + run: | + set -e + pushd dev/tasks/linux-packages + rake version:update ARROW_RELEASE_TIME="$(date --iso-8601=seconds)" + rake docker:pull || : + rake --trace ${{ matrix.task-namespace }}:build BUILD_DIR=build + popd + env: + APT_TARGETS: ${{ matrix.target }} + # TODO: Investigate what is this REPO variable and where is coming from + REPO: ${{ secrets.REPO }} + YUM_TARGETS: ${{ matrix.target }} + - name: Login to Dockerhub + if: >- + success() && + github.event_name == 'push' && + github.repository == 'apache/arrow' && + github.ref_name == 'main' + uses: docker/login-action@v2 + with: + username: ${{ secrets.DOCKERHUB_USER }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + - name: Docker Push + continue-on-error: true + if: >- + success() && + github.event_name == 'push' && + github.repository == 'apache/arrow' && + github.ref_name == 'main' + shell: bash + run: | + pushd dev/tasks/linux-packages + rake docker:push + popd + env: + APT_TARGETS: ${{ matrix.target }} + REPO: ${{ secrets.REPO }} + YUM_TARGETS: ${{ matrix.target }} + - name: Build artifact paths + id: artifact-paths + shell: bash + run: | + paths="" + for ext in ${{ join(matrix.upload-extensions, ' ') }}; do + paths="$paths dev/tasks/linux-packages/*/*/repositories/**/*.${ext}" + done + echo $paths + echo "paths=$paths" >> $GITHUB_OUTPUT + - name: Upload the artifacts to the job + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 + with: + name: ${{ matrix.target }}-${{ matrix.architecture }} + path: ${{ steps.artifact-paths.outputs.paths }} + - name: Download Artifacts + uses: actions/download-artifact@v4 + with: + path: artifacts-downloaded + - name: Expected uploaded yum artifacts + if: matrix.task-namespace == 'yum' + id: expected-yum-artifacts + run: | + set -ex + artifacts=" + arrow[0-9]+-acero-libs-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + arrow-acero-devel-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + arrow[0-9]+-compute-libs-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + arrow-compute-devel-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + arrow[0-9]+-dataset-libs-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + arrow-dataset-devel-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + arrow-dataset-glib-devel-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + arrow-dataset-glib-doc-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + arrow[0-9]+-dataset-glib-libs-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + arrow-debuginfo-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + arrow-devel-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + arrow-glib-devel-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + arrow-glib-doc-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + arrow[0-9]+-glib-libs-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + arrow[0-9]+-libs-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + arrow-tools-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + parquet-devel-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + parquet-glib-devel-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + parquet-glib-doc-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + parquet[0-9]+-glib-libs-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + parquet[0-9]+-libs-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + parquet-tools-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + " + if [ "${{ matrix.architecture }}" = "amd64" ]; then + artifacts="$artifacts + apache-arrow-release-${NO_RC_VERSION}-1.[a-z0-9]+.noarch.rpm + apache-arrow-release-${NO_RC_VERSION}-1.[a-z0-9]+.src.rpm + arrow-${NO_RC_VERSION}-1.[a-z0-9]+.src.rpm" + fi + if [ "${{ matrix.target }}" != "centos-7" ]; then + artifacts="$artifacts + arrow[0-9]+-acero-libs-debuginfo-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + arrow[0-9]+-compute-libs-debuginfo-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + arrow[0-9]+-dataset-glib-libs-debuginfo-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + arrow[0-9]+-dataset-libs-debuginfo-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + arrow-debugsource-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + arrow-flight-devel-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + arrow-flight-glib-devel-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + arrow-flight-glib-doc-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + arrow-flight-sql-devel-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + arrow-flight-sql-glib-devel-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + arrow-flight-sql-glib-doc-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + arrow[0-9]+-flight-glib-libs-debuginfo-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + arrow[0-9]+-flight-glib-libs-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + arrow[0-9]+-flight-libs-debuginfo-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + arrow[0-9]+-flight-libs-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + arrow[0-9]+-flight-sql-glib-libs-debuginfo-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + arrow[0-9]+-flight-sql-glib-libs-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + arrow[0-9]+-flight-sql-libs-debuginfo-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + arrow[0-9]+-flight-sql-libs-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + arrow[0-9]+-glib-libs-debuginfo-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + arrow[0-9]+-libs-debuginfo-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + arrow-tools-debuginfo-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + gandiva-devel-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + gandiva-glib-devel-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + gandiva-glib-doc-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + gandiva[0-9]+-glib-libs-debuginfo-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + gandiva[0-9]+-glib-libs-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + gandiva[0-9]+-libs-debuginfo-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + gandiva[0-9]+-libs-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + parquet[0-9]+-glib-libs-debuginfo-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + parquet[0-9]+-libs-debuginfo-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm + parquet-tools-debuginfo-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm" + fi + echo $artifacts + { + echo 'artifacts<> $GITHUB_OUTPUT + - name: Validate uploaded artifacts + id: validate-artifacts + shell: bash + run: | + set -ex + + # Get expected patterns from previous step output + expected_patterns="${{ steps.expected-yum-artifacts.outputs.artifacts }}" + + # Find all RPM files in the extracted directory + found_files=$(find artifacts-downloaded -type f -name '*.rpm' | sort) + + # Prepare expected patterns for matching + echo "$expected_patterns" | grep -v '^\s*$' > expected_patterns.txt + + # Check for missing artifacts + missing="" + for pattern in $(cat expected_patterns.txt); do + # Escape dots for grep + grep_pattern=$(echo "$pattern" | sed 's/\./\\./g') + match=$(echo "$found_files" | grep -E "$grep_pattern") + if [ -z "$match" ]; then + missing="$missing\n$pattern" + fi + done + + # Check for unexpected artifacts + unexpected="" + for file in $found_files; do + matched=false + for pattern in $(cat expected_patterns.txt); do + grep_pattern=$(echo "$pattern" | sed 's/\./\\./g') + if echo "$file" | grep -qE "$grep_pattern"; then + matched=true + break + fi + done + if [ "$matched" = false ]; then + unexpected="$unexpected\n$file" + fi + done + + if [ -n "$missing" ]; then + echo "Missing expected artifacts:" + echo -e "$missing" + exit 1 + fi + + if [ -n "$unexpected" ]; then + echo "Unexpected artifacts found:" + echo -e "$unexpected" + exit 1 + fi + + echo "All expected artifacts are present, and no unexpected artifacts found." + - name: Set up test + run: | + sudo apt install -y \ + apt-utils \ + cpio \ + createrepo-c \ + devscripts \ + gpg \ + rpm \ + rsync + gem install --user-install apt-dists-merge + (echo "Key-Type: RSA"; \ + echo "Key-Length: 4096"; \ + echo "Name-Real: Test"; \ + echo "Name-Email: test@example.com"; \ + echo "%no-protection") | \ + gpg --full-generate-key --batch + GPG_KEY_ID=$(gpg --list-keys --with-colon test@example.com | grep fpr | cut -d: -f10) + echo "GPG_KEY_ID=${GPG_KEY_ID}" >> ${GITHUB_ENV} + case "${{ matrix.target }}" in + almalinux-*|amazon-linux-*|centos-*) + repositories_dir=dev/tasks/linux-packages/apache-arrow-release/yum/repositories + rpm2cpio ${repositories_dir}/*/*/*/Packages/apache-arrow-release-*.rpm | \ + cpio -id + mv etc/pki/rpm-gpg/RPM-GPG-KEY-Apache-Arrow \ + dev/tasks/linux-packages/KEYS + ;; + esac + gpg --export --armor test@example.com >> dev/tasks/linux-packages/KEYS + - name: Test + run: | + set -e + pushd dev/tasks/linux-packages + rake --trace ${{ matrix.task-namespace }}:test + rm -rf ${{ matrix.task-namespace }}/repositories + popd + env: + APT_TARGETS: ${{ matrix.target }} + YUM_TARGETS: ${{ matrix.target }} + + report-extra-linux-packaging: + needs: + - package + uses: ./.github/workflows/report_ci.yml + secrets: inherit diff --git a/.github/workflows/report_ci.yml b/.github/workflows/report_ci.yml new file mode 100644 index 0000000000000..8c1a683a196c4 --- /dev/null +++ b/.github/workflows/report_ci.yml @@ -0,0 +1,75 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +name: Report CI results + +on: + workflow_call: + +jobs: + report-ci: + runs-on: ubuntu-latest + # We don't have the job id as part of the context neither the job name. + # The GitHub API exposes numeric id or job name but not the github.job (report-ci). + # We match github.job to the name so we can pass it via context in order to be ignored on the report. + # The job is still running. + name: ${{ github.job }} + if: always() + steps: + - name: Checkout Arrow + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + fetch-depth: 0 + - name: Setup Python + uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 + with: + python-version: 3 + - name: Setup Archery + run: python3 -m pip install -e dev/archery[crossbow] + - name: Prepare common options + run: | + if [ "${GITHUB_REPOSITORY}" = "apache/arrow" ]; then + echo "COMMON_OPTIONS=--dry-run" >> "${GITHUB_ENV}" + else + echo "COMMON_OPTIONS=--dry-run" >> "${GITHUB_ENV}" + fi + - name: Send email + env: + GH_TOKEN: ${{ github.token }} + SMTP_PASSWORD: ${{ secrets.ARROW_SMTP_PASSWORD }} + run: | + archery ci report-email \ + --ignore ${{ github.job }} \ + --recipient-email 'builds@arrow.apache.org' \ + --repository ${{ github.repository }} \ + --sender-email 'arrow@commit-email.info' \ + --sender-name Arrow \ + --smtp-port 587 \ + --smtp-server 'commit-email.info' \ + --smtp-user arrow \ + ${COMMON_OPTIONS} \ + ${{ github.run_id }} + - name: Send chat message + if: always() + env: + GH_TOKEN: ${{ github.token }} + CHAT_WEBHOOK: ${{ secrets.ARROW_ZULIP_WEBHOOK }} + run: | + archery ci report-chat \ + --ignore ${{ github.job }} \ + --repository ${{ github.repository }} \ + ${COMMON_OPTIONS} \ + ${{ github.run_id }} diff --git a/dev/archery/archery/ci/core.py b/dev/archery/archery/ci/core.py index 6cb4fa847ef4b..45364ffb0ddd9 100644 --- a/dev/archery/archery/ci/core.py +++ b/dev/archery/archery/ci/core.py @@ -66,7 +66,11 @@ def jobs(self): if jobs_resp.status_code == 200: jobs_data = jobs_resp.json() for job_data in jobs_data.get('jobs', []): - if job_data.get('name') != self.ignore_job: + # Ignore jobs that contain the ignore_job pattern + # from the reusable workflow this will be report-ci. + # The real job_data['name'] is the display name, like + # "report-extra-cpp / report-ci". + if self.ignore_job in job_data.get('name'): job = Job(job_data) jobs.append(job) return jobs diff --git a/dev/tasks/linux-packages/helper.rb b/dev/tasks/linux-packages/helper.rb index 0354188046d1c..55fae4593fe8a 100644 --- a/dev/tasks/linux-packages/helper.rb +++ b/dev/tasks/linux-packages/helper.rb @@ -47,13 +47,13 @@ def arrow_source_dir def detect_version(release_time) version_env = ENV["ARROW_VERSION"] - return version_env if version_env + return version_env unless version_env.nil? || version_env.empty? cmakelists_txt_path = File.join(arrow_source_dir, "cpp", "CMakeLists.txt") cmakelists_txt_content = File.read(cmakelists_txt_path) version = cmakelists_txt_content[/^set\(ARROW_VERSION "(.+?)"/, 1] formatted_release_time = release_time.strftime("%Y%m%d") - version.gsub(/-SNAPSHOT\z/) {"-dev#{formatted_release_time}"} + version.gsub(/-SNAPSHOT\z/) {".dev#{formatted_release_time}"} end def detect_env(name) From b715b1f468e74f6473317efc853fae1b076f1a3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Fri, 19 Sep 2025 11:31:28 +0200 Subject: [PATCH 02/36] Add Release steps except upload --- .github/workflows/linux_packaging.yml | 31 ++++++++----------- dev/release/03-binary-submit.sh | 6 ++++ dev/release/04-binary-download.sh | 11 +++++++ .../post-05-update-gh-release-notes.sh | 2 +- dev/release/utils-watch-gh-workflow.sh | 29 ++++++++++++----- 5 files changed, 52 insertions(+), 27 deletions(-) diff --git a/.github/workflows/linux_packaging.yml b/.github/workflows/linux_packaging.yml index 3d44b23a329e2..c32a81856d357 100644 --- a/.github/workflows/linux_packaging.yml +++ b/.github/workflows/linux_packaging.yml @@ -105,20 +105,6 @@ jobs: with: fetch-depth: 0 submodules: recursive - - name: Get Arrow Version - id: get-arrow-version - run: | - if [ -z "${ARROW_VERSION}" ]; then - ARROW_VERSION=$(git describe --tags --abbrev=0 --match "apache-arrow-[0-9]*.*" | sed 's/^apache-arrow-//') - ARROW_VERSION="${ARROW_VERSION}$(date +%Y%m%d)" - echo "ARROW_VERSION=${ARROW_VERSION}" - echo "ARROW_VERSION=${ARROW_VERSION}" >> $GITHUB_ENV - # In case of dev (no workflow_dispatch input) use the same version for NO_RC_VERSION - echo "NO_RC_VERSION=${ARROW_VERSION}" >> $GITHUB_ENV - else - echo "Using provided ARROW_VERSION=${ARROW_VERSION}" - echo "Using provided NO_RC_VERSION=${NO_RC_VERSION}" - fi - name: Free up disk space if: runner.os == 'Linux' && runner.arch == 'X64' shell: bash @@ -137,6 +123,17 @@ jobs: rake \ ruby \ ruby-dev + - name: Get Arrow Version + id: get-arrow-version + run: | + # In case of dev (no workflow_dispatch inputs) compute ARROW_VERSION and + # use the same version for NO_RC_VERSION. + if [ -z "${ARROW_VERSION}" ]; then + ARROW_VERSION=$(git describe --tags --abbrev=0 --match "apache-arrow-[0-9]*.*" | sed 's/^apache-arrow-//') + ARROW_VERSION="${ARROW_VERSION}$(date +%Y%m%d)" + echo "ARROW_VERSION=${ARROW_VERSION}" >> $GITHUB_ENV + echo "NO_RC_VERSION=${ARROW_VERSION}" >> $GITHUB_ENV + fi - name: Build run: | set -e @@ -271,13 +268,11 @@ jobs: echo "$artifacts" echo 'EOF' } >> $GITHUB_OUTPUT - - name: Validate uploaded artifacts + - name: Validate uploaded yum artifacts + if: matrix.task-namespace == 'yum' id: validate-artifacts shell: bash run: | - set -ex - - # Get expected patterns from previous step output expected_patterns="${{ steps.expected-yum-artifacts.outputs.artifacts }}" # Find all RPM files in the extracted directory diff --git a/dev/release/03-binary-submit.sh b/dev/release/03-binary-submit.sh index e3a0fc4ee7a14..bbb987a9b43e7 100755 --- a/dev/release/03-binary-submit.sh +++ b/dev/release/03-binary-submit.sh @@ -51,6 +51,12 @@ archery crossbow submit \ --arrow-sha ${ARROW_SHA} \ --group packaging +# Submit linux_packaging.yml workflow to build linux artifacts +gh workflow run linux_packaging.yml \ + --ref ${ARROW_BRANCH} \ + -f version=${version_with_rc} \ + -f no_rc_version=${version} + # archery will add a comment to the automatically generated PR to track # the submitted jobs job_name=$(archery crossbow latest-prefix --no-fetch ${crossbow_job_prefix}) diff --git a/dev/release/04-binary-download.sh b/dev/release/04-binary-download.sh index 16530478ce41f..aade247e0c774 100755 --- a/dev/release/04-binary-download.sh +++ b/dev/release/04-binary-download.sh @@ -20,6 +20,8 @@ set -e +SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + if [ "$#" -lt 2 ]; then echo "Usage: $0 [options]" exit @@ -31,6 +33,7 @@ rc_number=$1 shift version_with_rc="${version}-rc${rc_number}" crossbow_job_prefix="release-${version_with_rc}" +release_tag="apache-arrow-${version_with_rc}" # archery will submit a job with id: "${crossbow_job_prefix}-0" unless there # are jobs submitted with the same prefix (the integer at the end is auto @@ -39,3 +42,11 @@ crossbow_job_prefix="release-${version_with_rc}" : ${CROSSBOW_JOB_ID:="${crossbow_job_prefix}-${CROSSBOW_JOB_NUMBER}"} archery crossbow download-artifacts --no-fetch ${CROSSBOW_JOB_ID} "$@" + +# Wait for the GitHub Workflow that creates the Linux packages +# to finish before downloading the artifacts. +. "${SOURCE_DIR}/utils-watch-gh-workflow.sh" "${release_tag}" "linux_packaging.yml" + +RUN_ID=$(get_run_id) +# Download the artifacts created by the linux_packaging.yml workflow +download_artifacts "${SOURCE_DIR}/../../packages/${CROSSBOW_JOB_ID}" diff --git a/dev/release/post-05-update-gh-release-notes.sh b/dev/release/post-05-update-gh-release-notes.sh index 87c49fa85dbff..e2e2b92f33872 100755 --- a/dev/release/post-05-update-gh-release-notes.sh +++ b/dev/release/post-05-update-gh-release-notes.sh @@ -36,7 +36,7 @@ WORKFLOW="release.yml" # Wait for the GitHub Workflow that creates the GitHub Release # to finish before updating the release notes. -"${SOURCE_DIR}/utils-watch-gh-workflow.sh" "${TAG}" "${WORKFLOW}" +. "${SOURCE_DIR}/utils-watch-gh-workflow.sh" "${TAG}" "${WORKFLOW}" # Update the Release Notes section RELEASE_NOTES_URL="https://arrow.apache.org/release/${VERSION}.html" diff --git a/dev/release/utils-watch-gh-workflow.sh b/dev/release/utils-watch-gh-workflow.sh index 163f30251fb76..7117e7859f2fd 100755 --- a/dev/release/utils-watch-gh-workflow.sh +++ b/dev/release/utils-watch-gh-workflow.sh @@ -28,19 +28,32 @@ fi TAG=$1 WORKFLOW=$2 -: "${REPOSITORY:=${GITHUB_REPOSITORY:-apache/arrow}}" +: "${REPOSITORY:=${GITHUB_REPOSITORY:-raulcd/arrow}}" + +get_run_id() { + gh run list \ + --branch "${TAG}" \ + --jq '.[].databaseId' \ + --json databaseId \ + --limit 1 \ + --repo "${REPOSITORY}" \ + --workflow "${WORKFLOW}" +} + +download_artifacts() { + RUN_ID=$(get_run_id) + echo "Downloading artitfacts for workflow with ID: ${RUN_ID}" + gh run download \ + ${RUN_ID} \ + --repo "${REPOSITORY}" \ + --dir "$1" +} echo "Looking for GitHub Actions workflow on ${REPOSITORY}:${TAG}" RUN_ID="" while true; do echo "Waiting for run to start..." - RUN_ID=$(gh run list \ - --branch "${TAG}" \ - --jq '.[].databaseId' \ - --json databaseId \ - --limit 1 \ - --repo "${REPOSITORY}" \ - --workflow "${WORKFLOW}") + RUN_ID=$(get_run_id) if [ -n "${RUN_ID}" ]; then break fi From 01b842767cef4042ab8cbe64770fb1d8a3005348 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Fri, 19 Sep 2025 11:51:30 +0200 Subject: [PATCH 03/36] Move back report-ci to run only on scheduled and send report for apache/arrow repository --- .github/workflows/report_ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/report_ci.yml b/.github/workflows/report_ci.yml index 8c1a683a196c4..51bff6501392c 100644 --- a/.github/workflows/report_ci.yml +++ b/.github/workflows/report_ci.yml @@ -27,7 +27,7 @@ jobs: # We match github.job to the name so we can pass it via context in order to be ignored on the report. # The job is still running. name: ${{ github.job }} - if: always() + if: github.event_name == 'schedule' && always() steps: - name: Checkout Arrow uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 @@ -42,7 +42,7 @@ jobs: - name: Prepare common options run: | if [ "${GITHUB_REPOSITORY}" = "apache/arrow" ]; then - echo "COMMON_OPTIONS=--dry-run" >> "${GITHUB_ENV}" + echo "COMMON_OPTIONS=--send" >> "${GITHUB_ENV}" else echo "COMMON_OPTIONS=--dry-run" >> "${GITHUB_ENV}" fi From 696f63abbac7d326b6fd872673c2a9659e16d7c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Fri, 19 Sep 2025 12:05:38 +0200 Subject: [PATCH 04/36] Temporarily add all the almalinux, amazon linux and centos versions to validate CI --- .github/workflows/linux_packaging.yml | 77 +++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/.github/workflows/linux_packaging.yml b/.github/workflows/linux_packaging.yml index c32a81856d357..7857f1291a3b6 100644 --- a/.github/workflows/linux_packaging.yml +++ b/.github/workflows/linux_packaging.yml @@ -95,6 +95,83 @@ jobs: task-namespace: yum upload-extensions: - rpm + - architecture: arm64 + runs-on: ubuntu-24.04-arm + target: almalinux-8 + title: AlmaLinux 8 ARM64 + task-namespace: yum + upload-extensions: + - rpm + - architecture: amd64 + runs-on: ubuntu-latest + target: almalinux-9 + title: AlmaLinux 9 AMD64 + task-namespace: yum + upload-extensions: + - rpm + - architecture: arm64 + runs-on: ubuntu-24.04-arm + target: almalinux-9 + title: AlmaLinux 9 ARM64 + task-namespace: yum + upload-extensions: + - rpm + - architecture: amd64 + runs-on: ubuntu-latest + target: almalinux-10 + title: AlmaLinux 10 AMD64 + task-namespace: yum + upload-extensions: + - rpm + - architecture: arm64 + runs-on: ubuntu-24.04-arm + target: almalinux-10 + title: AlmaLinux 10 ARM64 + task-namespace: yum + upload-extensions: + - rpm + - architecture: amd64 + runs-on: ubuntu-latest + target: amazon-linux-2023 + title: Amazon Linux 2023 AMD64 + task-namespace: yum + upload-extensions: + - rpm + - architecture: arm64 + runs-on: ubuntu-24.04-arm + target: amazon-linux-2023 + title: Amazon Linux 2023 ARM64 + task-namespace: yum + upload-extensions: + - rpm + - architecture: amd64 + runs-on: ubuntu-latest + target: centos-7 + title: CentOS 7 AMD64 + task-namespace: yum + upload-extensions: + - rpm + - architecture: arm64 + runs-on: ubuntu-24.04-arm + target: centos-7 + title: CentOS 7 ARM64 + task-namespace: yum + upload-extensions: + - rpm + - architecture: amd64 + runs-on: ubuntu-latest + target: centos-9-stream + title: CentOS 9 Stream AMD64 + task-namespace: yum + upload-extensions: + - rpm + - architecture: arm64 + runs-on: ubuntu-24.04-arm + target: centos-9-stream + title: CentOS 9 Stream ARM64 + task-namespace: yum + upload-extensions: + - rpm env: DOCKER_VOLUME_PREFIX: ".docker/" ARROW_VERSION: ${{ inputs.version || ''}} From 66df8cb7f715b67b3441580b9f0251c5c3e2cfa0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Fri, 19 Sep 2025 12:48:56 +0200 Subject: [PATCH 05/36] Remove Centos 7 arm64, remove check for files requiring amd64 (as they are present on arm64 too and add folder for specific job artifact on check --- .github/workflows/linux_packaging.yml | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/.github/workflows/linux_packaging.yml b/.github/workflows/linux_packaging.yml index 7857f1291a3b6..6b5599a664194 100644 --- a/.github/workflows/linux_packaging.yml +++ b/.github/workflows/linux_packaging.yml @@ -151,13 +151,6 @@ jobs: task-namespace: yum upload-extensions: - rpm - - architecture: arm64 - runs-on: ubuntu-24.04-arm - target: centos-7 - title: CentOS 7 ARM64 - task-namespace: yum - upload-extensions: - - rpm - architecture: amd64 runs-on: ubuntu-latest target: centos-9-stream @@ -275,6 +268,9 @@ jobs: run: | set -ex artifacts=" + apache-arrow-release-${NO_RC_VERSION}-1.[a-z0-9]+.noarch.rpm + apache-arrow-release-${NO_RC_VERSION}-1.[a-z0-9]+.src.rpm + arrow-${NO_RC_VERSION}-1.[a-z0-9]+.src.rpm arrow[0-9]+-acero-libs-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm arrow-acero-devel-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm arrow[0-9]+-compute-libs-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm @@ -298,12 +294,6 @@ jobs: parquet[0-9]+-libs-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm parquet-tools-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm " - if [ "${{ matrix.architecture }}" = "amd64" ]; then - artifacts="$artifacts - apache-arrow-release-${NO_RC_VERSION}-1.[a-z0-9]+.noarch.rpm - apache-arrow-release-${NO_RC_VERSION}-1.[a-z0-9]+.src.rpm - arrow-${NO_RC_VERSION}-1.[a-z0-9]+.src.rpm" - fi if [ "${{ matrix.target }}" != "centos-7" ]; then artifacts="$artifacts arrow[0-9]+-acero-libs-debuginfo-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm @@ -353,7 +343,7 @@ jobs: expected_patterns="${{ steps.expected-yum-artifacts.outputs.artifacts }}" # Find all RPM files in the extracted directory - found_files=$(find artifacts-downloaded -type f -name '*.rpm' | sort) + found_files=$(find artifacts-downloaded/${{ matrix.target }}-${{ matrix.architecture }} -type f -name '*.rpm' | sort) # Prepare expected patterns for matching echo "$expected_patterns" | grep -v '^\s*$' > expected_patterns.txt From 0530d0be67a7a384b2e32e7ff5d83cdb96c09e67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Fri, 19 Sep 2025 14:25:20 +0200 Subject: [PATCH 06/36] Apply fixes for arm64 --- .github/workflows/linux_packaging.yml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/.github/workflows/linux_packaging.yml b/.github/workflows/linux_packaging.yml index 6b5599a664194..c623c7f4247aa 100644 --- a/.github/workflows/linux_packaging.yml +++ b/.github/workflows/linux_packaging.yml @@ -193,6 +193,22 @@ jobs: rake \ ruby \ ruby-dev + - name: Prepare apache-arrow-apt-source for arm64 + if: ${{ matrix.architecture }} == 'arm64' + run: | + pushd arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt + for target in *-*; do + cp -a ${target} ${target}-arm64 + done + popd + - name: Prepare apache-arrow-release for arm64 + if: ${{ matrix.architecture }} == 'arm64' + run: | + pushd arrow/dev/tasks/linux-packages/apache-arrow-release/yum + for target in *-*; do + cp -a ${target} ${target}-aarch64 + done + popd - name: Get Arrow Version id: get-arrow-version run: | @@ -426,6 +442,7 @@ jobs: popd env: APT_TARGETS: ${{ matrix.target }} + ARCHITECTURE: ${{ matrix.architecture }} YUM_TARGETS: ${{ matrix.target }} report-extra-linux-packaging: From 5f516f37662fc217f5971c84e97b9ae6b6401f00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Fri, 19 Sep 2025 14:26:16 +0200 Subject: [PATCH 07/36] Leave only almalinux amd64 and arm64 --- .github/workflows/linux_packaging.yml | 63 --------------------------- 1 file changed, 63 deletions(-) diff --git a/.github/workflows/linux_packaging.yml b/.github/workflows/linux_packaging.yml index c623c7f4247aa..99bed561e98e2 100644 --- a/.github/workflows/linux_packaging.yml +++ b/.github/workflows/linux_packaging.yml @@ -102,69 +102,6 @@ jobs: task-namespace: yum upload-extensions: - rpm - - architecture: amd64 - runs-on: ubuntu-latest - target: almalinux-9 - title: AlmaLinux 9 AMD64 - task-namespace: yum - upload-extensions: - - rpm - - architecture: arm64 - runs-on: ubuntu-24.04-arm - target: almalinux-9 - title: AlmaLinux 9 ARM64 - task-namespace: yum - upload-extensions: - - rpm - - architecture: amd64 - runs-on: ubuntu-latest - target: almalinux-10 - title: AlmaLinux 10 AMD64 - task-namespace: yum - upload-extensions: - - rpm - - architecture: arm64 - runs-on: ubuntu-24.04-arm - target: almalinux-10 - title: AlmaLinux 10 ARM64 - task-namespace: yum - upload-extensions: - - rpm - - architecture: amd64 - runs-on: ubuntu-latest - target: amazon-linux-2023 - title: Amazon Linux 2023 AMD64 - task-namespace: yum - upload-extensions: - - rpm - - architecture: arm64 - runs-on: ubuntu-24.04-arm - target: amazon-linux-2023 - title: Amazon Linux 2023 ARM64 - task-namespace: yum - upload-extensions: - - rpm - - architecture: amd64 - runs-on: ubuntu-latest - target: centos-7 - title: CentOS 7 AMD64 - task-namespace: yum - upload-extensions: - - rpm - - architecture: amd64 - runs-on: ubuntu-latest - target: centos-9-stream - title: CentOS 9 Stream AMD64 - task-namespace: yum - upload-extensions: - - rpm - - architecture: arm64 - runs-on: ubuntu-24.04-arm - target: centos-9-stream - title: CentOS 9 Stream ARM64 - task-namespace: yum - upload-extensions: - - rpm env: DOCKER_VOLUME_PREFIX: ".docker/" ARROW_VERSION: ${{ inputs.version || ''}} From 5ab9aed7d4e121c5f09129f7901f3e6d38a2397d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Fri, 19 Sep 2025 14:29:02 +0200 Subject: [PATCH 08/36] Use correct folder for arm64 prepare --- .github/workflows/linux_packaging.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/linux_packaging.yml b/.github/workflows/linux_packaging.yml index 99bed561e98e2..445361491f294 100644 --- a/.github/workflows/linux_packaging.yml +++ b/.github/workflows/linux_packaging.yml @@ -133,7 +133,7 @@ jobs: - name: Prepare apache-arrow-apt-source for arm64 if: ${{ matrix.architecture }} == 'arm64' run: | - pushd arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt + pushd dev/tasks/linux-packages/apache-arrow-apt-source/apt for target in *-*; do cp -a ${target} ${target}-arm64 done @@ -141,7 +141,7 @@ jobs: - name: Prepare apache-arrow-release for arm64 if: ${{ matrix.architecture }} == 'arm64' run: | - pushd arrow/dev/tasks/linux-packages/apache-arrow-release/yum + pushd dev/tasks/linux-packages/apache-arrow-release/yum for target in *-*; do cp -a ${target} ${target}-aarch64 done From 1659aaa3fde0be42a07f58d5d0b3ced9824c2961 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Fri, 19 Sep 2025 15:25:43 +0200 Subject: [PATCH 09/36] Fix target for aarch64 --- .github/workflows/linux_packaging.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/linux_packaging.yml b/.github/workflows/linux_packaging.yml index 445361491f294..70f409b244a1c 100644 --- a/.github/workflows/linux_packaging.yml +++ b/.github/workflows/linux_packaging.yml @@ -97,7 +97,7 @@ jobs: - rpm - architecture: arm64 runs-on: ubuntu-24.04-arm - target: almalinux-8 + target: almalinux-8-aarch64 title: AlmaLinux 8 ARM64 task-namespace: yum upload-extensions: From 45692ce2ea7c553e6c67dde7042b4e0ba234c083 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Fri, 19 Sep 2025 16:32:34 +0200 Subject: [PATCH 10/36] Several review comments and refactors --- .github/workflows/check_labels.yml | 40 ++++++--------- .github/workflows/cpp_extra.yml | 11 ++-- ...{linux_packaging.yml => package_linux.yml} | 50 ++++++++++--------- .github/workflows/report_ci.yml | 1 + dev/release/03-binary-submit.sh | 4 +- dev/release/04-binary-download.sh | 4 +- dev/tasks/linux-packages/helper.rb | 4 +- 7 files changed, 58 insertions(+), 56 deletions(-) rename .github/workflows/{linux_packaging.yml => package_linux.yml} (90%) diff --git a/.github/workflows/check_labels.yml b/.github/workflows/check_labels.yml index 6a4ea2ed1ad24..ed572f3820748 100644 --- a/.github/workflows/check_labels.yml +++ b/.github/workflows/check_labels.yml @@ -14,15 +14,12 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. + name: Check Labels Reusable on: workflow_call: inputs: - label: - description: "Label to check for" - required: true - type: string parent-workflow: description: "The parent workflow filename (without .yml)" required: true @@ -50,30 +47,25 @@ jobs: run: | set -ex case "${GITHUB_EVENT_NAME}" in - push|schedule) - ci_extra=true + push|schedule|workflow_dispatch) + echo "force=true" >> "${GITHUB_OUTPUT}" ;; pull_request) - n_ci_extra_labels=$( + { + echo "ci-extra-labels<> "${GITHUB_OUTPUT}" + git fetch origin ${GITHUB_BASE_REF} + git diff --stat origin/${GITHUB_BASE_REF}.. + if git diff --stat origin/${GITHUB_BASE_REF}.. | \ + grep \ + --fixed-strings ".github/workflows/${{ inputs.parent-workflow }}.yml" \ + --quiet; then + echo "force=true" >> "${GITHUB_OUTPUT}" fi ;; esac - - echo "ci-extra=${ci_extra}" >> "${GITHUB_OUTPUT}" diff --git a/.github/workflows/cpp_extra.yml b/.github/workflows/cpp_extra.yml index b78372c97506f..97dbadd25d58c 100644 --- a/.github/workflows/cpp_extra.yml +++ b/.github/workflows/cpp_extra.yml @@ -82,14 +82,16 @@ jobs: uses: ./.github/workflows/check_labels.yml secrets: inherit with: - label: "CI: Extra" parent-workflow: cpp_extra docker: needs: check-labels name: ${{ matrix.title }} runs-on: ${{ matrix.runs-on }} - if: needs.check-labels.outputs.ci-extra == 'true' + if: >- + needs.check-labels.outputs.force == 'true' || + contains(fromJSON(needs.check-labels.outputs.ci-extra-labels || '[]'), 'CI: Extra') || + contains(fromJSON(needs.check-labels.outputs.ci-extra-labels || '[]'), 'CI: Extra: C++') timeout-minutes: 75 strategy: fail-fast: false @@ -166,7 +168,10 @@ jobs: needs: check-labels name: JNI macOS runs-on: macos-14 - if: needs.check-labels.outputs.ci-extra == 'true' + if: >- + needs.check-labels.outputs.force == 'true' || + contains(fromJSON(needs.check-labels.outputs.ci-extra-labels || '[]'), 'CI: Extra') || + contains(fromJSON(needs.check-labels.outputs.ci-extra-labels || '[]'), 'CI: Extra: C++') timeout-minutes: 45 env: MACOSX_DEPLOYMENT_TARGET: "14.0" diff --git a/.github/workflows/linux_packaging.yml b/.github/workflows/package_linux.yml similarity index 90% rename from .github/workflows/linux_packaging.yml rename to .github/workflows/package_linux.yml index 70f409b244a1c..5c1cfd58e45b3 100644 --- a/.github/workflows/linux_packaging.yml +++ b/.github/workflows/package_linux.yml @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -name: Linux Packaging Extra +name: Package Linux on: push: @@ -25,7 +25,7 @@ on: paths: - '.dockerignore' - '.github/workflows/check_labels.yml' - - '.github/workflows/linux_packaging.yml' + - '.github/workflows/package_linux.yml' - '.github/workflows/report_ci.yml' - 'cpp/**' - 'c_glib/**' @@ -38,7 +38,7 @@ on: paths: - '.dockerignore' - '.github/workflows/check_labels.yml' - - '.github/workflows/linux_packaging.yml' + - '.github/workflows/package_linux.yml' - '.github/workflows/report_ci.yml' - 'cpp/**' - 'c_glib/**' @@ -75,14 +75,16 @@ jobs: uses: ./.github/workflows/check_labels.yml secrets: inherit with: - label: "CI: Extra: Linux Packaging" - parent-workflow: linux_packaging + parent-workflow: package_linux package: needs: check-labels name: ${{ matrix.title }} runs-on: ${{ matrix.runs-on }} - if: needs.check-labels.outputs.ci-extra == 'true' + if: >- + needs.check-labels.outputs.force == 'true' || + contains(fromJSON(needs.check-labels.outputs.ci-extra-labels || '[]'), 'CI: Extra') || + contains(fromJSON(needs.check-labels.outputs.ci-extra-labels || '[]'), 'CI: Extra: Package: Linux') timeout-minutes: 75 strategy: fail-fast: false @@ -121,8 +123,8 @@ jobs: uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 with: path: .docker - key: extra-${{ matrix.target }}-${{ hashFiles('cpp/**') }} - restore-keys: extra-${{ matrix.target }}- + key: package-linux-${{ matrix.target }}-${{ hashFiles('cpp/**') }} + restore-keys: package-linux-${{ matrix.target }}- - name: Set up Ruby run: | sudo apt update @@ -167,8 +169,7 @@ jobs: popd env: APT_TARGETS: ${{ matrix.target }} - # TODO: Investigate what is this REPO variable and where is coming from - REPO: ${{ secrets.REPO }} + REPO: ghcr.io/${{ github.repository }}-package-linux YUM_TARGETS: ${{ matrix.target }} - name: Login to Dockerhub if: >- @@ -176,10 +177,11 @@ jobs: github.event_name == 'push' && github.repository == 'apache/arrow' && github.ref_name == 'main' - uses: docker/login-action@v2 + uses: docker/login-action@184bdaa0721073962dff0199f1fb9940f07167d1 # v3.5.0 with: - username: ${{ secrets.DOCKERHUB_USER }} - password: ${{ secrets.DOCKERHUB_TOKEN }} + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN}} - name: Docker Push continue-on-error: true if: >- @@ -196,21 +198,17 @@ jobs: APT_TARGETS: ${{ matrix.target }} REPO: ${{ secrets.REPO }} YUM_TARGETS: ${{ matrix.target }} - - name: Build artifact paths - id: artifact-paths + - name: Build artifact tarball shell: bash run: | - paths="" - for ext in ${{ join(matrix.upload-extensions, ' ') }}; do - paths="$paths dev/tasks/linux-packages/*/*/repositories/**/*.${ext}" - done - echo $paths - echo "paths=$paths" >> $GITHUB_OUTPUT + pushd dev/tasks/linux-packages + tar cvzf ${{ matrix.id }}.tar.gz */${{ matrix.task_namespace }}/repositories + popd - name: Upload the artifacts to the job uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 with: name: ${{ matrix.target }}-${{ matrix.architecture }} - path: ${{ steps.artifact-paths.outputs.paths }} + path: dev/tasks/linux-packages/${{ matrix.id }}.tar.gz - name: Download Artifacts uses: actions/download-artifact@v4 with: @@ -293,9 +291,11 @@ jobs: id: validate-artifacts shell: bash run: | + set -ex expected_patterns="${{ steps.expected-yum-artifacts.outputs.artifacts }}" # Find all RPM files in the extracted directory + tar xf artifacts-downloaded/${{ matrix.target }}-${{ matrix.architecture }}/${{ matrix.id }}.tar.gz found_files=$(find artifacts-downloaded/${{ matrix.target }}-${{ matrix.architecture }} -type f -name '*.rpm' | sort) # Prepare expected patterns for matching @@ -343,6 +343,11 @@ jobs: echo "All expected artifacts are present, and no unexpected artifacts found." - name: Set up test run: | + mv artifacts-downloaded/${{ matrix.target }}-${{ matrix.architecture }} dev/tasks/linux-packages + pushd dev/tasks/linux-packages + rm -rf */${{ matrix.task-namespace }}/repositories # Remove artifacts + tar xf ${{ matrix.id }}.tar.gz # Use uploaded artifacts + popd sudo apt install -y \ apt-utils \ cpio \ @@ -375,7 +380,6 @@ jobs: set -e pushd dev/tasks/linux-packages rake --trace ${{ matrix.task-namespace }}:test - rm -rf ${{ matrix.task-namespace }}/repositories popd env: APT_TARGETS: ${{ matrix.target }} diff --git a/.github/workflows/report_ci.yml b/.github/workflows/report_ci.yml index 51bff6501392c..8d3e6ffc04b04 100644 --- a/.github/workflows/report_ci.yml +++ b/.github/workflows/report_ci.yml @@ -14,6 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. + name: Report CI results on: diff --git a/dev/release/03-binary-submit.sh b/dev/release/03-binary-submit.sh index bbb987a9b43e7..02cf0db3cabec 100755 --- a/dev/release/03-binary-submit.sh +++ b/dev/release/03-binary-submit.sh @@ -51,8 +51,8 @@ archery crossbow submit \ --arrow-sha ${ARROW_SHA} \ --group packaging -# Submit linux_packaging.yml workflow to build linux artifacts -gh workflow run linux_packaging.yml \ +# Submit package_linux.yml workflow to build linux artifacts +gh workflow run package_linux.yml \ --ref ${ARROW_BRANCH} \ -f version=${version_with_rc} \ -f no_rc_version=${version} diff --git a/dev/release/04-binary-download.sh b/dev/release/04-binary-download.sh index aade247e0c774..2a247138a6d97 100755 --- a/dev/release/04-binary-download.sh +++ b/dev/release/04-binary-download.sh @@ -45,8 +45,8 @@ archery crossbow download-artifacts --no-fetch ${CROSSBOW_JOB_ID} "$@" # Wait for the GitHub Workflow that creates the Linux packages # to finish before downloading the artifacts. -. "${SOURCE_DIR}/utils-watch-gh-workflow.sh" "${release_tag}" "linux_packaging.yml" +. "${SOURCE_DIR}/utils-watch-gh-workflow.sh" "${release_tag}" "package_linux.yml" RUN_ID=$(get_run_id) -# Download the artifacts created by the linux_packaging.yml workflow +# Download the artifacts created by the package_linux.yml workflow download_artifacts "${SOURCE_DIR}/../../packages/${CROSSBOW_JOB_ID}" diff --git a/dev/tasks/linux-packages/helper.rb b/dev/tasks/linux-packages/helper.rb index 55fae4593fe8a..618d4691c5165 100644 --- a/dev/tasks/linux-packages/helper.rb +++ b/dev/tasks/linux-packages/helper.rb @@ -72,13 +72,13 @@ def detect_env(name) raise "Failed to detect #{name} environment variable" end - def detect_repo + def docker_image_name detect_env("REPO") end def docker_image(os, architecture) architecture ||= "amd64" - "#{detect_repo}:#{architecture}-#{os}-package-#{@package}" + "#{docker_image_name}:#{architecture}-#{os}-package-#{@package}" end end end From 82c71e31010547f9ca2ec3cb9e3c0095ac476eb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Fri, 19 Sep 2025 16:41:56 +0200 Subject: [PATCH 11/36] Add check-labels testing --- .github/workflows/package_linux.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/package_linux.yml b/.github/workflows/package_linux.yml index 5c1cfd58e45b3..257b902ec0923 100644 --- a/.github/workflows/package_linux.yml +++ b/.github/workflows/package_linux.yml @@ -77,6 +77,16 @@ jobs: with: parent-workflow: package_linux + check-labels-status: + needs: check-labels + name: Testing labels status + runs-on: ubuntu-latest + steps: + - name: Show labels status + run: | + echo "force: ${{ needs.check-labels.outputs.force }}" + echo "ci-extra-labels: ${{ needs.check-labels.outputs.ci-extra-labels }}" + package: needs: check-labels name: ${{ matrix.title }} From d2f5eb5a602d5d39945b49571732371766aa8832 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Fri, 19 Sep 2025 16:44:18 +0200 Subject: [PATCH 12/36] Fix labels output --- .github/workflows/check_labels.yml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/check_labels.yml b/.github/workflows/check_labels.yml index ed572f3820748..9707e470f5688 100644 --- a/.github/workflows/check_labels.yml +++ b/.github/workflows/check_labels.yml @@ -25,9 +25,12 @@ on: required: true type: string outputs: - ci-extra: - description: "Whether to run the extra CI" - value: ${{ jobs.check-labels.outputs.ci-extra }} + ci-extra-labels: + description: "The extra CI labels" + value: ${{ jobs.check-labels.outputs.ci-extra-labels }} + force: + description: "Whether to force running the jobs" + value: ${{ jobs.check-labels.outputs.force }} jobs: check-labels: @@ -35,7 +38,8 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 5 outputs: - ci-extra: ${{ steps.check.outputs.ci-extra }} + ci-extra-labels: ${{ steps.check.outputs.ci-extra-labels }} + force: ${{ steps.check.outputs.force }} steps: - name: Checkout Arrow if: github.event_name == 'pull_request' From 58a6dc98ba057bb17e85cf5110e01445fc45a6c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Fri, 19 Sep 2025 17:29:06 +0200 Subject: [PATCH 13/36] Add some debugging to generate tarball --- .github/workflows/package_linux.yml | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/.github/workflows/package_linux.yml b/.github/workflows/package_linux.yml index 257b902ec0923..91bd9e12178f3 100644 --- a/.github/workflows/package_linux.yml +++ b/.github/workflows/package_linux.yml @@ -77,16 +77,6 @@ jobs: with: parent-workflow: package_linux - check-labels-status: - needs: check-labels - name: Testing labels status - runs-on: ubuntu-latest - steps: - - name: Show labels status - run: | - echo "force: ${{ needs.check-labels.outputs.force }}" - echo "ci-extra-labels: ${{ needs.check-labels.outputs.ci-extra-labels }}" - package: needs: check-labels name: ${{ matrix.title }} @@ -211,7 +201,12 @@ jobs: - name: Build artifact tarball shell: bash run: | + set -ex pushd dev/tasks/linux-packages + ls -la * + tree + ls -la */${{ matrix.task_namespace }} + ls -la */${{ matrix.task_namespace }}/repositories tar cvzf ${{ matrix.id }}.tar.gz */${{ matrix.task_namespace }}/repositories popd - name: Upload the artifacts to the job From 03dc33dc43a12bb9c6b31a1630cc0b63de7a4a32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Fri, 19 Sep 2025 18:26:35 +0200 Subject: [PATCH 14/36] Fix matrix variable to use --- .github/workflows/package_linux.yml | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/.github/workflows/package_linux.yml b/.github/workflows/package_linux.yml index 91bd9e12178f3..4990e5098aabd 100644 --- a/.github/workflows/package_linux.yml +++ b/.github/workflows/package_linux.yml @@ -203,17 +203,16 @@ jobs: run: | set -ex pushd dev/tasks/linux-packages - ls -la * - tree - ls -la */${{ matrix.task_namespace }} - ls -la */${{ matrix.task_namespace }}/repositories - tar cvzf ${{ matrix.id }}.tar.gz */${{ matrix.task_namespace }}/repositories + ls -la */${{ matrix.task-namespace }}/repositories + tree apache-arrow-release/${{ matrix.task-namespace }}/repositories + tree apache-arrow/${{ matrix.task-namespace }}/repositories + tar cvzf ${{ matrix.target }}-${{ matrix.architecture }}.tar.gz */${{ matrix.task-namespace }}/repositories popd - name: Upload the artifacts to the job uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 with: name: ${{ matrix.target }}-${{ matrix.architecture }} - path: dev/tasks/linux-packages/${{ matrix.id }}.tar.gz + path: dev/tasks/linux-packages/${{ matrix.target }}-${{ matrix.architecture }}.tar.gz - name: Download Artifacts uses: actions/download-artifact@v4 with: @@ -391,7 +390,7 @@ jobs: ARCHITECTURE: ${{ matrix.architecture }} YUM_TARGETS: ${{ matrix.target }} - report-extra-linux-packaging: + report-package-linux: needs: - package uses: ./.github/workflows/report_ci.yml From afa14073c6d68795e1e4dab08eb9b6f5fba883bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Fri, 19 Sep 2025 19:26:32 +0200 Subject: [PATCH 15/36] Change matrix.id for {{ matrix.target }}-{{ matrix.architecture }} for testing purposes --- .github/workflows/package_linux.yml | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/workflows/package_linux.yml b/.github/workflows/package_linux.yml index 4990e5098aabd..3367d1a049ed1 100644 --- a/.github/workflows/package_linux.yml +++ b/.github/workflows/package_linux.yml @@ -203,9 +203,6 @@ jobs: run: | set -ex pushd dev/tasks/linux-packages - ls -la */${{ matrix.task-namespace }}/repositories - tree apache-arrow-release/${{ matrix.task-namespace }}/repositories - tree apache-arrow/${{ matrix.task-namespace }}/repositories tar cvzf ${{ matrix.target }}-${{ matrix.architecture }}.tar.gz */${{ matrix.task-namespace }}/repositories popd - name: Upload the artifacts to the job @@ -299,8 +296,14 @@ jobs: expected_patterns="${{ steps.expected-yum-artifacts.outputs.artifacts }}" # Find all RPM files in the extracted directory - tar xf artifacts-downloaded/${{ matrix.target }}-${{ matrix.architecture }}/${{ matrix.id }}.tar.gz - found_files=$(find artifacts-downloaded/${{ matrix.target }}-${{ matrix.architecture }} -type f -name '*.rpm' | sort) + ls -larth artifacts-downloaded/ + ls -larth artifacts-downloaded/${{ matrix.target }}-${{ matrix.architecture }} + tar xvf artifacts-downloaded/${{ matrix.target }}-${{ matrix.architecture }}/${{ matrix.target }}-${{ matrix.architecture }}.tar.gz + ls -larth artifacts-downloaded/${{ matrix.target }}-${{ matrix.architecture }} + tree artifacts-downloaded/${{ matrix.target }}-${{ matrix.architecture }} + ls -larth . + tree . + found_files=$(find . -type f -name '*.rpm' | sort) # Prepare expected patterns for matching echo "$expected_patterns" | grep -v '^\s*$' > expected_patterns.txt @@ -350,7 +353,7 @@ jobs: mv artifacts-downloaded/${{ matrix.target }}-${{ matrix.architecture }} dev/tasks/linux-packages pushd dev/tasks/linux-packages rm -rf */${{ matrix.task-namespace }}/repositories # Remove artifacts - tar xf ${{ matrix.id }}.tar.gz # Use uploaded artifacts + tar xf ${{ matrix.target }}-${{ matrix.architecture }}.tar.gz # Use uploaded artifacts popd sudo apt install -y \ apt-utils \ From dab9e38e29ed3da1ef593f70f5bd9666364672c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 22 Sep 2025 11:12:41 +0200 Subject: [PATCH 16/36] untar on artifacts-downloaded/{{ matrix.target }}-{{ matrix.architecture }} instead of arrow_src --- .github/workflows/package_linux.yml | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/.github/workflows/package_linux.yml b/.github/workflows/package_linux.yml index 3367d1a049ed1..ceb5b805da3c2 100644 --- a/.github/workflows/package_linux.yml +++ b/.github/workflows/package_linux.yml @@ -296,14 +296,13 @@ jobs: expected_patterns="${{ steps.expected-yum-artifacts.outputs.artifacts }}" # Find all RPM files in the extracted directory - ls -larth artifacts-downloaded/ - ls -larth artifacts-downloaded/${{ matrix.target }}-${{ matrix.architecture }} - tar xvf artifacts-downloaded/${{ matrix.target }}-${{ matrix.architecture }}/${{ matrix.target }}-${{ matrix.architecture }}.tar.gz - ls -larth artifacts-downloaded/${{ matrix.target }}-${{ matrix.architecture }} - tree artifacts-downloaded/${{ matrix.target }}-${{ matrix.architecture }} - ls -larth . + pushd artifacts-downloaded/${{ matrix.target }}-${{ matrix.architecture }} + tar xvf ${{ matrix.target }}-${{ matrix.architecture }}.tar.gz tree . - found_files=$(find . -type f -name '*.rpm' | sort) + popd + + found_files=$(find artifacts-downloaded/${{ matrix.target }}-${{ matrix.architecture }} -type f -name '*.rpm' | sort) + echo "Found files: $found_files" # Prepare expected patterns for matching echo "$expected_patterns" | grep -v '^\s*$' > expected_patterns.txt From 78e0ff2505ae8a68f8dc7f466cafc466ca80c384 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 22 Sep 2025 12:26:57 +0200 Subject: [PATCH 17/36] Reduce matrix definition by using id + set up environment variables Add missing esac --- .github/workflows/package_linux.yml | 94 ++++++++++++++++------------- 1 file changed, 52 insertions(+), 42 deletions(-) diff --git a/.github/workflows/package_linux.yml b/.github/workflows/package_linux.yml index ceb5b805da3c2..0e9f824d8f29e 100644 --- a/.github/workflows/package_linux.yml +++ b/.github/workflows/package_linux.yml @@ -78,9 +78,9 @@ jobs: parent-workflow: package_linux package: + name: ${{ matrix.id }} + runs-on: ${{ contains(matrix.id, 'amd64') && 'ubuntu-latest' || 'ubuntu-24.04-arm' }} needs: check-labels - name: ${{ matrix.title }} - runs-on: ${{ matrix.runs-on }} if: >- needs.check-labels.outputs.force == 'true' || contains(fromJSON(needs.check-labels.outputs.ci-extra-labels || '[]'), 'CI: Extra') || @@ -89,26 +89,42 @@ jobs: strategy: fail-fast: false matrix: - include: - - architecture: amd64 - runs-on: ubuntu-latest - target: almalinux-8 - title: AlmaLinux 8 AMD64 - task-namespace: yum - upload-extensions: - - rpm - - architecture: arm64 - runs-on: ubuntu-24.04-arm - target: almalinux-8-aarch64 - title: AlmaLinux 8 ARM64 - task-namespace: yum - upload-extensions: - - rpm + id: + - almalinux-8-amd64 + - almalinux-8-arm64 env: DOCKER_VOLUME_PREFIX: ".docker/" ARROW_VERSION: ${{ inputs.version || ''}} NO_RC_VERSION: ${{ inputs.no_rc_version || ''}} steps: + - name: Prepare environment variables + env: + ID: ${{ matrix.id }} + run: | + set -ex + # Example: almalinux-8-amd64 -> amd64 + architecture="${ID##*-}" + echo "ARCHITECTURE=${architecture}" >> "${GITHUB_ENV}" + # Example: almalinux-8-amd64 -> almalinux-8 + target="${ID%-*}" + if [[ "${architecture}" == "arm64" ]]; then + # Example: almalinux-8 -> almalinux-8-aarch64 + target="${target}-aarch64" + fi + echo "TARGET=${target}" >> "${GITHUB_ENV}" + case "${target}" in + almalinux-*|amazon-linux-*|centos-*) + echo "TASK_NAMESPACE=yum" >> "${GITHUB_ENV}" + echo "YUM_TARGETS=${target}" >> "${GITHUB_ENV}" + echo "UPLOAD_EXTENSIONS=rpm" >> "${GITHUB_ENV}" + ;; + *) + echo "TASK_NAMESPACE=apt" >> "${GITHUB_ENV}" + echo "APT_TARGETS=${target}" >> "${GITHUB_ENV}" + upload_extensions=(ddeb deb debian.tar.xz .dsc .orig.tar.gz) + echo "UPLOAD_EXTENSIONS=${upload_extensions[*]}" >> "${GITHUB_ENV}" + ;; + esac - name: Checkout Arrow uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 with: @@ -123,8 +139,8 @@ jobs: uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 with: path: .docker - key: package-linux-${{ matrix.target }}-${{ hashFiles('cpp/**') }} - restore-keys: package-linux-${{ matrix.target }}- + key: package-linux-${{ matrix.id }}-${{ hashFiles('cpp/**') }} + restore-keys: package-linux-${{ matrix.id }}- - name: Set up Ruby run: | sudo apt update @@ -133,7 +149,7 @@ jobs: ruby \ ruby-dev - name: Prepare apache-arrow-apt-source for arm64 - if: ${{ matrix.architecture }} == 'arm64' + if: ${{ env.ARCHITECTURE == 'arm64' }} run: | pushd dev/tasks/linux-packages/apache-arrow-apt-source/apt for target in *-*; do @@ -141,7 +157,7 @@ jobs: done popd - name: Prepare apache-arrow-release for arm64 - if: ${{ matrix.architecture }} == 'arm64' + if: ${{ env.ARCHITECTURE == 'arm64' }} run: | pushd dev/tasks/linux-packages/apache-arrow-release/yum for target in *-*; do @@ -165,12 +181,10 @@ jobs: pushd dev/tasks/linux-packages rake version:update ARROW_RELEASE_TIME="$(date --iso-8601=seconds)" rake docker:pull || : - rake --trace ${{ matrix.task-namespace }}:build BUILD_DIR=build + rake --trace ${TASK_NAMESPACE}:build BUILD_DIR=build popd env: - APT_TARGETS: ${{ matrix.target }} REPO: ghcr.io/${{ github.repository }}-package-linux - YUM_TARGETS: ${{ matrix.target }} - name: Login to Dockerhub if: >- success() && @@ -195,27 +209,25 @@ jobs: rake docker:push popd env: - APT_TARGETS: ${{ matrix.target }} REPO: ${{ secrets.REPO }} - YUM_TARGETS: ${{ matrix.target }} - name: Build artifact tarball shell: bash run: | set -ex pushd dev/tasks/linux-packages - tar cvzf ${{ matrix.target }}-${{ matrix.architecture }}.tar.gz */${{ matrix.task-namespace }}/repositories + tar cvzf ${{ matrix.id }}.tar.gz */${TASK_NAMESPACE}/repositories popd - name: Upload the artifacts to the job uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 with: - name: ${{ matrix.target }}-${{ matrix.architecture }} - path: dev/tasks/linux-packages/${{ matrix.target }}-${{ matrix.architecture }}.tar.gz + name: ${{ matrix.id }} + path: dev/tasks/linux-packages/${{ matrix.id }}.tar.gz - name: Download Artifacts uses: actions/download-artifact@v4 with: path: artifacts-downloaded - name: Expected uploaded yum artifacts - if: matrix.task-namespace == 'yum' + if: ${{ env.TASK_NAMESPACE == 'yum' }} id: expected-yum-artifacts run: | set -ex @@ -246,7 +258,7 @@ jobs: parquet[0-9]+-libs-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm parquet-tools-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm " - if [ "${{ matrix.target }}" != "centos-7" ]; then + if [ "${TARGET}" != "centos-7" ]; then artifacts="$artifacts arrow[0-9]+-acero-libs-debuginfo-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm arrow[0-9]+-compute-libs-debuginfo-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm @@ -288,7 +300,7 @@ jobs: echo 'EOF' } >> $GITHUB_OUTPUT - name: Validate uploaded yum artifacts - if: matrix.task-namespace == 'yum' + if: ${{ env.TASK_NAMESPACE == 'yum' }} id: validate-artifacts shell: bash run: | @@ -296,12 +308,12 @@ jobs: expected_patterns="${{ steps.expected-yum-artifacts.outputs.artifacts }}" # Find all RPM files in the extracted directory - pushd artifacts-downloaded/${{ matrix.target }}-${{ matrix.architecture }} - tar xvf ${{ matrix.target }}-${{ matrix.architecture }}.tar.gz + pushd artifacts-downloaded/${{ matrix.id }} + tar xvf ${{ matrix.id }}.tar.gz tree . popd - found_files=$(find artifacts-downloaded/${{ matrix.target }}-${{ matrix.architecture }} -type f -name '*.rpm' | sort) + found_files=$(find artifacts-downloaded/${{ matrix.id }} -type f -name '*.rpm' | sort) echo "Found files: $found_files" # Prepare expected patterns for matching @@ -349,10 +361,10 @@ jobs: echo "All expected artifacts are present, and no unexpected artifacts found." - name: Set up test run: | - mv artifacts-downloaded/${{ matrix.target }}-${{ matrix.architecture }} dev/tasks/linux-packages + mv artifacts-downloaded/${{ matrix.id }} dev/tasks/linux-packages pushd dev/tasks/linux-packages - rm -rf */${{ matrix.task-namespace }}/repositories # Remove artifacts - tar xf ${{ matrix.target }}-${{ matrix.architecture }}.tar.gz # Use uploaded artifacts + rm -rf */${TASK_NAMESPACE}/repositories # Remove artifacts + tar xf ${{ matrix.id }}.tar.gz # Use uploaded artifacts popd sudo apt install -y \ apt-utils \ @@ -371,7 +383,7 @@ jobs: gpg --full-generate-key --batch GPG_KEY_ID=$(gpg --list-keys --with-colon test@example.com | grep fpr | cut -d: -f10) echo "GPG_KEY_ID=${GPG_KEY_ID}" >> ${GITHUB_ENV} - case "${{ matrix.target }}" in + case "${{ matrix.id }}" in almalinux-*|amazon-linux-*|centos-*) repositories_dir=dev/tasks/linux-packages/apache-arrow-release/yum/repositories rpm2cpio ${repositories_dir}/*/*/*/Packages/apache-arrow-release-*.rpm | \ @@ -385,12 +397,10 @@ jobs: run: | set -e pushd dev/tasks/linux-packages - rake --trace ${{ matrix.task-namespace }}:test + rake --trace ${TASK_NAMESPACE}:test popd env: - APT_TARGETS: ${{ matrix.target }} ARCHITECTURE: ${{ matrix.architecture }} - YUM_TARGETS: ${{ matrix.target }} report-package-linux: needs: From 9a7b05413e88dc9c72ffe6d6854134d463dd3b45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 22 Sep 2025 13:34:25 +0200 Subject: [PATCH 18/36] Add some debian based packages --- .github/workflows/package_linux.yml | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/.github/workflows/package_linux.yml b/.github/workflows/package_linux.yml index 0e9f824d8f29e..8a47113f7f1a2 100644 --- a/.github/workflows/package_linux.yml +++ b/.github/workflows/package_linux.yml @@ -92,6 +92,8 @@ jobs: id: - almalinux-8-amd64 - almalinux-8-arm64 + - ubuntu-noble-amd64 + - ubuntu-noble-arm64 env: DOCKER_VOLUME_PREFIX: ".docker/" ARROW_VERSION: ${{ inputs.version || ''}} @@ -107,24 +109,28 @@ jobs: echo "ARCHITECTURE=${architecture}" >> "${GITHUB_ENV}" # Example: almalinux-8-amd64 -> almalinux-8 target="${ID%-*}" - if [[ "${architecture}" == "arm64" ]]; then - # Example: almalinux-8 -> almalinux-8-aarch64 - target="${target}-aarch64" - fi - echo "TARGET=${target}" >> "${GITHUB_ENV}" case "${target}" in almalinux-*|amazon-linux-*|centos-*) echo "TASK_NAMESPACE=yum" >> "${GITHUB_ENV}" echo "YUM_TARGETS=${target}" >> "${GITHUB_ENV}" echo "UPLOAD_EXTENSIONS=rpm" >> "${GITHUB_ENV}" + if [[ "${architecture}" == "arm64" ]]; then + # Example: almalinux-8 -> almalinux-8-aarch64 + target="${target}-aarch64" + fi ;; *) echo "TASK_NAMESPACE=apt" >> "${GITHUB_ENV}" echo "APT_TARGETS=${target}" >> "${GITHUB_ENV}" upload_extensions=(ddeb deb debian.tar.xz .dsc .orig.tar.gz) echo "UPLOAD_EXTENSIONS=${upload_extensions[*]}" >> "${GITHUB_ENV}" + if [[ "${architecture}" == "arm64" ]]; then + # Example: ubuntu-noble -> ubuntu-noble-arm64 + target="${target}-arm64" + fi ;; esac + echo "TARGET=${target}" >> "${GITHUB_ENV}" - name: Checkout Arrow uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 with: From 98e9512f8c7429cdb3b1d2ef7761fd568fd789c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 22 Sep 2025 14:24:10 +0200 Subject: [PATCH 19/36] Fix architexture to use env variable --- .github/workflows/package_linux.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/package_linux.yml b/.github/workflows/package_linux.yml index 8a47113f7f1a2..44e146149f599 100644 --- a/.github/workflows/package_linux.yml +++ b/.github/workflows/package_linux.yml @@ -405,8 +405,6 @@ jobs: pushd dev/tasks/linux-packages rake --trace ${TASK_NAMESPACE}:test popd - env: - ARCHITECTURE: ${{ matrix.architecture }} report-package-linux: needs: From dbcde9566db5b0a9c33148d15203fc1291fcf218 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 22 Sep 2025 16:03:40 +0200 Subject: [PATCH 20/36] Define YUM_TARGETS and APT_TARGETS after appending architecture if necessary --- .github/workflows/package_linux.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/package_linux.yml b/.github/workflows/package_linux.yml index 44e146149f599..d0ab19cb78c4d 100644 --- a/.github/workflows/package_linux.yml +++ b/.github/workflows/package_linux.yml @@ -112,22 +112,22 @@ jobs: case "${target}" in almalinux-*|amazon-linux-*|centos-*) echo "TASK_NAMESPACE=yum" >> "${GITHUB_ENV}" - echo "YUM_TARGETS=${target}" >> "${GITHUB_ENV}" echo "UPLOAD_EXTENSIONS=rpm" >> "${GITHUB_ENV}" if [[ "${architecture}" == "arm64" ]]; then # Example: almalinux-8 -> almalinux-8-aarch64 target="${target}-aarch64" fi + echo "YUM_TARGETS=${target}" >> "${GITHUB_ENV}" ;; *) echo "TASK_NAMESPACE=apt" >> "${GITHUB_ENV}" - echo "APT_TARGETS=${target}" >> "${GITHUB_ENV}" upload_extensions=(ddeb deb debian.tar.xz .dsc .orig.tar.gz) echo "UPLOAD_EXTENSIONS=${upload_extensions[*]}" >> "${GITHUB_ENV}" if [[ "${architecture}" == "arm64" ]]; then # Example: ubuntu-noble -> ubuntu-noble-arm64 target="${target}-arm64" fi + echo "APT_TARGETS=${target}" >> "${GITHUB_ENV}" ;; esac echo "TARGET=${target}" >> "${GITHUB_ENV}" From b336d34ad72d57616c26c0c458e21f166b7b04bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 22 Sep 2025 17:54:38 +0200 Subject: [PATCH 21/36] Test all linux-packages together --- .github/workflows/package_linux.yml | 150 ++++------------------------ 1 file changed, 17 insertions(+), 133 deletions(-) diff --git a/.github/workflows/package_linux.yml b/.github/workflows/package_linux.yml index d0ab19cb78c4d..e5557f0e512b0 100644 --- a/.github/workflows/package_linux.yml +++ b/.github/workflows/package_linux.yml @@ -92,6 +92,23 @@ jobs: id: - almalinux-8-amd64 - almalinux-8-arm64 + - almalinux-9-amd64 + - almalinux-9-arm64 + - almalinux-10-amd64 + - almalinux-10-arm64 + - amazon-linux-2023-amd64 + - amazon-linux-2023-arm64 + - centos-9-stream-amd64 + - centos-9-stream-arm64 + - centos-7-amd64 + - debian-bookworm-amd64 + - debian-bookworm-arm64 + - debian-trixie-amd64 + - debian-trixie-arm64 + - debian-forky-amd64 + - debian-forky-arm64 + - ubuntu-jammy-amd64 + - ubuntu-jammy-arm64 - ubuntu-noble-amd64 - ubuntu-noble-arm64 env: @@ -232,139 +249,6 @@ jobs: uses: actions/download-artifact@v4 with: path: artifacts-downloaded - - name: Expected uploaded yum artifacts - if: ${{ env.TASK_NAMESPACE == 'yum' }} - id: expected-yum-artifacts - run: | - set -ex - artifacts=" - apache-arrow-release-${NO_RC_VERSION}-1.[a-z0-9]+.noarch.rpm - apache-arrow-release-${NO_RC_VERSION}-1.[a-z0-9]+.src.rpm - arrow-${NO_RC_VERSION}-1.[a-z0-9]+.src.rpm - arrow[0-9]+-acero-libs-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow-acero-devel-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow[0-9]+-compute-libs-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow-compute-devel-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow[0-9]+-dataset-libs-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow-dataset-devel-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow-dataset-glib-devel-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow-dataset-glib-doc-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow[0-9]+-dataset-glib-libs-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow-debuginfo-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow-devel-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow-glib-devel-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow-glib-doc-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow[0-9]+-glib-libs-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow[0-9]+-libs-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow-tools-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - parquet-devel-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - parquet-glib-devel-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - parquet-glib-doc-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - parquet[0-9]+-glib-libs-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - parquet[0-9]+-libs-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - parquet-tools-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - " - if [ "${TARGET}" != "centos-7" ]; then - artifacts="$artifacts - arrow[0-9]+-acero-libs-debuginfo-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow[0-9]+-compute-libs-debuginfo-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow[0-9]+-dataset-glib-libs-debuginfo-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow[0-9]+-dataset-libs-debuginfo-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow-debugsource-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow-flight-devel-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow-flight-glib-devel-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow-flight-glib-doc-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow-flight-sql-devel-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow-flight-sql-glib-devel-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow-flight-sql-glib-doc-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow[0-9]+-flight-glib-libs-debuginfo-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow[0-9]+-flight-glib-libs-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow[0-9]+-flight-libs-debuginfo-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow[0-9]+-flight-libs-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow[0-9]+-flight-sql-glib-libs-debuginfo-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow[0-9]+-flight-sql-glib-libs-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow[0-9]+-flight-sql-libs-debuginfo-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow[0-9]+-flight-sql-libs-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow[0-9]+-glib-libs-debuginfo-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow[0-9]+-libs-debuginfo-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow-tools-debuginfo-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - gandiva-devel-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - gandiva-glib-devel-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - gandiva-glib-doc-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - gandiva[0-9]+-glib-libs-debuginfo-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - gandiva[0-9]+-glib-libs-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - gandiva[0-9]+-libs-debuginfo-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - gandiva[0-9]+-libs-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - parquet[0-9]+-glib-libs-debuginfo-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - parquet[0-9]+-libs-debuginfo-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm - parquet-tools-debuginfo-${NO_RC_VERSION}-1.[a-z0-9]+.[a-z0-9_]+.rpm" - fi - echo $artifacts - { - echo 'artifacts<> $GITHUB_OUTPUT - - name: Validate uploaded yum artifacts - if: ${{ env.TASK_NAMESPACE == 'yum' }} - id: validate-artifacts - shell: bash - run: | - set -ex - expected_patterns="${{ steps.expected-yum-artifacts.outputs.artifacts }}" - - # Find all RPM files in the extracted directory - pushd artifacts-downloaded/${{ matrix.id }} - tar xvf ${{ matrix.id }}.tar.gz - tree . - popd - - found_files=$(find artifacts-downloaded/${{ matrix.id }} -type f -name '*.rpm' | sort) - echo "Found files: $found_files" - - # Prepare expected patterns for matching - echo "$expected_patterns" | grep -v '^\s*$' > expected_patterns.txt - - # Check for missing artifacts - missing="" - for pattern in $(cat expected_patterns.txt); do - # Escape dots for grep - grep_pattern=$(echo "$pattern" | sed 's/\./\\./g') - match=$(echo "$found_files" | grep -E "$grep_pattern") - if [ -z "$match" ]; then - missing="$missing\n$pattern" - fi - done - - # Check for unexpected artifacts - unexpected="" - for file in $found_files; do - matched=false - for pattern in $(cat expected_patterns.txt); do - grep_pattern=$(echo "$pattern" | sed 's/\./\\./g') - if echo "$file" | grep -qE "$grep_pattern"; then - matched=true - break - fi - done - if [ "$matched" = false ]; then - unexpected="$unexpected\n$file" - fi - done - - if [ -n "$missing" ]; then - echo "Missing expected artifacts:" - echo -e "$missing" - exit 1 - fi - - if [ -n "$unexpected" ]; then - echo "Unexpected artifacts found:" - echo -e "$unexpected" - exit 1 - fi - - echo "All expected artifacts are present, and no unexpected artifacts found." - name: Set up test run: | mv artifacts-downloaded/${{ matrix.id }} dev/tasks/linux-packages From bc8df4ff15a62d7f7da95047b6dd49411c56c1f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 22 Sep 2025 19:45:15 +0200 Subject: [PATCH 22/36] Only download the specific artifact instead of downloading all of them --- .github/workflows/package_linux.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/package_linux.yml b/.github/workflows/package_linux.yml index e5557f0e512b0..67554c40d3410 100644 --- a/.github/workflows/package_linux.yml +++ b/.github/workflows/package_linux.yml @@ -249,6 +249,7 @@ jobs: uses: actions/download-artifact@v4 with: path: artifacts-downloaded + name: ${{ matrix.id }} - name: Set up test run: | mv artifacts-downloaded/${{ matrix.id }} dev/tasks/linux-packages From ec040bb114f0ff95e4f91ac5267d777d4976d533 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 22 Sep 2025 20:28:15 +0200 Subject: [PATCH 23/36] How does the artifact change when using a specific name? --- .github/workflows/package_linux.yml | 41 +++++++++++++++-------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/.github/workflows/package_linux.yml b/.github/workflows/package_linux.yml index 67554c40d3410..c2f73881e2f1f 100644 --- a/.github/workflows/package_linux.yml +++ b/.github/workflows/package_linux.yml @@ -92,25 +92,25 @@ jobs: id: - almalinux-8-amd64 - almalinux-8-arm64 - - almalinux-9-amd64 - - almalinux-9-arm64 - - almalinux-10-amd64 - - almalinux-10-arm64 - - amazon-linux-2023-amd64 - - amazon-linux-2023-arm64 - - centos-9-stream-amd64 - - centos-9-stream-arm64 - - centos-7-amd64 - - debian-bookworm-amd64 - - debian-bookworm-arm64 - - debian-trixie-amd64 - - debian-trixie-arm64 - - debian-forky-amd64 - - debian-forky-arm64 - - ubuntu-jammy-amd64 - - ubuntu-jammy-arm64 - - ubuntu-noble-amd64 - - ubuntu-noble-arm64 + #- almalinux-9-amd64 + #- almalinux-9-arm64 + #- almalinux-10-amd64 + #- almalinux-10-arm64 + #- amazon-linux-2023-amd64 + #- amazon-linux-2023-arm64 + #- centos-9-stream-amd64 + #- centos-9-stream-arm64 + #- centos-7-amd64 + #- debian-bookworm-amd64 + #- debian-bookworm-arm64 + #- debian-trixie-amd64 + #- debian-trixie-arm64 + #- debian-forky-amd64 + #- debian-forky-arm64 + #- ubuntu-jammy-amd64 + #- ubuntu-jammy-arm64 + #- ubuntu-noble-amd64 + #- ubuntu-noble-arm64 env: DOCKER_VOLUME_PREFIX: ".docker/" ARROW_VERSION: ${{ inputs.version || ''}} @@ -252,7 +252,8 @@ jobs: name: ${{ matrix.id }} - name: Set up test run: | - mv artifacts-downloaded/${{ matrix.id }} dev/tasks/linux-packages + ls -lrt artifacts-downloaded + mv artifacts-downloaded/* dev/tasks/linux-packages pushd dev/tasks/linux-packages rm -rf */${TASK_NAMESPACE}/repositories # Remove artifacts tar xf ${{ matrix.id }}.tar.gz # Use uploaded artifacts From b1b98c8533e5490d78968e6ade834a405dc51c6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 23 Sep 2025 11:58:15 +0200 Subject: [PATCH 24/36] Untar on dowload script --- .github/workflows/package_linux.yml | 1 - dev/release/04-binary-download.sh | 11 +++++++++++ dev/release/utils-watch-gh-workflow.sh | 2 +- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/.github/workflows/package_linux.yml b/.github/workflows/package_linux.yml index c2f73881e2f1f..bbd7656afe474 100644 --- a/.github/workflows/package_linux.yml +++ b/.github/workflows/package_linux.yml @@ -252,7 +252,6 @@ jobs: name: ${{ matrix.id }} - name: Set up test run: | - ls -lrt artifacts-downloaded mv artifacts-downloaded/* dev/tasks/linux-packages pushd dev/tasks/linux-packages rm -rf */${TASK_NAMESPACE}/repositories # Remove artifacts diff --git a/dev/release/04-binary-download.sh b/dev/release/04-binary-download.sh index 2a247138a6d97..20544c934c87c 100755 --- a/dev/release/04-binary-download.sh +++ b/dev/release/04-binary-download.sh @@ -50,3 +50,14 @@ archery crossbow download-artifacts --no-fetch ${CROSSBOW_JOB_ID} "$@" RUN_ID=$(get_run_id) # Download the artifacts created by the package_linux.yml workflow download_artifacts "${SOURCE_DIR}/../../packages/${CROSSBOW_JOB_ID}" + +# Find and extract all .tar.gz files in their own artifact directory +find "${SOURCE_DIR}/../../packages/${CROSSBOW_JOB_ID}" -name "*.tar.gz" -type f | while read -r tarfile; do + echo "Extracting: ${tarfile}" + tarfile_dir=$(dirname "${tarfile}") + + # Extract to the same directory as the tar.gz file + tar -xzf "${tarfile}" -C "${tarfile_dir}" + # Should we remove the tar.gz file after extraction? + # rm "${tarfile}" +done diff --git a/dev/release/utils-watch-gh-workflow.sh b/dev/release/utils-watch-gh-workflow.sh index 7117e7859f2fd..ee23abc7d9e33 100755 --- a/dev/release/utils-watch-gh-workflow.sh +++ b/dev/release/utils-watch-gh-workflow.sh @@ -28,7 +28,7 @@ fi TAG=$1 WORKFLOW=$2 -: "${REPOSITORY:=${GITHUB_REPOSITORY:-raulcd/arrow}}" +: "${REPOSITORY:=${GITHUB_REPOSITORY:-apache/arrow}}" get_run_id() { gh run list \ From 51d7766ae14fae3a8005b3029670cbbacf02cd50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 23 Sep 2025 12:02:28 +0200 Subject: [PATCH 25/36] Simplify apt:rc:copy and yum:rc:copy to maintain repo folder structure from tar.gz --- dev/release/binary-task.rb | 124 ++++++++++++------------------------- 1 file changed, 40 insertions(+), 84 deletions(-) diff --git a/dev/release/binary-task.rb b/dev/release/binary-task.rb index 7fc49f3eb8f22..1205d8aae5734 100644 --- a/dev/release/binary-task.rb +++ b/dev/release/binary-task.rb @@ -1645,45 +1645,37 @@ def define_apt_rc_tasks progress_label = "Copying: #{distribution} #{code_name}" progress_reporter = ProgressReporter.new(progress_label) - distribution_dir = "#{incoming_dir}/#{distribution}" - pool_dir = "#{distribution_dir}/pool/#{code_name}" - rm_rf(pool_dir, verbose: verbose?) - mkdir_p(pool_dir, verbose: verbose?) - source_dir_prefix = "#{artifacts_dir}/#{distribution}-#{code_name}" - # apache/arrow uses debian-bookworm-{amd64,arm64} but - # apache/arrow-adbc uses debian-bookworm. So the following - # glob must much both of them. - Dir.glob("#{source_dir_prefix}*/*") do |path| - base_name = File.basename(path) + destination_prefix = "#{incoming_dir}/#{distribution}" + rm_rf(destination_prefix, verbose: verbose?) + + # Copy the entire repository structure + source_pattern = "#{artifacts_dir}/#{distribution}-#{code_name}*/*/" \ + "apt/repositories/#{distribution}" + Dir.glob(source_pattern) do |repo_source| + progress_reporter.increment_max + mkdir_p(File.dirname(destination_prefix), verbose: verbose?) + cp_r(repo_source, destination_prefix, preserve: true, verbose: verbose?) + progress_reporter.advance + end + + # Create latest package links after copying + Dir.glob("#{destination_prefix}/**/*-apt-source_*.deb") do |apt_source_path| + base_name = File.basename(apt_source_path) package_name = ENV["DEB_PACKAGE_NAME"] if package_name.nil? or package_name.empty? - if base_name.start_with?("apache-arrow-apt-source") - package_name = "apache-arrow-apt-source" - else - package_name = "apache-arrow" - end + package_name = "apache-arrow-apt-source" end - destination_path = [ - pool_dir, - component, - package_name[0], - package_name, - base_name, + + latest_apt_source_package_path = [ + destination_prefix, + "#{package_name}-latest-#{code_name}.deb" ].join("/") - copy_artifact(path, - destination_path, + + copy_artifact(apt_source_path, + latest_apt_source_package_path, progress_reporter) - case base_name - when /\A[^_]+-apt-source_.*\.deb\z/ - latest_apt_source_package_path = [ - distribution_dir, - "#{package_name}-latest-#{code_name}.deb" - ].join("/") - copy_artifact(path, - latest_apt_source_package_path, - progress_reporter) - end end + progress_reporter.finish end end @@ -2043,59 +2035,23 @@ def define_yum_rc_tasks progress_label = "Copying: #{distribution} #{distribution_version}" progress_reporter = ProgressReporter.new(progress_label) - destination_prefix = [ - incoming_dir, - distribution, - distribution_version, - ].join("/") + destination_prefix = "#{incoming_dir}/#{distribution}/#{distribution_version}" rm_rf(destination_prefix, verbose: verbose?) - source_dir_prefix = - "#{artifacts_dir}/#{distribution}-#{distribution_version}" - Dir.glob("#{source_dir_prefix}*/*.rpm") do |path| - base_name = File.basename(path) - type = base_name.split(".")[-2] - destination_paths = [] - case type - when "src" - destination_paths << [ - destination_prefix, - "Source", - "SPackages", - base_name, - ].join("/") - when "noarch" - yum_architectures.each do |architecture| - destination_paths << [ - destination_prefix, - architecture, - "Packages", - base_name, - ].join("/") - end - else - destination_paths << [ - destination_prefix, - type, - "Packages", - base_name, - ].join("/") - end - destination_paths.each do |destination_path| - copy_artifact(path, - destination_path, - progress_reporter) - end - case base_name - when /\A(apache-arrow-release)-.*\.noarch\.rpm\z/ - package_name = $1 - latest_release_package_path = [ - destination_prefix, - "#{package_name}-latest.rpm" - ].join("/") - copy_artifact(path, - latest_release_package_path, - progress_reporter) + + # Copy all repository structures for this distribution/version + source_pattern = "#{artifacts_dir}/#{distribution}-#{distribution_version}*/*/" \ + "yum/repositories/#{distribution}/#{distribution_version}" + Dir.glob(source_pattern) do |repo_source| + progress_reporter.increment_max + + # Copy and merge all architectures + Dir.glob("#{repo_source}/*") do |arch_dir| + arch_name = File.basename(arch_dir) + destination_arch_dir = "#{destination_prefix}/#{arch_name}" + mkdir_p(File.dirname(destination_arch_dir), verbose: verbose?) + cp_r(arch_dir, destination_arch_dir, preserve: true, verbose: verbose?) end + progress_reporter.advance end progress_reporter.finish From 9528414c344519eefa1b3acc19e92b5b1d10410a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 23 Sep 2025 12:14:39 +0200 Subject: [PATCH 26/36] Remove old Linux Package tasks --- dev/tasks/linux-packages/github.linux.yml | 129 ------------ dev/tasks/tasks.yml | 241 ---------------------- 2 files changed, 370 deletions(-) delete mode 100644 dev/tasks/linux-packages/github.linux.yml diff --git a/dev/tasks/linux-packages/github.linux.yml b/dev/tasks/linux-packages/github.linux.yml deleted file mode 100644 index e514931fd1b6a..0000000000000 --- a/dev/tasks/linux-packages/github.linux.yml +++ /dev/null @@ -1,129 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -{% import 'macros.jinja' as macros with context %} - -{{ macros.github_header() }} - -jobs: - package: - name: Package - {% if architecture == "amd64" %} - runs-on: ubuntu-latest - {% else %} - runs-on: ubuntu-24.04-arm - {% endif %} - env: - ARCHITECTURE: {{ architecture }} - steps: - {{ macros.github_checkout_arrow()|indent }} - {{ macros.github_login_dockerhub()|indent }} - {{ macros.github_free_space()|indent }} - - - name: Set up Ruby - run: | - sudo apt update - sudo apt install -y \ - rake \ - ruby \ - ruby-dev - - name: Prepare apache-arrow-apt-source for arm64 - if: | - env.ARCHITECTURE == 'arm64' - run: | - pushd arrow/dev/tasks/linux-packages/apache-arrow-apt-source/apt - for target in *-*; do - cp -a ${target} ${target}-arm64 - done - popd - - name: Prepare apache-arrow-release for arm64 - if: | - env.ARCHITECTURE == 'arm64' - run: | - pushd arrow/dev/tasks/linux-packages/apache-arrow-release/yum - for target in *-*; do - cp -a ${target} ${target}-aarch64 - done - popd - - name: Build - run: | - set -e - pushd arrow/dev/tasks/linux-packages - rake version:update ARROW_RELEASE_TIME="$(date --iso-8601=seconds)" - rake docker:pull || : - rake --trace {{ task_namespace }}:build BUILD_DIR=build - popd - env: - APT_TARGETS: {{ target }} - ARROW_VERSION: {{ arrow.version }} - REPO: {{ '${{ secrets.REPO }}' }} - YUM_TARGETS: {{ target }} - - name: Docker Push - continue-on-error: true - shell: bash - run: | - pushd arrow/dev/tasks/linux-packages - rake docker:push - popd - env: - APT_TARGETS: {{ target }} - REPO: {{ '${{ secrets.REPO }}' }} - YUM_TARGETS: {{ target }} - - {% set patterns = upload_extensions | format_all("arrow/dev/tasks/linux-packages/*/*/repositories/**/*{}") %} - {{ macros.github_upload_releases(patterns)|indent }} - - - name: Set up test - run: | - sudo apt install -y \ - apt-utils \ - cpio \ - createrepo-c \ - devscripts \ - gpg \ - rpm \ - rsync - gem install --user-install apt-dists-merge - (echo "Key-Type: RSA"; \ - echo "Key-Length: 4096"; \ - echo "Name-Real: Test"; \ - echo "Name-Email: test@example.com"; \ - echo "%no-protection") | \ - gpg --full-generate-key --batch - GPG_KEY_ID=$(gpg --list-keys --with-colon test@example.com | grep fpr | cut -d: -f10) - echo "GPG_KEY_ID=${GPG_KEY_ID}" >> ${GITHUB_ENV} - case "{{ target }}" in - almalinux-*|amazon-linux-*|centos-*) - repositories_dir=arrow/dev/tasks/linux-packages/apache-arrow-release/yum/repositories - rpm2cpio ${repositories_dir}/*/*/*/Packages/apache-arrow-release-*.rpm | \ - cpio -id - mv etc/pki/rpm-gpg/RPM-GPG-KEY-Apache-Arrow \ - arrow/dev/tasks/linux-packages/KEYS - ;; - esac - gpg --export --armor test@example.com >> arrow/dev/tasks/linux-packages/KEYS - - name: Test - run: | - set -e - pushd arrow/dev/tasks/linux-packages - rake --trace {{ task_namespace }}:test - rm -rf {{ task_namespace }}/repositories - popd - env: - APT_TARGETS: {{ target }} - ARROW_VERSION: {{ arrow.version }} - YUM_TARGETS: {{ target }} diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index caad31911c938..45446a0c13d11 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -28,38 +28,14 @@ groups: - wheel-* - python-sdist - linux: - - almalinux-* - - amazon-linux-* - - centos-* - - debian-* - - ubuntu-* - - linux-amd64: - - almalinux-*-amd64 - - amazon-linux-*-amd64 - - centos-*-amd64 - - debian-*-amd64 - - ubuntu-*-amd64 - - linux-arm64: - - almalinux-*-arm64 - - debian-*-arm64 - - ubuntu-*-arm64 - homebrew: - homebrew-* packaging: - - almalinux-* - - amazon-linux-* - - centos-* - conan-* - - debian-* - matlab - python-sdist - r-binary-packages - - ubuntu-* - wheel-* - test-debian-*-docs @@ -291,223 +267,6 @@ tasks: artifacts: - pyarrow-{no_rc_version}.tar.gz -{############################## Linux PKGS ####################################} - -{% for target in ["debian-bookworm", - "debian-trixie", - "debian-forky", - "ubuntu-jammy", - "ubuntu-noble"] %} - {% for architecture in ["amd64", "arm64"] %} - {{ target }}-{{ architecture }}: - ci: github - template: linux-packages/github.linux.yml - params: - architecture: "{{ architecture }}" - {% if architecture == "amd64" %} - target: "{{ target }}" - {% else %} - target: "{{ target }}-arm64" - {% endif %} - task_namespace: "apt" - upload_extensions: - - .ddeb - - .deb - - .debian.tar.xz - - .dsc - - .orig.tar.gz - artifacts: - - arrow-tools_{no_rc_version}-1_[a-z0-9]+.deb - {% if architecture == "amd64" %} - - apache-arrow-apt-source_{no_rc_version}-1.debian.tar.xz - - apache-arrow-apt-source_{no_rc_version}-1.dsc - - apache-arrow-apt-source_{no_rc_version}-1_all.deb - - apache-arrow-apt-source_{no_rc_version}.orig.tar.gz - - apache-arrow_{no_rc_version}-1.debian.tar.xz - - apache-arrow_{no_rc_version}-1.dsc - - apache-arrow_{no_rc_version}.orig.tar.gz - {% endif %} - - gir1.2-arrow-1.0_{no_rc_version}-1_[a-z0-9]+.deb - - gir1.2-arrow-dataset-1.0_{no_rc_version}-1_[a-z0-9]+.deb - - gir1.2-arrow-flight-1.0_{no_rc_version}-1_[a-z0-9]+.deb - - gir1.2-arrow-flight-sql-1.0_{no_rc_version}-1_[a-z0-9]+.deb - - gir1.2-gandiva-1.0_{no_rc_version}-1_[a-z0-9]+.deb - - gir1.2-parquet-1.0_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-acero-dev_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-acero{so_version}-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-acero{so_version}_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-compute-dev_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-compute{so_version}-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-compute{so_version}_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-dataset-dev_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-dataset-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-dataset-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-dataset-glib{so_version}-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-dataset-glib{so_version}_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-dataset{so_version}-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-dataset{so_version}_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-dev_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-flight-dev_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-flight-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-flight-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-flight-glib{so_version}-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-flight-glib{so_version}_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-flight-sql-dev_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-flight-sql-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-flight-sql-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-flight-sql-glib{so_version}-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-flight-sql-glib{so_version}_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-flight-sql{so_version}-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-flight-sql{so_version}_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-flight{so_version}-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-flight{so_version}_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-glib{so_version}-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-glib{so_version}_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow{so_version}-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow{so_version}_{no_rc_version}-1_[a-z0-9]+.deb - - libgandiva-dev_{no_rc_version}-1_[a-z0-9]+.deb - - libgandiva-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb - - libgandiva-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb - - libgandiva-glib{so_version}-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libgandiva-glib{so_version}_{no_rc_version}-1_[a-z0-9]+.deb - - libgandiva{so_version}-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libgandiva{so_version}_{no_rc_version}-1_[a-z0-9]+.deb - - libparquet-dev_{no_rc_version}-1_[a-z0-9]+.deb - - libparquet-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb - - libparquet-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb - - libparquet-glib{so_version}-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libparquet-glib{so_version}_{no_rc_version}-1_[a-z0-9]+.deb - - libparquet{so_version}-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libparquet{so_version}_{no_rc_version}-1_[a-z0-9]+.deb - - parquet-tools_{no_rc_version}-1_[a-z0-9]+.deb - {% if architecture == "amd64" %} - - gir1.2-arrow-cuda-1.0_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-cuda-dev_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-cuda-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-cuda-glib{so_version}-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-cuda-glib{so_version}_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-cuda{so_version}-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-cuda{so_version}_{no_rc_version}-1_[a-z0-9]+.deb - {% endif %} - {% endfor %} -{% endfor %} - -{% for target in ["almalinux-10", - "almalinux-9", - "almalinux-8", - "amazon-linux-2023", - "centos-9-stream", - "centos-7"] %} - {% set is_rhel7_based = (target == "centos-7") %} - {% for architecture - in ["amd64", "arm64"] - if not (target == "centos-7" and architecture == "arm64") %} - {{ target }}-{{ architecture }}: - ci: github - template: linux-packages/github.linux.yml - params: - architecture: "{{ architecture }}" - {% if architecture == "amd64" %} - target: "{{ target }}" - {% else %} - target: "{{ target }}-aarch64" - {% endif %} - task_namespace: "yum" - upload_extensions: - - .rpm - artifacts: - {% if architecture == "amd64" %} - - apache-arrow-release-{no_rc_version}-1.[a-z0-9]+.noarch.rpm - - apache-arrow-release-{no_rc_version}-1.[a-z0-9]+.src.rpm - {% endif %} - - arrow[0-9]+-acero-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - - arrow-acero-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - {% if not is_rhel7_based %} - - arrow[0-9]+-acero-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - {% endif %} - - arrow[0-9]+-compute-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - - arrow-compute-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - {% if not is_rhel7_based %} - - arrow[0-9]+-compute-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - {% endif %} - - arrow[0-9]+-dataset-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - - arrow-dataset-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - - arrow-dataset-glib-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - - arrow-dataset-glib-doc-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - {% if not is_rhel7_based %} - - arrow[0-9]+-dataset-glib-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - {% endif %} - - arrow[0-9]+-dataset-glib-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - {% if not is_rhel7_based %} - - arrow[0-9]+-dataset-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - {% endif %} - - arrow-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - {% if not is_rhel7_based %} - - arrow-debugsource-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - {% endif %} - - arrow-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - {% if not is_rhel7_based %} - - arrow-flight-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - - arrow-flight-glib-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - - arrow-flight-glib-doc-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - - arrow-flight-sql-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - - arrow-flight-sql-glib-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - - arrow-flight-sql-glib-doc-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - - arrow[0-9]+-flight-glib-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - - arrow[0-9]+-flight-glib-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - - arrow[0-9]+-flight-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - - arrow[0-9]+-flight-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - - arrow[0-9]+-flight-sql-glib-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - - arrow[0-9]+-flight-sql-glib-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - - arrow[0-9]+-flight-sql-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - - arrow[0-9]+-flight-sql-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - {% endif %} - - arrow-glib-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - - arrow-glib-doc-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - {% if not is_rhel7_based %} - - arrow[0-9]+-glib-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - {% endif %} - - arrow[0-9]+-glib-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - {% if not is_rhel7_based %} - - arrow[0-9]+-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - {% endif %} - - arrow[0-9]+-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - {% if architecture == "amd64" %} - - arrow-{no_rc_version}-1.[a-z0-9]+.src.rpm - {% endif %} - - arrow-tools-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - {% if not is_rhel7_based %} - - arrow-tools-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - {% endif %} - {% if not is_rhel7_based %} - - gandiva-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - - gandiva-glib-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - - gandiva-glib-doc-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - - gandiva[0-9]+-glib-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - - gandiva[0-9]+-glib-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - - gandiva[0-9]+-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - - gandiva[0-9]+-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - {% endif %} - - parquet-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - - parquet-glib-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - - parquet-glib-doc-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - {% if not is_rhel7_based %} - - parquet[0-9]+-glib-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - {% endif %} - - parquet[0-9]+-glib-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - {% if not is_rhel7_based %} - - parquet[0-9]+-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - {% endif %} - - parquet[0-9]+-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - - parquet-tools-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - {% if not is_rhel7_based %} - - parquet-tools-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - {% endif %} - {% endfor %} -{% endfor %} - ############################## Homebrew Tasks ################################ homebrew-cpp: From 7674972a314581a2a966ec43211956cea259c656 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 23 Sep 2025 12:15:19 +0200 Subject: [PATCH 27/36] Uncomment the rest of linux package tasks --- .github/workflows/package_linux.yml | 38 ++++++++++++++--------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/.github/workflows/package_linux.yml b/.github/workflows/package_linux.yml index bbd7656afe474..52e0f29a33796 100644 --- a/.github/workflows/package_linux.yml +++ b/.github/workflows/package_linux.yml @@ -92,25 +92,25 @@ jobs: id: - almalinux-8-amd64 - almalinux-8-arm64 - #- almalinux-9-amd64 - #- almalinux-9-arm64 - #- almalinux-10-amd64 - #- almalinux-10-arm64 - #- amazon-linux-2023-amd64 - #- amazon-linux-2023-arm64 - #- centos-9-stream-amd64 - #- centos-9-stream-arm64 - #- centos-7-amd64 - #- debian-bookworm-amd64 - #- debian-bookworm-arm64 - #- debian-trixie-amd64 - #- debian-trixie-arm64 - #- debian-forky-amd64 - #- debian-forky-arm64 - #- ubuntu-jammy-amd64 - #- ubuntu-jammy-arm64 - #- ubuntu-noble-amd64 - #- ubuntu-noble-arm64 + - almalinux-9-amd64 + - almalinux-9-arm64 + - almalinux-10-amd64 + - almalinux-10-arm64 + - amazon-linux-2023-amd64 + - amazon-linux-2023-arm64 + - centos-9-stream-amd64 + - centos-9-stream-arm64 + - centos-7-amd64 + - debian-bookworm-amd64 + - debian-bookworm-arm64 + - debian-trixie-amd64 + - debian-trixie-arm64 + - debian-forky-amd64 + - debian-forky-arm64 + - ubuntu-jammy-amd64 + - ubuntu-jammy-arm64 + - ubuntu-noble-amd64 + - ubuntu-noble-arm64 env: DOCKER_VOLUME_PREFIX: ".docker/" ARROW_VERSION: ${{ inputs.version || ''}} From f30f25eff3899f5d2cc9854550f828c79f88adb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 23 Sep 2025 12:42:04 +0200 Subject: [PATCH 28/36] Remove missing tasks --- dev/tasks/tasks.yml | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 45446a0c13d11..b598359236cc6 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -108,11 +108,6 @@ groups: nightly: - verify-rc-source-* - - almalinux-* - - amazon-linux-* - - debian-* - - ubuntu-* - - centos-* - conan-* - homebrew-cpp - test-* @@ -127,11 +122,6 @@ groups: - example-* nightly-packaging: - - almalinux-* - - amazon-linux-* - - debian-* - - ubuntu-* - - centos-* - conan-* - homebrew-cpp - wheel-* From 471e1100ddc2f14636a2574163572a138a4480ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 23 Sep 2025 15:46:07 +0200 Subject: [PATCH 29/36] Simplify .tar.gz folder structure more to be able to simplify even more binary tasks --- .github/workflows/package_linux.yml | 40 +++++++++++++++-------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/.github/workflows/package_linux.yml b/.github/workflows/package_linux.yml index 52e0f29a33796..b023bf4d82d2d 100644 --- a/.github/workflows/package_linux.yml +++ b/.github/workflows/package_linux.yml @@ -92,23 +92,23 @@ jobs: id: - almalinux-8-amd64 - almalinux-8-arm64 - - almalinux-9-amd64 - - almalinux-9-arm64 - - almalinux-10-amd64 - - almalinux-10-arm64 - - amazon-linux-2023-amd64 - - amazon-linux-2023-arm64 - - centos-9-stream-amd64 - - centos-9-stream-arm64 - - centos-7-amd64 - - debian-bookworm-amd64 - - debian-bookworm-arm64 - - debian-trixie-amd64 - - debian-trixie-arm64 - - debian-forky-amd64 - - debian-forky-arm64 - - ubuntu-jammy-amd64 - - ubuntu-jammy-arm64 + #- almalinux-9-amd64 + #- almalinux-9-arm64 + #- almalinux-10-amd64 + #- almalinux-10-arm64 + #- amazon-linux-2023-amd64 + #- amazon-linux-2023-arm64 + #- centos-9-stream-amd64 + #- centos-9-stream-arm64 + #- centos-7-amd64 + #- debian-bookworm-amd64 + #- debian-bookworm-arm64 + #- debian-trixie-amd64 + #- debian-trixie-arm64 + #- debian-forky-amd64 + #- debian-forky-arm64 + #- ubuntu-jammy-amd64 + #- ubuntu-jammy-arm64 - ubuntu-noble-amd64 - ubuntu-noble-arm64 env: @@ -238,7 +238,9 @@ jobs: run: | set -ex pushd dev/tasks/linux-packages - tar cvzf ${{ matrix.id }}.tar.gz */${TASK_NAMESPACE}/repositories + mkdir -p artifacts + cp -a */${TASK_NAMESPACE}/repositories/* artifacts/ + tar cvzf ${{ matrix.id }}.tar.gz -C artifacts . popd - name: Upload the artifacts to the job uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 @@ -276,7 +278,7 @@ jobs: echo "GPG_KEY_ID=${GPG_KEY_ID}" >> ${GITHUB_ENV} case "${{ matrix.id }}" in almalinux-*|amazon-linux-*|centos-*) - repositories_dir=dev/tasks/linux-packages/apache-arrow-release/yum/repositories + repositories_dir=dev/tasks/linux-packages rpm2cpio ${repositories_dir}/*/*/*/Packages/apache-arrow-release-*.rpm | \ cpio -id mv etc/pki/rpm-gpg/RPM-GPG-KEY-Apache-Arrow \ From 516e0fb8aa98495547efb5785f6549f2e6b4d723 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 23 Sep 2025 17:00:53 +0200 Subject: [PATCH 30/36] Fix verify-yum.sh and verify-apt.sh to remove apt/repositories and yum/repositories subfolders Try fixing yum tests --- dev/release/binary-task.rb | 23 ++++++++++------------- dev/release/verify-apt.sh | 2 +- dev/release/verify-yum.sh | 4 ++-- 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/dev/release/binary-task.rb b/dev/release/binary-task.rb index 1205d8aae5734..101a4fe1a4c71 100644 --- a/dev/release/binary-task.rb +++ b/dev/release/binary-task.rb @@ -1854,11 +1854,11 @@ def define_apt_tasks end def yum_rc_repositories_dir - "#{rc_dir}/yum/repositories" + "#{rc_dir}" end def yum_release_repositories_dir - "#{release_dir}/yum/repositories" + "#{release_dir}" end def available_yum_targets @@ -1929,7 +1929,7 @@ def sign_rpms(directory) thread_pool.join end - def rpm_sign(directory) + def rpm_sign() unless system("rpm", "-q", rpm_gpg_key_package_name(gpg_key_id), out: IO::NULL) @@ -1948,7 +1948,6 @@ def rpm_sign(directory) yum_targets.each do |distribution, distribution_version| source_dir = [ - directory, distribution, distribution_version, ].join("/") @@ -1956,9 +1955,9 @@ def rpm_sign(directory) end end - def yum_update(base_dir, incoming_dir) + def yum_update(base_dir) yum_targets.each do |distribution, distribution_version| - target_dir = "#{incoming_dir}/#{distribution}/#{distribution_version}" + target_dir = "#{distribution}/#{distribution_version}" target_dir = Pathname(target_dir) next unless target_dir.directory? @@ -2040,7 +2039,7 @@ def define_yum_rc_tasks # Copy all repository structures for this distribution/version source_pattern = "#{artifacts_dir}/#{distribution}-#{distribution_version}*/*/" \ - "yum/repositories/#{distribution}/#{distribution_version}" + "#{distribution}/#{distribution_version}" Dir.glob(source_pattern) do |repo_source| progress_reporter.increment_max @@ -2527,18 +2526,16 @@ def define_yum_test_task namespace :yum do desc "Test RPM packages" task :test do - repositories_dir = "yum/repositories" unless @packages.empty? - rm_rf(repositories_dir) @packages.each do |package| - package_repositories = "#{package}/yum/repositories" + package_repositories = "#{package}" next unless File.exist?(package_repositories) - sh("rsync", "-av", "#{package_repositories}/", repositories_dir) + sh("rsync", "-av", "#{package_repositories}/") end end - rpm_sign(repositories_dir) + rpm_sign() base_dir = "nonexistent" - yum_update(base_dir, repositories_dir) + yum_update(base_dir) yum_test_targets.each do |target| verify(target) end diff --git a/dev/release/verify-apt.sh b/dev/release/verify-apt.sh index 1aa6c45acf5d3..e6be5b39caa9a 100755 --- a/dev/release/verify-apt.sh +++ b/dev/release/verify-apt.sh @@ -92,7 +92,7 @@ if [ "${TYPE}" = "local" ]; then ;; esac package_version+="-1" - apt_source_path="${local_prefix}/apt/repositories" + apt_source_path="${local_prefix}" apt_source_path+="/${distribution}/pool/${code_name}/main" apt_source_path+="/a/apache-arrow-apt-source" apt_source_path+="/apache-arrow-apt-source_${package_version}_all.deb" diff --git a/dev/release/verify-yum.sh b/dev/release/verify-yum.sh index 4c32ba308941a..41acf7fe21f63 100755 --- a/dev/release/verify-yum.sh +++ b/dev/release/verify-yum.sh @@ -136,7 +136,7 @@ if [ "${TYPE}" = "local" ]; then package_version="${VERSION}-1" ;; esac - release_path="${local_prefix}/yum/repositories" + release_path="${local_prefix}" case "${distribution}" in almalinux) package_version+=".el${distribution_version}" @@ -167,7 +167,7 @@ fi if [ "${TYPE}" = "local" ]; then sed \ -i"" \ - -e "s,baseurl=https://packages\.apache\.org/artifactory/arrow/,baseurl=file://${local_prefix}/yum/repositories/,g" \ + -e "s,baseurl=https://packages\.apache\.org/artifactory/arrow/,baseurl=file://${local_prefix}/,g" \ /etc/yum.repos.d/Apache-Arrow.repo keys="${local_prefix}/KEYS" if [ -f "${keys}" ]; then From c9c4631025f3e22e3780c04594cfd0c55c5f2818 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 23 Sep 2025 21:12:26 +0200 Subject: [PATCH 31/36] Try to fix after understanding the code a bit better :) --- dev/release/binary-task.rb | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/dev/release/binary-task.rb b/dev/release/binary-task.rb index 101a4fe1a4c71..edde8f6df335d 100644 --- a/dev/release/binary-task.rb +++ b/dev/release/binary-task.rb @@ -1929,7 +1929,7 @@ def sign_rpms(directory) thread_pool.join end - def rpm_sign() + def rpm_sign(directory) unless system("rpm", "-q", rpm_gpg_key_package_name(gpg_key_id), out: IO::NULL) @@ -1948,6 +1948,7 @@ def rpm_sign() yum_targets.each do |distribution, distribution_version| source_dir = [ + directory, distribution, distribution_version, ].join("/") @@ -1955,9 +1956,9 @@ def rpm_sign() end end - def yum_update(base_dir) + def yum_update(base_dir, incoming_dir) yum_targets.each do |distribution, distribution_version| - target_dir = "#{distribution}/#{distribution_version}" + target_dir = "#{incoming_dir}/#{distribution}/#{distribution_version}" target_dir = Pathname(target_dir) next unless target_dir.directory? @@ -2526,16 +2527,18 @@ def define_yum_test_task namespace :yum do desc "Test RPM packages" task :test do + repositories_dir = "yum/repositories" unless @packages.empty? + rm_rf(repositories_dir) @packages.each do |package| package_repositories = "#{package}" next unless File.exist?(package_repositories) - sh("rsync", "-av", "#{package_repositories}/") + sh("rsync", "-av", "#{package_repositories}/", repositories_dir) end end - rpm_sign() + rpm_sign(repositories_dir) base_dir = "nonexistent" - yum_update(base_dir) + yum_update(base_dir, repositories_dir) yum_test_targets.each do |target| verify(target) end From f4130efeb2d3aceceb224efbab6aacd0653567fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 25 Sep 2025 10:23:38 +0200 Subject: [PATCH 32/36] Revert "Try to fix after understanding the code a bit better :)" This reverts commit c9c4631025f3e22e3780c04594cfd0c55c5f2818. --- dev/release/binary-task.rb | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/dev/release/binary-task.rb b/dev/release/binary-task.rb index edde8f6df335d..101a4fe1a4c71 100644 --- a/dev/release/binary-task.rb +++ b/dev/release/binary-task.rb @@ -1929,7 +1929,7 @@ def sign_rpms(directory) thread_pool.join end - def rpm_sign(directory) + def rpm_sign() unless system("rpm", "-q", rpm_gpg_key_package_name(gpg_key_id), out: IO::NULL) @@ -1948,7 +1948,6 @@ def rpm_sign(directory) yum_targets.each do |distribution, distribution_version| source_dir = [ - directory, distribution, distribution_version, ].join("/") @@ -1956,9 +1955,9 @@ def rpm_sign(directory) end end - def yum_update(base_dir, incoming_dir) + def yum_update(base_dir) yum_targets.each do |distribution, distribution_version| - target_dir = "#{incoming_dir}/#{distribution}/#{distribution_version}" + target_dir = "#{distribution}/#{distribution_version}" target_dir = Pathname(target_dir) next unless target_dir.directory? @@ -2527,18 +2526,16 @@ def define_yum_test_task namespace :yum do desc "Test RPM packages" task :test do - repositories_dir = "yum/repositories" unless @packages.empty? - rm_rf(repositories_dir) @packages.each do |package| package_repositories = "#{package}" next unless File.exist?(package_repositories) - sh("rsync", "-av", "#{package_repositories}/", repositories_dir) + sh("rsync", "-av", "#{package_repositories}/") end end - rpm_sign(repositories_dir) + rpm_sign() base_dir = "nonexistent" - yum_update(base_dir, repositories_dir) + yum_update(base_dir) yum_test_targets.each do |target| verify(target) end From b564c22dc5e66a8ee478a529f202a55ce80077bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 25 Sep 2025 10:23:45 +0200 Subject: [PATCH 33/36] Revert "Fix verify-yum.sh and verify-apt.sh to remove apt/repositories and yum/repositories subfolders" This reverts commit 516e0fb8aa98495547efb5785f6549f2e6b4d723. --- dev/release/binary-task.rb | 23 +++++++++++++---------- dev/release/verify-apt.sh | 2 +- dev/release/verify-yum.sh | 4 ++-- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/dev/release/binary-task.rb b/dev/release/binary-task.rb index 101a4fe1a4c71..1205d8aae5734 100644 --- a/dev/release/binary-task.rb +++ b/dev/release/binary-task.rb @@ -1854,11 +1854,11 @@ def define_apt_tasks end def yum_rc_repositories_dir - "#{rc_dir}" + "#{rc_dir}/yum/repositories" end def yum_release_repositories_dir - "#{release_dir}" + "#{release_dir}/yum/repositories" end def available_yum_targets @@ -1929,7 +1929,7 @@ def sign_rpms(directory) thread_pool.join end - def rpm_sign() + def rpm_sign(directory) unless system("rpm", "-q", rpm_gpg_key_package_name(gpg_key_id), out: IO::NULL) @@ -1948,6 +1948,7 @@ def rpm_sign() yum_targets.each do |distribution, distribution_version| source_dir = [ + directory, distribution, distribution_version, ].join("/") @@ -1955,9 +1956,9 @@ def rpm_sign() end end - def yum_update(base_dir) + def yum_update(base_dir, incoming_dir) yum_targets.each do |distribution, distribution_version| - target_dir = "#{distribution}/#{distribution_version}" + target_dir = "#{incoming_dir}/#{distribution}/#{distribution_version}" target_dir = Pathname(target_dir) next unless target_dir.directory? @@ -2039,7 +2040,7 @@ def define_yum_rc_tasks # Copy all repository structures for this distribution/version source_pattern = "#{artifacts_dir}/#{distribution}-#{distribution_version}*/*/" \ - "#{distribution}/#{distribution_version}" + "yum/repositories/#{distribution}/#{distribution_version}" Dir.glob(source_pattern) do |repo_source| progress_reporter.increment_max @@ -2526,16 +2527,18 @@ def define_yum_test_task namespace :yum do desc "Test RPM packages" task :test do + repositories_dir = "yum/repositories" unless @packages.empty? + rm_rf(repositories_dir) @packages.each do |package| - package_repositories = "#{package}" + package_repositories = "#{package}/yum/repositories" next unless File.exist?(package_repositories) - sh("rsync", "-av", "#{package_repositories}/") + sh("rsync", "-av", "#{package_repositories}/", repositories_dir) end end - rpm_sign() + rpm_sign(repositories_dir) base_dir = "nonexistent" - yum_update(base_dir) + yum_update(base_dir, repositories_dir) yum_test_targets.each do |target| verify(target) end diff --git a/dev/release/verify-apt.sh b/dev/release/verify-apt.sh index e6be5b39caa9a..1aa6c45acf5d3 100755 --- a/dev/release/verify-apt.sh +++ b/dev/release/verify-apt.sh @@ -92,7 +92,7 @@ if [ "${TYPE}" = "local" ]; then ;; esac package_version+="-1" - apt_source_path="${local_prefix}" + apt_source_path="${local_prefix}/apt/repositories" apt_source_path+="/${distribution}/pool/${code_name}/main" apt_source_path+="/a/apache-arrow-apt-source" apt_source_path+="/apache-arrow-apt-source_${package_version}_all.deb" diff --git a/dev/release/verify-yum.sh b/dev/release/verify-yum.sh index 41acf7fe21f63..4c32ba308941a 100755 --- a/dev/release/verify-yum.sh +++ b/dev/release/verify-yum.sh @@ -136,7 +136,7 @@ if [ "${TYPE}" = "local" ]; then package_version="${VERSION}-1" ;; esac - release_path="${local_prefix}" + release_path="${local_prefix}/yum/repositories" case "${distribution}" in almalinux) package_version+=".el${distribution_version}" @@ -167,7 +167,7 @@ fi if [ "${TYPE}" = "local" ]; then sed \ -i"" \ - -e "s,baseurl=https://packages\.apache\.org/artifactory/arrow/,baseurl=file://${local_prefix}/,g" \ + -e "s,baseurl=https://packages\.apache\.org/artifactory/arrow/,baseurl=file://${local_prefix}/yum/repositories/,g" \ /etc/yum.repos.d/Apache-Arrow.repo keys="${local_prefix}/KEYS" if [ -f "${keys}" ]; then From f60d6adee60d871ae5b3e40898f223d8252a4649 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 25 Sep 2025 10:23:52 +0200 Subject: [PATCH 34/36] Revert "Simplify .tar.gz folder structure more to be able to simplify even more binary tasks" This reverts commit 471e1100ddc2f14636a2574163572a138a4480ce. --- .github/workflows/package_linux.yml | 40 ++++++++++++++--------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/.github/workflows/package_linux.yml b/.github/workflows/package_linux.yml index b023bf4d82d2d..52e0f29a33796 100644 --- a/.github/workflows/package_linux.yml +++ b/.github/workflows/package_linux.yml @@ -92,23 +92,23 @@ jobs: id: - almalinux-8-amd64 - almalinux-8-arm64 - #- almalinux-9-amd64 - #- almalinux-9-arm64 - #- almalinux-10-amd64 - #- almalinux-10-arm64 - #- amazon-linux-2023-amd64 - #- amazon-linux-2023-arm64 - #- centos-9-stream-amd64 - #- centos-9-stream-arm64 - #- centos-7-amd64 - #- debian-bookworm-amd64 - #- debian-bookworm-arm64 - #- debian-trixie-amd64 - #- debian-trixie-arm64 - #- debian-forky-amd64 - #- debian-forky-arm64 - #- ubuntu-jammy-amd64 - #- ubuntu-jammy-arm64 + - almalinux-9-amd64 + - almalinux-9-arm64 + - almalinux-10-amd64 + - almalinux-10-arm64 + - amazon-linux-2023-amd64 + - amazon-linux-2023-arm64 + - centos-9-stream-amd64 + - centos-9-stream-arm64 + - centos-7-amd64 + - debian-bookworm-amd64 + - debian-bookworm-arm64 + - debian-trixie-amd64 + - debian-trixie-arm64 + - debian-forky-amd64 + - debian-forky-arm64 + - ubuntu-jammy-amd64 + - ubuntu-jammy-arm64 - ubuntu-noble-amd64 - ubuntu-noble-arm64 env: @@ -238,9 +238,7 @@ jobs: run: | set -ex pushd dev/tasks/linux-packages - mkdir -p artifacts - cp -a */${TASK_NAMESPACE}/repositories/* artifacts/ - tar cvzf ${{ matrix.id }}.tar.gz -C artifacts . + tar cvzf ${{ matrix.id }}.tar.gz */${TASK_NAMESPACE}/repositories popd - name: Upload the artifacts to the job uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 @@ -278,7 +276,7 @@ jobs: echo "GPG_KEY_ID=${GPG_KEY_ID}" >> ${GITHUB_ENV} case "${{ matrix.id }}" in almalinux-*|amazon-linux-*|centos-*) - repositories_dir=dev/tasks/linux-packages + repositories_dir=dev/tasks/linux-packages/apache-arrow-release/yum/repositories rpm2cpio ${repositories_dir}/*/*/*/Packages/apache-arrow-release-*.rpm | \ cpio -id mv etc/pki/rpm-gpg/RPM-GPG-KEY-Apache-Arrow \ From 220dca17d3ee9d87e1c24f2f8778c429e0716418 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 25 Sep 2025 10:38:36 +0200 Subject: [PATCH 35/36] Some fixes to ruby as per code review --- dev/release/binary-task.rb | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/dev/release/binary-task.rb b/dev/release/binary-task.rb index 1205d8aae5734..0511f3226fad8 100644 --- a/dev/release/binary-task.rb +++ b/dev/release/binary-task.rb @@ -1645,30 +1645,24 @@ def define_apt_rc_tasks progress_label = "Copying: #{distribution} #{code_name}" progress_reporter = ProgressReporter.new(progress_label) - destination_prefix = "#{incoming_dir}/#{distribution}" - rm_rf(destination_prefix, verbose: verbose?) + destination_dir = "#{incoming_dir}/#{distribution}" + rm_rf(destination_dir, verbose: verbose?) # Copy the entire repository structure source_pattern = "#{artifacts_dir}/#{distribution}-#{code_name}*/*/" \ "apt/repositories/#{distribution}" Dir.glob(source_pattern) do |repo_source| progress_reporter.increment_max - mkdir_p(File.dirname(destination_prefix), verbose: verbose?) - cp_r(repo_source, destination_prefix, preserve: true, verbose: verbose?) + mkdir_p(File.dirname(destination_dir), verbose: verbose?) + cp_r(repo_source, destination_dir, preserve: true, verbose: verbose?) progress_reporter.advance end # Create latest package links after copying - Dir.glob("#{destination_prefix}/**/*-apt-source_*.deb") do |apt_source_path| - base_name = File.basename(apt_source_path) - package_name = ENV["DEB_PACKAGE_NAME"] - if package_name.nil? or package_name.empty? - package_name = "apache-arrow-apt-source" - end - + Dir.glob("#{destination_dir}/**/*-apt-source_*.deb") do |apt_source_path| latest_apt_source_package_path = [ - destination_prefix, - "#{package_name}-latest-#{code_name}.deb" + destination_dir, + "apache-arrow-apt-source-latest-#{code_name}.deb" ].join("/") copy_artifact(apt_source_path, @@ -2035,8 +2029,8 @@ def define_yum_rc_tasks progress_label = "Copying: #{distribution} #{distribution_version}" progress_reporter = ProgressReporter.new(progress_label) - destination_prefix = "#{incoming_dir}/#{distribution}/#{distribution_version}" - rm_rf(destination_prefix, verbose: verbose?) + destination_dir = "#{incoming_dir}/#{distribution}/#{distribution_version}" + rm_rf(destination_dir, verbose: verbose?) # Copy all repository structures for this distribution/version source_pattern = "#{artifacts_dir}/#{distribution}-#{distribution_version}*/*/" \ @@ -2047,7 +2041,7 @@ def define_yum_rc_tasks # Copy and merge all architectures Dir.glob("#{repo_source}/*") do |arch_dir| arch_name = File.basename(arch_dir) - destination_arch_dir = "#{destination_prefix}/#{arch_name}" + destination_arch_dir = "#{destination_dir}/#{arch_name}" mkdir_p(File.dirname(destination_arch_dir), verbose: verbose?) cp_r(arch_dir, destination_arch_dir, preserve: true, verbose: verbose?) end From c7f2c5957510d5fa3a40b31f830113b6ae8bdcbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 25 Sep 2025 10:40:16 +0200 Subject: [PATCH 36/36] Apply suggestion from @kou Co-authored-by: Sutou Kouhei --- dev/tasks/linux-packages/helper.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/tasks/linux-packages/helper.rb b/dev/tasks/linux-packages/helper.rb index 618d4691c5165..458198c6704e4 100644 --- a/dev/tasks/linux-packages/helper.rb +++ b/dev/tasks/linux-packages/helper.rb @@ -47,7 +47,7 @@ def arrow_source_dir def detect_version(release_time) version_env = ENV["ARROW_VERSION"] - return version_env unless version_env.nil? || version_env.empty? + return version_env unless version_env.to_s.empty? cmakelists_txt_path = File.join(arrow_source_dir, "cpp", "CMakeLists.txt") cmakelists_txt_content = File.read(cmakelists_txt_path)