diff --git a/.github/actions/setup-spark-builder/action.yaml b/.github/actions/setup-spark-builder/action.yaml
new file mode 100644
index 0000000000..9293dee901
--- /dev/null
+++ b/.github/actions/setup-spark-builder/action.yaml
@@ -0,0 +1,70 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: Setup Spark Builder
+description: 'Setup Apache Spark to run SQL tests'
+inputs:
+  spark-short-version:
+    description: 'The Apache Spark short version (e.g., 3.4) to build'
+    required: true
+    default: '3.4'
+  spark-version:
+    description: 'The Apache Spark version (e.g., 3.4.2) to build'
+    required: true
+    default: '3.4.2'
+  comet-version:
+    description: 'The Comet version to use for Spark'
+    required: true
+    default: '0.1.0-SNAPSHOT'
+runs:
+  using: "composite"
+  steps:
+    # Shallow-clone the v<spark-version> ref of apache/spark into ./apache-spark.
+    - name: Clone Spark repo
+      uses: actions/checkout@v4
+      with:
+        repository: apache/spark
+        path: apache-spark
+        ref: v${{inputs.spark-version}}
+        fetch-depth: 1
+
+    # Restore the local Maven repository before the first Maven invocation
+    # below, so that step can reuse cached artifacts as well.
+    - name: Cache Maven dependencies
+      uses: actions/cache@v4
+      with:
+        path: |
+          ~/.m2/repository
+          /root/.m2/repository
+        key: ${{ runner.os }}-spark-sql-${{ hashFiles('spark/**/pom.xml', 'common/**/pom.xml') }}
+        restore-keys: |
+          ${{ runner.os }}-spark-sql-
+
+    # Apply the Comet patch for this Spark version, then set the
+    # comet.version property via the Maven versions plugin.
+    - name: Setup Spark for Comet
+      shell: bash
+      run: |
+        cd apache-spark
+        git apply ../dev/diffs/${{inputs.spark-version}}.diff
+        ../mvnw -nsu -q versions:set-property -Dproperty=comet.version -DnewVersion=${{inputs.comet-version}} -DgenerateBackupPoms=false
+
+    # Run `make release` with the spark-<short-version> profile selected.
+    - name: Build Comet
+      shell: bash
+      run: |
+        PROFILES="-Pspark-${{inputs.spark-short-version}}" make release
diff --git a/.github/workflows/spark_sql_test.yml b/.github/workflows/spark_sql_test.yml
new file mode 100644
index 0000000000..5c460b703e
--- /dev/null
+++ b/.github/workflows/spark_sql_test.yml
@@ -0,0 +1,82 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: Spark SQL Tests
+
+# Cancel an in-progress run when a newer run starts in the same group.
+concurrency:
+  group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
+  cancel-in-progress: true
+
+on:
+  push:
+    paths-ignore:
+      - "doc/**"
+      - "**.md"
+  pull_request:
+    paths-ignore:
+      - "doc/**"
+      - "**.md"
+  # manual trigger
+  # https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
+  workflow_dispatch:
+
+env:
+  RUST_VERSION: nightly
+
+jobs:
+  spark-sql-catalyst:
+    strategy:
+      matrix:
+        os: [ubuntu-latest]
+        java-version: [11]
+        spark-version: [{short: '3.4', full: '3.4.2'}]
+        # args1/args2 of each module are passed to build/sbt in the
+        # "Run Spark tests" step below.
+        module:
+          - {name: "catalyst", args1: "catalyst/test", args2: ""}
+          - {name: "sql/core-1", args1: "", args2: "sql/testOnly * -- -l org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest"}
+          - {name: "sql/core-2", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.ExtendedSQLTest"}
+          - {name: "sql/core-3", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.SlowSQLTest"}
+          - {name: "sql/hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest"}
+          - {name: "sql/hive-2", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.ExtendedHiveTest"}
+          - {name: "sql/hive-3", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.SlowHiveTest"}
+      fail-fast: false
+    name: spark-sql-${{ matrix.module.name }}/${{ matrix.os }}/spark-${{ matrix.spark-version.full }}/java-${{ matrix.java-version }}
+    runs-on: ${{ matrix.os }}
+    container:
+      image: amd64/rust
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup Rust & Java toolchain
+        uses: ./.github/actions/setup-builder
+        with:
+          rust-version: ${{env.RUST_VERSION}}
+          jdk-version: ${{ matrix.java-version }}
+      - name: Setup Spark
+        uses: ./.github/actions/setup-spark-builder
+        with:
+          spark-version: ${{ matrix.spark-version.full }}
+          spark-short-version: ${{ matrix.spark-version.short }}
+          comet-version: '0.1.0-SNAPSHOT' # TODO: get this from pom.xml
+      - name: Run Spark tests
+        run: |
+          cd apache-spark
+          ENABLE_COMET=true build/sbt ${{ matrix.module.args1 }} "${{ matrix.module.args2 }}"
+        env:
+          LC_ALL: "C.UTF-8"
+
diff --git a/dev/diffs/3.4.2.diff b/dev/diffs/3.4.2.diff
new file mode 100644
index 0000000000..b571cd2b5e
--- /dev/null
+++ b/dev/diffs/3.4.2.diff
@@ -0,0 +1,1306 @@
+diff --git a/pom.xml b/pom.xml
+index fab98342498..f2156d790d1 100644
+--- a/pom.xml
++++ b/pom.xml
+@@ -148,6 +148,8 @@
+ 0.10.0
+ 2.5.1
+ 2.0.8
++ 3.4
++ 0.1.0-SNAPSHOT
+