diff --git a/.github/actions/setup-spark-builder/action.yaml b/.github/actions/setup-spark-builder/action.yaml
new file mode 100644
index 0000000000..9293dee901
--- /dev/null
+++ b/.github/actions/setup-spark-builder/action.yaml
@@ -0,0 +1,64 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: Setup Spark Builder
+description: 'Setup Apache Spark to run SQL tests'
+inputs:  # each input declares a default value
+  spark-short-version:  # used to build the -Pspark-<short> flag for `make release`
+    description: 'The Apache Spark short version (e.g., 3.4) to build'
+    default: '3.4'
+    required: true
+  spark-version:  # selects the v<version> ref to clone and dev/diffs/<version>.diff
+    description: 'The Apache Spark version (e.g., 3.4.2) to build'
+    default: '3.4.2'
+    required: true
+  comet-version:  # value written to the comet.version property of the Spark build
+    description: 'The Comet version to use for Spark'
+    default: '0.1.0-SNAPSHOT'
+    required: true
+runs:  # composite action: clone Spark, restore the Maven cache, patch Spark, build Comet
+  using: "composite"
+  steps:
+    - name: Clone Spark repo
+      uses: actions/checkout@v4
+      with:
+        repository: apache/spark
+        path: apache-spark
+        ref: v${{inputs.spark-version}}
+        fetch-depth: 1
+    # Restored before the first Maven call (mvnw below) so its downloads can come from the cache.
+    - name: Cache Maven dependencies
+      uses: actions/cache@v4
+      with:
+        path: |
+          ~/.m2/repository
+          /root/.m2/repository
+        key: ${{ runner.os }}-spark-sql-${{ hashFiles('spark/**/pom.xml', 'common/**/pom.xml') }}
+        restore-keys: |
+          ${{ runner.os }}-spark-sql-
+    # Applies dev/diffs/<spark-version>.diff to the clone, then sets its comet.version property.
+    - name: Setup Spark for Comet
+      shell: bash
+      run: |
+        cd apache-spark
+        git apply ../dev/diffs/${{inputs.spark-version}}.diff
+        ../mvnw -nsu -q versions:set-property -Dproperty=comet.version -DnewVersion=${{inputs.comet-version}} -DgenerateBackupPoms=false
+    # Runs `make release` with PROFILES set to the -Pspark-<spark-short-version> flag.
+    - name: Build Comet
+      shell: bash
+      run: |
+        PROFILES="-Pspark-${{inputs.spark-short-version}}" make release
diff --git a/.github/workflows/spark_sql_test.yml b/.github/workflows/spark_sql_test.yml
new file mode 100644
index 0000000000..5c460b703e
--- /dev/null
+++ b/.github/workflows/spark_sql_test.yml
@@ -0,0 +1,79 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: Spark SQL Tests
+
+concurrency:  # runs sharing a group key are serialized; the older in-progress run is cancelled
+  cancel-in-progress: true
+  group: '${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}'
+
+on:
+  # manual trigger
+  # https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
+  workflow_dispatch:
+  pull_request:
+    paths-ignore:  # changes touching only these paths do not start the workflow
+      - '**.md'
+      - 'doc/**'
+  push:
+    paths-ignore:  # same exclusions as pull_request
+      - '**.md'
+      - 'doc/**'
+
+env:  # workflow-level value, read by the job as env.RUST_VERSION for the rust-version input
+  RUST_VERSION: 'nightly'
+
+jobs:
+  spark-sql-catalyst:  # expands to one job per matrix.module entry (catalyst, sql/core-*, sql/hive-*)
+    strategy:
+      matrix:
+        os: [ubuntu-latest]
+        java-version: [11]
+        spark-version: [{short: '3.4', full: '3.4.2'}]
+        module:  # args1 is expanded unquoted on the sbt command line; args2 is passed as one quoted argument
+          - {name: "catalyst", args1: "catalyst/test", args2: ""}
+          - {name: "sql/core-1", args1: "", args2: "sql/testOnly * -- -l org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest"}
+          - {name: "sql/core-2", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.ExtendedSQLTest"}
+          - {name: "sql/core-3", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.SlowSQLTest"}
+          - {name: "sql/hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest"}
+          - {name: "sql/hive-2", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.ExtendedHiveTest"}
+          - {name: "sql/hive-3", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.SlowHiveTest"}
+      fail-fast: false
+    name: spark-sql-${{ matrix.module.name }}/${{ matrix.os }}/spark-${{ matrix.spark-version.full }}/java-${{ matrix.java-version }}
+    runs-on: ${{ matrix.os }}
+    container:
+      image: amd64/rust
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup Rust & Java toolchain
+        uses: ./.github/actions/setup-builder
+        with:
+          rust-version: ${{env.RUST_VERSION}}
+          jdk-version: ${{ matrix.java-version }}
+      - name: Setup Spark
+        uses: ./.github/actions/setup-spark-builder
+        with:
+          spark-version: ${{ matrix.spark-version.full }}
+          spark-short-version: ${{ matrix.spark-version.short }}
+          comet-version: '0.1.0-SNAPSHOT' # TODO: get this from pom.xml
+      - name: Run Spark tests
+        run: |
+          cd apache-spark
+          ENABLE_COMET=true build/sbt ${{ matrix.module.args1 }} "${{ matrix.module.args2 }}"
+        env:
+          LC_ALL: "C.UTF-8"
+
diff --git a/dev/diffs/3.4.2.diff b/dev/diffs/3.4.2.diff
new file mode 100644
index 0000000000..b571cd2b5e
--- /dev/null
+++ b/dev/diffs/3.4.2.diff
@@ -0,0 +1,1306 @@
+diff --git a/pom.xml b/pom.xml
+index fab98342498..f2156d790d1 100644
+--- a/pom.xml
++++ b/pom.xml
+@@ -148,6 +148,8 @@
+ 0.10.0
+ 2.5.1
+ 2.0.8
++ 3.4
++ 0.1.0-SNAPSHOT
+