From d51a37afa0781879ab6ce393526632d87133bb20 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Sat, 20 Sep 2025 17:06:05 -0400 Subject: [PATCH 1/5] Add CI check for outdated documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ensure that PRs include necessary documentation updates when config defaults change. The check runs after Maven build (which generates docs from config values) and fails if any docs files are modified, indicating missing documentation changes. Fixes #2427 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- .github/actions/java-test/action.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/.github/actions/java-test/action.yaml b/.github/actions/java-test/action.yaml index 1f7899f8ac..5bf4981f2d 100644 --- a/.github/actions/java-test/action.yaml +++ b/.github/actions/java-test/action.yaml @@ -82,6 +82,21 @@ runs: MAVEN_SUITES="$(echo "${{ inputs.suites }}" | paste -sd, -)" echo "Running with MAVEN_SUITES=$MAVEN_SUITES" MAVEN_OPTS="-Xmx4G -Xms2G -DwildcardSuites=$MAVEN_SUITES -XX:+UnlockDiagnosticVMOptions -XX:+ShowMessageBoxOnError -XX:+HeapDumpOnOutOfMemoryError -XX:ErrorFile=./hs_err_pid%p.log" SPARK_HOME=`pwd` ./mvnw -B clean install ${{ inputs.maven_opts }} + + - name: Check for documentation changes + shell: bash + run: | + # Check if any files in docs/ have been modified after Maven build (which runs GenerateDocs) + if [ -n "$(git status --porcelain -- docs/)" ]; then + echo "❌ Documentation files have been modified after Maven build:" + git diff --name-only -- docs/ + echo "" + echo "This suggests that config defaults have changed but the documentation wasn't updated." + echo "Please run 'make jvm' locally and commit the updated documentation files." 
+ exit 1 + else + echo "✅ Documentation is up-to-date" + fi - name: Upload crash logs if: failure() uses: actions/upload-artifact@v4 From 0cc16dfa0433b5f62cbb5d2796b606ec5c7da1d0 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Mon, 22 Sep 2025 10:15:20 -0400 Subject: [PATCH 2/5] Try again. --- .github/actions/java-test/action.yaml | 48 +++++++++++++++++++-------- 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/.github/actions/java-test/action.yaml b/.github/actions/java-test/action.yaml index 5bf4981f2d..d0ed3a1dc3 100644 --- a/.github/actions/java-test/action.yaml +++ b/.github/actions/java-test/action.yaml @@ -62,6 +62,40 @@ runs: run: | ./mvnw -B compile test-compile scalafix:scalafix -Dscalafix.mode=CHECK -Psemanticdb ${{ inputs.maven_opts }} + - name: Check for documentation changes + shell: bash + run: | + # Capture the initial state of docs before we potentially regenerate them + echo "Checking documentation consistency..." + + # Save current state of docs directory + git add docs/ || true + INITIAL_DOCS_HASH=$(git write-tree --prefix=docs/) + echo "Initial docs hash: $INITIAL_DOCS_HASH" + + # Reset any staged changes to docs + git reset docs/ || true + + # Force regenerate docs by running just the package phase to ensure GenerateDocs runs + echo "Running Maven package to regenerate documentation..." + ./mvnw package -DskipTests -q -pl spark + + # Check if docs changed after regeneration + git add docs/ || true + FINAL_DOCS_HASH=$(git write-tree --prefix=docs/) + echo "Final docs hash: $FINAL_DOCS_HASH" + + if [ "$INITIAL_DOCS_HASH" != "$FINAL_DOCS_HASH" ]; then + echo "Documentation files have been modified after running GenerateDocs:" + git diff --name-only $INITIAL_DOCS_HASH $FINAL_DOCS_HASH + echo "" + echo "This suggests that config defaults have changed but the documentation wasn't updated." + echo "Please run 'make jvm' locally and commit the updated documentation files." 
+ exit 1 + else + echo "Documentation is up-to-date" + fi + - name: Run all tests shell: bash if: ${{ inputs.suites == '' }} @@ -83,20 +117,6 @@ runs: echo "Running with MAVEN_SUITES=$MAVEN_SUITES" MAVEN_OPTS="-Xmx4G -Xms2G -DwildcardSuites=$MAVEN_SUITES -XX:+UnlockDiagnosticVMOptions -XX:+ShowMessageBoxOnError -XX:+HeapDumpOnOutOfMemoryError -XX:ErrorFile=./hs_err_pid%p.log" SPARK_HOME=`pwd` ./mvnw -B clean install ${{ inputs.maven_opts }} - - name: Check for documentation changes - shell: bash - run: | - # Check if any files in docs/ have been modified after Maven build (which runs GenerateDocs) - if [ -n "$(git status --porcelain -- docs/)" ]; then - echo "❌ Documentation files have been modified after Maven build:" - git diff --name-only -- docs/ - echo "" - echo "This suggests that config defaults have changed but the documentation wasn't updated." - echo "Please run 'make jvm' locally and commit the updated documentation files." - exit 1 - else - echo "✅ Documentation is up-to-date" - fi - name: Upload crash logs if: failure() uses: actions/upload-artifact@v4 From 1ec48e18083bb81be934eee4f7b80291caddcb60 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Mon, 22 Sep 2025 10:29:48 -0400 Subject: [PATCH 3/5] Try again. --- .github/actions/java-test/action.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/actions/java-test/action.yaml b/.github/actions/java-test/action.yaml index d0ed3a1dc3..10d070e331 100644 --- a/.github/actions/java-test/action.yaml +++ b/.github/actions/java-test/action.yaml @@ -68,6 +68,9 @@ runs: # Capture the initial state of docs before we potentially regenerate them echo "Checking documentation consistency..." 
+ # Configure git for safe directory access in container + git config --global --add safe.directory "$(pwd)" + # Save current state of docs directory git add docs/ || true INITIAL_DOCS_HASH=$(git write-tree --prefix=docs/) From 5f2aca0b4d2e17945d509715682eeec7dac81701 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Wed, 24 Sep 2025 09:32:54 -0400 Subject: [PATCH 4/5] Use @andygrove's approach. --- .github/actions/java-test/action.yaml | 29 +++++---------------------- 1 file changed, 5 insertions(+), 24 deletions(-) diff --git a/.github/actions/java-test/action.yaml b/.github/actions/java-test/action.yaml index 10d070e331..81ff4b34dc 100644 --- a/.github/actions/java-test/action.yaml +++ b/.github/actions/java-test/action.yaml @@ -65,38 +65,19 @@ runs: - name: Check for documentation changes shell: bash run: | - # Capture the initial state of docs before we potentially regenerate them - echo "Checking documentation consistency..." - # Configure git for safe directory access in container git config --global --add safe.directory "$(pwd)" - # Save current state of docs directory - git add docs/ || true - INITIAL_DOCS_HASH=$(git write-tree --prefix=docs/) - echo "Initial docs hash: $INITIAL_DOCS_HASH" - - # Reset any staged changes to docs - git reset docs/ || true - # Force regenerate docs by running just the package phase to ensure GenerateDocs runs echo "Running Maven package to regenerate documentation..." ./mvnw package -DskipTests -q -pl spark - # Check if docs changed after regeneration - git add docs/ || true - FINAL_DOCS_HASH=$(git write-tree --prefix=docs/) - echo "Final docs hash: $FINAL_DOCS_HASH" - - if [ "$INITIAL_DOCS_HASH" != "$FINAL_DOCS_HASH" ]; then - echo "Documentation files have been modified after running GenerateDocs:" - git diff --name-only $INITIAL_DOCS_HASH $FINAL_DOCS_HASH - echo "" - echo "This suggests that config defaults have changed but the documentation wasn't updated." 
- echo "Please run 'make jvm' locally and commit the updated documentation files." + # Check if there are any uncommitted changes after build/format + if ! git diff --quiet || ! git diff --cached --quiet; then + echo "Found uncommitted changes after build/format" + git status + git diff exit 1 - else - echo "Documentation is up-to-date" fi - name: Run all tests From a2a0b0ceaeb2880f2a5ec949b4d11352a6e6492c Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Wed, 24 Sep 2025 09:36:13 -0400 Subject: [PATCH 5/5] Use @andygrove's approach. --- docs/source/user-guide/latest/configs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/user-guide/latest/configs.md b/docs/source/user-guide/latest/configs.md index fdfc2da35c..1e30c378d2 100644 --- a/docs/source/user-guide/latest/configs.md +++ b/docs/source/user-guide/latest/configs.md @@ -40,7 +40,7 @@ Comet provides the following configuration settings. | spark.comet.exceptionOnDatetimeRebase | Whether to throw exception when seeing dates/timestamps from the legacy hybrid (Julian + Gregorian) calendar. Since Spark 3, dates/timestamps were written according to the Proleptic Gregorian calendar. When this is true, Comet will throw exceptions when seeing these dates/timestamps that were written by Spark version before 3.0. If this is false, these dates/timestamps will be read as if they were written to the Proleptic Gregorian calendar and will not be rebased. | false | | spark.comet.exec.aggregate.enabled | Whether to enable aggregate by default. | true | | spark.comet.exec.broadcastExchange.enabled | Whether to enable broadcastExchange by default. | true | -| spark.comet.exec.broadcastHashJoin.enabled | Whether to enable broadcastHashJoin by default. | true | +| spark.comet.exec.broadcastHashJoin.enabled | Whether to enable broadcastHashJoin by default. | false | | spark.comet.exec.coalesce.enabled | Whether to enable coalesce by default. 
| true | | spark.comet.exec.collectLimit.enabled | Whether to enable collectLimit by default. | true | | spark.comet.exec.enabled | Whether to enable Comet native vectorized execution for Spark. This controls whether Spark should convert operators into their Comet counterparts and execute them in native space. Note: each operator is associated with a separate config in the format of 'spark.comet.exec.<operator_name>.enabled' at the moment, and both the config and this need to be turned on, in order for the operator to be executed in native. | true |