diff --git a/.github/actions/java-test/action.yaml b/.github/actions/java-test/action.yaml
index 1f7899f8ac..81ff4b34dc 100644
--- a/.github/actions/java-test/action.yaml
+++ b/.github/actions/java-test/action.yaml
@@ -62,6 +62,24 @@ runs:
       run: |
         ./mvnw -B compile test-compile scalafix:scalafix -Dscalafix.mode=CHECK -Psemanticdb ${{ inputs.maven_opts }}
+    - name: Check for documentation changes
+      shell: bash
+      run: |
+        # Configure git for safe directory access in container
+        git config --global --add safe.directory "$(pwd)"
+
+        # Force regenerate docs by running just the package phase to ensure GenerateDocs runs
+        echo "Running Maven package to regenerate documentation..."
+        ./mvnw package -DskipTests -q -pl spark
+
+        # Check if there are any uncommitted changes after build/format
+        if ! git diff --quiet || ! git diff --cached --quiet; then
+          echo "Found uncommitted changes after build/format"
+          git status
+          git diff
+          exit 1
+        fi
+
     - name: Run all tests
       shell: bash
       if: ${{ inputs.suites == '' }}
@@ -82,6 +100,7 @@ runs:
         MAVEN_SUITES="$(echo "${{ inputs.suites }}" | paste -sd, -)"
         echo "Running with MAVEN_SUITES=$MAVEN_SUITES"
         MAVEN_OPTS="-Xmx4G -Xms2G -DwildcardSuites=$MAVEN_SUITES -XX:+UnlockDiagnosticVMOptions -XX:+ShowMessageBoxOnError -XX:+HeapDumpOnOutOfMemoryError -XX:ErrorFile=./hs_err_pid%p.log" SPARK_HOME=`pwd` ./mvnw -B clean install ${{ inputs.maven_opts }}
+
     - name: Upload crash logs
       if: failure()
       uses: actions/upload-artifact@v4
diff --git a/docs/source/user-guide/latest/configs.md b/docs/source/user-guide/latest/configs.md
index fdfc2da35c..1e30c378d2 100644
--- a/docs/source/user-guide/latest/configs.md
+++ b/docs/source/user-guide/latest/configs.md
@@ -40,7 +40,7 @@ Comet provides the following configuration settings.
 | spark.comet.exceptionOnDatetimeRebase | Whether to throw exception when seeing dates/timestamps from the legacy hybrid (Julian + Gregorian) calendar. Since Spark 3, dates/timestamps were written according to the Proleptic Gregorian calendar. When this is true, Comet will throw exceptions when seeing these dates/timestamps that were written by Spark version before 3.0. If this is false, these dates/timestamps will be read as if they were written to the Proleptic Gregorian calendar and will not be rebased. | false |
 | spark.comet.exec.aggregate.enabled | Whether to enable aggregate by default. | true |
 | spark.comet.exec.broadcastExchange.enabled | Whether to enable broadcastExchange by default. | true |
-| spark.comet.exec.broadcastHashJoin.enabled | Whether to enable broadcastHashJoin by default. | true |
+| spark.comet.exec.broadcastHashJoin.enabled | Whether to enable broadcastHashJoin by default. | false |
 | spark.comet.exec.coalesce.enabled | Whether to enable coalesce by default. | true |
 | spark.comet.exec.collectLimit.enabled | Whether to enable collectLimit by default. | true |
 | spark.comet.exec.enabled | Whether to enable Comet native vectorized execution for Spark. This controls whether Spark should convert operators into their Comet counterparts and execute them in native space. Note: each operator is associated with a separate config in the format of 'spark.comet.exec.<operator>.enabled' at the moment, and both the config and this need to be turned on, in order for the operator to be executed in native. | true |
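
The new "Check for documentation changes" CI step can be reproduced locally before pushing. A minimal sketch, assuming the same repository layout and that the `spark` module's package phase regenerates the docs as in the step added above; the commands are copied from that step and are not part of the CI definition itself:

```bash
# Regenerate the docs the same way the CI step does (skipping tests),
# then fail if the generated files differ from what is committed.
./mvnw package -DskipTests -q -pl spark

if ! git diff --quiet || ! git diff --cached --quiet; then
  echo "Generated docs are out of date; commit the regenerated files."
  git status
  exit 1
fi
```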