diff --git a/.github/workflows/presto-nightly-upstream.yml b/.github/workflows/presto-nightly-upstream.yml index 108b54be..b9fc7fcc 100644 --- a/.github/workflows/presto-nightly-upstream.yml +++ b/.github/workflows/presto-nightly-upstream.yml @@ -1,6 +1,8 @@ name: Presto Nightly Test (Upstream) on: + schedule: + - cron: '0 4 * * *' # daily at 04:00 UTC workflow_dispatch: jobs: @@ -15,4 +17,5 @@ jobs: run_cpu_tests: true run_gpu_tests: true set_velox_backward_compatible: false + build_upstream: true secrets: inherit diff --git a/.github/workflows/presto-nightly.yml b/.github/workflows/presto-nightly.yml index e7106f30..f0ace12f 100644 --- a/.github/workflows/presto-nightly.yml +++ b/.github/workflows/presto-nightly.yml @@ -17,4 +17,5 @@ jobs: run_cpu_tests: true run_gpu_tests: true set_velox_backward_compatible: ${{ vars.SET_PRESTO_VELOX_BACKWARD_COMPATIBLE == 'true' }} + build_upstream: false secrets: inherit diff --git a/.github/workflows/presto-test-composite.yml b/.github/workflows/presto-test-composite.yml index 52f9c424..9cf7d49a 100644 --- a/.github/workflows/presto-test-composite.yml +++ b/.github/workflows/presto-test-composite.yml @@ -23,11 +23,6 @@ on: type: string required: false default: 'main' - velox_testing_commit: - description: 'Velox Testing Commit SHA or Branch' - type: string - required: false - default: 'main' presto_worker_type: description: 'Type of Presto Worker to use' type: string @@ -40,6 +35,10 @@ on: description: 'Set VELOX_ENABLE_BACKWARD_COMPATIBLE in Velox build' type: string required: false + build_upstream: + description: 'Build Presto Upstream Default' + type: boolean + default: false jobs: build-and-test: @@ -51,18 +50,29 @@ jobs: repository: ${{ inputs.presto_repository }} ref: ${{ inputs.presto_commit }} path: presto + - name: Get Presto Pinned Velox Version + id: get-presto-pinned-velox-version + if: ${{ inputs.build_upstream && inputs.presto_worker_type != 'java' }} + run: | + pushd presto/presto-native-execution + make 
submodules + cd velox + PRESTO_PINNED_VELOX_SHA=$(git rev-parse HEAD) + echo "Found Presto pinned Velox SHA: ${PRESTO_PINNED_VELOX_SHA}" + echo "PRESTO_PINNED_VELOX_SHA=${PRESTO_PINNED_VELOX_SHA}" >> $GITHUB_OUTPUT + popd - name: Checkout Velox if: ${{ inputs.presto_worker_type != 'java' }} uses: actions/checkout@v4 with: - repository: ${{ inputs.velox_repository }} - ref: ${{ inputs.velox_commit }} + repository: ${{ inputs.build_upstream && 'facebookincubator/velox' || inputs.velox_repository }} + ref: ${{ inputs.build_upstream && steps.get-presto-pinned-velox-version.outputs.PRESTO_PINNED_VELOX_SHA || inputs.velox_commit }} path: velox - name: Checkout Velox Testing uses: actions/checkout@v4 with: repository: rapidsai/velox-testing - ref: ${{ inputs.velox_testing_commit }} + ref: ${{ github.ref_name }} # automatically match the workflow branch path: velox-testing - name: Download Presto Dependencies Container Image if: ${{ inputs.presto_worker_type != 'java' }} diff --git a/.github/workflows/presto-test.yml b/.github/workflows/presto-test.yml index 0e41b999..709439bc 100644 --- a/.github/workflows/presto-test.yml +++ b/.github/workflows/presto-test.yml @@ -23,11 +23,6 @@ on: type: string required: false default: 'main' - velox_testing_commit: &velox_testing_commit - description: 'Velox Testing Commit SHA or Branch' - type: string - required: false - default: 'main' run_java_tests: &run_java_tests description: 'Run tests with Java Worker' type: boolean @@ -44,6 +39,10 @@ on: description: 'Set VELOX_ENABLE_BACKWARD_COMPATIBLE in Velox build' type: boolean default: false + build_upstream: &build_upstream + description: 'Build Presto Upstream Default' + type: boolean + default: false workflow_call: inputs: @@ -51,11 +50,11 @@ on: presto_commit: *presto_commit velox_repository: *velox_repository velox_commit: *velox_commit - velox_testing_commit: *velox_testing_commit run_java_tests: *run_java_tests run_cpu_tests: *run_cpu_tests run_gpu_tests: *run_gpu_tests 
set_velox_backward_compatible: *set_velox_backward_compatible + build_upstream: *build_upstream jobs: java: @@ -68,8 +67,8 @@ jobs: presto_commit: ${{ inputs.presto_commit }} velox_repository: ${{ inputs.velox_repository }} velox_commit: ${{ inputs.velox_commit }} - velox_testing_commit: ${{ inputs.velox_testing_commit }} set_velox_backward_compatible: false + build_upstream: ${{ inputs.build_upstream }} secrets: inherit native-cpu: if: ${{ inputs.run_cpu_tests }} @@ -81,8 +80,8 @@ jobs: presto_commit: ${{ inputs.presto_commit }} velox_repository: ${{ inputs.velox_repository }} velox_commit: ${{ inputs.velox_commit }} - velox_testing_commit: ${{ inputs.velox_testing_commit }} set_velox_backward_compatible: ${{ inputs.set_velox_backward_compatible }} + build_upstream: ${{ inputs.build_upstream }} secrets: inherit native-gpu: if: ${{ inputs.run_gpu_tests }} @@ -94,6 +93,6 @@ jobs: presto_commit: ${{ inputs.presto_commit }} velox_repository: ${{ inputs.velox_repository }} velox_commit: ${{ inputs.velox_commit }} - velox_testing_commit: ${{ inputs.velox_testing_commit }} set_velox_backward_compatible: ${{ inputs.set_velox_backward_compatible }} + build_upstream: ${{ inputs.build_upstream }} secrets: inherit diff --git a/presto/docker/config/template/etc_common/jvm.config b/presto/docker/config/template/etc_common/jvm.config index 7ea5d695..cb8f2a63 100644 --- a/presto/docker/config/template/etc_common/jvm.config +++ b/presto/docker/config/template/etc_common/jvm.config @@ -1,40 +1 @@ -# Enable JVM server mode for better JIT optimization on long-running servers. --server -# Maximum Java heap size; templated to match container memory. --Xmx{{ .HeapSizeGb }}G -# Initial Java heap size; equal to max to avoid heap resizing pauses. --Xms{{ .HeapSizeGb }}G -# Use the G1 garbage collector for predictable pause times. --XX:+UseG1GC -# Tune G1 region size to balance GC throughput and fragmentation. 
--XX:G1HeapRegionSize=32M -# Abort when GC overhead becomes excessive to prevent hangs. --XX:+UseGCOverheadLimit -# Make System.gc() invoke concurrent collections to reduce pauses. --XX:+ExplicitGCInvokesConcurrent -# Create heap dumps on OOM for postmortem analysis. --XX:+HeapDumpOnOutOfMemoryError -# Exit the JVM on OOM so orchestration can restart the process. --XX:+ExitOnOutOfMemoryError -# Cap NIO direct buffer cache to limit retained off-heap memory. --Djdk.nio.maxCachedBufferSize=2000000 -# Allow self-attach for profilers (e.g., async-profiler) during debugging. --Djdk.attach.allowAttachSelf=true -# Open JDK internals for reflection required by Presto and dependencies under Java 11+ modules. ---add-opens=java.base/java.io=ALL-UNNAMED ---add-opens=java.base/java.lang=ALL-UNNAMED ---add-opens=java.base/java.lang.ref=ALL-UNNAMED ---add-opens=java.base/java.lang.reflect=ALL-UNNAMED ---add-opens=java.base/java.net=ALL-UNNAMED ---add-opens=java.base/java.nio=ALL-UNNAMED ---add-opens=java.base/java.security=ALL-UNNAMED ---add-opens=java.base/javax.security.auth=ALL-UNNAMED ---add-opens=java.base/javax.security.auth.login=ALL-UNNAMED ---add-opens=java.base/java.text=ALL-UNNAMED ---add-opens=java.base/java.util=ALL-UNNAMED ---add-opens=java.base/java.util.concurrent=ALL-UNNAMED ---add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED ---add-opens=java.base/java.util.regex=ALL-UNNAMED ---add-opens=java.base/jdk.internal.loader=ALL-UNNAMED ---add-opens=java.base/sun.security.action=ALL-UNNAMED ---add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED +# This file will be overridden by a coordinator or worker specific configuration file. 
diff --git a/presto/docker/config/template/etc_common_java/jvm_java.config b/presto/docker/config/template/etc_common_java/jvm_java.config new file mode 100644 index 00000000..e2c1f74e --- /dev/null +++ b/presto/docker/config/template/etc_common_java/jvm_java.config @@ -0,0 +1,40 @@ +# Enable JVM server mode for better JIT optimization on long-running servers. +-server +# Maximum Java heap size; hard-coded to 24G here (not templated, unlike jvm_native.config). +-Xmx24G +# Initial Java heap size; equal to max to avoid heap resizing pauses. +-Xms24G +# Use the G1 garbage collector for predictable pause times. +-XX:+UseG1GC +# Tune G1 region size to balance GC throughput and fragmentation. +-XX:G1HeapRegionSize=32M +# Abort when GC overhead becomes excessive to prevent hangs. +-XX:+UseGCOverheadLimit +# Make System.gc() invoke concurrent collections to reduce pauses. +-XX:+ExplicitGCInvokesConcurrent +# Create heap dumps on OOM for postmortem analysis. +-XX:+HeapDumpOnOutOfMemoryError +# Exit the JVM on OOM so orchestration can restart the process. +-XX:+ExitOnOutOfMemoryError +# Cap NIO direct buffer cache to limit retained off-heap memory. +-Djdk.nio.maxCachedBufferSize=2000000 +# Allow self-attach for profilers (e.g., async-profiler) during debugging. +-Djdk.attach.allowAttachSelf=true +# Open JDK internals for reflection required by Presto and dependencies under Java 11+ modules.
+--add-opens=java.base/java.io=ALL-UNNAMED +--add-opens=java.base/java.lang=ALL-UNNAMED +--add-opens=java.base/java.lang.ref=ALL-UNNAMED +--add-opens=java.base/java.lang.reflect=ALL-UNNAMED +--add-opens=java.base/java.net=ALL-UNNAMED +--add-opens=java.base/java.nio=ALL-UNNAMED +--add-opens=java.base/java.security=ALL-UNNAMED +--add-opens=java.base/javax.security.auth=ALL-UNNAMED +--add-opens=java.base/javax.security.auth.login=ALL-UNNAMED +--add-opens=java.base/java.text=ALL-UNNAMED +--add-opens=java.base/java.util=ALL-UNNAMED +--add-opens=java.base/java.util.concurrent=ALL-UNNAMED +--add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED +--add-opens=java.base/java.util.regex=ALL-UNNAMED +--add-opens=java.base/jdk.internal.loader=ALL-UNNAMED +--add-opens=java.base/sun.security.action=ALL-UNNAMED +--add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED diff --git a/presto/docker/config/template/etc_common_java/jvm_native.config b/presto/docker/config/template/etc_common_java/jvm_native.config new file mode 100644 index 00000000..7ea5d695 --- /dev/null +++ b/presto/docker/config/template/etc_common_java/jvm_native.config @@ -0,0 +1,40 @@ +# Enable JVM server mode for better JIT optimization on long-running servers. +-server +# Maximum Java heap size; templated to match container memory. +-Xmx{{ .HeapSizeGb }}G +# Initial Java heap size; equal to max to avoid heap resizing pauses. +-Xms{{ .HeapSizeGb }}G +# Use the G1 garbage collector for predictable pause times. +-XX:+UseG1GC +# Tune G1 region size to balance GC throughput and fragmentation. +-XX:G1HeapRegionSize=32M +# Abort when GC overhead becomes excessive to prevent hangs. +-XX:+UseGCOverheadLimit +# Make System.gc() invoke concurrent collections to reduce pauses. +-XX:+ExplicitGCInvokesConcurrent +# Create heap dumps on OOM for postmortem analysis. +-XX:+HeapDumpOnOutOfMemoryError +# Exit the JVM on OOM so orchestration can restart the process. 
+-XX:+ExitOnOutOfMemoryError +# Cap NIO direct buffer cache to limit retained off-heap memory. +-Djdk.nio.maxCachedBufferSize=2000000 +# Allow self-attach for profilers (e.g., async-profiler) during debugging. +-Djdk.attach.allowAttachSelf=true +# Open JDK internals for reflection required by Presto and dependencies under Java 11+ modules. +--add-opens=java.base/java.io=ALL-UNNAMED +--add-opens=java.base/java.lang=ALL-UNNAMED +--add-opens=java.base/java.lang.ref=ALL-UNNAMED +--add-opens=java.base/java.lang.reflect=ALL-UNNAMED +--add-opens=java.base/java.net=ALL-UNNAMED +--add-opens=java.base/java.nio=ALL-UNNAMED +--add-opens=java.base/java.security=ALL-UNNAMED +--add-opens=java.base/javax.security.auth=ALL-UNNAMED +--add-opens=java.base/javax.security.auth.login=ALL-UNNAMED +--add-opens=java.base/java.text=ALL-UNNAMED +--add-opens=java.base/java.util=ALL-UNNAMED +--add-opens=java.base/java.util.concurrent=ALL-UNNAMED +--add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED +--add-opens=java.base/java.util.regex=ALL-UNNAMED +--add-opens=java.base/jdk.internal.loader=ALL-UNNAMED +--add-opens=java.base/sun.security.action=ALL-UNNAMED +--add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED diff --git a/presto/docker/docker-compose.common.yml b/presto/docker/docker-compose.common.yml index 6165006d..2d7bfe27 100644 --- a/presto/docker/docker-compose.common.yml +++ b/presto/docker/docker-compose.common.yml @@ -33,3 +33,4 @@ services: volumes: - ./config/generated/etc_worker/node.properties:/opt/presto-server/etc/node.properties - ./config/generated/etc_worker/config_native.properties:/opt/presto-server/etc/config.properties + - ./config/generated/etc_common_java/jvm_native.config:/opt/presto-server/etc/jvm.config diff --git a/presto/docker/docker-compose.java.yml b/presto/docker/docker-compose.java.yml index f25fb508..fec692a5 100644 --- a/presto/docker/docker-compose.java.yml +++ b/presto/docker/docker-compose.java.yml @@ -5,6 +5,7 @@ services: service: 
presto-base-coordinator volumes: - ./config/generated/etc_coordinator/config_java.properties:/opt/presto-server/etc/config.properties + - ./config/generated/etc_common_java/jvm_java.config:/opt/presto-server/etc/jvm.config presto-java-worker: extends: @@ -15,5 +16,6 @@ services: volumes: - ./config/generated/etc_worker/config_java.properties:/opt/presto-server/etc/config.properties - ./config/generated/etc_worker/node.properties:/opt/presto-server/etc/node.properties + - ./config/generated/etc_common_java/jvm_java.config:/opt/presto-server/etc/jvm.config depends_on: - presto-coordinator diff --git a/presto/docker/docker-compose.native-cpu.yml b/presto/docker/docker-compose.native-cpu.yml index 09fd5a87..958acc01 100644 --- a/presto/docker/docker-compose.native-cpu.yml +++ b/presto/docker/docker-compose.native-cpu.yml @@ -5,6 +5,7 @@ services: service: presto-base-coordinator volumes: - ./config/generated/etc_coordinator/config_native.properties:/opt/presto-server/etc/config.properties + - ./config/generated/etc_common_java/jvm_native.config:/opt/presto-server/etc/jvm.config presto-native-worker-cpu: extends: diff --git a/presto/docker/docker-compose.native-gpu.yml b/presto/docker/docker-compose.native-gpu.yml index 376c5167..d7f4c526 100644 --- a/presto/docker/docker-compose.native-gpu.yml +++ b/presto/docker/docker-compose.native-gpu.yml @@ -5,6 +5,7 @@ services: service: presto-base-coordinator volumes: - ./config/generated/etc_coordinator/config_native.properties:/opt/presto-server/etc/config.properties + - ./config/generated/etc_common_java/jvm_native.config:/opt/presto-server/etc/jvm.config presto-native-worker-gpu: extends: diff --git a/presto/scripts/common_functions.sh b/presto/scripts/common_functions.sh index 9c7cc4df..55edff37 100644 --- a/presto/scripts/common_functions.sh +++ b/presto/scripts/common_functions.sh @@ -14,6 +14,21 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+function print_presto_container_status_and_logs() {
+  # Debug aid: list all containers (running or exited), then print each container's logs.
+  echo "############ Docker Container Status ############"
+  docker ps -a
+
+  # Names come from process substitution: no containers => zero iterations; names stay quoted.
+  echo "############ Docker Logs ############"
+  local CONTAINER
+  while IFS= read -r CONTAINER; do
+    echo "############ Log for Container '${CONTAINER}' ############"
+    docker logs "${CONTAINER}"
+  done < <(docker ps -a --format '{{.Names}}')
+  echo "############ End of Docker Logs ############"
+}
+
 function wait_for_worker_node_registration() { trap "rm -rf node_response.json" RETURN @@ -28,6 +43,7 @@ function wait_for_worker_node_registration() { (( $(jq length node_response.json) > 0 )); do if (( $retry_count >= $MAX_RETRIES )); then echo "Error: Worker node not registered after 60s. Exiting." + print_presto_container_status_and_logs exit 1 fi sleep 5