From e52b5ccce838a2dad15abb33aa05df73e928a45d Mon Sep 17 00:00:00 2001 From: Simon Eves Date: Mon, 3 Nov 2025 09:34:54 -0800 Subject: [PATCH 1/4] Split files --- .../etc_common/catalog/hive.properties | 22 +------------------ .../etc_coordinator/catalog/hive.properties | 21 ++++++++++++++++++ .../etc_worker/catalog/hive.properties | 21 ++++++++++++++++++ 3 files changed, 43 insertions(+), 21 deletions(-) create mode 100644 presto/docker/config/template/etc_coordinator/catalog/hive.properties create mode 100644 presto/docker/config/template/etc_worker/catalog/hive.properties diff --git a/presto/docker/config/template/etc_common/catalog/hive.properties b/presto/docker/config/template/etc_common/catalog/hive.properties index db4e38f1..cb8f2a63 100644 --- a/presto/docker/config/template/etc_common/catalog/hive.properties +++ b/presto/docker/config/template/etc_common/catalog/hive.properties @@ -1,21 +1 @@ -# Select the connector implementation. "hive-hadoop2" uses the Hive connector -# backed by Hadoop 2.x libraries which is the default for Presto's Hive support. -connector.name=hive-hadoop2 - -# Configure the metastore implementation. "file" enables a simple file-based -# metastore suitable for local testing without an external Hive Metastore (HMS). -# See https://prestodb.io/docs/current/installation/deployment.html#configuring-a-file-based-metastore for more details. -hive.metastore=file -# Root directory where the file-based metastore stores table and partition -# metadata. This path is inside the container volume so state persists across -# server restarts during tests. -hive.metastore.catalog.dir=file:/var/lib/presto/data/hive/metastore -# Allow DROP TABLE statements. Enabled to make smoke/perf tests able to reset -# state and clean up artifacts without manual intervention. -hive.allow-drop-table=true - -# Control whether Presto can split files for parallel reads. Disable when the -# file compression/format isn't splittable to avoid read failures. 
TPCH Parquet -# test data commonly uses SNAPPY compression that isn't splittable at the file -# level here, hence this must be false. -hive.file-splittable=false +# This file will be overridden by a coordinator or worker specific configuration file. diff --git a/presto/docker/config/template/etc_coordinator/catalog/hive.properties b/presto/docker/config/template/etc_coordinator/catalog/hive.properties new file mode 100644 index 00000000..db4e38f1 --- /dev/null +++ b/presto/docker/config/template/etc_coordinator/catalog/hive.properties @@ -0,0 +1,21 @@ +# Select the connector implementation. "hive-hadoop2" uses the Hive connector +# backed by Hadoop 2.x libraries which is the default for Presto's Hive support. +connector.name=hive-hadoop2 + +# Configure the metastore implementation. "file" enables a simple file-based +# metastore suitable for local testing without an external Hive Metastore (HMS). +# See https://prestodb.io/docs/current/installation/deployment.html#configuring-a-file-based-metastore for more details. +hive.metastore=file +# Root directory where the file-based metastore stores table and partition +# metadata. This path is inside the container volume so state persists across +# server restarts during tests. +hive.metastore.catalog.dir=file:/var/lib/presto/data/hive/metastore +# Allow DROP TABLE statements. Enabled to make smoke/perf tests able to reset +# state and clean up artifacts without manual intervention. +hive.allow-drop-table=true + +# Control whether Presto can split files for parallel reads. Disable when the +# file compression/format isn't splittable to avoid read failures. TPCH Parquet +# test data commonly uses SNAPPY compression that isn't splittable at the file +# level here, hence this must be false. 
+hive.file-splittable=false diff --git a/presto/docker/config/template/etc_worker/catalog/hive.properties b/presto/docker/config/template/etc_worker/catalog/hive.properties new file mode 100644 index 00000000..db4e38f1 --- /dev/null +++ b/presto/docker/config/template/etc_worker/catalog/hive.properties @@ -0,0 +1,21 @@ +# Select the connector implementation. "hive-hadoop2" uses the Hive connector +# backed by Hadoop 2.x libraries which is the default for Presto's Hive support. +connector.name=hive-hadoop2 + +# Configure the metastore implementation. "file" enables a simple file-based +# metastore suitable for local testing without an external Hive Metastore (HMS). +# See https://prestodb.io/docs/current/installation/deployment.html#configuring-a-file-based-metastore for more details. +hive.metastore=file +# Root directory where the file-based metastore stores table and partition +# metadata. This path is inside the container volume so state persists across +# server restarts during tests. +hive.metastore.catalog.dir=file:/var/lib/presto/data/hive/metastore +# Allow DROP TABLE statements. Enabled to make smoke/perf tests able to reset +# state and clean up artifacts without manual intervention. +hive.allow-drop-table=true + +# Control whether Presto can split files for parallel reads. Disable when the +# file compression/format isn't splittable to avoid read failures. TPCH Parquet +# test data commonly uses SNAPPY compression that isn't splittable at the file +# level here, hence this must be false. 
+hive.file-splittable=false From 171459067b945cc3dbcf3ae098bb923dea89b18d Mon Sep 17 00:00:00 2001 From: Simon Eves Date: Mon, 3 Nov 2025 09:35:11 -0800 Subject: [PATCH 2/4] Map split files --- presto/docker/docker-compose.java.yml | 2 ++ presto/docker/docker-compose.native-cpu.yml | 2 ++ presto/docker/docker-compose.native-gpu.yml | 2 ++ 3 files changed, 6 insertions(+) diff --git a/presto/docker/docker-compose.java.yml b/presto/docker/docker-compose.java.yml index 120f0234..6352058a 100644 --- a/presto/docker/docker-compose.java.yml +++ b/presto/docker/docker-compose.java.yml @@ -7,6 +7,7 @@ services: - ./config/generated/java/etc_common:/opt/presto-server/etc - ./config/generated/java/etc_coordinator/config_java.properties:/opt/presto-server/etc/config.properties - ./config/generated/java/etc_coordinator/node.properties:/opt/presto-server/etc/node.properties + - ./config/generated/java/etc_coordinator/catalog/hive.properties:/opt/presto-server/etc/catalog/hive.properties presto-java-worker: extends: @@ -18,5 +19,6 @@ services: - ./config/generated/java/etc_common:/opt/presto-server/etc - ./config/generated/java/etc_worker/config_java.properties:/opt/presto-server/etc/config.properties - ./config/generated/java/etc_worker/node.properties:/opt/presto-server/etc/node.properties + - ./config/generated/java/etc_worker/catalog/hive.properties:/opt/presto-server/etc/catalog/hive.properties depends_on: - presto-coordinator diff --git a/presto/docker/docker-compose.native-cpu.yml b/presto/docker/docker-compose.native-cpu.yml index 830e59ac..6c7b5dd3 100644 --- a/presto/docker/docker-compose.native-cpu.yml +++ b/presto/docker/docker-compose.native-cpu.yml @@ -7,6 +7,7 @@ services: - ./config/generated/cpu/etc_common:/opt/presto-server/etc - ./config/generated/cpu/etc_coordinator/config_native.properties:/opt/presto-server/etc/config.properties - ./config/generated/cpu/etc_coordinator/node.properties:/opt/presto-server/etc/node.properties + - 
./config/generated/cpu/etc_coordinator/catalog/hive.properties:/opt/presto-server/etc/catalog/hive.properties presto-native-worker-cpu: extends: @@ -23,3 +24,4 @@ services: - ./config/generated/cpu/etc_common:/opt/presto-server/etc - ./config/generated/cpu/etc_worker/node.properties:/opt/presto-server/etc/node.properties - ./config/generated/cpu/etc_worker/config_native.properties:/opt/presto-server/etc/config.properties + - ./config/generated/cpu/etc_worker/catalog/hive.properties:/opt/presto-server/etc/catalog/hive.properties diff --git a/presto/docker/docker-compose.native-gpu.yml b/presto/docker/docker-compose.native-gpu.yml index edfc499d..4f572408 100644 --- a/presto/docker/docker-compose.native-gpu.yml +++ b/presto/docker/docker-compose.native-gpu.yml @@ -7,6 +7,7 @@ services: - ./config/generated/gpu/etc_common:/opt/presto-server/etc - ./config/generated/gpu/etc_coordinator/config_native.properties:/opt/presto-server/etc/config.properties - ./config/generated/gpu/etc_coordinator/node.properties:/opt/presto-server/etc/node.properties + - ./config/generated/gpu/etc_coordinator/catalog/hive.properties:/opt/presto-server/etc/catalog/hive.properties presto-native-worker-gpu: extends: @@ -28,3 +29,4 @@ services: - ./config/generated/gpu/etc_common:/opt/presto-server/etc - ./config/generated/gpu/etc_worker/node.properties:/opt/presto-server/etc/node.properties - ./config/generated/gpu/etc_worker/config_native.properties:/opt/presto-server/etc/config.properties + - ./config/generated/gpu/etc_worker/catalog/hive.properties:/opt/presto-server/etc/catalog/hive.properties From 6e8dc102fbfc62fccd81761aad47ecb243c0b9eb Mon Sep 17 00:00:00 2001 From: Simon Eves Date: Mon, 3 Nov 2025 10:05:36 -0800 Subject: [PATCH 3/4] Add default Parquet read options to Worker Hive properties --- .../docker/config/template/etc_worker/catalog/hive.properties | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/presto/docker/config/template/etc_worker/catalog/hive.properties 
b/presto/docker/config/template/etc_worker/catalog/hive.properties index db4e38f1..397dafe0 100644 --- a/presto/docker/config/template/etc_worker/catalog/hive.properties +++ b/presto/docker/config/template/etc_worker/catalog/hive.properties @@ -19,3 +19,7 @@ hive.allow-drop-table=true # test data commonly uses SNAPPY compression that isn't splittable at the file # level here, hence this must be false. hive.file-splittable=false + +# Parquet read options +parquet.reader.chunk-read-limit=0 +parquet.reader.pass-read-limit=0 From 84b10bd28d5a3fe23314a80e5db5b54f940e5be4 Mon Sep 17 00:00:00 2001 From: Simon Eves Date: Mon, 3 Nov 2025 20:53:46 -0800 Subject: [PATCH 4/4] Add comments on Parquet read parameters --- .../docker/config/template/etc_worker/catalog/hive.properties | 2 ++ 1 file changed, 2 insertions(+) diff --git a/presto/docker/config/template/etc_worker/catalog/hive.properties b/presto/docker/config/template/etc_worker/catalog/hive.properties index 397dafe0..13fd7548 100644 --- a/presto/docker/config/template/etc_worker/catalog/hive.properties +++ b/presto/docker/config/template/etc_worker/catalog/hive.properties @@ -21,5 +21,7 @@ hive.allow-drop-table=true hive.file-splittable=false # Parquet read options +# Maximum number of bytes returned per read, or 0 for no limit parquet.reader.chunk-read-limit=0 +# Maximum number of bytes of memory used for reading and decompressing data, or 0 for no limit parquet.reader.pass-read-limit=0