Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ public class ParquetProperties {
public static final boolean DEFAULT_SIZE_STATISTICS_ENABLED = true;

public static final boolean DEFAULT_PAGE_WRITE_CHECKSUM_ENABLED = true;
/**
 * Default page compression threshold, used as the initial value of the builder's
 * {@code pageCompressThreshold} setting. The threshold is a compression ratio
 * (compressed size / uncompressed size); see {@code Builder#withPageCompressThreshold(double)}.
 */
public static final double DEFAULT_PAGE_COMPRESS_THRESHOLD = 0.98;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why this magic number? Would a smaller value such as 0.9 or 0.85 be a better default?


/**
* @deprecated This shared instance can cause thread safety issues when used by multiple builders concurrently.
Expand Down Expand Up @@ -120,6 +121,7 @@ public static WriterVersion fromString(String name) {
private final int statisticsTruncateLength;
private final boolean statisticsEnabled;
private final boolean sizeStatisticsEnabled;
private final double pageCompressThreshold;

// The expected NDV (number of distinct values) for each column
private final ColumnProperty<Long> bloomFilterNDVs;
Expand Down Expand Up @@ -154,6 +156,8 @@ private ParquetProperties(Builder builder) {
this.statisticsTruncateLength = builder.statisticsTruncateLength;
this.statisticsEnabled = builder.statisticsEnabled;
this.sizeStatisticsEnabled = builder.sizeStatisticsEnabled;
this.pageCompressThreshold = builder.pageCompressThreshold;

this.bloomFilterNDVs = builder.bloomFilterNDVs.build();
this.bloomFilterFPPs = builder.bloomFilterFPPs.build();
this.bloomFilterEnabled = builder.bloomFilterEnabled.build();
Expand Down Expand Up @@ -322,6 +326,10 @@ public boolean getPageWriteChecksumEnabled() {
return pageWriteChecksumEnabled;
}

/**
 * Returns the page compression threshold these properties were built with.
 *
 * @return the configured compression ratio threshold
 */
public double pageCompressThreshold() {
  return this.pageCompressThreshold;
}

public OptionalLong getBloomFilterNDV(ColumnDescriptor column) {
Long ndv = bloomFilterNDVs.getValue(column);
return ndv == null ? OptionalLong.empty() : OptionalLong.of(ndv);
Expand Down Expand Up @@ -388,7 +396,8 @@ public String toString() {
+ "Page row count limit to " + getPageRowCountLimit() + '\n'
+ "Writing page checksums is: " + (getPageWriteChecksumEnabled() ? "on" : "off") + '\n'
+ "Statistics enabled: " + statisticsEnabled + '\n'
+ "Size statistics enabled: " + sizeStatisticsEnabled;
+ "Size statistics enabled: " + sizeStatisticsEnabled + '\n'
+ "Page compress threshold: " + pageCompressThreshold;
}

public static class Builder {
Expand All @@ -406,6 +415,7 @@ public static class Builder {
private int statisticsTruncateLength = DEFAULT_STATISTICS_TRUNCATE_LENGTH;
private boolean statisticsEnabled = DEFAULT_STATISTICS_ENABLED;
private boolean sizeStatisticsEnabled = DEFAULT_SIZE_STATISTICS_ENABLED;
private double pageCompressThreshold = DEFAULT_PAGE_COMPRESS_THRESHOLD;
private final ColumnProperty.Builder<Long> bloomFilterNDVs;
private final ColumnProperty.Builder<Double> bloomFilterFPPs;
private int maxBloomFilterBytes = DEFAULT_MAX_BLOOM_FILTER_BYTES;
Expand Down Expand Up @@ -460,6 +470,7 @@ private Builder(ParquetProperties toCopy) {
this.extraMetaData = toCopy.extraMetaData;
this.statistics = ColumnProperty.builder(toCopy.statistics);
this.sizeStatistics = ColumnProperty.builder(toCopy.sizeStatistics);
this.pageCompressThreshold = toCopy.pageCompressThreshold();
}

/**
Expand Down Expand Up @@ -756,6 +767,21 @@ public Builder withSizeStatisticsEnabled(String columnPath, boolean enabled) {
return this;
}

/**
 * Sets the compression threshold for data pages. This setting only takes effect for V2 pages.
 *
 * <p>When the compression ratio (compressed size / uncompressed size) exceeds this threshold,
 * the uncompressed data will be used instead. For example, with a threshold of 0.98, if
 * compression saves less than 2% of space, the data will not be compressed.
 *
 * @param threshold the compression ratio threshold, default is {@value #DEFAULT_PAGE_COMPRESS_THRESHOLD};
 *     must not be NaN
 * @return this builder for method chaining
 * @throws IllegalArgumentException if {@code threshold} is NaN
 */
public Builder withPageCompressThreshold(double threshold) {
  // Every ordered comparison against NaN evaluates to false, so a NaN threshold could never be
  // "exceeded" and the rule documented above would silently stop applying. Reject it up front.
  if (Double.isNaN(threshold)) {
    throw new IllegalArgumentException("Invalid page compress threshold (NaN): " + threshold);
  }
  this.pageCompressThreshold = threshold;
  return this;
}

public ParquetProperties build() {
ParquetProperties properties = new ParquetProperties(this);
// we pass a constructed but uninitialized factory to ParquetProperties above as currently
Expand Down
Loading