diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java index 88eabb0c1a41..782f2008730c 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java @@ -61,6 +61,7 @@ import io.trino.plugin.hive.fs.DirectoryLister; import io.trino.plugin.hive.metastore.SemiTransactionalHiveMetastore; import io.trino.plugin.hive.procedure.OptimizeTableProcedure; +import io.trino.plugin.hive.projection.PartitionProjection; import io.trino.plugin.hive.security.AccessControlMetadata; import io.trino.plugin.hive.statistics.HiveStatisticsProvider; import io.trino.plugin.hive.util.HiveUtil; @@ -292,6 +293,8 @@ import static io.trino.plugin.hive.metastore.SemiTransactionalHiveMetastore.cleanExtraOutputFiles; import static io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.getSupportedColumnStatistics; import static io.trino.plugin.hive.projection.PartitionProjectionProperties.arePartitionProjectionPropertiesSet; +import static io.trino.plugin.hive.projection.PartitionProjectionProperties.getPartitionProjectionFromHiveTable; +import static io.trino.plugin.hive.projection.PartitionProjectionProperties.getPartitionProjectionFromTable; import static io.trino.plugin.hive.projection.PartitionProjectionProperties.getPartitionProjectionHiveTableProperties; import static io.trino.plugin.hive.projection.PartitionProjectionProperties.getPartitionProjectionTrinoTableProperties; import static io.trino.plugin.hive.util.AcidTables.deltaSubdir; @@ -1719,12 +1722,13 @@ public void finishStatisticsCollection(ConnectorSession session, ConnectorTableH .map(HiveColumnHandle::getType) .collect(toImmutableList()); + Optional partitionProjection = getPartitionProjectionFromHiveTable(handle); for (int i = 0; i < partitionNames.size(); i++) { String partitionName = partitionNames.get(i); List partitionValues = partitionValuesList.get(i); ComputedStatistics collectedStatistics = computedStatisticsMap.containsKey(partitionValues) ? 
computedStatisticsMap.get(partitionValues) - : computedStatisticsMap.get(canonicalizePartitionValues(partitionName, partitionValues, partitionTypes)); + : computedStatisticsMap.get(canonicalizePartitionValues(partitionName, partitionValues, partitionColumnNames, partitionTypes, partitionProjection)); if (collectedStatistics == null) { partitionStatistics.put(partitionValues, emptyPartitionStatistics.get()); } @@ -1736,14 +1740,20 @@ public void finishStatisticsCollection(ConnectorSession session, ConnectorTableH } } - private static List canonicalizePartitionValues(String partitionName, List partitionValues, List partitionTypes) + private static List canonicalizePartitionValues( + String partitionName, + List partitionValues, + List partitionColumns, + List partitionTypes, + Optional partitionProjection) { verify(partitionValues.size() == partitionTypes.size(), "Expected partitionTypes size to be %s but got %s", partitionValues.size(), partitionTypes.size()); + verify(partitionValues.size() == partitionColumns.size(), "Expected partitionColumns size to be %s but got %s", partitionValues.size(), partitionColumns.size()); Block[] parsedPartitionValuesBlocks = new Block[partitionValues.size()]; for (int i = 0; i < partitionValues.size(); i++) { String partitionValue = partitionValues.get(i); Type partitionType = partitionTypes.get(i); - parsedPartitionValuesBlocks[i] = parsePartitionValue(partitionName, partitionValue, partitionType).asBlock(); + parsedPartitionValuesBlocks[i] = parsePartitionValue(partitionName, partitionValue, partitionType, partitionColumns.get(i), partitionProjection).asBlock(); } return createPartitionValues(partitionTypes, new Page(parsedPartitionValuesBlocks), 0); @@ -2154,6 +2164,13 @@ private HiveInsertTableHandle beginInsertOrMerge(ConnectorSession session, Conne throw new TrinoException(NOT_SUPPORTED, description + " Spark bucketed tables is not supported"); } + if (partitionProjectionEnabled) { + Optional partitionProjection = getPartitionProjectionFromTable(table, typeManager); + if (!partitionProjection.map(PartitionProjection::allowWrite).orElse(true)) { + throw new TrinoException(NOT_SUPPORTED, "Hive connector does not support writing to partitions with a custom projection format"); + } + } + List handles = hiveColumnHandles(table, typeManager, getTimestampPrecision(session)).stream() .filter(columnHandle -> !columnHandle.isHidden()) .collect(toImmutableList()); @@ -2281,6 +2298,8 @@ private Table finishChangingTable(AcidOperation acidOperation, String changeDesc .collect(toImmutableMap(HiveColumnHandle::getName, column -> typeManager.getType(getTypeSignature(column.getHiveType())))); Map, ComputedStatistics> partitionComputedStatistics = createComputedStatisticsToPartitionMap(computedStatistics, partitionedBy, columnTypes); + // TODO: read this from the table handle once writing partition projections with a custom format is supported + Optional partitionProjection = getPartitionProjectionFromTable(metastore.getTable(handle.getSchemaName(), handle.getTableName()).orElseThrow(), typeManager); ImmutableList.Builder partitionUpdateInfosBuilder = ImmutableList.builder(); for (PartitionUpdate partitionUpdate : partitionUpdates) { if (partitionUpdate.getName().isEmpty()) { @@ -2338,7 +2357,7 @@ else if (partitionUpdate.getUpdateMode() == APPEND) { PartitionStatistics partitionStatistics = createPartitionStatistics( partitionUpdate.getStatistics(), columnTypes, - getColumnStatistics(partitionComputedStatistics, partitionName, partitionValues, partitionTypes)); + 
getColumnStatistics(partitionComputedStatistics, partitionName, partitionValues, partitionedBy, partitionTypes, partitionProjection)); partitionUpdateInfosBuilder.add( new PartitionUpdateInfo( partitionValues, @@ -2361,7 +2380,7 @@ else if (partitionUpdate.getUpdateMode() == NEW || partitionUpdate.getUpdateMode PartitionStatistics partitionStatistics = createPartitionStatistics( partitionUpdate.getStatistics(), columnTypes, - getColumnStatistics(partitionComputedStatistics, partitionName, partitionValues, partitionTypes)); + getColumnStatistics(partitionComputedStatistics, partitionName, partitionValues, partitionedBy, partitionTypes, partitionProjection)); if (partitionUpdate.getUpdateMode() == OVERWRITE) { if (handle.getLocationHandle().getWriteMode() == DIRECT_TO_TARGET_EXISTING_DIRECTORY) { removeNonCurrentQueryFiles(session, partitionUpdate.getTargetPath()); @@ -2486,12 +2505,14 @@ private static Map getColumnStatistics( Map, ComputedStatistics> statistics, String partitionName, List partitionValues, - List partitionTypes) + List partitionColumns, + List partitionTypes, + Optional partitionProjection) { Optional> columnStatistics = Optional.ofNullable(statistics.get(partitionValues)) .map(ComputedStatistics::getColumnStatistics); return columnStatistics - .orElseGet(() -> getColumnStatistics(statistics, canonicalizePartitionValues(partitionName, partitionValues, partitionTypes))); + .orElseGet(() -> getColumnStatistics(statistics, canonicalizePartitionValues(partitionName, partitionValues, partitionColumns, partitionTypes, partitionProjection))); } @Override diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java index 0fa66f92e875..f6dba191f358 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java @@ -26,6 +26,7 @@ import io.trino.plugin.hive.acid.AcidTransaction; import io.trino.plugin.hive.coercions.CoercionUtils.CoercionContext; import io.trino.plugin.hive.coercions.TypeCoercer; +import io.trino.plugin.hive.projection.PartitionProjection; import io.trino.plugin.hive.util.HiveBucketing.BucketingVersion; import io.trino.spi.TrinoException; import io.trino.spi.connector.ColumnHandle; @@ -71,6 +72,7 @@ import static io.trino.plugin.hive.coercions.CoercionUtils.createCoercer; import static io.trino.plugin.hive.coercions.CoercionUtils.createTypeFromCoercer; import static io.trino.plugin.hive.coercions.CoercionUtils.extractHiveStorageFormat; +import static io.trino.plugin.hive.projection.PartitionProjectionProperties.getPartitionProjectionFromHiveTable; import static io.trino.plugin.hive.util.HiveBucketing.HiveBucketFilter; import static io.trino.plugin.hive.util.HiveBucketing.getHiveBucketFilter; import static io.trino.plugin.hive.util.HiveTypeUtil.getHiveTypeForDereferences; @@ -132,7 +134,8 @@ public ConnectorPageSource createPageSource( hiveSplit.getPath(), hiveSplit.getTableBucketNumber(), hiveSplit.getEstimatedFileSize(), - hiveSplit.getFileModifiedTime()); + hiveSplit.getFileModifiedTime(), + getPartitionProjectionFromHiveTable(hiveTable)); // Perform dynamic partition pruning in case coordinator didn't prune split. // This can happen when dynamic filters are collected after partition splits were listed. 
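Note on the mechanism these hunks introduce: an Optional<PartitionProjection> is threaded from the table handle down to partition-value parsing, and every consumer applies the same "projection first, existing parser as fallback" rule, so tables without a projection keep the old behavior exactly. Below is a minimal, self-contained model of that rule; the names (Projection, parse, defaultParser) are illustrative stand-ins, not Trino APIs.

```java
import java.util.Optional;
import java.util.function.Function;

// Toy model of the projection-first fallback wired through this change:
// a projection that recognizes the column parses the raw partition value,
// otherwise the pre-existing default parser runs unchanged.
public class ProjectionFirstParsing
{
    interface Projection
    {
        // Empty result means "no opinion": defer to the default parser
        Optional<String> parsePartitionValue(String columnName, String rawValue);
    }

    static String parse(String columnName, String rawValue, Optional<Projection> projection, Function<String, String> defaultParser)
    {
        return projection
                .flatMap(p -> p.parsePartitionValue(columnName, rawValue))
                .orElseGet(() -> defaultParser.apply(rawValue));
    }

    public static void main(String[] args)
    {
        Projection custom = (column, value) -> column.equals("dt") ? Optional.of("projected:" + value) : Optional.empty();
        System.out.println(parse("dt", "2025/01/23", Optional.of(custom), v -> "default:" + v)); // projected:2025/01/23
        System.out.println(parse("name", "bob", Optional.of(custom), v -> "default:" + v));      // default:bob
        System.out.println(parse("dt", "2025/01/23", Optional.empty(), v -> "default:" + v));    // default:2025/01/23
    }
}
```

This mirrors what the changes to HiveUtil.parsePartitionValue and HiveUtil.getPrefilledColumnValue later in this diff do with NullableValue results.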
@@ -466,6 +469,21 @@ public static List buildColumnMappings( OptionalInt bucketNumber, long estimatedFileSize, long fileModifiedTime) + { + return buildColumnMappings(partitionName, partitionKeys, columns, requiredInterimColumns, hiveColumnCoercions, path, bucketNumber, estimatedFileSize, fileModifiedTime, Optional.empty()); + } + + public static List buildColumnMappings( + String partitionName, + List partitionKeys, + List columns, + List requiredInterimColumns, + Map hiveColumnCoercions, + String path, + OptionalInt bucketNumber, + long estimatedFileSize, + long fileModifiedTime, + Optional partitionProjection) { Map partitionKeysByName = uniqueIndex(partitionKeys, HivePartitionKey::name); @@ -515,7 +533,7 @@ else if (isRowIdColumnHandle(column)) { else { columnMappings.add(prefilled( column, - getPrefilledColumnValue(column, partitionKeysByName.get(column.getName()), path, bucketNumber, estimatedFileSize, fileModifiedTime, partitionName), + getPrefilledColumnValue(column, partitionKeysByName.get(column.getName()), path, bucketNumber, estimatedFileSize, fileModifiedTime, partitionName, partitionProjection), baseTypeCoercionFrom)); } } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePartitionManager.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePartitionManager.java index 1bfd12de825a..b0a78aa0a2f7 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePartitionManager.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePartitionManager.java @@ -20,6 +20,7 @@ import com.google.inject.Inject; import io.trino.metastore.HivePartition; import io.trino.plugin.hive.metastore.SemiTransactionalHiveMetastore; +import io.trino.plugin.hive.projection.PartitionProjection; import io.trino.plugin.hive.util.HiveBucketing.HiveBucketFilter; import io.trino.spi.connector.ColumnHandle; import io.trino.spi.connector.ConnectorTableHandle; @@ -41,6 +42,7 @@ import static io.trino.metastore.Partitions.unescapePathName; import static io.trino.plugin.hive.metastore.MetastoreUtil.computePartitionKeyFilter; import static io.trino.plugin.hive.metastore.MetastoreUtil.toPartitionName; +import static io.trino.plugin.hive.projection.PartitionProjectionProperties.getPartitionProjectionFromHiveTable; import static io.trino.plugin.hive.util.HiveBucketing.getHiveBucketFilter; import static io.trino.plugin.hive.util.HiveUtil.parsePartitionValue; @@ -108,9 +110,10 @@ public HivePartitionResult getPartitions(SemiTransactionalHiveMetastore metastor else { List partitionNamesList = hiveTableHandle.getPartitionNames() .orElseGet(() -> getFilteredPartitionNames(metastore, tableName, partitionColumns, compactEffectivePredicate)); + Optional partitionProjection = getPartitionProjectionFromHiveTable(hiveTableHandle); partitionsIterable = () -> partitionNamesList.stream() // Apply extra filters which could not be done by getFilteredPartitionNames - .map(partitionName -> parseValuesAndFilterPartition(tableName, partitionName, partitionColumns, effectivePredicate, predicate)) + .map(partitionName -> parseValuesAndFilterPartition(tableName, partitionName, partitionColumns, effectivePredicate, predicate, partitionProjection)) .filter(Optional::isPresent) .map(Optional::get) .iterator(); @@ -131,9 +134,10 @@ public HivePartitionResult getPartitions(ConnectorTableHandle tableHandle, List< .map(HiveColumnHandle::getName) .collect(toImmutableList()); + Optional partitionProjection = getPartitionProjectionFromHiveTable(hiveTableHandle); List partitionList = 
partitionValuesList.stream() .map(partitionValues -> toPartitionName(partitionColumnNames, partitionValues)) - .map(partitionName -> parseValuesAndFilterPartition(tableName, partitionName, partitionColumns, TupleDomain.all(), value -> true)) + .map(partitionName -> parseValuesAndFilterPartition(tableName, partitionName, partitionColumns, TupleDomain.all(), value -> true, partitionProjection)) .map(partition -> partition.orElseThrow(() -> new VerifyException("partition must exist"))) .collect(toImmutableList()); @@ -211,9 +215,10 @@ private Optional parseValuesAndFilterPartition( String partitionId, List partitionColumns, TupleDomain constraintSummary, - Predicate> constraint) + Predicate> constraint, + Optional partitionProjection) { - HivePartition partition = parsePartition(tableName, partitionId, partitionColumns); + HivePartition partition = parsePartition(tableName, partitionId, partitionColumns, partitionProjection); if (partitionMatches(partitionColumns, constraintSummary, constraint, partition)) { return Optional.of(partition); @@ -256,13 +261,14 @@ private List getFilteredPartitionNames(SemiTransactionalHiveMetastore me public static HivePartition parsePartition( SchemaTableName tableName, String partitionName, - List partitionColumns) + List partitionColumns, + Optional partitionProjection) { List partitionValues = extractPartitionValues(partitionName); ImmutableMap.Builder builder = ImmutableMap.builderWithExpectedSize(partitionColumns.size()); for (int i = 0; i < partitionColumns.size(); i++) { HiveColumnHandle column = partitionColumns.get(i); - NullableValue parsedValue = parsePartitionValue(partitionName, partitionValues.get(i), column.getType()); + NullableValue parsedValue = parsePartitionValue(partitionName, partitionValues.get(i), column.getType(), column.getName(), partitionProjection); builder.put(column, parsedValue); } Map values = builder.buildOrThrow(); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveTableHandle.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveTableHandle.java index 48704d70f60a..3ac2e27fc101 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveTableHandle.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveTableHandle.java @@ -71,12 +71,13 @@ public HiveTableHandle( @JsonProperty("tablePartitioning") Optional tablePartitioning, @JsonProperty("bucketFilter") Optional bucketFilter, @JsonProperty("analyzePartitionValues") Optional>> analyzePartitionValues, - @JsonProperty("transaction") AcidTransaction transaction) + @JsonProperty("transaction") AcidTransaction transaction, + @JsonProperty("tableParameters") Optional> tableParameters) { this( schemaName, tableName, - Optional.empty(), + tableParameters, partitionColumns, dataColumns, Optional.empty(), @@ -340,8 +341,7 @@ public String getTableName() return tableName; } - // do not serialize tableParameters as they are not needed on workers - @JsonIgnore + @JsonProperty public Optional> getTableParameters() { return tableParameters; diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/projection/DateProjection.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/projection/DateProjection.java index 09662eecb55f..16506cb77d50 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/projection/DateProjection.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/projection/DateProjection.java @@ -17,46 +17,54 @@ import com.google.common.collect.ImmutableSet; import io.trino.spi.TrinoException; 
import io.trino.spi.predicate.Domain; +import io.trino.spi.predicate.NullableValue; +import io.trino.spi.type.CharType; import io.trino.spi.type.DateType; import io.trino.spi.type.TimestampType; import io.trino.spi.type.Type; import io.trino.spi.type.VarcharType; -import java.text.DateFormat; -import java.text.ParseException; -import java.text.SimpleDateFormat; import java.time.Instant; +import java.time.LocalDate; +import java.time.LocalDateTime; import java.time.ZoneId; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeParseException; import java.time.temporal.ChronoField; import java.time.temporal.ChronoUnit; -import java.util.Date; +import java.time.temporal.TemporalAccessor; +import java.time.temporal.TemporalQueries; import java.util.List; -import java.util.Locale; import java.util.Map; import java.util.Optional; import java.util.Set; -import java.util.function.Supplier; import java.util.regex.Matcher; import java.util.regex.Pattern; import static com.google.common.collect.ImmutableList.toImmutableList; import static io.airlift.slice.Slices.utf8Slice; +import static io.trino.metastore.Partitions.HIVE_DEFAULT_DYNAMIC_PARTITION; import static io.trino.plugin.hive.projection.PartitionProjectionProperties.COLUMN_PROJECTION_FORMAT; import static io.trino.plugin.hive.projection.PartitionProjectionProperties.COLUMN_PROJECTION_INTERVAL; import static io.trino.plugin.hive.projection.PartitionProjectionProperties.COLUMN_PROJECTION_INTERVAL_UNIT; import static io.trino.plugin.hive.projection.PartitionProjectionProperties.COLUMN_PROJECTION_RANGE; import static io.trino.plugin.hive.projection.PartitionProjectionProperties.getProjectionPropertyRequiredValue; import static io.trino.plugin.hive.projection.PartitionProjectionProperties.getProjectionPropertyValue; +import static io.trino.plugin.hive.util.HiveUtil.HIVE_DATE_PARSER; +import static io.trino.plugin.hive.util.HiveUtil.HIVE_TIMESTAMP_PARSER; import static io.trino.spi.predicate.Domain.singleValue; +import static io.trino.spi.type.DateType.DATE; +import static io.trino.spi.type.TimestampType.TIMESTAMP_MILLIS; import static java.lang.String.format; +import static java.time.ZoneOffset.UTC; import static java.time.temporal.ChronoUnit.DAYS; import static java.time.temporal.ChronoUnit.HOURS; import static java.time.temporal.ChronoUnit.MINUTES; import static java.time.temporal.ChronoUnit.MONTHS; import static java.time.temporal.ChronoUnit.SECONDS; +import static java.util.Locale.ENGLISH; import static java.util.Objects.nonNull; import static java.util.Objects.requireNonNull; -import static java.util.TimeZone.getTimeZone; import static java.util.concurrent.TimeUnit.MILLISECONDS; final class DateProjection @@ -71,9 +79,10 @@ final class DateProjection private static final Pattern DATE_RANGE_BOUND_EXPRESSION_PATTERN = Pattern.compile("^\\s*NOW\\s*(([+-])\\s*([0-9]+)\\s*(DAY|HOUR|MINUTE|SECOND)S?\\s*)?$"); private final String columnName; - private final DateFormat dateFormat; - private final Supplier leftBound; - private final Supplier rightBound; + private final Type columnType; + private final DateTimeFormatter dateFormat; + private final Instant leftBound; + private final Instant rightBound; private final int interval; private final ChronoUnit intervalUnit; @@ -86,6 +95,7 @@ public DateProjection(String columnName, Type columnType, Map co } this.columnName = requireNonNull(columnName, "columnName is null"); + this.columnType = requireNonNull(columnType, "columnType is null"); String dateFormatPattern = 
getProjectionPropertyRequiredValue( columnName, @@ -104,14 +114,11 @@ public DateProjection(String columnName, Type columnType, Map co throw invalidRangeProperty(columnName, dateFormatPattern, Optional.empty()); } - SimpleDateFormat dateFormat = new SimpleDateFormat(dateFormatPattern); - dateFormat.setLenient(false); - dateFormat.setTimeZone(getTimeZone(UTC_TIME_ZONE_ID)); - this.dateFormat = requireNonNull(dateFormat, "dateFormatPattern is null"); + this.dateFormat = DateTimeFormatter.ofPattern(dateFormatPattern, ENGLISH); - leftBound = parseDateRangerBound(columnName, range.get(0), dateFormat); - rightBound = parseDateRangerBound(columnName, range.get(1), dateFormat); - if (!leftBound.get().isBefore(rightBound.get())) { + leftBound = parseDateRangerBound(columnName, range.get(0), dateFormatPattern, dateFormat); + rightBound = parseDateRangerBound(columnName, range.get(1), dateFormatPattern, dateFormat); + if (!leftBound.isBefore(rightBound)) { throw invalidRangeProperty(columnName, dateFormatPattern, Optional.empty()); } @@ -135,8 +142,8 @@ public List getProjectedValues(Optional partitionValueFilter) { ImmutableList.Builder builder = ImmutableList.builder(); - Instant leftBound = adjustBoundToDateFormat(this.leftBound.get()); - Instant rightBound = adjustBoundToDateFormat(this.rightBound.get()); + Instant leftBound = adjustBoundToDateFormat(this.leftBound); + Instant rightBound = adjustBoundToDateFormat(this.rightBound); Instant currentValue = leftBound; while (!currentValue.isAfter(rightBound)) { @@ -152,20 +159,77 @@ public List getProjectedValues(Optional partitionValueFilter) return builder.build(); } + // TODO: remove once writing with a custom partition projection format is supported + public boolean allowWrite() + { + if (columnType instanceof DateType) { + String formatted = formatValue(Instant.now()); + try { + HIVE_DATE_PARSER.parseLocalDateTime(formatted); + return true; + } + catch (IllegalArgumentException e) { + return false; + } + } + + if (columnType instanceof TimestampType timestampType && timestampType.isShort()) { + String formatted = formatValue(Instant.now()); + try { + HIVE_TIMESTAMP_PARSER.parseLocalDateTime(formatted); + return true; + } + catch (IllegalArgumentException e) { + return false; + } + } + return true; + } + + @Override + public Optional parsePartitionValue(String value) + { + if (value == null) { + return Optional.empty(); + } + + if (HIVE_DEFAULT_DYNAMIC_PARTITION.equals(value)) { + return Optional.of(NullableValue.asNull(columnType)); + } + + // return empty for char/varchar types so the value is parsed as usual, i.e., by `HiveUtil#parsePartitionValue` + if (columnType instanceof CharType || columnType instanceof VarcharType) { + return Optional.empty(); + } + + if (columnType instanceof DateType) { + long epochMilli = LocalDate.parse(value, dateFormat).atStartOfDay(UTC_TIME_ZONE_ID).toInstant().toEpochMilli(); + return Optional.of(NullableValue.of(DATE, MILLISECONDS.toDays(epochMilli))); + } + + if (columnType instanceof TimestampType timestampType && timestampType.isShort()) { + long epochMilli = LocalDateTime.parse(value, dateFormat).toInstant(UTC).toEpochMilli(); + return Optional.of(NullableValue.of(TIMESTAMP_MILLIS, MILLISECONDS.toMicros(epochMilli))); + } + + throw new InvalidProjectionException(columnName, columnType); + } + private Instant adjustBoundToDateFormat(Instant value) { String formatted = formatValue(value.with(ChronoField.MILLI_OF_SECOND, 0)); try { - return dateFormat.parse(formatted).toInstant(); + return parse(formatted, dateFormat); } - 
catch (ParseException e) { + catch (DateTimeParseException e) { throw new InvalidProjectionException(formatted, e.getMessage()); } } private String formatValue(Instant current) { - return dateFormat.format(new Date(current.toEpochMilli())); + LocalDateTime localDateTime = LocalDateTime.ofInstant(current, UTC_TIME_ZONE_ID); + return localDateTime.format(dateFormat); } private boolean isValueInDomain(Optional valueDomain, Instant value, String formattedValue) @@ -206,7 +270,7 @@ private static ChronoUnit resolveDefaultChronoUnit(String columnName, String dat return MONTHS; } - private static Supplier parseDateRangerBound(String columnName, String value, SimpleDateFormat dateFormat) + private static Instant parseDateRangerBound(String columnName, String value, String dateFormatPattern, DateTimeFormatter dateFormat) { Matcher matcher = DATE_RANGE_BOUND_EXPRESSION_PATTERN.matcher(value); if (matcher.matches()) { @@ -214,27 +278,34 @@ private static Supplier parseDateRangerBound(String columnName, String String multiplierString = matcher.group(3); String unitString = matcher.group(4); if (nonNull(operator) && nonNull(multiplierString) && nonNull(unitString)) { - unitString = unitString.toUpperCase(Locale.ENGLISH); - return new DateExpressionBound( - Integer.parseInt(multiplierString), - ChronoUnit.valueOf(unitString + "S"), - operator.charAt(0) == '+'); + unitString = unitString.toUpperCase(ENGLISH); + int multiplier = Integer.parseInt(multiplierString); + boolean increment = operator.charAt(0) == '+'; + ChronoUnit unit = ChronoUnit.valueOf(unitString + "S"); + return Instant.now().plus(increment ? multiplier : -multiplier, unit); } if (value.trim().equals("NOW")) { - Instant now = Instant.now(); - return () -> now; + return Instant.now(); } - throw invalidRangeProperty(columnName, dateFormat.toPattern(), Optional.of("Invalid expression")); + throw invalidRangeProperty(columnName, dateFormatPattern, Optional.of("Invalid expression")); } - Instant dateBound; try { - dateBound = dateFormat.parse(value).toInstant(); + return parse(value, dateFormat); } - catch (ParseException e) { - throw invalidRangeProperty(columnName, dateFormat.toPattern(), Optional.of(e.getMessage())); + catch (DateTimeParseException e) { + throw invalidRangeProperty(columnName, dateFormatPattern, Optional.of(e.getMessage())); } - return () -> dateBound; + } + + private static Instant parse(String value, DateTimeFormatter dateFormat) + throws DateTimeParseException + { + TemporalAccessor parsed = dateFormat.parse(value); + if (parsed.query(TemporalQueries.localDate()) != null && parsed.query(TemporalQueries.localTime()) == null) { + return LocalDate.from(parsed).atStartOfDay().toInstant(UTC); + } + return LocalDateTime.from(parsed).toInstant(UTC); } private static TrinoException invalidRangeProperty(String columnName, String dateFormatPattern, Optional errorDetail) @@ -248,14 +319,4 @@ private static TrinoException invalidRangeProperty(String columnName, String dat DATE_RANGE_BOUND_EXPRESSION_PATTERN.pattern(), errorDetail.map(error -> ": " + error).orElse(""))); } - - private record DateExpressionBound(int multiplier, ChronoUnit unit, boolean increment) - implements Supplier - { - @Override - public Instant get() - { - return Instant.now().plus(increment ? 
multiplier : -multiplier, unit); - } - } } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/projection/PartitionProjection.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/projection/PartitionProjection.java index 4f29ed1d63eb..5daeba228570 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/projection/PartitionProjection.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/projection/PartitionProjection.java @@ -19,6 +19,7 @@ import io.trino.metastore.Partition; import io.trino.metastore.Table; import io.trino.spi.predicate.Domain; +import io.trino.spi.predicate.NullableValue; import io.trino.spi.predicate.TupleDomain; import java.util.List; @@ -52,6 +53,31 @@ public PartitionProjection(Optional storageLocationTemplate, Map parsePartitionValue(String columnName, String value) + { + Projection projection = columnProjections.get(columnName); + if (projection == null) { + return Optional.empty(); + } + return projection.parsePartitionValue(value); + } + public Optional> getProjectedPartitionNamesByFilter(List columnNames, TupleDomain partitionKeysFilter) { if (partitionKeysFilter.isNone()) { diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/projection/PartitionProjectionProperties.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/projection/PartitionProjectionProperties.java index cb82a8e2f2be..f51c8a3c50c8 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/projection/PartitionProjectionProperties.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/projection/PartitionProjectionProperties.java @@ -19,6 +19,8 @@ import com.google.common.collect.ImmutableSet; import io.trino.metastore.Column; import io.trino.metastore.Table; +import io.trino.plugin.hive.HiveColumnHandle; +import io.trino.plugin.hive.HiveTableHandle; import io.trino.spi.connector.ColumnMetadata; import io.trino.spi.connector.ConnectorTableMetadata; import io.trino.spi.type.Type; @@ -203,6 +205,25 @@ public static Optional getPartitionProjectionFromTable(Tabl tableProperties); } + public static Optional getPartitionProjectionFromHiveTable(HiveTableHandle table) + { + Map tableProperties = table.getTableParameters().orElse(ImmutableMap.of()); + if (parseBoolean(tableProperties.get(METASTORE_PROPERTY_PROJECTION_IGNORE)) || + !parseBoolean(tableProperties.get(METASTORE_PROPERTY_PROJECTION_ENABLED))) { + return Optional.empty(); + } + + Set partitionColumnNames = table.getPartitionColumns().stream().map(HiveColumnHandle::getName).collect(Collectors.toSet()); + return createPartitionProjection( + table.getDataColumns().stream() + .map(HiveColumnHandle::getName) + .filter(partitionColumnNames::contains) + .collect(toImmutableList()), + table.getPartitionColumns().stream() + .collect(toImmutableMap(HiveColumnHandle::getName, HiveColumnHandle::getType)), + tableProperties); + } + private static Optional createPartitionProjection(List dataColumns, Map partitionColumns, Map tableProperties) { // This method is used during table creation to validate the properties. The validation is performed even if the projection is disabled. 
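The read path above converts a custom-formatted partition string into Trino's internal value representation: days since the epoch for DATE, microseconds since the epoch for short TIMESTAMP, both computed in UTC. Here is a standalone sketch of the same conversion, using the projection formats exercised by the tests later in this diff (the literal values are illustrative):

```java
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.Locale;
import java.util.concurrent.TimeUnit;

// Sketch of the conversions DateProjection.parsePartitionValue performs
public class DateProjectionConversion
{
    public static void main(String[] args)
    {
        // DATE column with partition_projection_format='yyyy/MM/dd':
        // a Trino DATE is the number of days since 1970-01-01
        DateTimeFormatter dateFormat = DateTimeFormatter.ofPattern("yyyy/MM/dd", Locale.ENGLISH);
        long dateMillis = LocalDate.parse("2025/01/23", dateFormat)
                .atStartOfDay(ZoneOffset.UTC)
                .toInstant()
                .toEpochMilli();
        System.out.println(TimeUnit.MILLISECONDS.toDays(dateMillis)); // 20111, i.e. 2025-01-23

        // TIMESTAMP column with partition_projection_format='yyyy/M/dd HH@mm@ss':
        // a short Trino TIMESTAMP is microseconds since the epoch
        DateTimeFormatter timestampFormat = DateTimeFormatter.ofPattern("yyyy/M/dd HH@mm@ss", Locale.ENGLISH);
        long timestampMillis = LocalDateTime.parse("2025/1/20 01@00@00", timestampFormat)
                .toInstant(ZoneOffset.UTC)
                .toEpochMilli();
        System.out.println(TimeUnit.MILLISECONDS.toMicros(timestampMillis)); // 2025-01-20T01:00:00Z in micros
    }
}
```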
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/projection/Projection.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/projection/Projection.java index 0677769dfaba..559bfdfba311 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/projection/Projection.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/projection/Projection.java @@ -14,6 +14,7 @@ package io.trino.plugin.hive.projection; import io.trino.spi.predicate.Domain; +import io.trino.spi.predicate.NullableValue; import java.util.List; import java.util.Optional; @@ -22,4 +23,9 @@ sealed interface Projection permits DateProjection, EnumProjection, InjectedProjection, IntegerProjection { List getProjectedValues(Optional partitionValueFilter); + + default Optional parsePartitionValue(String value) + { + return Optional.empty(); + } } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveUtil.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveUtil.java index cf4339f11bd2..e1e0b5668605 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveUtil.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveUtil.java @@ -33,6 +33,7 @@ import io.trino.plugin.hive.HiveColumnHandle; import io.trino.plugin.hive.HivePartitionKey; import io.trino.plugin.hive.HiveTimestampPrecision; +import io.trino.plugin.hive.projection.PartitionProjection; import io.trino.spi.ErrorCodeSupplier; import io.trino.spi.TrinoException; import io.trino.spi.connector.ColumnMetadata; @@ -150,8 +151,8 @@ public final class HiveUtil public static final String ICEBERG_TABLE_TYPE_VALUE = "iceberg"; private static final LocalDateTime EPOCH_DAY = new LocalDateTime(1970, 1, 1, 0, 0); - private static final DateTimeFormatter HIVE_DATE_PARSER; - private static final DateTimeFormatter HIVE_TIMESTAMP_PARSER; + public static final DateTimeFormatter HIVE_DATE_PARSER; + public static final DateTimeFormatter HIVE_TIMESTAMP_PARSER; private static final String BIG_DECIMAL_POSTFIX = "BD"; @@ -234,10 +235,15 @@ private static boolean isValidPartitionType(Type type) type instanceof CharType; } - public static NullableValue parsePartitionValue(String partitionName, String value, Type type) + public static NullableValue parsePartitionValue(String partitionName, String value, Type type, String columnName, Optional partitionProjection) { verifyPartitionTypeSupported(partitionName, type); + Optional parsedValue = partitionProjection.flatMap(projection -> projection.parsePartitionValue(columnName, value)); + if (parsedValue.isPresent()) { + return parsedValue.get(); + } + boolean isNull = HIVE_DEFAULT_DYNAMIC_PARTITION.equals(value); if (type instanceof DecimalType decimalType) { @@ -596,7 +602,8 @@ public static NullableValue getPrefilledColumnValue( OptionalInt bucketNumber, long fileSize, long fileModifiedTime, - String partitionName) + String partitionName, + Optional partitionProjection) { String columnValue; if (partitionKey != null) { @@ -623,6 +630,12 @@ else if (isPartitionColumnHandle(columnHandle)) { byte[] bytes = columnValue.getBytes(UTF_8); String name = columnHandle.getName(); + + Optional parsedValue = partitionProjection.flatMap(projection -> projection.parsePartitionValue(name, columnValue)); + if (parsedValue.isPresent()) { + return parsedValue.get(); + } + Type type = columnHandle.getType(); if (isHiveNull(bytes)) { return NullableValue.asNull(type); diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseTestHiveOnDataLake.java 
b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseTestHiveOnDataLake.java index d8fb1711dddf..53331154707f 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseTestHiveOnDataLake.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseTestHiveOnDataLake.java @@ -56,6 +56,7 @@ import static com.google.common.collect.ImmutableList.toImmutableList; import static io.airlift.slice.Slices.utf8Slice; import static io.airlift.units.DataSize.Unit.MEGABYTE; +import static io.trino.metastore.Partitions.makePartName; import static io.trino.plugin.hive.TestingThriftHiveMetastoreBuilder.testingThriftHiveMetastoreBuilder; import static io.trino.plugin.hive.metastore.MetastoreUtil.getHiveBasicStatistics; import static io.trino.spi.type.VarcharType.VARCHAR; @@ -917,6 +918,158 @@ public void testIntegerPartitionProjectionOnIntegerColumnWithDefaults() "VALUES ('POLAND_1'), ('POLAND_2'), ('CZECH_1'), ('CZECH_2')"); } + @Test + public void testReadDatePartitionProjectionWithFormat() + { + String tableName = "partition_projection_read_custom_date" + randomNameSuffix(); + String fullyQualifiedTestTableName = getFullyQualifiedTestTableName(tableName); + String tablePath = format("%s/%s/", HIVE_TEST_SCHEMA, tableName); + + computeActual( + "CREATE TABLE " + fullyQualifiedTestTableName + " ( " + + " name varchar(25), " + + " comment varchar(152), " + + " dt DATE WITH (" + + " partition_projection_format='yyyy/MM/dd',\n" + + " partition_projection_interval=1,\n" + + " partition_projection_interval_unit='DAYS', \n" + + " partition_projection_range=ARRAY['2025/01/01', '2025/01/30'], \n" + + " partition_projection_type='date'" + + " ), " + + " ts timestamp WITH (" + + " partition_projection_type='date', " + + " partition_projection_format='yyyy/M/dd HH@mm@ss', " + + " partition_projection_range=ARRAY['2025/1/20 00@00@00', '2025/1/21 00@00@00'], " + + " partition_projection_interval=1, " + + " partition_projection_interval_unit='HOURS'" + + " )" + + ") WITH ( " + + " partitioned_by=ARRAY['dt','ts'], " + + " partition_projection_enabled=true, " + + " format='TEXTFILE')"); + + assertThat( + hiveMinioDataLake + .runOnHive("SHOW TBLPROPERTIES " + getHiveTestTableName(tableName))) + .containsPattern("[ |]+projection\\.enabled[ |]+true[ |]+") + .containsPattern("[ |]+projection\\.dt\\.type[ |]+date[ |]+") + .containsPattern("[ |]+projection\\.dt\\.format[ |]+yyyy/MM/dd[ |]+") + .containsPattern("[ |]+projection\\.dt\\.interval.unit[ |]+days[ |]+") + .containsPattern("[ |]+projection\\.dt\\.range[ |]+2025/01/01,2025/01/30[ |]+") + .containsPattern("[ |]+projection\\.ts\\.type[ |]+date[ |]+") + .containsPattern("[ |]+projection\\.ts\\.format[ |]+yyyy/M/dd HH@mm@ss[ |]+") + .containsPattern("[ |]+projection\\.ts\\.interval.unit[ |]+hours[ |]+") + .containsPattern("[ |]+projection\\.ts\\.range[ |]+2025/1/20 00@00@00,2025/1/21 00@00@00[ |]+"); + + assertThat(query(createInsertStatement( + fullyQualifiedTestTableName, + ImmutableList.of( + ImmutableList.of("'POLAND_1'", "'Comment'", "DATE '2025-1-23'", "TIMESTAMP '2025-1-20 01:00:00'"), + ImmutableList.of("'POLAND_2'", "'Comment'", "DATE '2025-1-23'", "TIMESTAMP '2025-1-20 02:00:00'"), + ImmutableList.of("'CZECH_2'", "'Comment'", "DATE '2025-1-24'", "TIMESTAMP '2025-1-20 10:00:00'"))))) + .failure() + .hasMessage("Hive connector does not support writing to partitions with a custom projection format"); + + byte[] row1 = "POLAND_1\u0001Comment".getBytes(UTF_8); + byte[] row2 = "POLAND_2\u0001Comment".getBytes(UTF_8); + byte[] row3 = 
"CZECH_2\u0001Comment".getBytes(UTF_8); + List columns = ImmutableList.of("dt", "ts"); + // row : ("'POLAND_1'", "'Comment'", "DATE '2025-1-23'", "TIMESTAMP '2025-1-20 01:00:00'") + hiveMinioDataLake.getMinioClient().putObject(bucketName, row1, tablePath + makePartName(columns, ImmutableList.of("2025/01/23", "2025/1/20 01@00@00")) + "/data.txt"); + // row : ("'POLAND_2'", "'Comment'", "DATE '2025-1-23'", "TIMESTAMP '2025-1-20 02:00:00'") + hiveMinioDataLake.getMinioClient().putObject(bucketName, row2, tablePath + makePartName(columns, ImmutableList.of("2025/01/23", "2025/1/20 02@00@00")) + "/data.txt"); + // row: ("'CZECH_2'", "'Comment'", "DATE '2025-1-24'", "TIMESTAMP '2025-1-20 10:00:00'") + hiveMinioDataLake.getMinioClient().putObject(bucketName, row3, tablePath + makePartName(columns, ImmutableList.of("2025/01/24", "2025/1/20 10@00@00")) + "/data.txt"); + + assertQuery("SELECT * FROM " + fullyQualifiedTestTableName + " WHERE name = 'POLAND_1'", "VALUES ('POLAND_1', 'Comment', DATE '2025-01-23', TIMESTAMP '2025-01-20 01:00:00')"); + + assertQuery("SELECT * FROM " + fullyQualifiedTestTableName + " WHERE dt = DATE '2025-01-23'", + "VALUES ('POLAND_1', 'Comment', DATE '2025-01-23', TIMESTAMP '2025-01-20 01:00:00')," + + "('POLAND_2', 'Comment', DATE '2025-01-23', TIMESTAMP '2025-01-20 02:00:00')"); + assertQuery("SELECT * FROM " + fullyQualifiedTestTableName + " WHERE dt = DATE '2025-01-23' AND ts > TIMESTAMP '2025-1-20 01:00:00'", + "VALUES ('POLAND_2', 'Comment', DATE '2025-01-23', TIMESTAMP '2025-01-20 02:00:00')"); + assertQuery("SELECT * FROM " + fullyQualifiedTestTableName + " WHERE dt IN (DATE '2025-01-23', DATE '2025-01-24', DATE '2025-01-25') AND ts > TIMESTAMP '2025-1-20 01:00:00'", + "VALUES ('POLAND_2', 'Comment', DATE '2025-01-23', TIMESTAMP '2025-01-20 02:00:00')," + + "('CZECH_2', 'Comment', DATE '2025-1-24', TIMESTAMP '2025-1-20 10:00:00')"); + } + + @Test + public void testReadDatePartitionProjectionWithFormatAndLocationTemplate() + { + String tableName = "partition_projection_read_custom_date" + randomNameSuffix(); + String fullyQualifiedTestTableName = getFullyQualifiedTestTableName(tableName); + String tablePath = format("%s/%s", HIVE_TEST_SCHEMA, tableName); + String projectionLocationTemplate = format("s3://%s/%s/xxx/${dt}/${ts}-xyz", bucketName, tablePath); + String projectionLocationValueFormat = tablePath + "/xxx/%s/%s-xyz"; + + computeActual( + "CREATE TABLE " + fullyQualifiedTestTableName + " ( " + + " name varchar(25), " + + " comment varchar(152), " + + " dt DATE WITH (" + + " partition_projection_format='yyyy/MM/dd',\n" + + " partition_projection_interval=1,\n" + + " partition_projection_interval_unit='DAYS', \n" + + " partition_projection_range=ARRAY['2025/01/01', '2025/01/30'], \n" + + " partition_projection_type='date'" + + " ), " + + " ts timestamp WITH (" + + " partition_projection_type='date', " + + " partition_projection_format='yyyy/M/dd HH@mm@ss', " + + " partition_projection_range=ARRAY['2025/1/20 00@00@00', '2025/1/21 00@00@00'], " + + " partition_projection_interval=1, " + + " partition_projection_interval_unit='HOURS'" + + " )" + + ") WITH ( " + + " partitioned_by=ARRAY['dt','ts'], " + + " partition_projection_enabled=true, " + + " partition_projection_location_template='%s',".formatted(projectionLocationTemplate) + + " format='TEXTFILE')"); + + assertThat( + hiveMinioDataLake + .runOnHive("SHOW TBLPROPERTIES " + getHiveTestTableName(tableName))) + .containsPattern("[ |]+projection\\.enabled[ |]+true[ |]+") + .containsPattern("[ 
|]+projection\\.dt\\.type[ |]+date[ |]+") + .containsPattern("[ |]+projection\\.dt\\.format[ |]+yyyy/MM/dd[ |]+") + .containsPattern("[ |]+projection\\.dt\\.interval.unit[ |]+days[ |]+") + .containsPattern("[ |]+projection\\.dt\\.range[ |]+2025/01/01,2025/01/30[ |]+") + .containsPattern("[ |]+projection\\.ts\\.type[ |]+date[ |]+") + .containsPattern("[ |]+projection\\.ts\\.format[ |]+yyyy/M/dd HH@mm@ss[ |]+") + .containsPattern("[ |]+projection\\.ts\\.interval.unit[ |]+hours[ |]+") + .containsPattern("[ |]+projection\\.ts\\.range[ |]+2025/1/20 00@00@00,2025/1/21 00@00@00[ |]+"); + + assertThat(query(createInsertStatement( + fullyQualifiedTestTableName, + ImmutableList.of( + ImmutableList.of("'POLAND_1'", "'Comment'", "DATE '2025-1-23'", "TIMESTAMP '2025-1-20 01:00:00'"), + ImmutableList.of("'POLAND_2'", "'Comment'", "DATE '2025-1-23'", "TIMESTAMP '2025-1-20 02:00:00'"), + ImmutableList.of("'CZECH_2'", "'Comment'", "DATE '2025-1-24'", "TIMESTAMP '2025-1-20 10:00:00'"))))) + .failure() + .hasMessage("Hive connector does not support writing to partitions with a custom projection format"); + + byte[] row1 = "POLAND_1\u0001Comment".getBytes(UTF_8); + byte[] row2 = "POLAND_2\u0001Comment".getBytes(UTF_8); + byte[] row3 = "CZECH_2\u0001Comment".getBytes(UTF_8); + // row : ("'POLAND_1'", "'Comment'", "DATE '2025-1-23'", "TIMESTAMP '2025-1-20 01:00:00'") + hiveMinioDataLake.getMinioClient().putObject(bucketName, row1, projectionLocationValueFormat.formatted("2025/01/23", "2025/1/20 01@00@00") + "/data.txt"); + // row : ("'POLAND_2'", "'Comment'", "DATE '2025-1-23'", "TIMESTAMP '2025-1-20 02:00:00'") + hiveMinioDataLake.getMinioClient().putObject(bucketName, row2, projectionLocationValueFormat.formatted("2025/01/23", "2025/1/20 02@00@00") + "/data.txt"); + // row: ("'CZECH_2'", "'Comment'", "DATE '2025-1-24'", "TIMESTAMP '2025-1-20 10:00:00'") + hiveMinioDataLake.getMinioClient().putObject(bucketName, row3, projectionLocationValueFormat.formatted("2025/01/24", "2025/1/20 10@00@00") + "/data.txt"); + + assertQuery("SELECT * FROM " + fullyQualifiedTestTableName + " WHERE name = 'POLAND_1'", "VALUES ('POLAND_1', 'Comment', DATE '2025-01-23', TIMESTAMP '2025-01-20 01:00:00')"); + + assertQuery("SELECT * FROM " + fullyQualifiedTestTableName + " WHERE dt = DATE '2025-01-23'", + "VALUES ('POLAND_1', 'Comment', DATE '2025-01-23', TIMESTAMP '2025-01-20 01:00:00')," + + "('POLAND_2', 'Comment', DATE '2025-01-23', TIMESTAMP '2025-01-20 02:00:00')"); + assertQuery("SELECT * FROM " + fullyQualifiedTestTableName + " WHERE dt = DATE '2025-01-23' AND ts > TIMESTAMP '2025-1-20 01:00:00'", + "VALUES ('POLAND_2', 'Comment', DATE '2025-01-23', TIMESTAMP '2025-01-20 02:00:00')"); + assertQuery("SELECT * FROM " + fullyQualifiedTestTableName + " WHERE dt IN (DATE '2025-01-23', DATE '2025-01-24', DATE '2025-01-25') AND ts > TIMESTAMP '2025-1-20 01:00:00'", + "VALUES ('POLAND_2', 'Comment', DATE '2025-01-23', TIMESTAMP '2025-01-20 02:00:00')," + + "('CZECH_2', 'Comment', DATE '2025-01-24', TIMESTAMP '2025-01-20 10:00:00')"); + } + @Test public void testDatePartitionProjectionOnDateColumnWithDefaults() { @@ -936,7 +1089,7 @@ public void testDatePartitionProjectionOnDateColumnWithDefaults() " short_name2 date WITH (" + " partition_projection_type='date', " + " partition_projection_format='yyyy-MM-dd', " + - " partition_projection_range=ARRAY['2001-1-22', '2001-1-25']" + + " partition_projection_range=ARRAY['2001-01-22', '2001-01-25']" + " )" + ") WITH ( " + " partitioned_by=ARRAY['short_name1', 'short_name2'], " + @@ -951,7 +1104,7 @@ public 
void testDatePartitionProjectionOnDateColumnWithDefaults() .containsPattern("[ |]+projection\\.short_name1\\.values[ |]+PL1,CZ1[ |]+") .containsPattern("[ |]+projection\\.short_name2\\.type[ |]+date[ |]+") .containsPattern("[ |]+projection\\.short_name2\\.format[ |]+yyyy-MM-dd[ |]+") - .containsPattern("[ |]+projection\\.short_name2\\.range[ |]+2001-1-22,2001-1-25[ |]+"); + .containsPattern("[ |]+projection\\.short_name2\\.range[ |]+2001-01-22,2001-01-25[ |]+"); computeActual(createInsertStatement( fullyQualifiedTestTableName, @@ -1011,7 +1164,7 @@ public void testDatePartitionProjectionOnTimestampColumnWithInterval() " short_name2 timestamp WITH (" + " partition_projection_type='date', " + " partition_projection_format='yyyy-MM-dd HH:mm:ss', " + - " partition_projection_range=ARRAY['2001-1-22 00:00:00', '2001-1-22 00:00:06'], " + + " partition_projection_range=ARRAY['2001-01-22 00:00:00', '2001-01-22 00:00:06'], " + " partition_projection_interval=2, " + " partition_projection_interval_unit='SECONDS'" + " )" + @@ -1028,7 +1181,7 @@ public void testDatePartitionProjectionOnTimestampColumnWithInterval() .containsPattern("[ |]+projection\\.short_name1\\.values[ |]+PL1,CZ1[ |]+") .containsPattern("[ |]+projection\\.short_name2\\.type[ |]+date[ |]+") .containsPattern("[ |]+projection\\.short_name2\\.format[ |]+yyyy-MM-dd HH:mm:ss[ |]+") - .containsPattern("[ |]+projection\\.short_name2\\.range[ |]+2001-1-22 00:00:00,2001-1-22 00:00:06[ |]+") + .containsPattern("[ |]+projection\\.short_name2\\.range[ |]+2001-01-22 00:00:00,2001-01-22 00:00:06[ |]+") .containsPattern("[ |]+projection\\.short_name2\\.interval[ |]+2[ |]+") .containsPattern("[ |]+projection\\.short_name2\\.interval\\.unit[ |]+seconds[ |]+"); @@ -1582,7 +1735,7 @@ public void testPartitionProjectionInvalidTableProperties() " partition_projection_enabled=true " + ")")) .hasMessage("Column projection for column 'short_name1' failed. Property: 'partition_projection_range' needs to be a list of 2 valid dates formatted as 'yyyy-MM-dd HH' " + - "or '^\\s*NOW\\s*(([+-])\\s*([0-9]+)\\s*(DAY|HOUR|MINUTE|SECOND)S?\\s*)?$' that are sequential: Unparseable date: \"2001-01-01\""); + "or '^\\s*NOW\\s*(([+-])\\s*([0-9]+)\\s*(DAY|HOUR|MINUTE|SECOND)S?\\s*)?$' that are sequential: Text '2001-01-01' could not be parsed at index 10"); assertThatThrownBy(() -> getQueryRunner().execute( "CREATE TABLE " + getFullyQualifiedTestTableName("nation_" + randomNameSuffix()) + " ( " + @@ -1597,7 +1750,7 @@ public void testPartitionProjectionInvalidTableProperties() " partition_projection_enabled=true " + ")")) .hasMessage("Column projection for column 'short_name1' failed. Property: 'partition_projection_range' needs to be a list of 2 valid dates formatted as 'yyyy-MM-dd' " + - "or '^\\s*NOW\\s*(([+-])\\s*([0-9]+)\\s*(DAY|HOUR|MINUTE|SECOND)S?\\s*)?$' that are sequential: Unparseable date: \"NOW*3DAYS\""); + "or '^\\s*NOW\\s*(([+-])\\s*([0-9]+)\\s*(DAY|HOUR|MINUTE|SECOND)S?\\s*)?$' that are sequential: Text 'NOW*3DAYS' could not be parsed at index 0"); assertThatThrownBy(() -> getQueryRunner().execute( "CREATE TABLE " + getFullyQualifiedTestTableName("nation_" + randomNameSuffix()) + " ( " + @@ -1703,7 +1856,7 @@ public void testPartitionProjectionIgnore() // Expect invalid Partition Projection properties to fail assertThatThrownBy(() -> getQueryRunner().execute("SELECT * FROM " + fullyQualifiedTestTableName)) .hasMessage("Column projection for column 'date_time' failed. 
Property: 'partition_projection_range' needs to be a list of 2 valid dates formatted as 'yyyy-MM-dd HH' " + - "or '^\\s*NOW\\s*(([+-])\\s*([0-9]+)\\s*(DAY|HOUR|MINUTE|SECOND)S?\\s*)?$' that are sequential: Unparseable date: \"2001-01-01\""); + "or '^\\s*NOW\\s*(([+-])\\s*([0-9]+)\\s*(DAY|HOUR|MINUTE|SECOND)S?\\s*)?$' that are sequential: Text '2001-01-01' could not be parsed at index 10"); // Append kill switch table property to ignore Partition Projection properties hiveMinioDataLake.runOnHive( diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/statistics/TestMetastoreHiveStatisticsProvider.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/statistics/TestMetastoreHiveStatisticsProvider.java index f74ea30fb771..085452249ec0 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/statistics/TestMetastoreHiveStatisticsProvider.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/statistics/TestMetastoreHiveStatisticsProvider.java @@ -430,7 +430,7 @@ public void testConvertPartitionValueToDouble() private static void assertConvertPartitionValueToDouble(Type type, String value, double expected) { - Object trinoValue = parsePartitionValue(format("p=%s", value), value, type).getValue(); + Object trinoValue = parsePartitionValue(format("p=%s", value), value, type, "", Optional.empty()).getValue(); assertThat(convertPartitionValueToDouble(type, trinoValue)).isEqualTo(OptionalDouble.of(expected)); } @@ -853,7 +853,7 @@ private static String invalidColumnStatistics(String message) private static HivePartition partition(String name) { - return parsePartition(TABLE, name, ImmutableList.of(PARTITION_COLUMN_1, PARTITION_COLUMN_2)); + return parsePartition(TABLE, name, ImmutableList.of(PARTITION_COLUMN_1, PARTITION_COLUMN_2), Optional.empty()); } private static PartitionStatistics rowsCount(long rowsCount) diff --git a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiUtil.java b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiUtil.java index e92742aaaeb8..97766372a364 100644 --- a/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiUtil.java +++ b/plugin/trino-hudi/src/main/java/io/trino/plugin/hudi/HudiUtil.java @@ -36,6 +36,7 @@ import java.io.IOException; import java.util.List; import java.util.Map; +import java.util.Optional; import static io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_METADATA; import static io.trino.plugin.hive.util.HiveUtil.checkCondition; @@ -96,7 +97,7 @@ public static boolean partitionMatchesPredicates( TupleDomain constraintSummary) { HivePartition partition = HivePartitionManager.parsePartition( - tableName, hivePartitionName, partitionColumnHandles); + tableName, hivePartitionName, partitionColumnHandles, Optional.empty()); return partitionMatches(partitionColumnHandles, constraintSummary, partition); }
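One last note on the write-side guard: beginInsertOrMerge rejects inserts when DateProjection.allowWrite returns false, and allowWrite is a round-trip check: render a value with the projection's custom format, then try to read it back with the standard Hive parsers (HIVE_DATE_PARSER / HIVE_TIMESTAMP_PARSER, which are Joda-based in HiveUtil). The sketch below models that idea with java.time only and assumes a plain 'yyyy-MM-dd' default layout; the real parsers accept more variants, so this is a simplification:

```java
import java.time.LocalDate;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.Locale;

// Simplified model of DateProjection.allowWrite for DATE columns: writable only if
// a value formatted with the custom pattern is still readable by the default parser.
public class AllowWriteCheck
{
    static boolean allowWrite(String projectionFormat)
    {
        DateTimeFormatter custom = DateTimeFormatter.ofPattern(projectionFormat, Locale.ENGLISH);
        String formatted = LocalDate.now(ZoneOffset.UTC).format(custom);
        try {
            // stand-in for HiveUtil.HIVE_DATE_PARSER (assumed default layout)
            DateTimeFormatter.ofPattern("yyyy-MM-dd", Locale.ENGLISH).parse(formatted);
            return true;
        }
        catch (DateTimeParseException e) {
            return false;
        }
    }

    public static void main(String[] args)
    {
        System.out.println(allowWrite("yyyy-MM-dd")); // true: round-trips through the default parser
        System.out.println(allowWrite("yyyy/MM/dd")); // false: the default parser rejects slashes
    }
}
```

This is why the tests above expect the INSERT to fail with the custom-format error and instead stage data files directly via the MinIO client.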