Skip to content

Commit 538068e

Browse files
committed
Refactor DateProjection to use DateTimeFormatter
Replace legacy formatting with `DateTimeFormatter` for better compatibility with the `java.time` package, providing more precise and predictable behavior This refactor follows AWS Athena's date-type partition projection format (https://docs.aws.amazon.com/athena/latest/ug/partition-projection-supported-types.html) in the description. Additionally, this commit replaces the error-prone Supplier<Instant> with a direct Instant type for `leftBound` and `rightBound`.
1 parent 9ef74ae commit 538068e

File tree

2 files changed

+49
-53
lines changed

2 files changed

+49
-53
lines changed

plugin/trino-hive/src/main/java/io/trino/plugin/hive/projection/DateProjection.java

Lines changed: 42 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -22,20 +22,20 @@
2222
import io.trino.spi.type.Type;
2323
import io.trino.spi.type.VarcharType;
2424

25-
import java.text.DateFormat;
26-
import java.text.ParseException;
27-
import java.text.SimpleDateFormat;
2825
import java.time.Instant;
26+
import java.time.LocalDate;
27+
import java.time.LocalDateTime;
2928
import java.time.ZoneId;
29+
import java.time.format.DateTimeFormatter;
30+
import java.time.format.DateTimeParseException;
3031
import java.time.temporal.ChronoField;
3132
import java.time.temporal.ChronoUnit;
32-
import java.util.Date;
33+
import java.time.temporal.TemporalAccessor;
34+
import java.time.temporal.TemporalQueries;
3335
import java.util.List;
34-
import java.util.Locale;
3536
import java.util.Map;
3637
import java.util.Optional;
3738
import java.util.Set;
38-
import java.util.function.Supplier;
3939
import java.util.regex.Matcher;
4040
import java.util.regex.Pattern;
4141

@@ -49,14 +49,15 @@
4949
import static io.trino.plugin.hive.projection.PartitionProjectionProperties.getProjectionPropertyValue;
5050
import static io.trino.spi.predicate.Domain.singleValue;
5151
import static java.lang.String.format;
52+
import static java.time.ZoneOffset.UTC;
5253
import static java.time.temporal.ChronoUnit.DAYS;
5354
import static java.time.temporal.ChronoUnit.HOURS;
5455
import static java.time.temporal.ChronoUnit.MINUTES;
5556
import static java.time.temporal.ChronoUnit.MONTHS;
5657
import static java.time.temporal.ChronoUnit.SECONDS;
58+
import static java.util.Locale.ENGLISH;
5759
import static java.util.Objects.nonNull;
5860
import static java.util.Objects.requireNonNull;
59-
import static java.util.TimeZone.getTimeZone;
6061
import static java.util.concurrent.TimeUnit.MILLISECONDS;
6162

6263
final class DateProjection
@@ -71,9 +72,9 @@ final class DateProjection
7172
private static final Pattern DATE_RANGE_BOUND_EXPRESSION_PATTERN = Pattern.compile("^\\s*NOW\\s*(([+-])\\s*([0-9]+)\\s*(DAY|HOUR|MINUTE|SECOND)S?\\s*)?$");
7273

7374
private final String columnName;
74-
private final DateFormat dateFormat;
75-
private final Supplier<Instant> leftBound;
76-
private final Supplier<Instant> rightBound;
75+
private final DateTimeFormatter dateFormat;
76+
private final Instant leftBound;
77+
private final Instant rightBound;
7778
private final int interval;
7879
private final ChronoUnit intervalUnit;
7980

@@ -104,14 +105,11 @@ public DateProjection(String columnName, Type columnType, Map<String, Object> co
104105
throw invalidRangeProperty(columnName, dateFormatPattern, Optional.empty());
105106
}
106107

107-
SimpleDateFormat dateFormat = new SimpleDateFormat(dateFormatPattern);
108-
dateFormat.setLenient(false);
109-
dateFormat.setTimeZone(getTimeZone(UTC_TIME_ZONE_ID));
110-
this.dateFormat = requireNonNull(dateFormat, "dateFormatPattern is null");
108+
this.dateFormat = DateTimeFormatter.ofPattern(dateFormatPattern, ENGLISH);
111109

112-
leftBound = parseDateRangerBound(columnName, range.get(0), dateFormat);
113-
rightBound = parseDateRangerBound(columnName, range.get(1), dateFormat);
114-
if (!leftBound.get().isBefore(rightBound.get())) {
110+
leftBound = parseDateRangerBound(columnName, range.get(0), dateFormatPattern, dateFormat);
111+
rightBound = parseDateRangerBound(columnName, range.get(1), dateFormatPattern, dateFormat);
112+
if (!leftBound.isBefore(rightBound)) {
115113
throw invalidRangeProperty(columnName, dateFormatPattern, Optional.empty());
116114
}
117115

@@ -135,8 +133,8 @@ public List<String> getProjectedValues(Optional<Domain> partitionValueFilter)
135133
{
136134
ImmutableList.Builder<String> builder = ImmutableList.builder();
137135

138-
Instant leftBound = adjustBoundToDateFormat(this.leftBound.get());
139-
Instant rightBound = adjustBoundToDateFormat(this.rightBound.get());
136+
Instant leftBound = adjustBoundToDateFormat(this.leftBound);
137+
Instant rightBound = adjustBoundToDateFormat(this.rightBound);
140138

141139
Instant currentValue = leftBound;
142140
while (!currentValue.isAfter(rightBound)) {
@@ -156,16 +154,17 @@ private Instant adjustBoundToDateFormat(Instant value)
156154
{
157155
String formatted = formatValue(value.with(ChronoField.MILLI_OF_SECOND, 0));
158156
try {
159-
return dateFormat.parse(formatted).toInstant();
157+
return parse(formatted, dateFormat);
160158
}
161-
catch (ParseException e) {
159+
catch (DateTimeParseException e) {
162160
throw new InvalidProjectionException(formatted, e.getMessage());
163161
}
164162
}
165163

166164
private String formatValue(Instant current)
167165
{
168-
return dateFormat.format(new Date(current.toEpochMilli()));
166+
LocalDateTime localDateTime = LocalDateTime.ofInstant(current, UTC_TIME_ZONE_ID);
167+
return localDateTime.format(dateFormat);
169168
}
170169

171170
private boolean isValueInDomain(Optional<Domain> valueDomain, Instant value, String formattedValue)
@@ -206,35 +205,42 @@ private static ChronoUnit resolveDefaultChronoUnit(String columnName, String dat
206205
return MONTHS;
207206
}
208207

209-
private static Supplier<Instant> parseDateRangerBound(String columnName, String value, SimpleDateFormat dateFormat)
208+
private static Instant parseDateRangerBound(String columnName, String value, String dateFormatPattern, DateTimeFormatter dateFormat)
210209
{
211210
Matcher matcher = DATE_RANGE_BOUND_EXPRESSION_PATTERN.matcher(value);
212211
if (matcher.matches()) {
213212
String operator = matcher.group(2);
214213
String multiplierString = matcher.group(3);
215214
String unitString = matcher.group(4);
216215
if (nonNull(operator) && nonNull(multiplierString) && nonNull(unitString)) {
217-
unitString = unitString.toUpperCase(Locale.ENGLISH);
218-
return new DateExpressionBound(
219-
Integer.parseInt(multiplierString),
220-
ChronoUnit.valueOf(unitString + "S"),
221-
operator.charAt(0) == '+');
216+
unitString = unitString.toUpperCase(ENGLISH);
217+
int multiplier = Integer.parseInt(multiplierString);
218+
boolean increment = operator.charAt(0) == '+';
219+
ChronoUnit unit = ChronoUnit.valueOf(unitString + "S");
220+
return Instant.now().plus(increment ? multiplier : -multiplier, unit);
222221
}
223222
if (value.trim().equals("NOW")) {
224-
Instant now = Instant.now();
225-
return () -> now;
223+
return Instant.now();
226224
}
227-
throw invalidRangeProperty(columnName, dateFormat.toPattern(), Optional.of("Invalid expression"));
225+
throw invalidRangeProperty(columnName, dateFormatPattern, Optional.of("Invalid expression"));
228226
}
229227

230-
Instant dateBound;
231228
try {
232-
dateBound = dateFormat.parse(value).toInstant();
229+
return parse(value, dateFormat);
233230
}
234-
catch (ParseException e) {
235-
throw invalidRangeProperty(columnName, dateFormat.toPattern(), Optional.of(e.getMessage()));
231+
catch (DateTimeParseException e) {
232+
throw invalidRangeProperty(columnName, dateFormatPattern, Optional.of(e.getMessage()));
236233
}
237-
return () -> dateBound;
234+
}
235+
236+
private static Instant parse(String value, DateTimeFormatter dateFormat)
237+
throws DateTimeParseException
238+
{
239+
TemporalAccessor parsed = dateFormat.parse(value);
240+
if (parsed.query(TemporalQueries.localDate()) != null && parsed.query(TemporalQueries.localTime()) == null) {
241+
return LocalDate.from(parsed).atStartOfDay().toInstant(UTC);
242+
}
243+
return LocalDateTime.from(parsed).toInstant(UTC);
238244
}
239245

240246
private static TrinoException invalidRangeProperty(String columnName, String dateFormatPattern, Optional<String> errorDetail)
@@ -248,14 +254,4 @@ private static TrinoException invalidRangeProperty(String columnName, String dat
248254
DATE_RANGE_BOUND_EXPRESSION_PATTERN.pattern(),
249255
errorDetail.map(error -> ": " + error).orElse("")));
250256
}
251-
252-
private record DateExpressionBound(int multiplier, ChronoUnit unit, boolean increment)
253-
implements Supplier<Instant>
254-
{
255-
@Override
256-
public Instant get()
257-
{
258-
return Instant.now().plus(increment ? multiplier : -multiplier, unit);
259-
}
260-
}
261257
}

plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseTestHiveOnDataLake.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -936,7 +936,7 @@ public void testDatePartitionProjectionOnDateColumnWithDefaults()
936936
" short_name2 date WITH (" +
937937
" partition_projection_type='date', " +
938938
" partition_projection_format='yyyy-MM-dd', " +
939-
" partition_projection_range=ARRAY['2001-1-22', '2001-1-25']" +
939+
" partition_projection_range=ARRAY['2001-01-22', '2001-01-25']" +
940940
" )" +
941941
") WITH ( " +
942942
" partitioned_by=ARRAY['short_name1', 'short_name2'], " +
@@ -951,7 +951,7 @@ public void testDatePartitionProjectionOnDateColumnWithDefaults()
951951
.containsPattern("[ |]+projection\\.short_name1\\.values[ |]+PL1,CZ1[ |]+")
952952
.containsPattern("[ |]+projection\\.short_name2\\.type[ |]+date[ |]+")
953953
.containsPattern("[ |]+projection\\.short_name2\\.format[ |]+yyyy-MM-dd[ |]+")
954-
.containsPattern("[ |]+projection\\.short_name2\\.range[ |]+2001-1-22,2001-1-25[ |]+");
954+
.containsPattern("[ |]+projection\\.short_name2\\.range[ |]+2001-01-22,2001-01-25[ |]+");
955955

956956
computeActual(createInsertStatement(
957957
fullyQualifiedTestTableName,
@@ -1011,7 +1011,7 @@ public void testDatePartitionProjectionOnTimestampColumnWithInterval()
10111011
" short_name2 timestamp WITH (" +
10121012
" partition_projection_type='date', " +
10131013
" partition_projection_format='yyyy-MM-dd HH:mm:ss', " +
1014-
" partition_projection_range=ARRAY['2001-1-22 00:00:00', '2001-1-22 00:00:06'], " +
1014+
" partition_projection_range=ARRAY['2001-01-22 00:00:00', '2001-01-22 00:00:06'], " +
10151015
" partition_projection_interval=2, " +
10161016
" partition_projection_interval_unit='SECONDS'" +
10171017
" )" +
@@ -1028,7 +1028,7 @@ public void testDatePartitionProjectionOnTimestampColumnWithInterval()
10281028
.containsPattern("[ |]+projection\\.short_name1\\.values[ |]+PL1,CZ1[ |]+")
10291029
.containsPattern("[ |]+projection\\.short_name2\\.type[ |]+date[ |]+")
10301030
.containsPattern("[ |]+projection\\.short_name2\\.format[ |]+yyyy-MM-dd HH:mm:ss[ |]+")
1031-
.containsPattern("[ |]+projection\\.short_name2\\.range[ |]+2001-1-22 00:00:00,2001-1-22 00:00:06[ |]+")
1031+
.containsPattern("[ |]+projection\\.short_name2\\.range[ |]+2001-01-22 00:00:00,2001-01-22 00:00:06[ |]+")
10321032
.containsPattern("[ |]+projection\\.short_name2\\.interval[ |]+2[ |]+")
10331033
.containsPattern("[ |]+projection\\.short_name2\\.interval\\.unit[ |]+seconds[ |]+");
10341034

@@ -1582,7 +1582,7 @@ public void testPartitionProjectionInvalidTableProperties()
15821582
" partition_projection_enabled=true " +
15831583
")"))
15841584
.hasMessage("Column projection for column 'short_name1' failed. Property: 'partition_projection_range' needs to be a list of 2 valid dates formatted as 'yyyy-MM-dd HH' " +
1585-
"or '^\\s*NOW\\s*(([+-])\\s*([0-9]+)\\s*(DAY|HOUR|MINUTE|SECOND)S?\\s*)?$' that are sequential: Unparseable date: \"2001-01-01\"");
1585+
"or '^\\s*NOW\\s*(([+-])\\s*([0-9]+)\\s*(DAY|HOUR|MINUTE|SECOND)S?\\s*)?$' that are sequential: Text '2001-01-01' could not be parsed at index 10");
15861586

15871587
assertThatThrownBy(() -> getQueryRunner().execute(
15881588
"CREATE TABLE " + getFullyQualifiedTestTableName("nation_" + randomNameSuffix()) + " ( " +
@@ -1597,7 +1597,7 @@ public void testPartitionProjectionInvalidTableProperties()
15971597
" partition_projection_enabled=true " +
15981598
")"))
15991599
.hasMessage("Column projection for column 'short_name1' failed. Property: 'partition_projection_range' needs to be a list of 2 valid dates formatted as 'yyyy-MM-dd' " +
1600-
"or '^\\s*NOW\\s*(([+-])\\s*([0-9]+)\\s*(DAY|HOUR|MINUTE|SECOND)S?\\s*)?$' that are sequential: Unparseable date: \"NOW*3DAYS\"");
1600+
"or '^\\s*NOW\\s*(([+-])\\s*([0-9]+)\\s*(DAY|HOUR|MINUTE|SECOND)S?\\s*)?$' that are sequential: Text 'NOW*3DAYS' could not be parsed at index 0");
16011601

16021602
assertThatThrownBy(() -> getQueryRunner().execute(
16031603
"CREATE TABLE " + getFullyQualifiedTestTableName("nation_" + randomNameSuffix()) + " ( " +
@@ -1703,7 +1703,7 @@ public void testPartitionProjectionIgnore()
17031703
// Expect invalid Partition Projection properties to fail
17041704
assertThatThrownBy(() -> getQueryRunner().execute("SELECT * FROM " + fullyQualifiedTestTableName))
17051705
.hasMessage("Column projection for column 'date_time' failed. Property: 'partition_projection_range' needs to be a list of 2 valid dates formatted as 'yyyy-MM-dd HH' " +
1706-
"or '^\\s*NOW\\s*(([+-])\\s*([0-9]+)\\s*(DAY|HOUR|MINUTE|SECOND)S?\\s*)?$' that are sequential: Unparseable date: \"2001-01-01\"");
1706+
"or '^\\s*NOW\\s*(([+-])\\s*([0-9]+)\\s*(DAY|HOUR|MINUTE|SECOND)S?\\s*)?$' that are sequential: Text '2001-01-01' could not be parsed at index 10");
17071707

17081708
// Append kill switch table property to ignore Partition Projection properties
17091709
hiveMinioDataLake.runOnHive(

0 commit comments

Comments
 (0)