diff --git a/src/main/java/com/nvidia/spark/rapids/jni/OrcDstRuleExtractor.java b/src/main/java/com/nvidia/spark/rapids/jni/OrcDstRuleExtractor.java new file mode 100644 index 0000000000..7c4aa271e7 --- /dev/null +++ b/src/main/java/com/nvidia/spark/rapids/jni/OrcDstRuleExtractor.java @@ -0,0 +1,529 @@ +/* + * Copyright (c) 2026, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.spark.rapids.jni; + +import java.time.Instant; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.ZoneOffset; +import java.time.zone.ZoneOffsetTransitionRule; +import java.time.zone.ZoneRules; +import java.util.List; +import java.util.TimeZone; + +/** + * Recovers a recurring DST rule for an IANA timezone in the shape the GPU + * side consumes for ORC DST conversion. + * + *

Two extraction paths are supported. The probing path is tried first + * because it captures what {@link TimeZone#getOffset(long)} actually returns + * (which is the source of truth the GPU side must match for ORC byte + * compatibility); the {@link ZoneRules#getTransitionRules()} path is used as + * a fallback for zones whose recurring rule cannot be recovered from hourly + * probes. + * + *

This class is intentionally separate from {@link OrcTimezoneInfo} — the + * historical-transition machinery and the DST-rule extraction share no + * read/write state, just a couple of small package-private helpers + * ({@link OrcTimezoneInfo#utcMillisForDate} and + * {@link OrcTimezoneInfo#binarySearchTransition}). + */ +final class OrcDstRuleExtractor { + private OrcDstRuleExtractor() {} + + // Reference years used to cross-check the extracted DST rule against + // java.util.TimeZone.getOffset. A near-future anchor (2060) catches + // divergence within the typical application lifetime; the far-future + // anchors exercise the recurring-rule fallback well past any historical + // transition entry in tzdata. + private static final int[] DST_RULE_VALIDATION_YEARS = {2060, 2400, 9997}; + + /** + * Recurring DST rule for a single zone, encoded in the same shape that + * {@link java.util.SimpleTimeZone} stores internally and that the GPU side + * consumes. {@code month} is 0-based (Calendar.JANUARY=0), {@code dayOfWeek} + * follows Calendar's 1=Sun..7=Sat convention, and {@code dstSavings} / + * {@code time} are in milliseconds. + * + *

The {@code *Mode} fields encode how {@code *Day}/{@code *DayOfWeek} + * combine — see the {@code MODE_*} constants. {@code *TimeMode} selects + * the time-of-day basis — see the {@code TIME_MODE_*} constants. + */ + static final class DstRule { + // Day-rule modes for {start,end}Mode — matches SimpleTimeZone's internal + // encoding so the GPU side can consume the values directly. + static final int MODE_DOM = 0; + static final int MODE_DOW_IN_MONTH = 1; + static final int MODE_DOW_GE_DOM = 2; + static final int MODE_DOW_LE_DOM = 3; + + // Time-of-day basis for {start,end}TimeMode. + static final int TIME_MODE_WALL = 0; + static final int TIME_MODE_STANDARD = 1; + static final int TIME_MODE_UTC = 2; + + int dstSavings; + int startMonth; + int startDay; + int startDayOfWeek; + int startTime; + int startTimeMode; + int startMode; + int endMonth; + int endDay; + int endDayOfWeek; + int endTime; + int endTimeMode; + int endMode; + } + + /** + * Extract the recurring DST rule for a zone, or {@code null} if the zone has + * no DST. + * + * @param timezoneId IANA timezone id; used only in exception messages + * @param tz {@link TimeZone} for the zone; must describe the same zone as + * {@code rules} + * @param rules {@link ZoneRules} for the zone + * @return the recurring DST rule, or {@code null} if the zone has no DST + * ({@code rules.isFixedOffset()} or {@code !tz.useDaylightTime()}) + * @throws IllegalStateException if the zone reports DST but neither + * extraction path produces a usable rule — for example, an unsupported + * {@link ZoneRules#getTransitionRules()} count (not 0 and not 2), a + * transition rule shape outside DOW_GE_DOM, mismatched start vs. end + * DST savings, zero-delta savings, or cross-year verification mismatch + * against {@code tz.getOffset} on the anchor years + * {@code 2060, 2400, 9997}. + */ + static DstRule extractDstRule(String timezoneId, TimeZone tz, ZoneRules rules) { + // Fixed-offset zones (e.g. "+05:30") have no DST. Guard explicitly because + // TimeZone.getTimeZone(zoneId) silently returns GMT for such ids on most + // JVMs, which would leave `tz` describing a different zone than `rules`. + // Mirrors the guard in OrcTimezoneInfo.buildRuntimeOrcTimezoneInfo. + if (rules.isFixedOffset() || !tz.useDaylightTime()) { + return null; + } + // Sanity-check that tz and rules describe the same zone. Both Path A and + // Path B assume this, but the two-argument signature lets a caller pass + // mismatched objects (the silent-GMT trap above is one way this can + // happen). Compare standard offsets at a recent reference instant -- the + // epoch is unsafe because some zones (e.g. Europe/London) were on a + // different standard offset in 1970 (British Standard Time experiment). + Instant ref = Instant.parse("2024-01-15T00:00:00Z"); + int rulesStandardOffsetMillis = rules.getStandardOffset(ref).getTotalSeconds() * 1000; + if (tz.getRawOffset() != rulesStandardOffsetMillis) { + throw new IllegalStateException( + "TimeZone and ZoneRules describe different zones for timezone: " + timezoneId + + " (tz.rawOffset=" + tz.getRawOffset() + + ", rules.standardOffset=" + rulesStandardOffsetMillis + ")"); + } + DstRule rule = extractDstRuleByProbing(tz); + if (rule != null) { + return rule; + } + rule = extractDstRuleFromZoneRules(timezoneId, tz, rules); + if (rule != null) { + return rule; + } + throw new IllegalStateException("Failed to extract ORC DST rule for timezone: " + timezoneId); + } + + // ---- Path A: from ZoneRules.getTransitionRules() ---- + + private static DstRule extractDstRuleFromZoneRules(String timezoneId, TimeZone tz, + ZoneRules rules) { + List transitionRules = rules.getTransitionRules(); + if (transitionRules.isEmpty()) { + return null; + } + if (transitionRules.size() != 2) { + throw new IllegalStateException("Unsupported ORC DST rule count for timezone: " + timezoneId); + } + + ZoneOffsetTransitionRule startTransitionRule = null; + ZoneOffsetTransitionRule endTransitionRule = null; + for (ZoneOffsetTransitionRule transitionRule : transitionRules) { + int deltaMillis = (transitionRule.getOffsetAfter().getTotalSeconds() + - transitionRule.getOffsetBefore().getTotalSeconds()) * 1000; + if (deltaMillis > 0) { + startTransitionRule = transitionRule; + } else if (deltaMillis < 0) { + endTransitionRule = transitionRule; + } else { + throw new IllegalStateException("Unsupported zero-delta ORC DST rule for timezone: " + + timezoneId); + } + } + if (startTransitionRule == null || endTransitionRule == null) { + throw new IllegalStateException("Failed to identify ORC DST start/end rules for timezone: " + + timezoneId); + } + + int dstSavings = (startTransitionRule.getOffsetAfter().getTotalSeconds() + - startTransitionRule.getOffsetBefore().getTotalSeconds()) * 1000; + int endDeltaMillis = (endTransitionRule.getOffsetBefore().getTotalSeconds() + - endTransitionRule.getOffsetAfter().getTotalSeconds()) * 1000; + if (dstSavings != endDeltaMillis) { + throw new IllegalStateException("Mismatched ORC DST savings for timezone: " + timezoneId); + } + + DstRule rule = new DstRule(); + rule.dstSavings = dstSavings; + fillDstRuleFromTransitionRule(timezoneId, rule, startTransitionRule, true); + fillDstRuleFromTransitionRule(timezoneId, rule, endTransitionRule, false); + + if (!verifyDstRuleAcrossReferenceYears(tz, rule)) { + throw new IllegalStateException("ZoneRules ORC DST rule verification failed for timezone: " + + timezoneId); + } + return rule; + } + + private static void fillDstRuleFromTransitionRule(String timezoneId, DstRule rule, + ZoneOffsetTransitionRule transitionRule, boolean isStartRule) { + // We only accept rules shaped as "first on or after ", + // i.e. ZoneRules' positive-day-indicator form. A negative indicator would mean + // "last on or before day" (DOW_LE_DOM_MODE, mode=3); we reject those + // here so that downstream code can assume DOW_GE_DOM_MODE (mode=2) unconditionally. + if (transitionRule.getDayOfWeek() == null + || transitionRule.getDayOfMonthIndicator() <= 0) { + throw new IllegalStateException("Unsupported ORC DST transition rule shape for timezone: " + + timezoneId); + } + + int month = transitionRule.getMonth().getValue() - 1; + int day = transitionRule.getDayOfMonthIndicator(); + int dayOfWeek = toCalendarDayOfWeek(transitionRule.getDayOfWeek().getValue()); + int time = getTransitionRuleTimeMillis(transitionRule); + int timeMode = getTransitionRuleTimeMode(transitionRule); + // Guaranteed by the precondition above (DayOfMonthIndicator > 0). + int mode = DstRule.MODE_DOW_GE_DOM; + + if (isStartRule) { + rule.startMonth = month; + rule.startDay = day; + rule.startDayOfWeek = dayOfWeek; + rule.startTime = time; + rule.startTimeMode = timeMode; + rule.startMode = mode; + } else { + rule.endMonth = month; + rule.endDay = day; + rule.endDayOfWeek = dayOfWeek; + rule.endTime = time; + rule.endTimeMode = timeMode; + rule.endMode = mode; + } + } + + private static int getTransitionRuleTimeMillis(ZoneOffsetTransitionRule transitionRule) { + int secondOfDay = transitionRule.isMidnightEndOfDay() + ? 24 * 3_600 + : transitionRule.getLocalTime().toSecondOfDay(); + return secondOfDay * 1000; + } + + private static int getTransitionRuleTimeMode(ZoneOffsetTransitionRule transitionRule) { + ZoneOffsetTransitionRule.TimeDefinition timeDef = transitionRule.getTimeDefinition(); + if (ZoneOffsetTransitionRule.TimeDefinition.UTC == timeDef) { + return DstRule.TIME_MODE_UTC; + } else if (ZoneOffsetTransitionRule.TimeDefinition.STANDARD == timeDef) { + return DstRule.TIME_MODE_STANDARD; + } else { + return DstRule.TIME_MODE_WALL; + } + } + + private static int toCalendarDayOfWeek(int javaTimeDayOfWeek) { + // java.time DayOfWeek: 1=Mon..7=Sun -> Calendar: 1=Sun..7=Sat + return (javaTimeDayOfWeek % 7) + 1; + } + + // ---- Path B: probe tz.getOffset() and recover the recurring rule ---- + + private static DstRule extractDstRuleByProbing(TimeZone tz) { + for (int refYear : DST_RULE_VALIDATION_YEARS) { + DstRule rule = extractDstRuleByProbing(tz, refYear); + if (rule != null && verifyDstRuleAcrossReferenceYears(tz, rule)) { + return rule; + } + } + return null; + } + + private static DstRule extractDstRuleByProbing(TimeZone tz, int refYear) { + long janFirst = OrcTimezoneInfo.utcMillisForDate(refYear, 1, 1); + long nextJanFirst = OrcTimezoneInfo.utcMillisForDate(refYear + 1, 1, 1); + + long dstOnTransition = -1; + long dstOffTransition = -1; + int initialOffset = tz.getOffset(janFirst - 1); + int prevOffset = initialOffset; + long step = 3_600_000L; // 1 hour + + for (long ms = janFirst; ms < nextJanFirst; ms += step) { + int curOffset = tz.getOffset(ms); + if (curOffset != prevOffset) { + long exactMs = OrcTimezoneInfo.binarySearchTransition(tz, ms - step, ms); + if (curOffset > prevOffset) { + // More than one DST-on transition in the same year would mean this + // year doesn't fit a SimpleTimeZone-style two-transition rule; let + // the caller fall back to extractDstRuleFromZoneRules. + if (dstOnTransition >= 0) return null; + dstOnTransition = exactMs; + } else { + if (dstOffTransition >= 0) return null; + dstOffTransition = exactMs; + } + prevOffset = curOffset; + } + // SimpleTimeZone-style zones have exactly two transitions per year. + // Once both are recorded and the running offset is back to the year- + // start standard offset, the remainder of the year is a no-op tail + // (~1-3 months for typical northern-hemisphere zones). Any later + // change would correctly trigger the "more than one DST-on/off" + // early-return above, so the only thing this break can skip is + // wasted probes -- ~720-2200 calls per year on common DST zones. + if (dstOnTransition >= 0 && dstOffTransition >= 0 && prevOffset == initialOffset) { + break; + } + } + + if (dstOnTransition < 0 || dstOffTransition < 0) { + return null; + } + + DstRule rule = new DstRule(); + rule.dstSavings = tz.getDSTSavings(); + + int[] startFields = decodeTransition(dstOnTransition, tz.getRawOffset()); + rule.startMonth = startFields[0]; + rule.startDay = startFields[1]; + rule.startDayOfWeek = startFields[2]; + rule.startTime = startFields[3]; + // decodeTransition converts to standard local time. + rule.startTimeMode = DstRule.TIME_MODE_STANDARD; + rule.startMode = startFields[4]; + + int[] endFields = decodeTransition(dstOffTransition, tz.getRawOffset()); + rule.endMonth = endFields[0]; + rule.endDay = endFields[1]; + rule.endDayOfWeek = endFields[2]; + rule.endTime = endFields[3]; + rule.endTimeMode = DstRule.TIME_MODE_STANDARD; + rule.endMode = endFields[4]; + + return rule; + } + + /** + * Decode a UTC transition instant into + * {@code [month(0-11), baseDay, dayOfWeek(1-7), timeMs, mode]}. + * + *

Precondition: {@code utcMs} is the first millisecond at which + * the new offset applies — the convention returned by + * {@link OrcTimezoneInfo#binarySearchTransition}. With that convention, + * {@code utcMs + rawOffsetMs} recovers the standard-time wall clock at + * the transition moment for both spring-forward (offset before == rawOffset) + * and fall-back (offset after == rawOffset). If a future change shifts the + * binary-search return convention by one ms, the decoded {@code timeMs} + * will drift and {@code baseDay} can cross a midnight boundary. + * + *

Recurring weekday rules are encoded as DOW_GE_DOM (mode=2). The base + * day is the earliest possible day of the matching occurrence in the month: + * 1st=1, 2nd=8, 3rd=15, 4th=22, last={@code monthLength - 6}. This mirrors + * encodings like "Sun >= 8" for the second Sunday in March and "Sun >= 25" + * for the last Sunday in October. + */ + private static int[] decodeTransition(long utcMs, int rawOffsetMs) { + long localMs = utcMs + rawOffsetMs; + LocalDateTime ldt = LocalDateTime.ofInstant(Instant.ofEpochMilli(localMs), ZoneOffset.UTC); + + int month = ldt.getMonthValue() - 1; // 0-based for Calendar compat + int dayOfMonth = ldt.getDayOfMonth(); + int dayOfWeek = toCalendarDayOfWeek(ldt.getDayOfWeek().getValue()); + int timeInDay = ldt.getHour() * 3_600_000 + + ldt.getMinute() * 60_000 + + ldt.getSecond() * 1000 + + ldt.getNano() / 1_000_000; + + int monthLength = ldt.toLocalDate().lengthOfMonth(); + int dayOfWeekInMonth = (dayOfMonth - 1) / 7 + 1; + boolean isLastOccurrence = dayOfMonth + 7 > monthLength; + int baseDayOfMonth = isLastOccurrence + ? monthLength - 6 + : 1 + (dayOfWeekInMonth - 1) * 7; + + return new int[]{month, baseDayOfMonth, dayOfWeek, timeInDay, DstRule.MODE_DOW_GE_DOM}; + } + + // ---- Verification: ensure the extracted rule matches tz.getOffset ---- + + private static boolean verifyDstRuleAcrossReferenceYears(TimeZone tz, DstRule rule) { + for (int refYear : DST_RULE_VALIDATION_YEARS) { + if (!verifyDstRule(tz, rule, refYear)) { + return false; + } + } + return true; + } + + /** + * Verify the extracted rule matches {@code tz.getOffset} around transition + * boundaries and at monthly sample points for {@code refYear ± 1} (3 years). + * DST mismatches only manifest near transitions, so dense sampling at + * boundaries plus monthly spot checks reduces a naive full-year scan from + * ~52K probes to ~200 per reference year. + */ + private static boolean verifyDstRule(TimeZone tz, DstRule rule, int refYear) { + int rawOffsetMs = tz.getRawOffset(); + for (int y = refYear - 1; y <= refYear + 1; y++) { + long dstStart = computeTransitionUtcMillis(y, rule.startMonth, rule.startDay, + rule.startDayOfWeek, rule.startTime, rule.startTimeMode, rule.startMode, + rawOffsetMs, rule.dstSavings, true); + long dstEnd = computeTransitionUtcMillis(y, rule.endMonth, rule.endDay, + rule.endDayOfWeek, rule.endTime, rule.endTimeMode, rule.endMode, + rawOffsetMs, rule.dstSavings, false); + + // All sample points below land within the wall-clock year y for any + // real DST rule (boundaries ±12h, plus monthly noon-UTC anchors), so + // skip the per-sample year derivation and re-computation of these + // bounds inside computeDstOffset -- ~186 computeTransitionUtcMillis + // calls per verifyDstRule otherwise. Hand the already-computed bounds + // to the variant. + long[] boundaries = {dstStart, dstEnd}; + for (long boundary : boundaries) { + long from = boundary - 12 * 3_600_000L; + long to = boundary + 12 * 3_600_000L; + for (long ms = from; ms <= to; ms += 3_600_000L) { + if (tz.getOffset(ms) != computeDstOffsetWithBounds(ms, rawOffsetMs, rule, + dstStart, dstEnd)) { + return false; + } + } + } + + for (int m = 1; m <= 12; m++) { + long ms = OrcTimezoneInfo.utcMillisForDate(y, m, 1) + 12 * 3_600_000L; + if (tz.getOffset(ms) != computeDstOffsetWithBounds(ms, rawOffsetMs, rule, + dstStart, dstEnd)) { + return false; + } + } + } + return true; + } + + /** + * In-DST classification with bounds supplied by the caller. The only caller + * is {@link #verifyDstRule}, which already computes {@code dstStart} and + * {@code dstEnd} for the year being checked; passing them in avoids + * recomputing them per sample. A general-purpose entry point that derives + * the year from {@code utcMs} can be reintroduced in a follow-up part when + * a non-verification caller appears. + */ + private static int computeDstOffsetWithBounds(long utcMs, int rawOffsetMs, DstRule rule, + long dstStart, long dstEnd) { + boolean inDst = dstStart < dstEnd + ? (utcMs >= dstStart && utcMs < dstEnd) + : (utcMs >= dstStart || utcMs < dstEnd); + return inDst ? rawOffsetMs + rule.dstSavings : rawOffsetMs; + } + + private static long computeTransitionUtcMillis(int year, int ruleMonth, int ruleDay, + int ruleDayOfWeek, int ruleTime, int ruleTimeMode, int ruleMode, int rawOffsetMs, + int dstSavingsMs, boolean isStartRule) { + int actualDay = computeRuleDay(ruleMode, ruleDay, ruleDayOfWeek, year, ruleMonth); + long utcMs = OrcTimezoneInfo.utcMillisForDate(year, ruleMonth + 1, actualDay) + ruleTime; + if (ruleTimeMode == DstRule.TIME_MODE_WALL) { + // WALL time: subtract raw offset and (for end transitions) also DST savings. + utcMs -= rawOffsetMs; + if (!isStartRule) { + utcMs -= dstSavingsMs; + } + } else if (ruleTimeMode == DstRule.TIME_MODE_STANDARD) { + utcMs -= rawOffsetMs; + } + // TIME_MODE_UTC is already in UTC. + return utcMs; + } + + private static int computeRuleDay(int ruleMode, int ruleDay, int ruleDayOfWeek, int year, + int month) { + LocalDate firstOfMonth = LocalDate.of(year, month + 1, 1); + int monthLength = firstOfMonth.lengthOfMonth(); + int firstDayOfWeek = toCalendarDayOfWeek(firstOfMonth.getDayOfWeek().getValue()); + + // The current extraction paths (Path A's fillDstRuleFromTransitionRule and + // Path B's decodeTransition) emit only MODE_DOW_GE_DOM. The other three + // branches are kept for forward compatibility -- e.g. a Part 3+ caller that + // constructs a DstRule directly from a serialised form. Until such a + // caller exists they are not reachable from any test; add coverage when + // one appears. + switch (ruleMode) { + case DstRule.MODE_DOW_IN_MONTH: { + // Clamp the result into [1, monthLength] so a "Nth occurrence" that + // overflows the month (e.g. 5th Sunday in a 28-day February) or + // underflows (e.g. -5th occurrence in a 28-day month) collapses to a + // valid in-month day rather than escaping with a DateTimeException + // from utcMillisForDate. Mirrors the within-month clamp applied to + // MODE_DOW_GE_DOM below and SimpleTimeZone's documented behaviour. + if (ruleDay > 0) { + int diff = ruleDayOfWeek - firstDayOfWeek; + if (diff < 0) diff += 7; + return Math.min(1 + diff + (ruleDay - 1) * 7, monthLength); + } else { + int lastDayOfWeek = toCalendarDayOfWeek( + LocalDate.of(year, month + 1, monthLength).getDayOfWeek().getValue()); + int diff = lastDayOfWeek - ruleDayOfWeek; + if (diff < 0) diff += 7; + return Math.max(monthLength - diff + (ruleDay + 1) * 7, 1); + } + } + case DstRule.MODE_DOW_GE_DOM: { + // Per ZoneOffsetTransitionRule.getDayOfMonthIndicator(), the indicator + // may exceed monthLength (e.g. Feb 29 in a non-leap year, treated as + // Mar 1). Clamp the anchor before LocalDate.of so it never throws, and + // clamp the result so a DOW_GE_DOM rule whose computed day overflows + // the month produces a valid in-month day rather than escaping with a + // DateTimeException from utcMillisForDate. This mirrors SimpleTimeZone's + // documented within-month clamp for DOW_GE_DOM. + int anchorDay = Math.min(ruleDay, monthLength); + int targetDayOfWeek = toCalendarDayOfWeek( + LocalDate.of(year, month + 1, anchorDay).getDayOfWeek().getValue()); + int diff = ruleDayOfWeek - targetDayOfWeek; + if (diff < 0) diff += 7; + return Math.min(anchorDay + diff, monthLength); + } + case DstRule.MODE_DOW_LE_DOM: { + // Mirrors the MODE_DOW_GE_DOM clamp above: the day-of-month indicator + // can exceed monthLength (e.g. 31 in February). Clamp the anchor + // before LocalDate.of so it never throws, and clamp the result to a + // valid in-month day so utcMillisForDate cannot receive day <= 0. + int anchorDay = Math.min(ruleDay, monthLength); + int targetDayOfWeek = toCalendarDayOfWeek( + LocalDate.of(year, month + 1, anchorDay).getDayOfWeek().getValue()); + int diff = targetDayOfWeek - ruleDayOfWeek; + if (diff < 0) diff += 7; + return Math.max(anchorDay - diff, 1); + } + case DstRule.MODE_DOM: + default: + return ruleDay; + } + } +} diff --git a/src/main/java/com/nvidia/spark/rapids/jni/OrcTimezoneInfo.java b/src/main/java/com/nvidia/spark/rapids/jni/OrcTimezoneInfo.java index 348b3cfeb2..9afa575fae 100644 --- a/src/main/java/com/nvidia/spark/rapids/jni/OrcTimezoneInfo.java +++ b/src/main/java/com/nvidia/spark/rapids/jni/OrcTimezoneInfo.java @@ -77,7 +77,9 @@ public OrcTimezoneInfo(int rawOffset, long[] transitions, int[] offsets) { // year, month, and day are all 1-indexed, matching LocalDate.of conventions // (e.g. month=1 is January). This avoids the easy-to-misread mix of 0-based // month and 1-based day at the call site. - private static long utcMillisForDate(int year, int month, int day) { + // + // Package-private so OrcDstRuleExtractor can share the same anchor. + static long utcMillisForDate(int year, int month, int day) { return LocalDate.of(year, month, day).toEpochDay() * 24L * 3600_000L; } @@ -277,7 +279,9 @@ private static int collectTimeZoneTransitionsByScanning( return currentOffset; } - private static long binarySearchTransition(TimeZone tz, long lo, long hi) { + // Package-private so OrcDstRuleExtractor can reuse the same bracketed + // binary search. + static long binarySearchTransition(TimeZone tz, long lo, long hi) { int loOffset = tz.getOffset(lo); while (hi - lo > 1) { long mid = lo + (hi - lo) / 2; diff --git a/src/test/java/com/nvidia/spark/rapids/jni/OrcTimezoneInfoTest.java b/src/test/java/com/nvidia/spark/rapids/jni/OrcTimezoneInfoTest.java index c34f2cc9a0..4878c10c00 100644 --- a/src/test/java/com/nvidia/spark/rapids/jni/OrcTimezoneInfoTest.java +++ b/src/test/java/com/nvidia/spark/rapids/jni/OrcTimezoneInfoTest.java @@ -18,7 +18,22 @@ import org.junit.jupiter.api.Test; +import java.time.DayOfWeek; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.LocalTime; +import java.time.Month; +import java.time.Year; +import java.time.ZoneId; +import java.time.ZoneOffset; +import java.time.zone.ZoneOffsetTransition; +import java.time.zone.ZoneOffsetTransitionRule; +import java.time.zone.ZoneRules; +import java.util.Arrays; +import java.util.Collections; +import java.util.Date; import java.util.List; +import java.util.TimeZone; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; @@ -30,6 +45,30 @@ public class OrcTimezoneInfoTest { + /** + * An inert historical transition for synthetic {@link ZoneRules} fixtures. + * + *

On JDK 8 (the build's {@code maven.compiler.target}), {@code + * ZoneRules.of(base, base, emptyList, emptyList, recurringRules)} -- recurring + * rules but no concrete historical transitions -- reports {@code + * isFixedOffset() == true} even though DST rules are present. That makes + * {@link OrcDstRuleExtractor#extractDstRule} short-circuit to {@code null} at + * its {@code rules.isFixedOffset()} guard before either extraction path runs. + * JDK 17 reports {@code false}, so the gap is invisible to a JDK 17 compile + * check and only surfaces when the 1.8-target test suite actually runs. + * + *

Including one concrete transition flips {@code isFixedOffset()} to {@code + * false} on every JDK. Real IANA zones always carry historical transitions, + * so production is unaffected -- only these hand-built fixtures need it. The + * transition is dated 1900 and changes only the wall offset (the + * standard-offset history stays at the base offset), so it perturbs neither + * {@code getStandardOffset} nor the post-2060 probing windows the tests rely + * on. + */ + private static final ZoneOffsetTransition SYNTHETIC_HISTORICAL_TRANSITION = + ZoneOffsetTransition.of( + LocalDateTime.of(1900, 1, 1, 0, 0), ZoneOffset.ofHours(-1), ZoneOffset.UTC); + @Test void testGetFixedOffsetZone() { // Fixed-offset zones must return a non-null OrcTimezoneInfo with the @@ -116,4 +155,744 @@ void testGetHistoricalTransitionsZone() { "transitions must be strictly increasing"); } } + + // ---- DST rule extraction (Part 2 — not wired into production yet) ---- + + @Test + void testExtractDstRuleNorthernHemisphere() { + // America/New_York: DST starts 2nd Sunday of March, ends 1st Sunday of November. + // dstSavings is +1h (3_600_000 ms). startMonth=2 (March, 0-based), + // endMonth=10 (November, 0-based). DOW_GE_DOM_MODE = 2. + OrcDstRuleExtractor.DstRule rule = extractDstRuleFor("America/New_York"); + assertNotNull(rule, "America/New_York must have a DST rule"); + assertEquals(3_600_000, rule.dstSavings); + assertEquals(2, rule.startMonth); + assertEquals(10, rule.endMonth); + assertEquals(2, rule.startMode); + assertEquals(2, rule.endMode); + // Day-of-week 1 == Sunday in Calendar's 1=Sun..7=Sat convention. + assertEquals(1, rule.startDayOfWeek); + assertEquals(1, rule.endDayOfWeek); + // Second Sunday in March: base day 8 ("Sun >= 8"). First Sunday in November: base day 1. + assertEquals(8, rule.startDay); + assertEquals(1, rule.endDay); + // Probing path encodes both transitions as STANDARD time (timeMode=1) at + // the wall-clock instants 02:00 (DST start) and 01:00 (DST end). Lock + // these so a regression that flips timeMode to WALL would shift the + // computed UTC transitions by dstSavings and fail verifyDstRule silently. + assertEquals(1, rule.startTimeMode, "DST start should be STANDARD time mode"); + assertEquals(1, rule.endTimeMode, "DST end should be STANDARD time mode"); + assertEquals(2 * 3_600_000, rule.startTime, "DST start at 02:00 standard"); + assertEquals(1 * 3_600_000, rule.endTime, "DST end at 01:00 standard"); + } + + @Test + void testExtractDstRuleEuropeLondon() { + // Europe/London: DST starts last Sunday of March, ends last Sunday of October. + // Encoded as DOW_GE_DOM with base day = monthLength - 6. + OrcDstRuleExtractor.DstRule rule = extractDstRuleFor("Europe/London"); + assertNotNull(rule, "Europe/London must have a DST rule"); + assertEquals(3_600_000, rule.dstSavings); + assertEquals(2, rule.startMonth); + assertEquals(9, rule.endMonth); + assertEquals(1, rule.startDayOfWeek); + assertEquals(1, rule.endDayOfWeek); + // Last Sunday of March (31-day month): base 25. Last Sunday of October (31-day): base 25. + assertEquals(25, rule.startDay); + assertEquals(25, rule.endDay); + // Probing path encodes both ends as DOW_GE_DOM (mode=2) on STANDARD time + // (timeMode=1). BST flips on at 01:00 standard in March and off at 01:00 + // standard in October. + assertEquals(2, rule.startMode); + assertEquals(2, rule.endMode); + assertEquals(1, rule.startTimeMode); + assertEquals(1, rule.endTimeMode); + assertEquals(1 * 3_600_000, rule.startTime, "DST start at 01:00 standard"); + assertEquals(1 * 3_600_000, rule.endTime, "DST end at 01:00 standard"); + } + + @Test + void testExtractDstRuleSouthernHemisphere() { + // Australia/Sydney: DST starts 1st Sunday of October, ends 1st Sunday of April. + // Southern hemisphere — start month numerically > end month. + OrcDstRuleExtractor.DstRule rule = extractDstRuleFor("Australia/Sydney"); + assertNotNull(rule, "Australia/Sydney must have a DST rule"); + assertEquals(3_600_000, rule.dstSavings); + assertEquals(9, rule.startMonth); + assertEquals(3, rule.endMonth); + assertTrue(rule.startMonth > rule.endMonth, + "southern hemisphere: start month should follow end month within the calendar year"); + // 1st Sunday of October: base 1. 1st Sunday of April: base 1. + assertEquals(1, rule.startDay); + assertEquals(1, rule.endDay); + assertEquals(1, rule.startDayOfWeek); + assertEquals(1, rule.endDayOfWeek); + assertEquals(2, rule.startMode); + assertEquals(2, rule.endMode); + assertEquals(1, rule.startTimeMode); + assertEquals(1, rule.endTimeMode); + assertEquals(2 * 3_600_000, rule.startTime, "DST start at 02:00 standard"); + assertEquals(2 * 3_600_000, rule.endTime, "DST end at 02:00 standard"); + } + + @Test + void testExtractDstRuleNoDstReturnsNull() { + // Asia/Shanghai had DST historically (1940s, 1986-1991) but no current rule. + // tz.useDaylightTime() must be false → extractDstRule returns null. + assertNull(extractDstRuleFor("Asia/Shanghai")); + } + + @Test + void testExtractDstRuleFixedOffsetReturnsNull() { + // Fixed-offset zones never observe DST. + assertNull(extractDstRuleFor("UTC")); + assertNull(extractDstRuleFor("+05:30")); + } + + @Test + void testExtractDstRuleThrowsOnUnsupportedRuleCount() { + // Synthesize a TimeZone whose getOffset is constant. The probing path + // (extractDstRuleByProbing) observes no transitions across all anchor + // years and returns null, so extractDstRuleFromZoneRules runs with the + // hand-crafted ZoneRules below. + TimeZone constantOffsetWithDstFlag = new TimeZone() { + @Override public int getOffset(long instant) { return 0; } + @Override public int getOffset(int era, int year, int month, int day, int dow, int ms) { + return 0; + } + @Override public int getRawOffset() { return 0; } + @Override public void setRawOffset(int offsetMillis) {} + @Override public boolean useDaylightTime() { return true; } + @Override public boolean inDaylightTime(Date date) { return false; } + }; + constantOffsetWithDstFlag.setID("Synthetic/UnsupportedRuleCount"); + + // ZoneRules with exactly one recurring rule. Production code rejects any + // count outside {0, 2}, so this triggers the "Unsupported ORC DST rule + // count" branch in extractDstRuleFromZoneRules. + ZoneOffset baseOffset = ZoneOffset.UTC; + ZoneOffsetTransitionRule lonelyRule = ZoneOffsetTransitionRule.of( + Month.MARCH, 8, DayOfWeek.SUNDAY, LocalTime.of(2, 0), false, + ZoneOffsetTransitionRule.TimeDefinition.STANDARD, + baseOffset, baseOffset, ZoneOffset.ofHours(1)); + ZoneRules syntheticRules = ZoneRules.of( + baseOffset, baseOffset, + Collections.emptyList(), + Collections.singletonList(SYNTHETIC_HISTORICAL_TRANSITION), + Collections.singletonList(lonelyRule)); + + IllegalStateException ex = assertThrows(IllegalStateException.class, + () -> OrcDstRuleExtractor.extractDstRule( + "Synthetic/UnsupportedRuleCount", constantOffsetWithDstFlag, syntheticRules)); + assertTrue(ex.getMessage().contains("Synthetic/UnsupportedRuleCount"), + "exception message should name the offending zone: " + ex.getMessage()); + } + + /** + * Resolve a zone id through the same SHORT_IDS pipeline production uses. + * + *

For fixed-offset ids like {@code "+05:30"}, {@code TimeZone.getTimeZone} + * silently returns GMT (rawOffset=0) rather than a TimeZone with the actual + * offset, because {@code java.util.TimeZone} does not recognise the + * offset-format id. Mirror production's {@code rules.isFixedOffset()} guard + * here so the test does not silently feed a GMT TimeZone into + * {@code extractDstRule}; production's + * {@link OrcDstRuleExtractor#extractDstRule(String, java.util.TimeZone, java.time.zone.ZoneRules)} + * now short-circuits on {@code rules.isFixedOffset()} too, but the test + * helper keeps its own pre-call guard so a future caller pattern that drops + * the production guard cannot silently re-introduce the trap. + */ + private static OrcDstRuleExtractor.DstRule extractDstRuleFor(String timezoneId) { + ZoneId zoneId = ZoneId.of(timezoneId, ZoneId.SHORT_IDS); + ZoneRules rules = zoneId.getRules(); + // For fixed-offset zones the UTC placeholder has rawOffset=0, which would + // *not* match the rules' actual standard offset and so would fail + // extractDstRule's tz-vs-rules sanity check. That's safe today because + // extractDstRule's isFixedOffset() short-circuit at the top of the method + // returns null before the sanity check executes. If a future change ever + // reorders these guards, the mismatch would surface here as a confusing + // "describe different zones" exception rather than the silent-GMT bug we + // are guarding against. + TimeZone tz = rules.isFixedOffset() + ? TimeZone.getTimeZone("UTC") + : TimeZone.getTimeZone(zoneId.getId()); + return OrcDstRuleExtractor.extractDstRule(timezoneId, tz, rules); + } + + @Test + void testExtractDstRuleThrowsWhenBothPathsFail() { + // Constant-offset TimeZone — probing observes no transitions across all + // anchor years and returns null. + TimeZone constantOffsetWithDstFlag = new TimeZone() { + @Override public int getOffset(long instant) { return 0; } + @Override public int getOffset(int era, int year, int month, int day, int dow, int ms) { + return 0; + } + @Override public int getRawOffset() { return 0; } + @Override public void setRawOffset(int offsetMillis) {} + @Override public boolean useDaylightTime() { return true; } + @Override public boolean inDaylightTime(Date date) { return false; } + }; + constantOffsetWithDstFlag.setID("Synthetic/NoRecurringRules"); + + // A single historical transition keeps rules.isFixedOffset() == false so + // the early guard in extractDstRule does not short-circuit; the empty + // lastRules list makes extractDstRuleFromZoneRules return null. Both paths + // fail and the terminal "Failed to extract" throw fires. + ZoneOffset baseOffset = ZoneOffset.UTC; + ZoneOffsetTransition historical = ZoneOffsetTransition.of( + LocalDateTime.of(1900, 1, 1, 0, 0), + ZoneOffset.ofHours(-1), baseOffset); + ZoneRules rules = ZoneRules.of( + baseOffset, baseOffset, + Collections.emptyList(), + Collections.singletonList(historical), + Collections.emptyList()); + + IllegalStateException ex = assertThrows(IllegalStateException.class, + () -> OrcDstRuleExtractor.extractDstRule( + "Synthetic/NoRecurringRules", constantOffsetWithDstFlag, rules)); + assertTrue(ex.getMessage().contains("Synthetic/NoRecurringRules"), + "exception message should name the offending zone: " + ex.getMessage()); + assertTrue(ex.getMessage().contains("Failed to extract"), + "terminal throw should mention 'Failed to extract': " + ex.getMessage()); + } + + // Helper: TimeZone whose getOffset is constant. Probing finds no transitions + // across any anchor year and returns null, so extractDstRuleFromZoneRules is + // invoked with the hand-crafted ZoneRules in each test below. + private static TimeZone newConstantOffsetWithDstFlag(String id) { + TimeZone tz = new TimeZone() { + @Override public int getOffset(long instant) { return 0; } + @Override public int getOffset(int era, int year, int month, int day, int dow, int ms) { + return 0; + } + @Override public int getRawOffset() { return 0; } + @Override public void setRawOffset(int offsetMillis) {} + @Override public boolean useDaylightTime() { return true; } + @Override public boolean inDaylightTime(Date date) { return false; } + }; + tz.setID(id); + return tz; + } + + @Test + void testExtractDstRuleThrowsOnZeroDeltaRule() { + // Two recurring rules where the second one has offsetBefore == offsetAfter + // (delta == 0). Triggers the "Unsupported zero-delta ORC DST rule" branch. + TimeZone tz = newConstantOffsetWithDstFlag("Synthetic/ZeroDelta"); + ZoneOffset base = ZoneOffset.UTC; + ZoneOffsetTransitionRule startRule = ZoneOffsetTransitionRule.of( + Month.MARCH, 8, DayOfWeek.SUNDAY, LocalTime.of(2, 0), false, + ZoneOffsetTransitionRule.TimeDefinition.STANDARD, + base, base, ZoneOffset.ofHours(1)); + ZoneOffsetTransitionRule zeroDeltaRule = ZoneOffsetTransitionRule.of( + Month.OCTOBER, 25, DayOfWeek.SUNDAY, LocalTime.of(1, 0), false, + ZoneOffsetTransitionRule.TimeDefinition.STANDARD, + base, base, base); // zero delta + ZoneRules rules = ZoneRules.of(base, base, + Collections.emptyList(), + Collections.singletonList(SYNTHETIC_HISTORICAL_TRANSITION), + Arrays.asList(startRule, zeroDeltaRule)); + IllegalStateException ex = assertThrows(IllegalStateException.class, + () -> OrcDstRuleExtractor.extractDstRule("Synthetic/ZeroDelta", tz, rules)); + assertTrue(ex.getMessage().contains("zero-delta"), + "expected 'zero-delta' in message: " + ex.getMessage()); + } + + @Test + void testExtractDstRuleThrowsOnBothPositiveDeltaRules() { + // Two rules both with positive delta — endTransitionRule stays null. + // Triggers the "Failed to identify ORC DST start/end rules" branch. + TimeZone tz = newConstantOffsetWithDstFlag("Synthetic/BothPositive"); + ZoneOffset base = ZoneOffset.UTC; + ZoneOffset plus1 = ZoneOffset.ofHours(1); + ZoneOffsetTransitionRule ruleA = ZoneOffsetTransitionRule.of( + Month.MARCH, 8, DayOfWeek.SUNDAY, LocalTime.of(2, 0), false, + ZoneOffsetTransitionRule.TimeDefinition.STANDARD, + base, base, plus1); + ZoneOffsetTransitionRule ruleB = ZoneOffsetTransitionRule.of( + Month.JUNE, 1, DayOfWeek.SUNDAY, LocalTime.of(2, 0), false, + ZoneOffsetTransitionRule.TimeDefinition.STANDARD, + base, base, plus1); + ZoneRules rules = ZoneRules.of(base, base, + Collections.emptyList(), + Collections.singletonList(SYNTHETIC_HISTORICAL_TRANSITION), + Arrays.asList(ruleA, ruleB)); + IllegalStateException ex = assertThrows(IllegalStateException.class, + () -> OrcDstRuleExtractor.extractDstRule("Synthetic/BothPositive", tz, rules)); + assertTrue(ex.getMessage().contains("Failed to identify"), + "expected 'Failed to identify' in message: " + ex.getMessage()); + } + + @Test + void testExtractDstRuleThrowsWhenTzAndRulesDescribeDifferentZones() { + // The sanity-check in extractDstRule compares tz.getRawOffset() against + // rules.getStandardOffset(ref). Pair a constant-zero TimeZone with + // ZoneRules for America/New_York (rawOffset = -18_000_000 ms at the + // 2024 reference instant) so the check fires before either extraction + // path runs. + TimeZone zeroOffsetTz = newConstantOffsetWithDstFlag("Synthetic/OffsetMismatch"); + ZoneRules newYorkRules = ZoneId.of("America/New_York").getRules(); + IllegalStateException ex = assertThrows(IllegalStateException.class, + () -> OrcDstRuleExtractor.extractDstRule( + "Synthetic/OffsetMismatch", zeroOffsetTz, newYorkRules)); + assertTrue(ex.getMessage().contains("describe different zones"), + "expected 'describe different zones' in message: " + ex.getMessage()); + } + + @Test + void testExtractDstRuleThrowsOnBothNegativeDeltaRules() { + // Symmetric to testExtractDstRuleThrowsOnBothPositiveDeltaRules. Two rules + // both with negative delta — startTransitionRule stays null. Pins the + // startTransitionRule == null sub-case of the || at line 157 so a future + // change from || to && cannot slip through. + TimeZone tz = newConstantOffsetWithDstFlag("Synthetic/BothNegative"); + ZoneOffset base = ZoneOffset.UTC; + ZoneOffset plus1 = ZoneOffset.ofHours(1); + ZoneOffsetTransitionRule ruleA = ZoneOffsetTransitionRule.of( + Month.MARCH, 8, DayOfWeek.SUNDAY, LocalTime.of(2, 0), false, + ZoneOffsetTransitionRule.TimeDefinition.STANDARD, + base, plus1, base); + ZoneOffsetTransitionRule ruleB = ZoneOffsetTransitionRule.of( + Month.NOVEMBER, 1, DayOfWeek.SUNDAY, LocalTime.of(1, 0), false, + ZoneOffsetTransitionRule.TimeDefinition.STANDARD, + base, plus1, base); + ZoneRules rules = ZoneRules.of(base, base, + Collections.emptyList(), + Collections.singletonList(SYNTHETIC_HISTORICAL_TRANSITION), + Arrays.asList(ruleA, ruleB)); + IllegalStateException ex = assertThrows(IllegalStateException.class, + () -> OrcDstRuleExtractor.extractDstRule("Synthetic/BothNegative", tz, rules)); + assertTrue(ex.getMessage().contains("Failed to identify"), + "expected 'Failed to identify' in message: " + ex.getMessage()); + } + + @Test + void testExtractDstRuleThrowsOnMismatchedSavings() { + // Start gains +1h, end loses -2h. Triggers the "Mismatched ORC DST savings" + // branch. + TimeZone tz = newConstantOffsetWithDstFlag("Synthetic/MismatchedSavings"); + ZoneOffset base = ZoneOffset.UTC; + ZoneOffset plus1 = ZoneOffset.ofHours(1); + ZoneOffset plus2 = ZoneOffset.ofHours(2); + ZoneOffsetTransitionRule startRule = ZoneOffsetTransitionRule.of( + Month.MARCH, 8, DayOfWeek.SUNDAY, LocalTime.of(2, 0), false, + ZoneOffsetTransitionRule.TimeDefinition.STANDARD, + base, base, plus1); + ZoneOffsetTransitionRule endRule = ZoneOffsetTransitionRule.of( + Month.NOVEMBER, 1, DayOfWeek.SUNDAY, LocalTime.of(2, 0), false, + ZoneOffsetTransitionRule.TimeDefinition.STANDARD, + base, plus2, base); // -2h, but start was +1h + ZoneRules rules = ZoneRules.of(base, base, + Collections.emptyList(), + Collections.singletonList(SYNTHETIC_HISTORICAL_TRANSITION), + Arrays.asList(startRule, endRule)); + IllegalStateException ex = assertThrows(IllegalStateException.class, + () -> OrcDstRuleExtractor.extractDstRule("Synthetic/MismatchedSavings", tz, rules)); + assertTrue(ex.getMessage().contains("Mismatched ORC DST savings"), + "expected 'Mismatched ORC DST savings' in message: " + ex.getMessage()); + } + + @Test + void testExtractDstRuleThrowsOnUnsupportedRuleShape() { + // First rule has null dayOfWeek (DOM-shaped rule, fixed day-of-month). + // Triggers the "Unsupported ORC DST transition rule shape" branch in + // fillDstRuleFromTransitionRule. + TimeZone tz = newConstantOffsetWithDstFlag("Synthetic/DomRule"); + ZoneOffset base = ZoneOffset.UTC; + ZoneOffset plus1 = ZoneOffset.ofHours(1); + ZoneOffsetTransitionRule domRule = ZoneOffsetTransitionRule.of( + Month.MARCH, 15, null, LocalTime.of(2, 0), false, // null dayOfWeek + ZoneOffsetTransitionRule.TimeDefinition.STANDARD, + base, base, plus1); + ZoneOffsetTransitionRule endRule = ZoneOffsetTransitionRule.of( + Month.OCTOBER, 25, DayOfWeek.SUNDAY, LocalTime.of(1, 0), false, + ZoneOffsetTransitionRule.TimeDefinition.STANDARD, + base, plus1, base); + ZoneRules rules = ZoneRules.of(base, base, + Collections.emptyList(), + Collections.singletonList(SYNTHETIC_HISTORICAL_TRANSITION), + Arrays.asList(domRule, endRule)); + IllegalStateException ex = assertThrows(IllegalStateException.class, + () -> OrcDstRuleExtractor.extractDstRule("Synthetic/DomRule", tz, rules)); + assertTrue(ex.getMessage().contains("transition rule shape"), + "expected 'transition rule shape' in message: " + ex.getMessage()); + } + + @Test + void testExtractDstRuleThrowsWhenPathAVerificationFails() { + // Path A's verification-failure branch. The TimeZone returns offset 0 + // everywhere, so probing finds no transitions and returns null. Path A + // then parses the two valid recurring rules into a DstRule with + // dstSavings=+1h, but verifyDstRuleAcrossReferenceYears compares + // computeDstOffset (which predicts +1h inside the DST window) against + // tz.getOffset (constant 0) — the mismatch causes verify to return + // false and the "ZoneRules ORC DST rule verification failed" branch + // fires. + TimeZone tz = newConstantOffsetWithDstFlag("Synthetic/PathAVerifyFail"); + ZoneOffset base = ZoneOffset.UTC; + ZoneOffset plus1 = ZoneOffset.ofHours(1); + ZoneOffsetTransitionRule startRule = ZoneOffsetTransitionRule.of( + Month.MARCH, 8, DayOfWeek.SUNDAY, LocalTime.of(2, 0), false, + ZoneOffsetTransitionRule.TimeDefinition.STANDARD, + base, base, plus1); + ZoneOffsetTransitionRule endRule = ZoneOffsetTransitionRule.of( + Month.NOVEMBER, 1, DayOfWeek.SUNDAY, LocalTime.of(1, 0), false, + ZoneOffsetTransitionRule.TimeDefinition.STANDARD, + base, plus1, base); + ZoneRules rules = ZoneRules.of(base, base, + Collections.emptyList(), + Collections.singletonList(SYNTHETIC_HISTORICAL_TRANSITION), + Arrays.asList(startRule, endRule)); + IllegalStateException ex = assertThrows(IllegalStateException.class, + () -> OrcDstRuleExtractor.extractDstRule("Synthetic/PathAVerifyFail", tz, rules)); + assertTrue(ex.getMessage().contains("ZoneRules ORC DST rule verification failed"), + "expected verification-failed message: " + ex.getMessage()); + } + + @Test + void testExtractDstRuleThrowsOnNegativeDayIndicator() { + // Negative dayOfMonthIndicator encodes a DOW_LE_DOM rule ("last + // on or before day"); fillDstRuleFromTransitionRule rejects it via the + // same "Unsupported ORC DST transition rule shape" guard as null dayOfWeek + // (the two sub-cases share one || condition; this test pins the second). + TimeZone tz = newConstantOffsetWithDstFlag("Synthetic/NegativeIndicator"); + ZoneOffset base = ZoneOffset.UTC; + ZoneOffset plus1 = ZoneOffset.ofHours(1); + ZoneOffsetTransitionRule negativeIndicatorRule = ZoneOffsetTransitionRule.of( + Month.MARCH, -1, DayOfWeek.SUNDAY, LocalTime.of(2, 0), false, + ZoneOffsetTransitionRule.TimeDefinition.STANDARD, + base, base, plus1); + ZoneOffsetTransitionRule endRule = ZoneOffsetTransitionRule.of( + Month.OCTOBER, 25, DayOfWeek.SUNDAY, LocalTime.of(1, 0), false, + ZoneOffsetTransitionRule.TimeDefinition.STANDARD, + base, plus1, base); + ZoneRules rules = ZoneRules.of(base, base, + Collections.emptyList(), + Collections.singletonList(SYNTHETIC_HISTORICAL_TRANSITION), + Arrays.asList(negativeIndicatorRule, endRule)); + IllegalStateException ex = assertThrows(IllegalStateException.class, + () -> OrcDstRuleExtractor.extractDstRule("Synthetic/NegativeIndicator", tz, rules)); + assertTrue(ex.getMessage().contains("transition rule shape"), + "expected 'transition rule shape' in message: " + ex.getMessage()); + } + + /** + * Compute the UTC millis at midnight UTC of the {@code n}-th occurrence of {@code dow} in + * {@code month} of {@code year}. + * + * @param year calendar year + * @param month month (1-based via {@link Month}) + * @param dow day of week to match + * @param n occurrence index (1 = first, 2 = second, ...) + * @return UTC epoch millis at midnight of the resolved date + */ + private static long nthDayOfWeekUtcMs(int year, Month month, DayOfWeek dow, int n) { + LocalDate firstOfMonth = LocalDate.of(year, month, 1); + int firstDow = firstOfMonth.getDayOfWeek().getValue(); // 1..7, Mon..Sun + int diff = dow.getValue() - firstDow; + if (diff < 0) diff += 7; + int day = 1 + diff + (n - 1) * 7; + return LocalDate.of(year, month, day).toEpochDay() * 86_400_000L; + } + + @Test + void testExtractDstRuleFebruaryClampDoesNotThrow() { + // dayOfMonthIndicator=29 + SUNDAY for FEBRUARY forces ruleDay (29) > + // monthLength (28 in non-leap years). Without the Math.min(ruleDay, + // monthLength) anchor clamp in computeRuleDay's MODE_DOW_GE_DOM branch, + // LocalDate.of(year, FEBRUARY, 29) on a non-leap year would throw + // DateTimeException inside verifyDstRuleAcrossReferenceYears. + // + // For indicator=29 the clamp makes computeRuleDay collapse to "last day + // of February": 29 on leap years, 28 on non-leap years (and the day-of- + // week constraint is silently dropped by the result clamp). The + // synthetic tz must mirror that semantics or verify will fail before + // the test can observe the clamp's effect; we therefore have the tz + // report DST starting on the last day of February. + // + // Path B is forced to fail (custom getDSTSavings()==0 mismatch) so + // Path A runs. Path A copies getDayOfMonthIndicator() verbatim into + // rule.startDay, so the stored value is 29 -- the clamp lives in + // computeRuleDay, not in Path A's encoding. + TimeZone tz = new TimeZone() { + @Override public int getOffset(long instant) { + int year = LocalDate.ofEpochDay(Math.floorDiv(instant, 86_400_000L)).getYear(); + // Mirror computeRuleDay(MODE_DOW_GE_DOM, 29, SUNDAY, year, FEBRUARY) + // step by step: anchor = min(29, monthLength); search forward for the + // first SUNDAY >= anchor; clamp the result to monthLength. The outer + // result clamp is what collapses the day-of-week constraint when the + // anchor is already at the month boundary -- which is exactly the + // case we want to exercise. + int monthLength = Year.of(year).isLeap() ? 29 : 28; + int anchorDay = Math.min(29, monthLength); + int targetDow = LocalDate.of(year, Month.FEBRUARY, anchorDay).getDayOfWeek().getValue(); + int diff = DayOfWeek.SUNDAY.getValue() - targetDow; + if (diff < 0) diff += 7; + int computedDay = Math.min(anchorDay + diff, monthLength); + long dstStart = LocalDate.of(year, Month.FEBRUARY, computedDay) + .toEpochDay() * 86_400_000L + 2 * 3_600_000L; + long dstEnd = nthDayOfWeekUtcMs(year, Month.NOVEMBER, DayOfWeek.SUNDAY, 1) + 1 * 3_600_000L; + return (instant >= dstStart && instant < dstEnd) ? 2 * 3_600_000 : 0; + } + @Override public int getOffset(int era, int year, int month, int day, int dow, int ms) { + return 0; + } + @Override public int getRawOffset() { return 0; } + @Override public void setRawOffset(int offsetMillis) {} + @Override public boolean useDaylightTime() { return true; } + @Override public int getDSTSavings() { return 0; } // force Path B verify to fail + @Override public boolean inDaylightTime(Date date) { return false; } + }; + tz.setID("Synthetic/FebruaryClamp"); + + ZoneOffset base = ZoneOffset.UTC; + ZoneOffset plus2 = ZoneOffset.ofHours(2); + ZoneOffsetTransitionRule startRule = ZoneOffsetTransitionRule.of( + Month.FEBRUARY, 29, DayOfWeek.SUNDAY, LocalTime.of(2, 0), false, + ZoneOffsetTransitionRule.TimeDefinition.STANDARD, + base, base, plus2); + ZoneOffsetTransitionRule endRule = ZoneOffsetTransitionRule.of( + Month.NOVEMBER, 1, DayOfWeek.SUNDAY, LocalTime.of(1, 0), false, + ZoneOffsetTransitionRule.TimeDefinition.STANDARD, + base, plus2, base); + ZoneRules rules = ZoneRules.of(base, base, + Collections.emptyList(), + Collections.singletonList(SYNTHETIC_HISTORICAL_TRANSITION), + Arrays.asList(startRule, endRule)); + + OrcDstRuleExtractor.DstRule rule = OrcDstRuleExtractor.extractDstRule( + "Synthetic/FebruaryClamp", tz, rules); + assertNotNull(rule, "Feb indicator=29 must not throw -- clamp should prevent DateTimeException"); + // Path A stores getDayOfMonthIndicator() unchanged; the clamp is applied + // later in computeRuleDay. Surviving verifyDstRuleAcrossReferenceYears + // round-trip is what proves the clamp prevented a DateTimeException. + assertEquals(29, rule.startDay, + "Path A stores indicator verbatim; clamp lives in computeRuleDay"); + } + + @Test + void testExtractDstRuleByProbingReturnsNullOnMultipleDstOnTransitions() { + // extractDstRuleByProbing returns null when probing finds more than one + // offset-increasing transition in a year. All other Path-A tests reach + // Path A by failing Path B's cross-year verify -- the early-return on + // "second DST-on transition seen" was never exercised. + // + // This synthetic TimeZone spring-forwards twice a year (March 2nd Sunday + // and June 1st Sunday) and falls back once (November 1st Sunday). The + // ZoneRules supplies no recurring transition rules, so Path A also + // returns null (Unsupported rule count). The terminal "Failed to extract" + // IllegalStateException confirms probing returned null due to the + // multiple-DST-on guard rather than some other reason. + TimeZone tz = new TimeZone() { + @Override public int getOffset(long instant) { + int year = LocalDate.ofEpochDay(Math.floorDiv(instant, 86_400_000L)).getYear(); + long forward1 = nthDayOfWeekUtcMs(year, Month.MARCH, DayOfWeek.SUNDAY, 2) + 2 * 3_600_000L; + long forward2 = nthDayOfWeekUtcMs(year, Month.JUNE, DayOfWeek.SUNDAY, 1) + 2 * 3_600_000L; + long back = nthDayOfWeekUtcMs(year, Month.NOVEMBER, DayOfWeek.SUNDAY, 1) + 1 * 3_600_000L; + if (instant >= forward1 && instant < forward2) return 1 * 3_600_000; + if (instant >= forward2 && instant < back) return 2 * 3_600_000; + return 0; + } + @Override public int getOffset(int era, int year, int month, int day, int dow, int ms) { + return 0; + } + @Override public int getRawOffset() { return 0; } + @Override public void setRawOffset(int offsetMillis) {} + @Override public boolean useDaylightTime() { return true; } + @Override public int getDSTSavings() { return 3_600_000; } + @Override public boolean inDaylightTime(Date date) { return false; } + }; + tz.setID("Synthetic/MultipleDstOn"); + ZoneOffset base = ZoneOffset.UTC; + // A single historical transition is enough to make rules.isFixedOffset() + // return false (without it the entry-point short-circuit at the top of + // extractDstRule would return null before probing runs). The + // transitionRules list stays empty so Path A also returns null on the + // "empty getTransitionRules()" early-out, leaving the terminal + // "Failed to extract" throw as the expected outcome. + ZoneOffsetTransition historicalTransition = ZoneOffsetTransition.of( + LocalDateTime.of(1900, 1, 1, 0, 0), ZoneOffset.ofHours(-1), base); + ZoneRules rules = ZoneRules.of(base, base, + Collections.emptyList(), + Collections.singletonList(historicalTransition), + Collections.emptyList()); + IllegalStateException ex = assertThrows(IllegalStateException.class, + () -> OrcDstRuleExtractor.extractDstRule("Synthetic/MultipleDstOn", tz, rules)); + assertTrue(ex.getMessage().contains("Failed to extract"), + "expected 'Failed to extract' in terminal throw: " + ex.getMessage()); + } + + @Test + void testExtractDstRuleViaZoneRulesFallback() { + // Path A (ZoneRules fallback) success scenario. The custom TimeZone + // observes a real +2h DST window but reports getDSTSavings()==+1h. + // Probing therefore extracts a rule with dstSavings=+1h, whose + // computeDstOffset prediction (rawOffset + 1h) disagrees with the + // observed +2h inside the DST window — verifyDstRule fails, so + // extractDstRuleByProbing returns null. The synthetic ZoneRules below + // carries the actual +2h delta; extractDstRuleFromZoneRules re-derives + // dstSavings from the rule's offset deltas, verify passes, and the + // returned DstRule has dstSavings=+2h (proving Path A ran). + // + // The end rule uses TimeDefinition.UTC to exercise + // getTransitionRuleTimeMode's TIME_MODE_UTC branch — the only + // production path that produces non-STANDARD timeMode in DstRule. + TimeZone tz = new TimeZone() { + @Override public int getOffset(long instant) { + LocalDate date = LocalDate.ofEpochDay(Math.floorDiv(instant, 86_400_000L)); + int year = date.getYear(); + long dstStart = nthDayOfWeekUtcMs(year, Month.MARCH, DayOfWeek.SUNDAY, 2) + 2 * 3_600_000L; + long dstEnd = nthDayOfWeekUtcMs(year, Month.NOVEMBER, DayOfWeek.SUNDAY, 1) + 2 * 3_600_000L; + return (instant >= dstStart && instant < dstEnd) ? 2 * 3_600_000 : 0; + } + @Override public int getOffset(int era, int year, int month, int day, int dow, int ms) { + return 0; // never called by extractDstRule + } + @Override public int getRawOffset() { return 0; } + @Override public void setRawOffset(int offsetMillis) {} + @Override public boolean useDaylightTime() { return true; } + @Override public int getDSTSavings() { return 1 * 3_600_000; } // intentionally wrong + @Override public boolean inDaylightTime(Date date) { return false; } + }; + tz.setID("Synthetic/PathASuccess"); + + ZoneOffset base = ZoneOffset.UTC; + ZoneOffset plus2 = ZoneOffset.ofHours(2); + ZoneOffsetTransitionRule startRule = ZoneOffsetTransitionRule.of( + Month.MARCH, 8, DayOfWeek.SUNDAY, LocalTime.of(2, 0), false, + ZoneOffsetTransitionRule.TimeDefinition.STANDARD, + base, base, plus2); + ZoneOffsetTransitionRule endRule = ZoneOffsetTransitionRule.of( + Month.NOVEMBER, 1, DayOfWeek.SUNDAY, LocalTime.of(2, 0), false, + ZoneOffsetTransitionRule.TimeDefinition.UTC, + base, plus2, base); + ZoneRules rules = ZoneRules.of(base, base, + Collections.emptyList(), + Collections.singletonList(SYNTHETIC_HISTORICAL_TRANSITION), + Arrays.asList(startRule, endRule)); + + OrcDstRuleExtractor.DstRule rule = OrcDstRuleExtractor.extractDstRule( + "Synthetic/PathASuccess", tz, rules); + assertNotNull(rule, "Path A must succeed"); + // dstSavings derived from rule deltas = +2h. If Path B had succeeded + // we would see +1h instead (tz.getDSTSavings). + assertEquals(2 * 3_600_000, rule.dstSavings, + "Path A derives dstSavings from offset deltas, not tz.getDSTSavings()"); + assertEquals(2, rule.startMonth); + assertEquals(8, rule.startDay); + assertEquals(1, rule.startDayOfWeek); + assertEquals(2, rule.startMode); + assertEquals(1, rule.startTimeMode); // STANDARD + assertEquals(2 * 3_600_000, rule.startTime); + assertEquals(10, rule.endMonth); + assertEquals(1, rule.endDay); + assertEquals(1, rule.endDayOfWeek); + assertEquals(2, rule.endMode); + assertEquals(2, rule.endTimeMode); // UTC — covers TIME_MODE_UTC branch + assertEquals(2 * 3_600_000, rule.endTime); + } + + @Test + void testExtractDstRuleMidnightEndOfDayRule() { + // ZoneOffsetTransitionRule.of(..., midnightEndOfDay=true, ...) encodes a + // transition at 24:00 (start of the next day). getTransitionRuleTimeMillis + // must return 24 * 3_600_000 for this case, NOT + // LocalTime.MIDNIGHT.toSecondOfDay() = 0. The TimeZone fires its DST start + // at March 2nd-Sunday + 24h (i.e., the 0:00 boundary of the following + // Monday) and reports getDSTSavings()==0 so Path B fails verify and Path A + // runs; the extracted rule's startTime is asserted to be 24h. + TimeZone tz = new TimeZone() { + @Override public int getOffset(long instant) { + int year = LocalDate.ofEpochDay(Math.floorDiv(instant, 86_400_000L)).getYear(); + long dstStart = nthDayOfWeekUtcMs(year, Month.MARCH, DayOfWeek.SUNDAY, 2) + 24 * 3_600_000L; + long dstEnd = nthDayOfWeekUtcMs(year, Month.NOVEMBER, DayOfWeek.SUNDAY, 1) + 2 * 3_600_000L; + return (instant >= dstStart && instant < dstEnd) ? 2 * 3_600_000 : 0; + } + @Override public int getOffset(int era, int year, int month, int day, int dow, int ms) { + return 0; + } + @Override public int getRawOffset() { return 0; } + @Override public void setRawOffset(int offsetMillis) {} + @Override public boolean useDaylightTime() { return true; } + @Override public int getDSTSavings() { return 0; } // force Path B verify to fail + @Override public boolean inDaylightTime(Date date) { return false; } + }; + tz.setID("Synthetic/MidnightEndOfDay"); + + ZoneOffset base = ZoneOffset.UTC; + ZoneOffset plus2 = ZoneOffset.ofHours(2); + ZoneOffsetTransitionRule startRule = ZoneOffsetTransitionRule.of( + Month.MARCH, 8, DayOfWeek.SUNDAY, LocalTime.MIDNIGHT, true, + ZoneOffsetTransitionRule.TimeDefinition.STANDARD, + base, base, plus2); + ZoneOffsetTransitionRule endRule = ZoneOffsetTransitionRule.of( + Month.NOVEMBER, 1, DayOfWeek.SUNDAY, LocalTime.of(2, 0), false, + ZoneOffsetTransitionRule.TimeDefinition.STANDARD, + base, plus2, base); + ZoneRules rules = ZoneRules.of(base, base, + Collections.emptyList(), + Collections.singletonList(SYNTHETIC_HISTORICAL_TRANSITION), + Arrays.asList(startRule, endRule)); + + OrcDstRuleExtractor.DstRule rule = OrcDstRuleExtractor.extractDstRule( + "Synthetic/MidnightEndOfDay", tz, rules); + assertNotNull(rule, "Path A with midnight-end-of-day start rule must succeed"); + assertEquals(24 * 3_600_000, rule.startTime, + "isMidnightEndOfDay must produce time=24h, not LocalTime.MIDNIGHT.toSecondOfDay()=0"); + } + + @Test + void testExtractDstRuleTimeDefinitionWallMode() { + // Path A success with start rule encoded as TimeDefinition.WALL — + // exercises both TIME_MODE_WALL in getTransitionRuleTimeMode and the + // wall-time branch in computeTransitionUtcMillis. The custom TimeZone + // observes a +2h DST window from 2nd Sun of March 02:00 UTC to 1st Sun + // of November 01:00 UTC but reports getDSTSavings()==0 — Path B's + // probing extracts dstSavings=0 from getDSTSavings, verifyDstRule then + // disagrees (predicted 0, observed +2h), Path B returns null. Path A's + // ZoneRules deltas yield dstSavings=+2h, verify passes, the WALL start + // rule is encoded as startTimeMode=0. + TimeZone tz = new TimeZone() { + @Override public int getOffset(long instant) { + int year = LocalDate.ofEpochDay(Math.floorDiv(instant, 86_400_000L)).getYear(); + long dstStart = nthDayOfWeekUtcMs(year, Month.MARCH, DayOfWeek.SUNDAY, 2) + 2 * 3_600_000L; + long dstEnd = nthDayOfWeekUtcMs(year, Month.NOVEMBER, DayOfWeek.SUNDAY, 1) + 1 * 3_600_000L; + return (instant >= dstStart && instant < dstEnd) ? 2 * 3_600_000 : 0; + } + @Override public int getOffset(int era, int year, int month, int day, int dow, int ms) { + return 0; + } + @Override public int getRawOffset() { return 0; } + @Override public void setRawOffset(int offsetMillis) {} + @Override public boolean useDaylightTime() { return true; } + @Override public int getDSTSavings() { return 0; } // forces Path B verify to fail + @Override public boolean inDaylightTime(Date date) { return false; } + }; + tz.setID("Synthetic/WallMode"); + + ZoneOffset base = ZoneOffset.UTC; + ZoneOffset plus2 = ZoneOffset.ofHours(2); + ZoneOffsetTransitionRule startRule = ZoneOffsetTransitionRule.of( + Month.MARCH, 8, DayOfWeek.SUNDAY, LocalTime.of(2, 0), false, + ZoneOffsetTransitionRule.TimeDefinition.WALL, + base, base, plus2); + ZoneOffsetTransitionRule endRule = ZoneOffsetTransitionRule.of( + Month.NOVEMBER, 1, DayOfWeek.SUNDAY, LocalTime.of(1, 0), false, + ZoneOffsetTransitionRule.TimeDefinition.STANDARD, + base, plus2, base); + ZoneRules rules = ZoneRules.of(base, base, + Collections.emptyList(), + Collections.singletonList(SYNTHETIC_HISTORICAL_TRANSITION), + Arrays.asList(startRule, endRule)); + + OrcDstRuleExtractor.DstRule rule = OrcDstRuleExtractor.extractDstRule( + "Synthetic/WallMode", tz, rules); + assertNotNull(rule, "Path A with WALL start rule must succeed"); + assertEquals(0, rule.startTimeMode, + "TimeDefinition.WALL must produce TIME_MODE_WALL (0)"); + assertEquals(1, rule.endTimeMode, + "TimeDefinition.STANDARD must produce TIME_MODE_STANDARD (1)"); + } }