diff --git a/src/main/java/com/nvidia/spark/rapids/jni/OrcDstRuleExtractor.java b/src/main/java/com/nvidia/spark/rapids/jni/OrcDstRuleExtractor.java new file mode 100644 index 0000000000..7c4aa271e7 --- /dev/null +++ b/src/main/java/com/nvidia/spark/rapids/jni/OrcDstRuleExtractor.java @@ -0,0 +1,529 @@ +/* + * Copyright (c) 2026, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.spark.rapids.jni; + +import java.time.Instant; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.ZoneOffset; +import java.time.zone.ZoneOffsetTransitionRule; +import java.time.zone.ZoneRules; +import java.util.List; +import java.util.TimeZone; + +/** + * Recovers a recurring DST rule for an IANA timezone in the shape the GPU + * side consumes for ORC DST conversion. + * + *
Two extraction paths are supported. The probing path is tried first + * because it captures what {@link TimeZone#getOffset(long)} actually returns + * (which is the source of truth the GPU side must match for ORC byte + * compatibility); the {@link ZoneRules#getTransitionRules()} path is used as + * a fallback for zones whose recurring rule cannot be recovered from hourly + * probes. + * + *
This class is intentionally separate from {@link OrcTimezoneInfo} — the + * historical-transition machinery and the DST-rule extraction share no + * read/write state, just a couple of small package-private helpers + * ({@link OrcTimezoneInfo#utcMillisForDate} and + * {@link OrcTimezoneInfo#binarySearchTransition}). + */ +final class OrcDstRuleExtractor { + private OrcDstRuleExtractor() {} + + // Reference years used to cross-check the extracted DST rule against + // java.util.TimeZone.getOffset. A near-future anchor (2060) catches + // divergence within the typical application lifetime; the far-future + // anchors exercise the recurring-rule fallback well past any historical + // transition entry in tzdata. + private static final int[] DST_RULE_VALIDATION_YEARS = {2060, 2400, 9997}; + + /** + * Recurring DST rule for a single zone, encoded in the same shape that + * {@link java.util.SimpleTimeZone} stores internally and that the GPU side + * consumes. {@code month} is 0-based (Calendar.JANUARY=0), {@code dayOfWeek} + * follows Calendar's 1=Sun..7=Sat convention, and {@code dstSavings} / + * {@code time} are in milliseconds. + * + *
The {@code *Mode} fields encode how {@code *Day}/{@code *DayOfWeek}
+ * combine — see the {@code MODE_*} constants. {@code *TimeMode} selects
+ * the time-of-day basis — see the {@code TIME_MODE_*} constants.
+ */
+ static final class DstRule {
+ // Day-rule modes for {start,end}Mode — matches SimpleTimeZone's internal
+ // encoding so the GPU side can consume the values directly.
+ // MODE_DOM: the rule day is used directly as the day of the month.
+ static final int MODE_DOM = 0;
+ // MODE_DOW_IN_MONTH: the rule day is an occurrence index of the weekday,
+ // counted from the start of the month when positive and from the end of
+ // the month otherwise.
+ static final int MODE_DOW_IN_MONTH = 1;
+ // MODE_DOW_GE_DOM: first matching weekday on or after the rule day.
+ static final int MODE_DOW_GE_DOM = 2;
+ // MODE_DOW_LE_DOM: last matching weekday on or before the rule day.
+ static final int MODE_DOW_LE_DOM = 3;
+
+ // Time-of-day basis for {start,end}TimeMode.
+ // WALL: converting to UTC subtracts the raw offset and, for the end rule,
+ // the DST savings as well.
+ static final int TIME_MODE_WALL = 0;
+ // STANDARD: converting to UTC subtracts only the raw offset.
+ static final int TIME_MODE_STANDARD = 1;
+ // UTC: the time is already expressed in UTC.
+ static final int TIME_MODE_UTC = 2;
+
+ // Amount added to the raw offset while DST is in effect, in milliseconds.
+ int dstSavings;
+ // DST start rule: 0-based month, day (meaning depends on startMode),
+ // weekday in Calendar's 1=Sun..7=Sat numbering, time of day in
+ // milliseconds (basis given by startTimeMode), and the MODE_* day-rule mode.
+ int startMonth;
+ int startDay;
+ int startDayOfWeek;
+ int startTime;
+ int startTimeMode;
+ int startMode;
+ // DST end rule: same field layout and conventions as the start rule.
+ int endMonth;
+ int endDay;
+ int endDayOfWeek;
+ int endTime;
+ int endTimeMode;
+ int endMode;
+ }
+
+ /**
+  * Recovers the recurring DST rule of a zone, returning {@code null} when the
+  * zone does not observe DST.
+  *
+  * <p>The probing path (driven by {@code tz}) is attempted first; the
+  * {@link ZoneRules#getTransitionRules()} path is only consulted when probing
+  * yields nothing.
+  *
+  * @param timezoneId IANA timezone id; it is forwarded to the ZoneRules path
+  *     and otherwise appears only in exception messages
+  * @param tz {@link TimeZone} for the zone; must describe the same zone as
+  *     {@code rules}
+  * @param rules {@link ZoneRules} for the zone
+  * @return the recurring DST rule, or {@code null} when
+  *     {@code rules.isFixedOffset()} or {@code !tz.useDaylightTime()}
+  * @throws IllegalStateException if {@code tz} and {@code rules} disagree on
+  *     the standard offset, or if the zone reports DST but neither extraction
+  *     path produces a usable rule — for example an unsupported
+  *     {@link ZoneRules#getTransitionRules()} count (not 0 and not 2), a
+  *     transition rule shape outside DOW_GE_DOM, mismatched start vs. end DST
+  *     savings, zero-delta savings, or a verification mismatch against
+  *     {@code tz.getOffset} on the anchor years {@code 2060, 2400, 9997}
+  */
+ static DstRule extractDstRule(String timezoneId, TimeZone tz, ZoneRules rules) {
+   // A fixed-offset id such as "+05:30" has no DST. It is checked on the
+   // ZoneRules side first because TimeZone.getTimeZone(zoneId) silently hands
+   // back GMT for such ids on most JVMs, so `tz` may not describe the zone at
+   // all. Mirrors the guard in OrcTimezoneInfo.buildRuntimeOrcTimezoneInfo.
+   if (rules.isFixedOffset()) {
+     return null;
+   }
+   if (!tz.useDaylightTime()) {
+     return null;
+   }
+
+   // Both extraction paths assume tz and rules belong to the same zone, yet
+   // taking them as two separate arguments allows a mismatched pair (the
+   // silent-GMT trap above is one way to get there). Compare the standard
+   // offsets at a recent instant; the epoch would be a poor choice because
+   // some zones (e.g. Europe/London during the British Standard Time
+   // experiment) had a different standard offset in 1970.
+   int rulesStandardOffsetMillis =
+       rules.getStandardOffset(Instant.parse("2024-01-15T00:00:00Z")).getTotalSeconds() * 1000;
+   int tzRawOffsetMillis = tz.getRawOffset();
+   if (tzRawOffsetMillis != rulesStandardOffsetMillis) {
+     throw new IllegalStateException(
+         "TimeZone and ZoneRules describe different zones for timezone: " + timezoneId
+             + " (tz.rawOffset=" + tzRawOffsetMillis
+             + ", rules.standardOffset=" + rulesStandardOffsetMillis + ")");
+   }
+
+   // Probing first, ZoneRules as the fallback.
+   DstRule extracted = extractDstRuleByProbing(tz);
+   if (extracted == null) {
+     extracted = extractDstRuleFromZoneRules(timezoneId, tz, rules);
+   }
+   if (extracted == null) {
+     throw new IllegalStateException("Failed to extract ORC DST rule for timezone: " + timezoneId);
+   }
+   return extracted;
+ }
+
+ // ---- Path A: from ZoneRules.getTransitionRules() ----
+
+ private static DstRule extractDstRuleFromZoneRules(String timezoneId, TimeZone tz,
+ ZoneRules rules) {
+ List Precondition: {@code utcMs} is the first millisecond at which
+ * the new offset applies — the convention returned by
+ * {@link OrcTimezoneInfo#binarySearchTransition}. With that convention,
+ * {@code utcMs + rawOffsetMs} recovers the standard-time wall clock at
+ * the transition moment for both spring-forward (offset before == rawOffset)
+ * and fall-back (offset after == rawOffset). If a future change shifts the
+ * binary-search return convention by one ms, the decoded {@code timeMs}
+ * will drift and {@code baseDay} can cross a midnight boundary.
+ *
+ * Recurring weekday rules are encoded as DOW_GE_DOM (mode=2). The base
+ * day is the earliest possible day of the matching occurrence in the month:
+ * 1st=1, 2nd=8, 3rd=15, 4th=22, last={@code monthLength - 6}. This mirrors
+ * encodings like "Sun >= 8" for the second Sunday in March and "Sun >= 25"
+ * for the last Sunday in October.
+ */
+ private static int[] decodeTransition(long utcMs, int rawOffsetMs) {
+   // Shifting by the raw offset and reading the result as UTC yields the
+   // standard-time wall clock at the transition.
+   LocalDateTime standardWallClock =
+       LocalDateTime.ofInstant(Instant.ofEpochMilli(utcMs + rawOffsetMs), ZoneOffset.UTC);
+   LocalDate transitionDate = standardWallClock.toLocalDate();
+
+   int day = transitionDate.getDayOfMonth();
+   int daysInMonth = transitionDate.lengthOfMonth();
+
+   // Earliest day on which this occurrence of the weekday can fall: the start
+   // of the 7-day slot containing `day` (1, 8, 15, 22, ...), or
+   // daysInMonth - 6 when no later occurrence fits in the month.
+   int baseDay;
+   if (day + 7 > daysInMonth) {
+     baseDay = daysInMonth - 6;
+   } else {
+     baseDay = day - (day - 1) % 7;
+   }
+
+   int zeroBasedMonth = transitionDate.getMonthValue() - 1; // Calendar-style month
+   int calendarDayOfWeek = toCalendarDayOfWeek(standardWallClock.getDayOfWeek().getValue());
+   // Milliseconds since local midnight, with sub-millisecond nanos truncated.
+   int millisOfDay = (int) (standardWallClock.toLocalTime().toNanoOfDay() / 1_000_000L);
+
+   return new int[]{
+       zeroBasedMonth, baseDay, calendarDayOfWeek, millisOfDay, DstRule.MODE_DOW_GE_DOM};
+ }
+
+ // ---- Verification: ensure the extracted rule matches tz.getOffset ----
+
+ // Runs verifyDstRule for each entry of DST_RULE_VALIDATION_YEARS in order and
+ // stops at the first year that fails; true only when every year passes.
+ private static boolean verifyDstRuleAcrossReferenceYears(TimeZone tz, DstRule rule) {
+   boolean allYearsMatch = true;
+   for (int i = 0; allYearsMatch && i < DST_RULE_VALIDATION_YEARS.length; i++) {
+     allYearsMatch = verifyDstRule(tz, rule, DST_RULE_VALIDATION_YEARS[i]);
+   }
+   return allYearsMatch;
+ }
+
+ /**
+ * Verify the extracted rule matches {@code tz.getOffset} around transition
+ * boundaries and at monthly sample points for {@code refYear ± 1} (3 years).
+ * DST mismatches only manifest near transitions, so dense sampling at
+ * boundaries plus monthly spot checks reduces a naive full-year scan from
+ * ~52K probes to ~200 per reference year.
+ */
+ private static boolean verifyDstRule(TimeZone tz, DstRule rule, int refYear) {
+ int rawOffsetMs = tz.getRawOffset();
+ for (int y = refYear - 1; y <= refYear + 1; y++) {
+ long dstStart = computeTransitionUtcMillis(y, rule.startMonth, rule.startDay,
+ rule.startDayOfWeek, rule.startTime, rule.startTimeMode, rule.startMode,
+ rawOffsetMs, rule.dstSavings, true);
+ long dstEnd = computeTransitionUtcMillis(y, rule.endMonth, rule.endDay,
+ rule.endDayOfWeek, rule.endTime, rule.endTimeMode, rule.endMode,
+ rawOffsetMs, rule.dstSavings, false);
+
+ // All sample points below land within the wall-clock year y for any
+ // real DST rule (boundaries ±12h, plus monthly noon-UTC anchors), so
+ // skip the per-sample year derivation and re-computation of these
+ // bounds inside computeDstOffset -- ~186 computeTransitionUtcMillis
+ // calls per verifyDstRule otherwise. Hand the already-computed bounds
+ // to the variant.
+ long[] boundaries = {dstStart, dstEnd};
+ for (long boundary : boundaries) {
+ long from = boundary - 12 * 3_600_000L;
+ long to = boundary + 12 * 3_600_000L;
+ for (long ms = from; ms <= to; ms += 3_600_000L) {
+ if (tz.getOffset(ms) != computeDstOffsetWithBounds(ms, rawOffsetMs, rule,
+ dstStart, dstEnd)) {
+ return false;
+ }
+ }
+ }
+
+ for (int m = 1; m <= 12; m++) {
+ long ms = OrcTimezoneInfo.utcMillisForDate(y, m, 1) + 12 * 3_600_000L;
+ if (tz.getOffset(ms) != computeDstOffsetWithBounds(ms, rawOffsetMs, rule,
+ dstStart, dstEnd)) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ /**
+  * Returns the total UTC offset the rule predicts at {@code utcMs}, given DST
+  * bounds that the caller has already computed.
+  *
+  * <p>When {@code dstStart < dstEnd} the DST interval is
+  * {@code [dstStart, dstEnd)}. Otherwise the interval wraps around, and DST
+  * applies at or after {@code dstStart} as well as before {@code dstEnd}.
+  * {@link #verifyDstRule} is the only caller; it supplies the bounds so they
+  * are not recomputed for every sample.
+  *
+  * @return {@code rawOffsetMs + rule.dstSavings} inside DST, otherwise
+  *     {@code rawOffsetMs}
+  */
+ private static int computeDstOffsetWithBounds(long utcMs, int rawOffsetMs, DstRule rule,
+     long dstStart, long dstEnd) {
+   boolean atOrAfterStart = utcMs >= dstStart;
+   boolean beforeEnd = utcMs < dstEnd;
+
+   boolean inDst;
+   if (dstStart < dstEnd) {
+     inDst = atOrAfterStart && beforeEnd;
+   } else {
+     inDst = atOrAfterStart || beforeEnd;
+   }
+
+   if (inDst) {
+     return rawOffsetMs + rule.dstSavings;
+   }
+   return rawOffsetMs;
+ }
+
+ /**
+  * Computes the UTC instant, in epoch milliseconds, at which a start or end
+  * rule fires in {@code year}.
+  *
+  * <p>The rule day is resolved with {@link #computeRuleDay}, {@code ruleTime}
+  * is added to that day's midnight, and the result is converted to UTC
+  * according to {@code ruleTimeMode}.
+  *
+  * @param ruleMonth 0-based month of the rule
+  * @param isStartRule {@code true} for the DST start rule; only matters for
+  *     {@code TIME_MODE_WALL}, where the end rule is additionally shifted by
+  *     {@code dstSavingsMs}
+  */
+ private static long computeTransitionUtcMillis(int year, int ruleMonth, int ruleDay,
+     int ruleDayOfWeek, int ruleTime, int ruleTimeMode, int ruleMode, int rawOffsetMs,
+     int dstSavingsMs, boolean isStartRule) {
+   int resolvedDay = computeRuleDay(ruleMode, ruleDay, ruleDayOfWeek, year, ruleMonth);
+   long ruleLocalMs =
+       OrcTimezoneInfo.utcMillisForDate(year, ruleMonth + 1, resolvedDay) + ruleTime;
+
+   switch (ruleTimeMode) {
+     case DstRule.TIME_MODE_WALL:
+       // Wall time runs ahead of UTC by the raw offset, and also by the DST
+       // savings when the end rule fires.
+       if (isStartRule) {
+         return ruleLocalMs - rawOffsetMs;
+       }
+       return ruleLocalMs - rawOffsetMs - dstSavingsMs;
+     case DstRule.TIME_MODE_STANDARD:
+       return ruleLocalMs - rawOffsetMs;
+     default:
+       // TIME_MODE_UTC (and any other value) is taken as already being UTC.
+       return ruleLocalMs;
+   }
+ }
+
+ /**
+  * Resolves a day rule to a concrete 1-based day of month for the given year
+  * and month.
+  *
+  * <p>The result is always clamped into {@code [1, monthLength]}, whatever the
+  * mode, so that {@code OrcTimezoneInfo.utcMillisForDate} never receives a day
+  * that does not exist in the month. This applies the within-month clamp to
+  * every branch, including {@code MODE_DOM}, a zero occurrence index in
+  * {@code MODE_DOW_IN_MONTH}, and anchors below 1 in the
+  * {@code MODE_DOW_GE_DOM} / {@code MODE_DOW_LE_DOM} branches.
+  *
+  * @param ruleMode one of the {@code DstRule.MODE_*} constants; any other
+  *     value is handled like {@code MODE_DOM}
+  * @param ruleDay day of month, or occurrence index for
+  *     {@code MODE_DOW_IN_MONTH} (positive counts from the start of the
+  *     month, otherwise from the end)
+  * @param ruleDayOfWeek weekday in the numbering produced by
+  *     {@code toCalendarDayOfWeek} (1=Sun..7=Sat per the {@code DstRule}
+  *     documentation)
+  * @param year calendar year
+  * @param month 0-based month
+  * @return a day of month within {@code [1, monthLength]}
+  */
+ private static int computeRuleDay(int ruleMode, int ruleDay, int ruleDayOfWeek, int year,
+     int month) {
+   LocalDate firstOfMonth = LocalDate.of(year, month + 1, 1);
+   int monthLength = firstOfMonth.lengthOfMonth();
+   int firstDayOfWeek = toCalendarDayOfWeek(firstOfMonth.getDayOfWeek().getValue());
+
+   // The current extraction paths (Path A's fillDstRuleFromTransitionRule and
+   // Path B's decodeTransition) emit only MODE_DOW_GE_DOM. The other branches
+   // are kept for forward compatibility, e.g. a caller that builds a DstRule
+   // from a serialised form; add test coverage when such a caller appears.
+   int day;
+   switch (ruleMode) {
+     case DstRule.MODE_DOW_IN_MONTH: {
+       if (ruleDay > 0) {
+         // Nth occurrence counted from the first day of the month.
+         int diff = ruleDayOfWeek - firstDayOfWeek;
+         if (diff < 0) {
+           diff += 7;
+         }
+         day = 1 + diff + (ruleDay - 1) * 7;
+       } else {
+         // Occurrence counted back from the last day of the month (-1 = last).
+         int lastDayOfWeek = toCalendarDayOfWeek(
+             LocalDate.of(year, month + 1, monthLength).getDayOfWeek().getValue());
+         int diff = lastDayOfWeek - ruleDayOfWeek;
+         if (diff < 0) {
+           diff += 7;
+         }
+         day = monthLength - diff + (ruleDay + 1) * 7;
+       }
+       break;
+     }
+     case DstRule.MODE_DOW_GE_DOM: {
+       // Per ZoneOffsetTransitionRule.getDayOfMonthIndicator(), the indicator
+       // may exceed monthLength (e.g. Feb 29 in a non-leap year). Clamp the
+       // anchor on both sides so LocalDate.of never throws.
+       int anchorDay = Math.max(1, Math.min(ruleDay, monthLength));
+       int anchorDayOfWeek = toCalendarDayOfWeek(
+           LocalDate.of(year, month + 1, anchorDay).getDayOfWeek().getValue());
+       int diff = ruleDayOfWeek - anchorDayOfWeek;
+       if (diff < 0) {
+         diff += 7;
+       }
+       day = anchorDay + diff;
+       break;
+     }
+     case DstRule.MODE_DOW_LE_DOM: {
+       // Same anchor clamp as MODE_DOW_GE_DOM; the indicator can exceed
+       // monthLength (e.g. 31 in February).
+       int anchorDay = Math.max(1, Math.min(ruleDay, monthLength));
+       int anchorDayOfWeek = toCalendarDayOfWeek(
+           LocalDate.of(year, month + 1, anchorDay).getDayOfWeek().getValue());
+       int diff = anchorDayOfWeek - ruleDayOfWeek;
+       if (diff < 0) {
+         diff += 7;
+       }
+       day = anchorDay - diff;
+       break;
+     }
+     case DstRule.MODE_DOM:
+     default:
+       day = ruleDay;
+       break;
+   }
+   // One clamp for every mode: an occurrence that overflows or underflows the
+   // month collapses to a valid in-month day instead of surfacing later as a
+   // DateTimeException.
+   return Math.max(1, Math.min(day, monthLength));
+ }
+}
diff --git a/src/main/java/com/nvidia/spark/rapids/jni/OrcTimezoneInfo.java b/src/main/java/com/nvidia/spark/rapids/jni/OrcTimezoneInfo.java
index 348b3cfeb2..9afa575fae 100644
--- a/src/main/java/com/nvidia/spark/rapids/jni/OrcTimezoneInfo.java
+++ b/src/main/java/com/nvidia/spark/rapids/jni/OrcTimezoneInfo.java
@@ -77,7 +77,9 @@ public OrcTimezoneInfo(int rawOffset, long[] transitions, int[] offsets) {
// year, month, and day are all 1-indexed, matching LocalDate.of conventions
// (e.g. month=1 is January). This avoids the easy-to-misread mix of 0-based
// month and 1-based day at the call site.
- private static long utcMillisForDate(int year, int month, int day) {
+ //
+ // Package-private so OrcDstRuleExtractor can share the same anchor.
+ static long utcMillisForDate(int year, int month, int day) {
return LocalDate.of(year, month, day).toEpochDay() * 24L * 3600_000L;
}
@@ -277,7 +279,9 @@ private static int collectTimeZoneTransitionsByScanning(
return currentOffset;
}
- private static long binarySearchTransition(TimeZone tz, long lo, long hi) {
+ // Package-private so OrcDstRuleExtractor can reuse the same bracketed
+ // binary search.
+ static long binarySearchTransition(TimeZone tz, long lo, long hi) {
int loOffset = tz.getOffset(lo);
while (hi - lo > 1) {
long mid = lo + (hi - lo) / 2;
diff --git a/src/test/java/com/nvidia/spark/rapids/jni/OrcTimezoneInfoTest.java b/src/test/java/com/nvidia/spark/rapids/jni/OrcTimezoneInfoTest.java
index c34f2cc9a0..4878c10c00 100644
--- a/src/test/java/com/nvidia/spark/rapids/jni/OrcTimezoneInfoTest.java
+++ b/src/test/java/com/nvidia/spark/rapids/jni/OrcTimezoneInfoTest.java
@@ -18,7 +18,22 @@
import org.junit.jupiter.api.Test;
+import java.time.DayOfWeek;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.LocalTime;
+import java.time.Month;
+import java.time.Year;
+import java.time.ZoneId;
+import java.time.ZoneOffset;
+import java.time.zone.ZoneOffsetTransition;
+import java.time.zone.ZoneOffsetTransitionRule;
+import java.time.zone.ZoneRules;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Date;
import java.util.List;
+import java.util.TimeZone;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
@@ -30,6 +45,30 @@
public class OrcTimezoneInfoTest {
+ /**
+ * An inert historical transition for synthetic {@link ZoneRules} fixtures.
+ *
+ * On JDK 8 (the build's {@code maven.compiler.target}), {@code
+ * ZoneRules.of(base, base, emptyList, emptyList, recurringRules)} -- recurring
+ * rules but no concrete historical transitions -- reports {@code
+ * isFixedOffset() == true} even though DST rules are present. That makes
+ * {@link OrcDstRuleExtractor#extractDstRule} short-circuit to {@code null} at
+ * its {@code rules.isFixedOffset()} guard before either extraction path runs.
+ * JDK 17 reports {@code false}, so the gap is invisible to a JDK 17 compile
+ * check and only surfaces when the 1.8-target test suite actually runs.
+ *
+ * Including one concrete transition flips {@code isFixedOffset()} to {@code
+ * false} on every JDK. Real IANA zones always carry historical transitions,
+ * so production is unaffected -- only these hand-built fixtures need it. The
+ * transition is dated 1900 and changes only the wall offset (the
+ * standard-offset history stays at the base offset), so it perturbs neither
+ * {@code getStandardOffset} nor the post-2060 probing windows the tests rely
+ * on.
+ */
+ private static final ZoneOffsetTransition SYNTHETIC_HISTORICAL_TRANSITION =
+ // Arguments: local date-time of the transition (1900-01-01T00:00), the
+ // offset before it (-01:00) and the offset after it (UTC).
+ ZoneOffsetTransition.of(
+ LocalDateTime.of(1900, 1, 1, 0, 0), ZoneOffset.ofHours(-1), ZoneOffset.UTC);
+
@Test
void testGetFixedOffsetZone() {
// Fixed-offset zones must return a non-null OrcTimezoneInfo with the
@@ -116,4 +155,744 @@ void testGetHistoricalTransitionsZone() {
"transitions must be strictly increasing");
}
}
+
+ // ---- DST rule extraction (Part 2 — not wired into production yet) ----
+
+ @Test
+ void testExtractDstRuleNorthernHemisphere() {
+ // America/New_York: DST starts 2nd Sunday of March, ends 1st Sunday of November.
+ // dstSavings is +1h (3_600_000 ms). startMonth=2 (March, 0-based),
+ // endMonth=10 (November, 0-based). DOW_GE_DOM_MODE = 2.
+ OrcDstRuleExtractor.DstRule rule = extractDstRuleFor("America/New_York");
+ assertNotNull(rule, "America/New_York must have a DST rule");
+ assertEquals(3_600_000, rule.dstSavings);
+ assertEquals(2, rule.startMonth);
+ assertEquals(10, rule.endMonth);
+ assertEquals(2, rule.startMode);
+ assertEquals(2, rule.endMode);
+ // Day-of-week 1 == Sunday in Calendar's 1=Sun..7=Sat convention.
+ assertEquals(1, rule.startDayOfWeek);
+ assertEquals(1, rule.endDayOfWeek);
+ // Second Sunday in March: base day 8 ("Sun >= 8"). First Sunday in November: base day 1.
+ assertEquals(8, rule.startDay);
+ assertEquals(1, rule.endDay);
+ // Probing path encodes both transitions as STANDARD time (timeMode=1) at
+ // the wall-clock instants 02:00 (DST start) and 01:00 (DST end). Lock
+ // these so a regression that flips timeMode to WALL would shift the
+ // computed UTC transitions by dstSavings and fail verifyDstRule silently.
+ assertEquals(1, rule.startTimeMode, "DST start should be STANDARD time mode");
+ assertEquals(1, rule.endTimeMode, "DST end should be STANDARD time mode");
+ assertEquals(2 * 3_600_000, rule.startTime, "DST start at 02:00 standard");
+ assertEquals(1 * 3_600_000, rule.endTime, "DST end at 01:00 standard");
+ }
+
+ @Test
+ void testExtractDstRuleEuropeLondon() {
+ // Europe/London: DST starts last Sunday of March, ends last Sunday of October.
+ // Encoded as DOW_GE_DOM with base day = monthLength - 6.
+ OrcDstRuleExtractor.DstRule rule = extractDstRuleFor("Europe/London");
+ assertNotNull(rule, "Europe/London must have a DST rule");
+ assertEquals(3_600_000, rule.dstSavings);
+ assertEquals(2, rule.startMonth);
+ assertEquals(9, rule.endMonth);
+ assertEquals(1, rule.startDayOfWeek);
+ assertEquals(1, rule.endDayOfWeek);
+ // Last Sunday of March (31-day month): base 25. Last Sunday of October (31-day): base 25.
+ assertEquals(25, rule.startDay);
+ assertEquals(25, rule.endDay);
+ // Probing path encodes both ends as DOW_GE_DOM (mode=2) on STANDARD time
+ // (timeMode=1). BST flips on at 01:00 standard in March and off at 01:00
+ // standard in October.
+ assertEquals(2, rule.startMode);
+ assertEquals(2, rule.endMode);
+ assertEquals(1, rule.startTimeMode);
+ assertEquals(1, rule.endTimeMode);
+ assertEquals(1 * 3_600_000, rule.startTime, "DST start at 01:00 standard");
+ assertEquals(1 * 3_600_000, rule.endTime, "DST end at 01:00 standard");
+ }
+
+ @Test
+ void testExtractDstRuleSouthernHemisphere() {
+ // Australia/Sydney: DST starts 1st Sunday of October, ends 1st Sunday of April.
+ // Southern hemisphere — start month numerically > end month.
+ OrcDstRuleExtractor.DstRule rule = extractDstRuleFor("Australia/Sydney");
+ assertNotNull(rule, "Australia/Sydney must have a DST rule");
+ assertEquals(3_600_000, rule.dstSavings);
+ assertEquals(9, rule.startMonth);
+ assertEquals(3, rule.endMonth);
+ assertTrue(rule.startMonth > rule.endMonth,
+ "southern hemisphere: start month should follow end month within the calendar year");
+ // 1st Sunday of October: base 1. 1st Sunday of April: base 1.
+ assertEquals(1, rule.startDay);
+ assertEquals(1, rule.endDay);
+ assertEquals(1, rule.startDayOfWeek);
+ assertEquals(1, rule.endDayOfWeek);
+ assertEquals(2, rule.startMode);
+ assertEquals(2, rule.endMode);
+ assertEquals(1, rule.startTimeMode);
+ assertEquals(1, rule.endTimeMode);
+ assertEquals(2 * 3_600_000, rule.startTime, "DST start at 02:00 standard");
+ assertEquals(2 * 3_600_000, rule.endTime, "DST end at 02:00 standard");
+ }
+
+ @Test
+ void testExtractDstRuleNoDstReturnsNull() {
+   // Asia/Shanghai observed DST in the past (1940s, 1986-1991) but has no
+   // current rule, so tz.useDaylightTime() is expected to be false and the
+   // extractor should report "no DST" by returning null.
+   OrcDstRuleExtractor.DstRule shanghaiRule = extractDstRuleFor("Asia/Shanghai");
+   assertNull(shanghaiRule);
+ }
+
+ @Test
+ void testExtractDstRuleFixedOffsetReturnsNull() {
+   // A fixed-offset zone never observes DST, whether it is named ("UTC") or
+   // written as an offset ("+05:30").
+   for (String fixedOffsetId : new String[] {"UTC", "+05:30"}) {
+     assertNull(extractDstRuleFor(fixedOffsetId));
+   }
+ }
+
+ @Test
+ void testExtractDstRuleThrowsOnUnsupportedRuleCount() {
+   // A TimeZone whose getOffset is constant: the probing path
+   // (extractDstRuleByProbing) observes no transitions across all anchor years
+   // and returns null, so extractDstRuleFromZoneRules runs with the
+   // hand-crafted ZoneRules below. Built with the shared helper rather than a
+   // private copy of the same anonymous class.
+   String zoneName = "Synthetic/UnsupportedRuleCount";
+   TimeZone constantOffsetWithDstFlag = newConstantOffsetWithDstFlag(zoneName);
+
+   // ZoneRules with exactly one recurring rule. Production code rejects any
+   // count outside {0, 2}, so this triggers the "Unsupported ORC DST rule
+   // count" branch in extractDstRuleFromZoneRules.
+   ZoneOffset baseOffset = ZoneOffset.UTC;
+   ZoneOffsetTransitionRule lonelyRule = ZoneOffsetTransitionRule.of(
+       Month.MARCH, 8, DayOfWeek.SUNDAY, LocalTime.of(2, 0), false,
+       ZoneOffsetTransitionRule.TimeDefinition.STANDARD,
+       baseOffset, baseOffset, ZoneOffset.ofHours(1));
+   ZoneRules syntheticRules = ZoneRules.of(
+       baseOffset, baseOffset,
+       Collections.emptyList(),
+       Collections.singletonList(SYNTHETIC_HISTORICAL_TRANSITION),
+       Collections.singletonList(lonelyRule));
+
+   IllegalStateException ex = assertThrows(IllegalStateException.class,
+       () -> OrcDstRuleExtractor.extractDstRule(
+           zoneName, constantOffsetWithDstFlag, syntheticRules));
+   // NOTE(review): this only pins the zone name, which the terminal
+   // "Failed to extract" message also carries; consider asserting on the
+   // rule-count wording once the exact message text is confirmed.
+   assertTrue(ex.getMessage().contains("Synthetic/UnsupportedRuleCount"),
+       "exception message should name the offending zone: " + ex.getMessage());
+ }
+
+ /**
+  * Resolves {@code timezoneId} through {@link ZoneId#SHORT_IDS} and runs
+  * {@link OrcDstRuleExtractor#extractDstRule(String, java.util.TimeZone, java.time.zone.ZoneRules)}
+  * on the result.
+  *
+  * <p>{@code java.util.TimeZone} does not recognise offset-format ids such as
+  * {@code "+05:30"}; {@code TimeZone.getTimeZone} silently answers GMT
+  * (rawOffset=0) for them. For fixed-offset rules this helper therefore picks
+  * an explicit UTC placeholder instead of feeding the id to
+  * {@code TimeZone.getTimeZone}. The extractor has its own
+  * {@code rules.isFixedOffset()} guard, but the helper keeps this one so the
+  * tests stay safe even if a later change drops the production guard.
+  */
+ private static OrcDstRuleExtractor.DstRule extractDstRuleFor(String timezoneId) {
+   ZoneId resolvedZone = ZoneId.of(timezoneId, ZoneId.SHORT_IDS);
+   ZoneRules zoneRules = resolvedZone.getRules();
+
+   // The UTC placeholder has rawOffset=0, which does not match a fixed-offset
+   // zone's real standard offset and would trip extractDstRule's tz-vs-rules
+   // sanity check. That is harmless today because the extractor returns null
+   // on isFixedOffset() before reaching the check. Should the guards ever be
+   // reordered, the mismatch would show up here as a "describe different
+   // zones" exception rather than as the silent-GMT bug.
+   TimeZone legacyZone;
+   if (zoneRules.isFixedOffset()) {
+     legacyZone = TimeZone.getTimeZone("UTC");
+   } else {
+     legacyZone = TimeZone.getTimeZone(resolvedZone.getId());
+   }
+   return OrcDstRuleExtractor.extractDstRule(timezoneId, legacyZone, zoneRules);
+ }
+
+ @Test
+ void testExtractDstRuleThrowsWhenBothPathsFail() {
+   // Constant-offset TimeZone: probing observes no transitions across all
+   // anchor years and returns null. Built with the shared helper rather than
+   // a private copy of the same anonymous class.
+   String zoneName = "Synthetic/NoRecurringRules";
+   TimeZone constantOffsetWithDstFlag = newConstantOffsetWithDstFlag(zoneName);
+
+   // A single historical transition keeps rules.isFixedOffset() == false so
+   // the early guard in extractDstRule does not short-circuit; the empty
+   // lastRules list makes extractDstRuleFromZoneRules return null. Both paths
+   // fail and the terminal "Failed to extract" throw fires. The shared
+   // SYNTHETIC_HISTORICAL_TRANSITION fixture is the same 1900 -01:00 -> UTC
+   // transition this test used to build by hand.
+   ZoneOffset baseOffset = ZoneOffset.UTC;
+   ZoneRules rules = ZoneRules.of(
+       baseOffset, baseOffset,
+       Collections.emptyList(),
+       Collections.singletonList(SYNTHETIC_HISTORICAL_TRANSITION),
+       Collections.emptyList());
+
+   IllegalStateException ex = assertThrows(IllegalStateException.class,
+       () -> OrcDstRuleExtractor.extractDstRule(
+           zoneName, constantOffsetWithDstFlag, rules));
+   assertTrue(ex.getMessage().contains("Synthetic/NoRecurringRules"),
+       "exception message should name the offending zone: " + ex.getMessage());
+   assertTrue(ex.getMessage().contains("Failed to extract"),
+       "terminal throw should mention 'Failed to extract': " + ex.getMessage());
+ }
+
+ /**
+  * Builds a {@link TimeZone} stub with the given id that claims to use
+  * daylight time while reporting an offset of 0 for every instant.
+  *
+  * <p>Because the offset never changes, probing finds no transition in any
+  * anchor year and returns null, which routes the call into
+  * extractDstRuleFromZoneRules with whatever hand-crafted ZoneRules the test
+  * supplies.
+  */
+ private static TimeZone newConstantOffsetWithDstFlag(String id) {
+   TimeZone stub = new TimeZone() {
+     @Override public boolean useDaylightTime() { return true; }
+     @Override public boolean inDaylightTime(Date when) { return false; }
+     @Override public int getRawOffset() { return 0; }
+     @Override public void setRawOffset(int ignoredOffsetMillis) {}
+     @Override public int getOffset(long epochMillis) { return 0; }
+     @Override public int getOffset(int era, int year, int month, int day, int dow, int ms) {
+       return 0;
+     }
+   };
+   stub.setID(id);
+   return stub;
+ }
+
+ @Test
+ void testExtractDstRuleThrowsOnZeroDeltaRule() {
+   // A pair of recurring rules in which the second has
+   // offsetBefore == offsetAfter, i.e. a delta of zero. Expected to hit the
+   // "Unsupported zero-delta ORC DST rule" branch.
+   String zoneName = "Synthetic/ZeroDelta";
+   TimeZone stubZone = newConstantOffsetWithDstFlag(zoneName);
+   ZoneOffset utc = ZoneOffset.UTC;
+
+   ZoneOffsetTransitionRule springForward = ZoneOffsetTransitionRule.of(
+       Month.MARCH, 8, DayOfWeek.SUNDAY, LocalTime.of(2, 0), false,
+       ZoneOffsetTransitionRule.TimeDefinition.STANDARD,
+       utc, utc, ZoneOffset.ofHours(1));
+   // Same offset before and after: the zero-delta rule under test.
+   ZoneOffsetTransitionRule noChange = ZoneOffsetTransitionRule.of(
+       Month.OCTOBER, 25, DayOfWeek.SUNDAY, LocalTime.of(1, 0), false,
+       ZoneOffsetTransitionRule.TimeDefinition.STANDARD,
+       utc, utc, utc);
+
+   ZoneRules zeroDeltaRules = ZoneRules.of(utc, utc,
+       Collections.emptyList(),
+       Collections.singletonList(SYNTHETIC_HISTORICAL_TRANSITION),
+       Arrays.asList(springForward, noChange));
+
+   IllegalStateException thrown = assertThrows(IllegalStateException.class,
+       () -> OrcDstRuleExtractor.extractDstRule(zoneName, stubZone, zeroDeltaRules));
+   assertTrue(thrown.getMessage().contains("zero-delta"),
+       "expected 'zero-delta' in message: " + thrown.getMessage());
+ }
+
+ @Test
+ void testExtractDstRuleThrowsOnBothPositiveDeltaRules() {
+   // Both recurring rules move the offset forward, so no rule qualifies as the
+   // DST end and endTransitionRule stays null. Expected to hit the
+   // "Failed to identify ORC DST start/end rules" branch.
+   String zoneName = "Synthetic/BothPositive";
+   TimeZone stubZone = newConstantOffsetWithDstFlag(zoneName);
+   ZoneOffset utc = ZoneOffset.UTC;
+   ZoneOffset oneHourAhead = ZoneOffset.ofHours(1);
+
+   ZoneOffsetTransitionRule marchForward = ZoneOffsetTransitionRule.of(
+       Month.MARCH, 8, DayOfWeek.SUNDAY, LocalTime.of(2, 0), false,
+       ZoneOffsetTransitionRule.TimeDefinition.STANDARD,
+       utc, utc, oneHourAhead);
+   ZoneOffsetTransitionRule juneForward = ZoneOffsetTransitionRule.of(
+       Month.JUNE, 1, DayOfWeek.SUNDAY, LocalTime.of(2, 0), false,
+       ZoneOffsetTransitionRule.TimeDefinition.STANDARD,
+       utc, utc, oneHourAhead);
+
+   ZoneRules forwardOnlyRules = ZoneRules.of(utc, utc,
+       Collections.emptyList(),
+       Collections.singletonList(SYNTHETIC_HISTORICAL_TRANSITION),
+       Arrays.asList(marchForward, juneForward));
+
+   IllegalStateException thrown = assertThrows(IllegalStateException.class,
+       () -> OrcDstRuleExtractor.extractDstRule(zoneName, stubZone, forwardOnlyRules));
+   assertTrue(thrown.getMessage().contains("Failed to identify"),
+       "expected 'Failed to identify' in message: " + thrown.getMessage());
+ }
+
+ @Test
+ void testExtractDstRuleThrowsWhenTzAndRulesDescribeDifferentZones() {
+   // extractDstRule's sanity check compares tz.getRawOffset() with
+   // rules.getStandardOffset(ref). A stub TimeZone with raw offset 0 paired
+   // with the real America/New_York rules (standard offset -18_000_000 ms at
+   // the 2024 reference instant) must trip that check before either
+   // extraction path runs.
+   String zoneName = "Synthetic/OffsetMismatch";
+   TimeZone stubZone = newConstantOffsetWithDstFlag(zoneName);
+   ZoneRules realNewYorkRules = ZoneId.of("America/New_York").getRules();
+
+   IllegalStateException thrown = assertThrows(IllegalStateException.class,
+       () -> OrcDstRuleExtractor.extractDstRule(zoneName, stubZone, realNewYorkRules));
+   assertTrue(thrown.getMessage().contains("describe different zones"),
+       "expected 'describe different zones' in message: " + thrown.getMessage());
+ }
+
+ @Test
+ void testExtractDstRuleThrowsOnBothNegativeDeltaRules() {
+   // Mirror image of testExtractDstRuleThrowsOnBothPositiveDeltaRules: both
+   // rules have a negative delta (+01:00 -> UTC), so startTransitionRule stays
+   // null. This pins the startTransitionRule == null operand of the
+   // start/end null check (an || in OrcDstRuleExtractor) so that a future
+   // change from || to && cannot slip through.
+   final String zoneId = "Synthetic/BothNegative";
+   final ZoneOffset utc = ZoneOffset.UTC;
+   final ZoneOffset ahead = ZoneOffset.ofHours(1);
+   final ZoneOffsetTransitionRule.TimeDefinition std =
+       ZoneOffsetTransitionRule.TimeDefinition.STANDARD;
+   final ZoneOffsetTransitionRule marchRule = ZoneOffsetTransitionRule.of(
+       Month.MARCH, 8, DayOfWeek.SUNDAY, LocalTime.of(2, 0), false, std, utc, ahead, utc);
+   final ZoneOffsetTransitionRule novemberRule = ZoneOffsetTransitionRule.of(
+       Month.NOVEMBER, 1, DayOfWeek.SUNDAY, LocalTime.of(1, 0), false, std, utc, ahead, utc);
+   final ZoneRules zoneRules = ZoneRules.of(utc, utc, Collections.emptyList(),
+       Collections.singletonList(SYNTHETIC_HISTORICAL_TRANSITION),
+       Arrays.asList(marchRule, novemberRule));
+   final TimeZone constantTz = newConstantOffsetWithDstFlag(zoneId);
+   final IllegalStateException thrown = assertThrows(IllegalStateException.class,
+       () -> OrcDstRuleExtractor.extractDstRule(zoneId, constantTz, zoneRules));
+   final String message = thrown.getMessage();
+   assertTrue(message.contains("Failed to identify"),
+       "expected 'Failed to identify' in message: " + message);
+ }
+
+ @Test
+ void testExtractDstRuleThrowsOnMismatchedSavings() {
+   // The start rule gains one hour (UTC -> +01:00) while the end rule gives back
+   // two (+02:00 -> UTC); expects the "Mismatched ORC DST savings" branch.
+   final String zoneId = "Synthetic/MismatchedSavings";
+   final ZoneOffset utc = ZoneOffset.UTC;
+   final LocalTime twoAm = LocalTime.of(2, 0);
+   final ZoneOffsetTransitionRule.TimeDefinition std =
+       ZoneOffsetTransitionRule.TimeDefinition.STANDARD;
+   final ZoneOffsetTransitionRule gainOneHour = ZoneOffsetTransitionRule.of(
+       Month.MARCH, 8, DayOfWeek.SUNDAY, twoAm, false, std,
+       utc, utc, ZoneOffset.ofHours(1));
+   final ZoneOffsetTransitionRule loseTwoHours = ZoneOffsetTransitionRule.of(
+       Month.NOVEMBER, 1, DayOfWeek.SUNDAY, twoAm, false, std,
+       utc, ZoneOffset.ofHours(2), utc);
+   final ZoneRules zoneRules = ZoneRules.of(utc, utc, Collections.emptyList(),
+       Collections.singletonList(SYNTHETIC_HISTORICAL_TRANSITION),
+       Arrays.asList(gainOneHour, loseTwoHours));
+   final TimeZone constantTz = newConstantOffsetWithDstFlag(zoneId);
+   final IllegalStateException thrown = assertThrows(IllegalStateException.class,
+       () -> OrcDstRuleExtractor.extractDstRule(zoneId, constantTz, zoneRules));
+   final String message = thrown.getMessage();
+   assertTrue(message.contains("Mismatched ORC DST savings"),
+       "expected 'Mismatched ORC DST savings' in message: " + message);
+ }
+
+ @Test
+ void testExtractDstRuleThrowsOnUnsupportedRuleShape() {
+   // The first rule has a null dayOfWeek, i.e. it is pinned to a fixed
+   // day-of-month (DOM-shaped). Expects the "Unsupported ORC DST transition
+   // rule shape" branch in fillDstRuleFromTransitionRule.
+   final String zoneId = "Synthetic/DomRule";
+   final ZoneOffset utc = ZoneOffset.UTC;
+   final ZoneOffset ahead = ZoneOffset.ofHours(1);
+   final DayOfWeek noDayOfWeek = null;
+   final ZoneOffsetTransitionRule.TimeDefinition std =
+       ZoneOffsetTransitionRule.TimeDefinition.STANDARD;
+   final ZoneOffsetTransitionRule fixedDayRule = ZoneOffsetTransitionRule.of(
+       Month.MARCH, 15, noDayOfWeek, LocalTime.of(2, 0), false, std, utc, utc, ahead);
+   final ZoneOffsetTransitionRule octoberRule = ZoneOffsetTransitionRule.of(
+       Month.OCTOBER, 25, DayOfWeek.SUNDAY, LocalTime.of(1, 0), false, std, utc, ahead, utc);
+   final ZoneRules zoneRules = ZoneRules.of(utc, utc, Collections.emptyList(),
+       Collections.singletonList(SYNTHETIC_HISTORICAL_TRANSITION),
+       Arrays.asList(fixedDayRule, octoberRule));
+   final TimeZone constantTz = newConstantOffsetWithDstFlag(zoneId);
+   final IllegalStateException thrown = assertThrows(IllegalStateException.class,
+       () -> OrcDstRuleExtractor.extractDstRule(zoneId, constantTz, zoneRules));
+   final String message = thrown.getMessage();
+   assertTrue(message.contains("transition rule shape"),
+       "expected 'transition rule shape' in message: " + message);
+ }
+
+ @Test
+ void testExtractDstRuleThrowsWhenPathAVerificationFails() {
+   // Exercises Path A's verification-failure branch. The TimeZone reports
+   // offset 0 everywhere, so probing finds no transitions and returns null.
+   // Path A then turns the two valid recurring rules into a DstRule with
+   // dstSavings=+1h. verifyDstRuleAcrossReferenceYears compares
+   // computeDstOffset (+1h inside the DST window) with tz.getOffset
+   // (constant 0); the mismatch makes verification return false, which
+   // triggers the "ZoneRules ORC DST rule verification failed" branch.
+   final String zoneId = "Synthetic/PathAVerifyFail";
+   final ZoneOffset utc = ZoneOffset.UTC;
+   final ZoneOffset ahead = ZoneOffset.ofHours(1);
+   final ZoneOffsetTransitionRule.TimeDefinition std =
+       ZoneOffsetTransitionRule.TimeDefinition.STANDARD;
+   final ZoneOffsetTransitionRule dstStart = ZoneOffsetTransitionRule.of(
+       Month.MARCH, 8, DayOfWeek.SUNDAY, LocalTime.of(2, 0), false, std,
+       utc, utc, ahead);
+   final ZoneOffsetTransitionRule dstEnd = ZoneOffsetTransitionRule.of(
+       Month.NOVEMBER, 1, DayOfWeek.SUNDAY, LocalTime.of(1, 0), false, std,
+       utc, ahead, utc);
+   final ZoneRules zoneRules = ZoneRules.of(utc, utc, Collections.emptyList(),
+       Collections.singletonList(SYNTHETIC_HISTORICAL_TRANSITION),
+       Arrays.asList(dstStart, dstEnd));
+   final TimeZone constantTz = newConstantOffsetWithDstFlag(zoneId);
+   final IllegalStateException thrown = assertThrows(IllegalStateException.class,
+       () -> OrcDstRuleExtractor.extractDstRule(zoneId, constantTz, zoneRules));
+   final String message = thrown.getMessage();
+   assertTrue(message.contains("ZoneRules ORC DST rule verification failed"),
+       "expected verification-failed message: " + message);
+ }
+
+ @Test
+ void testExtractDstRuleThrowsOnNegativeDayIndicator() {
+ // Negative dayOfMonthIndicator encodes a DOW_LE_DOM rule ("last