AffineFoundation · angosr · Mar 27, 2026 · Mar 20, 2026 · Mar 20, 2026 · Mar 20, 2026
diff --git a/liveweb_arena/core/task_registry.py b/liveweb_arena/core/task_registry.py
@@ -146,6 +146,9 @@ class TaskRegistry:
         86: ("openmeteo", "openmeteo_comparison"),
         87: ("openmeteo", "openmeteo_hourly_extrema"),
         88: ("openmeteo", "openmeteo_forecast_trend"),
+        99: ("openmeteo", "openmeteo_hourly_threshold"),
+        100: ("openmeteo", "openmeteo_sunrise_sunset"),
+        101: ("openmeteo", "openmeteo_hourly_time_of"),
 
         # ArXiv templates
         90: ("arxiv", "arxiv_paper_info"),
@@ -189,6 +192,8 @@ class TaskRegistry:
         # Version 7: Open Library engagement & comparison templates (PR #13)
         # NOTE: PR #14 (openmeteo IDs 99-101) must use Version 8.
         [96, 97, 98],
+        # Version 8: Open Meteo hourly threshold, sunrise/sunset & hourly time-of templates (PR #14)
+        [99, 100, 101],
     ]
 
     # Combination registry: list of template ID tuples

diff --git a/liveweb_arena/plugins/openmeteo/openmeteo.py b/liveweb_arena/plugins/openmeteo/openmeteo.py
@@ -122,14 +122,22 @@ def _build_data_html(data: dict) -> str:
             t_max = daily.get("temperature_2m_max", [])
             t_min = daily.get("temperature_2m_min", [])
             p_max = daily.get("precipitation_probability_max", [])
+            sr = daily.get("sunrise", [])
+            ss = daily.get("sunset", [])
             for i, t in enumerate(times):
                 mx = t_max[i] if i < len(t_max) else "N/A"
                 mn = t_min[i] if i < len(t_min) else "N/A"
                 pp = p_max[i] if i < len(p_max) else "N/A"
-                rows.append(f"<tr><td>{t}</td><td>{mx} C</td><td>{mn} C</td><td>{pp}%</td></tr>")
+                sunrise = sr[i] if i < len(sr) else "N/A"
+                sunset = ss[i] if i < len(ss) else "N/A"
+                rows.append(
+                    f"<tr><td>{t}</td><td>{mx} C</td><td>{mn} C</td>"
+                    f"<td>{pp}%</td><td>{sunrise}</td><td>{sunset}</td></tr>"
+                )
             parts.append(
                 "<h2>Daily Forecast</h2><table>"
-                "<tr><th>Date</th><th>Max Temp</th><th>Min Temp</th><th>Precip Prob</th></tr>"
+                "<tr><th>Date</th><th>Max Temp</th><th>Min Temp</th>"
+                "<th>Precip Prob</th><th>Sunrise</th><th>Sunset</th></tr>"
                 + "".join(rows) + "</table>"
             )
 

diff --git a/liveweb_arena/plugins/openmeteo/templates/__init__.py b/liveweb_arena/plugins/openmeteo/templates/__init__.py
@@ -4,10 +4,16 @@
 from .comparison import OpenMeteoComparisonTemplate
 from .hourly_extrema import OpenMeteoHourlyExtremaTemplate
 from .forecast_trend import OpenMeteoForecastTrendTemplate
+from .hourly_threshold import OpenMeteoHourlyThresholdTemplate
+from .sunrise_sunset import OpenMeteoSunriseSunsetTemplate
+from .hourly_time_of import OpenMeteoHourlyTimeOfTemplate
 
 __all__ = [
     "OpenMeteoCurrentWeatherTemplate",
     "OpenMeteoComparisonTemplate",
     "OpenMeteoHourlyExtremaTemplate",
     "OpenMeteoForecastTrendTemplate",
+    "OpenMeteoHourlyThresholdTemplate",
+    "OpenMeteoSunriseSunsetTemplate",
+    "OpenMeteoHourlyTimeOfTemplate",
 ]
diff --git a/liveweb_arena/plugins/openmeteo/templates/common.py b/liveweb_arena/plugins/openmeteo/templates/common.py
@@ -28,13 +28,14 @@ def get_collected_location_data(
     return data, None
 
 
-def get_today_hourly_series(
+def get_today_hourly_pairs(
     data: Dict[str, Any],
     field_name: str,
-) -> Tuple[Optional[List[float]], Optional[GroundTruthResult]]:
-    """Extract today's hourly values for the given field from API data.
+) -> Tuple[Optional[List[Tuple[str, float]]], Optional[GroundTruthResult]]:
+    """Extract today's hourly (time_str, value) pairs for the given field.
 
-    Returns (values, None) on success, or (None, failure_result) on error.
+    Returns a list of (ISO time string, numeric value) tuples for today,
+    or (None, failure_result) on error.
     """
     hourly = data.get("hourly")
     if not hourly:
@@ -70,29 +71,37 @@ def get_today_hourly_series(
     if not today:
         today = str(times[0]).split("T", 1)[0]
 
-    values: List[float] = []
+    pairs: List[Tuple[str, float]] = []
     for time_str, val in zip(times, series):
         if not isinstance(time_str, str) or not time_str.startswith(today):
             continue
         if val is None:
             continue
         try:
-            values.append(float(val))
+            pairs.append((time_str, float(val)))
         except (TypeError, ValueError):
             return None, GroundTruthResult.fail(
                 f"Non-numeric value in hourly {field_name}: {val!r}"
             )
 
-    if not values:
+    if not pairs:
         return None, GroundTruthResult.fail(
             f"No hourly {field_name} data found for today ({today})"
         )
 
-    return values, None
+    return pairs, None
 
 
-def get_today_hourly_temperatures(
+def get_today_hourly_series(
     data: Dict[str, Any],
+    field_name: str,
 ) -> Tuple[Optional[List[float]], Optional[GroundTruthResult]]:
-    """Extract today's hourly temperatures from a collected API payload."""
-    return get_today_hourly_series(data, "temperature_2m")
+    """Extract today's hourly values for the given field from API data.
+
+    Thin wrapper around get_today_hourly_pairs that discards the timestamps.
+    Returns (values, None) on success, or (None, failure_result) on error.
+    """
+    pairs, failure = get_today_hourly_pairs(data, field_name)
+    if failure is not None:
+        return None, failure
+    return [val for _, val in pairs], None
diff --git a/liveweb_arena/plugins/openmeteo/templates/hourly_threshold.py b/liveweb_arena/plugins/openmeteo/templates/hourly_threshold.py
@@ -0,0 +1,198 @@
+"""Hourly threshold counting template for Open Meteo - MEDIUM DIFFICULTY.
+
+Asks how many hours today a given metric is above or below a threshold
+in a given city. The agent starts on the generic docs page, finds the city,
+then counts qualifying hours from the hourly forecast table.
+
+Dynamic data: hourly forecasts update continuously.
+Time-sensitive: asks about "today" which changes daily.
+Computation required: agent must count hours, not read a single value.
+
+SFT defense:
+- Threshold includes a seed-derived offset (±2 °C temperature, ±3 km/h wind speed,
+  ±5 % humidity / precipitation probability), so it is never a memorizable constant.
+- Strict scoring: exact count only for 1.0, off-by-1 for 0.5.
+  On a 0-24 range, SFT with climate priors may guess close but rarely exact.
+
+Effective variants: 170 cities x 4 metrics x ~8 base thresholds x jittered offset
+                    (0.1 steps after rounding) x 2 directions → too many to memorise.
+"""
+
+import random
+from typing import Any, Dict, Optional
+
+from liveweb_arena.core.validators.base import (
+    QuestionTemplate, GeneratedQuestion, ValidationResult, register_template,
+)
+from liveweb_arena.core.ground_truth_trigger import (
+    UrlPatternTrigger, TriggerConfig, GroundTruthResult,
+)
+from liveweb_arena.core.gt_collector import GTSourceType
+
+from .common import DOCS_HOME_URL, get_collected_location_data, get_today_hourly_series
+from .variables import CITIES, HourlyMetric, HOURLY_THRESHOLDS
+
+# Per-metric jitter half-range, keyed by HourlyMetric.api_field. generate() draws a uniform
+# offset within ± this value so SFT cannot memorise fixed threshold→count mappings.
+_THRESHOLD_JITTER = {
+    "temperature_2m": 2.0,       # ±2 °C
+    "relative_humidity_2m": 5.0,  # ±5 %
+    "wind_speed_10m": 3.0,       # ±3 km/h
+    "precipitation_probability": 5.0,  # ±5 %
+}
+
+
+PATTERNS_ABOVE = {  # "above threshold" phrasings per HourlyMetric; generate() fills city/threshold/unit
+    HourlyMetric.TEMPERATURE: [
+        "According to Open-Meteo, how many hours today will the temperature in {city} be above {threshold}{unit}?",
+        "Using Open-Meteo, count the hours today when {city}'s temperature exceeds {threshold}{unit}.",
+        "On Open-Meteo, for how many hours today is {city}'s temperature forecast above {threshold}{unit}?",
+    ],
+    HourlyMetric.HUMIDITY: [
+        "According to Open-Meteo, how many hours today will the relative humidity in {city} be above {threshold}{unit}?",
+        "Using Open-Meteo, count the hours today when {city}'s humidity exceeds {threshold}{unit}.",
+    ],
+    HourlyMetric.WIND_SPEED: [  # unlike the other metrics, a space separates {threshold} and {unit}
+        "According to Open-Meteo, how many hours today will the wind speed in {city} be above {threshold} {unit}?",
+        "Using Open-Meteo, count the hours today when {city}'s wind speed exceeds {threshold} {unit}.",
+    ],
+    HourlyMetric.PRECIP_PROBABILITY: [
+        "According to Open-Meteo, how many hours today will the precipitation probability in {city} be above {threshold}{unit}?",
+        "Using Open-Meteo, count the hours today when {city}'s precipitation probability exceeds {threshold}{unit}.",
+    ],
+}
+
+PATTERNS_BELOW = {  # "below threshold" phrasings per HourlyMetric; generate() fills city/threshold/unit
+    HourlyMetric.TEMPERATURE: [
+        "According to Open-Meteo, how many hours today will the temperature in {city} be below {threshold}{unit}?",
+        "Using Open-Meteo, count the hours today when {city}'s temperature is below {threshold}{unit}.",
+        "On Open-Meteo, for how many hours today is {city}'s temperature forecast below {threshold}{unit}?",
+    ],
+    HourlyMetric.HUMIDITY: [
+        "According to Open-Meteo, how many hours today will the relative humidity in {city} be below {threshold}{unit}?",
+        "Using Open-Meteo, count the hours today when {city}'s humidity is below {threshold}{unit}.",
+    ],
+    HourlyMetric.WIND_SPEED: [  # unlike the other metrics, a space separates {threshold} and {unit}
+        "According to Open-Meteo, how many hours today will the wind speed in {city} be below {threshold} {unit}?",
+        "Using Open-Meteo, count the hours today when {city}'s wind speed is below {threshold} {unit}.",
+    ],
+    HourlyMetric.PRECIP_PROBABILITY: [
+        "According to Open-Meteo, how many hours today will the precipitation probability in {city} be below {threshold}{unit}?",
+        "Using Open-Meteo, count the hours today when {city}'s precipitation probability is below {threshold}{unit}.",
+    ],
+}
+
+
+@register_template("openmeteo_hourly_threshold")
+class OpenMeteoHourlyThresholdTemplate(QuestionTemplate):
+    """MEDIUM: Count hours above/below a jittered threshold for a metric today.
+
+    Requires scanning hourly forecast data and counting qualifying entries.
+    Threshold includes a seed-derived random offset so SFT cannot memorise
+    fixed threshold-to-count mappings. Scoring is strict: exact = 1.0,
+    off by 1 = 0.5, off by more than 1 = 0.0.
+    """
+
+    GT_SOURCE = GTSourceType.PAGE_ONLY
+
+    def __init__(self):
+        super().__init__("openmeteo_hourly_threshold")
+
+    def generate(self, seed: int, variant: Optional[int] = None) -> GeneratedQuestion:
+        """Build one question from seed; a non-None variant pins the metric choice."""
+        rng = random.Random(seed)
+
+        metrics = list(HourlyMetric)
+        metric = metrics[variant % len(metrics)] if variant is not None else rng.choice(metrics)
+
+        base_thresholds = HOURLY_THRESHOLDS[metric.api_field]
+        base = rng.choice(base_thresholds)
+        jitter_range = _THRESHOLD_JITTER[metric.api_field]
+        offset = rng.uniform(-jitter_range, jitter_range)
+        # Round to 1 decimal so the question reads naturally. Adding 0.0 turns a
+        # rounded negative zero into 0.0, so the text never shows "-0.0".
+        threshold = round(base + offset, 1) + 0.0
+
+        is_above = rng.choice([True, False])
+        city = rng.choice(CITIES)
+        patterns = PATTERNS_ABOVE[metric] if is_above else PATTERNS_BELOW[metric]
+        question_text = rng.choice(patterns).format(
+            city=city.display_name,
+            threshold=threshold,
+            unit=metric.unit,
+        )
+
+        return GeneratedQuestion(
+            question_text=question_text,
+            start_url=DOCS_HOME_URL,
+            variables={"city": city.name, "metric": metric.name, "threshold": threshold, "is_above": is_above},
+            validation_info={
+                "city_name": city.name,
+                "coord_key": city.coord_key,
+                "metric_field": metric.api_field,
+                "metric_label": metric.display_name,
+                "unit": metric.unit,
+                "threshold": threshold,
+                "is_above": is_above,
+            },
+            template_name=self.name,
+            expected_steps=7,
+        )
+
+    def get_validation_rules(self, validation_info: Dict[str, Any]) -> str:
+        """Return the task-specific scoring rules text used for LLM-based validation."""
+        city = validation_info.get("city_name", "")
+        label = validation_info.get("metric_label", "hourly temperature")
+        unit = validation_info.get("unit", "°C")
+        threshold = validation_info.get("threshold", 0)
+        direction = "above" if validation_info.get("is_above", True) else "below"
+        return f"""Task-Specific Rules (Open Meteo Hourly Threshold Count):
+- City: {city}
+- Count hours today where {label} is strictly {direction} {threshold}{unit}
+- Answer should be a whole number (0-24)
+- Score 1.0: Exact count
+- Score 0.5: Off by exactly 1 hour
+- Score 0.0: Off by more than 1 hour or no numeric answer
+- Use the hourly forecast for today's local date"""
+
+    async def get_ground_truth(self, validation_info: Dict[str, Any]) -> GroundTruthResult:
+        """Count today's hourly values strictly above/below the threshold, or return the failure."""
+        coord_key = validation_info.get("coord_key", "")
+        city_name = validation_info.get("city_name", "")
+        metric_field = validation_info.get("metric_field", "temperature_2m")
+        threshold = validation_info.get("threshold", 0)
+        is_above = validation_info.get("is_above", True)
+
+        data, failure = get_collected_location_data(coord_key, city_name)
+        if failure is not None:
+            return failure
+
+        values, val_failure = get_today_hourly_series(data, metric_field)
+        if val_failure is not None:
+            return val_failure
+
+        if is_above:
+            count = sum(1 for v in values if v > threshold)
+        else:
+            count = sum(1 for v in values if v < threshold)
+        return GroundTruthResult.ok(str(count))
+
+    async def validate_answer(
+        self, answer: str, validation_info: Dict[str, Any]
+    ) -> ValidationResult:
+        """Not used — the pipeline uses LLM-based validation via get_validation_rules()."""
+        return ValidationResult(
+            score=0.0, is_correct=False, expected=None, actual=answer,
+            details="Use LLM validation",
+        )
+
+    def get_ground_truth_trigger(self, validation_info: dict) -> TriggerConfig:
+        """Build a TriggerConfig around a UrlPatternTrigger for the open-meteo.com domain."""
+        return TriggerConfig(trigger=UrlPatternTrigger(domains=["open-meteo.com"]))
+
+    @classmethod
+    def get_cache_source(cls) -> str:
+        return "openmeteo"
+
+    def get_gt_source(self) -> GTSourceType:
+        return self.GT_SOURCE