AffineFoundation · kiannidev · Mar 20, 2026 · Mar 26, 2026
diff --git a/liveweb_arena/__init__.py b/liveweb_arena/__init__.py
@@ -2,9 +2,8 @@
 
 __version__ = "0.1.0"
 
-# Core components
+# Core components that do not require optional browser runtime deps.
 from .core.models import BrowserObservation, BrowserAction, CompositeTask, TrajectoryStep
-from .core.browser import BrowserEngine, BrowserSession
 from .plugins.base import BasePlugin, SubTask, ValidationResult
 
 __all__ = [
@@ -22,3 +21,12 @@
     "SubTask",
     "ValidationResult",
 ]
+
+
+def __getattr__(name: str):
+    """Import browser classes on first access so Playwright stays optional."""
+    if name not in {"BrowserEngine", "BrowserSession"}:
+        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+    from .core.browser import BrowserEngine, BrowserSession
+
+    return BrowserEngine if name == "BrowserEngine" else BrowserSession
diff --git a/liveweb_arena/core/task_registry.py b/liveweb_arena/core/task_registry.py
@@ -146,6 +146,12 @@ class TaskRegistry:
         86: ("openmeteo", "openmeteo_comparison"),
         87: ("openmeteo", "openmeteo_hourly_extrema"),
         88: ("openmeteo", "openmeteo_forecast_trend"),
+        96: ("openmeteo", "openmeteo_daily_range"),
+        97: ("openmeteo", "openmeteo_precip_window_count"),
+        98: ("openmeteo", "openmeteo_humidity_band_hours"),
+        99: ("openmeteo", "openmeteo_wind_shift"),
+        100: ("openmeteo", "openmeteo_city_pair_forecast_gap"),
+        101: ("openmeteo", "openmeteo_comfort_index"),
 
         # ArXiv templates
         90: ("arxiv", "arxiv_paper_info"),
@@ -181,6 +187,8 @@ class TaskRegistry:
         [85, 86, 87, 88],
         # Version 6: ArXiv templates
         [90, 91, 92, 94, 95],
+        # Version 7: Additional Open Meteo templates
+        [96, 97, 98, 99, 100, 101],
     ]
 
     # Combination registry: list of template ID tuples

diff --git a/liveweb_arena/plugins/openmeteo/templates/__init__.py b/liveweb_arena/plugins/openmeteo/templates/__init__.py
@@ -1,13 +1,25 @@
-"""Open Meteo question templates"""
+"""Open Meteo question templates."""
 
 from .current_weather import OpenMeteoCurrentWeatherTemplate
 from .comparison import OpenMeteoComparisonTemplate
 from .hourly_extrema import OpenMeteoHourlyExtremaTemplate
 from .forecast_trend import OpenMeteoForecastTrendTemplate
+from .daily_range import OpenMeteoDailyRangeTemplate
+from .precip_window_count import OpenMeteoPrecipWindowCountTemplate
+from .humidity_band_hours import OpenMeteoHumidityBandHoursTemplate
+from .wind_shift import OpenMeteoWindShiftTemplate
+from .city_pair_forecast_gap import OpenMeteoCityPairForecastGapTemplate
+from .comfort_index import OpenMeteoComfortIndexTemplate
 
 __all__ = [
     "OpenMeteoCurrentWeatherTemplate",
     "OpenMeteoComparisonTemplate",
     "OpenMeteoHourlyExtremaTemplate",
     "OpenMeteoForecastTrendTemplate",
+    "OpenMeteoDailyRangeTemplate",
+    "OpenMeteoPrecipWindowCountTemplate",
+    "OpenMeteoHumidityBandHoursTemplate",
+    "OpenMeteoWindShiftTemplate",
+    "OpenMeteoCityPairForecastGapTemplate",
+    "OpenMeteoComfortIndexTemplate",
 ]
diff --git a/liveweb_arena/plugins/openmeteo/templates/city_pair_forecast_gap.py b/liveweb_arena/plugins/openmeteo/templates/city_pair_forecast_gap.py
@@ -0,0 +1,176 @@
+"""Two-city daily forecast comparison template - HARD difficulty."""
+
+import random
+from typing import Any, Dict, Optional
+
+from liveweb_arena.core.ground_truth_trigger import (
+    GroundTruthResult,
+    TriggerConfig,
+    UrlPatternTrigger,
+)
+from liveweb_arena.core.gt_collector import GTSourceType
+from liveweb_arena.core.validators.base import (
+    GeneratedQuestion,
+    QuestionTemplate,
+    ValidationResult,
+    register_template,
+)
+
+from .common import DOCS_HOME_URL, get_collected_location_data, get_daily_value
+from .variables import CITIES, DailyMetric
+
+# (day offset handed to get_daily_value, phrase substituted into the question)
+DAY_CHOICES = [
+    (0, "today"),
+    (1, "tomorrow"),
+    (2, "the day after tomorrow"),
+]
+
+# Question phrasings; every one asks for the signed value city1 - city2.
+PATTERNS = [
+    "Using Open-Meteo, what is the signed difference in {metric_label} for {day_label} between {city1} and {city2} (answer as {city1} minus {city2})?",
+    "On Open-Meteo, compare {day_label}'s {metric_label} in {city1} vs {city2}. Report {city1} - {city2} with unit.",
+    "According to Open-Meteo forecast data, by how much is {city1}'s {day_label} {metric_label} above or below {city2}'s? (signed {city1} - {city2})",
+]
+
+
+@register_template("openmeteo_city_pair_forecast_gap")
+class OpenMeteoCityPairForecastGapTemplate(QuestionTemplate):
+    """Compare a same-day daily metric across two cities (signed city1 - city2).
+
+    Questions are drawn from a seeded RNG; the ground truth is the difference
+    of the two values ``get_daily_value`` returns for each city's collected
+    location data.
+    """
+
+    GT_SOURCE = GTSourceType.PAGE_ONLY
+
+    def __init__(self):
+        super().__init__("openmeteo_city_pair_forecast_gap")
+
+    def generate(self, seed: int, variant: Optional[int] = None) -> GeneratedQuestion:
+        """Build one question from ``seed``.
+
+        Args:
+            seed: Seed for the private RNG that picks cities, metric, day
+                and phrasing.
+            variant: Unused by this template.
+
+        Returns:
+            A ``GeneratedQuestion`` whose ``validation_info`` carries the keys
+            that ``get_ground_truth`` and ``get_validation_rules`` read back.
+        """
+        rng = random.Random(seed)
+        # Keep this draw order: changing it changes what every seed produces.
+        city1, city2 = rng.sample(CITIES, 2)
+        metric = rng.choice(list(DailyMetric))
+        day_idx, day_label = rng.choice(DAY_CHOICES)
+        pattern = rng.choice(PATTERNS)
+
+        return GeneratedQuestion(
+            question_text=pattern.format(
+                metric_label=metric.display_name,
+                day_label=day_label,
+                city1=city1.display_name,
+                city2=city2.display_name,
+            ),
+            start_url=DOCS_HOME_URL,
+            variables={
+                "city1": city1.name,
+                "city2": city2.name,
+                "metric": metric.name,
+                "day_idx": day_idx,
+            },
+            validation_info={
+                "city1_name": city1.name,
+                "city1_coord_key": city1.coord_key,
+                "city2_name": city2.name,
+                "city2_coord_key": city2.coord_key,
+                "metric_field": metric.api_field,
+                "metric_label": metric.display_name,
+                "unit": metric.unit,
+                "day_idx": day_idx,
+                "day_label": day_label,
+            },
+            template_name=self.name,
+            expected_steps=10,
+        )
+
+    def get_validation_rules(self, validation_info: Dict[str, Any]) -> str:
+        """Return the scoring rubric text for one generated question."""
+        return (
+            "Task-Specific Rules (Open Meteo City Pair Forecast Gap):\n"
+            f"- Day: {validation_info.get('day_label', '')}\n"
+            f"- Metric: {validation_info.get('metric_label', '')}\n"
+            f"- Signed difference must be {validation_info.get('city1_name', 'city1')} - {validation_info.get('city2_name', 'city2')}\n"
+            "- Score 1.0: signed value within ±1.0 unit\n"
+            "- Score 0.5: absolute magnitude close but sign wrong OR error <=3.0 units\n"
+            "- Score 0.0: otherwise"
+        )
+
+    async def get_ground_truth(self, validation_info: Dict[str, Any]) -> GroundTruthResult:
+        """Compute the expected signed difference, formatted with its unit.
+
+        Returns:
+            ``GroundTruthResult.ok`` with ``"<city1 - city2, one decimal><unit>"``,
+            or a failure result when ``day_idx`` is malformed, either city's
+            data or daily value is unavailable, or a value is not numeric.
+        """
+        try:
+            day_idx = int(validation_info.get("day_idx", 0))
+        except (TypeError, ValueError):
+            return GroundTruthResult.fail("Invalid day_idx in validation_info")
+        metric_field = validation_info.get("metric_field", "temperature_2m_max")
+        unit = validation_info.get("unit", "")
+
+        # Look up both cities' data first, then both values, returning the
+        # first failure encountered along the way.
+        locations = []
+        for prefix in ("city1", "city2"):
+            data, failure = get_collected_location_data(
+                validation_info.get(f"{prefix}_coord_key", ""),
+                validation_info.get(f"{prefix}_name", ""),
+            )
+            if failure is not None:
+                return failure
+            locations.append(data)
+
+        values = []
+        for data in locations:
+            value, failure = get_daily_value(data, metric_field, day_idx)
+            if failure is not None:
+                return failure
+            values.append(value)
+
+        try:
+            diff = float(values[0]) - float(values[1])
+        except (TypeError, ValueError):
+            return GroundTruthResult.fail(f"Non-numeric {metric_field} value")
+
+        # A tiny negative gap would otherwise render as "-0.0"; rounding first
+        # and adding 0.0 turns negative zero into plain zero.
+        diff = round(diff, 1) + 0.0
+        return GroundTruthResult.ok(f"{diff:.1f}{unit}")
+
+    async def validate_answer(self, answer: str, validation_info: Dict[str, Any]) -> ValidationResult:
+        """Return a zero-score placeholder result that points to LLM validation."""
+        return ValidationResult(
+            score=0.0,
+            is_correct=False,
+            expected=None,
+            actual=answer,
+            details="Use LLM validation",
+        )
+
+    def get_ground_truth_trigger(self, validation_info: dict) -> TriggerConfig:
+        """Return a URL-pattern trigger for the ``open-meteo.com`` domain."""
+        return TriggerConfig(trigger=UrlPatternTrigger(domains=["open-meteo.com"]))
+
+    @classmethod
+    def get_cache_source(cls) -> str:
+        """Return the cache source name, ``"openmeteo"``."""
+        return "openmeteo"
+
+    def get_gt_source(self) -> GTSourceType:
+        """Return the class-level ``GT_SOURCE``."""
+        return self.GT_SOURCE
diff --git a/liveweb_arena/plugins/openmeteo/templates/comfort_index.py b/liveweb_arena/plugins/openmeteo/templates/comfort_index.py
@@ -0,0 +1,205 @@
+"""Comfort index template for Open Meteo - HARD difficulty."""
+
+import random
+from datetime import datetime
+from typing import Any, Dict, Optional
+
+from liveweb_arena.core.ground_truth_trigger import (
+    GroundTruthResult,
+    TriggerConfig,
+    UrlPatternTrigger,
+)
+from liveweb_arena.core.gt_collector import GTSourceType
+from liveweb_arena.core.validators.base import (
+    GeneratedQuestion,
+    QuestionTemplate,
+    ValidationResult,
+    register_template,
+)
+
+from .common import DOCS_HOME_URL, get_collected_location_data
+from .variables import CITIES
+
+# Question phrasings; all three describe the same formula, T - 0.2W - 0.05H.
+PATTERNS = [
+    "For {city} on Open-Meteo, compute the comfort index: temperature_2m - 0.2 * wind_speed_10m - 0.05 * relative_humidity_2m. What is the value?",
+    "Using Open-Meteo current weather for {city}, calculate CI = T - 0.2W - 0.05H, where T is temperature (°C), W is wind speed (km/h), H is humidity (%).",
+    "According to Open-Meteo, what is {city}'s comfort index defined as T - 0.2W - 0.05H from current weather values?",
+]
+
+
+def _parse_iso(value: Any) -> Optional[datetime]:
+    """Parse an ISO-format timestamp string; return None if that fails."""
+    if not isinstance(value, str):
+        return None
+    try:
+        return datetime.fromisoformat(value)
+    except ValueError:
+        return None
+
+
+def _humidity_near(times: list, humidity: list, current_time: Any) -> Any:
+    """Pick the hourly humidity reading closest in time to ``current_time``.
+
+    ``times`` and ``humidity`` must be parallel lists of equal length (the
+    caller checks this). Readings that are None are skipped. Preference:
+
+    1. the reading whose timestamp equals ``current_time`` exactly;
+    2. the reading with the smallest absolute time gap to ``current_time``;
+    3. the first reading, when the timestamps cannot be compared at all.
+
+    May return None (empty list, or a missing first reading in case 3); the
+    caller treats a non-numeric result as a failure.
+    """
+    if isinstance(current_time, str) and current_time in times:
+        exact = humidity[times.index(current_time)]
+        if exact is not None:
+            return exact
+
+    target = _parse_iso(current_time)
+    if target is not None:
+        best_value, best_gap = None, None
+        for stamp, value in zip(times, humidity):
+            moment = _parse_iso(stamp)
+            # Skip gaps in the series and timestamps that cannot be compared
+            # with the target (unparsable, or naive vs. timezone-aware).
+            if value is None or moment is None:
+                continue
+            if (moment.tzinfo is None) != (target.tzinfo is None):
+                continue
+            gap = abs((moment - target).total_seconds())
+            if best_gap is None or gap < best_gap:
+                best_value, best_gap = value, gap
+        if best_gap is not None:
+            return best_value
+
+    # Last resort: no usable timestamps, so fall back to the first reading.
+    return humidity[0] if humidity else None
+
+
+@register_template("openmeteo_comfort_index")
+class OpenMeteoComfortIndexTemplate(QuestionTemplate):
+    """Compute a deterministic index from three current-weather fields.
+
+    The index is ``T - 0.2W - 0.05H``. Temperature and wind speed are read
+    from ``current_weather``; humidity is read from the hourly series at the
+    timestamp closest to the ``current_weather`` time.
+    """
+
+    GT_SOURCE = GTSourceType.PAGE_ONLY
+
+    def __init__(self):
+        super().__init__("openmeteo_comfort_index")
+
+    def generate(self, seed: int, variant: Optional[int] = None) -> GeneratedQuestion:
+        """Build one question from ``seed``.
+
+        Args:
+            seed: Seed for the private RNG that picks the city and phrasing.
+            variant: Unused by this template.
+
+        Returns:
+            A ``GeneratedQuestion`` whose ``validation_info`` carries the city
+            name and coord key that ``get_ground_truth`` reads back.
+        """
+        rng = random.Random(seed)
+        # Keep this draw order: changing it changes what every seed produces.
+        city = rng.choice(CITIES)
+        pattern = rng.choice(PATTERNS)
+
+        return GeneratedQuestion(
+            question_text=pattern.format(city=city.display_name),
+            start_url=DOCS_HOME_URL,
+            variables={"city": city.name},
+            validation_info={
+                "city_name": city.name,
+                "coord_key": city.coord_key,
+                "formula": "T - 0.2W - 0.05H",
+                "unit": "index-points",
+            },
+            template_name=self.name,
+            expected_steps=8,
+        )
+
+    def get_validation_rules(self, validation_info: Dict[str, Any]) -> str:
+        """Return the scoring rubric text for one generated question."""
+        return (
+            "Task-Specific Rules (Open Meteo Comfort Index):\n"
+            f"- City: {validation_info.get('city_name', '')}\n"
+            "- Formula: CI = temperature - 0.2*wind_speed - 0.05*humidity\n"
+            "- Use current_weather values from Open-Meteo\n"
+            "- Score 1.0: within ±0.8 index-points\n"
+            "- Score 0.5: within ±2.0 index-points\n"
+            "- Score 0.0: otherwise"
+        )
+
+    async def get_ground_truth(self, validation_info: Dict[str, Any]) -> GroundTruthResult:
+        """Compute the expected comfort index, formatted to two decimals.
+
+        Returns:
+            ``GroundTruthResult.ok`` with the index as text, or a failure
+            result when the city's data, ``current_weather``, the hourly
+            humidity series, or any numeric input is missing or invalid.
+        """
+        data, failure = get_collected_location_data(
+            validation_info.get("coord_key", ""),
+            validation_info.get("city_name", ""),
+        )
+        if failure is not None:
+            return failure
+
+        current = data.get("current_weather")
+        hourly = data.get("hourly")
+        if not isinstance(current, dict):
+            return GroundTruthResult.fail("No current_weather in API response")
+        if not isinstance(hourly, dict):
+            return GroundTruthResult.fail("No hourly data in API response")
+
+        temp_raw = current.get("temperature")
+        wind_raw = current.get("windspeed")
+        if temp_raw is None or wind_raw is None:
+            return GroundTruthResult.fail("Missing temperature/windspeed in current_weather")
+        try:
+            temp = float(temp_raw)
+            wind = float(wind_raw)
+        except (TypeError, ValueError):
+            return GroundTruthResult.fail("Non-numeric temperature/windspeed")
+
+        # Humidity is taken from the hourly series, matched by timestamp.
+        times = hourly.get("time")
+        humidity = hourly.get("relative_humidity_2m")
+        if not isinstance(times, list) or not isinstance(humidity, list) or len(times) != len(humidity):
+            return GroundTruthResult.fail("Invalid hourly humidity arrays")
+
+        try:
+            hum = float(_humidity_near(times, humidity, current.get("time")))
+        except (TypeError, ValueError):
+            return GroundTruthResult.fail("Non-numeric humidity value")
+
+        # Round first and add 0.0 so a tiny negative result prints as "0.00"
+        # rather than "-0.00".
+        ci = round(temp - 0.2 * wind - 0.05 * hum, 2) + 0.0
+        return GroundTruthResult.ok(f"{ci:.2f}")
+
+    async def validate_answer(self, answer: str, validation_info: Dict[str, Any]) -> ValidationResult:
+        """Return a zero-score placeholder result that points to LLM validation."""
+        return ValidationResult(
+            score=0.0,
+            is_correct=False,
+            expected=None,
+            actual=answer,
+            details="Use LLM validation",
+        )
+
+    def get_ground_truth_trigger(self, validation_info: dict) -> TriggerConfig:
+        """Return a URL-pattern trigger for the ``open-meteo.com`` domain."""
+        return TriggerConfig(trigger=UrlPatternTrigger(domains=["open-meteo.com"]))
+
+    @classmethod
+    def get_cache_source(cls) -> str:
+        """Return the cache source name, ``"openmeteo"``."""
+        return "openmeteo"
+
+    def get_gt_source(self) -> GTSourceType:
+        """Return the class-level ``GT_SOURCE``."""
+        return self.GT_SOURCE