{safe_message}
" + f"URL: {safe_url}
" + "" + ) + await page.set_content(html, wait_until="domcontentloaded") + + a11y_tree = "" + try: + a11y_snapshot = await page.accessibility.snapshot() + if a11y_snapshot: + a11y_tree = self._format_accessibility_tree(a11y_snapshot) + except Exception: + pass + + if len(a11y_tree.strip()) < 20: + try: + page_text = await page.evaluate("() => document.body.innerText || ''") + if page_text.strip(): + a11y_tree = page_text + except Exception: + pass + + return await page.content(), a11y_tree + def _format_accessibility_tree(self, node: dict, indent: int = 0) -> str: """Format accessibility tree node recursively.""" if not node: @@ -666,4 +1249,3 @@ def get_cached(self, url: str) -> Optional[CachedPage]: return self._load(cache_file) except Exception: return None - diff --git a/liveweb_arena/core/interceptor.py b/liveweb_arena/core/interceptor.py index 1b4ad6b..42e0199 100644 --- a/liveweb_arena/core/interceptor.py +++ b/liveweb_arena/core/interceptor.py @@ -10,21 +10,59 @@ import asyncio import logging +import os import re from dataclasses import dataclass, field -from typing import Any, Dict, List, Optional, Set +from typing import Any, Dict, List, Optional, Set, TYPE_CHECKING from urllib.parse import urlparse -from playwright.async_api import Route +if TYPE_CHECKING: + from playwright.async_api import Route +else: + Route = Any from liveweb_arena.core.block_patterns import TRACKING_BLOCK_PATTERNS -from liveweb_arena.core.cache import CachedPage, CacheFatalError, CacheManager, PageRequirement, normalize_url +from liveweb_arena.core.cache import ( + CachedPage, + CacheFatalError, + CacheFetchResult, + CacheManager, + PageRequirement, + normalize_url, +) logger = logging.getLogger(__name__) -# Pre-fetch timeout must be less than the main browser's NAVIGATION_TIMEOUT_MS (30s) -# so that route.abort() reaches the browser BEFORE page.goto() times out. -PREFETCH_TIMEOUT = 25 +DEFAULT_PREFETCH_TIMEOUT_NAV = int( + os.environ.get("LIVEWEB_PREFETCH_TIMEOUT_NAV", "12") +) +DEFAULT_PREFETCH_TIMEOUT_DATA = int( + os.environ.get("LIVEWEB_PREFETCH_TIMEOUT_DATA", "25") +) +DEFAULT_SOFT_FAIL_DOMAINS = "news.ycombinator.com,channelsurfer.tv,taostats.io,coingecko.com" +DEFAULT_SOFT_FAIL_URL_PATTERNS = ( + "news.ycombinator.com/ask," + "news.ycombinator.com/show," + "channelsurfer.tv," + "runcaptain.com," + "aether.saphal.me," + "stooq.com/q/currency/," + "stooq.com/q/c/," + "coingecko.app.link," + "coingecko.com/en/highlights/" +) +DEFAULT_REQUIRED_SOFT_URL_REGEXES = ( + r"^news\.ycombinator\.com/?$," + r"^news\.ycombinator\.com/(ask|show)(?:[/?].*)?$" +) +DEFAULT_PREFETCH_SOFT_URL_REGEXES = ( + r"^channelsurfer\.tv(?:/.*)?$," + r"^runcaptain\.com(?:/.*)?$," + r"^aether\.saphal\.me(?:/.*)?$," + r"^(?:www\.)?taostats\.io(?:/subnets(?:/\d+(?:/chart)?)?)?(?:[/?].*)?$," + r"^(?:www\.)?coingecko\.com/en/(?:coins/[^/?#]+(?:/historical_data)?|highlights/.*)(?:[/?].*)?$," + r"^(?:www\.)?stooq\.com/q/(?:currency|c)/.*$" +) # 1x1 transparent GIF (43 bytes) _TRANSPARENT_GIF = ( @@ -57,9 +95,16 @@ class InterceptorStats: blocked: int = 0 passed: int = 0 errors: int = 0 + stale_hits: int = 0 + prefetch_timeouts: int = 0 + soft_failures: int = 0 miss_urls: List[str] = field(default_factory=list) blocked_urls: Set[str] = field(default_factory=set) passed_urls: Set[str] = field(default_factory=set) + per_domain_prefetch_timeouts: Dict[str, int] = field(default_factory=dict) + per_domain_soft_failures: Dict[str, int] = field(default_factory=dict) + per_domain_miss_count: Dict[str, int] = field(default_factory=dict) + per_domain_miss_latency_s: Dict[str, float] = field(default_factory=dict) def to_dict(self) -> dict: total = self.hits + self.misses + self.blocked + self.passed @@ -69,11 +114,18 @@ def to_dict(self) -> dict: "blocked": self.blocked, "passed": self.passed, "errors": self.errors, + "stale_hits": self.stale_hits, + "prefetch_timeouts": self.prefetch_timeouts, + "soft_failures": self.soft_failures, "total": total, "hit_rate": self.hits / max(1, self.hits + self.misses), "miss_urls": self.miss_urls[:10], "blocked_urls": sorted(self.blocked_urls), "passed_urls": sorted(self.passed_urls), + "per_domain_prefetch_timeouts": self.per_domain_prefetch_timeouts, + "per_domain_soft_failures": self.per_domain_soft_failures, + "per_domain_miss_count": self.per_domain_miss_count, + "per_domain_miss_latency_s": self.per_domain_miss_latency_s, } @@ -140,6 +192,28 @@ def __init__( self.offline = offline self.stats = InterceptorStats() self._pending_error: Optional[Exception] = None + self._last_error_metadata: Dict[str, Any] = {} + self._last_blocked_document_metadata: Dict[str, Any] = {} + self._soft_fail_domains = { + item.strip().lower() + for item in os.environ.get("LIVEWEB_SOFT_FAIL_DOMAINS", DEFAULT_SOFT_FAIL_DOMAINS).split(",") + if item.strip() + } + self._soft_fail_url_patterns = [ + item.strip().lower() + for item in os.environ.get("LIVEWEB_SOFT_FAIL_URL_PATTERNS", DEFAULT_SOFT_FAIL_URL_PATTERNS).split(",") + if item.strip() + ] + self._required_soft_url_regexes = [ + re.compile(item.strip(), re.IGNORECASE) + for item in os.environ.get("LIVEWEB_REQUIRED_SOFT_URL_REGEXES", DEFAULT_REQUIRED_SOFT_URL_REGEXES).split(",") + if item.strip() + ] + self._prefetch_soft_url_regexes = [ + re.compile(item.strip(), re.IGNORECASE) + for item in os.environ.get("LIVEWEB_PREFETCH_SOFT_URL_REGEXES", DEFAULT_PREFETCH_SOFT_URL_REGEXES).split(",") + if item.strip() + ] # Per-evaluation storage for cached accessibility trees self._accessibility_trees: Dict[str, str] = {} @@ -247,6 +321,15 @@ async def _handle_document(self, route: Route, url: str): log("Intercept", f"MISS document - {self._url_display(url)}") if not self._is_domain_allowed(url): + blocked_domain = (urlparse(url).hostname or "").lower() + self._last_blocked_document_metadata = { + "classification": "model_disallowed_domain", + "blocked_url": url, + "blocked_domain": blocked_domain, + "allowed_domains": sorted(self.allowed_domains), + "blocked_resource_type": "document", + "blocked_by": "interceptor", + } await route.fulfill( status=403, headers={"content-type": "text/html"}, @@ -279,35 +362,164 @@ async def _handle_document(self, route: Route, url: str): try: need_api = plugin.needs_api_data(url) page_req = PageRequirement.data(url) if need_api else PageRequirement.nav(url) + timeout_s = self._prefetch_timeout_for_url(url, need_api) pages = await asyncio.wait_for( self.cache_manager.ensure_cached([page_req], plugin), - timeout=PREFETCH_TIMEOUT, + timeout=timeout_s, ) - self.cached_pages.update(pages) - - cached = pages.get(normalize_url(url)) - if cached and cached.html: - if cached.accessibility_tree: - self._accessibility_trees[normalized] = cached.accessibility_tree + cached_result = pages.get(normalize_url(url)) + if cached_result: + self._register_cache_fetch(cached_result) + self.cached_pages[cached_result.normalized_url] = cached_result.page + + if cached_result and cached_result.page.html: + if cached_result.page.accessibility_tree: + self._accessibility_trees[normalized] = cached_result.page.accessibility_tree await route.fulfill( status=200, headers={"content-type": "text/html; charset=utf-8"}, - body=cached.html, + body=cached_result.page.html, ) return except asyncio.TimeoutError: - self._pending_error = CacheFatalError( - f"Pre-fetch timeout ({PREFETCH_TIMEOUT}s)", url=url, + self._register_prefetch_timeout(url) + cached_fallback = self._find_any_cached_page(url) + if cached_fallback and cached_fallback.html: + if cached_fallback.accessibility_tree: + self._accessibility_trees[normalized] = cached_fallback.accessibility_tree + self.cached_pages[normalized] = cached_fallback + await route.fulfill( + status=200, + headers={"content-type": "text/html; charset=utf-8"}, + body=cached_fallback.html, + ) + return + self._last_error_metadata = { + "classification": "env_prefetch_timeout", + "layer": "cache", + "prefetch_attempted": True, + "prefetch_timeout_kind": "asyncio_timeout", + "prefetch_elapsed_s": timeout_s, + "soft_fail_triggered": True, + "soft_fail_reason": "prefetch_timeout", + "stale_fallback_used": False, + } + await route.fulfill( + status=200, + headers={"content-type": "text/html; charset=utf-8"}, + body=self._build_soft_error_page( + url=url, + title="Pre-fetch timeout", + message=f"The page could not be prefetched within {timeout_s}s. Try another page or retry later.", + ), ) - await route.abort("failed") return except CacheFatalError as e: - self._pending_error = e - await route.abort("failed") + if e.fatal and not self._should_soft_fail_domain(url): + e.evidence.setdefault("interceptor", {}) + e.evidence["interceptor"].update( + { + "prefetch_attempted": True, + "soft_fail_triggered": False, + "stale_fallback_used": False, + } + ) + e.plugin_name = getattr(plugin, "name", None) if plugin is not None else e.plugin_name + self._pending_error = e + await route.abort("failed") + else: + self._register_soft_failure(url) + cached_fallback = self._find_any_cached_page(url) + if cached_fallback and cached_fallback.html: + self._last_error_metadata = { + "classification": "env_cache_fetch_failed", + "layer": "cache", + "prefetch_attempted": True, + "soft_fail_triggered": True, + "soft_fail_reason": str(e), + "stale_fallback_used": True, + } + if cached_fallback.accessibility_tree: + self._accessibility_trees[normalized] = cached_fallback.accessibility_tree + self.cached_pages[normalized] = cached_fallback + await route.fulfill( + status=200, + headers={"content-type": "text/html; charset=utf-8"}, + body=cached_fallback.html, + ) + return + self._last_error_metadata = { + "classification": "env_cache_fetch_failed", + "layer": "cache", + "prefetch_attempted": True, + "soft_fail_triggered": True, + "soft_fail_reason": str(e), + "stale_fallback_used": False, + } + await route.fulfill( + status=200, + headers={"content-type": "text/html; charset=utf-8"}, + body=self._build_soft_error_page( + url=url, + title="Page unavailable", + message=str(e), + ), + ) return except Exception as e: - self._pending_error = CacheFatalError(str(e), url=url) - await route.abort("failed") + if self._should_soft_fail_domain(url): + self._register_soft_failure(url) + cached_fallback = self._find_any_cached_page(url) + if cached_fallback and cached_fallback.html: + self._last_error_metadata = { + "classification": "env_cache_fetch_failed", + "layer": "cache", + "prefetch_attempted": True, + "soft_fail_triggered": True, + "soft_fail_reason": str(e), + "stale_fallback_used": True, + } + if cached_fallback.accessibility_tree: + self._accessibility_trees[normalized] = cached_fallback.accessibility_tree + self.cached_pages[normalized] = cached_fallback + await route.fulfill( + status=200, + headers={"content-type": "text/html; charset=utf-8"}, + body=cached_fallback.html, + ) + return + self._last_error_metadata = { + "classification": "env_cache_fetch_failed", + "layer": "cache", + "prefetch_attempted": True, + "soft_fail_triggered": True, + "soft_fail_reason": str(e), + "stale_fallback_used": False, + } + await route.fulfill( + status=200, + headers={"content-type": "text/html; charset=utf-8"}, + body=self._build_soft_error_page( + url=url, + title="Temporary page unavailable", + message=str(e), + ), + ) + else: + self._pending_error = CacheFatalError( + str(e), + url=url, + kind="prefetch_failed", + evidence={ + "interceptor": { + "prefetch_attempted": True, + "soft_fail_triggered": False, + "stale_fallback_used": False, + } + }, + plugin_name=getattr(plugin, "name", None) if plugin is not None else None, + ) + await route.abort("failed") return # Fallback: LIVE mode or URL without plugin → pass through to network @@ -415,6 +627,112 @@ def _find_cached_page(self, url: str) -> Optional[CachedPage]: return None + def _find_any_cached_page(self, url: str) -> Optional[CachedPage]: + normalized = normalize_url(url) + parsed = urlparse(normalized) + + candidates = [normalized] + if parsed.netloc.startswith("www."): + candidates.append(normalized.replace("www.", "", 1)) + else: + candidates.append(normalized.replace("://", "://www.", 1)) + + for candidate in candidates: + page = self.cached_pages.get(candidate) or self._url_map.get(candidate) + if page and page.html: + return page + + if self.cache_manager: + for candidate in self._url_variants(url, parsed): + page = self.cache_manager.get_cached(candidate) + if page and page.html: + return page + return None + + def _prefetch_timeout_for_page(self, need_api: bool) -> int: + return DEFAULT_PREFETCH_TIMEOUT_DATA if need_api else DEFAULT_PREFETCH_TIMEOUT_NAV + + def _domain_key(self, url: str) -> str: + hostname = urlparse(url).netloc.lower() + if "coingecko" in hostname: + return "coingecko" + if "stooq" in hostname: + return "stooq" + if "news.ycombinator" in hostname: + return "news_ycombinator" + if "taostats" in hostname: + return "taostats" + return "default" + + def _prefetch_timeout_for_url(self, url: str, need_api: bool) -> int: + timeout = self._prefetch_timeout_for_page(need_api) + domain_key = self._domain_key(url) + if domain_key == "coingecko": + return max(timeout, 35 if need_api else 18) + if domain_key == "stooq": + return max(timeout, 45 if need_api else 20) + if domain_key == "taostats": + return max(timeout, 35 if need_api else 18) + return timeout + + def _register_prefetch_timeout(self, url: str): + self.stats.prefetch_timeouts += 1 + domain_key = self._domain_key(url) + self.stats.per_domain_prefetch_timeouts[domain_key] = ( + self.stats.per_domain_prefetch_timeouts.get(domain_key, 0) + 1 + ) + self._register_soft_failure(url) + + def _register_soft_failure(self, url: str): + self.stats.soft_failures += 1 + domain_key = self._domain_key(url) + self.stats.per_domain_soft_failures[domain_key] = ( + self.stats.per_domain_soft_failures.get(domain_key, 0) + 1 + ) + + def _should_soft_fail_domain(self, url: str) -> bool: + return self._soft_fail_policy(url) is not None + + def _soft_fail_policy(self, url: str) -> str | None: + parsed = urlparse(url) + hostname = parsed.netloc.lower() + path = parsed.path.lower() + combined = f"{hostname}{path}" + if any(pattern.match(combined) for pattern in self._required_soft_url_regexes): + return "required_soft" + if any(pattern.match(combined) for pattern in self._prefetch_soft_url_regexes): + return "prefetch_soft" + if any(domain in hostname for domain in self._soft_fail_domains): + return "domain_soft" + combined = f"{hostname}{path}" + if any(pattern in combined for pattern in self._soft_fail_url_patterns): + return "domain_soft" + return None + + def _register_cache_fetch(self, result: CacheFetchResult): + domain_key = result.domain_key + if result.source == "stale": + self.stats.stale_hits += 1 + if result.source in ("fresh", "stale"): + self.stats.per_domain_miss_count[domain_key] = ( + self.stats.per_domain_miss_count.get(domain_key, 0) + 1 + ) + self.stats.per_domain_miss_latency_s[domain_key] = ( + self.stats.per_domain_miss_latency_s.get(domain_key, 0.0) + result.latency_s + ) + + def _build_soft_error_page(self, url: str, title: str, message: str) -> str: + safe_url = url.replace("&", "&").replace("<", "<").replace(">", ">") + safe_message = message.replace("&", "&").replace("<", "<").replace(">", ">") + return ( + "" + f"{safe_message}
" + f"URL: {safe_url}
" + "This page could not be prefetched. You may retry or navigate elsewhere.
" + "" + ) + @staticmethod def _url_variants(url: str, parsed) -> List[str]: """Generate URL variants for cache lookup (original, without www, with www).""" @@ -484,6 +802,14 @@ def get_and_clear_error(self) -> Optional[Exception]: self._pending_error = None return err + def get_and_clear_error_metadata(self) -> Dict[str, Any]: + metadata = dict(self._last_error_metadata) + self._last_error_metadata = {} + return metadata + + def get_last_blocked_document_metadata(self) -> Dict[str, Any]: + return dict(self._last_blocked_document_metadata) + def raise_if_error(self, url: str = None) -> None: """Check for pending error and raise as CacheFatalError if present.""" err = self._pending_error @@ -491,7 +817,11 @@ def raise_if_error(self, url: str = None) -> None: if err is not None: if isinstance(err, CacheFatalError): raise err - raise CacheFatalError(str(err), url=url) + raise CacheFatalError( + str(err), + url=url, + evidence={"interceptor": self.get_and_clear_error_metadata()}, + ) def get_stats(self) -> dict: """Get interception statistics.""" @@ -509,3 +839,5 @@ def cleanup(self): self.cached_pages.clear() self.stats = InterceptorStats() self._pending_error = None + self._last_error_metadata = {} + self._last_blocked_document_metadata = {} diff --git a/liveweb_arena/core/reachability_audit.py b/liveweb_arena/core/reachability_audit.py new file mode 100644 index 0000000..a82f31f --- /dev/null +++ b/liveweb_arena/core/reachability_audit.py @@ -0,0 +1,589 @@ +from __future__ import annotations + +from dataclasses import asdict, dataclass, field +from typing import Any, Dict, Optional +from urllib.parse import urlparse + +from liveweb_arena.core.cache import normalize_url +from liveweb_arena.core.site_probe import probe_site + + +@dataclass +class ReachabilityAuditResult: + status: str + classification: str + layer: str + url: str + normalized_url: str + domain: str + plugin_name: Optional[str] = None + reason: Optional[str] = None + http_status: Optional[int] = None + exception_type: Optional[str] = None + raw_exception_type: Optional[str] = None + raw_exception_message: Optional[str] = None + navigation_stage: Optional[str] = None + resource_type: Optional[str] = None + attempt_index: Optional[int] = None + max_attempts: Optional[int] = None + browser_reused: Optional[bool] = None + context_reused: Optional[bool] = None + page_recreated_before_retry: Optional[bool] = None + is_environment_failure: bool = False + is_model_hallucination: bool = False + evidence: Dict[str, Any] = field(default_factory=dict) + + def to_dict(self) -> dict[str, Any]: + return asdict(self) + + +def _domain(url: str) -> str: + try: + return (urlparse(url).hostname or "").lower() + except Exception: + return "" + + +def _matches_allowed_domain(domain: str, allowed_domain: str) -> bool: + return domain == allowed_domain or domain.endswith("." + allowed_domain) + + +def _is_taostats_list_url(url: str) -> bool: + parsed = urlparse(url) + host = (parsed.hostname or "").lower() + path = parsed.path.lower().rstrip("/") + return "taostats.io" in host and path in {"", "/subnets"} + + +def _is_taostats_detail_url(url: str) -> bool: + parsed = urlparse(url) + host = (parsed.hostname or "").lower() + path = parsed.path.lower() + return "taostats.io" in host and (path.startswith("/subnet/") or path.startswith("/subnets/")) + + +def _infer_taostats_interaction_kind(target_locator: str | None, raw_exception_message: str | None) -> str: + text = " ".join(part for part in [target_locator or "", raw_exception_message or ""] if part).lower() + if any(marker in text for marker in ("page-item", "next", "prev", "next page")): + return "paginate" + if any(marker in text for marker in ("rt-th", "dt-orderable", "1h", "24h", "1w", "1m", "sort")): + return "sort" + if any(marker in text for marker in ("all", "rows:", "dataTables_length", ".dataTables_length")): + return "show_all" + return "unknown" + + +def _is_invalid_selector_message(raw_exception_type: str | None, raw_exception_message: str | None) -> bool: + text = " ".join(part for part in [raw_exception_type or "", raw_exception_message or ""] if part).lower() + return any( + marker in text + for marker in ( + "not a valid selector", + "unexpected token", + "queryselectorall", + "selector engine", + "selector is malformed", + "unknown engine", + ) + ) + + +def _is_missing_ui_target_message(raw_exception_type: str | None, raw_exception_message: str | None) -> bool: + text = " ".join(part for part in [raw_exception_type or "", raw_exception_message or ""] if part).lower() + return "no element found with role" in text or "no element found for selector" in text + + +def _build_disallowed_domain_audit( + *, + url: str, + normalized: str, + domain: str, + plugin_name: str | None, + reason: str | None, + http_status: int | None, + exception: BaseException | None, + raw_exception_type: str | None, + raw_exception_message: str | None, + navigation_stage: str | None, + resource_type: str | None, + attempt_index: int | None, + max_attempts: int | None, + browser_reused: bool | None, + context_reused: bool | None, + page_recreated_before_retry: bool | None, + evidence: dict[str, Any], +) -> "ReachabilityAuditResult": + return ReachabilityAuditResult( + status="unreachable", + classification="model_disallowed_domain", + layer="model", + url=url, + normalized_url=normalized, + domain=domain, + plugin_name=plugin_name, + reason=reason or "Domain not allowed", + http_status=http_status, + exception_type=type(exception).__name__ if exception is not None else None, + raw_exception_type=raw_exception_type, + raw_exception_message=raw_exception_message, + navigation_stage=navigation_stage, + resource_type=resource_type, + attempt_index=attempt_index, + max_attempts=max_attempts, + browser_reused=browser_reused, + context_reused=context_reused, + page_recreated_before_retry=page_recreated_before_retry, + is_environment_failure=False, + is_model_hallucination=True, + evidence=evidence, + ) + + +def classify_stooq_url(url: str) -> str | None: + parsed = urlparse(url) + host = (parsed.hostname or "").lower() + if "stooq.com" not in host: + return None + path = parsed.path.lower() + query = parsed.query.lower() + + if host.startswith("www."): + return "env_tls_error" + if "/q/conv/" in path or "/s/mst/" in path or "quote.php" in path: + return "model_invalid_url_shape" + if "q=" in query and "s=" not in query: + return "model_invalid_url_shape" + return None + + +def classify_coingecko_url(url: str) -> str | None: + parsed = urlparse(url) + host = (parsed.hostname or "").lower() + if "coingecko.com" not in host: + return None + path = parsed.path.lower() + if "/coins/" in path: + slug = path.split("/coins/", 1)[1].split("/", 1)[0] + # Very loose first-pass hallucination heuristic for stock/company names in crypto namespace. + if slug in { + "microsoft", + "google", + "exxon-mobil", + "jpmorgan-chase", + "tesla", + "walmart", + "apple", + "amazon", + }: + return "model_invalid_asset_id" + return None + + +def classify_taostats_url(url: str) -> str | None: + parsed = urlparse(url) + host = (parsed.hostname or "").lower() + if "taostats.io" not in host: + return None + path = parsed.path.lower() + if path in {"", "/", "/subnets"}: + return None + if path.startswith("/subnets/"): + return None + return "model_invalid_url_shape" + + +def classify_model_hallucination(url: str) -> str | None: + return classify_stooq_url(url) or classify_coingecko_url(url) or classify_taostats_url(url) + + +def audit_reachability_failure( + *, + url: str, + plugin_name: str | None, + plugin: Any | None = None, + exception: BaseException | None = None, + reason: str | None = None, + allowed_domains: set[str] | None = None, + http_status: int | None = None, + evidence: dict[str, Any] | None = None, +) -> ReachabilityAuditResult: + normalized = normalize_url(url) if url else "" + domain = _domain(url) + evidence = dict(evidence or {}) + exception_text = f"{type(exception).__name__}: {exception}" if exception is not None else "" + exception_lower = exception_text.lower() + plugin_classification = plugin.classify_url(url) if plugin is not None and hasattr(plugin, "classify_url") else None + hallucination_class = plugin_classification or classify_model_hallucination(url) + + navigation_metadata = evidence.get("navigation_metadata") or {} + raw_exception_type = navigation_metadata.get("raw_exception_type") or (type(exception).__name__ if exception is not None else None) + raw_exception_message = navigation_metadata.get("raw_exception_message") or (str(exception) if exception is not None else None) + raw_exception_lower = (raw_exception_message or "").lower() + navigation_stage = navigation_metadata.get("navigation_stage") + resource_type = navigation_metadata.get("resource_type") + attempt_index = navigation_metadata.get("attempt_index") + max_attempts = navigation_metadata.get("max_attempts") + browser_reused = navigation_metadata.get("browser_reused") + context_reused = navigation_metadata.get("context_reused") + page_recreated_before_retry = navigation_metadata.get("page_recreated_before_retry") + navigation_evidence = dict(navigation_metadata.get("evidence") or {}) + interceptor_metadata = dict(evidence.get("interceptor") or {}) + if not resource_type: + resource_type = interceptor_metadata.get("blocked_resource_type") or resource_type + + if allowed_domains and domain: + normalized_allowed = {(item or "").lower() for item in allowed_domains if item} + if normalized_allowed and not any(_matches_allowed_domain(domain, allowed) for allowed in normalized_allowed): + evidence.setdefault("interceptor", interceptor_metadata) + evidence.setdefault("allowed_domains", sorted(normalized_allowed)) + return _build_disallowed_domain_audit( + url=url, + normalized=normalized, + domain=domain, + plugin_name=plugin_name, + reason=reason, + http_status=http_status, + exception=exception, + raw_exception_type=raw_exception_type, + raw_exception_message=raw_exception_message, + navigation_stage=navigation_stage, + resource_type=resource_type, + attempt_index=attempt_index, + max_attempts=max_attempts, + browser_reused=browser_reused, + context_reused=context_reused, + page_recreated_before_retry=page_recreated_before_retry, + evidence=evidence, + ) + + if hallucination_class is not None: + is_env = hallucination_class.startswith("env_") or hallucination_class.startswith("ambiguous_") + return ReachabilityAuditResult( + status="unreachable", + classification=hallucination_class, + layer="model" if not is_env else "tls", + url=url, + normalized_url=normalized, + domain=domain, + plugin_name=plugin_name, + reason=reason or exception_text, + http_status=http_status, + exception_type=type(exception).__name__ if exception is not None else None, + raw_exception_type=raw_exception_type, + raw_exception_message=raw_exception_message, + navigation_stage=navigation_stage, + resource_type=resource_type, + attempt_index=attempt_index, + max_attempts=max_attempts, + browser_reused=browser_reused, + context_reused=context_reused, + page_recreated_before_retry=page_recreated_before_retry, + is_environment_failure=is_env, + is_model_hallucination=not is_env, + evidence=evidence, + ) + + combined_lower = " ".join(part for part in [exception_lower, raw_exception_lower] if part) + + if "taostats.io" in domain and _is_taostats_list_url(url): + target_locator = ( + navigation_evidence.get("selector") + or navigation_evidence.get("target_locator") + or ( + f"role={navigation_evidence.get('role')} name={navigation_evidence.get('name')}" + if navigation_evidence.get("role") + else None + ) + ) + interaction_kind = _infer_taostats_interaction_kind(target_locator, raw_exception_message) + selector_syntax_invalid = _is_invalid_selector_message(raw_exception_type, raw_exception_message) + missing_ui_target = _is_missing_ui_target_message(raw_exception_type, raw_exception_message) + if selector_syntax_invalid: + evidence.update( + { + "page_kind": "taostats_list", + "interaction_kind": interaction_kind, + "target_locator": target_locator, + "selector_syntax_invalid": True, + } + ) + return ReachabilityAuditResult( + status="unreachable", + classification="model_invalid_selector", + layer="model", + url=url, + normalized_url=normalized, + domain=domain, + plugin_name=plugin_name, + reason=reason or exception_text, + http_status=http_status, + exception_type=type(exception).__name__ if exception is not None else None, + raw_exception_type=raw_exception_type, + raw_exception_message=raw_exception_message, + navigation_stage=navigation_stage, + resource_type=resource_type, + attempt_index=attempt_index, + max_attempts=max_attempts, + browser_reused=browser_reused, + context_reused=context_reused, + page_recreated_before_retry=page_recreated_before_retry, + is_environment_failure=False, + is_model_hallucination=True, + evidence=evidence, + ) + if missing_ui_target: + evidence.update( + { + "page_kind": "taostats_list", + "interaction_kind": interaction_kind, + "target_locator": target_locator, + "selector_syntax_invalid": False, + "ui_target_missing": True, + } + ) + return ReachabilityAuditResult( + status="unreachable", + classification="model_invalid_ui_target", + layer="model", + url=url, + normalized_url=normalized, + domain=domain, + plugin_name=plugin_name, + reason=reason or exception_text, + http_status=http_status, + exception_type=type(exception).__name__ if exception is not None else None, + raw_exception_type=raw_exception_type, + raw_exception_message=raw_exception_message, + navigation_stage=navigation_stage, + resource_type=resource_type, + attempt_index=attempt_index, + max_attempts=max_attempts, + browser_reused=browser_reused, + context_reused=context_reused, + page_recreated_before_retry=page_recreated_before_retry, + is_environment_failure=False, + is_model_hallucination=True, + evidence=evidence, + ) + if ( + (navigation_stage or "").startswith("action_") + and ( + "timeout" in combined_lower + or "too many consecutive action failures" in combined_lower + or "no element found with role" in combined_lower + ) + ): + evidence.update( + { + "page_kind": "taostats_list", + "interaction_kind": interaction_kind, + "target_locator": target_locator, + "selector_syntax_invalid": False, + } + ) + return ReachabilityAuditResult( + status="unreachable", + classification="env_taostats_list_action_timeout", + layer="browser", + url=url, + normalized_url=normalized, + domain=domain, + plugin_name=plugin_name, + reason=reason or exception_text, + http_status=http_status, + exception_type=type(exception).__name__ if exception is not None else None, + raw_exception_type=raw_exception_type, + raw_exception_message=raw_exception_message, + navigation_stage=navigation_stage, + resource_type=resource_type, + attempt_index=attempt_index, + max_attempts=max_attempts, + browser_reused=browser_reused, + context_reused=context_reused, + page_recreated_before_retry=page_recreated_before_retry, + is_environment_failure=True, + is_model_hallucination=False, + evidence=evidence, + ) + + taostats_prefetch = dict(evidence.get("taostats_prefetch") or {}) + if not taostats_prefetch: + taostats_prefetch = { + key: evidence.get(key) + for key in ("page_kind", "prefetch_phase", "wait_target", "background_refresh") + if evidence.get(key) is not None + } + if "taostats.io" in domain and _is_taostats_detail_url(url): + prefetch_phase = taostats_prefetch.get("prefetch_phase") + wait_target = taostats_prefetch.get("wait_target") + background_refresh = bool(taostats_prefetch.get("background_refresh", False)) + page_kind = taostats_prefetch.get("page_kind") + detail_setup_soft_failed = bool(taostats_prefetch.get("detail_setup_soft_failed", False)) + page_body_ready = taostats_prefetch.get("page_body_ready") + if prefetch_phase or page_kind == "taostats_detail": + if detail_setup_soft_failed and page_body_ready is True: + evidence.update( + { + "page_kind": "taostats_detail", + "prefetch_phase": prefetch_phase or "setup_page_for_cache", + "wait_target": wait_target, + "background_refresh": background_refresh, + "page_body_ready": True, + "detail_setup_soft_failed": True, + } + ) + else: + evidence.update( + { + "page_kind": "taostats_detail", + "prefetch_phase": prefetch_phase or "goto", + "wait_target": wait_target, + "background_refresh": background_refresh, + } + ) + return ReachabilityAuditResult( + status="unreachable", + classification="env_taostats_detail_prefetch_invalidated", + layer="browser", + url=url, + normalized_url=normalized, + domain=domain, + plugin_name=plugin_name, + reason=reason or exception_text, + http_status=http_status, + exception_type=type(exception).__name__ if exception is not None else None, + raw_exception_type=raw_exception_type, + raw_exception_message=raw_exception_message, + navigation_stage=navigation_stage, + resource_type=resource_type, + attempt_index=attempt_index, + max_attempts=max_attempts, + browser_reused=browser_reused, + context_reused=context_reused, + page_recreated_before_retry=page_recreated_before_retry, + is_environment_failure=True, + is_model_hallucination=False, + evidence=evidence, + ) + + if navigation_metadata.get("classification_hint") in { + "env_nav_aborted", + "env_target_closed", + "env_nav_timeout", + "env_browser_context_invalidated", + }: + return ReachabilityAuditResult( + status="unreachable", + classification=navigation_metadata["classification_hint"], + layer="browser", + url=url, + normalized_url=normalized, + domain=domain, + plugin_name=plugin_name, + reason=reason or exception_text, + http_status=http_status, + exception_type=type(exception).__name__ if exception is not None else None, + raw_exception_type=raw_exception_type, + raw_exception_message=raw_exception_message, + navigation_stage=navigation_stage, + resource_type=resource_type, + attempt_index=attempt_index, + max_attempts=max_attempts, + browser_reused=browser_reused, + context_reused=context_reused, + page_recreated_before_retry=page_recreated_before_retry, + is_environment_failure=True, + is_model_hallucination=False, + evidence=evidence, + ) + + probe = probe_site(url) if url else None + if probe is not None: + evidence.setdefault("site_probe", probe.to_dict()) + if http_status is None: + http_status = probe.http_status + + if http_status == 403 and "coingecko" in domain: + return ReachabilityAuditResult( + status="unreachable", + classification="env_cdn_blocked", + layer="cdn", + url=url, + normalized_url=normalized, + domain=domain, + plugin_name=plugin_name, + reason=reason or exception_text, + http_status=http_status, + exception_type=type(exception).__name__ if exception is not None else None, + raw_exception_type=raw_exception_type, + raw_exception_message=raw_exception_message, + navigation_stage=navigation_stage, + resource_type=resource_type, + attempt_index=attempt_index, + max_attempts=max_attempts, + browser_reused=browser_reused, + context_reused=context_reused, + page_recreated_before_retry=page_recreated_before_retry, + is_environment_failure=True, + is_model_hallucination=False, + evidence=evidence, + ) + + if probe and probe.exception_type == "SSLError": + classification = "env_tls_error" + layer = "tls" + elif "certificate_verify_failed" in combined_lower or "sslerror" in combined_lower: + classification = "env_tls_error" + layer = "tls" + elif "err_aborted" in combined_lower or "frame was detached" in combined_lower: + classification = "env_nav_aborted" + layer = "browser" + elif "target page, context or browser has been closed" in combined_lower or "targetclosederror" in combined_lower: + classification = "env_target_closed" + layer = "browser" + elif "timeout" in combined_lower: + classification = "env_nav_timeout" + layer = "browser" + elif "handler is closed" in combined_lower or "transport closed" in combined_lower or "connection closed" in combined_lower: + classification = "env_browser_context_invalidated" + layer = "browser" + elif "status=429" in combined_lower: + classification = "env_api_rate_limited" + layer = "api" + elif "empty response for coin_id" in combined_lower: + classification = "env_api_empty" + layer = "api" + elif http_status is not None and 400 <= http_status < 500: + classification = "env_http_4xx" + layer = "cdn" + elif http_status is not None and http_status >= 500: + classification = "env_http_5xx" + layer = "cdn" + else: + classification = "ambiguous_navigation_failure" + layer = "browser" + + return ReachabilityAuditResult( + status="unreachable", + classification=classification, + layer=layer, + url=url, + normalized_url=normalized, + domain=domain, + plugin_name=plugin_name, + reason=reason or exception_text, + http_status=http_status, + exception_type=type(exception).__name__ if exception is not None else None, + raw_exception_type=raw_exception_type, + raw_exception_message=raw_exception_message, + navigation_stage=navigation_stage, + resource_type=resource_type, + attempt_index=attempt_index, + max_attempts=max_attempts, + browser_reused=browser_reused, + context_reused=context_reused, + page_recreated_before_retry=page_recreated_before_retry, + is_environment_failure=classification.startswith("env_") or classification.startswith("ambiguous_"), + is_model_hallucination=False, + evidence=evidence, + ) diff --git a/liveweb_arena/core/runtime_profiles.py b/liveweb_arena/core/runtime_profiles.py new file mode 100644 index 0000000..82ba9b5 --- /dev/null +++ b/liveweb_arena/core/runtime_profiles.py @@ -0,0 +1,24 @@ +from __future__ import annotations + +STRICT_EVAL_PROFILE = "strict_eval" +FAST_COLLECT_PROFILE = "fast_collect" + + +def normalize_runtime_profile(profile: str | None) -> str: + value = (profile or "").strip().lower() + if value in {"", "eval", STRICT_EVAL_PROFILE}: + return STRICT_EVAL_PROFILE + if value in {"collect", FAST_COLLECT_PROFILE}: + return FAST_COLLECT_PROFILE + raise ValueError(f"Unknown runtime profile: {profile}") + + +def runtime_profile_to_behavior_mode(profile: str | None) -> str: + normalized = normalize_runtime_profile(profile) + if normalized == FAST_COLLECT_PROFILE: + return "collect" + return "eval" + + +def is_fast_collect_profile(profile: str | None) -> bool: + return normalize_runtime_profile(profile) == FAST_COLLECT_PROFILE diff --git a/liveweb_arena/core/site_probe.py b/liveweb_arena/core/site_probe.py new file mode 100644 index 0000000..31a7681 --- /dev/null +++ b/liveweb_arena/core/site_probe.py @@ -0,0 +1,56 @@ +from __future__ import annotations + +from dataclasses import asdict, dataclass +from typing import Any + +import requests +import time + + +@dataclass +class SiteProbeResult: + ok: bool + url: str + final_url: str | None = None + http_status: int | None = None + exception_type: str | None = None + reason: str | None = None + server: str | None = None + cf_ray: str | None = None + location: str | None = None + body_length: int | None = None + elapsed_ms: int | None = None + + def to_dict(self) -> dict[str, Any]: + return asdict(self) + + +def probe_site(url: str, timeout: float = 10.0) -> SiteProbeResult: + start = time.time() + try: + response = requests.get( + url, + timeout=timeout, + headers={"User-Agent": "Mozilla/5.0"}, + allow_redirects=True, + ) + return SiteProbeResult( + ok=response.ok, + url=url, + final_url=response.url, + http_status=response.status_code, + server=response.headers.get("server"), + cf_ray=response.headers.get("cf-ray"), + location=response.headers.get("location"), + body_length=len(response.text or ""), + elapsed_ms=int((time.time() - start) * 1000), + reason=f"http_{response.status_code}" if not response.ok else None, + ) + except Exception as exc: + return SiteProbeResult( + ok=False, + url=url, + exception_type=type(exc).__name__, + reason=str(exc), + elapsed_ms=int((time.time() - start) * 1000), + ) diff --git a/liveweb_arena/core/task_manager.py b/liveweb_arena/core/task_manager.py index 3ae57c8..b453dfd 100644 --- a/liveweb_arena/core/task_manager.py +++ b/liveweb_arena/core/task_manager.py @@ -36,52 +36,76 @@ def _get_plugin(self, name: str) -> BasePlugin: self._plugin_instances[name] = plugin_cls() return self._plugin_instances[name] - async def generate_composite_task( + @staticmethod + def derive_subtask_seed(seed: int, index: int) -> int: + """Derive a deterministic seed for one subtask inside a composite task.""" + hash_input = f"{seed}:{index}".encode() + return int(hashlib.sha256(hash_input).hexdigest()[:8], 16) + + def plan_subtasks( self, seed: int, num_subtasks: int = 2, templates: Optional[List[tuple]] = None, - ) -> CompositeTask: + ) -> List[Dict[str, object]]: """ - Generate a composite task with multiple sub-tasks. - - Args: - seed: Random seed for deterministic generation - num_subtasks: Number of sub-tasks (1-4) - templates: List of (plugin, template_name, variant) tuples; None = random. - variant can be None for random selection or int for specific variant. + Plan deterministic subtask generation without constructing the full CompositeTask. - Returns: - CompositeTask with subtasks and combined_intent + The returned plan is stable and matches the seeds/templates that + generate_composite_task() will use. """ - # Validate num_subtasks num_subtasks = max(1, min(4, num_subtasks)) - - # Initialize RNG with seed for deterministic generation rng = random.Random(seed) - # Build list of (plugin_name, template_name, variant) for each subtask if templates: - # Use specified templates (cycle if not enough) - # Normalize to 3-element tuples selected_templates = [] for i in range(num_subtasks): t = templates[i % len(templates)] if len(t) == 2: - # (plugin, template_name) -> (plugin, template_name, None) selected_templates.append((t[0], t[1], None)) else: - # Already (plugin, template_name, variant) selected_templates.append(t) else: - # Random selection from available plugins (no specific template or variant) available = list(self._plugin_classes.keys()) if len(available) == 0: raise ValueError("No plugins available") selected_templates = [(rng.choice(available), None, None) for _ in range(num_subtasks)] + plan = [] + for i, (plugin_name, template_name, variant) in enumerate(selected_templates): + plan.append( + { + "subtask_index": i, + "plugin_name": plugin_name, + "template_name": template_name, + "variant": variant, + "subtask_seed": self.derive_subtask_seed(seed, i), + } + ) + return plan + + async def generate_composite_task( + self, + seed: int, + num_subtasks: int = 2, + templates: Optional[List[tuple]] = None, + ) -> CompositeTask: + """ + Generate a composite task with multiple sub-tasks. + + Args: + seed: Random seed for deterministic generation + num_subtasks: Number of sub-tasks (1-4) + templates: List of (plugin, template_name, variant) tuples; None = random. + variant can be None for random selection or int for specific variant. + + Returns: + CompositeTask with subtasks and combined_intent + """ + plan = self.plan_subtasks(seed=seed, num_subtasks=num_subtasks, templates=templates) + # Initialize plugins that will be used (some need API data before question generation) - plugins_to_use = set(p for p, _, _ in selected_templates) + plugins_to_use = {item["plugin_name"] for item in plan} for plugin_name in plugins_to_use: plugin = self._get_plugin(plugin_name) if hasattr(plugin, 'initialize'): @@ -90,11 +114,12 @@ async def generate_composite_task( # Generate sub-tasks subtasks: List[SubTask] = [] - for i, (plugin_name, template_name, variant) in enumerate(selected_templates): + for i, item in enumerate(plan): + plugin_name = str(item["plugin_name"]) + template_name = item["template_name"] + variant = item["variant"] plugin = self._get_plugin(plugin_name) - # Derive seed for this sub-task (hash-based to avoid collisions) - hash_input = f"{seed}:{i}".encode() - subtask_seed = int(hashlib.sha256(hash_input).hexdigest()[:8], 16) + subtask_seed = int(item["subtask_seed"]) subtask = await plugin.generate_task( subtask_seed, template_name=template_name, @@ -104,20 +129,13 @@ async def generate_composite_task( subtask.answer_tag = f"answer{i + 1}" subtasks.append(subtask) - # Include hints only for plugins used in the task - # Avoids eagerly instantiating unused plugins (which may trigger API calls) - plugin_hints: Dict[str, str] = {} - for plugin_name in plugins_to_use: - plugin = self._get_plugin(plugin_name) - plugin_hints[plugin_name] = f"Use {', '.join(plugin.allowed_domains)} to find information." - # Build combined intent (without start_url - Agent decides navigation) combined_intent = self._build_combined_intent(subtasks) return CompositeTask( subtasks=subtasks, combined_intent=combined_intent, - plugin_hints=plugin_hints, + plugin_hints={}, seed=seed, ) diff --git a/liveweb_arena/core/task_registry.py b/liveweb_arena/core/task_registry.py index 8257cc2..d1cef6b 100644 --- a/liveweb_arena/core/task_registry.py +++ b/liveweb_arena/core/task_registry.py @@ -140,6 +140,19 @@ class TaskRegistry: 81: ("openlibrary", "openlibrary_subject_multi_condition"), 82: ("openlibrary", "openlibrary_book_comparison"), 84: ("openlibrary", "openlibrary_author_editions"), + + # Open Meteo templates + 85: ("openmeteo", "openmeteo_current"), + 86: ("openmeteo", "openmeteo_comparison"), + 87: ("openmeteo", "openmeteo_hourly_extrema"), + 88: ("openmeteo", "openmeteo_forecast_trend"), + + # ArXiv templates + 90: ("arxiv", "arxiv_paper_info"), + 91: ("arxiv", "arxiv_author_extrema"), + 92: ("arxiv", "arxiv_category_comparison"), + 94: ("arxiv", "arxiv_multi_author_filter"), + 95: ("arxiv", "arxiv_title_length_extrema"), } # Template versions - each version's combinations come AFTER all previous versions @@ -164,6 +177,10 @@ class TaskRegistry: [80, 81], # Version 4: Additional Open Library templates [82, 84], + # Version 5: Open Meteo templates + [85, 86, 87, 88], + # Version 6: ArXiv templates + [90, 91, 92, 94, 95], ] # Combination registry: list of template ID tuples @@ -251,7 +268,7 @@ def parse_task_id(cls, task_id: int) -> Dict[str, Any]: - template_ids: Tuple of template IDs in this combination - templates: List of (plugin, template_name) tuples - variation_seed: Seed for variation within this combination - - num_tasks: Number of sub-tasks (3-5) + - num_tasks: Number of sub-tasks (1-4) Raises: ValueError: If task_id is out of valid range @@ -273,7 +290,7 @@ def parse_task_id(cls, task_id: int) -> Dict[str, Any]: template_ids = cls._combinations[combo_index] templates = [cls.TEMPLATES[tid] for tid in template_ids] - num_tasks = (variation_seed % 3) + 2 + num_tasks = (variation_seed % 4) + 1 return { "task_id": task_id, diff --git a/liveweb_arena/core/task_registry_loader.py b/liveweb_arena/core/task_registry_loader.py new file mode 100644 index 0000000..45eae54 --- /dev/null +++ b/liveweb_arena/core/task_registry_loader.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +import importlib.util +import os +from pathlib import Path +from typing import Any + +from .runtime_profiles import is_fast_collect_profile, normalize_runtime_profile + +STRICT_TASK_REGISTRY_DIR_ENV = "LIVEWEB_STRICT_TASK_REGISTRY_DIR" + + +def _load_task_registry_module_from_dir(source_dir: Path): + module_path = source_dir / "liveweb_arena" / "core" / "task_registry.py" + spec = importlib.util.spec_from_file_location("liveweb_runtime_task_registry", module_path) + if spec is None or spec.loader is None: + raise RuntimeError(f"Failed to load task registry from {module_path}") + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +def resolve_task_registry_source_dir(runtime_profile: str | None = None) -> Path | None: + profile = normalize_runtime_profile(runtime_profile) + if is_fast_collect_profile(profile): + return None + raw = os.getenv(STRICT_TASK_REGISTRY_DIR_ENV, "").strip() + if not raw: + return None + return Path(raw) + + +def parse_task_id(task_id: int, *, runtime_profile: str | None = None) -> dict[str, Any]: + source_dir = resolve_task_registry_source_dir(runtime_profile) + if source_dir is None: + from .task_registry import parse_task_id as local_parse_task_id + + return local_parse_task_id(task_id) + module = _load_task_registry_module_from_dir(source_dir) + return module.parse_task_id(task_id) + + +def max_task_id(*, runtime_profile: str | None = None) -> int: + source_dir = resolve_task_registry_source_dir(runtime_profile) + if source_dir is None: + from .task_registry import max_task_id as local_max_task_id + + return local_max_task_id() + module = _load_task_registry_module_from_dir(source_dir) + return module.max_task_id() diff --git a/liveweb_arena/core/validators/llm_validator.py b/liveweb_arena/core/validators/llm_validator.py index 0c732bc..87636fc 100644 --- a/liveweb_arena/core/validators/llm_validator.py +++ b/liveweb_arena/core/validators/llm_validator.py @@ -50,6 +50,12 @@ class LLMValidationResult: "Qwen/Qwen3-32B", ] +OPENROUTER_VALIDATION_MODELS: List[str] = [ + "google/gemini-3-flash-preview", + "z-ai/glm-5", + "xiaomi/mimo-v2-pro", +] + OPENAI_VALIDATION_MODELS: List[str] = [ "gpt-4", "gpt-3.5-turbo", @@ -62,8 +68,10 @@ def _get_validation_models(llm_client) -> List[str]: Priority: 1. VALIDATION_MODELS env var (comma-separated) - 2. OpenAI-safe defaults when base_url points to api.openai.com - 3. Project default VALIDATION_MODELS + 2. Provider-specific env override (e.g. VALIDATION_OPENROUTER_MODELS) + 3. OpenAI-safe defaults when base_url points to api.openai.com + 4. OpenRouter-safe defaults when base_url points to openrouter.ai + 5. Project default VALIDATION_MODELS """ env_models = os.getenv("VALIDATION_MODELS", "") if env_models.strip(): @@ -72,8 +80,23 @@ def _get_validation_models(llm_client) -> List[str]: return models base_url = str(getattr(llm_client, "_base_url", "")).lower() + provider_env_key = "" + if "api.openai.com" in base_url: + provider_env_key = "VALIDATION_OPENAI_MODELS" + elif "openrouter.ai" in base_url: + provider_env_key = "VALIDATION_OPENROUTER_MODELS" + + if provider_env_key: + provider_models = os.getenv(provider_env_key, "") + if provider_models.strip(): + models = [m.strip() for m in provider_models.split(",") if m.strip()] + if models: + return models + if "api.openai.com" in base_url: return OPENAI_VALIDATION_MODELS + if "openrouter.ai" in base_url: + return OPENROUTER_VALIDATION_MODELS return VALIDATION_MODELS diff --git a/liveweb_arena/plugins/__init__.py b/liveweb_arena/plugins/__init__.py index 48990ba..8a95417 100644 --- a/liveweb_arena/plugins/__init__.py +++ b/liveweb_arena/plugins/__init__.py @@ -139,6 +139,11 @@ def get_plugin_names() -> List[str]: return list(_plugins.keys()) +def get_disabled_plugins() -> List[str]: + """Return disabled plugin names in a stable order.""" + return sorted(str(name) for name in DISABLED_PLUGINS) + + def reload_plugins(): """Reload all plugins (useful for development).""" _plugins.clear() diff --git a/liveweb_arena/plugins/arxiv/__init__.py b/liveweb_arena/plugins/arxiv/__init__.py new file mode 100644 index 0000000..73945a8 --- /dev/null +++ b/liveweb_arena/plugins/arxiv/__init__.py @@ -0,0 +1,8 @@ +"""ArXiv plugin for browsing and querying academic paper listings.""" + +from .arxiv import ArxivPlugin + +# Import templates to register them +from . import templates as templates # noqa: F401 — import registers templates + +__all__ = ["ArxivPlugin"] diff --git a/liveweb_arena/plugins/arxiv/api_client.py b/liveweb_arena/plugins/arxiv/api_client.py new file mode 100644 index 0000000..058156e --- /dev/null +++ b/liveweb_arena/plugins/arxiv/api_client.py @@ -0,0 +1,243 @@ +"""ArXiv API client with rate limiting. + +Fetches the HTML listing page (arxiv.org/list/| Temperature | {cw.get('temperature', 'N/A')} C |
| Wind Speed | {cw.get('windspeed', 'N/A')} km/h |
| Wind Direction | {cw.get('winddirection', 'N/A')} deg |
| Date | Max Temp | Min Temp | Precip Prob |
|---|
| Time | Temp | Humidity | " + "Wind Speed | Precip Prob |
|---|
Price 123
", + api_data={"symbol": "jnj.us"}, + fetched_at=time.time(), + need_api=True, + ) + with open(shared_file, "w") as f: + json.dump(shared_page.to_dict(), f) + + local_file = url_to_cache_dir(local_cache, normalized) / "page.json" + status, cached = mgr._load_with_status(normalized, local_file, need_api=True) + assert status == "valid" + assert cached is not None + assert cached.api_data == {"symbol": "jnj.us"} + assert local_file.exists() + + +def test_cache_manager_records_taostats_prefetch_cooldown(tmp_path): + mgr = CacheManager(cache_dir=tmp_path, ttl=3600) + url = "https://taostats.io/subnets/73" + err = CacheFatalError( + "Taostats detail prefetch invalidated", + url=url, + kind="taostats_prefetch_invalidated", + fatal=False, + evidence={ + "classification": "env_taostats_detail_prefetch_invalidated", + "page_kind": "taostats_detail", + "prefetch_phase": "setup_page_for_cache", + }, + plugin_name="taostats", + ) + + mgr._maybe_activate_taostats_prefetch_cooldown(url, err) + cooldown = mgr._get_taostats_prefetch_cooldown(url) + assert cooldown is not None + assert cooldown["classification"] == "env_taostats_detail_prefetch_invalidated" + assert cooldown["cooldown_applied"] is True + + +@pytest.mark.asyncio +async def test_cache_manager_short_circuits_taostats_detail_during_cooldown(tmp_path): + mgr = CacheManager(cache_dir=tmp_path, ttl=3600) + url = "https://taostats.io/subnets/73" + mgr._taostats_prefetch_cooldowns[normalize_url(url)] = ( + time.monotonic() + 60, + { + "classification": "env_taostats_detail_prefetch_invalidated", + "page_kind": "taostats_detail", + }, + ) + + with pytest.raises(CacheFatalError, match="cooldown active") as excinfo: + await mgr._ensure_single(url, TaostatsPlugin(), need_api=False) + + assert excinfo.value.kind == "taostats_prefetch_cooldown" diff --git a/tests/core/test_coingecko_plugin.py b/tests/core/test_coingecko_plugin.py new file mode 100644 index 0000000..702c530 --- /dev/null +++ b/tests/core/test_coingecko_plugin.py @@ -0,0 +1,12 @@ +from liveweb_arena.plugins.coingecko.coingecko import CoinGeckoPlugin + + +def test_coingecko_aliases_normalize_common_coin_slugs(): + plugin = CoinGeckoPlugin() + assert plugin._extract_coin_id("https://www.coingecko.com/en/coins/bnb") == "binancecoin" + assert plugin._extract_coin_id("https://www.coingecko.com/en/coins/binance-coin") == "binancecoin" + assert plugin._extract_coin_id("https://www.coingecko.com/en/coins/xrp") == "ripple" + assert plugin._extract_coin_id("https://www.coingecko.com/en/coins/ada") == "cardano" + assert plugin._extract_coin_id("https://www.coingecko.com/en/coins/polkadot-new") == "polkadot" + assert plugin._extract_coin_id("https://www.coingecko.com/en/coins/near-protocol") == "near" + assert plugin._extract_coin_id("https://www.coingecko.com/en/coins/hedera") == "hedera-hashgraph" diff --git a/tests/core/test_interceptor.py b/tests/core/test_interceptor.py index d89bc57..4e76c3b 100644 --- a/tests/core/test_interceptor.py +++ b/tests/core/test_interceptor.py @@ -20,6 +20,29 @@ def _interceptor(cached=None, domains=None, blocked=None, url_validator=None, of ) +class _FakeRequest: + def __init__(self, url: str, resource_type: str = "document"): + self.url = url + self.resource_type = resource_type + + +class _FakeRoute: + def __init__(self, url: str, resource_type: str = "document"): + self.request = _FakeRequest(url, resource_type) + self.fulfilled = None + self.aborted = None + self.continued = False + + async def fulfill(self, **kwargs): + self.fulfilled = kwargs + + async def abort(self, error_code): + self.aborted = error_code + + async def continue_(self): + self.continued = True + + # ── _should_block ────────────────────────────────────────────────── class TestShouldBlock: @@ -70,6 +93,34 @@ def test_port_stripped(self): assert i._is_domain_allowed("http://localhost:8080/api") +class TestSoftFailPolicy: + def test_soft_fail_url_pattern(self, monkeypatch): + monkeypatch.setenv("LIVEWEB_SOFT_FAIL_DOMAINS", "") + monkeypatch.setenv("LIVEWEB_SOFT_FAIL_URL_PATTERNS", "news.ycombinator.com/ask") + i = _interceptor() + assert i._should_soft_fail_domain("https://news.ycombinator.com/ask") + assert not i._should_soft_fail_domain("https://news.ycombinator.com/newest") + + def test_required_soft_regex_policy(self, monkeypatch): + monkeypatch.setenv("LIVEWEB_SOFT_FAIL_DOMAINS", "") + monkeypatch.setenv("LIVEWEB_SOFT_FAIL_URL_PATTERNS", "") + monkeypatch.setenv("LIVEWEB_REQUIRED_SOFT_URL_REGEXES", r"^news\.ycombinator\.com/?$,^news\.ycombinator\.com/(ask|show)(?:[/?].*)?$") + monkeypatch.setenv("LIVEWEB_PREFETCH_SOFT_URL_REGEXES", r"^channelsurfer\.tv(?:/.*)?$") + i = _interceptor() + assert i._soft_fail_policy("https://news.ycombinator.com/") == "required_soft" + assert i._soft_fail_policy("https://news.ycombinator.com/ask") == "required_soft" + assert i._soft_fail_policy("https://channelsurfer.tv/") == "prefetch_soft" + assert i._soft_fail_policy("https://news.ycombinator.com/newest") is None + + def test_default_soft_fail_domains_cover_high_noise_sites(self, monkeypatch): + monkeypatch.delenv("LIVEWEB_SOFT_FAIL_DOMAINS", raising=False) + monkeypatch.delenv("LIVEWEB_PREFETCH_SOFT_URL_REGEXES", raising=False) + i = _interceptor() + assert i._should_soft_fail_domain("https://www.taostats.io/subnets") + assert i._should_soft_fail_domain("https://www.coingecko.com/en/coins/bitcoin") + assert i._should_soft_fail_domain("https://www.stooq.com/q/currency/usd-eur") + + # ── _find_cached_page ───────────────────────────────────────────── class TestFindCachedPage: @@ -98,6 +149,18 @@ def test_incomplete_page_skipped(self): i = _interceptor(cached={normalize_url(url): page}) assert i._find_cached_page(url) is None + def test_find_any_cached_page_allows_stale_html_fallback(self): + url = "https://www.taostats.io/subnets" + page = CachedPage(url=url, html="