AffineFoundation
diff --git a/‎liveweb_arena/core/cache.py‎
Lines changed: 1 addition & 0 deletions b/‎liveweb_arena/core/cache.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎liveweb_arena/core/gt_collector.py‎
Lines changed: 0 additions & 17 deletions b/‎liveweb_arena/core/gt_collector.py‎
Lines changed: 0 additions & 17 deletions
diff --git a/‎liveweb_arena/core/task_registry.py‎
Lines changed: 11 additions & 5 deletions b/‎liveweb_arena/core/task_registry.py‎
Lines changed: 11 additions & 5 deletions
diff --git a/‎liveweb_arena/plugins/arxiv/templates/__init__.py‎
Lines changed: 4 additions & 0 deletions b/‎liveweb_arena/plugins/arxiv/templates/__init__.py‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎liveweb_arena/plugins/arxiv/templates/category_discovery_hints.py‎
Lines changed: 139 additions & 0 deletions b/‎liveweb_arena/plugins/arxiv/templates/category_discovery_hints.py‎
Lines changed: 139 additions & 0 deletions
diff --git a/‎liveweb_arena/plugins/arxiv/templates/category_infer_author_filter.py‎
Lines changed: 115 additions & 0 deletions b/‎liveweb_arena/plugins/arxiv/templates/category_infer_author_filter.py‎
Lines changed: 115 additions & 0 deletions
@@ -489,6 +489,7 @@ def _load_cache(self, cache_file: Path, need_api: bool, allow_stale: bool) -> Op
             return None
 
         if not allow_stale and cached.is_expired(self.ttl):
+            self._delete_cache(cache_file)
             return None
 
         # Check if cache is complete
 
@@ -369,14 +369,6 @@ def _merge_api_data(self, url: str, api_data: Dict[str, Any]) -> Optional[str]:
                 name = api_data.get("name", f"SN{netuid}")
                 return f"subnet[{name}]"
 
-        elif "hn.algolia.com" in url_lower:
-            # HN Algolia search data
-            query = str(api_data.get("query", "")).strip().lower()
-            page = int(api_data.get("page", 0))
-            key = f"hn_search:{query}:{page}"
-            self._collected_api_data[key] = api_data
-            return f"hn_search[{query}] page={page}"
-
         elif "news.ycombinator.com" in url_lower:
             if "stories" in api_data:
                 # Check if this is a category page (ask, show, jobs) or homepage
@@ -408,15 +400,6 @@ def _merge_api_data(self, url: str, api_data: Dict[str, Any]) -> Optional[str]:
                 if "rank" in existing and "rank" not in merged:
                     merged["rank"] = existing["rank"]
                 self._collected_api_data[story_id] = merged
-                added_comments = 0
-                comment_items = api_data.get("_comment_items")
-                if isinstance(comment_items, dict):
-                    for comment_id, comment_payload in comment_items.items():
-                        if isinstance(comment_id, str) and isinstance(comment_payload, dict):
-                            self._collected_api_data[comment_id] = comment_payload
-                            added_comments += 1
-                if added_comments > 0:
-                    return f"story[{story_id}] +{added_comments} comments"
                 return f"story[{story_id}]"
             elif "user" in api_data:
                 # User page
 
@@ -134,10 +134,6 @@ class TaskRegistry:
         76: ("hackernews", "hackernews_extrema_comparison"),
         77: ("hackernews", "hackernews_category_comparison"),
         78: ("hackernews", "hackernews_news_summary"),
-        110: ("hackernews", "hackernews_recent_burst_count"),
-        111: ("hackernews", "hackernews_comment_tree_focus"),
-        112: ("hackernews", "hackernews_keyword_scan_rank"),
-        113: ("hackernews", "hackernews_user_karma_gap"),
 
         # Open Library templates
         80: ("openlibrary", "openlibrary_book_stats"),
@@ -150,13 +146,23 @@ class TaskRegistry:
         86: ("openmeteo", "openmeteo_comparison"),
         87: ("openmeteo", "openmeteo_hourly_extrema"),
         88: ("openmeteo", "openmeteo_forecast_trend"),
+        96: ("openlibrary", "openlibrary_author_engagement_extrema"),
+        97: ("openlibrary", "openlibrary_author_comparison"),
+        98: ("openlibrary", "openlibrary_reading_stats_filter"),
+        99: ("openmeteo", "openmeteo_hourly_threshold"),
+        100: ("openmeteo", "openmeteo_sunrise_sunset"),
+        101: ("openmeteo", "openmeteo_hourly_time_of"),
 
         # ArXiv templates
         90: ("arxiv", "arxiv_paper_info"),
         91: ("arxiv", "arxiv_author_extrema"),
         92: ("arxiv", "arxiv_category_comparison"),
         94: ("arxiv", "arxiv_multi_author_filter"),
         95: ("arxiv", "arxiv_title_length_extrema"),
+        110: ("openmeteo", "openmeteo_daily_precip_peak_day"),
+        111: ("openlibrary", "openlibrary_subject_nested_work_title"),
+        112: ("arxiv", "arxiv_category_infer_title_substring"),
+        113: ("arxiv", "arxiv_category_infer_author_filter"),
     }
 
     # Template versions - each version's combinations come AFTER all previous versions
@@ -189,7 +195,7 @@ class TaskRegistry:
         [96, 97, 98],
         # Version 8: Additional Open Meteo templates
         [99, 100, 101],
-        # Version 9: Hacker News gap-filling templates
+        # Version 9: Cross-site templates (daily precipitation peak, subject hub, arXiv category feeds)
         [110, 111, 112, 113],
     ]
 
 
@@ -6,11 +6,15 @@
 from .multi_author_filter import ArxivMultiAuthorFilterTemplate
 from .title_length_extrema import ArxivTitleLengthExtremaTemplate
 from .category_comparison import ArxivCategoryComparisonTemplate
+from .category_infer_title_substring import ArxivCategoryInferTitleSubstringTemplate
+from .category_infer_author_filter import ArxivCategoryInferAuthorFilterTemplate
 
 __all__ = [
     "ArxivPaperInfoTemplate",
     "ArxivAuthorExtremaTemplate",
     "ArxivMultiAuthorFilterTemplate",
     "ArxivTitleLengthExtremaTemplate",
     "ArxivCategoryComparisonTemplate",
+    "ArxivCategoryInferTitleSubstringTemplate",
+    "ArxivCategoryInferAuthorFilterTemplate",
 ]
@@ -0,0 +1,139 @@
+"""Prose hints for locating an arXiv new-submissions stream without naming its official label.
+
+Aligned with CLAUDE.md §3: questions must not embed URLs, selectors, or routing shortcuts.
+The agent infers which `/list/<code>/new` page to open from domain knowledge plus browsing.
+
+Keys are arXiv category codes matching `variables.CATEGORIES`.
+"""
+
+from typing import Dict
+
+CATEGORY_NAVIGATION_HINTS: Dict[str, str] = {  # arXiv category code -> prose description of that listing (no official label)
+    "cs.AI": (
+        "The computer-science stream where planning, search, and knowledge representation papers "
+        "most often land alongside modern learning-based agents."
+    ),
+    "cs.CL": (
+        "The cs area chiefly concerned with human languages, token sequences, and machine "
+        "translation or parsing benchmarks."
+    ),
+    "cs.CV": (
+        "The cs track focused on pixels, cameras, detectors, segmentation masks, and visual scenes."
+    ),
+    "cs.LG": (
+        "The cs partition most associated with empirical training loops, generalization, and "
+        "differentiable models fit to datasets."
+    ),
+    "cs.SE": (
+        "The cs subject for software lifecycle, repositories, testing practice, and large-scale "
+        "engineering studies."
+    ),
+    "cs.CR": (
+        "The cs stream covering protocols, adversaries, proofs about secrecy, and cryptographic constructions."
+    ),
+    "cs.RO": (
+        "The cs listings where manipulation, kinematics, sensing stacks, and autonomous platforms converge."
+    ),
+    "cs.DS": (
+        "The cs class devoted to asymptotic complexity, classical algorithms, and combinatorial structures."
+    ),
+    "cs.HC": (
+        "The cs bucket for usability studies, interaction techniques, and studies of people using interfaces."
+    ),
+    "cs.IR": (
+        "The cs lane for ranking, retrieval metrics, corpora, and query–document modeling."
+    ),
+    "cs.GT": (
+        "The cs niche treating strategic interaction, equilibria, and incentives among rational actors."
+    ),
+    "math.CO": (
+        "The mathematics archive section for enumerative arguments, graphs as discrete objects, and designs."
+    ),
+    "math.PR": (
+        "The math feed centered on stochastic processes, measure-theoretic limits, and random structures."
+    ),
+    "math.OC": (
+        "The math stream about variational problems, controllers, and continuous-time decision systems."
+    ),
+    "math.NA": (
+        "The math area for discretization schemes, floating-point stability, and iterative linear algebra."
+    ),
+    "math.AG": (
+        "The math subject built around varieties, sheaves, and geometric invariants of polynomial systems."
+    ),
+    "math.AP": (
+        "The math queue for PDE well-posedness, Sobolev estimates, and evolution of physical fields."
+    ),
+    "math.NT": (
+        "The math lane for primes, congruences, L-functions, and arithmetic of integers."
+    ),
+    "math.DG": (
+        "The math topic for curvature, bundles, connections, and smooth manifolds beyond Euclidean space."
+    ),
+    "math.GR": (
+        "The math column for symmetries, presentations, and actions of abstract algebraic systems."
+    ),
+    "hep-th": (
+        "The high-energy theory feed discussing strings, dualities, quantum fields, and spacetime models."
+    ),
+    "hep-ph": (
+        "The collider-adjacent phenomenology stream bridging models with signals, rates, and detectors."
+    ),
+    "quant-ph": (
+        "The quantum archive for qubits, channels, entanglement measures, and information-theoretic protocols."
+    ),
+    "gr-qc": (
+        "The archive slice merging classical gravitation with quantum expectations about horizons and cosmology."
+    ),
+    "astro-ph.CO": (
+        "The astrophysics bucket for large-scale structure, dark components, and expansion history."
+    ),
+    "astro-ph.GA": (
+        "The astrophysics lane for stellar populations, galaxies as systems, and interstellar medium interplay."
+    ),
+    "astro-ph.HE": (
+        "The high-energy astrophysics feed for compact objects, relativistic outflows, and energetic spectra."
+    ),
+    "astro-ph.SR": (
+        "The stellar astrophysics listings for interiors, magnetism, and long-lived luminous spheres."
+    ),
+    "astro-ph.IM": (
+        "The astrophysics instrumentation track for telescopes, calibration pipelines, and survey hardware."
+    ),
+    "cond-mat.str-el": (
+        "The condensed-matter stream for strongly correlated lattices, emergent quasiparticles, and phases."
+    ),
+    "cond-mat.mes-hall": (
+        "The mesoscale condensed-matter area for nanowires, quantum dots, and low-dimensional transport."
+    ),
+    "cond-mat.mtrl-sci": (
+        "The materials-facing condensed-matter lane for synthesis, characterization, and structure–property links."
+    ),
+    "cond-mat.stat-mech": (
+        "The many-body equilibrium column for ensembles, phase transitions, and emergent macroscopic laws."
+    ),
+    "cond-mat.supr-con": (
+        "The low-temperature condensed-matter listings for Cooper pairing, Meissner physics, and critical fields."
+    ),
+    "cond-mat.soft": (
+        "The soft matter feed for colloids, gels, active grains, and sluggish thermal motion."
+    ),
+    "physics.optics": (
+        "The physics subject lane for interference, coherence, guided waves, and photonic devices."
+    ),
+    "stat.ML": (
+        "The statistics archive where inference meets high-dimensional prediction and uncertainty for models."
+    ),
+    "stat.ME": (
+        "The statistics methodology lane for estimators, experimental design, and inferential frameworks."
+    ),
+    "eess.SP": (
+        "The electrical-engineering signal stream for filters, spectra, acquisition chains, and discrete transforms."
+    ),
+    "eess.SY": (
+        "The systems-and-control electrical-engineering feed for stability, observers, and feedback synthesis."
+    ),
+    "eess.AS": (
+        "The audio-focused electrical-engineering listings for speech, hearing, and acoustic modeling."
+    ),
+}
@@ -0,0 +1,115 @@
+"""ArXiv category discovery + author-count threshold (registry T113).
+
+Same navigation pattern as `arxiv_category_infer_title_substring`: prose describes
+which new-submissions stream to open—no official label in the question text.
+
+Effective variants: len(CATEGORIES) * len(TOP_N) * len(AUTHOR_THRESHOLDS) * len(PATTERNS) > 500.
+"""
+
+import random
+from typing import Any, Dict, Optional
+
+from liveweb_arena.core.ground_truth_trigger import GroundTruthResult, TriggerConfig, UrlPatternTrigger
+from liveweb_arena.core.gt_collector import GTSourceType
+
+from liveweb_arena.core.validators.base import (
+    GeneratedQuestion,
+    QuestionTemplate,
+    ValidationResult,
+    register_template,
+)
+
+from .category_discovery_hints import CATEGORY_NAVIGATION_HINTS
+from .common import get_collected_listing_data, get_papers_from_listing
+from .variables import CATEGORIES
+
+TOP_N = [4, 5, 6, 7, 8]  # candidate values for {n}: how many leading listing entries are scanned
+AUTHOR_THRESHOLDS = [2, 3, 4, 5, 6]  # candidate values for {k}: a paper counts only if it has more than k authors
+
+PATTERNS = [  # question phrasings; each is formatted with nav_hint, n and k
+    (
+        "On arXiv, open today's new-submissions listing for the stream best described by: \"{nav_hint}\". "
+        "Among the first {n} papers, how many list strictly more than {k} authors?"
+    ),
+    (
+        "Locate the arXiv new-submissions page matching this topical description: \"{nav_hint}\". "
+        "Considering only the first {n} entries, count papers whose author count is greater than {k}."
+    ),
+    (
+        "Find the daily new papers on arXiv under the area summarized as: \"{nav_hint}\". "
+        "In the top {n} items, how many have more than {k} authors named on the listing?"
+    ),
+]
+
+
+@register_template("arxiv_category_infer_author_filter")
+class ArxivCategoryInferAuthorFilterTemplate(QuestionTemplate):
+    GT_SOURCE = GTSourceType.PAGE_ONLY
+
+    def __init__(self):
+        super().__init__("arxiv_category_infer_author_filter")
+
+    def generate(self, seed: int, variant: Optional[int] = None) -> GeneratedQuestion:
+        rng = random.Random(seed)
+        cat = rng.choice(CATEGORIES)
+        nav_hint = CATEGORY_NAVIGATION_HINTS[cat.code]
+        n = rng.choice(TOP_N)
+        k = rng.choice(AUTHOR_THRESHOLDS)
+        pattern = rng.choice(PATTERNS)
+        question_text = pattern.format(nav_hint=nav_hint, n=n, k=k)
+        return GeneratedQuestion(
+            question_text=question_text,
+            start_url="https://arxiv.org",
+            variables={"category": cat.code, "top_n": n, "author_threshold": k},
+            validation_info={"category": cat.code, "top_n": n, "author_threshold": k},
+            template_name=self.name,
+            expected_steps=14,
+        )
+
+    def get_validation_rules(self, validation_info: Dict[str, Any]) -> str:
+        return (
+            "Task-Specific Rules (ArXiv category discovery + author threshold):\n"
+            f"- Expected listing category code: {validation_info.get('category')}\n"
+            f"- Papers scanned: first {validation_info.get('top_n')}\n"
+            f"- Strict author floor: {validation_info.get('author_threshold')}\n"
+            "- Score 1.0: exact count\n"
+            "- Score 0.0: otherwise"
+        )
+
+    async def get_ground_truth(self, validation_info: Dict[str, Any]) -> GroundTruthResult:
+        category = str(validation_info.get("category", ""))
+        n = int(validation_info.get("top_n", 5))
+        k = int(validation_info.get("author_threshold", 3))
+
+        data, failure = get_collected_listing_data(category)
+        if failure is not None:
+            return failure
+        papers, failure = get_papers_from_listing(data)
+        if failure is not None:
+            return failure
+        if len(papers) < n:
+            return GroundTruthResult.not_collected(
+                f"Need at least {n} papers in listing, have {len(papers)}."
+            )
+
+        count = 0
+        for p in papers[:n]:
+            authors = p.get("authors")
+            if not isinstance(authors, list):
+                return GroundTruthResult.fail("Paper missing authors list")
+            if len(authors) > k:
+                count += 1
+        return GroundTruthResult.ok(str(count))
+
+    async def validate_answer(self, answer: str, validation_info: Dict[str, Any]) -> ValidationResult:
+        return ValidationResult(score=0.0, is_correct=False, expected=None, actual=answer, details="Use LLM validation")
+
+    def get_ground_truth_trigger(self, validation_info: dict) -> TriggerConfig:
+        return TriggerConfig(trigger=UrlPatternTrigger(domains=["arxiv.org"]))
+
+    @classmethod
+    def get_cache_source(cls) -> str:
+        return "arxiv"
+
+    def get_gt_source(self) -> GTSourceType:
+        return self.GT_SOURCE