Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions liveweb_arena/core/task_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,10 @@ class TaskRegistry:
76: ("hackernews", "hackernews_extrema_comparison"),
77: ("hackernews", "hackernews_category_comparison"),
78: ("hackernews", "hackernews_news_summary"),
110: ("hackernews", "hackernews_recent_burst_count"),
111: ("hackernews", "hackernews_comment_tree_focus"),
112: ("hackernews", "hackernews_keyword_scan_rank"),
113: ("hackernews", "hackernews_user_karma_gap"),

# Open Library templates
80: ("openlibrary", "openlibrary_book_stats"),
Expand Down Expand Up @@ -181,6 +185,8 @@ class TaskRegistry:
[85, 86, 87, 88],
# Version 6: ArXiv templates
[90, 91, 92, 94, 95],
# Version 7: Hacker News gap-filling templates
[110, 111, 112, 113],
]

# Combination registry: list of template ID tuples
Expand Down
38 changes: 38 additions & 0 deletions liveweb_arena/plugins/hackernews/api_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,14 @@ async def get_top_stories(cls, limit: int = 30) -> List[int]:
return data[:limit]
return []

@classmethod
async def get_new_stories(cls, limit: int = 30) -> List[int]:
    """Return up to ``limit`` story IDs from the ``/newstories.json`` endpoint.

    Args:
        limit: Maximum number of IDs to return.

    Returns:
        The leading ``limit`` entries of the response when it is a
        non-empty list; otherwise an empty list.
    """
    newest_ids = await cls.get("/newstories.json")
    # Anything other than a non-empty list (None, error payload, empty list)
    # is treated as "no stories".
    if not newest_ids or not isinstance(newest_ids, list):
        return []
    return newest_ids[:limit]

@classmethod
async def get_ask_stories(cls, limit: int = 30) -> List[int]:
"""Get Ask HN story IDs."""
Expand Down Expand Up @@ -204,6 +212,36 @@ async def fetch_homepage_api_data(limit: int = 30) -> Dict[str, Any]:
return {"stories": stories}


async def fetch_newest_api_data(limit: int = 30) -> Dict[str, Any]:
    """
    Build the API payload for the Hacker News newest page.

    Args:
        limit: Maximum number of newest story IDs to request.

    Returns:
        A dict of the form::

            {
                "category": "newest",
                "stories": {
                    "<id>": {
                        ...item fields...,
                        "rank": <1-based position in the newest ID list>
                    }
                }
            }

        IDs that are absent from the batch result are left out, so the
        ranks present in ``stories`` may have gaps.

    Raises:
        APIFetchError: If no newest story IDs were returned.
    """
    newest_ids = await HackerNewsClient.get_new_stories(limit=limit)
    if not newest_ids:
        raise APIFetchError("Failed to fetch newest stories", source="hackernews")

    fetched = await HackerNewsClient.get_items_batch(newest_ids)

    ranked: Dict[str, Any] = {}
    for position, item_id in enumerate(newest_ids, start=1):
        if item_id not in fetched:
            # Keep the position counter running so later stories retain
            # their rank from the ID list.
            continue
        entry = fetched[item_id]
        entry["rank"] = position  # written onto the fetched item itself
        ranked[str(item_id)] = entry

    return {"category": "newest", "stories": ranked}


async def fetch_category_api_data(category: str, limit: int = 30) -> Dict[str, Any]:
"""
Fetch API data for a category page (ask, show, jobs).
Expand Down
13 changes: 10 additions & 3 deletions liveweb_arena/plugins/hackernews/hackernews.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from liveweb_arena.plugins.base import BasePlugin
from .api_client import (
fetch_homepage_api_data,
fetch_newest_api_data,
fetch_category_api_data,
fetch_item_api_data,
fetch_user_api_data,
Expand Down Expand Up @@ -318,8 +319,14 @@ async def fetch_api_data(self, url: str) -> Dict[str, Any]:
self._extract_external_urls(data)
return data

# Homepage (including news, newest, front, etc. - all show top stories)
if path in ("", "news", "newest", "front") or not path:
# Newest page
if path == "newest":
data = await fetch_newest_api_data()
self._extract_external_urls(data)
return data

# Homepage (top stories)
if path in ("", "news", "front") or not path:
data = await fetch_homepage_api_data()
self._extract_external_urls(data)
return data
Expand Down Expand Up @@ -359,7 +366,7 @@ def needs_api_data(self, url: str) -> bool:
if path in ("ask", "show", "jobs"):
return True

# Homepage needs API data
# Newest/homepage need API data
if path in ("", "news", "newest", "front") or not path:
return True

Expand Down
8 changes: 8 additions & 0 deletions liveweb_arena/plugins/hackernews/templates/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,18 @@
from .extrema_comparison import HackerNewsExtremaComparisonTemplate
from .category_comparison import HackerNewsCategoryComparisonTemplate
from .news_summary import HackerNewsNewsSummaryTemplate
from .recent_burst_count import HackerNewsRecentBurstCountTemplate
from .comment_tree_focus import HackerNewsCommentTreeFocusTemplate
from .keyword_scan_rank import HackerNewsKeywordScanRankTemplate
from .user_karma_gap import HackerNewsUserKarmaGapTemplate

__all__ = [
"HackerNewsMultiConditionFilterTemplate",
"HackerNewsExtremaComparisonTemplate",
"HackerNewsCategoryComparisonTemplate",
"HackerNewsNewsSummaryTemplate",
"HackerNewsRecentBurstCountTemplate",
"HackerNewsCommentTreeFocusTemplate",
"HackerNewsKeywordScanRankTemplate",
"HackerNewsUserKarmaGapTemplate",
]
95 changes: 95 additions & 0 deletions liveweb_arena/plugins/hackernews/templates/comment_tree_focus.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
"""Nested-structure navigation template for Hacker News comments."""

import random
from typing import Any, Dict, Optional

from liveweb_arena.core.ground_truth_trigger import GroundTruthResult, TriggerConfig, UrlPatternTrigger
from liveweb_arena.core.gt_collector import GTSourceType
from liveweb_arena.core.validators.base import (
GeneratedQuestion,
QuestionTemplate,
ValidationResult,
register_template,
)

from .common import get_category_stories, get_collected_hn_data, get_item_story

# Candidate ranks on the newest listing; generate() picks one of these
# with a seeded RNG.
RANK_CHOICES = [1, 2, 3, 4, 5]

# Question phrasings. Each is rendered with str.format(rank=...), so every
# entry must contain a "{rank}" placeholder.
PATTERNS = [
"On HN newest, open the #{rank} story and report how many top-level comments it has (the immediate children count).",
"Using Hacker News /newest, inspect rank {rank} story detail: how many direct root comments are attached to the story?",
"From HN newest, visit story #{rank}. What is the count of first-level comments under the story node?",
]


@register_template("hackernews_comment_tree_focus")
class HackerNewsCommentTreeFocusTemplate(QuestionTemplate):
    """Ask for the top-level comment count of a story chosen by newest rank.

    The question names a rank on the newest listing; the ground truth is the
    length of the ``kids`` list in that story's collected item payload.
    """

    GT_SOURCE = GTSourceType.PAGE_ONLY

    def __init__(self):
        super().__init__("hackernews_comment_tree_focus")

    def generate(self, seed: int, variant: Optional[int] = None) -> GeneratedQuestion:
        """Create a question deterministically from ``seed``.

        Args:
            seed: Seed for the RNG that picks the rank and the phrasing.
            variant: Accepted for interface compatibility; not used here.

        Returns:
            A question starting at the newest page, with the chosen rank
            stored in both ``variables`` and ``validation_info``.
        """
        rng = random.Random(seed)
        # Draw order (rank first, then pattern) determines the output for a
        # given seed, so it must not be swapped.
        rank = rng.choice(RANK_CHOICES)
        pattern = rng.choice(PATTERNS)
        return GeneratedQuestion(
            question_text=pattern.format(rank=rank),
            start_url="https://news.ycombinator.com/newest",
            variables={"rank": rank},
            validation_info={"rank": rank, "category_slug": "newest"},
            template_name=self.name,
            expected_steps=9,
        )

    def get_validation_rules(self, validation_info: Dict[str, Any]) -> str:
        """Return the scoring rubric text for this question's target rank."""
        return (
            "Task-Specific Rules (HN Comment Tree Focus):\n"
            f"- Target newest rank: {validation_info.get('rank')}\n"
            "- Expected answer is top-level comment count (immediate children only)\n"
            "- Score 1.0: exact\n"
            "- Score 0.5: off by <=2\n"
            "- Score 0.0: otherwise"
        )

    async def get_ground_truth(self, validation_info: Dict[str, Any]) -> GroundTruthResult:
        """Compute the expected answer from collected API data.

        Args:
            validation_info: Must carry ``"rank"`` (defaults to 1 if absent).

        Returns:
            An ``ok`` result holding the number of entries in the target
            story's ``kids`` list as a string (``"0"`` when the field is
            absent), or the failure result produced by whichever lookup
            step could not be completed.
        """
        collected, failure = get_collected_hn_data()
        if failure is not None:
            return failure

        rank = int(validation_info.get("rank", 1))
        stories, failure = get_category_stories(collected, "newest", min_count=1)
        if failure is not None:
            return failure

        # Select by the stored rank, not by list position: stories that could
        # not be fetched are missing from the listing, so the ranks can have
        # gaps and ``stories[rank - 1]`` could point at a different story
        # than the one the question refers to.
        target_story = next(
            (story for story in stories if story.get("rank") == rank),
            None,
        )
        if target_story is None:
            return GroundTruthResult.not_collected(
                f"Story at newest rank {rank} not collected. Visit /newest."
            )

        item_id = target_story.get("id")
        if not isinstance(item_id, int):
            return GroundTruthResult.fail("Target story missing id")

        item_data, failure = get_item_story(collected, item_id)
        if failure is not None:
            return failure

        kids = item_data.get("kids")
        if kids is None:
            # No "kids" field is treated as a story without comments.
            return GroundTruthResult.ok("0")
        if not isinstance(kids, list):
            return GroundTruthResult.fail("Malformed kids field in item payload")
        return GroundTruthResult.ok(str(len(kids)))

    async def validate_answer(self, answer: str, validation_info: Dict[str, Any]) -> ValidationResult:
        """Return a zero-score placeholder; its details say to use LLM validation."""
        return ValidationResult(score=0.0, is_correct=False, expected=None, actual=answer, details="Use LLM validation")

    def get_ground_truth_trigger(self, validation_info: dict) -> TriggerConfig:
        """Return a trigger configured with a URL pattern for news.ycombinator.com."""
        return TriggerConfig(trigger=UrlPatternTrigger(domains=["news.ycombinator.com"]))

    @classmethod
    def get_cache_source(cls) -> str:
        """Return the cache source name used by this template."""
        return "hackernews"

    def get_gt_source(self) -> GTSourceType:
        """Return the ground-truth source type declared on the class."""
        return self.GT_SOURCE
97 changes: 97 additions & 0 deletions liveweb_arena/plugins/hackernews/templates/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
"""Shared utilities for advanced Hacker News templates."""

from typing import Any, Dict, List, Optional, Tuple

from liveweb_arena.core.ground_truth_trigger import GroundTruthResult
from liveweb_arena.core.gt_collector import get_current_gt_collector


def get_collected_hn_data() -> Tuple[Optional[Dict[str, Dict[str, Any]]], Optional[GroundTruthResult]]:
    """Fetch the collected API payload map from the current GT collector.

    Returns:
        ``(data, None)`` when a collector is active, otherwise
        ``(None, <system-error result>)``.
    """
    collector = get_current_gt_collector()
    if collector is not None:
        return collector.get_collected_api_data(), None
    return None, GroundTruthResult.system_error("No GT collector")


def get_category_stories(
    collected: Dict[str, Dict[str, Any]],
    category_slug: str,
    min_count: int = 1,
) -> Tuple[Optional[List[Dict[str, Any]]], Optional[GroundTruthResult]]:
    """Extract a category's stories from collected data, ordered by rank.

    Args:
        collected: Collected API payloads keyed by cache key.
        category_slug: Category name; looked up as ``hn_category:<slug>``.
        min_count: Minimum number of ranked stories required.

    Returns:
        ``(stories, None)`` with stories sorted ascending by their ``"rank"``
        field, or ``(None, failure)`` when the category is missing, its
        ``"stories"`` entry is not a dict, or fewer than ``min_count``
        usable stories are present. Entries that are not dicts or that lack
        an integer rank are ignored.
    """
    category_data = collected.get(f"hn_category:{category_slug}")
    if not isinstance(category_data, dict):
        return None, GroundTruthResult.not_collected(
            f"Category data '{category_slug}' not collected. Visit /{category_slug}."
        )

    stories = category_data.get("stories")
    if not isinstance(stories, dict):
        return None, GroundTruthResult.fail(f"Malformed stories in category '{category_slug}'")

    ranked: List[Dict[str, Any]] = []
    for story in stories.values():
        if not isinstance(story, dict):
            continue
        rank = story.get("rank")
        # Keep only genuine integer ranks. A stray string/float rank would
        # either make the sort raise TypeError (mixed types) or order the
        # stories lexicographically; bool is excluded because it is an int
        # subclass but never a real rank.
        if isinstance(rank, bool) or not isinstance(rank, int):
            continue
        ranked.append(story)

    ranked.sort(key=lambda s: s["rank"])
    if len(ranked) < min_count:
        return None, GroundTruthResult.not_collected(
            f"Need at least {min_count} stories in '{category_slug}', got {len(ranked)}."
        )
    return ranked, None


def get_item_story(
    collected: Dict[str, Dict[str, Any]],
    item_id: int,
) -> Tuple[Optional[Dict[str, Any]], Optional[GroundTruthResult]]:
    """Look up an item's collected payload by its id.

    The payload is read from ``collected`` under the stringified id.

    Returns:
        ``(payload, None)`` when a dict is stored under that key, otherwise
        ``(None, <not-collected result>)``.
    """
    payload = collected.get(str(item_id))
    if isinstance(payload, dict):
        return payload, None
    return None, GroundTruthResult.not_collected(
        f"Item {item_id} not collected. Visit /item?id={item_id}."
    )


def get_user_data(
    collected: Dict[str, Dict[str, Any]],
    username: str,
) -> Tuple[Optional[Dict[str, Any]], Optional[GroundTruthResult]]:
    """Look up a user's collected profile by username.

    The payload is read from ``collected`` under ``user:<username>`` and the
    profile itself is expected under that payload's ``"user"`` key.

    Returns:
        ``(profile, None)`` on success; ``(None, <not-collected result>)``
        when no dict payload is stored for the user; ``(None, <fail result>)``
        when the payload's ``"user"`` entry is not a dict.
    """
    payload = collected.get(f"user:{username}")
    if not isinstance(payload, dict):
        return None, GroundTruthResult.not_collected(
            f"User data for '{username}' not collected. Visit /user?id={username}."
        )

    profile = payload.get("user")
    if isinstance(profile, dict):
        return profile, None
    return None, GroundTruthResult.fail(f"Malformed user payload for '{username}'.")


def parse_iso_minutes(timestamp: Any) -> Optional[int]:
    """Convert the clock part of a basic ISO timestamp to minutes since midnight.

    Only the text after the first ``"T"`` is examined: the hour is everything
    up to the first ``":"`` and the minute is the next two characters. Seconds
    and any timezone suffix are ignored.

    Returns:
        ``hour * 60 + minute``, or ``None`` when the value is not a string,
        lacks a ``"T"`` or ``":"`` separator, has non-numeric fields, or is
        outside 00:00-23:59.
    """
    if not isinstance(timestamp, str):
        return None

    _, t_sep, clock = timestamp.partition("T")
    if not t_sep:
        return None

    hour_text, colon, remainder = clock.partition(":")
    if not colon:
        return None

    try:
        hours = int(hour_text)
        minutes = int(remainder[:2])
    except ValueError:
        return None

    if not (0 <= hours <= 23 and 0 <= minutes <= 59):
        return None
    return hours * 60 + minutes
Loading