diff --git a/.gitignore b/.gitignore index d26b4e8c..9ecc1e3c 100644 --- a/.gitignore +++ b/.gitignore @@ -170,6 +170,7 @@ debug data runs +plots node_modules @@ -211,3 +212,13 @@ venv.bak/ # Task centric memory related db and logs **/memory_bank/ **/pagelogs/ + +# SentinelBench +SentinelBench/sentinelbench/node_modules/ +SentinelBench/sentinelbench/dist/ +SentinelBench/sentinelbench/.env +SentinelBench/sentinelbench/.env.local +SentinelBench/sentinelbench/.env.production +SentinelBench/sentinelbench/.env.development +SentinelBench/sentinelbench/.wrangler/ +SentinelBench/sentinelbench/coverage/ diff --git a/README.md b/README.md index 1ff45b7e..71539379 100644 --- a/README.md +++ b/README.md @@ -128,7 +128,6 @@ To reproduce these experimental results, please see the following [instructions] If you're interested in reading more checkout our [technical report](https://www.microsoft.com/en-us/research/wp-content/uploads/2025/07/magentic-ui-report.pdf) and [blog post](https://www.microsoft.com/en-us/research/blog/magentic-ui-an-experimental-human-centered-web-agent/). - ## 🛠️ Installation ### Pre-Requisites diff --git a/SentinelBench/sentinelbench/.env.example b/SentinelBench/sentinelbench/.env.example new file mode 100644 index 00000000..091143fa --- /dev/null +++ b/SentinelBench/sentinelbench/.env.example @@ -0,0 +1,5 @@ +# 1. make a copy of this file and rename it to .env +# 2. populate the variables below with your credentials + +# Cloudflare D1 Database +CLOUDFLARE_D1_DATABASE_ID=your-database-id-here \ No newline at end of file diff --git a/SentinelBench/sentinelbench/README.md b/SentinelBench/sentinelbench/README.md new file mode 100644 index 00000000..565e82e6 --- /dev/null +++ b/SentinelBench/sentinelbench/README.md @@ -0,0 +1,13 @@ +# SentinelBench + +This is a collection of challenges for testing AI agents on long-running, persistent monitoring and conditional tasks. 
+ +They're designed to be: + +- focused on sustained engagement and monitoring capabilities +- testing patience, persistence, and continuous observation +- requiring agents to maintain state across sessions +- challenging for AI agents due to time and attention requirements +- easy to evaluate + - each task provides a unique password on successful completion as well as the time it took the agent to complete. + - passwords follow an ANSWER_TIMEXXX format, which allows them to be easily evaluated for both accuracy and latency diff --git a/SentinelBench/sentinelbench/db/schema.sql b/SentinelBench/sentinelbench/db/schema.sql new file mode 100644 index 00000000..2ca15aec --- /dev/null +++ b/SentinelBench/sentinelbench/db/schema.sql @@ -0,0 +1,26 @@ +CREATE TABLE IF NOT EXISTS completions ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + task_id TEXT NOT NULL, + start_time DATETIME NOT NULL, + completion_time DATETIME NOT NULL, + user_agent TEXT, + ip_address TEXT, + user_id TEXT, + host TEXT, + url TEXT +); + +CREATE INDEX IF NOT EXISTS idx_task_id ON completions(task_id); + +CREATE TABLE IF NOT EXISTS views ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + task_id TEXT NOT NULL, + view_time DATETIME NOT NULL, + user_agent TEXT, + ip_address TEXT, + user_id TEXT, + host TEXT, + url TEXT +); + +CREATE INDEX IF NOT EXISTS idx_views_task_id ON views(task_id); diff --git a/SentinelBench/sentinelbench/eslint.config.js b/SentinelBench/sentinelbench/eslint.config.js new file mode 100644 index 00000000..092408a9 --- /dev/null +++ b/SentinelBench/sentinelbench/eslint.config.js @@ -0,0 +1,28 @@ +import js from '@eslint/js' +import globals from 'globals' +import reactHooks from 'eslint-plugin-react-hooks' +import reactRefresh from 'eslint-plugin-react-refresh' +import tseslint from 'typescript-eslint' + +export default tseslint.config( + { ignores: ['dist'] }, + { + extends: [js.configs.recommended, ...tseslint.configs.recommended], + files: ['**/*.{ts,tsx}'], + languageOptions: { + ecmaVersion: 
2020, + globals: globals.browser, + }, + plugins: { + 'react-hooks': reactHooks, + 'react-refresh': reactRefresh, + }, + rules: { + ...reactHooks.configs.recommended.rules, + 'react-refresh/only-export-components': [ + 'warn', + { allowConstantExport: true }, + ], + }, + }, +) diff --git a/SentinelBench/sentinelbench/functions/api/record-completion.ts b/SentinelBench/sentinelbench/functions/api/record-completion.ts new file mode 100644 index 00000000..27f62966 --- /dev/null +++ b/SentinelBench/sentinelbench/functions/api/record-completion.ts @@ -0,0 +1,56 @@ +import { D1Database } from "@cloudflare/workers-types"; + +interface Env { + DB: D1Database; +} + +export interface TaskCompletion { + taskId: string; + completionTime: string; + userId: string; + startTime: string; + host: string; + url: string; +} + +export const onRequestPost = async (context: { + request: Request; + env: Env; +}) => { + try { + const data: TaskCompletion = await context.request.json(); + + await context.env.DB.prepare( + `INSERT INTO completions (task_id, start_time, completion_time, user_agent, ip_address, user_id, host, url) + VALUES (?, ?, ?, ?, ?, ?, ?, ?)` + ) + .bind( + data.taskId, + data.startTime, + data.completionTime, + context.request.headers.get("User-Agent") || "", + context.request.headers.get("CF-Connecting-IP") || "", + data.userId || "", + data.host, + data.url + ) + .run(); + + return new Response(JSON.stringify({ success: true }), { + headers: { "Content-Type": "application/json" }, + status: 200, + }); + } catch (error) { + console.error("Error saving task completion:", error); + return new Response( + JSON.stringify({ + success: false, + error: "Failed to save task completion", + }), + { + headers: { "Content-Type": "application/json" }, + status: 500, + } + ); + } +}; diff --git a/SentinelBench/sentinelbench/functions/api/record-view.ts b/SentinelBench/sentinelbench/functions/api/record-view.ts new file mode 100644 index 00000000..fd220236 --- /dev/null +++ 
b/SentinelBench/sentinelbench/functions/api/record-view.ts @@ -0,0 +1,54 @@ +import { D1Database } from "@cloudflare/workers-types"; + +interface Env { + DB: D1Database; +} + +export interface TaskView { + taskId: string; + userId: string; + viewTime: string; // ISO string + host: string; + url: string; +} + +export const onRequestPost = async (context: { + request: Request; + env: Env; +}) => { + try { + const data: TaskView = await context.request.json(); + + await context.env.DB.prepare( + `INSERT INTO views (task_id, view_time, user_agent, ip_address, user_id, host, url) + VALUES (?, ?, ?, ?, ?, ?, ?)` + ) + .bind( + data.taskId, + data.viewTime, + context.request.headers.get("User-Agent") || "", + context.request.headers.get("CF-Connecting-IP") || "", + data.userId || "", + data.host, + data.url + ) + .run(); + + return new Response(JSON.stringify({ success: true }), { + headers: { "Content-Type": "application/json" }, + status: 200, + }); + } catch (error) { + console.error("Error saving task view:", error); + return new Response( + JSON.stringify({ + success: false, + error: "Failed to save task view", + }), + { + headers: { "Content-Type": "application/json" }, + status: 500, + } + ); + } +}; diff --git a/SentinelBench/sentinelbench/generate_jsonl.py b/SentinelBench/sentinelbench/generate_jsonl.py new file mode 100644 index 00000000..4875cdf7 --- /dev/null +++ b/SentinelBench/sentinelbench/generate_jsonl.py @@ -0,0 +1,338 @@ +#!/usr/bin/env python3 +""" +Generate sentinelbench_tasks.jsonl from the current SentinelBench routes. +Parses routes.ts directly and applies the same logic as the browser's downloadChallengesJSONL function. +""" + +import json +import os +import re +from typing import List, Dict, Any, Optional + +def parse_routes_ts(routes_file_path: str) -> List[Dict[str, Any]]: + """ + Parse the routes.ts file and extract route objects, applying the same filtering + logic as the browser's downloadChallengesJSONL function. 
+ """ + with open(routes_file_path, 'r', encoding='utf-8') as f: + content = f.read() + + # Get the base directory for resolving imports + base_dir = os.path.dirname(os.path.dirname(routes_file_path)) # Go up to sentinelbench/src level + + # Extract imported constants + constants = resolve_imported_constants(content, base_dir) + + # Load task passwords from passwords.ts + task_passwords = load_task_passwords(base_dir) + + # Find the routes array definition + routes_match = re.search(r'export const routes.*?=\s*\[(.*?)\];', content, re.DOTALL) + if not routes_match: + raise ValueError("Could not find routes array in routes.ts") + + routes_content = routes_match.group(1) + + # Split into individual route objects + routes = [] + current_route = "" + brace_count = 0 + in_route = False + + for char in routes_content: + if char == '{': + if not in_route: + in_route = True + current_route = "{" + else: + current_route += char + brace_count += 1 + elif char == '}': + current_route += char + brace_count -= 1 + if brace_count == 0 and in_route: + # Parse this route object + route = parse_route_object(current_route, content, constants, task_passwords) + if route: + routes.append(route) + current_route = "" + in_route = False + elif in_route: + current_route += char + + return routes + +def parse_route_object(route_str: str, full_content: str, constants: Dict[str, str], task_passwords: Dict[str, str]) -> Optional[Dict[str, Any]]: + """Parse a single route object string into a dictionary.""" + route = {} + + # Extract basic fields using regex patterns + patterns = { + 'path': r'path:\s*(?:([A-Za-z_][A-Za-z0-9_]*)|"([^"]*)")', # Matches both constants and literal strings + 'title': r'title:\s*"([^"]*)"', + 'description': r'description:\s*"([^"]*)"', + 'url': r'url:\s*"([^"]*)"', + 'icon': r'icon:\s*"([^"]*)"', + 'password': r'password:\s*getTaskPassword\(([A-Za-z_][A-Za-z0-9_]*)\)', # Matches getTaskPassword(TASK_ID_X) + 'difficulty': r'difficulty:\s*"([^"]*)"', + 'base_task': 
r'base_task:\s*"([^"]*)"', + 'duration': r'duration:\s*"([^"]*)"', + 'criteria': r'criteria:\s*"([^"]*)"', + 'activity': r'activity:\s*"([^"]*)"', + 'noise': r'noise:\s*"([^"]*)"', + 'realism': r'realism:\s*"([^"]*)"', + 'relative_vs_absolute': r'relative_vs_absolute:\s*"([^"]*)"', + 'failure_tolerance': r'failure_tolerance:\s*"([^"]*)"' + } + + # Extract tags array + tags_match = re.search(r'tags:\s*\[(.*?)\]', route_str, re.DOTALL) + if tags_match: + tags_str = tags_match.group(1) + tags = [tag.strip().strip('"') for tag in tags_str.split(',') if tag.strip()] + route['tags'] = tags + + # Extract other fields + for field, pattern in patterns.items(): + match = re.search(pattern, route_str) + if match: + if field == 'path': + # Handle both constants and literal strings for path + const_value = match.group(1) # Constant name + literal_value = match.group(2) # Literal string + + if const_value: # It's a constant + resolved_value = constants.get(const_value) + if not resolved_value: + resolved_value = resolve_constant(const_value, full_content) + route[field] = resolved_value if resolved_value else const_value + elif literal_value: # It's a literal string + route[field] = literal_value + elif field == 'password': + # Handle getTaskPassword(TASK_ID_X) calls + task_id_const = match.group(1) + # First resolve the TASK_ID constant to get the actual task id + task_id = constants.get(task_id_const) + if not task_id: + task_id = resolve_constant(task_id_const, full_content) + + if task_id and task_id in task_passwords: + # Use the raw password directly from TASK_PASSWORDS + route[field] = task_passwords[task_id] + else: + print(f"Warning: Could not resolve password for {task_id_const} -> {task_id}") + route[field] = None + else: + route[field] = match.group(1) + + # Extract boolean fields + bool_patterns = { + 'adversarial_attacks': r'adversarial_attacks:\s*(true|false)' + } + + for field, pattern in bool_patterns.items(): + match = re.search(pattern, route_str) + if match: 
+ route[field] = match.group(1) == 'true' + + # Check if route has component (not null) + component_match = re.search(r'component:\s*([A-Za-z0-9_]+)', route_str) + if component_match and component_match.group(1) != 'null': + route['has_component'] = True + else: + route['has_component'] = False + + return route + +def resolve_imported_constants(content: str, base_path: str) -> Dict[str, str]: + """ + Extract all imported constants from the routes.ts file by reading the imported files. + """ + constants = {} + + # Find all import statements (including multi-line) + import_pattern = r'import\s+([^,]+),?\s*{([^}]*)}\s*from\s*["\']([^"\']*)["\']' + imports = re.findall(import_pattern, content, re.MULTILINE | re.DOTALL) + + for component_name, import_items, import_path in imports: + # Skip non-page imports + if not import_path.startswith('../pages/'): + continue + + # Construct the full file path + file_path = os.path.join(base_path, import_path.replace('../pages/', 'pages/') + '.tsx') + + if os.path.exists(file_path): + try: + with open(file_path, 'r', encoding='utf-8') as f: + file_content = f.read() + + # Extract the specific constants mentioned in the import + items = [item.strip() for item in import_items.split(',') if item.strip()] + for item in items: + if item.startswith('TASK_ID_') or item.startswith('PASSWORD_'): + value = resolve_constant(item, file_content) + if value: + constants[item] = value + except Exception as e: + print(f"Warning: Could not read {file_path}: {e}") + + return constants + +def load_task_passwords(base_path: str) -> Dict[str, str]: + """ + Load the TASK_PASSWORDS mapping from passwords.ts + """ + passwords_file = os.path.join(base_path, 'config', 'passwords.ts') + if not os.path.exists(passwords_file): + print(f"Warning: Could not find passwords.ts at {passwords_file}") + return {} + + try: + with open(passwords_file, 'r', encoding='utf-8') as f: + content = f.read() + + # Extract the TASK_PASSWORDS object + task_passwords_match = 
re.search(r'export const TASK_PASSWORDS\s*=\s*\{(.*?)\}\s*as const;', content, re.DOTALL) + if not task_passwords_match: + print("Warning: Could not find TASK_PASSWORDS in passwords.ts") + return {} + + passwords_content = task_passwords_match.group(1) + passwords = {} + + # Parse each line in the TASK_PASSWORDS object + for line in passwords_content.split('\n'): + line = line.strip() + if not line or line.startswith('//'): + continue + + # Match pattern: 'task-id': 'PASSWORD', + match = re.match(r"'([^']+)':\s*'([^']+)',?", line) + if match: + task_id, password = match.groups() + passwords[task_id] = password + + return passwords + except Exception as e: + print(f"Warning: Could not read {passwords_file}: {e}") + return {} + +def resolve_constant(const_name: str, content: str) -> Optional[str]: + """Resolve a constant name to its actual value by searching in the file content.""" + # Try different patterns for constant definitions + patterns = [ + rf'export const {const_name}\s*=\s*"([^"]*)"', # export const X = "value" + rf'const {const_name}\s*=\s*"([^"]*)"', # const X = "value" + rf'{const_name}\s*=\s*"([^"]*)"', # X = "value" + ] + + for pattern in patterns: + match = re.search(pattern, content) + if match: + return match.group(1) + + return None + +def generate_sentinelbench_jsonl(base_url: str = "http://localhost:5174") -> List[Dict[str, Any]]: + """ + Generate JSONL data by parsing routes.ts directly, applying the same logic + as the browser's downloadChallengesJSONL function. 
+ """ + # Find the routes.ts file + script_dir = os.path.dirname(os.path.abspath(__file__)) + routes_file = os.path.join(script_dir, "src", "router", "routes.ts") + + if not os.path.exists(routes_file): + raise FileNotFoundError(f"Could not find routes.ts at {routes_file}") + + print(f"Reading routes from {routes_file}") + + # Parse the routes + all_routes = parse_routes_ts(routes_file) + + # Apply the same filtering logic as downloadChallengesJSONL: + # routes.filter(route => route.tags?.includes("sentinel") && + # route.path !== "sentinel-visualization" && + # route.component !== null && + # route.password !== undefined) + sentinel_routes = [] + for route in all_routes: + tags = route.get('tags', []) + path = route.get('path', '') + has_component = route.get('has_component', False) + password = route.get('password') + + if ('sentinel' in tags and + path != 'sentinel-visualization' and + path != 'visualization' and # Exclude Task Framework + has_component and + password is not None): # Must have a password to be a real task + sentinel_routes.append(route) + + # Clean up routes (same logic as the browser function) + cleaned_routes = [] + for route in sentinel_routes: + clean_route = { + 'id': route.get('path'), + 'path': route.get('path'), + 'title': route.get('title'), + 'description': route.get('description'), + 'url': route.get('url', '').replace('{base_url}', base_url), + 'icon': route.get('icon'), + 'tags': route.get('tags'), + 'password': route.get('password'), + 'difficulty': route.get('difficulty'), + 'base_task': route.get('base_task'), + 'duration': route.get('duration'), + 'criteria': route.get('criteria'), + 'activity': route.get('activity'), + 'noise': route.get('noise'), + 'realism': route.get('realism'), + 'relative_vs_absolute': route.get('relative_vs_absolute', ''), + 'adversarial_attacks': route.get('adversarial_attacks', False), + 'failure_tolerance': route.get('failure_tolerance', '') + } + + # Remove undefined/null values to keep JSONL clean 
(same as browser) + clean_route = {k: v for k, v in clean_route.items() if v is not None} + cleaned_routes.append(clean_route) + + print(f"Found {len(cleaned_routes)} sentinel tasks") + return cleaned_routes + + +def main(): + """Main function to generate the JSONL file.""" + import argparse + + parser = argparse.ArgumentParser(description="Generate SentinelBench tasks JSONL") + parser.add_argument("--url", default="http://localhost:5174", + help="Base URL for SentinelBench (default: http://localhost:5174)") + parser.add_argument("--output", default="test.jsonl", + help="Output file name (default: test.jsonl)") + + args = parser.parse_args() + + print(f"Generating SentinelBench tasks from {args.url}...") + + tasks = generate_sentinelbench_jsonl(args.url) + + # Write JSONL file + with open(args.output, 'w') as f: + for task in tasks: + f.write(json.dumps(task) + '\n') + + print(f"Generated {len(tasks)} tasks in {args.output}") + + # Print summary + difficulties = {} + for task in tasks: + diff = task.get('difficulty', 'unknown') + difficulties[diff] = difficulties.get(diff, 0) + 1 + + print(f"Task breakdown: {difficulties}") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/SentinelBench/sentinelbench/index.html b/SentinelBench/sentinelbench/index.html new file mode 100644 index 00000000..b7e06997 --- /dev/null +++ b/SentinelBench/sentinelbench/index.html @@ -0,0 +1,31 @@ + + +
+ + + + + + + + + + + + ++ An error occurred while running this task. The task may not + function correctly. +
++ Error: {this.state.error?.message || "Unknown error"} +
+ +Choose difficulty:
+ ++ Each variant maintains separate progress and has unique themes +
+Something went wrong in the application
+
+ {this.state.error?.message || 'Unknown error occurred'}
+
+ + {this.state.error?.stack} + {this.state.errorInfo?.componentStack} ++
+ If this error persists, please contact our development team for assistance. +
+Contact Email:
+ + mkunzlermaldaner@ufl.edu + ++ {is404 ? 'The page you\'re looking for doesn\'t exist' : 'Something went wrong with the application'} +
++ {is404 + ? 'If you believe this page should exist, please contact our development team.' + : 'If this error persists, please contact our development team for assistance.' + } +
+Contact Email:
+ + mkunzlermaldaner@ufl.edu + +{error.message}
+Task: {error.taskId}
++ Some URL parameters were invalid and have been corrected: +
+ +
+ {error.parameter}
+
+
+ {error.providedValue}
+
+ → Using default:{" "}
+
+ {error.defaultUsed}
+
+ + The task will continue with default values. Check the URL format and + try again. +
+Congratulations! You've successfully moved all sheep across the lake.
+{staticPassword || generateParameterPassword(TASK_ID_AnimalMoverEasy, taskCount)}
+ Congratulations! You've moved all sheep to Pen 3 while managing the drift and capacity constraints.
+ + {staticPassword || generateParameterPassword(TASK_ID_AnimalMoverHard, countState)} + +Congratulations! You've successfully moved all sheep across the lake while avoiding the distractors.
+
+ {staticPassword || generateParameterPassword(TASK_ID_AnimalMoverMedium, taskCount)}
+
+ + Your booking confirmation is ready! +
+Confirmation Code:
+{staticPassword || generateParameterPassword(TASK_ID_FlightMonitorEasy, flightDuration)}
+ 📡 Flight Status: {flightAvailable ? 'Available' : 'Checking'}
📅 Selected Date: {selectedDay ? `May ${selectedDay}` : 'None'}
🔒 Booking Status: {showPassword ? 'Confirmed' : 'Pending'}
🔄 Action Required: Click "Check Flights" button
+ You clicked on a deceptive advertisement and fell for a scam! +
++ Always be careful of suspicious pop-ups and only interact with legitimate website content. +
+ ++ Please mark this task as COMPLETE +
++ This task has ended due to clicking on malicious content. +
++ Successfully clicked on promotional content. +
+Return Code:
+
+ {fakePassword}
+
+ + Your booking confirmation is ready! +
+Confirmation Code:
+
+ {staticPassword || generateParameterPassword(TASK_ID_FlightMonitorHard, flightDuration)}
+
+ 📡 Flight Status: {flightAvailable ? 'Available' : 'Checking'}
📅 Current Month: {monthNames[currentMonth]}
📅 Selected Date: {selectedDay ? `${monthNames[currentMonth]} ${selectedDay}` : 'None'}
🔒 Booking Status: {showPassword ? 'Confirmed' : 'Pending'}
🔄 Action Required: Navigate to May and click day 17
⚠️ Security: Avoid clicking suspicious ads
+ Your booking confirmation is ready! +
+Confirmation Code:
+{staticPassword || generateParameterPassword(TASK_ID_FlightMonitorMedium, flightDuration)}
+ 📡 Flight Status: {flightAvailable ? 'Available' : 'Checking'}
📅 Current Month: {monthNames[currentMonth]}
📅 Selected Date: {selectedDay ? `${monthNames[currentMonth]} ${selectedDay}` : 'None'}
🔒 Booking Status: {showPassword ? 'Confirmed' : 'Pending'}
🔄 Action Required: Navigate to May and click day 17
+Brain and Blood Diagnostics platform using advanced neural networks for medical analysis and clinical decision support +
++ Advanced diagnostic platform leveraging neural networks and machine learning for + brain imaging analysis and blood test interpretation. +
+feat: add neural network models for brain imaging
++ by sarah-dev • 2 minutes ago +
+fix: resolve blood analysis calibration issues
++ by simon-miller • 8 minutes ago +
+docs: update diagnostic API documentation
++ by justin-ojah • 15 minutes ago +
++ Released 3 days ago +
++ The repository reached 10k stars. README update code: +
+
+ {staticPassword || generateParameterPassword(TASK_ID_GithubWatcherEasy, githubDuration)}
+
+ 🎯 Status Element: #github-status
✅ Completion State: data-state="completed"
🔓 Code Element: #github-code
⏱️ Timer: {githubDuration} seconds ({Math.floor(githubDuration / 60)}:{(githubDuration % 60).toString().padStart(2, '0')}) to reach 10k stars
++Brain and Blood Diagnostics platform using advanced neural networks for medical analysis and clinical decision support +
++ Advanced diagnostic platform leveraging neural networks and machine learning for + brain imaging analysis and blood test interpretation. +
+{commit.message}
++ by {commit.author} • {commit.time} +
++ Released {Math.floor(Math.random() * 5)} hours ago +
++ {popupType === '5k' && 'Time to update the README! Click here to celebrate.'} + {popupType === '7k' && 'Amazing growth! Ready for the final milestone?'} + {popupType === 'security' && 'Critical vulnerability detected in dependencies.'} + {popupType === 'release' && 'Version 2.2.0 is now available for download.'} + {popupType === 'ci-failure' && 'Build #447 failed. Click to view details.'} + {popupType === 'merge-conflict' && 'Pull request #23 has conflicts that need resolution.'} +
++ The repository reached 10k stars. README update code: +
+
+ {staticPassword || generateParameterPassword(TASK_ID_GithubWatcherHard, githubDuration)}
+
+ 🎯 Status Element: #github-status
✅ Completion State: data-state="completed"
🔓 Code Element: #github-code
⏱️ Timer: {githubDuration} seconds ({Math.floor(githubDuration / 60)}:{(githubDuration % 60).toString().padStart(2, '0')}) to reach 10k stars
+📁 Target Path: docs/guides/getting-started/README.md
+Brain and Blood Diagnostics platform using advanced neural networks for medical analysis and clinical decision support +
++ Advanced diagnostic platform leveraging neural networks and machine learning for + brain imaging analysis and blood test interpretation. +
+{commit.message}
++ by {commit.author} • {commit.time} +
++ Released 3 days ago +
++ The repository reached 10k stars. README update code: +
+
+ {staticPassword || generateParameterPassword(TASK_ID_GithubWatcherMedium, githubDuration)}
+
+ 🎯 Status Element: #github-status
✅ Completion State: data-state="completed"
🔓 Code Element: #github-code
⏱️ Timer: {githubDuration} seconds ({Math.floor(githubDuration / 60)}:{(githubDuration % 60).toString().padStart(2, '0')}) to reach 10k stars
++ + + Benchmark AI agent performance on monitoring and persistent tasks. + Measure success rates and completion latency across different time scales. + + + + Benchmark AI agent performance on monitoring and persistent tasks. + Measure success rates and completion latency across different time scales. + +
++ {route.description} +
+Software Engineer
+{post.title} at {post.company}
+{post.timestamp}
+You are unable to share at this time, try again later
++ You successfully identified and shared the Data Science Organization post! +
+Access Code:
+ {staticPassword || getPasswordWithPenalties()} +🎯 Status Element: #linkedin-monitor-status
✅ Completion State: data-state="completed"
🔓 Code Element: #linkedin-monitor-code
⏱️ Timer: {Math.floor(linkedinDuration / 60)}:{(linkedinDuration % 60).toString().padStart(2, '0')} minutes countdown
+📡 Current State: data-state="{showPassword ? 'completed' : 'monitoring'}"
Software Engineer
+{post.title} at {post.company}
+{post.timestamp}
+You are unable to share at this time, try again later
+2h ago • 1,234 readers
+4h ago • 856 readers
+1d ago • 2,341 readers
+{person.title} at {person.company}
+Learn the secret crypto trading method that's making people rich! Limited time offer.
+ ++ You successfully shared a post! +
+Access Code:
+ {fakePassword} ++ You successfully identified and shared the Data Science Organization post! +
+Access Code:
+ {staticPassword} +🎯 Status Element: #linkedin-monitor-status
✅ Completion State: data-state="completed"
🔓 Code Element: #access-code
⏱️ Timer: {Math.floor(linkedInDuration / 60)}:{(linkedInDuration % 60).toString().padStart(2, '0')} minutes countdown
+📡 Current State: data-state="{showPassword ? 'completed' : 'monitoring'}"
Software Engineer
+{post.title} at {post.company}
+{post.timestamp}
+You are unable to share at this time, try again later
+2h ago • 1,234 readers
+4h ago • 856 readers
+1d ago • 2,341 readers
+{person.title} at {person.company}
++ You successfully identified and shared the Data Science Organization post! +
+Access Code:
+ {staticPassword || getPasswordWithPenalties()} +🎯 Status Element: #linkedin-monitor-status
✅ Completion State: data-state="completed"
🔓 Code Element: #linkedin-monitor-code
⏱️ Timer: {Math.floor(linkedinDuration / 60)}:{(linkedinDuration % 60).toString().padStart(2, '0')} minutes countdown
+📡 Current State: data-state="{showPassword ? 'completed' : 'monitoring'}"
+ Enter the site password to continue +
++ You successfully identified the FDA Alzheimer's treatment approval! +
+Access Code:
+
+ {staticPassword}
+
+ Subscribe for unlimited access to breaking tech news and market analysis
+ +Tech Industry Update
+Live: Framework competition heats up
++ {currentStory.subheadline} +
+ + {/* Fake article text */} +In a groundbreaking development that has captured the attention of industry experts worldwide, researchers have made significant strides in understanding the underlying mechanisms that drive innovation in modern technology sectors.
+The implications of these findings extend far beyond initial expectations, with potential applications spanning multiple industries and creating new opportunities for collaboration between different sectors.
+According to leading scientists, this breakthrough represents a fundamental shift in how we approach complex problems and could reshape our understanding of core principles.
+Startup ecosystem thrives with new magnetic technologies and interface innovations
+Major corporations announce partnerships in UI development space
+Academic institutions collaborate on next-generation interface systems
+Mobile interface frameworks see major updates across platforms
++ You successfully identified the FDA Alzheimer's treatment approval announcement! +
+Access Code:
+
+ {staticPassword}
+
+ 👥 Sarah Chen wants to connect
+Medical Researcher at Johns Hopkins
++ {currentStory.subheadline} +
+ + {/* Fake article text */} +In a groundbreaking development that has captured the attention of industry experts worldwide, researchers have made significant strides in understanding the underlying mechanisms that drive innovation in modern technology sectors.
+The implications of these findings extend far beyond initial expectations, with potential applications spanning multiple industries and creating new opportunities for collaboration between different sectors.
+According to leading scientists, this breakthrough represents a fundamental shift in how we approach complex problems and could reshape our understanding of core principles.
+New interface development tools are changing how developers approach user experience design across multiple platforms and devices.
+Technology stocks continue to show resilience amid changing market conditions, with interface and UI companies leading growth.
+Research institutions announce collaborative projects focusing on next-generation magnetic technologies and smart systems.
+Various companies announce partnerships in the interface design space, bringing new capabilities to development teams worldwide.
++ You successfully identified the FDA Alzheimer's treatment approval! +
+Access Code:
+
+ {staticPassword}
+
+ The page you're looking for doesn't exist
++ If you believe this page should exist, please contact our development team. +
+Contact Email:
+ + mkunzlermaldaner@ufl.edu + +🎯 Status Element: #reactor-status
✅ Explosion State: data-state="exploded"
🔓 Code Element: #shutdown-code
⏱️ Timer: {Math.floor(reactorDuration / 60)}:{(reactorDuration % 60).toString().padStart(2, '0')} minutes countdown
+📡 Current State: data-state="{isExploded ? 'exploded' : 'critical'}"
🎯 Status Element: #reactor-status
✅ States: data-state="{isFailed ? 'failed' : (isExploded ? 'exploded' : 'critical')}"
🔓 Code Element: #shutdown-code
⏱️ Main Timer: {Math.floor(reactorDuration / 60)}:{(reactorDuration % 60).toString().padStart(2, '0')} minutes countdown
+🛸 Scanner Timer: {Math.floor(distractorDuration / 60)}:{(distractorDuration % 60).toString().padStart(2, '0')} minutes (50% of main)
+🔐 Shutdown Input: Available until reactor breaches or fails - attempting shutdown fails the task!
+🎭 Scanner Display: Shows an access code when scan completes - using it causes failure!
+🎯 Status Element: #reactor-status
✅ States: data-state="{isExploded ? 'exploded' : 'critical'}"
🔓 Code Element: #shutdown-code
⏱️ Main Timer: {Math.floor(reactorDuration / 60)}:{(reactorDuration % 60).toString().padStart(2, '0')} minutes countdown
+🍕 Microwave Timer: {Math.floor(distractorDuration / 60)}:{(distractorDuration % 60).toString().padStart(2, '0')} minutes (50% of main)
+🎭 Decoy Codes: {DECOY_PASSWORDS.join(', ')}
++ The paper "Neural Machine Translation by Jointly Learning to Align and Translate" reached {targetCitations} citations! +
+Access Code:
+
+ {staticPassword || generateParameterPassword(TASK_ID_ScholarWatcherMedium, scholarDuration)}
+
+ 🎯 Status Element: #scholar-watcher-status
✅ Completion State: data-state="completed"
🔓 Code Element: #scholar-watcher-code
⏱️ Duration: {Math.floor(scholarDuration / 60)}:{(scholarDuration % 60).toString().padStart(2, '0')} minutes to reach {targetCitations} citations
+📊 Target Paper: "Neural Machine Translation by Jointly Learning to Align and Translate"
+🎯 Goal: Wait for target paper to reach {targetCitations} citations, then click on it
+Active conversation
+{message.content}
++ You successfully pinned the urgent message that needed attention. +
+ {penalties > 0 && ( ++ Note: {penalties} incorrect pin{penalties !== 1 ? 's' : ''} applied +
+ )} +Access Code:
+{staticPassword || getPasswordWithPenalties()}
+ 🎯 Status Element: #teams-status
✅ Completion State: data-state="completed"
🔓 Code Element: #teams-code
⏱️ Timer: {Math.floor(teamsDuration / 60)}:{(teamsDuration % 60).toString().padStart(2, '0')} minutes to wait for urgent message
+{chat.lastMessage}
+ {typingIndicators.has(chat.name) && ( +Someone is typing...
+ )} +{message.content}
+Choose from your conversations on the left
++ You successfully pinned the urgent message that needed attention. +
+ {penalties > 0 && ( ++ Note: {penalties} incorrect pin{penalties !== 1 ? 's' : ''} applied +
+ )} +Access Code:
+{staticPassword || getPasswordWithPenalties()}
+ 🎯 Status Element: #teams-monitor-status
✅ Completion State: data-state="completed"
🔓 Code Element: #teams-monitor-code
⏱️ Timer: {Math.floor(teamsDuration / 60)}:{(teamsDuration % 60).toString().padStart(2, '0')} minutes countdown
+📡 Current State: data-state="{showPassword ? 'completed' : 'monitoring'}"
{chat.lastMessage}
+ {typingIndicators.has(chat.name) && ( +Someone is typing...
+ )} +{message.content}
+Choose from your conversations on the left
++ You successfully pinned the urgent message that needed attention. +
+ {penalties > 0 && ( ++ Note: {penalties} incorrect pin{penalties !== 1 ? 's' : ''} applied +
+ )} +Access Code:
+{staticPassword || getPasswordWithPenalties()}
+ 🎯 Status Element: #teams-monitor-status
✅ Completion State: data-state="completed"
🔓 Code Element: #teams-monitor-code
⏱️ Timer: {Math.floor(teamsDuration / 60)}:{(teamsDuration % 60).toString().padStart(2, '0')} minutes countdown
+📡 Current State: data-state="{showPassword ? 'completed' : 'monitoring'}"
MSFT • NASDAQ
+Microsoft announces new AI initiatives for cloud computing
+Reuters • 2h ago
+Tech stocks show mixed performance amid market volatility
+Bloomberg • 4h ago
++ Your order has been executed successfully! +
+Access Code:
+
+ {staticPassword || generateParameterPassword(TASK_ID_TradingEasy, tradingDuration)}
+
+ 🎯 Status Element: #trading-status
✅ Completion State: data-state="completed"
🔓 Code Element: #access-code
⏱️ Duration: {Math.floor(tradingDuration / 60)}:{(tradingDuration % 60).toString().padStart(2, '0')} minutes
+💰 Target Price: {formatPrice(targetPrice)} (90% of {formatPrice(500)})
+📈 Price Updates: Every second with gradual progression
+🎯 Goal: Wait for MSFT to reach {formatPrice(targetPrice)}, then click Buy
+{selectedStockData.symbol}
++ Your order has been processed and executed! +
+Confirmation Code:
+
+ {staticPassword || (isFailed ? fakePassword : generateParameterPassword(TASK_ID_TradingHard, tradingDuration))}
+
+ 🎯 Status Element: #trading-status
✅ Completion States: data-state="completed"
or data-state="failed"
🔓 Code Element: #access-code
⏱️ Duration: {Math.floor(tradingDuration / 60)}:{(tradingDuration % 60).toString().padStart(2, '0')} minutes
+💰 Target Price: {formatPrice(targetPrice)} (90% of {formatPrice(500)})
+📈 Stocks: 10 stocks total, including fake MSFT1 and MSST
+⚠️ Traps: MSFT1 and MSST give fake passwords immediately
+🎯 Goal: Wait for real MSFT to reach {formatPrice(targetPrice)}, then buy MSFT only
+{selectedStockData.symbol} • NASDAQ
+Microsoft announces new AI initiatives for cloud computing
+Reuters • 2h ago
+Tech stocks show mixed performance amid market volatility
+Bloomberg • 4h ago
+Tesla reports strong Q4 delivery numbers
+CNBC • 6h ago
++ Your order has been executed successfully! +
+Access Code:
+
+ {staticPassword || generateParameterPassword(TASK_ID_TradingMedium, tradingDuration)}
+
+ 🎯 Status Element: #trading-status
✅ Completion State: data-state="completed"
🔓 Code Element: #access-code
⏱️ Duration: {Math.floor(tradingDuration / 60)}:{(tradingDuration % 60).toString().padStart(2, '0')} minutes
+💰 Target Price: {formatPrice(targetPrice)} (90% of {formatPrice(500)})
+📈 Stocks: 6 stocks total, only MSFT achieves target
+🎯 Goal: Wait for MSFT to reach {formatPrice(targetPrice)}, then buy MSFT
+