-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathopenenv.yaml
More file actions
102 lines (94 loc) · 3 KB
/
openenv.yaml
File metadata and controls
102 lines (94 loc) · 3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# Manifest header: identity and descriptive metadata for this environment.
name: CustomerSupportTriage-v0
# Kept as a quoted string.
version: "1.0.0"
# Folded scalar (`>`): the indented lines are joined with spaces into a single
# paragraph. The body must be indented deeper than the `description` key.
description: >
  A real-world AI agent benchmark for customer support triage.
  The agent must read a queue of support tickets and for each one:
  assign a priority level, route to the correct department,
  draft a professional customer reply, and flag tickets requiring human review.
author: support-triage-team
# Free-form labels attached to this environment.
tags:
  - openenv
  - customer-support
  - triage
  - nlp
  - real-world
# Task tiers, listed from easiest to hardest. In every tier max_steps matches
# the ticket count stated in its description (5 / 10 / 15), and the
# success_threshold drops as difficulty rises (0.6 / 0.55 / 0.5).
tasks:
  - name: easy
    description: "5 unambiguous tickets with clear signals"
    difficulty: easy
    max_steps: 5
    success_threshold: 0.6
  - name: medium
    description: "10 tickets with multi-issue bodies and ambiguous routing"
    difficulty: medium
    max_steps: 10
    success_threshold: 0.55
  - name: hard
    # Must stay quoted: the text contains ": ", which would otherwise be
    # parsed as a nested mapping.
    description: "15 adversarial tickets: misleading sentiment, legal edge-cases, downplayed urgency"
    difficulty: hard
    max_steps: 15
    success_threshold: 0.5
# Schema of the observation: an object holding the ticket queue plus scalar
# counters.
observation_space:
  type: object
  fields:
    # Each queue entry is one ticket object.
    queue:
      type: array
      items:
        type: object
        fields:
          ticket_id: {type: string}
          subject: {type: string}
          body: {type: string}
          customer_name: {type: string}
          customer_tier: {type: string, enum: [free, pro, enterprise]}
          created_at: {type: string, format: "ISO 8601"}
          sentiment: {type: string, enum: [positive, neutral, negative, angry]}
          tags:
            type: array
            items: {type: string}
    # NOTE(review): the five fields below are placed at observation level
    # (siblings of `queue`) rather than inside each ticket. This nesting was
    # reconstructed from the field names — confirm against the environment
    # implementation.
    processed: {type: integer}
    total_tickets: {type: integer}
    task_name: {type: string}
    step_number: {type: integer}
    # NOTE(review): unit of time_remaining is not stated here — confirm.
    time_remaining: {type: integer}
# Schema of the action: an object whose `actions` array carries one triage
# decision per entry.
# NOTE(review): all six fields are nested inside each `actions` item; this
# nesting was reconstructed from the field descriptions — confirm against the
# environment implementation.
action_space:
  type: object
  fields:
    actions:
      type: array
      items:
        type: object
        fields:
          ticket_id:
            type: string
            description: "ID of the ticket being triaged"
          priority:
            type: string
            enum: [low, medium, high, urgent]
            description: "Assigned priority level"
          department:
            type: string
            enum: [billing, technical, shipping, returns, general, escalation]
            description: "Department to route this ticket to"
          response:
            type: string
            # Length bounds for the drafted reply.
            min_length: 10
            max_length: 500
            description: "Draft reply to the customer"
          needs_human:
            type: boolean
            description: "Flag for human review"
          reasoning:
            type: string
            description: "Agent's chain of thought (not scored)"
# Reward signal: a float within the closed range [0.0, 1.0].
reward:
  type: float
  range: [0.0, 1.0]
  # Folded scalar; the body must be indented deeper than the `description`
  # key. The four component weights listed below sum to 100%.
  description: >
    Mean score across graded tickets in this step.
    Composite of: priority accuracy (30%), routing accuracy (30%),
    response quality/keyword coverage (25%), escalation correctness (15%).
    Partial credit awarded for near-correct priority and routing.
# HTTP routes, one entry per operation: the method and path for each.
endpoints:
  reset: {method: POST, path: /reset}
  step: {method: POST, path: /step}
  state: {method: GET, path: /state}
  health: {method: GET, path: /health}