Skip to content

Commit b538a62

Browse files
feat(ui): Add 'View All Results' modal for evaluation comparison
- Add AllResultsModal component displaying all model results in a unified view
- Add 'View All Results' button to the evaluation details page actions
- Modal shows instruction, rubric, expected output, and all model responses
- Uses card-luxe styling with a gold-themed custom scrollbar
- Integrates with the existing window.__EVALUATION_DATA__ global pattern
- Follows the Modal.astro base component with 4xl size for wide display
1 parent c0650b1 commit b538a62

File tree

3 files changed

+290
-0
lines changed

3 files changed

+290
-0
lines changed
Lines changed: 277 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,277 @@
1+
---
2+
/**
3+
* All Results Modal component.
4+
*
5+
* Displays all model results from an evaluation in a single, scrollable view
6+
* for easy comparison. Uses data from window.__EVALUATION_DATA__ global.
7+
*
8+
* @example
9+
* // In evaluation details page
10+
* <AllResultsModal />
11+
*/
12+
import Modal from './ui/Modal.astro';
13+
---
14+
15+
<Modal id="all-results-modal" title="All Model Results" size="4xl">
16+
<div slot="content" class="space-y-6">
17+
<!-- Instruction -->
18+
<div>
19+
<h3 class="text-sm font-semibold text-base-content/70 mb-2">Instruction</h3>
20+
<div
21+
id="all-results-instruction"
22+
class="bg-base-200 p-4 rounded-lg whitespace-pre-wrap text-sm"
23+
>
24+
</div>
25+
</div>
26+
27+
<!-- Rubric (conditional) -->
28+
<div id="all-results-rubric-container" class="hidden">
29+
<h3 class="text-sm font-semibold text-base-content/70 mb-2">Accuracy Rubric</h3>
30+
<div id="all-results-rubric" class="bg-base-200 p-4 rounded-lg font-mono text-sm"></div>
31+
</div>
32+
33+
<!-- Expected Output (conditional) -->
34+
<div id="all-results-expected-container" class="hidden">
35+
<h3 class="text-sm font-semibold text-base-content/70 mb-2">Expected Output</h3>
36+
<div
37+
id="all-results-expected"
38+
class="bg-base-200 p-4 rounded-lg font-mono text-sm whitespace-pre-wrap"
39+
>
40+
</div>
41+
</div>
42+
43+
<!-- Model Results Grid -->
44+
<div>
45+
<h3 class="text-sm font-semibold text-base-content/70 mb-3">Model Responses</h3>
46+
<div id="all-results-grid" class="grid grid-cols-1 md:grid-cols-2 gap-4">
47+
<!-- Dynamically populated result cards -->
48+
</div>
49+
</div>
50+
</div>
51+
52+
<div slot="footer">
53+
<button class="btn btn-ghost" onclick="document.getElementById('all-results-modal').close()">
54+
Close
55+
</button>
56+
</div>
57+
</Modal>
58+
59+
<script>
60+
import { initializeOnce } from '@lib/utils/client-utils';
61+
import { ModalUtils } from '@lib/utils/client-utils';
62+
63+
/**
 * A single model's outcome inside an evaluation, as found in the `results`
 * array of window.__EVALUATION_DATA__.
 */
interface EvaluationResult {
  // --- Identity -----------------------------------------------------------
  model_id: string;
  /** Display name used as the heading of the model's result card. */
  model_name: string;
  provider: string;

  // --- Run state ----------------------------------------------------------
  /** 'completed', 'failed' and 'running' have dedicated badge styles. */
  status: string;
  /** Rendered in a footer line only when it differs from 0.3. */
  temperature_used?: number;

  // --- Output -------------------------------------------------------------
  /** Raw model output; the card shows '[No response]' when absent or empty. */
  response_text?: string;

  // --- Metrics ------------------------------------------------------------
  /** Rendered with an `ms` suffix. */
  execution_time_ms?: number;
  input_tokens?: number;
  output_tokens?: number;
  total_tokens?: number;

  // --- Scoring ------------------------------------------------------------
  /** Rendered with a `%` suffix; 90 and 70 are the colour thresholds. */
  accuracy_score?: number;
  /** Optional explanation rendered in its own "Reasoning" section. */
  accuracy_reasoning?: string;
}

/**
 * Shape this component expects of window.__EVALUATION_DATA__.
 */
interface EvaluationData {
  /** Prompt shown at the top of the modal. */
  instruction_text: string;
  /** When present and non-empty, the "Accuracy Rubric" section is revealed. */
  accuracy_rubric?: string;
  /** When present and non-empty, the "Expected Output" section is revealed. */
  expected_output?: string;
  /** One entry per model; each becomes a card in the results grid. */
  results: EvaluationResult[];
}
87+
88+
/** Badge modifier classes for the statuses that have a dedicated style. */
const STATUS_BADGE_CLASSES: ReadonlyMap<string, string> = new Map([
  ['completed', 'badge-success'],
  ['failed', 'badge-error'],
  ['running', 'badge-info'],
]);

/**
 * Resolve the badge modifier class for a result status.
 *
 * @param status - Status string of a result.
 * @returns The class mapped to `status`, or 'badge-warning' for any status
 *   without a dedicated mapping.
 */
function getStatusBadgeClass(status: string): string {
  return STATUS_BADGE_CLASSES.get(status) ?? 'badge-warning';
}
103+
104+
/**
 * Build a display label from a status by upper-casing its first character.
 *
 * @param status - Status string of a result.
 * @returns `status` with its first character upper-cased and the remainder
 *   untouched; an empty string stays empty.
 */
function getStatusLabel(status: string): string {
  const initial = status.slice(0, 1);
  const remainder = status.slice(1);
  return `${initial.toUpperCase()}${remainder}`;
}
110+
111+
/**
 * Escape a value so it can be interpolated into an HTML string without being
 * parsed as markup.
 *
 * @param value - Any value; it is stringified before escaping.
 * @returns The stringified value with `&`, `<`, `>`, `"` and `'` replaced by
 *   their HTML entities.
 */
function escapeHtml(value: unknown): string {
  return String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Pick the colour class for an accuracy score.
 *
 * @param score - Accuracy percentage, or null/undefined when not scored.
 * @returns 'metric-success' for >= 90, 'text-warning' for >= 70,
 *   'text-error' for any lower score, and '' when there is no score.
 */
function getAccuracyClass(score: number | null | undefined): string {
  if (score == null) return '';
  if (score >= 90) return 'metric-success';
  if (score >= 70) return 'text-warning';
  return 'text-error';
}

/**
 * Render one model result as the HTML string for its card.
 *
 * Every value taken from the result is escaped before interpolation because
 * the string is assigned to `innerHTML` by the caller.
 *
 * @param result - The model result to render.
 * @returns HTML markup for a single result card.
 */
function renderResultCard(result: EvaluationResult): string {
  // `!= null` (rather than truthiness) keeps a legitimate 0 visible and treats
  // both null and undefined as "no value".
  const timeLabel = result.execution_time_ms != null ? `${result.execution_time_ms}ms` : '-';
  const tokensLabel = result.total_tokens ?? '-';
  const accuracyLabel = result.accuracy_score != null ? `${result.accuracy_score}%` : '-';

  const reasoningSection = result.accuracy_reasoning
    ? `
      <div>
        <h5 class="text-xs font-semibold text-base-content/70 mb-1">Reasoning</h5>
        <div class="bg-base-200 p-3 rounded-lg text-sm max-h-40 overflow-y-auto custom-scrollbar">${escapeHtml(result.accuracy_reasoning)}</div>
      </div>
    `
    : '';

  // Only shown when the run used something other than 0.3.
  const temperatureSection =
    typeof result.temperature_used === 'number' && result.temperature_used !== 0.3
      ? `
      <div class="text-xs text-base-content/50">
        Temperature: ${result.temperature_used.toFixed(1)}
      </div>
    `
      : '';

  // The response box is `whitespace-pre-wrap`, so the interpolation sits flush
  // against its tags; otherwise the template's own indentation would render.
  return `
    <div class="card-luxe p-4 space-y-3">
      <!-- Model Header -->
      <div class="flex items-center justify-between">
        <div>
          <h4 class="font-display font-semibold text-gradient-gold text-lg">${escapeHtml(result.model_name)}</h4>
          <div class="text-xs text-base-content/50 capitalize">${escapeHtml(result.provider)}</div>
        </div>
        <span class="badge ${getStatusBadgeClass(result.status)} badge-sm">${escapeHtml(getStatusLabel(result.status))}</span>
      </div>

      <!-- Metrics -->
      <div class="grid grid-cols-3 gap-2 text-center">
        <div class="bg-base-200 p-2 rounded-lg">
          <div class="text-[10px] text-base-content/50 uppercase tracking-wider">Time</div>
          <div class="font-mono text-sm font-semibold">${escapeHtml(timeLabel)}</div>
        </div>
        <div class="bg-base-200 p-2 rounded-lg">
          <div class="text-[10px] text-base-content/50 uppercase tracking-wider">Tokens</div>
          <div class="font-mono text-sm font-semibold">${escapeHtml(tokensLabel)}</div>
        </div>
        <div class="bg-base-200 p-2 rounded-lg">
          <div class="text-[10px] text-base-content/50 uppercase tracking-wider">Accuracy</div>
          <div class="font-mono text-sm font-semibold ${getAccuracyClass(result.accuracy_score)}">${escapeHtml(accuracyLabel)}</div>
        </div>
      </div>

      <!-- Response -->
      <div>
        <h5 class="text-xs font-semibold text-base-content/70 mb-1">Response</h5>
        <div class="bg-base-200 p-3 rounded-lg font-mono text-sm max-h-60 overflow-y-auto whitespace-pre-wrap custom-scrollbar">${escapeHtml(result.response_text || '[No response]')}</div>
      </div>

      <!-- Reasoning (if available) -->
      ${reasoningSection}

      <!-- Temperature (if not default) -->
      ${temperatureSection}
    </div>
  `;
}

/**
 * Populate the "All Model Results" modal from window.__EVALUATION_DATA__ and
 * open it.
 *
 * Fills the instruction, reveals the rubric and expected-output sections only
 * when they have content, and rebuilds the results grid with one card per
 * result. Logs an error and returns without opening the modal when no
 * evaluation data is present.
 */
function showAllResults(): void {
  const evalData = window.__EVALUATION_DATA__ as EvaluationData | undefined;
  if (!evalData) {
    console.error('No evaluation data available');
    return;
  }

  // Populate instruction
  const instructionEl = document.getElementById('all-results-instruction');
  if (instructionEl) {
    instructionEl.textContent = evalData.instruction_text || '-';
  }

  // Populate rubric (conditional)
  const rubricContainer = document.getElementById('all-results-rubric-container');
  const rubricEl = document.getElementById('all-results-rubric');
  if (evalData.accuracy_rubric && rubricEl) {
    rubricContainer?.classList.remove('hidden');
    rubricEl.textContent = evalData.accuracy_rubric;
  } else {
    rubricContainer?.classList.add('hidden');
  }

  // Populate expected output (conditional)
  const expectedContainer = document.getElementById('all-results-expected-container');
  const expectedEl = document.getElementById('all-results-expected');
  if (evalData.expected_output && expectedEl) {
    expectedContainer?.classList.remove('hidden');
    expectedEl.textContent = evalData.expected_output;
  } else {
    expectedContainer?.classList.add('hidden');
  }

  // Populate results grid; a missing results array renders as an empty grid.
  const gridEl = document.getElementById('all-results-grid');
  if (gridEl) {
    gridEl.innerHTML = (evalData.results ?? []).map((result) => renderResultCard(result)).join('');
  }

  // Open modal
  ModalUtils.open('all-results-modal');
}
238+
239+
/**
 * Publish showAllResults on window so the page's button click handler can
 * reach it.
 */
const exposeShowAllResults = (): void => {
  window.showAllResults = showAllResults;
};

// Register through initializeOnce so duplicate listeners are not created on
// Astro page transitions.
initializeOnce('allResultsModal', exposeShowAllResults);

// Run the same guarded registration again on every Astro page load.
document.addEventListener('astro:page-load', () => {
  initializeOnce('allResultsModal', exposeShowAllResults);
});
251+
</script>
252+
253+
<style>
254+
/* Custom gold-themed scrollbar for response areas */
255+
.custom-scrollbar {
256+
scrollbar-width: thin;
257+
scrollbar-color: var(--color-luxe-gold) var(--color-gold-bg-subtle);
258+
}
259+
260+
.custom-scrollbar::-webkit-scrollbar {
261+
width: 8px;
262+
}
263+
264+
.custom-scrollbar::-webkit-scrollbar-track {
265+
background: var(--color-gold-bg-subtle);
266+
border-radius: 4px;
267+
}
268+
269+
.custom-scrollbar::-webkit-scrollbar-thumb {
270+
background: var(--color-luxe-gold);
271+
border-radius: 4px;
272+
}
273+
274+
.custom-scrollbar::-webkit-scrollbar-thumb:hover {
275+
background: var(--color-luxe-gold-light);
276+
}
277+
</style>

src/env.d.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ declare global {
5252
totalTokens?: number;
5353
temperature?: number;
5454
}) => void;
55+
/** Show all results modal for evaluation comparison */
56+
showAllResults?: () => void;
5557
openDrawer?: (id: string) => void;
5658
closeDrawer?: (id: string) => void;
5759
/** Toast notification system */

src/pages/evaluations/[id].astro

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import Badge from '@components/ui/Badge.astro';
44
import Button from '@components/ui/Button.astro';
55
import Breadcrumbs from '@components/ui/Breadcrumbs.astro';
66
import EvaluationDetailsDrawer from '@components/EvaluationDetailsDrawer.astro';
7+
import AllResultsModal from '@components/AllResultsModal.astro';
78
import TemplateManager from '@components/TemplateManager.astro';
89
import { getEvaluation, getResults } from '@lib/db';
910
@@ -119,6 +120,9 @@ function getStatusBadgeVariant(status: string) {
119120
</div>
120121

121122
<div class={evaluationData.results.length > 0 ? '' : 'hidden'} id="actions-container">
123+
<Button variant="ghost" size="md" id="view-all-results-btn" class="mr-2">
124+
View All Results
125+
</Button>
122126
<Button variant="ghost" size="md" id="save-template-btn"> Save as Template </Button>
123127
</div>
124128
</div>
@@ -270,6 +274,7 @@ function getStatusBadgeVariant(status: string) {
270274
<!-- Overlays at body level for proper z-index stacking -->
271275
<Fragment slot="overlays">
272276
<EvaluationDetailsDrawer />
277+
<AllResultsModal />
273278
</Fragment>
274279
</Layout>
275280

@@ -302,6 +307,12 @@ function getStatusBadgeVariant(status: string) {
302307
});
303308
}
304309

310+
// View All Results
311+
const viewAllResultsBtn = document.getElementById('view-all-results-btn');
312+
if (viewAllResultsBtn) {
313+
viewAllResultsBtn.addEventListener('click', () => window.showAllResults?.());
314+
}
315+
305316
/**
306317
* Initializes click event listeners for all result rows.
307318
* Clicking a row opens the evaluation details drawer.

0 commit comments

Comments
 (0)