|
| 1 | +--- |
| 2 | +/** |
| 3 | + * All Results Modal component. |
| 4 | + * |
| 5 | + * Displays all model results from an evaluation in a single, scrollable view |
| 6 | + * for easy comparison. Uses data from window.__EVALUATION_DATA__ global. |
| 7 | + * |
| 8 | + * @example |
| 9 | + * // In evaluation details page |
| 10 | + * <AllResultsModal /> |
| 11 | + */ |
| 12 | +import Modal from './ui/Modal.astro'; |
| 13 | +--- |
| 14 | + |
| 15 | +<Modal id="all-results-modal" title="All Model Results" size="4xl"> |
| 16 | + <div slot="content" class="space-y-6"> |
| 17 | + <!-- Instruction --> |
| 18 | + <div> |
| 19 | + <h3 class="text-sm font-semibold text-base-content/70 mb-2">Instruction</h3> |
| 20 | + <div |
| 21 | + id="all-results-instruction" |
| 22 | + class="bg-base-200 p-4 rounded-lg whitespace-pre-wrap text-sm" |
| 23 | + > |
| 24 | + </div> |
| 25 | + </div> |
| 26 | + |
| 27 | + <!-- Rubric (conditional) --> |
| 28 | + <div id="all-results-rubric-container" class="hidden"> |
| 29 | + <h3 class="text-sm font-semibold text-base-content/70 mb-2">Accuracy Rubric</h3> |
| 30 | + <div id="all-results-rubric" class="bg-base-200 p-4 rounded-lg font-mono text-sm"></div> |
| 31 | + </div> |
| 32 | + |
| 33 | + <!-- Expected Output (conditional) --> |
| 34 | + <div id="all-results-expected-container" class="hidden"> |
| 35 | + <h3 class="text-sm font-semibold text-base-content/70 mb-2">Expected Output</h3> |
| 36 | + <div |
| 37 | + id="all-results-expected" |
| 38 | + class="bg-base-200 p-4 rounded-lg font-mono text-sm whitespace-pre-wrap" |
| 39 | + > |
| 40 | + </div> |
| 41 | + </div> |
| 42 | + |
| 43 | + <!-- Model Results Grid --> |
| 44 | + <div> |
| 45 | + <h3 class="text-sm font-semibold text-base-content/70 mb-3">Model Responses</h3> |
| 46 | + <div id="all-results-grid" class="grid grid-cols-1 md:grid-cols-2 gap-4"> |
| 47 | + <!-- Dynamically populated result cards --> |
| 48 | + </div> |
| 49 | + </div> |
| 50 | + </div> |
| 51 | + |
| 52 | + <div slot="footer"> |
| 53 | + <button class="btn btn-ghost" onclick="document.getElementById('all-results-modal').close()"> |
| 54 | + Close |
| 55 | + </button> |
| 56 | + </div> |
| 57 | +</Modal> |
| 58 | + |
| 59 | +<script> |
| 60 | + import { initializeOnce } from '@lib/utils/client-utils'; |
| 61 | + import { ModalUtils } from '@lib/utils/client-utils'; |
| 62 | + |
/**
 * One model's outcome within an evaluation, as read from the `results`
 * array of `window.__EVALUATION_DATA__`.
 *
 * Only the identity fields and `status` are required; every metric is
 * optional and the modal renders a placeholder when one is absent.
 */
interface EvaluationResult {
  // --- Identity -----------------------------------------------------------
  model_id: string;
  /** Shown as the card heading. */
  model_name: string;
  /** Shown beneath the model name. */
  provider: string;

  // --- Run outcome --------------------------------------------------------
  /** `completed`, `failed` and `running` get dedicated badge colours. */
  status: string;
  /** Body of the model's answer; the card shows `[No response]` when empty. */
  response_text?: string;
  /** Displayed only when it differs from 0.3. */
  temperature_used?: number;

  // --- Usage metrics ------------------------------------------------------
  /** Rendered with an `ms` suffix. */
  execution_time_ms?: number;
  input_tokens?: number;
  output_tokens?: number;
  /** The token figure shown on the card. */
  total_tokens?: number;

  // --- Accuracy grading ---------------------------------------------------
  /** Rendered as a percentage; colour thresholds sit at 90 and 70. */
  accuracy_score?: number;
  /** Free-text explanation shown in its own section when present. */
  accuracy_reasoning?: string;
}
| 80 | + |
/**
 * Shape of the `window.__EVALUATION_DATA__` payload consumed by the modal.
 */
interface EvaluationData {
  /** Prompt text displayed in the "Instruction" section. */
  instruction_text: string;
  /** One entry per model; each becomes a card in the results grid. */
  results: EvaluationResult[];
  /** When present, the "Accuracy Rubric" section is shown. */
  accuracy_rubric?: string;
  /** When present, the "Expected Output" section is shown. */
  expected_output?: string;
}
| 87 | + |
/**
 * Map an evaluation status to the badge modifier class for its status pill.
 *
 * @param status - Raw status string from an evaluation result.
 * @returns `badge-success`, `badge-error` or `badge-info` for the three
 *   recognised statuses, and `badge-warning` for anything else.
 */
function getStatusBadgeClass(status: string): string {
  if (status === 'completed') {
    return 'badge-success';
  }
  if (status === 'failed') {
    return 'badge-error';
  }
  if (status === 'running') {
    return 'badge-info';
  }
  // Any unrecognised status falls back to the warning style.
  return 'badge-warning';
}
| 103 | + |
/**
 * Build the display label for a status by upper-casing its first UTF-16
 * code unit and leaving the remainder untouched.
 *
 * @param status - Raw status string, e.g. `completed`.
 * @returns The capitalised label, e.g. `Completed`; an empty string stays empty.
 */
function getStatusLabel(status: string): string {
  const initial = status.slice(0, 1);
  const remainder = status.slice(1);
  return `${initial.toUpperCase()}${remainder}`;
}
| 110 | + |
/** Temperature treated as the default; cards only mention other values. */
const DEFAULT_TEMPERATURE = 0.3;

/**
 * Escape a value so it can be interpolated into an HTML string as text or as
 * a double-/single-quoted attribute value without being parsed as markup.
 */
function escapeHtml(value: unknown): string {
  return String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Pick the colour class for an accuracy score: 90 and above is success,
 * 70 and above is warning, anything lower is error, no score is unstyled.
 */
function getAccuracyClass(score: number | null | undefined): string {
  if (score == null) return '';
  if (score >= 90) return 'metric-success';
  if (score >= 70) return 'text-warning';
  return 'text-error';
}

/**
 * Render one result as an HTML card string. Every data-derived value is
 * HTML-escaped because the string is assigned to `innerHTML`.
 */
function renderResultCard(result: EvaluationResult): string {
  // Tolerate a missing status instead of throwing while rendering the card.
  const status = String(result.status ?? '');

  // A zero or missing duration renders as '-'.
  const timeText = result.execution_time_ms ? `${result.execution_time_ms}ms` : '-';
  const tokensText = result.total_tokens ?? '-';

  // `!= null` also covers a JSON `null`, which would otherwise show "null%".
  const score = result.accuracy_score;
  const accuracyText = score != null ? `${score}%` : '-';

  const reasoningHtml = result.accuracy_reasoning
    ? `
      <div>
        <h5 class="text-xs font-semibold text-base-content/70 mb-1">Reasoning</h5>
        <div class="bg-base-200 p-3 rounded-lg text-sm max-h-40 overflow-y-auto custom-scrollbar">
          ${escapeHtml(result.accuracy_reasoning)}
        </div>
      </div>`
    : '';

  // The typeof guard keeps a null or non-numeric temperature from throwing in toFixed.
  const temperature = result.temperature_used;
  const temperatureHtml =
    typeof temperature === 'number' && temperature !== DEFAULT_TEMPERATURE
      ? `
      <div class="text-xs text-base-content/50">
        Temperature: ${escapeHtml(temperature.toFixed(1))}
      </div>`
      : '';

  // The response container is `whitespace-pre-wrap`, so its content is placed
  // flush against the tags; template indentation would otherwise be displayed.
  return `
    <div class="card-luxe p-4 space-y-3">
      <!-- Model Header -->
      <div class="flex items-center justify-between">
        <div>
          <h4 class="font-display font-semibold text-gradient-gold text-lg">
            ${escapeHtml(result.model_name)}
          </h4>
          <div class="text-xs text-base-content/50 capitalize">${escapeHtml(result.provider)}</div>
        </div>
        <span class="badge ${getStatusBadgeClass(status)} badge-sm">
          ${escapeHtml(getStatusLabel(status))}
        </span>
      </div>

      <!-- Metrics -->
      <div class="grid grid-cols-3 gap-2 text-center">
        <div class="bg-base-200 p-2 rounded-lg">
          <div class="text-[10px] text-base-content/50 uppercase tracking-wider">Time</div>
          <div class="font-mono text-sm font-semibold">
            ${escapeHtml(timeText)}
          </div>
        </div>
        <div class="bg-base-200 p-2 rounded-lg">
          <div class="text-[10px] text-base-content/50 uppercase tracking-wider">Tokens</div>
          <div class="font-mono text-sm font-semibold">
            ${escapeHtml(tokensText)}
          </div>
        </div>
        <div class="bg-base-200 p-2 rounded-lg">
          <div class="text-[10px] text-base-content/50 uppercase tracking-wider">Accuracy</div>
          <div class="font-mono text-sm font-semibold ${getAccuracyClass(score)}">
            ${escapeHtml(accuracyText)}
          </div>
        </div>
      </div>

      <!-- Response -->
      <div>
        <h5 class="text-xs font-semibold text-base-content/70 mb-1">Response</h5>
        <div class="bg-base-200 p-3 rounded-lg font-mono text-sm max-h-60 overflow-y-auto whitespace-pre-wrap custom-scrollbar">${escapeHtml(result.response_text || '[No response]')}</div>
      </div>
      ${reasoningHtml}
      ${temperatureHtml}
    </div>
  `;
}

/**
 * Populate the "All Model Results" modal from `window.__EVALUATION_DATA__`
 * and open it.
 *
 * Fills the instruction text, shows or hides the rubric and expected-output
 * sections depending on whether the data provides them, rebuilds the grid of
 * result cards, and finally opens the modal through `ModalUtils.open`.
 * Logs an error and returns without opening anything when no evaluation data
 * is present on `window`.
 */
function showAllResults(): void {
  const evalData = window.__EVALUATION_DATA__ as EvaluationData | undefined;
  if (!evalData) {
    console.error('No evaluation data available');
    return;
  }

  // Instruction: plain text via textContent, with '-' as the empty placeholder.
  const instructionEl = document.getElementById('all-results-instruction');
  if (instructionEl) {
    instructionEl.textContent = evalData.instruction_text || '-';
  }

  // Rubric: section is only revealed when there is text and a target element.
  const rubricContainer = document.getElementById('all-results-rubric-container');
  const rubricEl = document.getElementById('all-results-rubric');
  if (evalData.accuracy_rubric && rubricEl) {
    rubricContainer?.classList.remove('hidden');
    rubricEl.textContent = evalData.accuracy_rubric;
  } else {
    rubricContainer?.classList.add('hidden');
  }

  // Expected output: same reveal/hide rule as the rubric.
  const expectedContainer = document.getElementById('all-results-expected-container');
  const expectedEl = document.getElementById('all-results-expected');
  if (evalData.expected_output && expectedEl) {
    expectedContainer?.classList.remove('hidden');
    expectedEl.textContent = evalData.expected_output;
  } else {
    expectedContainer?.classList.add('hidden');
  }

  // Results grid: replaced wholesale on every call so repeated opens never
  // accumulate cards. A missing results array renders as an empty grid.
  const gridEl = document.getElementById('all-results-grid');
  if (gridEl) {
    const results = Array.isArray(evalData.results) ? evalData.results : [];
    gridEl.innerHTML = results.map(renderResultCard).join('');
  }

  // Open modal
  ModalUtils.open('all-results-modal');
}
| 238 | + |
/**
 * Publish `showAllResults` on `window` so the page's button click handler
 * can invoke it.
 */
const exposeShowAllResults = (): void => {
  window.showAllResults = showAllResults;
};

// Register under the 'allResultsModal' key as soon as this script runs.
// NOTE(review): initializeOnce presumably skips the callback when the key was
// already initialized — confirm in @lib/utils/client-utils.
initializeOnce('allResultsModal', exposeShowAllResults);

// Repeat the same keyed registration whenever Astro fires `astro:page-load`.
document.addEventListener('astro:page-load', () => {
  initializeOnce('allResultsModal', exposeShowAllResults);
});
| 251 | +</script> |
| 252 | + |
| 253 | +<style> |
| 254 | + /* Custom gold-themed scrollbar for response areas */ |
| 255 | + .custom-scrollbar { |
| 256 | + scrollbar-width: thin; |
| 257 | + scrollbar-color: var(--color-luxe-gold) var(--color-gold-bg-subtle); |
| 258 | + } |
| 259 | + |
| 260 | + .custom-scrollbar::-webkit-scrollbar { |
| 261 | + width: 8px; |
| 262 | + } |
| 263 | + |
| 264 | + .custom-scrollbar::-webkit-scrollbar-track { |
| 265 | + background: var(--color-gold-bg-subtle); |
| 266 | + border-radius: 4px; |
| 267 | + } |
| 268 | + |
| 269 | + .custom-scrollbar::-webkit-scrollbar-thumb { |
| 270 | + background: var(--color-luxe-gold); |
| 271 | + border-radius: 4px; |
| 272 | + } |
| 273 | + |
| 274 | + .custom-scrollbar::-webkit-scrollbar-thumb:hover { |
| 275 | + background: var(--color-luxe-gold-light); |
| 276 | + } |
| 277 | +</style> |
0 commit comments