@@ -22,7 +22,7 @@
 import tempfile
 import threading
 import time
-from typing import Any, Callable, Dict, Literal, Optional, TYPE_CHECKING, Union
+from typing import Any, Callable, Dict, List, Literal, Optional, TYPE_CHECKING, Union

 from google.cloud import bigquery
 from google.cloud import storage
@@ -35,6 +35,10 @@
     evaluation_service as gapic_evaluation_services,
 )
 from vertexai.evaluation import _base as eval_base
+from vertexai.evaluation.metrics import (
+    _base as metrics_base,
+    metric_prompt_template as metric_prompt_template_base,
+)


 if TYPE_CHECKING:
@@ -286,6 +290,8 @@ def _upload_evaluation_summary_to_gcs(
     upload_gcs_path: str,
     candidate_model_name: Optional[str] = None,
     baseline_model_name: Optional[str] = None,
+    dataset_uri: Optional[str] = None,
+    metrics: Optional[List[Union[str, metrics_base._Metric]]] = None,
 ) -> None:
     """Uploads the evaluation summary to a GCS bucket."""
     summary = {
@@ -295,6 +301,21 @@ def _upload_evaluation_summary_to_gcs(
         summary["candidate_model_name"] = candidate_model_name
     if baseline_model_name:
         summary["baseline_model_name"] = baseline_model_name
+    if dataset_uri:
+        summary["dataset_uri"] = dataset_uri
+
+    if metrics:
+        metric_descriptions = {}
+        for metric in metrics:
+            if isinstance(metric, metrics_base._ModelBasedMetric) and isinstance(
+                metric._raw_metric_prompt_template,
+                metric_prompt_template_base._MetricPromptTemplate,
+            ):
+                metric_descriptions[metric.metric_name] = {
+                    "criteria": metric._raw_metric_prompt_template._criteria,
+                    "rating_rubric": metric._raw_metric_prompt_template._rating_rubric,
+                }
+        summary["metric_descriptions"] = metric_descriptions

     with tempfile.TemporaryDirectory() as temp_dir:
         local_summary_path = os.path.join(temp_dir, "summary_metrics.json")
@@ -318,6 +339,8 @@ def upload_evaluation_results(
     file_name: str,
     candidate_model_name: Optional[str] = None,
     baseline_model_name: Optional[str] = None,
+    dataset_uri: Optional[str] = None,
+    metrics: Optional[List[Union[str, metrics_base._Metric]]] = None,
 ) -> None:
     """Uploads eval results to GCS destination.

@@ -327,6 +350,8 @@ def upload_evaluation_results(
         file_name: File name to store the metrics table.
         candidate_model_name: Optional. Candidate model name.
         baseline_model_name: Optional. Baseline model name.
+        dataset_uri: Optional. URI pointing to the dataset.
+        metrics: Optional. List of metrics used for evaluation.
     """
     if not destination_uri_prefix:
         _ipython_utils.display_gen_ai_evaluation_results_button()
@@ -346,6 +371,8 @@ def upload_evaluation_results(
         output_folder + "/summary_metrics.json",
         candidate_model_name,
         baseline_model_name,
+        dataset_uri,
+        metrics,
     )
     _ipython_utils.display_gen_ai_evaluation_results_button(
         metrics_table_path.split(_GCS_PREFIX)[1]
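For context, here is a minimal runnable sketch of the summary payload the patched _upload_evaluation_summary_to_gcs writes out. The stand-in classes, metric values, and dataset URI below are illustrative assumptions, not part of this change; only the attribute names and JSON keys come from the diff itself.

import json

# Stand-ins exposing the same attributes the patched helper reads; the real
# classes live in vertexai.evaluation.metrics and are not reproduced here.
class FakePromptTemplate:
    def __init__(self, criteria, rating_rubric):
        self._criteria = criteria
        self._rating_rubric = rating_rubric

class FakeModelBasedMetric:
    def __init__(self, metric_name, template):
        self.metric_name = metric_name
        self._raw_metric_prompt_template = template

metrics = [
    FakeModelBasedMetric(
        "fluency",  # hypothetical metric name
        FakePromptTemplate(
            criteria="The response reads naturally and is free of errors.",
            rating_rubric="5: fully fluent ... 1: not fluent",
        ),
    )
]

# Mirrors the new branch added to _upload_evaluation_summary_to_gcs.
summary = {"summary_metrics": {"fluency/mean": 4.2}}  # placeholder value
summary["dataset_uri"] = "gs://my-bucket/eval_dataset.jsonl"  # assumed URI
metric_descriptions = {}
for metric in metrics:
    metric_descriptions[metric.metric_name] = {
        "criteria": metric._raw_metric_prompt_template._criteria,
        "rating_rubric": metric._raw_metric_prompt_template._rating_rubric,
    }
summary["metric_descriptions"] = metric_descriptions

print(json.dumps(summary, indent=2))  # shape of summary_metrics.json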