|
17 | 17 |
|
18 | 18 | import functools
|
19 | 19 | import io
|
| 20 | +import json |
20 | 21 | import os
|
21 | 22 | import tempfile
|
22 | 23 | import threading
|
23 | 24 | import time
|
24 |
| -from typing import Any, Dict, Optional, TYPE_CHECKING, Union, Callable, Literal |
| 25 | +from typing import Any, Callable, Dict, Literal, Optional, TYPE_CHECKING, Union |
25 | 26 |
|
26 | 27 | from google.cloud import bigquery
|
27 | 28 | from google.cloud import storage
|
|
33 | 34 | from google.cloud.aiplatform_v1.services import (
|
34 | 35 | evaluation_service as gapic_evaluation_services,
|
35 | 36 | )
|
| 37 | +from vertexai.evaluation import _base as eval_base |
36 | 38 |
|
37 | 39 |
|
38 | 40 | if TYPE_CHECKING:
|
@@ -276,35 +278,77 @@ def _upload_pandas_df_to_gcs(
|
276 | 278 | " Please provide a valid GCS path with `jsonl` or `csv` suffix."
|
277 | 279 | )
|
278 | 280 |
|
279 |
| - storage_client = storage.Client( |
280 |
| - project=initializer.global_config.project, |
281 |
| - credentials=initializer.global_config.credentials, |
282 |
| - ) |
283 |
| - storage.Blob.from_string( |
284 |
| - uri=upload_gcs_path, client=storage_client |
285 |
| - ).upload_from_filename(filename=local_dataset_path) |
| 281 | + _upload_file_to_gcs(upload_gcs_path, local_dataset_path) |
| 282 | + |
| 283 | + |
def _upload_evaluation_summary_to_gcs(
    summary_metrics: Dict[str, float],
    upload_gcs_path: str,
    candidate_model_name: Optional[str] = None,
    baseline_model_name: Optional[str] = None,
):
    """Uploads the evaluation summary to a GCS bucket.

    Serializes the summary metrics (plus optional model names) to a JSON
    file in a temporary directory and uploads it to `upload_gcs_path`.

    Args:
        summary_metrics: Mapping of summary metric names to metric values.
        upload_gcs_path: Full GCS destination path (``gs://...``) of the
            JSON file to write.
        candidate_model_name: Optional. Candidate model name, included in
            the summary when provided.
        baseline_model_name: Optional. Baseline model name, included in
            the summary when provided.
    """
    summary: Dict[str, Any] = {
        "summary_metrics": summary_metrics,
    }
    if candidate_model_name:
        summary["candidate_model_name"] = candidate_model_name
    if baseline_model_name:
        summary["baseline_model_name"] = baseline_model_name

    with tempfile.TemporaryDirectory() as temp_dir:
        local_summary_path = os.path.join(temp_dir, "summary_metrics.json")
        # Use a context manager so the file handle is flushed and closed
        # before the upload reads the file from disk.
        with open(local_summary_path, "w") as f:
            json.dump(summary, f)
        _upload_file_to_gcs(upload_gcs_path, local_summary_path)
| 303 | + |
| 304 | + |
def _upload_file_to_gcs(upload_gcs_path: str, filename: str):
    """Uploads a local file to the given GCS path.

    Args:
        upload_gcs_path: Full GCS destination path (``gs://...``).
        filename: Path of the local file to upload.
    """
    client = storage.Client(
        project=initializer.global_config.project,
        credentials=initializer.global_config.credentials,
    )
    blob = storage.Blob.from_string(uri=upload_gcs_path, client=client)
    blob.upload_from_filename(filename)
286 | 313 |
|
287 | 314 |
|
288 | 315 | def upload_evaluation_results(
|
289 |
| - dataset: "pd.DataFrame", destination_uri_prefix: str, file_name: str |
| 316 | + eval_result: eval_base.EvalResult, |
| 317 | + destination_uri_prefix: str, |
| 318 | + file_name: str, |
| 319 | + candidate_model_name: Optional[str] = None, |
| 320 | + baseline_model_name: Optional[str] = None, |
290 | 321 | ) -> None:
|
291 | 322 | """Uploads eval results to GCS destination.
|
292 | 323 |
|
293 | 324 | Args:
|
294 |
| - dataset: Pandas dataframe to upload. |
| 325 | + eval_result: Eval results to upload. |
295 | 326 | destination_uri_prefix: GCS folder to store the data.
|
296 |
| - file_name: File name to store the data. |
| 327 | + file_name: File name to store the metrics table. |
| 328 | + candidate_model_name: Optional. Candidate model name. |
| 329 | + baseline_model_name: Optional. Baseline model name. |
297 | 330 | """
|
298 | 331 | if not destination_uri_prefix:
|
299 | 332 | _ipython_utils.display_gen_ai_evaluation_results_button()
|
300 | 333 | return
|
| 334 | + if eval_result.metrics_table is None: |
| 335 | + return |
301 | 336 | if destination_uri_prefix.startswith(_GCS_PREFIX):
|
302 |
| - _, extension = os.path.splitext(file_name) |
| 337 | + base_name, extension = os.path.splitext(file_name) |
303 | 338 | file_type = extension.lower()[1:]
|
304 |
| - output_path = destination_uri_prefix + "/" + file_name |
305 |
| - _upload_pandas_df_to_gcs(dataset, output_path, file_type) |
| 339 | + output_folder = destination_uri_prefix + "/" + base_name |
| 340 | + metrics_table_path = output_folder + "/" + file_name |
| 341 | + _upload_pandas_df_to_gcs( |
| 342 | + eval_result.metrics_table, metrics_table_path, file_type |
| 343 | + ) |
| 344 | + _upload_evaluation_summary_to_gcs( |
| 345 | + eval_result.summary_metrics, |
| 346 | + output_folder + "/summary_metrics.json", |
| 347 | + candidate_model_name, |
| 348 | + baseline_model_name, |
| 349 | + ) |
306 | 350 | _ipython_utils.display_gen_ai_evaluation_results_button(
|
307 |
| - output_path.split(_GCS_PREFIX)[1] |
| 351 | + metrics_table_path.split(_GCS_PREFIX)[1] |
308 | 352 | )
|
309 | 353 | else:
|
310 | 354 | raise ValueError(
|
|
0 commit comments