|
17 | 17 |
|
18 | 18 | import functools
|
19 | 19 | import io
|
| 20 | +import json |
20 | 21 | import os
|
21 | 22 | import tempfile
|
22 | 23 | import threading
|
23 | 24 | import time
|
24 |
| -from typing import Any, Dict, Optional, TYPE_CHECKING, Union, Callable, Literal |
| 25 | +from typing import Any, Callable, Dict, Literal, Optional, TYPE_CHECKING, Union |
25 | 26 |
|
26 | 27 | from google.cloud import bigquery
|
27 | 28 | from google.cloud import storage
|
28 | 29 | from google.cloud.aiplatform import base
|
29 | 30 | from google.cloud.aiplatform import compat
|
30 | 31 | from google.cloud.aiplatform import initializer
|
31 | 32 | from google.cloud.aiplatform import utils
|
| 33 | +from google.cloud.aiplatform.utils import _ipython_utils |
32 | 34 | from google.cloud.aiplatform_v1.services import (
|
33 | 35 | evaluation_service as gapic_evaluation_services,
|
34 | 36 | )
|
| 37 | +from vertexai.evaluation import _base as eval_base |
35 | 38 |
|
36 | 39 |
|
37 | 40 | if TYPE_CHECKING:
|
@@ -275,32 +278,77 @@ def _upload_pandas_df_to_gcs(
|
275 | 278 | " Please provide a valid GCS path with `jsonl` or `csv` suffix."
|
276 | 279 | )
|
277 | 280 |
|
278 |
| - storage_client = storage.Client( |
279 |
| - project=initializer.global_config.project, |
280 |
| - credentials=initializer.global_config.credentials, |
281 |
| - ) |
282 |
| - storage.Blob.from_string( |
283 |
| - uri=upload_gcs_path, client=storage_client |
284 |
| - ).upload_from_filename(filename=local_dataset_path) |
| 281 | + _upload_file_to_gcs(upload_gcs_path, local_dataset_path) |
| 282 | + |
| 283 | + |
def _upload_evaluation_summary_to_gcs(
    summary_metrics: Dict[str, float],
    upload_gcs_path: str,
    candidate_model_name: Optional[str] = None,
    baseline_model_name: Optional[str] = None,
):
    """Uploads the evaluation summary to a GCS bucket.

    Args:
        summary_metrics: Mapping of summary metric names to values; stored
            under the `summary_metrics` key of the uploaded JSON document.
        upload_gcs_path: Full GCS destination path for the JSON summary file.
        candidate_model_name: Optional. Candidate model name to record in the
            summary; omitted from the JSON when falsy.
        baseline_model_name: Optional. Baseline model name to record in the
            summary; omitted from the JSON when falsy.
    """
    summary = {
        "summary_metrics": summary_metrics,
    }
    if candidate_model_name:
        summary["candidate_model_name"] = candidate_model_name
    if baseline_model_name:
        summary["baseline_model_name"] = baseline_model_name

    with tempfile.TemporaryDirectory() as temp_dir:
        local_summary_path = os.path.join(temp_dir, "summary_metrics.json")
        # Write via a context manager so the file is flushed and closed
        # before the upload reads it (the previous `json.dump(summary,
        # open(path, "w"))` never closed the handle explicitly).
        with open(local_summary_path, "w") as summary_file:
            json.dump(summary, summary_file)
        _upload_file_to_gcs(upload_gcs_path, local_summary_path)
| 303 | + |
| 304 | + |
def _upload_file_to_gcs(upload_gcs_path: str, filename: str):
    """Uploads a local file to the given GCS path.

    Args:
        upload_gcs_path: Full `gs://` URI of the destination object.
        filename: Path of the local file to upload.
    """
    # Client is built from the globally initialized project/credentials so
    # uploads go to the caller's configured GCP project.
    client = storage.Client(
        project=initializer.global_config.project,
        credentials=initializer.global_config.credentials,
    )
    blob = storage.Blob.from_string(uri=upload_gcs_path, client=client)
    blob.upload_from_filename(filename)
285 | 313 |
|
286 | 314 |
|
287 | 315 | def upload_evaluation_results(
|
288 |
| - dataset: "pd.DataFrame", destination_uri_prefix: str, file_name: str |
| 316 | + eval_result: eval_base.EvalResult, |
| 317 | + destination_uri_prefix: str, |
| 318 | + file_name: str, |
| 319 | + candidate_model_name: Optional[str] = None, |
| 320 | + baseline_model_name: Optional[str] = None, |
289 | 321 | ) -> None:
|
290 | 322 | """Uploads eval results to GCS destination.
|
291 | 323 |
|
292 | 324 | Args:
|
293 |
| - dataset: Pandas dataframe to upload. |
| 325 | + eval_result: Eval results to upload. |
294 | 326 | destination_uri_prefix: GCS folder to store the data.
|
295 |
| - file_name: File name to store the data. |
| 327 | + file_name: File name to store the metrics table. |
| 328 | + candidate_model_name: Optional. Candidate model name. |
| 329 | + baseline_model_name: Optional. Baseline model name. |
296 | 330 | """
|
297 | 331 | if not destination_uri_prefix:
|
298 | 332 | return
|
| 333 | + if eval_result.metrics_table is None: |
| 334 | + return |
299 | 335 | if destination_uri_prefix.startswith(_GCS_PREFIX):
|
300 |
| - _, extension = os.path.splitext(file_name) |
| 336 | + base_name, extension = os.path.splitext(file_name) |
301 | 337 | file_type = extension.lower()[1:]
|
302 |
| - output_path = destination_uri_prefix + "/" + file_name |
303 |
| - _upload_pandas_df_to_gcs(dataset, output_path, file_type) |
| 338 | + output_folder = destination_uri_prefix + "/" + base_name |
| 339 | + metrics_table_path = output_folder + "/" + file_name |
| 340 | + _upload_pandas_df_to_gcs( |
| 341 | + eval_result.metrics_table, metrics_table_path, file_type |
| 342 | + ) |
| 343 | + _upload_evaluation_summary_to_gcs( |
| 344 | + eval_result.summary_metrics, |
| 345 | + output_folder + "/summary_metrics.json", |
| 346 | + candidate_model_name, |
| 347 | + baseline_model_name, |
| 348 | + ) |
| 349 | + _ipython_utils.display_gen_ai_evaluation_results_button( |
| 350 | + metrics_table_path.split(_GCS_PREFIX)[1] |
| 351 | + ) |
304 | 352 | else:
|
305 | 353 | raise ValueError(
|
306 | 354 | f"Unsupported destination URI: {destination_uri_prefix}."
|
|
0 commit comments