
Commit ca0badf

vertex-sdk-bot authored and copybara-github committed
feat: Display button to view evaluation results in an IPython environment.
PiperOrigin-RevId: 713454093
1 parent 26a08c7 commit ca0badf

4 files changed: +123 -26 lines

google/cloud/aiplatform/utils/_ipython_utils.py (+11)

@@ -17,6 +17,7 @@

 import sys
 import typing
+import urllib
 from uuid import uuid4
 from typing import Optional

@@ -266,3 +267,13 @@ def display_browse_prebuilt_metrics_button() -> None:
         "https://cloud.google.com/vertex-ai/generative-ai/docs/models/metrics-templates"
     )
     display_link("Browse pre-built metrics", uri, "list")
+
+
+def display_gen_ai_evaluation_results_button(gcs_file_path: str) -> None:
+    """Function to generate a link bound to the Gen AI evaluation run."""
+    if not is_ipython_available():
+        return
+
+    gcs_file_path = urllib.parse.quote(gcs_file_path)
+    uri = f"https://console.cloud.google.com/storage/browser/_details/{gcs_file_path}"
+    display_link("View evaluation results", uri, "bar_chart")

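A minimal usage sketch of the new helper, mirroring how upload_evaluation_results calls it further down in this commit; the bucket and object path are placeholders, not from the commit. Outside an IPython environment the call is a no-op, and the path is passed without the gs:// prefix:

from google.cloud.aiplatform.utils import _ipython_utils

# Placeholder GCS object path (no "gs://" prefix) of an uploaded metrics table.
_ipython_utils.display_gen_ai_evaluation_results_button(
    "my-eval-bucket/eval-run/eval-run.csv"
)
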
tests/unit/vertexai/test_evaluation.py (+28 -11)

@@ -558,13 +558,9 @@ def mock_experiment_tracker():


 @pytest.fixture
-def mock_storage_blob_upload_from_filename():
-    with mock.patch(
-        "google.cloud.storage.Blob.upload_from_filename"
-    ) as mock_blob_upload_from_filename, mock.patch(
-        "google.cloud.storage.Bucket.exists", return_value=True
-    ):
-        yield mock_blob_upload_from_filename
+def mock_storage_blob_from_string():
+    with mock.patch("google.cloud.storage.Blob.from_string") as mock_blob_from_string:
+        yield mock_blob_from_string


 @pytest.mark.usefixtures("google_auth_mock")
@@ -1948,8 +1944,29 @@ def test_pairtwise_metric_prompt_template_with_default_values(self):
             == _EXPECTED_PAIRWISE_PROMPT_TEMPLATE_WITH_DEFAULT_VALUES.strip()
         )

-    def test_upload_results(self, mock_storage_blob_upload_from_filename):
-        evaluation.utils.upload_evaluation_results(
-            _TEST_CSV, _TEST_BUCKET, _TEST_FILE_NAME
+    def test_upload_results(self, mock_storage_blob_from_string):
+        with mock.patch("json.dump") as mock_json_dump:
+            evaluation.utils.upload_evaluation_results(
+                MOCK_EVAL_RESULT,
+                _TEST_BUCKET,
+                _TEST_FILE_NAME,
+                "candidate_model",
+                "baseline_model",
+            )
+
+        mock_storage_blob_from_string.assert_any_call(
+            uri="gs://test-bucket/test-file-name/test-file-name.csv",
+            client=mock.ANY,
+        )
+        mock_storage_blob_from_string.assert_any_call(
+            uri="gs://test-bucket/test-file-name/summary_metrics.json",
+            client=mock.ANY,
+        )
+        mock_json_dump.assert_called_once_with(
+            {
+                "summary_metrics": MOCK_EVAL_RESULT.summary_metrics,
+                "candidate_model_name": "candidate_model",
+                "baseline_model_name": "baseline_model",
+            },
+            mock.ANY,
         )
-        assert mock_storage_blob_upload_from_filename.called_once_with(_TEST_CSV)

vertexai/evaluation/eval_task.py (+22 -1)

@@ -464,8 +464,29 @@ def evaluate(
             evaluation_service_qps=evaluation_service_qps,
             retry_timeout=retry_timeout,
         )
+
+        candidate_model_name = None
+        if isinstance(model, generative_models.GenerativeModel):
+            candidate_model_name = model._model_name
+
+        baseline_model_name = None
+        pairwise_metrics = [
+            metric
+            for metric in self.metrics
+            if isinstance(metric, pairwise_metric.PairwiseMetric)
+        ]
+        if pairwise_metrics:
+            # All pairwise metrics should have the same baseline model.
+            baseline_model = pairwise_metrics[0].baseline_model
+            if isinstance(baseline_model, generative_models.GenerativeModel):
+                baseline_model_name = baseline_model._model_name
+
         utils.upload_evaluation_results(
-            eval_result.metrics_table, self.output_uri_prefix, output_file_name
+            eval_result,
+            self.output_uri_prefix,
+            output_file_name,
+            candidate_model_name,
+            baseline_model_name,
         )
         return eval_result

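For context, a hedged end-to-end sketch of the path this change touches. The project, bucket, dataset, metric, and model ID are placeholders, and it assumes output_uri_prefix is accepted by the EvalTask constructor, which the changed code reads back as self.output_uri_prefix:

import pandas as pd
import vertexai
from vertexai.evaluation import EvalTask
from vertexai.generative_models import GenerativeModel

vertexai.init(project="my-project", location="us-central1")  # placeholder project

eval_task = EvalTask(
    dataset=pd.DataFrame({"prompt": ["What is 2 + 2?"], "reference": ["4"]}),
    metrics=["exact_match"],  # placeholder metric; exact_match needs a "reference" column
    output_uri_prefix="gs://my-eval-bucket/eval-runs",  # placeholder bucket
)
# With a GenerativeModel candidate, evaluate() now also records the model name
# in the uploaded summary and shows the results button inside IPython.
result = eval_task.evaluate(model=GenerativeModel("gemini-1.5-flash"))
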
vertexai/evaluation/utils.py (+62 -14)

@@ -17,21 +17,24 @@

 import functools
 import io
+import json
 import os
 import tempfile
 import threading
 import time
-from typing import Any, Dict, Optional, TYPE_CHECKING, Union, Callable, Literal
+from typing import Any, Callable, Dict, Literal, Optional, TYPE_CHECKING, Union

 from google.cloud import bigquery
 from google.cloud import storage
 from google.cloud.aiplatform import base
 from google.cloud.aiplatform import compat
 from google.cloud.aiplatform import initializer
 from google.cloud.aiplatform import utils
+from google.cloud.aiplatform.utils import _ipython_utils
 from google.cloud.aiplatform_v1.services import (
     evaluation_service as gapic_evaluation_services,
 )
+from vertexai.evaluation import _base as eval_base


 if TYPE_CHECKING:
@@ -275,32 +278,77 @@ def _upload_pandas_df_to_gcs(
             " Please provide a valid GCS path with `jsonl` or `csv` suffix."
         )

-    storage_client = storage.Client(
-        project=initializer.global_config.project,
-        credentials=initializer.global_config.credentials,
-    )
-    storage.Blob.from_string(
-        uri=upload_gcs_path, client=storage_client
-    ).upload_from_filename(filename=local_dataset_path)
+    _upload_file_to_gcs(upload_gcs_path, local_dataset_path)
+
+
+def _upload_evaluation_summary_to_gcs(
+    summary_metrics: Dict[str, float],
+    upload_gcs_path: str,
+    candidate_model_name: Optional[str] = None,
+    baseline_model_name: Optional[str] = None,
+):
+    """Uploads the evaluation summary to a GCS bucket."""
+    summary = {
+        "summary_metrics": summary_metrics,
+    }
+    if candidate_model_name:
+        summary["candidate_model_name"] = candidate_model_name
+    if baseline_model_name:
+        summary["baseline_model_name"] = baseline_model_name
+
+    with tempfile.TemporaryDirectory() as temp_dir:
+        local_summary_path = os.path.join(temp_dir, "summary_metrics.json")
+        json.dump(summary, open(local_summary_path, "w"))
+        _upload_file_to_gcs(upload_gcs_path, local_summary_path)
+
+
+def _upload_file_to_gcs(upload_gcs_path: str, filename: str):
+    storage_client = storage.Client(
+        project=initializer.global_config.project,
+        credentials=initializer.global_config.credentials,
+    )
+    storage.Blob.from_string(
+        uri=upload_gcs_path, client=storage_client
+    ).upload_from_filename(filename)


 def upload_evaluation_results(
-    dataset: "pd.DataFrame", destination_uri_prefix: str, file_name: str
+    eval_result: eval_base.EvalResult,
+    destination_uri_prefix: str,
+    file_name: str,
+    candidate_model_name: Optional[str] = None,
+    baseline_model_name: Optional[str] = None,
 ) -> None:
     """Uploads eval results to GCS destination.

     Args:
-        dataset: Pandas dataframe to upload.
+        eval_result: Eval results to upload.
         destination_uri_prefix: GCS folder to store the data.
-        file_name: File name to store the data.
+        file_name: File name to store the metrics table.
+        candidate_model_name: Optional. Candidate model name.
+        baseline_model_name: Optional. Baseline model name.
     """
     if not destination_uri_prefix:
         return
+    if eval_result.metrics_table is None:
+        return
     if destination_uri_prefix.startswith(_GCS_PREFIX):
-        _, extension = os.path.splitext(file_name)
+        base_name, extension = os.path.splitext(file_name)
         file_type = extension.lower()[1:]
-        output_path = destination_uri_prefix + "/" + file_name
-        _upload_pandas_df_to_gcs(dataset, output_path, file_type)
+        output_folder = destination_uri_prefix + "/" + base_name
+        metrics_table_path = output_folder + "/" + file_name
+        _upload_pandas_df_to_gcs(
+            eval_result.metrics_table, metrics_table_path, file_type
+        )
+        _upload_evaluation_summary_to_gcs(
+            eval_result.summary_metrics,
+            output_folder + "/summary_metrics.json",
+            candidate_model_name,
+            baseline_model_name,
+        )
+        _ipython_utils.display_gen_ai_evaluation_results_button(
+            metrics_table_path.split(_GCS_PREFIX)[1]
+        )
     else:
         raise ValueError(
             f"Unsupported destination URI: {destination_uri_prefix}."