Skip to content

Commit 5f3c655

Browse files
vertex-sdk-bot authored and copybara-github committed
fix: GenAI Client(evals) - Reformat codebase 1. Remove duplicated code in _evals_utils and _evals_metric_loader 2. Keep metric utils in _evals_metric_loader and data util in _evals_utils
PiperOrigin-RevId: 833893675
1 parent f7e718f commit 5f3c655

File tree

4 files changed

+37
-444
lines changed

4 files changed

+37
-444
lines changed

tests/unit/vertexai/genai/test_evals.py

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,7 @@ def setup_method(self):
287287
self.client = vertexai.Client(project=_TEST_PROJECT, location=_TEST_LOCATION)
288288

289289
@mock.patch.object(_evals_common, "Models")
290-
@mock.patch.object(_evals_metric_loaders, "EvalDatasetLoader")
290+
@mock.patch.object(_evals_utils, "EvalDatasetLoader")
291291
def test_inference_with_string_model_success(
292292
self, mock_eval_dataset_loader, mock_models
293293
):
@@ -330,7 +330,7 @@ def test_inference_with_string_model_success(
330330
assert inference_result.candidate_name == "gemini-pro"
331331
assert inference_result.gcs_source is None
332332

333-
@mock.patch.object(_evals_metric_loaders, "EvalDatasetLoader")
333+
@mock.patch.object(_evals_utils, "EvalDatasetLoader")
334334
def test_inference_with_callable_model_sets_candidate_name(
335335
self, mock_eval_dataset_loader
336336
):
@@ -349,7 +349,7 @@ def my_model_fn(contents):
349349
assert inference_result.candidate_name == "my_model_fn"
350350
assert inference_result.gcs_source is None
351351

352-
@mock.patch.object(_evals_metric_loaders, "EvalDatasetLoader")
352+
@mock.patch.object(_evals_utils, "EvalDatasetLoader")
353353
def test_inference_with_lambda_model_candidate_name_is_none(
354354
self, mock_eval_dataset_loader
355355
):
@@ -371,7 +371,7 @@ def test_inference_with_lambda_model_candidate_name_is_none(
371371
)
372372
assert inference_result.gcs_source is None
373373

374-
@mock.patch.object(_evals_metric_loaders, "EvalDatasetLoader")
374+
@mock.patch.object(_evals_utils, "EvalDatasetLoader")
375375
def test_inference_with_callable_model_success(self, mock_eval_dataset_loader):
376376
mock_df = pd.DataFrame({"prompt": ["test prompt"]})
377377
mock_eval_dataset_loader.return_value.load.return_value = mock_df.to_dict(
@@ -399,7 +399,7 @@ def mock_model_fn(contents):
399399
assert inference_result.gcs_source is None
400400

401401
@mock.patch.object(_evals_common, "Models")
402-
@mock.patch.object(_evals_metric_loaders, "EvalDatasetLoader")
402+
@mock.patch.object(_evals_utils, "EvalDatasetLoader")
403403
def test_inference_with_prompt_template(
404404
self, mock_eval_dataset_loader, mock_models
405405
):
@@ -446,7 +446,7 @@ def test_inference_with_prompt_template(
446446
assert inference_result.gcs_source is None
447447

448448
@mock.patch.object(_evals_common, "Models")
449-
@mock.patch.object(_evals_metric_loaders, "EvalDatasetLoader")
449+
@mock.patch.object(_evals_utils, "EvalDatasetLoader")
450450
@mock.patch.object(_gcs_utils, "GcsUtils")
451451
def test_inference_with_gcs_destination(
452452
self, mock_gcs_utils, mock_eval_dataset_loader, mock_models
@@ -500,7 +500,7 @@ def test_inference_with_gcs_destination(
500500
)
501501

502502
@mock.patch.object(_evals_common, "Models")
503-
@mock.patch.object(_evals_metric_loaders, "EvalDatasetLoader")
503+
@mock.patch.object(_evals_utils, "EvalDatasetLoader")
504504
@mock.patch("pandas.DataFrame.to_json")
505505
@mock.patch("os.makedirs")
506506
def test_inference_with_local_destination(
@@ -552,7 +552,7 @@ def test_inference_with_local_destination(
552552
assert inference_result.gcs_source is None
553553

554554
@mock.patch.object(_evals_common, "Models")
555-
@mock.patch.object(_evals_metric_loaders, "EvalDatasetLoader")
555+
@mock.patch.object(_evals_utils, "EvalDatasetLoader")
556556
def test_inference_from_request_column_save_to_local_dir(
557557
self, mock_eval_dataset_loader, mock_models
558558
):
@@ -787,7 +787,7 @@ def test_inference_from_local_csv_file(self, mock_models):
787787
assert inference_result.gcs_source is None
788788

789789
@mock.patch.object(_evals_common, "Models")
790-
@mock.patch.object(_evals_metric_loaders, "EvalDatasetLoader")
790+
@mock.patch.object(_evals_utils, "EvalDatasetLoader")
791791
def test_inference_with_row_level_config_overrides(
792792
self, mock_eval_dataset_loader, mock_models
793793
):
@@ -972,7 +972,7 @@ def mock_generate_content_logic(*args, **kwargs):
972972
assert inference_result.gcs_source is None
973973

974974
@mock.patch.object(_evals_common, "Models")
975-
@mock.patch.object(_evals_metric_loaders, "EvalDatasetLoader")
975+
@mock.patch.object(_evals_utils, "EvalDatasetLoader")
976976
def test_inference_with_multimodal_content(
977977
self, mock_eval_dataset_loader, mock_models
978978
):
@@ -1048,7 +1048,7 @@ def test_inference_with_multimodal_content(
10481048
assert inference_result.candidate_name == "gemini-pro"
10491049
assert inference_result.gcs_source is None
10501050

1051-
@mock.patch.object(_evals_metric_loaders, "EvalDatasetLoader")
1051+
@mock.patch.object(_evals_utils, "EvalDatasetLoader")
10521052
@mock.patch("vertexai._genai._evals_common.vertexai.Client")
10531053
def test_run_inference_with_agent_engine_and_session_inputs_dict(
10541054
self,
@@ -1136,7 +1136,7 @@ def test_run_inference_with_agent_engine_and_session_inputs_dict(
11361136
assert inference_result.candidate_name is None
11371137
assert inference_result.gcs_source is None
11381138

1139-
@mock.patch.object(_evals_metric_loaders, "EvalDatasetLoader")
1139+
@mock.patch.object(_evals_utils, "EvalDatasetLoader")
11401140
@mock.patch("vertexai._genai._evals_common.vertexai.Client")
11411141
def test_run_inference_with_agent_engine_and_session_inputs_literal_string(
11421142
self,
@@ -1411,7 +1411,7 @@ def test_run_inference_with_litellm_import_error(self, mock_api_client_fixture):
14111411
@mock.patch.object(_evals_common, "_is_gemini_model")
14121412
@mock.patch.object(_evals_common, "_is_litellm_model")
14131413
@mock.patch.object(_evals_common, "_is_litellm_vertex_maas_model")
1414-
@mock.patch.object(_evals_metric_loaders, "EvalDatasetLoader")
1414+
@mock.patch.object(_evals_utils, "EvalDatasetLoader")
14151415
def test_run_inference_with_litellm_parsing(
14161416
self,
14171417
mock_eval_dataset_loader,
@@ -4536,9 +4536,7 @@ def test_execute_evaluation_with_openai_schema(
45364536
name="test_metric", prompt_template="Evaluate: {response}"
45374537
)
45384538

4539-
with mock.patch.object(
4540-
_evals_metric_loaders, "EvalDatasetLoader"
4541-
) as mock_loader_class:
4539+
with mock.patch.object(_evals_utils, "EvalDatasetLoader") as mock_loader_class:
45424540
mock_loader_instance = mock_loader_class.return_value
45434541
mock_loader_instance.load.return_value = mock_openai_raw_data
45444542

vertexai/_genai/_evals_common.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
from . import _evals_data_converters
3838
from . import _evals_metric_handlers
3939
from . import _evals_metric_loaders
40+
from . import _evals_utils
4041
from . import _gcs_utils
4142

4243
from . import evals
@@ -695,7 +696,7 @@ def _load_dataframe(
695696
"""Loads and prepares the prompt dataset for inference."""
696697
logger.info("Loading prompt dataset from: %s", src)
697698
try:
698-
loader = _evals_metric_loaders.EvalDatasetLoader(api_client=api_client)
699+
loader = _evals_utils.EvalDatasetLoader(api_client=api_client)
699700
dataset_list_of_dicts = loader.load(src)
700701
if not dataset_list_of_dicts:
701702
raise ValueError("Prompt dataset 'prompt_dataset' must not be empty.")
@@ -859,7 +860,7 @@ def _get_dataset_source(
859860
def _resolve_dataset_inputs(
860861
dataset: list[types.EvaluationDataset],
861862
dataset_schema: Optional[Literal["GEMINI", "FLATTEN", "OPENAI"]],
862-
loader: "_evals_metric_loaders.EvalDatasetLoader",
863+
loader: "_evals_utils.EvalDatasetLoader",
863864
agent_info: Optional[types.evals.AgentInfo] = None,
864865
) -> tuple[types.EvaluationDataset, int]:
865866
"""Loads and processes single or multiple datasets for evaluation.
@@ -1103,7 +1104,7 @@ def _execute_evaluation( # type: ignore[no-untyped-def]
11031104
else:
11041105
deduped_candidate_names.append(name)
11051106

1106-
loader = _evals_metric_loaders.EvalDatasetLoader(api_client=api_client)
1107+
loader = _evals_utils.EvalDatasetLoader(api_client=api_client)
11071108

11081109
agent_info = kwargs.get("agent_info", None)
11091110
validated_agent_info = None

vertexai/_genai/_evals_metric_loaders.py

Lines changed: 16 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,10 @@
2020
import re
2121
from typing import Any, Optional, Union, TYPE_CHECKING
2222

23-
from google.genai._api_client import BaseApiClient
24-
import pandas as pd
2523
import yaml
2624

2725
from . import _evals_constant
2826
from . import _gcs_utils
29-
from . import _bigquery_utils
3027

3128
if TYPE_CHECKING:
3229
from . import types
@@ -35,72 +32,6 @@
3532
logger = logging.getLogger(__name__)
3633

3734

38-
GCS_PREFIX = "gs://"
39-
BQ_PREFIX = "bq://"
40-
41-
42-
class EvalDatasetLoader:
43-
"""A loader for datasets from various sources, using a shared client."""
44-
45-
def __init__(self, api_client: BaseApiClient):
46-
self.api_client = api_client
47-
self.gcs_utils = _gcs_utils.GcsUtils(self.api_client)
48-
self.bigquery_utils = _bigquery_utils.BigQueryUtils(self.api_client)
49-
50-
def _load_file(
51-
self, filepath: str, file_type: str
52-
) -> Union[list[dict[str, Any]], Any]:
53-
"""Loads data from a file into a list of dictionaries."""
54-
if filepath.startswith(GCS_PREFIX):
55-
df = self.gcs_utils.read_gcs_file_to_dataframe(filepath, file_type)
56-
return df.to_dict(orient="records")
57-
else:
58-
if file_type == "jsonl":
59-
df = pd.read_json(filepath, lines=True)
60-
return df.to_dict(orient="records")
61-
elif file_type == "csv":
62-
df = pd.read_csv(filepath, encoding="utf-8")
63-
return df.to_dict(orient="records")
64-
else:
65-
raise ValueError(
66-
f"Unsupported file type: '{file_type}'. Please provide 'jsonl' or"
67-
" 'csv'."
68-
)
69-
70-
def load(
71-
self, source: Union[str, "pd.DataFrame"]
72-
) -> Union[list[dict[str, Any]], Any]:
73-
"""Loads dataset from various sources into a list of dictionaries."""
74-
if isinstance(source, pd.DataFrame):
75-
return source.to_dict(orient="records")
76-
elif isinstance(source, str):
77-
if source.startswith(BQ_PREFIX):
78-
df = self.bigquery_utils.load_bigquery_to_dataframe(
79-
source[len(BQ_PREFIX) :]
80-
)
81-
return df.to_dict(orient="records")
82-
83-
_, extension = os.path.splitext(source)
84-
file_type = extension.lower()[1:]
85-
86-
if file_type == "jsonl":
87-
return self._load_file(source, "jsonl")
88-
elif file_type == "csv":
89-
return self._load_file(source, "csv")
90-
else:
91-
raise TypeError(
92-
f"Unsupported file type: {file_type} from {source}. Please"
93-
" provide a valid GCS path with `jsonl` or `csv` suffix, "
94-
"a local file path, or a valid BigQuery table URI."
95-
)
96-
else:
97-
raise TypeError(
98-
"Unsupported dataset type. Must be a `pd.DataFrame`, Python"
99-
" a valid GCS path with `jsonl` or `csv` suffix, a local"
100-
" file path, or a valid BigQuery table URI."
101-
)
102-
103-
10435
class LazyLoadedPrebuiltMetric:
10536
"""A proxy object representing a prebuilt metric to be loaded on demand.
10637
@@ -408,6 +339,22 @@ def MULTI_TURN_SAFETY(self) -> LazyLoadedPrebuiltMetric:
408339
def FINAL_RESPONSE_QUALITY(self) -> LazyLoadedPrebuiltMetric:
409340
return self.__getattr__("FINAL_RESPONSE_QUALITY")
410341

342+
@property
343+
def HALLUCINATION(self) -> LazyLoadedPrebuiltMetric:
344+
return self.__getattr__("HALLUCINATION")
345+
346+
@property
347+
def TOOL_USE_QUALITY(self) -> LazyLoadedPrebuiltMetric:
348+
return self.__getattr__("TOOL_USE_QUALITY")
349+
350+
@property
351+
def GECKO_TEXT2IMAGE(self) -> LazyLoadedPrebuiltMetric:
352+
return self.__getattr__("GECKO_TEXT2IMAGE")
353+
354+
@property
355+
def GECKO_TEXT2VIDEO(self) -> LazyLoadedPrebuiltMetric:
356+
return self.__getattr__("GECKO_TEXT2VIDEO")
357+
411358

412359
PrebuiltMetric = PrebuiltMetricLoader()
413360
RubricMetric = PrebuiltMetric

0 commit comments

Comments (0)