Commit 58f7bfc

feat: wire new evaluators

1 parent c5c541b commit 58f7bfc

File tree

17 files changed: +376 −62 lines changed

src/uipath/_cli/_evals/_models/_evaluation_set.py

Lines changed: 70 additions & 4 deletions

@@ -1,13 +1,29 @@
 from enum import IntEnum
-from typing import Any, Dict, List
+from typing import Annotated, Any, Dict, List, Literal, Union
 
-from pydantic import BaseModel, ConfigDict, Field
+from pydantic import BaseModel, ConfigDict, Discriminator, Field, Tag
 from pydantic.alias_generators import to_camel
 
+from uipath.eval.coded_evaluators import BaseEvaluator
+from uipath.eval.evaluators import LegacyBaseEvaluator
+
 
 class EvaluationItem(BaseModel):
     """Individual evaluation item within an evaluation set."""
 
+    model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)
+    id: str
+    name: str
+    inputs: Dict[str, Any]
+    evaluation_criterias: dict[str, dict[str, Any] | None] = Field(
+        ..., alias="evaluationCriterias"
+    )
+    expected_agent_behavior: str = Field(default="", alias="expectedAgentBehavior")
+
+
+class LegacyEvaluationItem(BaseModel):
+    """Individual evaluation item within an evaluation set."""
+
     model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)
 
     id: str
@@ -28,12 +44,36 @@ class EvaluationItem(BaseModel):
 class EvaluationSet(BaseModel):
     """Complete evaluation set model."""
 
+    model_config = ConfigDict(
+        alias_generator=to_camel, populate_by_name=True, extra="allow"
+    )
+
+    id: str
+    name: str
+    version: Literal["1.0"] = "1.0"
+    evaluator_refs: List[str] = Field(default_factory=list)
+    evaluations: List[EvaluationItem] = Field(default_factory=list)
+
+    def extract_selected_evals(self, eval_ids) -> None:
+        selected_evals: list[EvaluationItem] = []
+        for evaluation in self.evaluations:
+            if evaluation.id in eval_ids:
+                selected_evals.append(evaluation)
+                eval_ids.remove(evaluation.id)
+        if len(eval_ids) > 0:
+            raise ValueError("Unknown evaluation ids: {}".format(eval_ids))
+        self.evaluations = selected_evals
+
+
+class LegacyEvaluationSet(BaseModel):
+    """Complete evaluation set model."""
+
     model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)
 
     id: str
     file_name: str
     evaluator_refs: List[str] = Field(default_factory=list)
-    evaluations: List[EvaluationItem] = Field(default_factory=list)
+    evaluations: List[LegacyEvaluationItem] = Field(default_factory=list)
     name: str
     batch_size: int = 10
     timeout_minutes: int = 20
@@ -42,7 +82,7 @@ class EvaluationSet(BaseModel):
     updated_at: str
 
     def extract_selected_evals(self, eval_ids) -> None:
-        selected_evals: list[EvaluationItem] = []
+        selected_evals: list[LegacyEvaluationItem] = []
        for evaluation in self.evaluations:
            if evaluation.id in eval_ids:
                selected_evals.append(evaluation)
@@ -56,3 +96,29 @@ class EvaluationStatus(IntEnum):
     PENDING = 0
     IN_PROGRESS = 1
     COMPLETED = 2
+
+
+def _discriminate_eval_set(
+    v: Any,
+) -> Literal["evaluation_set", "legacy_evaluation_set"]:
+    """Discriminator function that returns a tag based on version field."""
+    if isinstance(v, dict):
+        version = v.get("version")
+        if version == "1.0":
+            return "evaluation_set"
+    return "legacy_evaluation_set"
+
+
+AnyEvaluationSet = Annotated[
+    Union[
+        Annotated[EvaluationSet, Tag("evaluation_set")],
+        Annotated[LegacyEvaluationSet, Tag("legacy_evaluation_set")],
+    ],
+    Discriminator(_discriminate_eval_set),
+]
+
+AnyEvaluationItem = Union[EvaluationItem, LegacyEvaluationItem]
+
+AnyEvaluator = Annotated[
+    Union[LegacyBaseEvaluator[Any], BaseEvaluator[Any, Any, Any]], "List of evaluators"
+]
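
To see how AnyEvaluationSet behaves, here is a minimal sketch (not part of this commit) that runs a payload through pydantic's TypeAdapter; it assumes the names above are importable from the module. A payload whose version field equals "1.0" validates as the new EvaluationSet; anything else is tagged "legacy_evaluation_set" and must then satisfy LegacyEvaluationSet's required fields (file_name, updated_at, and so on).

from pydantic import TypeAdapter

from uipath._cli._evals._models._evaluation_set import (
    AnyEvaluationSet,
    EvaluationSet,
)

adapter = TypeAdapter(AnyEvaluationSet)

# "version": "1.0" makes _discriminate_eval_set return "evaluation_set",
# so this validates as the new model.
eval_set = adapter.validate_python(
    {"id": "set-1", "name": "smoke tests", "version": "1.0"}
)
assert isinstance(eval_set, EvaluationSet)

Note that extract_selected_evals mutates both the model (only matching evaluations are kept) and the eval_ids argument (matched ids are removed, and a ValueError is raised if any remain), so callers should pass a disposable collection.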
Lines changed: 146 additions & 0 deletions (new file)

@@ -0,0 +1,146 @@
+from typing import Annotated, Any, Literal, Union
+
+from pydantic import BaseModel, ConfigDict, Discriminator, Field, Tag
+
+from uipath.eval.coded_evaluators.base_evaluator import BaseEvaluatorConfig
+from uipath.eval.coded_evaluators.contains_evaluator import ContainsEvaluatorConfig
+from uipath.eval.models.models import (
+    EvaluatorType,
+    LegacyEvaluatorCategory,
+    LegacyEvaluatorType,
+)
+
+
+class EvaluatorBaseParams(BaseModel):
+    """Parameters for initializing the base evaluator."""
+
+    id: str
+    name: str
+    description: str
+    evaluator_type: LegacyEvaluatorType = Field(..., alias="type")
+    created_at: str = Field(..., alias="createdAt")
+    updated_at: str = Field(..., alias="updatedAt")
+    target_output_key: str = Field(..., alias="targetOutputKey")
+    file_name: str = Field(..., alias="fileName")
+
+
+class LLMEvaluatorParams(EvaluatorBaseParams):
+    category: Literal[LegacyEvaluatorCategory.LlmAsAJudge] = Field(
+        ..., alias="category"
+    )
+    prompt: str = Field(..., alias="prompt")
+    model: str = Field(..., alias="model")
+
+    model_config = ConfigDict(
+        validate_by_name=True, validate_by_alias=True, extra="allow"
+    )
+
+
+class TrajectoryEvaluatorParams(EvaluatorBaseParams):
+    category: Literal[LegacyEvaluatorCategory.Trajectory] = Field(..., alias="category")
+    prompt: str = Field(..., alias="prompt")
+    model: str = Field(..., alias="model")
+
+    model_config = ConfigDict(
+        validate_by_name=True, validate_by_alias=True, extra="allow"
+    )
+
+
+class EqualsEvaluatorParams(EvaluatorBaseParams):
+    model_config = ConfigDict(
+        validate_by_name=True, validate_by_alias=True, extra="allow"
+    )
+
+
+class JsonSimilarityEvaluatorParams(EvaluatorBaseParams):
+    model_config = ConfigDict(
+        validate_by_name=True, validate_by_alias=True, extra="allow"
+    )
+
+
+class UnknownEvaluatorParams(EvaluatorBaseParams):
+    model_config = ConfigDict(
+        validate_by_name=True, validate_by_alias=True, extra="allow"
+    )
+
+
+class UnknownEvaluatorConfig(BaseEvaluatorConfig):
+    model_config = ConfigDict(
+        validate_by_name=True, validate_by_alias=True, extra="allow"
+    )
+
+
+def legacy_evaluator_discriminator(data: Any) -> str:
+    if isinstance(data, dict):
+        category = data.get("category")
+        evaluator_type = data.get("type")
+        match category:
+            case LegacyEvaluatorCategory.LlmAsAJudge:
+                return "LLMEvaluatorParams"
+            case LegacyEvaluatorCategory.Trajectory:
+                return "TrajectoryEvaluatorParams"
+            case LegacyEvaluatorCategory.Deterministic:
+                match evaluator_type:
+                    case LegacyEvaluatorType.Equals:
+                        return "EqualsEvaluatorParams"
+                    case LegacyEvaluatorType.JsonSimilarity:
+                        return "JsonSimilarityEvaluatorParams"
+                    case _:
+                        return "UnknownEvaluatorParams"
+            case _:
+                return "UnknownEvaluatorParams"
+    else:
+        return "UnknownEvaluatorParams"
+
+
+def evaluator_config_discriminator(data: Any) -> str:
+    if isinstance(data, dict):
+        evaluator_type_id = data.get("evaluatorTypeId")
+        match evaluator_type_id:
+            case EvaluatorType.CONTAINS:
+                return "ContainsEvaluatorConfig"
+            case _:
+                return "UnknownEvaluatorConfig"
+    else:
+        return "UnknownEvaluatorConfig"
+
+
+EvaluatorLegacy = Annotated[
+    Union[
+        Annotated[
+            LLMEvaluatorParams,
+            Tag("LLMEvaluatorParams"),
+        ],
+        Annotated[
+            TrajectoryEvaluatorParams,
+            Tag("TrajectoryEvaluatorParams"),
+        ],
+        Annotated[
+            EqualsEvaluatorParams,
+            Tag("EqualsEvaluatorParams"),
+        ],
+        Annotated[
+            JsonSimilarityEvaluatorParams,
+            Tag("JsonSimilarityEvaluatorParams"),
+        ],
+        Annotated[
+            UnknownEvaluatorParams,
+            Tag("UnknownEvaluatorParams"),
+        ],
+    ],
+    Field(discriminator=Discriminator(legacy_evaluator_discriminator)),
+]
+
+EvaluatorConfig = Annotated[
+    Union[
+        Annotated[
+            ContainsEvaluatorConfig,
+            Tag("ContainsEvaluatorConfig"),
+        ],
+        Annotated[
+            UnknownEvaluatorConfig,
+            Tag("UnknownEvaluatorConfig"),
+        ],
+    ],
+    Field(discriminator=Discriminator(evaluator_config_discriminator)),
]
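
As a quick illustration (again a sketch, not part of this commit), the callable discriminators can be exercised directly; enum members are passed here because the raw JSON values depend on how LegacyEvaluatorCategory and LegacyEvaluatorType serialize.

# Sketch only; assumes the definitions above are in scope.
tag = legacy_evaluator_discriminator(
    {"category": LegacyEvaluatorCategory.LlmAsAJudge, "type": None}
)
assert tag == "LLMEvaluatorParams"

# Deterministic evaluators are discriminated a second time on "type".
tag = legacy_evaluator_discriminator(
    {
        "category": LegacyEvaluatorCategory.Deterministic,
        "type": LegacyEvaluatorType.JsonSimilarity,
    }
)
assert tag == "JsonSimilarityEvaluatorParams"

# Unrecognized or non-dict input degrades to the Unknown* tags instead of failing.
assert legacy_evaluator_discriminator(None) == "UnknownEvaluatorParams"
assert evaluator_config_discriminator({}) == "UnknownEvaluatorConfig"

Falling back to UnknownEvaluatorParams / UnknownEvaluatorConfig (both declared with extra="allow") means an unrecognized evaluator still deserializes instead of raising, which keeps older payloads loadable as new evaluator types are added.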

src/uipath/_cli/_evals/_models/_evaluator_base_params.py

Lines changed: 3 additions & 3 deletions

@@ -1,14 +1,14 @@
 from pydantic import BaseModel
 
-from uipath.eval.models.models import EvaluatorCategory, EvaluatorType
+from uipath.eval.models.models import LegacyEvaluatorCategory, LegacyEvaluatorType
 
 
 class EvaluatorBaseParams(BaseModel):
     """Parameters for initializing the base evaluator."""
 
     id: str
-    category: EvaluatorCategory
-    evaluator_type: EvaluatorType
+    category: LegacyEvaluatorCategory
+    evaluator_type: LegacyEvaluatorType
     name: str
     description: str
     created_at: str

src/uipath/_cli/_evals/_progress_reporter.py

Lines changed: 10 additions & 7 deletions

@@ -9,7 +9,10 @@
 from opentelemetry import trace
 
 from uipath import UiPath
-from uipath._cli._evals._models._evaluation_set import EvaluationItem, EvaluationStatus
+from uipath._cli._evals._models._evaluation_set import (
+    EvaluationStatus,
+    LegacyEvaluationItem,
+)
 from uipath._cli._evals._models._sw_reporting import (
     StudioWebAgentSnapshot,
     StudioWebProgressItem,
@@ -28,7 +31,7 @@
 )
 from uipath._utils import Endpoint, RequestSpec
 from uipath._utils.constants import ENV_TENANT_ID, HEADER_INTERNAL_TENANT_ID
-from uipath.eval.evaluators import BaseEvaluator
+from uipath.eval.evaluators import LegacyBaseEvaluator
 from uipath.eval.models import EvalItemResult, ScoreType
 from uipath.tracing import LlmOpsHttpExporter
 
@@ -85,7 +88,7 @@ async def create_eval_set_run(
         eval_set_id: str,
         agent_snapshot: StudioWebAgentSnapshot,
         no_of_evals: int,
-        evaluators: List[BaseEvaluator[Any]],
+        evaluators: List[LegacyBaseEvaluator[Any]],
     ) -> str:
         """Create a new evaluation set run in StudioWeb."""
         spec = self._create_eval_set_run_spec(eval_set_id, agent_snapshot, no_of_evals)
@@ -101,7 +104,7 @@ async def create_eval_set_run(
 
     @gracefully_handle_errors
     async def create_eval_run(
-        self, eval_item: EvaluationItem, eval_set_run_id: str
+        self, eval_item: LegacyEvaluationItem, eval_set_run_id: str
    ) -> str:
         """Create a new evaluation run in StudioWeb.
 
@@ -126,7 +129,7 @@ async def create_eval_run(
     async def update_eval_run(
         self,
         sw_progress_item: StudioWebProgressItem,
-        evaluators: dict[str, BaseEvaluator[Any]],
+        evaluators: dict[str, LegacyBaseEvaluator[Any]],
     ):
         """Update an evaluation run with results."""
         assertion_runs, evaluator_scores = self._collect_results(
@@ -300,7 +303,7 @@ def _extract_agent_snapshot(self, entrypoint: str) -> StudioWebAgentSnapshot:
     def _collect_results(
         self,
         eval_results: list[EvalItemResult],
-        evaluators: dict[str, BaseEvaluator[Any]],
+        evaluators: dict[str, LegacyBaseEvaluator[Any]],
     ) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
         assertion_runs: list[dict[str, Any]] = []
         evaluator_scores_list: list[dict[str, Any]] = []
@@ -365,7 +368,7 @@ def _update_eval_run_spec(
         )
 
     def _create_eval_run_spec(
-        self, eval_item: EvaluationItem, eval_set_run_id: str
+        self, eval_item: LegacyEvaluationItem, eval_set_run_id: str
     ) -> RequestSpec:
         return RequestSpec(
             method="POST",
