uipath 2.1.107__py3-none-any.whl → 2.1.109__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of uipath might be problematic.
- uipath/_cli/__init__.py +4 -0
- uipath/_cli/_evals/_console_progress_reporter.py +2 -2
- uipath/_cli/_evals/_evaluator_factory.py +314 -29
- uipath/_cli/_evals/_helpers.py +194 -0
- uipath/_cli/_evals/_models/_evaluation_set.py +73 -7
- uipath/_cli/_evals/_models/_evaluator.py +183 -9
- uipath/_cli/_evals/_models/_evaluator_base_params.py +3 -3
- uipath/_cli/_evals/_models/_output.py +87 -3
- uipath/_cli/_evals/_progress_reporter.py +288 -28
- uipath/_cli/_evals/_runtime.py +80 -26
- uipath/_cli/_evals/mocks/input_mocker.py +1 -3
- uipath/_cli/_evals/mocks/llm_mocker.py +2 -2
- uipath/_cli/_evals/mocks/mocker_factory.py +2 -2
- uipath/_cli/_evals/mocks/mockito_mocker.py +2 -2
- uipath/_cli/_evals/mocks/mocks.py +5 -3
- uipath/_cli/_push/models.py +17 -0
- uipath/_cli/_push/sw_file_handler.py +336 -3
- uipath/_cli/_runtime/_contracts.py +25 -5
- uipath/_cli/_templates/custom_evaluator.py.template +65 -0
- uipath/_cli/_utils/_eval_set.py +30 -9
- uipath/_cli/_utils/_resources.py +21 -0
- uipath/_cli/_utils/_studio_project.py +18 -0
- uipath/_cli/cli_add.py +114 -0
- uipath/_cli/cli_eval.py +5 -1
- uipath/_cli/cli_pull.py +11 -26
- uipath/_cli/cli_push.py +2 -0
- uipath/_cli/cli_register.py +45 -0
- uipath/_events/_events.py +6 -5
- uipath/_resources/SDK_REFERENCE.md +0 -97
- uipath/_uipath.py +10 -37
- uipath/_utils/constants.py +4 -0
- uipath/eval/_helpers/evaluators_helpers.py +494 -0
- uipath/eval/_helpers/helpers.py +30 -2
- uipath/eval/evaluators/__init__.py +60 -5
- uipath/eval/evaluators/base_evaluator.py +546 -44
- uipath/eval/evaluators/contains_evaluator.py +80 -0
- uipath/eval/evaluators/exact_match_evaluator.py +43 -12
- uipath/eval/evaluators/json_similarity_evaluator.py +41 -12
- uipath/eval/evaluators/legacy_base_evaluator.py +89 -0
- uipath/eval/evaluators/{deterministic_evaluator_base.py → legacy_deterministic_evaluator_base.py} +2 -2
- uipath/eval/evaluators/legacy_exact_match_evaluator.py +37 -0
- uipath/eval/evaluators/legacy_json_similarity_evaluator.py +151 -0
- uipath/eval/evaluators/legacy_llm_as_judge_evaluator.py +137 -0
- uipath/eval/evaluators/{trajectory_evaluator.py → legacy_trajectory_evaluator.py} +5 -6
- uipath/eval/evaluators/llm_as_judge_evaluator.py +143 -78
- uipath/eval/evaluators/llm_judge_output_evaluator.py +112 -0
- uipath/eval/evaluators/llm_judge_trajectory_evaluator.py +142 -0
- uipath/eval/evaluators/output_evaluator.py +117 -0
- uipath/eval/evaluators/tool_call_args_evaluator.py +82 -0
- uipath/eval/evaluators/tool_call_count_evaluator.py +87 -0
- uipath/eval/evaluators/tool_call_order_evaluator.py +84 -0
- uipath/eval/evaluators/tool_call_output_evaluator.py +87 -0
- uipath/eval/evaluators_types/ContainsEvaluator.json +73 -0
- uipath/eval/evaluators_types/ExactMatchEvaluator.json +89 -0
- uipath/eval/evaluators_types/JsonSimilarityEvaluator.json +81 -0
- uipath/eval/evaluators_types/LLMJudgeOutputEvaluator.json +110 -0
- uipath/eval/evaluators_types/LLMJudgeSimulationTrajectoryEvaluator.json +88 -0
- uipath/eval/evaluators_types/LLMJudgeStrictJSONSimilarityOutputEvaluator.json +110 -0
- uipath/eval/evaluators_types/LLMJudgeTrajectoryEvaluator.json +88 -0
- uipath/eval/evaluators_types/ToolCallArgsEvaluator.json +131 -0
- uipath/eval/evaluators_types/ToolCallCountEvaluator.json +104 -0
- uipath/eval/evaluators_types/ToolCallOrderEvaluator.json +100 -0
- uipath/eval/evaluators_types/ToolCallOutputEvaluator.json +124 -0
- uipath/eval/evaluators_types/generate_types.py +31 -0
- uipath/eval/models/__init__.py +16 -1
- uipath/eval/models/llm_judge_types.py +196 -0
- uipath/eval/models/models.py +109 -7
- {uipath-2.1.107.dist-info → uipath-2.1.109.dist-info}/METADATA +1 -1
- {uipath-2.1.107.dist-info → uipath-2.1.109.dist-info}/RECORD +72 -40
- {uipath-2.1.107.dist-info → uipath-2.1.109.dist-info}/WHEEL +0 -0
- {uipath-2.1.107.dist-info → uipath-2.1.109.dist-info}/entry_points.txt +0 -0
- {uipath-2.1.107.dist-info → uipath-2.1.109.dist-info}/licenses/LICENSE +0 -0
uipath/eval/evaluators/base_evaluator.py

@@ -1,61 +1,93 @@
 """Base evaluator abstract class for agent evaluation."""
 
-import functools
-import time
+import json
+import warnings
 from abc import ABC, abstractmethod
-from typing import Generic, TypeVar
+from typing import Any, Generic, TypeVar, Union, cast, get_args
 
-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel, ConfigDict, Field, model_validator
+from pydantic.alias_generators import to_camel
 
-from …
-from ..models import (
-    …
-    ErrorEvaluationResult,
-    EvaluatorCategory,
-    EvaluatorType,
-)
+from .._helpers.helpers import track_evaluation_metrics
+from ..models import AgentExecution, EvaluationResult
+from ..models.models import UiPathEvaluationError, UiPathEvaluationErrorCategory
 
 
-…
-    """
+class BaseEvaluationCriteria(BaseModel):
+    """Base class for all evaluation criteria."""
+
+    model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)
+    pass
+
+
+# Type variable for evaluation criteria, used by both Config and Evaluator
+T = TypeVar("T", bound=BaseEvaluationCriteria)
+
+
+class BaseEvaluatorConfig(BaseModel, Generic[T]):
+    """Base class for all evaluator configurations.
+
+    Generic over T (evaluation criteria type) to ensure type safety between
+    the config's default_evaluation_criteria and the evaluator's expected criteria type.
+    """
+
+    model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)
+
+    name: str
+    default_evaluation_criteria: T | None = None
+
+
+class BaseEvaluatorJustification(BaseModel):
+    """Base class for all evaluator justifications."""
+
+    pass
+
+
+# Additional type variables for Config and Justification
+# Note: C must be BaseEvaluatorConfig[T] to ensure type consistency
+C = TypeVar("C", bound=BaseEvaluatorConfig[Any])
+J = TypeVar("J", bound=Union[str, None, BaseEvaluatorJustification])
 
-    @functools.wraps(func)
-    async def wrapper(*args, **kwargs) -> EvaluationResult:
-        start_time = time.time()
-        try:
-            result = await func(*args, **kwargs)
-        except Exception as e:
-            result = ErrorEvaluationResult(
-                details="Exception thrown by evaluator: {}".format(e),
-                evaluation_time=time.time() - start_time,
-            )
-        end_time = time.time()
-        execution_time = end_time - start_time
 
-…
-…
+class BaseEvaluator(BaseModel, Generic[T, C, J], ABC):
+    """Abstract base class for all evaluators.
 
-…
+    Generic Parameters:
+        T: The evaluation criteria type (bound to BaseEvaluationCriteria)
+        C: The evaluator config type (bound to BaseEvaluatorConfig[T])
+        J: The justification type (str, None, or BaseEvaluatorJustification subclass)
 
+    Design Rationale:
+        T is explicitly specified even though C = BaseEvaluatorConfig[T] already encodes it.
+        This redundancy is intentional and provides:
 
-…
+        1. **Type Checker Support**: Static type checkers can infer the exact criteria type
+           for the evaluate() method signature without runtime introspection
 
+        2. **Clear API**: The signature BaseEvaluator[MyCriteria, MyConfig[MyCriteria], str]
+           makes it immediately obvious what criteria type is expected
 
-…
-…
+        3. **IDE Support**: Autocomplete and type hints work perfectly for method parameters
+
+    Runtime validation ensures T and C's generic parameter are consistent.
+    """
 
     model_config = ConfigDict(arbitrary_types_allowed=True)
 
     id: str
-…
-…
-…
-…
-…
-…
-…
-…
-…
+    config: dict[str, Any] = Field(description="The config dictionary")
+    config_type: type[C] = Field(description="The config type class")
+    evaluation_criteria_type: type[T] = Field(
+        description="The type used for evaluation criteria validation and creation"
+    )
+    justification_type: type[J] = Field(
+        description="The type used for justification validation and creation"
+    )
+    evaluator_config: C = Field(
+        exclude=True, description="The validated config object instance"
+    )
+
+    def __init_subclass__(cls, **kwargs: Any):
         """Hook for subclass creation - automatically applies evaluation metrics tracking."""
         super().__init_subclass__(**kwargs)
 
@@ -65,10 +97,479 @@ class BaseEvaluator(BaseModel, Generic[T], ABC):
         cls.evaluate = track_evaluation_metrics(cls.evaluate)  # type: ignore[method-assign]
         cls.evaluate._has_metrics_decorator = True  # type: ignore[attr-defined]
 
-…
-…
+    @property
+    def name(self) -> str:
+        """Evaluator's name."""
+        return self.evaluator_config.name
+
+    @model_validator(mode="before")
+    @classmethod
+    def validate_model(cls, values: Any) -> Any:
+        """Pre-initialization model validator for Pydantic models.
+
+        This validator extracts the Generic type parameters and validates their consistency.
+
+        Args:
+            values: The raw input values before validation
+
+        Returns:
+            The validated/transformed values with types set
+
+        Raises:
+            ValueError: If types cannot be determined or are inconsistent
+        """
+        if isinstance(values, dict):
+            # Always extract and set evaluation_criteria_type
+            criteria_type = cls._extract_evaluation_criteria_type()
+            values["evaluation_criteria_type"] = criteria_type
+
+            # Always extract and set config_type
+            config_type = cls._extract_config_type()
+            values["config_type"] = config_type
+
+            # Always extract and set justification_type
+            justification_type = cls._extract_justification_type()
+            values["justification_type"] = justification_type
+
+            # Validate consistency: config's generic parameter should match criteria_type
+            cls._validate_type_consistency(config_type, criteria_type)
+
+            # Validate and create the config object if config dict is provided
+            try:
+                validated_config = config_type.model_validate(values.get("config", {}))
+                values["evaluator_config"] = validated_config
+            except Exception as e:
+                raise UiPathEvaluationError(
+                    code="FAILED_TO_VALIDATE_EVALUATOR_CONFIG",
+                    title=f"Failed to validate evaluator config for {cls.__name__}",
+                    detail=f"Error: {e}",
+                    category=UiPathEvaluationErrorCategory.SYSTEM,
+                ) from e
+
+        return values
+
+    @classmethod
+    def _validate_type_consistency(
+        cls,
+        config_type: type[BaseEvaluatorConfig[Any]],
+        criteria_type: type[BaseEvaluationCriteria],
+    ) -> None:
+        """Validate that the config's generic parameter matches the evaluator's criteria type.
+
+        Extracts the criteria type from the config's default_evaluation_criteria field
+        annotation and validates it matches the evaluator's expected criteria type.
+
+        Args:
+            config_type: The config type to validate
+            criteria_type: The expected evaluation criteria type
+
+        Raises:
+            ValueError: If the types are inconsistent
+        """
+        # Skip validation for base classes
+        if config_type.__name__ in (
+            "BaseEvaluatorConfig",
+            "OutputEvaluatorConfig",
+            "BaseLLMJudgeEvaluatorConfig",
+        ):
+            return
+
+        # Extract from Pydantic's model_fields which preserves generic types
+        if (
+            hasattr(config_type, "model_fields")
+            and "default_evaluation_criteria" in config_type.model_fields
+        ):
+            field_info = config_type.model_fields["default_evaluation_criteria"]
+            if hasattr(field_info, "annotation"):
+                annotation = field_info.annotation
+                # The annotation will be SomeCriteria | None
+                args = get_args(annotation)
+                if args:
+                    # Get the criteria type (the non-None arg)
+                    for arg in args:
+                        if (
+                            arg is not type(None)
+                            and isinstance(arg, type)
+                            and issubclass(arg, BaseEvaluationCriteria)
+                        ):
+                            # Found the config's criteria type, check if it matches
+                            if arg != criteria_type:
+                                raise UiPathEvaluationError(
+                                    code="TYPE_INCONSISTENCY_IN_EVALUATOR",
+                                    title=f"Type inconsistency in {cls.__name__}: "
+                                    f"Config {config_type.__name__} expects criteria type {arg.__name__}",
+                                    detail=f"Evaluator expects {criteria_type.__name__}. "
+                                    f"Ensure BaseEvaluator[T, C[T], J] has matching T and C[T] parameters.",
+                                    category=UiPathEvaluationErrorCategory.SYSTEM,
+                                )
+                            return  # Validation passed
+
+    @classmethod
+    def _extract_evaluation_criteria_type(cls) -> type[BaseEvaluationCriteria]:
+        """Extract the evaluation criteria type from Pydantic model fields.
+
+        Returns:
+            The evaluation criteria type
+
+        Raises:
+            ValueError: If no valid evaluation criteria type can be determined from the class definition
+        """
+        # Special case: if this is the BaseEvaluator class itself, return BaseEvaluationCriteria
+        if cls.__name__ == ("BaseEvaluator" or "BaseEvaluator[Any, Any, Any]"):
+            return BaseEvaluationCriteria
+
+        # Check if Pydantic has already resolved the evaluation_criteria_type field annotation
+        if not (
+            hasattr(cls, "model_fields")
+            and "evaluation_criteria_type" in cls.model_fields
+        ):
+            raise UiPathEvaluationError(
+                code="COULD_NOT_FIND_EVALUATION_CRITERIA_TYPE_FIELD",
+                title=f"Could not find evaluation_criteria_type field in {cls.__name__}",
+                detail="Ensure the class properly inherits from BaseEvaluator with correct Generic parameters.",
+                category=UiPathEvaluationErrorCategory.SYSTEM,
+            )
+
+        field_info = cls.model_fields["evaluation_criteria_type"]
+        if not hasattr(field_info, "annotation"):
+            raise UiPathEvaluationError(
+                code="NO_ANNOTATION_FOUND_FOR_EVALUATION_CRITERIA_TYPE_FIELD",
+                title=f"No annotation found for evaluation_criteria_type field in {cls.__name__}",
+                detail="Ensure the class properly inherits from BaseEvaluator with correct Generic parameters.",
+                category=UiPathEvaluationErrorCategory.SYSTEM,
+            )
+
+        # Extract the inner type from type[SomeType]
+        annotation = field_info.annotation
+        args = get_args(annotation)
+        if not args:
+            raise UiPathEvaluationError(
+                code="INVALID_ANNOTATION_FOR_EVALUATION_CRITERIA_TYPE",
+                title=f"Invalid annotation for evaluation_criteria_type in {cls.__name__}: {annotation}",
+                detail="Expected type[SomeEvaluationCriteria]",
+                category=UiPathEvaluationErrorCategory.SYSTEM,
+            )
+
+        criteria_type = args[0]
+        if not (
+            isinstance(criteria_type, type)
+            and issubclass(criteria_type, BaseEvaluationCriteria)
+        ):
+            raise UiPathEvaluationError(
+                code="INVALID_EVALUATION_CRITERIA_TYPE",
+                title=f"Invalid evaluation criteria type {criteria_type} in {cls.__name__}",
+                detail=f"{criteria_type} must be a subclass of BaseEvaluationCriteria",
+                category=UiPathEvaluationErrorCategory.SYSTEM,
+            )
+
+        return criteria_type
+
+    @classmethod
+    def _extract_config_type(cls) -> type[BaseEvaluatorConfig[Any]]:
+        """Extract the config type from Pydantic model fields.
+
+        Returns:
+            The config type for this evaluator
+
+        Raises:
+            ValueError: If no valid config type can be determined from the class definition
+        """
+        # Special case: if this is the BaseEvaluator class itself, return BaseEvaluatorConfig
+        if cls.__name__ == ("BaseEvaluator" or "BaseEvaluator[Any, Any, Any]"):
+            return BaseEvaluatorConfig
+        # Check if Pydantic has already resolved the config_type field annotation
+        if not (hasattr(cls, "model_fields") and "config_type" in cls.model_fields):
+            raise UiPathEvaluationError(
+                code="COULD_NOT_FIND_CONFIG_TYPE_FIELD",
+                title=f"Could not find config_type field in {cls.__name__}",
+                detail="Ensure the class properly inherits from BaseEvaluator with correct Generic parameters.",
+                category=UiPathEvaluationErrorCategory.SYSTEM,
+            )
+
+        field_info = cls.model_fields["config_type"]
+        if not hasattr(field_info, "annotation"):
+            raise UiPathEvaluationError(
+                code="NO_ANNOTATION_FOUND_FOR_CONFIG_TYPE_FIELD",
+                title=f"No annotation found for config_type field in {cls.__name__}",
+                detail="Ensure the class properly inherits from BaseEvaluator with correct Generic parameters.",
+                category=UiPathEvaluationErrorCategory.SYSTEM,
+            )
+
+        # Extract the inner type from type[SomeType]
+        annotation = field_info.annotation
+        args = get_args(annotation)
+        if not args:
+            raise UiPathEvaluationError(
+                code="INVALID_ANNOTATION_FOR_CONFIG_TYPE",
+                title=f"Invalid annotation for config_type in {cls.__name__}: {annotation}",
+                detail="Expected type[SomeEvaluatorConfig]",
+                category=UiPathEvaluationErrorCategory.SYSTEM,
+            )
+
+        config_type = args[0]
+        if not (
+            isinstance(config_type, type)
+            and issubclass(config_type, BaseEvaluatorConfig)
+        ):
+            raise UiPathEvaluationError(
+                code="INVALID_CONFIG_TYPE",
+                title=f"Invalid config type {config_type} in {cls.__name__}",
+                detail=f"{config_type} must be a subclass of BaseEvaluatorConfig",
+                category=UiPathEvaluationErrorCategory.SYSTEM,
+            )
+
+        return config_type
+
+    @classmethod
+    def _extract_justification_type(cls) -> type[J]:
+        """Extract the justification type from Pydantic model fields.
+
+        Returns:
+            The justification type (str, None, or BaseEvaluatorJustification subclass)
+
+        Note:
+            Unlike the other type extraction methods, this one returns a default (type(None))
+            instead of raising an error, since justification support is optional and
+            defaults to None for evaluators that don't specify a justification type.
+        """
+        try:
+            # Special case: if this is the BaseEvaluator class itself, return type(None)
+            if cls.__name__ == "BaseEvaluator[Any, Any, Any]":
+                return cast(type[J], type(None))
+
+            # Check if Pydantic has resolved the justification_type field annotation
+            if not (
+                hasattr(cls, "model_fields")
+                and "justification_type" in cls.model_fields
+            ):
+                # Default to None if field doesn't exist (justification is optional)
+                return cast(type[J], type(None))
+
+            field_info = cls.model_fields["justification_type"]
+            if not hasattr(field_info, "annotation"):
+                # Default to None if no annotation (justification is optional)
+                return cast(type[J], type(None))
+
+            # Extract the inner type from type[SomeType]
+            annotation = field_info.annotation
+            args = get_args(annotation)
+            if not args:
+                # Default to None if no type args (justification is optional)
+                return cast(type[J], type(None))
+
+            justification_type = args[0]
+
+            # Validate the justification type - must be str, type(None), or BaseEvaluatorJustification subclass
+            if justification_type is str or justification_type is type(None):
+                return cast(type[J], justification_type)
+            elif isinstance(justification_type, type) and issubclass(
+                justification_type, BaseEvaluatorJustification
+            ):
+                return cast(type[J], justification_type)
+            else:
+                # Invalid justification type - log warning but default to None for robustness
+                warnings.warn(
+                    f"Invalid justification type {justification_type} in {cls.__name__}. "
+                    f"Must be str, None, or subclass of BaseEvaluatorJustification. Defaulting to None.",
+                    UserWarning,
+                    stacklevel=2,
+                )
+                return cast(type[J], type(None))
+        except Exception as e:
+            raise UiPathEvaluationError(
+                code="CANNOT_EXTRACT_JUSTIFICATION_TYPE",
+                title=f"Cannot extract justification type from {cls.__name__}",
+                detail=f"Error: {e}",
+                category=UiPathEvaluationErrorCategory.SYSTEM,
+            ) from e
+
+    def validate_evaluation_criteria(self, criteria: Any) -> T:
+        """Validate and convert input to the correct evaluation criteria type.
+
+        Uses Pydantic's model_validate for proper validation, type coercion,
+        and error handling.
+
+        Args:
+            criteria: The criteria to validate (dict, BaseEvaluationCriteria, or other)
+
+        Returns:
+            An instance of the evaluation criteria type (T)
+
+        Raises:
+            ValueError: If the criteria cannot be converted to the expected type
+        """
+        try:
+            if isinstance(criteria, self.evaluation_criteria_type):
+                return criteria
+            elif isinstance(criteria, dict):
+                return self.evaluation_criteria_type.model_validate(criteria)
+            elif hasattr(criteria, "__dict__"):
+                # Try to convert from another object type
+                return self.evaluation_criteria_type.model_validate(criteria.__dict__)
+            else:
+                # Try to let Pydantic handle the conversion
+                return self.evaluation_criteria_type.model_validate(criteria)
+        except Exception as e:
+            raise UiPathEvaluationError(
+                code="CANNOT_VALIDATE_EVALUATION_CRITERIA",
+                title=f"Cannot validate {type(criteria)} to {self.evaluation_criteria_type}",
+                detail=f"Error: {e}",
+                category=UiPathEvaluationErrorCategory.SYSTEM,
+            ) from e
+
+    def validate_justification(self, justification: Any) -> J:
+        """Validate and convert input to the correct justification type.
+
+        Args:
+            justification: The justification to validate (str, None, dict, BaseEvaluatorJustification, or other)
+
+        Returns:
+            The validated justification of the correct type
+        """
+        # The key insight: J is constrained to be one of str, None, or BaseEvaluatorJustification
+        # At instantiation time, J gets bound to exactly one of these types
+        # We need to handle each case and ensure the return matches the bound type
+        try:
+            # Handle None type - when J is bound to None (the literal None type)
+            if self.justification_type is type(None):
+                # When J is None, we can only return None
+                return cast(J, justification if justification is None else None)
+
+            # Handle str type - when J is bound to str
+            if self.justification_type is str:
+                # When J is str, we must return a str
+                if justification is None:
+                    return cast(J, "")
+                return cast(J, str(justification))
+
+            # Handle BaseEvaluatorJustification subclasses - when J is bound to a specific subclass
+            if isinstance(self.justification_type, type) and issubclass(
+                self.justification_type, BaseEvaluatorJustification
+            ):
+                # When J is a BaseEvaluatorJustification subclass, we must return that type
+                if justification is None:
+                    raise ValueError(
+                        f"None is not allowed for justification type {self.justification_type}"
+                    )
+
+                if isinstance(justification, self.justification_type):
+                    return justification
+                elif isinstance(justification, dict):
+                    return self.justification_type.model_validate(justification)
+                elif hasattr(justification, "__dict__"):
+                    return self.justification_type.model_validate(
+                        justification.__dict__
+                    )
+                else:
+                    return self.justification_type.model_validate(justification)
+        except Exception as e:
+            raise UiPathEvaluationError(
+                code="CANNOT_CONVERT_JUSTIFICATION",
+                title=f"Cannot convert {type(justification)} to {self.justification_type}",
+                detail=f"Error: {e}",
+                category=UiPathEvaluationErrorCategory.SYSTEM,
+            ) from e
+
+        # Fallback: this should never happen
+        raise UiPathEvaluationError(
+            code="UNSUPPORTED_JUSTIFICATION_TYPE",
+            title=f"Unsupported justification type {self.justification_type} for input {type(justification)}",
+            detail=f"Unsupported justification type {self.justification_type} for input {type(justification)}",
+            category=UiPathEvaluationErrorCategory.SYSTEM,
+        )
+
+    @classmethod
+    def get_evaluation_criteria_schema(cls) -> dict[str, Any]:
+        """Get the JSON schema for the evaluation criteria type.
+
+        Returns:
+            The JSON schema for the evaluation criteria type
+        """
+        criteria_type = cls._extract_evaluation_criteria_type()
+        return criteria_type.model_json_schema(by_alias=False)
+
+    @classmethod
+    def get_config_schema(cls) -> dict[str, Any]:
+        """Get the JSON schema for the config type.
+
+        Returns:
+            The JSON schema for the config type
+        """
+        config_type = cls._extract_config_type()
+        return config_type.model_json_schema(by_alias=False)
+
+    @classmethod
+    def get_justification_schema(cls) -> dict[str, Any]:
+        """Get the JSON schema for the justification type.
+
+        Returns:
+            The JSON schema for the justification type
+        """
+        justification_type = cls._extract_justification_type()
+        if justification_type is type(None):
+            return {}
+        elif justification_type is str:
+            return {"type": "string"}
+        elif isinstance(justification_type, type) and issubclass(
+            justification_type, BaseEvaluatorJustification
+        ):
+            return justification_type.model_json_schema(by_alias=False)
+        else:
+            raise UiPathEvaluationError(
+                code="INVALID_JUSTIFICATION_TYPE",
+                title=f"Invalid justification type {justification_type} in {cls.__name__}",
+                detail="Must be str, None, or subclass of BaseEvaluatorJustification",
+                category=UiPathEvaluationErrorCategory.SYSTEM,
+            )
+
+    def _canonical_json(self, obj: Any) -> str:
+        """Convert an object to canonical JSON string for consistent comparison.
+
+        Args:
+            obj: The object to convert to canonical JSON
+
+        Returns:
+            str: Canonical JSON string with normalized numbers and sorted keys
+        """
+        return json.dumps(
+            obj,
+            sort_keys=True,
+            separators=(",", ":"),
+            ensure_ascii=False,
+        )
+
+    @classmethod
+    @abstractmethod
+    def get_evaluator_id(cls) -> str:
+        """Get the evaluator id."""
         pass
 
+    @classmethod
+    def generate_json_type(cls) -> dict[str, Any]:
+        """Generate the JSON schema for the evaluator."""
+        return {
+            "evaluatorTypeId": cls.get_evaluator_id(),
+            "evaluatorConfigSchema": cls.get_config_schema(),
+            "evaluationCriteriaSchema": cls.get_evaluation_criteria_schema(),
+            "justificationSchema": cls.get_justification_schema(),
+        }
+
+    async def validate_and_evaluate_criteria(
+        self, agent_execution: AgentExecution, evaluation_criteria: Any
+    ) -> EvaluationResult:
+        """Evaluate the given data and return a result from a raw evaluation criteria."""
+        if evaluation_criteria is None:
+            evaluation_criteria = self.evaluator_config.default_evaluation_criteria
+        if evaluation_criteria is None:
+            raise UiPathEvaluationError(
+                code="NO_EVALUATION_CRITERIA_PROVIDED",
+                title="No evaluation criteria provided and no default evaluation criteria configured",
+                detail="No evaluation criteria provided and no default evaluation criteria configured",
+                category=UiPathEvaluationErrorCategory.SYSTEM,
+            )
+        criteria = self.validate_evaluation_criteria(evaluation_criteria)
+        return await self.evaluate(agent_execution, criteria)
+
     @abstractmethod
     async def evaluate(
         self, agent_execution: AgentExecution, evaluation_criteria: T
@@ -78,8 +579,9 @@ class BaseEvaluator(BaseModel, Generic[T], ABC):
         Args:
             agent_execution: The execution details containing:
                 - agent_input: The input received by the agent
-                - …
-                - …
+                - agent_output: The actual output from the agent
+                - agent_trace: The execution trace from the agent
+                - simulation_instructions: The simulation instructions for the agent
             evaluation_criteria: The criteria to evaluate
 
         Returns:
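The heart of this release is the rewritten BaseEvaluator, now declared as Generic[T, C, J] and recovering its bound type parameters at runtime from Pydantic's resolved field annotations (see _extract_evaluation_criteria_type and _extract_config_type above). The snippet below is a minimal, self-contained sketch of that extraction pattern using plain Pydantic v2; all class and field names in it are illustrative stand-ins, not the SDK's actual API.

```python
# Minimal, self-contained sketch of the type-parameter extraction pattern
# used by the new BaseEvaluator. Names here are illustrative stand-ins,
# not the uipath SDK's actual classes.
from typing import Generic, TypeVar, get_args

from pydantic import BaseModel


class Criteria(BaseModel):
    """Stand-in for BaseEvaluationCriteria."""


T = TypeVar("T", bound=Criteria)


class Evaluator(BaseModel, Generic[T]):
    """Stand-in for BaseEvaluator: carries the bound criteria type as a field."""

    criteria_type: type[T]


class ExactCriteria(Criteria):
    expected_output: str


class ExactEvaluator(Evaluator[ExactCriteria]):
    """Concrete evaluator parametrized with a specific criteria type."""


# Pydantic v2 substitutes T into the field annotations of a parametrized
# subclass, so the bound type is recoverable from model_fields alone; this
# is the mechanism _extract_evaluation_criteria_type relies on in the diff.
annotation = ExactEvaluator.model_fields["criteria_type"].annotation
(bound,) = get_args(annotation)  # annotation is type[ExactCriteria]
assert bound is ExactCriteria

# The recovered type can then validate raw criteria dicts, mirroring
# validate_evaluation_criteria:
criteria = bound.model_validate({"expected_output": "42"})
print(criteria.expected_output)  # 42
```

In the diff itself, the same recovered types also drive generate_json_type(), which bundles the config, criteria, and justification JSON schemas under an evaluatorTypeId; the new uipath/eval/evaluators_types/*.json files in the file list above appear to be generated this way (see generate_types.py).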