uipath 2.1.108__py3-none-any.whl → 2.1.109__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of uipath might be problematic.

Files changed (69)
  1. uipath/_cli/__init__.py +4 -0
  2. uipath/_cli/_evals/_console_progress_reporter.py +2 -2
  3. uipath/_cli/_evals/_evaluator_factory.py +314 -29
  4. uipath/_cli/_evals/_helpers.py +194 -0
  5. uipath/_cli/_evals/_models/_evaluation_set.py +73 -7
  6. uipath/_cli/_evals/_models/_evaluator.py +183 -9
  7. uipath/_cli/_evals/_models/_evaluator_base_params.py +3 -3
  8. uipath/_cli/_evals/_models/_output.py +87 -3
  9. uipath/_cli/_evals/_progress_reporter.py +288 -28
  10. uipath/_cli/_evals/_runtime.py +80 -26
  11. uipath/_cli/_evals/mocks/input_mocker.py +1 -3
  12. uipath/_cli/_evals/mocks/llm_mocker.py +2 -2
  13. uipath/_cli/_evals/mocks/mocker_factory.py +2 -2
  14. uipath/_cli/_evals/mocks/mockito_mocker.py +2 -2
  15. uipath/_cli/_evals/mocks/mocks.py +5 -3
  16. uipath/_cli/_push/models.py +17 -0
  17. uipath/_cli/_push/sw_file_handler.py +336 -3
  18. uipath/_cli/_templates/custom_evaluator.py.template +65 -0
  19. uipath/_cli/_utils/_eval_set.py +30 -9
  20. uipath/_cli/_utils/_resources.py +21 -0
  21. uipath/_cli/_utils/_studio_project.py +18 -0
  22. uipath/_cli/cli_add.py +114 -0
  23. uipath/_cli/cli_eval.py +5 -1
  24. uipath/_cli/cli_pull.py +11 -26
  25. uipath/_cli/cli_push.py +2 -0
  26. uipath/_cli/cli_register.py +45 -0
  27. uipath/_events/_events.py +6 -5
  28. uipath/_utils/constants.py +4 -0
  29. uipath/eval/_helpers/evaluators_helpers.py +494 -0
  30. uipath/eval/_helpers/helpers.py +30 -2
  31. uipath/eval/evaluators/__init__.py +60 -5
  32. uipath/eval/evaluators/base_evaluator.py +546 -44
  33. uipath/eval/evaluators/contains_evaluator.py +80 -0
  34. uipath/eval/evaluators/exact_match_evaluator.py +43 -12
  35. uipath/eval/evaluators/json_similarity_evaluator.py +41 -12
  36. uipath/eval/evaluators/legacy_base_evaluator.py +89 -0
  37. uipath/eval/evaluators/{deterministic_evaluator_base.py → legacy_deterministic_evaluator_base.py} +2 -2
  38. uipath/eval/evaluators/legacy_exact_match_evaluator.py +37 -0
  39. uipath/eval/evaluators/legacy_json_similarity_evaluator.py +151 -0
  40. uipath/eval/evaluators/legacy_llm_as_judge_evaluator.py +137 -0
  41. uipath/eval/evaluators/{trajectory_evaluator.py → legacy_trajectory_evaluator.py} +5 -6
  42. uipath/eval/evaluators/llm_as_judge_evaluator.py +143 -78
  43. uipath/eval/evaluators/llm_judge_output_evaluator.py +112 -0
  44. uipath/eval/evaluators/llm_judge_trajectory_evaluator.py +142 -0
  45. uipath/eval/evaluators/output_evaluator.py +117 -0
  46. uipath/eval/evaluators/tool_call_args_evaluator.py +82 -0
  47. uipath/eval/evaluators/tool_call_count_evaluator.py +87 -0
  48. uipath/eval/evaluators/tool_call_order_evaluator.py +84 -0
  49. uipath/eval/evaluators/tool_call_output_evaluator.py +87 -0
  50. uipath/eval/evaluators_types/ContainsEvaluator.json +73 -0
  51. uipath/eval/evaluators_types/ExactMatchEvaluator.json +89 -0
  52. uipath/eval/evaluators_types/JsonSimilarityEvaluator.json +81 -0
  53. uipath/eval/evaluators_types/LLMJudgeOutputEvaluator.json +110 -0
  54. uipath/eval/evaluators_types/LLMJudgeSimulationTrajectoryEvaluator.json +88 -0
  55. uipath/eval/evaluators_types/LLMJudgeStrictJSONSimilarityOutputEvaluator.json +110 -0
  56. uipath/eval/evaluators_types/LLMJudgeTrajectoryEvaluator.json +88 -0
  57. uipath/eval/evaluators_types/ToolCallArgsEvaluator.json +131 -0
  58. uipath/eval/evaluators_types/ToolCallCountEvaluator.json +104 -0
  59. uipath/eval/evaluators_types/ToolCallOrderEvaluator.json +100 -0
  60. uipath/eval/evaluators_types/ToolCallOutputEvaluator.json +124 -0
  61. uipath/eval/evaluators_types/generate_types.py +31 -0
  62. uipath/eval/models/__init__.py +16 -1
  63. uipath/eval/models/llm_judge_types.py +196 -0
  64. uipath/eval/models/models.py +109 -7
  65. {uipath-2.1.108.dist-info → uipath-2.1.109.dist-info}/METADATA +1 -1
  66. {uipath-2.1.108.dist-info → uipath-2.1.109.dist-info}/RECORD +69 -37
  67. {uipath-2.1.108.dist-info → uipath-2.1.109.dist-info}/WHEEL +0 -0
  68. {uipath-2.1.108.dist-info → uipath-2.1.109.dist-info}/entry_points.txt +0 -0
  69. {uipath-2.1.108.dist-info → uipath-2.1.109.dist-info}/licenses/LICENSE +0 -0
uipath/_cli/__init__.py CHANGED
@@ -4,6 +4,7 @@ import sys
  import click

  from ._utils._common import add_cwd_to_path, load_environment_variables
+ from .cli_add import add as add
  from .cli_auth import auth as auth
  from .cli_debug import debug as debug # type: ignore
  from .cli_deploy import deploy as deploy # type: ignore
@@ -16,6 +17,7 @@ from .cli_pack import pack as pack # type: ignore
  from .cli_publish import publish as publish # type: ignore
  from .cli_pull import pull as pull # type: ignore
  from .cli_push import push as push # type: ignore
+ from .cli_register import register as register # type: ignore
  from .cli_run import run as run # type: ignore


@@ -75,4 +77,6 @@ cli.add_command(push)
  cli.add_command(pull)
  cli.add_command(eval)
  cli.add_command(dev)
+ cli.add_command(add)
+ cli.add_command(register)
  cli.add_command(debug)
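
Both new commands are registered on the existing click group, so their wiring can be smoke-tested in isolation. A minimal sketch, assuming only that the package is installed and that click's CliRunner is available (the concrete options of `add` and `register` are not shown in this diff):

    from click.testing import CliRunner

    from uipath._cli import cli

    runner = CliRunner()
    # `--help` is defined for every click command, so this only verifies that
    # the new `register` command is attached to the CLI group.
    result = runner.invoke(cli, ["register", "--help"])
    print(result.output)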
uipath/_cli/_evals/_console_progress_reporter.py CHANGED
@@ -7,6 +7,7 @@ from rich.console import Console
  from rich.rule import Rule
  from rich.table import Table

+ from uipath._cli._evals._models._evaluation_set import AnyEvaluator
  from uipath._events._event_bus import EventBus
  from uipath._events._events import (
      EvalRunCreatedEvent,
@@ -15,7 +16,6 @@ from uipath._events._events import (
      EvalSetRunUpdatedEvent,
      EvaluationEvents,
  )
- from uipath.eval.evaluators import BaseEvaluator
  from uipath.eval.models import ScoreType

  logger = logging.getLogger(__name__)
@@ -26,7 +26,7 @@ class ConsoleProgressReporter:

      def __init__(self):
          self.console = Console()
-         self.evaluators: Dict[str, BaseEvaluator[Any]] = {}
+         self.evaluators: Dict[str, AnyEvaluator] = {}
          self.display_started = False
          self.eval_results_by_name: Dict[str, list[Any]] = {}
uipath/_cli/_evals/_evaluator_factory.py CHANGED
@@ -1,21 +1,69 @@
+ import importlib.util
+ import sys
+ from pathlib import Path
  from typing import Any, Dict

  from pydantic import TypeAdapter

+ from uipath._cli._evals._helpers import try_extract_file_and_class_name # type: ignore
+ from uipath._cli._evals._models._evaluation_set import AnyEvaluator
  from uipath._cli._evals._models._evaluator import (
      EqualsEvaluatorParams,
-     Evaluator,
+     EvaluatorConfig,
      JsonSimilarityEvaluatorParams,
+     LegacyEvaluator,
      LLMEvaluatorParams,
      TrajectoryEvaluatorParams,
  )
  from uipath._cli._evals._models._evaluator_base_params import EvaluatorBaseParams
  from uipath.eval.evaluators import (
      BaseEvaluator,
+     LegacyBaseEvaluator,
+     LegacyExactMatchEvaluator,
+     LegacyJsonSimilarityEvaluator,
+     LegacyLlmAsAJudgeEvaluator,
+     LegacyTrajectoryEvaluator,
+ )
+ from uipath.eval.evaluators.base_evaluator import BaseEvaluatorConfig
+ from uipath.eval.evaluators.contains_evaluator import (
+     ContainsEvaluator,
+     ContainsEvaluatorConfig,
+ )
+ from uipath.eval.evaluators.exact_match_evaluator import (
      ExactMatchEvaluator,
+     ExactMatchEvaluatorConfig,
+ )
+ from uipath.eval.evaluators.json_similarity_evaluator import (
      JsonSimilarityEvaluator,
-     LlmAsAJudgeEvaluator,
-     TrajectoryEvaluator,
+     JsonSimilarityEvaluatorConfig,
+ )
+ from uipath.eval.evaluators.llm_judge_output_evaluator import (
+     LLMJudgeOutputEvaluator,
+     LLMJudgeOutputEvaluatorConfig,
+     LLMJudgeStrictJSONSimilarityOutputEvaluator,
+     LLMJudgeStrictJSONSimilarityOutputEvaluatorConfig,
+ )
+ from uipath.eval.evaluators.llm_judge_trajectory_evaluator import (
+     LLMJudgeTrajectoryEvaluator,
+     LLMJudgeTrajectoryEvaluatorConfig,
+     LLMJudgeTrajectorySimulationEvaluator,
+     LLMJudgeTrajectorySimulationEvaluatorConfig,
+ )
+ from uipath.eval.evaluators.tool_call_args_evaluator import (
+     ToolCallArgsEvaluator,
+     ToolCallArgsEvaluatorConfig,
+ )
+ from uipath.eval.evaluators.tool_call_count_evaluator import (
+     ToolCallCountEvaluator,
+     ToolCallCountEvaluatorConfig,
+ )
+ from uipath.eval.evaluators.tool_call_order_evaluator import (
+     ToolCallOrderEvaluator,
+     ToolCallOrderEvaluatorConfig,
+ )
+ from uipath.eval.evaluators.tool_call_output_evaluator import (
+     ToolCallOutputEvaluator,
+     ToolCallOutputEvaluatorConfig,
  )


@@ -23,7 +71,252 @@ class EvaluatorFactory:
      """Factory class for creating evaluator instances based on configuration."""

      @classmethod
-     def create_evaluator(cls, data: Dict[str, Any]) -> BaseEvaluator[Any]:
+     def create_evaluator(cls, data: Dict[str, Any]) -> AnyEvaluator:
+         if data.get("version", None) == "1.0":
+             return cls._create_evaluator_internal(data)
+         return cls._create_legacy_evaluator_internal(data)
+
+     @staticmethod
+     def _create_evaluator_internal(
+         data: Dict[str, Any],
+     ) -> BaseEvaluator[Any, Any, Any]:
+         # check custom evaluator
+         evaluator_schema = data.get("evaluatorSchema", "")
+         success, file_path, class_name = try_extract_file_and_class_name(
+             evaluator_schema
+         )
+         if success:
+             return EvaluatorFactory._create_coded_evaluator_internal(
+                 data, file_path, class_name
+             )
+
+         # use built-in evaluators
+         config: BaseEvaluatorConfig[Any] = TypeAdapter(EvaluatorConfig).validate_python(
+             data
+         )
+         match config:
+             case ContainsEvaluatorConfig():
+                 return EvaluatorFactory._create_contains_evaluator(data)
+             case ExactMatchEvaluatorConfig():
+                 return EvaluatorFactory._create_exact_match_evaluator(data)
+             case JsonSimilarityEvaluatorConfig():
+                 return EvaluatorFactory._create_json_similarity_evaluator(data)
+             case LLMJudgeOutputEvaluatorConfig():
+                 return EvaluatorFactory._create_llm_judge_output_evaluator(data)
+             case LLMJudgeStrictJSONSimilarityOutputEvaluatorConfig():
+                 return EvaluatorFactory._create_llm_judge_strict_json_similarity_output_evaluator(
+                     data
+                 )
+             case LLMJudgeTrajectoryEvaluatorConfig():
+                 return EvaluatorFactory._create_trajectory_evaluator(data)
+             case ToolCallArgsEvaluatorConfig():
+                 return EvaluatorFactory._create_tool_call_args_evaluator(data)
+             case ToolCallCountEvaluatorConfig():
+                 return EvaluatorFactory._create_tool_call_count_evaluator(data)
+             case ToolCallOrderEvaluatorConfig():
+                 return EvaluatorFactory._create_tool_call_order_evaluator(data)
+             case ToolCallOutputEvaluatorConfig():
+                 return EvaluatorFactory._create_tool_call_output_evaluator(data)
+             case LLMJudgeTrajectorySimulationEvaluatorConfig():
+                 return (
+                     EvaluatorFactory._create_llm_judge_simulation_trajectory_evaluator(
+                         data
+                     )
+                 )
+             case _:
+                 raise ValueError(f"Unknown evaluator configuration: {config}")
+
+     @staticmethod
+     def _create_contains_evaluator(data: Dict[str, Any]) -> ContainsEvaluator:
+         evaluator_id = data.get("id")
+         if not evaluator_id or not isinstance(evaluator_id, str):
+             raise ValueError("Evaluator 'id' must be a non-empty string")
+         return ContainsEvaluator(
+             id=evaluator_id,
+             config=data.get("evaluatorConfig"),
+         ) # type: ignore
+
+     @staticmethod
+     def _create_coded_evaluator_internal(
+         data: Dict[str, Any], file_path_str: str, class_name: str
+     ) -> BaseEvaluator[Any, Any, Any]:
+         """Create a coded evaluator by dynamically loading from a Python file.
+
+         Args:
+             data: Dictionary containing evaluator configuration with evaluatorTypeId
+                 in format "file://path/to/file.py:ClassName"
+
+         Returns:
+             Instance of the dynamically loaded evaluator class
+
+         Raises:
+             ValueError: If file or class cannot be loaded, or if the class is not a BaseEvaluator subclass
+         """
+         file_path = Path(file_path_str)
+         if not file_path.is_absolute():
+             if not file_path.exists():
+                 file_path = (
+                     Path.cwd() / "evals" / "evaluators" / "custom" / file_path_str
+                 )
+
+         if not file_path.exists():
+             raise ValueError(
+                 f"Evaluator file not found: {file_path}. "
+                 f"Make sure the file exists in evals/evaluators/custom/"
+             )
+
+         module_name = f"_custom_evaluator_{file_path.stem}_{id(data)}"
+         spec = importlib.util.spec_from_file_location(module_name, file_path)
+         if spec is None or spec.loader is None:
+             raise ValueError(f"Could not load module from {file_path}")
+
+         module = importlib.util.module_from_spec(spec)
+         sys.modules[module_name] = module
+         try:
+             spec.loader.exec_module(module)
+         except Exception as e:
+             raise ValueError(
+                 f"Error executing module from {file_path}: {str(e)}"
+             ) from e
+
+         # Get the class from the module
+         if not hasattr(module, class_name):
+             raise ValueError(
+                 f"Class '{class_name}' not found in {file_path}. "
+                 f"Available classes: {[name for name in dir(module) if not name.startswith('_')]}"
+             )
+
+         evaluator_class = getattr(module, class_name)
+
+         if not isinstance(evaluator_class, type) or not issubclass(
+             evaluator_class, BaseEvaluator
+         ):
+             raise ValueError(
+                 f"Class '{class_name}' must be a subclass of BaseEvaluator"
+             )
+
+         evaluator_id = data.get("id")
+         if not evaluator_id or not isinstance(evaluator_id, str):
+             raise ValueError("Evaluator 'id' must be a non-empty string")
+         return evaluator_class(
+             id=evaluator_id,
+             config=data.get("evaluatorConfig", {}),
+         ) # type: ignore
+
+     @staticmethod
+     def _create_exact_match_evaluator(
+         data: Dict[str, Any],
+     ) -> ExactMatchEvaluator:
+         return TypeAdapter(ExactMatchEvaluator).validate_python(
+             {
+                 "id": data.get("id"),
+                 "config": data.get("evaluatorConfig"),
+             }
+         )
+
+     @staticmethod
+     def _create_json_similarity_evaluator(
+         data: Dict[str, Any],
+     ) -> JsonSimilarityEvaluator:
+         return TypeAdapter(JsonSimilarityEvaluator).validate_python(
+             {
+                 "id": data.get("id"),
+                 "config": data.get("evaluatorConfig"),
+             }
+         )
+
+     @staticmethod
+     def _create_llm_judge_output_evaluator(
+         data: Dict[str, Any],
+     ) -> LLMJudgeOutputEvaluator:
+         return TypeAdapter(LLMJudgeOutputEvaluator).validate_python(
+             {
+                 "id": data.get("id"),
+                 "config": data.get("evaluatorConfig"),
+             }
+         )
+
+     @staticmethod
+     def _create_llm_judge_strict_json_similarity_output_evaluator(
+         data: Dict[str, Any],
+     ) -> LLMJudgeStrictJSONSimilarityOutputEvaluator:
+         return TypeAdapter(LLMJudgeStrictJSONSimilarityOutputEvaluator).validate_python(
+             {
+                 "id": data.get("id"),
+                 "config": data.get("evaluatorConfig"),
+             }
+         )
+
+     @staticmethod
+     def _create_trajectory_evaluator(
+         data: Dict[str, Any],
+     ) -> LLMJudgeTrajectoryEvaluator:
+         return TypeAdapter(LLMJudgeTrajectoryEvaluator).validate_python(
+             {
+                 "id": data.get("id"),
+                 "config": data.get("evaluatorConfig"),
+             }
+         )
+
+     @staticmethod
+     def _create_tool_call_args_evaluator(
+         data: Dict[str, Any],
+     ) -> ToolCallArgsEvaluator:
+         return TypeAdapter(ToolCallArgsEvaluator).validate_python(
+             {
+                 "id": data.get("id"),
+                 "config": data.get("evaluatorConfig"),
+             }
+         )
+
+     @staticmethod
+     def _create_tool_call_count_evaluator(
+         data: Dict[str, Any],
+     ) -> ToolCallCountEvaluator:
+         return TypeAdapter(ToolCallCountEvaluator).validate_python(
+             {
+                 "id": data.get("id"),
+                 "config": data.get("evaluatorConfig"),
+             }
+         )
+
+     @staticmethod
+     def _create_tool_call_order_evaluator(
+         data: Dict[str, Any],
+     ) -> ToolCallOrderEvaluator:
+         return TypeAdapter(ToolCallOrderEvaluator).validate_python(
+             {
+                 "id": data.get("id"),
+                 "config": data.get("evaluatorConfig"),
+             }
+         )
+
+     @staticmethod
+     def _create_tool_call_output_evaluator(
+         data: Dict[str, Any],
+     ) -> ToolCallOutputEvaluator:
+         return TypeAdapter(ToolCallOutputEvaluator).validate_python(
+             {
+                 "id": data.get("id"),
+                 "config": data.get("evaluatorConfig"),
+             }
+         )
+
+     @staticmethod
+     def _create_llm_judge_simulation_trajectory_evaluator(
+         data: Dict[str, Any],
+     ) -> LLMJudgeTrajectorySimulationEvaluator:
+         return TypeAdapter(LLMJudgeTrajectorySimulationEvaluator).validate_python(
+             {
+                 "id": data.get("id"),
+                 "config": data.get("evaluatorConfig"),
+             }
+         )
+
+     @staticmethod
+     def _create_legacy_evaluator_internal(
+         data: Dict[str, Any],
+     ) -> LegacyBaseEvaluator[Any]:
          """Create an evaluator instance from configuration data.

          Args:
@@ -35,46 +328,38 @@ class EvaluatorFactory:
          Raises:
              ValueError: If category is unknown or required fields are missing
          """
-         # Extract common fields
-         name = data.get("name", "")
-         if not name:
-             raise ValueError("Evaluator configuration must include 'name' field")
-         id = data.get("id", "")
-         if not id:
-             raise ValueError("Evaluator configuration must include 'id' field")
-
-         params: EvaluatorBaseParams = TypeAdapter(Evaluator).validate_python(data)
+         params: EvaluatorBaseParams = TypeAdapter(LegacyEvaluator).validate_python(data)

          match params:
              case EqualsEvaluatorParams():
-                 return EvaluatorFactory._create_exact_match_evaluator(params)
+                 return EvaluatorFactory._create_legacy_exact_match_evaluator(params)
              case JsonSimilarityEvaluatorParams():
-                 return EvaluatorFactory._create_json_similarity_evaluator(params)
+                 return EvaluatorFactory._create_legacy_json_similarity_evaluator(params)
              case LLMEvaluatorParams():
-                 return EvaluatorFactory._create_llm_as_judge_evaluator(params)
+                 return EvaluatorFactory._create_legacy_llm_as_judge_evaluator(params)
              case TrajectoryEvaluatorParams():
-                 return EvaluatorFactory._create_trajectory_evaluator(params)
+                 return EvaluatorFactory._create_legacy_trajectory_evaluator(params)
              case _:
                  raise ValueError(f"Unknown evaluator category: {params}")

      @staticmethod
-     def _create_exact_match_evaluator(
+     def _create_legacy_exact_match_evaluator(
          params: EqualsEvaluatorParams,
-     ) -> ExactMatchEvaluator:
+     ) -> LegacyExactMatchEvaluator:
          """Create a deterministic evaluator."""
-         return ExactMatchEvaluator(**params.model_dump())
+         return LegacyExactMatchEvaluator(**params.model_dump())

      @staticmethod
-     def _create_json_similarity_evaluator(
+     def _create_legacy_json_similarity_evaluator(
          params: JsonSimilarityEvaluatorParams,
-     ) -> JsonSimilarityEvaluator:
+     ) -> LegacyJsonSimilarityEvaluator:
          """Create a deterministic evaluator."""
-         return JsonSimilarityEvaluator(**params.model_dump())
+         return LegacyJsonSimilarityEvaluator(**params.model_dump())

      @staticmethod
-     def _create_llm_as_judge_evaluator(
+     def _create_legacy_llm_as_judge_evaluator(
          params: LLMEvaluatorParams,
-     ) -> LlmAsAJudgeEvaluator:
+     ) -> LegacyLlmAsAJudgeEvaluator:
          """Create an LLM-as-a-judge evaluator."""
          if not params.prompt:
              raise ValueError("LLM evaluator must include 'prompt' field")
@@ -86,12 +371,12 @@ class EvaluatorFactory:
                  "'same-as-agent' model option is not supported by coded agents evaluations. Please select a specific model for the evaluator."
              )

-         return LlmAsAJudgeEvaluator(**params.model_dump())
+         return LegacyLlmAsAJudgeEvaluator(**params.model_dump())

      @staticmethod
-     def _create_trajectory_evaluator(
+     def _create_legacy_trajectory_evaluator(
          params: TrajectoryEvaluatorParams,
-     ) -> TrajectoryEvaluator:
+     ) -> LegacyTrajectoryEvaluator:
          """Create a trajectory evaluator."""
          if not params.prompt:
              raise ValueError("Trajectory evaluator must include 'prompt' field")
@@ -103,4 +388,4 @@ class EvaluatorFactory:
                  "'same-as-agent' model option is not supported by coded agents evaluations. Please select a specific model for the evaluator."
              )

-         return TrajectoryEvaluator(**params.model_dump())
+         return LegacyTrajectoryEvaluator(**params.model_dump())
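
In the rewritten factory, a definition with version "1.0" whose evaluatorSchema carries the custom-evaluator prefix is loaded dynamically via _create_coded_evaluator_internal; other "1.0" definitions are validated into one of the built-in config types, and anything without version "1.0" falls back to the legacy path. A minimal hypothetical sketch of the custom branch, assuming CUSTOM_EVALUATOR_PREFIX resolves to "file://" (as the docstring above suggests) and using an invented id and file name:

    from uipath._cli._evals._evaluator_factory import EvaluatorFactory

    # Because "evaluatorSchema" starts with the custom prefix, create_evaluator()
    # loads MyEvaluator from evals/evaluators/custom/my_evaluator.py and
    # instantiates it with the given id and evaluatorConfig.
    evaluator = EvaluatorFactory.create_evaluator(
        {
            "version": "1.0",
            "id": "my-custom-evaluator",
            "evaluatorSchema": "file://my_evaluator.py:MyEvaluator",
            "evaluatorConfig": {},
        }
    )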
uipath/_cli/_evals/_helpers.py ADDED
@@ -0,0 +1,194 @@
+ # type: ignore
+ import ast
+ import importlib.util
+ import json
+ import logging
+ import re
+ import sys
+ from pathlib import Path
+ from typing import Any, Optional
+
+ import click
+
+ from uipath._cli._utils._console import ConsoleLogger
+ from uipath._utils.constants import CUSTOM_EVALUATOR_PREFIX
+
+ logger = logging.getLogger(__name__)
+ console = ConsoleLogger().get_instance()
+
+
+ def try_extract_file_and_class_name(text: str) -> tuple[bool, str, str]:
+     if text.startswith(CUSTOM_EVALUATOR_PREFIX):
+         file_and_class = text[len(CUSTOM_EVALUATOR_PREFIX) :]
+         if ":" not in file_and_class:
+             raise ValueError(
+                 f"evaluatorSchema must include class name after ':' - got: {text}"
+             )
+         file_path_str, class_name = file_and_class.rsplit(":", 1)
+
+         return True, file_path_str, class_name
+     return False, "", ""
+
+
+ def to_kebab_case(text: str) -> str:
+     return re.sub(r"(?<!^)(?=[A-Z])", "-", text).lower()
+
+
+ def find_evaluator_file(filename: str) -> Optional[Path]:
+     """Find the evaluator file in evals/evaluators/custom folder."""
+     custom_evaluators_path = Path.cwd() / "evals" / "evaluators" / "custom"
+
+     if not custom_evaluators_path.exists():
+         return None
+
+     file_path = custom_evaluators_path / filename
+     if file_path.exists():
+         return file_path
+
+     return None
+
+
+ def find_base_evaluator_class(file_path: Path) -> Optional[str]:
+     """Parse the Python file and find the class that inherits from BaseEvaluator."""
+     try:
+         with open(file_path, "r") as f:
+             tree = ast.parse(f.read(), filename=str(file_path))
+
+         for node in ast.walk(tree):
+             if isinstance(node, ast.ClassDef):
+                 for base in node.bases:
+                     if isinstance(base, ast.Name) and base.id == "BaseEvaluator":
+                         return node.name
+                     elif isinstance(base, ast.Subscript):
+                         if (
+                             isinstance(base.value, ast.Name)
+                             and base.value.id == "BaseEvaluator"
+                         ):
+                             return node.name
+
+         return None
+     except Exception as e:
+         logger.error(f"Error parsing file: {e}")
+         return None
+
+
+ def load_evaluator_class(file_path: Path, class_name: str) -> Optional[type]:
+     """Dynamically load the evaluator class from the file."""
+     try:
+         parent_dir = str(file_path.parent)
+         if parent_dir not in sys.path:
+             sys.path.insert(0, parent_dir)
+
+         spec = importlib.util.spec_from_file_location("custom_evaluator", file_path)
+         if spec is None or spec.loader is None:
+             return None
+
+         module = importlib.util.module_from_spec(spec)
+         spec.loader.exec_module(module)
+
+         if hasattr(module, class_name):
+             return getattr(module, class_name)
+
+         return None
+     except Exception as e:
+         logger.error(f"Error loading class: {e}")
+         return None
+     finally:
+         # Remove from sys.path
+         if parent_dir in sys.path:
+             sys.path.remove(parent_dir)
+
+
+ def generate_evaluator_config(evaluator_class: type, class_name: str) -> dict[str, Any]:
+     """Generate the evaluator config from the class."""
+     try:
+         config_type = evaluator_class._extract_config_type()
+         config_instance = config_type()
+         config_dict = config_instance.model_dump(by_alias=True, exclude_none=False)
+
+         return config_dict
+     except Exception as e:
+         console.error(f"Error inferring evaluator config: {e}")
+
+
+ def register_evaluator(filename: str) -> tuple[str, str]:
+     """Infers the schema and types of a custom evaluator.
+
+     Returns:
+         tuple[str, str]:
+             - The first string is the path to the python evaluator file.
+             - The second string is the evaluator type that corresponds to the schema file.
+     """
+     if not filename.endswith(".py"):
+         filename = filename + ".py"
+     file_path = find_evaluator_file(filename)
+     if file_path is None:
+         console.error(f"Could not find '{filename}' in evals/evaluators/custom folder")
+
+     relative_path = f"evals/evaluators/custom/{filename}"
+     console.info(
+         f"Found custom evaluator file: {click.style(relative_path, fg='cyan')}"
+     )
+
+     class_name = find_base_evaluator_class(file_path)
+     if class_name is None:
+         console.error(
+             f"Could not find a class inheriting from BaseEvaluator in {filename}"
+         )
+
+     console.info(f"Found custom evaluator class: {click.style(class_name, fg='cyan')}")
+
+     evaluator_class = load_evaluator_class(file_path, class_name)
+     if evaluator_class is None:
+         console.error(f"Could not load class {class_name} from {filename}")
+
+     try:
+         evaluator_id = evaluator_class.get_evaluator_id()
+     except Exception as e:
+         console.error(f"Error getting evaluator ID: {e}")
+
+     evaluator_config = generate_evaluator_config(evaluator_class, class_name)
+     evaluator_json_type = evaluator_class.generate_json_type()
+
+     evaluators_dir = Path.cwd() / "evals" / "evaluators"
+     evaluators_dir.mkdir(parents=True, exist_ok=True)
+
+     evaluator_types_dir = evaluators_dir / "custom" / "types"
+     evaluator_types_dir.mkdir(parents=True, exist_ok=True)
+
+     kebab_class_name = to_kebab_case(class_name)
+     output_file_evaluator_types = kebab_class_name + "-types.json"
+     evaluator_types_output_path = (
+         evaluators_dir / "custom" / "types" / output_file_evaluator_types
+     )
+
+     with open(evaluator_types_output_path, "w") as f:
+         json.dump(evaluator_json_type, f, indent=2)
+
+     relative_output_path = (
+         f"evals/evaluators/custom/types/{output_file_evaluator_types}"
+     )
+     console.success(
+         f"Generated evaluator types: {click.style(relative_output_path, fg='cyan')}"
+     )
+
+     output = {
+         "version": "1.0",
+         "id": evaluator_id,
+         "evaluatorTypeId": f"{CUSTOM_EVALUATOR_PREFIX}types/{output_file_evaluator_types}",
+         "evaluatorSchema": f"{CUSTOM_EVALUATOR_PREFIX}{filename}:{class_name}",
+         "description": evaluator_class.__doc__,
+         "evaluatorConfig": evaluator_config,
+     }
+
+     output_file_evaluator_spec = kebab_class_name + ".json"
+     evaluator_spec_output_path = evaluators_dir / output_file_evaluator_spec
+     with open(evaluator_spec_output_path, "w") as f:
+         json.dump(output, f, indent=2)
+
+     relative_output_path = f"evals/evaluators/{output_file_evaluator_spec}"
+     console.success(
+         f"Generated evaluator spec: {click.style(relative_output_path, fg='cyan')}"
+     )
+
+     return str(file_path), str(evaluator_types_output_path)
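
register_evaluator is the helper that turns a custom evaluator class into the spec and types files the factory consumes, presumably on behalf of the new `uipath register` command. A hypothetical call, using an invented file name (".py" is appended automatically when missing):

    from uipath._cli._evals._helpers import register_evaluator

    # For a class MyEvaluator in evals/evaluators/custom/my_evaluator.py this writes
    # evals/evaluators/custom/types/my-evaluator-types.json and
    # evals/evaluators/my-evaluator.json, then returns the evaluator file path and
    # the generated types file path.
    evaluator_file, types_file = register_evaluator("my_evaluator")
    print(evaluator_file, types_file)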