uipath-2.1.51-py3-none-any.whl → uipath-2.1.53-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. uipath/_cli/_evals/{_evaluators/_evaluator_factory.py → _evaluator_factory.py} +24 -23
  2. uipath/_cli/_evals/_models/_evaluation_set.py +23 -18
  3. uipath/_cli/_evals/_models/_evaluator_base_params.py +16 -0
  4. uipath/_cli/_evals/_models/_output.py +85 -0
  5. uipath/_cli/_evals/_runtime.py +102 -10
  6. uipath/_cli/_runtime/_contracts.py +12 -3
  7. uipath/_cli/_utils/_eval_set.py +1 -1
  8. uipath/_cli/_utils/_project_files.py +1 -0
  9. uipath/_cli/cli_eval.py +46 -61
  10. uipath/eval/evaluators/__init__.py +15 -0
  11. uipath/eval/evaluators/base_evaluator.py +88 -0
  12. uipath/eval/evaluators/deterministic_evaluator_base.py +53 -0
  13. uipath/eval/evaluators/exact_match_evaluator.py +37 -0
  14. uipath/{_cli/_evals/_evaluators/_json_similarity_evaluator.py → eval/evaluators/json_similarity_evaluator.py} +23 -40
  15. uipath/eval/evaluators/llm_as_judge_evaluator.py +137 -0
  16. uipath/eval/evaluators/trajectory_evaluator.py +36 -0
  17. uipath/eval/models/__init__.py +19 -0
  18. uipath/{_cli/_evals/_models/_evaluators.py → eval/models/models.py} +67 -43
  19. {uipath-2.1.51.dist-info → uipath-2.1.53.dist-info}/METADATA +1 -1
  20. {uipath-2.1.51.dist-info → uipath-2.1.53.dist-info}/RECORD +23 -23
  21. uipath/_cli/_evals/_evaluators/__init__.py +0 -22
  22. uipath/_cli/_evals/_evaluators/_deterministic_evaluator_base.py +0 -46
  23. uipath/_cli/_evals/_evaluators/_evaluator_base.py +0 -124
  24. uipath/_cli/_evals/_evaluators/_exact_match_evaluator.py +0 -40
  25. uipath/_cli/_evals/_evaluators/_llm_as_judge_evaluator.py +0 -183
  26. uipath/_cli/_evals/_evaluators/_trajectory_evaluator.py +0 -48
  27. uipath/_cli/_evals/_models/__init__.py +0 -18
  28. uipath/_cli/_evals/_models/_agent_execution_output.py +0 -14
  29. uipath/_cli/_evals/progress_reporter.py +0 -304
  30. {uipath-2.1.51.dist-info → uipath-2.1.53.dist-info}/WHEEL +0 -0
  31. {uipath-2.1.51.dist-info → uipath-2.1.53.dist-info}/entry_points.txt +0 -0
  32. {uipath-2.1.51.dist-info → uipath-2.1.53.dist-info}/licenses/LICENSE +0 -0
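The file moves above amount to a refactor that promotes the evaluator framework out of the private `uipath._cli._evals` namespace into a public `uipath.eval` package. A minimal sketch of the import migration this implies, assuming only that the new modules are importable under the paths listed above (the class names they export are not visible in this diff):

```python
# Old (2.1.51) private locations, as seen in the removed progress_reporter.py below:
# from uipath._cli._evals._evaluators import EvaluatorBase
# from uipath._cli._evals._models._evaluators import EvalItemResult, ScoreType

# New (2.1.53) public modules; paths are taken from the file list above.
# The exported class names are not confirmed here, so only the modules are imported.
from uipath.eval import evaluators, models

print(evaluators.__name__, models.__name__)
```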
uipath/_cli/_evals/progress_reporter.py
@@ -1,304 +0,0 @@
- """Progress reporter for sending evaluation updates to StudioWeb."""
-
- import json
- import logging
- import os
- from typing import Any, List
-
- from uipath import UiPath
- from uipath._cli._evals._evaluators import EvaluatorBase
- from uipath._cli._evals._models._evaluation_set import EvaluationStatus
- from uipath._cli._evals._models._evaluators import EvalItemResult, ScoreType
- from uipath._cli._utils._console import ConsoleLogger
- from uipath._utils import Endpoint, RequestSpec
- from uipath._utils.constants import ENV_TENANT_ID, HEADER_INTERNAL_TENANT_ID
-
-
- class ProgressReporter:
-     """Handles reporting evaluation progress to StudioWeb via API calls."""
-
-     def __init__(
-         self,
-         eval_set_id: str,
-         agent_snapshot: str,
-         no_of_evals: int,
-         evaluators: List[EvaluatorBase],
-     ):
-         """Initialize the progress reporter.
-
-         Args:
-             eval_set_id: ID of the evaluation set
-             agent_snapshot: JSON snapshot of the agent configuration
-             no_of_evals: Number of evaluations in the set
-             evaluators: List of evaluator instances
-         """
-         self._eval_set_id = eval_set_id
-         self.agent_snapshot = agent_snapshot
-         self._no_of_evals = no_of_evals
-         self._evaluators: dict[str, EvaluatorBase] = {
-             evaluator.id: evaluator for evaluator in evaluators
-         }
-         self._evaluator_scores: dict[str, list[float]] = {
-             evaluator.id: [] for evaluator in evaluators
-         }
-
-         # Disable middleware logging and use the same console as ConsoleLogger
-         logging.getLogger("uipath._cli.middlewares").setLevel(logging.CRITICAL)
-
-         console_logger = ConsoleLogger.get_instance()
-
-         uipath = UiPath()
-
-         self._eval_set_run_id = None
-         self._client = uipath.api_client
-         self._console = console_logger
-         self._project_id = os.getenv("UIPATH_PROJECT_ID", None)
-         if not self._project_id:
-             self._console.warning(
-                 "Cannot report data to StudioWeb. Please set UIPATH_PROJECT_ID."
-             )
-
-     async def create_eval_set_run(self):
-         """Create a new evaluation set run in StudioWeb."""
-         spec = self._create_eval_set_run_spec()
-         response = await self._client.request_async(
-             method=spec.method,
-             url=spec.endpoint,
-             params=spec.params,
-             json=spec.json,
-             headers=spec.headers,
-         )
-         self._eval_set_run_id = json.loads(response.content)["id"]
-
-     async def create_eval_run(self, eval_item: dict[str, Any]):
-         """Create a new evaluation run in StudioWeb.
-
-         Args:
-             eval_item: Dictionary containing evaluation data
-
-         Returns:
-             The ID of the created evaluation run
-         """
-         spec = self._create_eval_run_spec(eval_item)
-         response = await self._client.request_async(
-             method=spec.method,
-             url=spec.endpoint,
-             params=spec.params,
-             json=spec.json,
-             headers=spec.headers,
-         )
-         return json.loads(response.content)["id"]
-
-     async def update_eval_run(
-         self,
-         eval_results: list[EvalItemResult],
-         eval_run_id: str,
-         execution_time: float,
-     ):
-         """Update an evaluation run with results.
-
-         Args:
-             eval_results: Dictionary mapping evaluator IDs to evaluation results
-             eval_run_id: ID of the evaluation run to update
-             execution_time: The agent execution time
-         """
-         assertion_runs, evaluator_scores, actual_output = self._collect_results(
-             eval_results
-         )
-         spec = self._update_eval_run_spec(
-             assertion_runs=assertion_runs,
-             evaluator_scores=evaluator_scores,
-             eval_run_id=eval_run_id,
-             execution_time=execution_time,
-             actual_output=actual_output,
-         )
-         await self._client.request_async(
-             method=spec.method,
-             url=spec.endpoint,
-             params=spec.params,
-             json=spec.json,
-             headers=spec.headers,
-         )
-
-     async def update_eval_set_run(self):
-         """Update the evaluation set run status to complete."""
-         spec = self._update_eval_set_run_spec()
-         await self._client.request_async(
-             method=spec.method,
-             url=spec.endpoint,
-             params=spec.params,
-             json=spec.json,
-             headers=spec.headers,
-         )
-
-     def _collect_results(
-         self, eval_results: list[EvalItemResult]
-     ) -> tuple[list[dict[str, Any]], list[dict[str, Any]], dict[str, Any]]:
-         assertion_runs: list[dict[str, Any]] = []
-         evaluator_scores: list[dict[str, Any]] = []
-         actual_output: dict[str, Any] = {}
-         for eval_result in eval_results:
-             # keep track of evaluator scores. this should be removed after this computation is done server-side
-
-             # check the evaluator score type
-             match eval_result.result.score_type:
-                 case ScoreType.NUMERICAL:
-                     self._evaluator_scores[eval_result.evaluator_id].append(
-                         eval_result.result.score
-                     )
-                 case ScoreType.BOOLEAN:
-                     self._evaluator_scores[eval_result.evaluator_id].append(
-                         100 if eval_result.result.score else 0
-                     )
-                 case ScoreType.ERROR:
-                     self._evaluator_scores[eval_result.evaluator_id].append(0)
-
-             evaluator_scores.append(
-                 {
-                     "type": eval_result.result.score_type.value,
-                     "value": eval_result.result.score,
-                     "justification": eval_result.result.details,
-                     "evaluatorId": eval_result.evaluator_id,
-                 }
-             )
-             assertion_runs.append(
-                 {
-                     "status": EvaluationStatus.COMPLETED.value,
-                     "evaluatorId": eval_result.evaluator_id,
-                     "completionMetrics": {
-                         "duration": eval_result.result.evaluation_time,
-                         "cost": None,
-                         "tokens": 0,
-                         "completionTokens": 0,
-                         "promptTokens": 0,
-                     },
-                     "assertionSnapshot": {
-                         "assertionType": self._evaluators[
-                             eval_result.evaluator_id
-                         ].type.name,
-                         "outputKey": self._evaluators[
-                             eval_result.evaluator_id
-                         ].target_output_key,
-                     },
-                 }
-             )
-
-             # we extract the actual output here. we should have the same 'actual_output' for each 'EvalItemResult'
-             actual_output = eval_result.result.actual_output
-
-         return assertion_runs, evaluator_scores, actual_output
-
-     def _update_eval_run_spec(
-         self,
-         assertion_runs: list[dict[str, Any]],
-         evaluator_scores: list[dict[str, Any]],
-         eval_run_id: str,
-         actual_output: dict[str, Any],
-         execution_time: float,
-     ) -> RequestSpec:
-         return RequestSpec(
-             method="PUT",
-             endpoint=Endpoint(
-                 f"agentsruntime_/api/execution/agents/{self._project_id}/evalRun"
-             ),
-             json={
-                 "evalRunId": eval_run_id,
-                 "status": EvaluationStatus.COMPLETED.value,
-                 "result": {
-                     "output": {"content": {**actual_output}},
-                     "evaluatorScores": evaluator_scores,
-                 },
-                 "completionMetrics": {"duration": int(execution_time)},
-                 "assertionRuns": assertion_runs,
-             },
-             headers=self._tenant_header(),
-         )
-
-     def _create_eval_run_spec(self, eval_item: dict[str, Any]) -> RequestSpec:
-         return RequestSpec(
-             method="POST",
-             endpoint=Endpoint(
-                 f"agentsruntime_/api/execution/agents/{self._project_id}/evalRun"
-             ),
-             json={
-                 "evalSetRunId": self._eval_set_run_id,
-                 "evalSnapshot": {
-                     "id": eval_item["id"],
-                     "name": eval_item["name"],
-                     "inputs": eval_item.get("inputs"),
-                     "expectedOutput": eval_item.get("expectedOutput", {}),
-                 },
-                 "status": EvaluationStatus.IN_PROGRESS.value,
-             },
-             headers=self._tenant_header(),
-         )
-
-     def _create_eval_set_run_spec(
-         self,
-     ) -> RequestSpec:
-         agent_snapshot_dict = json.loads(self.agent_snapshot)
-
-         return RequestSpec(
-             method="POST",
-             endpoint=Endpoint(
-                 f"agentsruntime_/api/execution/agents/{self._project_id}/evalSetRun"
-             ),
-             json={
-                 "agentId": self._project_id,
-                 "evalSetId": self._eval_set_id,
-                 "agentSnapshot": agent_snapshot_dict,
-                 "status": EvaluationStatus.IN_PROGRESS.value,
-                 "numberOfEvalsExecuted": self._no_of_evals,
-             },
-             headers=self._tenant_header(),
-         )
-
-     def _compute_evaluator_scores(self):
-         evaluator_scores = []
-         evaluator_averages = []
-
-         for evaluator in self._evaluators.values():
-             scores = self._evaluator_scores[evaluator.id]
-             if scores:
-                 avg_score = sum(scores) / len(scores)
-                 evaluator_scores.append(
-                     {"value": avg_score, "evaluatorId": evaluator.id}
-                 )
-                 evaluator_averages.append(avg_score)
-             else:
-                 # fallback to score 0
-                 evaluator_scores.append({"value": 0, "evaluatorId": evaluator.id})
-                 evaluator_averages.append(0)
-
-         overall_score = (
-             sum(evaluator_averages) / len(evaluator_averages)
-             if evaluator_averages
-             else 0
-         )
-         return evaluator_scores, overall_score
-
-     def _update_eval_set_run_spec(
-         self,
-     ) -> RequestSpec:
-         # this should be removed after computations are done server-side
-         evaluator_scores, overall_score = self._compute_evaluator_scores()
-         return RequestSpec(
-             method="PUT",
-             endpoint=Endpoint(
-                 f"agentsruntime_/api/execution/agents/{self._project_id}/evalSetRun"
-             ),
-             json={
-                 "evalSetRunId": self._eval_set_run_id,
-                 "status": EvaluationStatus.COMPLETED.value,
-                 "evaluatorScores": evaluator_scores,
-             },
-             headers=self._tenant_header(),
-         )
-
-     def _tenant_header(self) -> dict[str, str]:
-         tenant_id = os.getenv(ENV_TENANT_ID, None)
-         if not tenant_id:
-             self._console.error(
-                 f"{ENV_TENANT_ID} env var is not set. Please run 'uipath auth'."
-             )
-         return {HEADER_INTERNAL_TENANT_ID: tenant_id}  # type: ignore
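The removed reporter also performed the score bookkeeping that its inline comments flag for relocation server-side: `_collect_results` normalizes each result onto a 0-100 scale by score type, and `_compute_evaluator_scores` averages per evaluator and then across evaluators. A standalone sketch of that arithmetic, using a stand-in enum and illustrative names (only the normalization and averaging rules are taken from the code above):

```python
from enum import Enum, auto


class ScoreKind(Enum):
    # Stand-in for the package's ScoreType; member values here are illustrative.
    NUMERICAL = auto()
    BOOLEAN = auto()
    ERROR = auto()


def normalize(kind: ScoreKind, score) -> float:
    """Map one evaluator result onto the 0-100 scale used for averaging."""
    if kind is ScoreKind.NUMERICAL:
        return float(score)  # numerical scores are kept as-is
    if kind is ScoreKind.BOOLEAN:
        return 100.0 if score else 0.0
    return 0.0  # errors count as a zero score


def aggregate(scores_by_evaluator: dict[str, list[float]]) -> tuple[list[dict], float]:
    """Average each evaluator's scores, then average those averages (empty -> 0)."""
    per_evaluator, averages = [], []
    for evaluator_id, scores in scores_by_evaluator.items():
        avg = sum(scores) / len(scores) if scores else 0.0
        per_evaluator.append({"value": avg, "evaluatorId": evaluator_id})
        averages.append(avg)
    overall = sum(averages) / len(averages) if averages else 0.0
    return per_evaluator, overall


# Example: a boolean evaluator and a numerical evaluator over two eval items.
scores = {
    "exact-match": [normalize(ScoreKind.BOOLEAN, True), normalize(ScoreKind.BOOLEAN, False)],
    "similarity": [normalize(ScoreKind.NUMERICAL, 80), normalize(ScoreKind.NUMERICAL, 60)],
}
print(aggregate(scores))  # ([{'value': 50.0, ...}, {'value': 70.0, ...}], 60.0)
```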