uipath 2.1.8__py3-none-any.whl → 2.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,356 @@
+ """Progress reporter for sending evaluation updates to StudioWeb."""
+
+ import json
+ import logging
+ import os
+ from typing import Any, List
+
+ from uipath import UiPath
+ from uipath._cli._evals._evaluators import EvaluatorBase
+ from uipath._cli._evals._models._evaluation_set import EvaluationStatus
+ from uipath._cli._evals._models._evaluators import EvalItemResult, ScoreType
+ from uipath._cli._utils._console import ConsoleLogger
+ from uipath._utils import Endpoint, RequestSpec
+ from uipath._utils.constants import ENV_TENANT_ID, HEADER_INTERNAL_TENANT_ID
+
+
+ class ProgressReporter:
+     """Handles reporting evaluation progress to StudioWeb via API calls."""
+
+     def __init__(
+         self,
+         eval_set_id: str,
+         agent_snapshot: str,
+         no_of_evals: int,
+         evaluators: List[EvaluatorBase],
+     ):
+         """Initialize the progress reporter.
+
+         Args:
+             eval_set_id: ID of the evaluation set
+             agent_snapshot: JSON snapshot of the agent configuration
+             no_of_evals: Number of evaluations in the set
+             evaluators: List of evaluator instances
+         """
+         self._eval_set_id = eval_set_id
+         self.agent_snapshot = agent_snapshot
+         self._no_of_evals = no_of_evals
+         self._evaluators = evaluators
+         self._evaluator_scores: dict[str, list[float]] = {
+             evaluator.id: [] for evaluator in evaluators
+         }
+
+         # Disable middleware logging and use the same console as ConsoleLogger
+         logging.getLogger("uipath._cli.middlewares").setLevel(logging.CRITICAL)
+
+         console_logger = ConsoleLogger.get_instance()
+
+         uipath = UiPath()
+
+         self._eval_set_run_id = None
+         self._client = uipath.api_client
+         self._console = console_logger
+         self._project_id = os.getenv("UIPATH_PROJECT_ID", None)
+         if not self._project_id:
+             self._console.warning(
+                 "Cannot report data to StudioWeb. Please set UIPATH_PROJECT_ID."
+             )
+
+     async def create_eval_run(self, eval_item: dict[str, Any]):
+         """Create a new evaluation run in StudioWeb.
+
+         Args:
+             eval_item: Dictionary containing evaluation data
+
+         Returns:
+             The ID of the created evaluation run
+         """
+         spec = self._create_eval_run_spec(eval_item)
+         response = await self._client.request_async(
+             method=spec.method,
+             url=spec.endpoint,
+             params=spec.params,
+             content=spec.content,
+             headers=spec.headers,
+             scoped="org",
+         )
+         return json.loads(response.content)["id"]
+
+     async def update_eval_run(
+         self,
+         eval_results: list[EvalItemResult],
+         eval_run_id: str,
+         success: bool,
+         execution_time: float,
+     ):
+         """Update an evaluation run with results.
+
+         Args:
+             eval_results: Dictionary mapping evaluator IDs to evaluation results
+             eval_run_id: ID of the evaluation run to update
+             success: Whether the evaluation was successful
+             execution_time: The agent execution time
+         """
+         assertion_runs, evaluator_scores, actual_output = self._collect_results(
+             eval_results
+         )
+         spec = self._update_eval_run_spec(
+             assertion_runs=assertion_runs,
+             evaluator_scores=evaluator_scores,
+             eval_run_id=eval_run_id,
+             execution_time=execution_time,
+             actual_output=actual_output,
+         )
+         await self._client.request_async(
+             method=spec.method,
+             url=spec.endpoint,
+             params=spec.params,
+             content=spec.content,
+             headers=spec.headers,
+             scoped="org",
+         )
+
+     async def create_eval_set_run(self):
+         """Create a new evaluation set run in StudioWeb."""
+         spec = self._create_eval_set_run_spec()
+         response = await self._client.request_async(
+             method=spec.method,
+             url=spec.endpoint,
+             params=spec.params,
+             content=spec.content,
+             headers=spec.headers,
+             scoped="org",
+         )
+         self._eval_set_run_id = json.loads(response.content)["id"]
+
+     async def update_eval_set_run(self):
+         """Update the evaluation set run status to complete."""
+         spec = self._update_eval_set_run_spec()
+         await self._client.request_async(
+             method=spec.method,
+             url=spec.endpoint,
+             params=spec.params,
+             content=spec.content,
+             headers=spec.headers,
+             scoped="org",
+         )
+
+     def _collect_results(
+         self, eval_results: list[EvalItemResult]
+     ) -> tuple[list[dict[str, Any]], list[dict[str, Any]], dict[str, Any]]:
+         assertion_runs: list[dict[str, Any]] = []
+         evaluator_scores: list[dict[str, Any]] = []
+         actual_output: dict[str, Any] = {}
+         for eval_result in eval_results:
+             # keep track of evaluator scores. this should be removed after this computation is done server-side
+             self._evaluator_scores[eval_result.evaluator_id].append(
+                 eval_result.result.score
+             )
+             evaluator_scores.append(
+                 {
+                     "type": ScoreType.NUMERICAL.value,
+                     "value": eval_result.result.score,
+                     "justification": eval_result.result.details,
+                     "evaluatorId": eval_result.evaluator_id,
+                 }
+             )
+             assertion_runs.append(
+                 {
+                     "status": EvaluationStatus.COMPLETED.value,
+                     "evaluatorId": eval_result.evaluator_id,
+                     "result": {
+                         "output": {"content": {**eval_result.result.actual_output}},
+                         "score": {
+                             "type": ScoreType.NUMERICAL.value,
+                             "value": eval_result.result.score,
+                             "justification": eval_result.result.details,
+                         },
+                     },
+                     "completionMetrics": {
+                         "duration": eval_result.result.evaluation_time,
+                         "cost": None,
+                         "tokens": 0,
+                         "completionTokens": 0,
+                         "promptTokens": 0,
+                     },
+                 }
+             )
+
+             # we extract the actual output here. we should have the same 'actual_output' for each 'EvalItemResult'
+             actual_output = eval_result.result.actual_output
+
+         return assertion_runs, evaluator_scores, actual_output
+
+     def _update_eval_run_spec(
+         self,
+         assertion_runs: list[dict[str, Any]],
+         evaluator_scores: list[dict[str, Any]],
+         eval_run_id: str,
+         actual_output: dict[str, Any],
+         execution_time: float,
+     ) -> RequestSpec:
+         return RequestSpec(
+             method="PUT",
+             endpoint=Endpoint(
+                 f"agents_/api/execution/agents/{self._project_id}/evalRun"
+             ),
+             content=json.dumps(
+                 {
+                     "evalRunId": eval_run_id,
+                     "status": EvaluationStatus.COMPLETED.value,
+                     "result": {
+                         "output": {"content": {**actual_output}},
+                         "evaluatorScores": evaluator_scores,
+                     },
+                     "completionMetrics": {"duration": int(execution_time)},
+                     "assertionRuns": assertion_runs,
+                 }
+             ),
+             headers=self._tenant_header(),
+         )
+
+     def _create_eval_run_spec(self, eval_item: dict[str, Any]) -> RequestSpec:
+         return RequestSpec(
+             method="POST",
+             endpoint=Endpoint(
+                 f"agents_/api/execution/agents/{self._project_id}/evalRun"
+             ),
+             content=json.dumps(
+                 {
+                     "evalSetRunId": self._eval_set_run_id,
+                     "evalSnapshot": {
+                         "id": eval_item["id"],
+                         "name": eval_item["name"],
+                         "assertionType": "unknown",
+                         "assertionProperties": {},
+                         "inputs": eval_item.get("inputs"),
+                         "outputKey": "*",
+                     },
+                     "status": EvaluationStatus.IN_PROGRESS.value,
+                     "assertionRuns": [
+                         # TODO: replace default values
+                         {
+                             "assertionSnapshot": {
+                                 "assertionProperties": {
+                                     "expectedOutput": eval_item.get(
+                                         "expectedOutput", {}
+                                     ),
+                                     "prompt": "No prompt for coded agents",
+                                     "simulationInstructions": "",
+                                     "expectedAgentBehavior": "",
+                                     "inputGenerationInstructions": "",
+                                     "simulateTools": False,
+                                     "simulateInput": False,
+                                     "toolsToSimulate": [],
+                                     **(
+                                         {"model": evaluator.model}
+                                         if hasattr(evaluator, "model")
+                                         else {}
+                                     ),
+                                 },
+                                 "assertionType": "Custom",
+                                 "outputKey": "*",
+                             },
+                             "status": 1,
+                             "evaluatorId": evaluator.id,
+                         }
+                         for evaluator in self._evaluators
+                     ],
+                 }
+             ),
+             headers=self._tenant_header(),
+         )
+
+     def _create_eval_set_run_spec(
+         self,
+     ) -> RequestSpec:
+         self._add_defaults_to_agent_snapshot()
+         agent_snapshot_dict = json.loads(self.agent_snapshot)
+
+         return RequestSpec(
+             method="POST",
+             endpoint=Endpoint(
+                 f"agents_/api/execution/agents/{self._project_id}/evalSetRun"
+             ),
+             content=json.dumps(
+                 {
+                     "agentId": self._project_id,
+                     "evalSetId": self._eval_set_id,
+                     "agentSnapshot": agent_snapshot_dict,
+                     "status": EvaluationStatus.IN_PROGRESS.value,
+                     "numberOfEvalsExecuted": self._no_of_evals,
+                 }
+             ),
+             headers=self._tenant_header(),
+         )
+
+     def _compute_evaluator_scores(self):
+         evaluator_scores = []
+         evaluator_averages = []
+
+         for evaluator in self._evaluators:
+             scores = self._evaluator_scores[evaluator.id]
+             if scores:
+                 avg_score = sum(scores) / len(scores)
+                 evaluator_scores.append(
+                     {"value": avg_score, "evaluatorId": evaluator.id}
+                 )
+                 evaluator_averages.append(avg_score)
+             else:
+                 # fallback to score 0
+                 evaluator_scores.append({"value": 0, "evaluatorId": evaluator.id})
+                 evaluator_averages.append(0)
+
+         overall_score = (
+             sum(evaluator_averages) / len(evaluator_averages)
+             if evaluator_averages
+             else 0
+         )
+         return evaluator_scores, overall_score
+
+     def _update_eval_set_run_spec(
+         self,
+     ) -> RequestSpec:
+         # this should be removed after computations are done server-side
+         evaluator_scores, overall_score = self._compute_evaluator_scores()
+         return RequestSpec(
+             method="PUT",
+             endpoint=Endpoint(
+                 f"agents_/api/execution/agents/{self._project_id}/evalSetRun"
+             ),
+             content=json.dumps(
+                 {
+                     ## TODO: send the actual data here (do we need to send those again? isn't it redundant?)
+                     "evalSetRunId": self._eval_set_run_id,
+                     ## this should be removed. not used but enforced by the API
+                     "score": overall_score,
+                     "status": EvaluationStatus.COMPLETED.value,
+                     "evaluatorScores": evaluator_scores,
+                 }
+             ),
+             headers=self._tenant_header(),
+         )
+
+     def _add_defaults_to_agent_snapshot(self):
+         ## TODO: remove this after properties are marked as optional at api level
+         agent_snapshot_dict = json.loads(self.agent_snapshot)
+         agent_snapshot_dict["tools"] = []
+         agent_snapshot_dict["contexts"] = []
+         agent_snapshot_dict["escalations"] = []
+         agent_snapshot_dict["systemPrompt"] = ""
+         agent_snapshot_dict["userPrompt"] = ""
+         agent_snapshot_dict["settings"] = {
+             "model": "",
+             "maxTokens": 0,
+             "temperature": 0,
+             "engine": "",
+         }
+         self.agent_snapshot = json.dumps(agent_snapshot_dict)
+
+     def _tenant_header(self) -> dict[str, str]:
+         tenant_id = os.getenv(ENV_TENANT_ID, None)
+         if not tenant_id:
+             self._console.error(
+                 f"{ENV_TENANT_ID} env var is not set. Please run 'uipath auth'."
+             )
+         return {HEADER_INTERNAL_TENANT_ID: tenant_id}  # type: ignore
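For orientation, here is a minimal sketch of the call order this class implies: open the eval set run, create one eval run per item, push that run's results, then close the set run. The orchestration loop, the run_single_eval coroutine, and the loose Any typing are assumptions for illustration only; the ProgressReporter methods and their arguments are the ones shown in the hunk above (the module path of ProgressReporter is not visible in this diff, so the instance is taken as a parameter).

from typing import Any, Awaitable, Callable


async def report_eval_set(
    reporter: Any,  # an already-constructed ProgressReporter (module path not shown in this diff)
    eval_items: list[dict[str, Any]],
    run_single_eval: Callable[[dict[str, Any]], Awaitable[tuple[list[Any], bool, float]]],
) -> None:
    # 1. Open the eval set run (stores the eval set run id on the reporter).
    await reporter.create_eval_set_run()

    for eval_item in eval_items:
        # 2. Register the evaluation as IN_PROGRESS and keep its run id.
        eval_run_id = await reporter.create_eval_run(eval_item)

        # 3. Execute the evaluation itself (hypothetical, outside the reporter's scope).
        eval_results, success, execution_time = await run_single_eval(eval_item)

        # 4. Push scores, assertion runs and the actual output for this run.
        await reporter.update_eval_run(eval_results, eval_run_id, success, execution_time)

    # 5. Close the eval set run with aggregated evaluator scores.
    await reporter.update_eval_set_run()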
@@ -158,15 +158,17 @@ class UiPathRuntimeContext(BaseModel):
      result: Optional[UiPathRuntimeResult] = None
      execution_output_file: Optional[str] = None
      input_file: Optional[str] = None
+     is_eval_run: bool = False

      model_config = {"arbitrary_types_allowed": True}

      @classmethod
-     def from_config(cls, config_path=None):
+     def from_config(cls, config_path=None, **kwargs):
          """Load configuration from uipath.json file.

          Args:
              config_path: Path to the configuration file. If None, uses the default "uipath.json"
+             **kwargs: Additional keyword arguments to use as fallback for configuration values

          Returns:
              An instance of the class with fields populated from the config file
@@ -184,20 +186,29 @@ class UiPathRuntimeContext(BaseModel):

          instance = cls()

+         mapping = {
+             "dir": "runtime_dir",
+             "outputFile": "output_file",
+             "stateFile": "state_file",
+             "logsFile": "logs_file",
+         }
+
+         attributes_set = set()
+         # set values from config file if available
          if "runtime" in config:
              runtime_config = config["runtime"]
-
-             mapping = {
-                 "dir": "runtime_dir",
-                 "outputFile": "output_file",
-                 "stateFile": "state_file",
-                 "logsFile": "logs_file",
-             }
-
              for config_key, attr_name in mapping.items():
                  if config_key in runtime_config and hasattr(instance, attr_name):
+                     attributes_set.add(attr_name)
                      setattr(instance, attr_name, runtime_config[config_key])

+         # fallback to kwargs for any values not set from config file
+         for _, attr_name in mapping.items():
+             if attr_name in kwargs and hasattr(instance, attr_name):
+                 # Only set from kwargs if not already set from config file
+                 if attr_name not in attributes_set:
+                     setattr(instance, attr_name, kwargs[attr_name])
+
          return instance

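The net effect of the from_config change is a simple precedence rule: values read from the "runtime" section of uipath.json are applied first, and keyword arguments only fill attributes the file left unset. A standalone sketch of that rule follows; the helper name resolve_runtime_options and its dict-based return are inventions for illustration, not part of the package.

def resolve_runtime_options(runtime_config: dict, **kwargs) -> dict:
    # Same key-to-attribute mapping as in from_config above.
    mapping = {
        "dir": "runtime_dir",
        "outputFile": "output_file",
        "stateFile": "state_file",
        "logsFile": "logs_file",
    }
    resolved: dict = {}
    for config_key, attr_name in mapping.items():
        if config_key in runtime_config:
            resolved[attr_name] = runtime_config[config_key]  # config file wins
        elif attr_name in kwargs:
            resolved[attr_name] = kwargs[attr_name]  # kwargs act as fallback
    return resolved


# prints {'runtime_dir': '__uipath', 'logs_file': 'eval.log'}
print(resolve_runtime_options({"dir": "__uipath"}, logs_file="eval.log"))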
@@ -310,12 +321,13 @@ class UiPathBaseRuntime(ABC):
          with open(self.context.input_file) as f:
              self.context.input = f.read()

-         # Intercept all stdout/stderr/logs and write them to a file (runtime), stdout (debug)
+         # Intercept all stdout/stderr/logs and write them to a file (runtime/evals), stdout (debug)
          self.logs_interceptor = LogsInterceptor(
              min_level=self.context.logs_min_level,
              dir=self.context.runtime_dir,
              file=self.context.logs_file,
              job_id=self.context.job_id,
+             is_debug_run=self.is_debug_run(),
          )
          self.logs_interceptor.setup()

@@ -437,6 +449,9 @@ class UiPathBaseRuntime(ABC):

          await self.cleanup()

+     def is_debug_run(self) -> bool:
+         return not self.context.is_eval_run and not self.context.job_id
+
      @cached_property
      def output_file_path(self) -> str:
          if self.context.runtime_dir and self.context.output_file:
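The new is_debug_run predicate treats a run as a debug run only when it is neither an eval run nor a job. A standalone restatement of that logic with the three cases spelled out (a plain function for illustration, not the runtime method itself):

from typing import Optional


def is_debug_run(is_eval_run: bool, job_id: Optional[str]) -> bool:
    # Mirrors the method above: debug only when not an eval run and no job id.
    return not is_eval_run and not job_id


assert is_debug_run(False, None)           # local debug run -> logs stream to stdout
assert not is_debug_run(True, None)        # eval run        -> logs go to file
assert not is_debug_run(False, "job-123")  # job run         -> logs go to file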
@@ -29,6 +29,7 @@ class LogsInterceptor:
          dir: Optional[str] = "__uipath",
          file: Optional[str] = "execution.log",
          job_id: Optional[str] = None,
+         is_debug_run: bool = False,
      ):
          """Initialize the log interceptor.

@@ -37,6 +38,7 @@ class LogsInterceptor:
              dir (str): The directory where logs should be stored.
              file (str): The log file name.
              job_id (str, optional): If provided, logs go to file; otherwise, to stdout.
+             is_debug_run (bool, optional): If True, log the output to stdout/stderr.
          """
          min_level = min_level or "INFO"
          self.job_id = job_id
@@ -58,18 +60,18 @@ class LogsInterceptor:
          self.log_handler: Union[PersistentLogsHandler, logging.StreamHandler[TextIO]]

          # Create either file handler (runtime) or stdout handler (debug)
-         if self.job_id:
+         if is_debug_run:
+             # Use stdout handler when not running as a job or eval
+             self.log_handler = logging.StreamHandler(sys.stdout)
+             formatter = logging.Formatter("%(message)s")
+             self.log_handler.setFormatter(formatter)
+         else:
              # Ensure directory exists for file logging
              dir = dir or "__uipath"
              file = file or "execution.log"
              os.makedirs(dir, exist_ok=True)
              log_file = os.path.join(dir, file)
              self.log_handler = PersistentLogsHandler(file=log_file)
-         else:
-             # Use stdout handler when not running as a job
-             self.log_handler = logging.StreamHandler(sys.stdout)
-             formatter = logging.Formatter("%(message)s")
-             self.log_handler.setFormatter(formatter)

          self.log_handler.setLevel(self.numeric_min_level)
          self.logger = logging.getLogger("runtime")
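Read together with is_debug_run above, LogsInterceptor now picks its handler based on how the process was started rather than on job_id alone. A minimal sketch of the same selection, with logging.FileHandler standing in for the package's PersistentLogsHandler:

import logging
import os
import sys


def build_log_handler(
    is_debug_run: bool, dir: str = "__uipath", file: str = "execution.log"
) -> logging.Handler:
    if is_debug_run:
        # Debug runs stream plain messages to stdout, as in the branch above.
        handler = logging.StreamHandler(sys.stdout)
        handler.setFormatter(logging.Formatter("%(message)s"))
        return handler
    # Job and eval runs log to a file; FileHandler stands in for PersistentLogsHandler here.
    os.makedirs(dir, exist_ok=True)
    return logging.FileHandler(os.path.join(dir, file))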
@@ -1,10 +1,17 @@
  from contextlib import contextmanager
  from enum import Enum
- from typing import Any, Iterator, List, Optional, Type, TypeVar
+ from typing import Any, Dict, Iterator, List, Optional, Type, TypeVar

  import click
  from rich.console import Console
  from rich.live import Live
+ from rich.progress import (
+     Progress,
+     SpinnerColumn,
+     TaskID,
+     TextColumn,
+     TimeElapsedColumn,
+ )
  from rich.spinner import Spinner as RichSpinner
  from rich.text import Text

@@ -50,6 +57,8 @@ class ConsoleLogger:
          self._console = Console()
          self._spinner_live: Optional[Live] = None
          self._spinner = RichSpinner("dots")
+         self._progress: Optional[Progress] = None
+         self._progress_tasks: Dict[str, TaskID] = {}
          self._initialized = True

      def _stop_spinner_if_active(self) -> None:
@@ -58,6 +67,13 @@ class ConsoleLogger:
              self._spinner_live.stop()
              self._spinner_live = None

+     def _stop_progress_if_active(self) -> None:
+         """Internal method to stop the progress if it's active."""
+         if self._progress:
+             self._progress.stop()
+             self._progress = None
+             self._progress_tasks.clear()
+
      def log(
          self, message: str, level: LogLevel = LogLevel.INFO, fg: Optional[str] = None
      ) -> None:
@@ -203,6 +219,44 @@ class ConsoleLogger:
          if self._spinner_live and self._spinner_live.is_started:
              self._spinner.text = Text(message)

+     @contextmanager
+     def evaluation_progress(
+         self, evaluations: List[Dict[str, str]]
+     ) -> Iterator["EvaluationProgressManager"]:
+         """Context manager for evaluation progress tracking.
+
+         Args:
+             evaluations: List of evaluation items with 'id' and 'name' keys
+
+         Yields:
+             EvaluationProgressManager instance
+         """
+         try:
+             # Stop any existing progress or spinner
+             self._stop_spinner_if_active()
+             self._stop_progress_if_active()
+
+             # Create progress with custom columns
+             self._progress = Progress(
+                 SpinnerColumn(),
+                 TextColumn("[bold blue]{task.description}"),
+                 TimeElapsedColumn(),
+                 console=self._console,
+                 transient=False,
+             )
+
+             # Add tasks for each evaluation
+             for eval_item in evaluations:
+                 task_id = self._progress.add_task(eval_item["name"], total=1)
+                 self._progress_tasks[eval_item["id"]] = task_id
+
+             self._progress.start()
+
+             yield EvaluationProgressManager(self._progress, self._progress_tasks)
+
+         finally:
+             self._stop_progress_if_active()
+
      @classmethod
      def get_instance(cls) -> "ConsoleLogger":
          """Get the singleton instance of ConsoleLogger.
@@ -213,3 +267,53 @@ class ConsoleLogger:
          if cls._instance is None:
              return cls()
          return cls._instance
+
+
+ class EvaluationProgressManager:
+     """Manager for evaluation progress updates."""
+
+     def __init__(self, progress: Progress, tasks: Dict[str, TaskID]):
+         """Initialize the progress manager.
+
+         Args:
+             progress: The Rich Progress instance
+             tasks: Mapping of evaluation IDs to task IDs
+         """
+         self.progress = progress
+         self.tasks = tasks
+
+     def complete_evaluation(self, eval_id: str) -> None:
+         """Mark an evaluation as completed.
+
+         Args:
+             eval_id: The evaluation ID
+         """
+         if eval_id in self.tasks:
+             task_id = self.tasks[eval_id]
+             # Update description to show completion
+             current_desc = self.progress.tasks[task_id].description
+             self.progress.update(
+                 task_id,
+                 completed=1,
+                 description=f"[green]✅ {current_desc}[/green]",
+             )
+
+     def fail_evaluation(self, eval_id: str, error_message: str) -> None:
+         """Mark an evaluation as failed.
+
+         Args:
+             eval_id: The evaluation ID
+             error_message: The error message
+         """
+         if eval_id in self.tasks:
+             task_id = self.tasks[eval_id]
+             # Truncate error message if too long
+             short_error = (
+                 error_message[:40] + "..." if len(error_message) > 40 else error_message
+             )
+             # Update the description to show failure
+             current_desc = self.progress.tasks[task_id].description
+             self.progress.update(
+                 task_id,
+                 description=f"[red]❌ {current_desc} - {short_error}[/red]",
+             )
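Finally, a hedged usage sketch of the new progress API, using the ConsoleLogger import path that appears in the first hunk; the evaluation ids, names, and outcomes below are invented for illustration.

from uipath._cli._utils._console import ConsoleLogger

evaluations = [
    {"id": "eval-1", "name": "happy path"},
    {"id": "eval-2", "name": "missing input"},
]

console = ConsoleLogger.get_instance()
with console.evaluation_progress(evaluations) as progress:
    # Each evaluation gets a spinner row with elapsed time; outcomes are
    # reported per evaluation id as the runs finish.
    progress.complete_evaluation("eval-1")
    progress.fail_evaluation("eval-2", "expected output did not match")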