uipath 2.1.41__py3-none-any.whl → 2.1.43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,582 +0,0 @@
- """Evaluation service for running and managing evaluation sets."""
-
- import asyncio
- import json
- import os
- import tempfile
- import warnings
- from datetime import datetime, timezone
- from pathlib import Path
- from typing import Any, Dict, List, Optional
-
- import click
-
- from uipath._cli._utils._console import ConsoleLogger, EvaluationProgressManager
-
- from ..cli_run import run_core  # type: ignore
- from ._evaluators._evaluator_base import EvaluatorBase
- from ._evaluators._evaluator_factory import EvaluatorFactory
- from ._models import (
-     EvaluationSet,
-     EvaluationSetResult,
- )
- from ._models._evaluators import EvalItemResult
- from .progress_reporter import ProgressReporter
-
- console = ConsoleLogger()
-
-
- class EvaluationService:
-     """Service for running evaluations."""
-
-     def __init__(
-         self,
-         entrypoint: Optional[str] = None,
-         eval_set_path: Optional[str | Path] = None,
-         eval_ids: Optional[List[str]] = None,
-         workers: int = 8,
-         report_progress: bool = True,
-     ):
-         """Initialize the evaluation service.
-
-         Args:
-             entrypoint: Path to the agent script to evaluate (optional, will auto-discover if not provided)
-             eval_set_path: Path to the evaluation set file (optional, will auto-discover if not provided)
-             workers: Number of parallel workers for running evaluations
-             report_progress: Whether to report progress to StudioWeb
-         """
-         self.entrypoint, self.eval_set_path = self._resolve_paths(
-             entrypoint, eval_set_path
-         )
-         self._eval_set = self._load_eval_set(eval_ids)
-         self._evaluators = self._load_evaluators()
-         self._num_workers = workers
-         self._results_lock = asyncio.Lock()
-         self._progress_manager: Optional[EvaluationProgressManager] = None
-         self._report_progress = report_progress
-         self._progress_reporter: Optional[ProgressReporter] = None
-         self._initialize_results()
-
-     def _resolve_paths(
-         self, entrypoint: Optional[str], eval_set_path: Optional[str | Path]
-     ) -> tuple[str, Path]:
-         """Resolve entrypoint and eval_set_path, auto-discovering if not provided.
-
-         Args:
-             entrypoint: Optional entrypoint path
-             eval_set_path: Optional eval set path
-
-         Returns:
-             Tuple of (resolved_entrypoint, resolved_eval_set_path)
-
-         Raises:
-             ValueError: If paths cannot be resolved or multiple options exist
-         """
-         resolved_entrypoint = entrypoint
-         resolved_eval_set_path = eval_set_path
-
-         if resolved_entrypoint is None:
-             resolved_entrypoint = self._auto_discover_entrypoint()
-
-         if resolved_eval_set_path is None:
-             resolved_eval_set_path = self._auto_discover_eval_set()
-
-         eval_set_path_obj = Path(resolved_eval_set_path)
-         if not eval_set_path_obj.is_file() or eval_set_path_obj.suffix != ".json":
-             raise ValueError("Evaluation set must be a JSON file")
-
-         return resolved_entrypoint, eval_set_path_obj
-
-     def _auto_discover_entrypoint(self) -> str:
-         """Auto-discover entrypoint from config file.
-
-         Returns:
-             Path to the entrypoint
-
-         Raises:
-             ValueError: If no entrypoint found or multiple entrypoints exist
-         """
-         config_file = "uipath.json"
-         if not os.path.isfile(config_file):
-             raise ValueError(
-                 f"File '{config_file}' not found. Please run 'uipath init'."
-             )
-
-         with open(config_file, "r", encoding="utf-8") as f:
-             uipath_config = json.loads(f.read())
-
-         entrypoints = uipath_config.get("entryPoints", [])
-
-         if not entrypoints:
-             raise ValueError(
-                 "No entrypoints found in uipath.json. Please run 'uipath init'."
-             )
-
-         if len(entrypoints) > 1:
-             entrypoint_paths = [ep.get("filePath") for ep in entrypoints]
-             raise ValueError(
-                 f"Multiple entrypoints found: {entrypoint_paths}. "
-                 f"Please specify which entrypoint to use: uipath eval <entrypoint> [eval_set]"
-             )
-
-         entrypoint_path = entrypoints[0].get("filePath")
-
-         console.info(
-             f"Auto-discovered entrypoint: {click.style(entrypoint_path, fg='cyan')}"
-         )
-         return entrypoint_path
-
-     def _auto_discover_eval_set(self) -> str:
-         """Auto-discover evaluation set from evals/eval-sets directory.
-
-         Returns:
-             Path to the evaluation set file
-
-         Raises:
-             ValueError: If no eval set found or multiple eval sets exist
-         """
-         eval_sets_dir = Path("evals/eval-sets")
-
-         if not eval_sets_dir.exists():
-             raise ValueError(
-                 "No 'evals/eval-sets' directory found. "
-                 "Please set 'UIPATH_PROJECT_ID' env var and run 'uipath pull'."
-             )
-
-         eval_set_files = list(eval_sets_dir.glob("*.json"))
-
-         if not eval_set_files:
-             raise ValueError(
-                 "No evaluation set files found in 'evals/eval-sets' directory. "
-             )
-
-         if len(eval_set_files) > 1:
-             file_names = [f.name for f in eval_set_files]
-             raise ValueError(
-                 f"Multiple evaluation sets found: {file_names}. "
-                 f"Please specify which evaluation set to use: 'uipath eval [entrypoint] <eval_set_path>'"
-             )
-
-         eval_set_path = str(eval_set_files[0])
-         console.info(
-             f"Auto-discovered evaluation set: {click.style(eval_set_path, fg='cyan')}"
-         )
-         return eval_set_path
-
-     def _initialize_results(self) -> None:
-         """Initialize the results file and directory."""
-         self._create_and_initialize_results_file()
-         # Initialize progress reporter if needed
-         if self._report_progress:
-             agent_snapshot = self._extract_agent_snapshot()
-             self._progress_reporter = ProgressReporter(
-                 eval_set_id=self._eval_set.id,
-                 agent_snapshot=agent_snapshot,
-                 no_of_evals=len(self._eval_set.evaluations),
-                 evaluators=self._evaluators,
-             )
-
-     def _extract_agent_snapshot(self) -> str:
-         """Extract agent snapshot from uipath.json file.
-
-         Returns:
-             JSON string containing the agent snapshot with input and output schemas
-         """
-         config_file = "uipath.json"
-         if not os.path.isfile(config_file):
-             console.error(f"File '{config_file}' not found. Please run 'uipath init'")
-
-         with open(config_file, "r", encoding="utf-8") as f:
-             file_content = f.read()
-             uipath_config = json.loads(file_content)
-
-         entry_point = None
-         for ep in uipath_config.get("entryPoints", []):
-             if ep.get("filePath") == self.entrypoint:
-                 entry_point = ep
-                 break
-
-         if not entry_point:
-             console.error(
-                 f"No entry point found with filePath '{self.entrypoint}' in uipath.json"
-             )
-
-         input_schema = entry_point.get("input", {})  # type: ignore
-         output_schema = entry_point.get("output", {})  # type: ignore
-
-         # Format as agent snapshot
-         agent_snapshot = {"inputSchema": input_schema, "outputSchema": output_schema}
-
-         return json.dumps(agent_snapshot)
-
-     def _create_and_initialize_results_file(self):
-         # Create results directory if it doesn't exist
-         results_dir = self.eval_set_path.parent.parent / "results"
-         results_dir.mkdir(exist_ok=True)
-
-         # Create results file
-         timestamp = datetime.now(timezone.utc).strftime("%M-%H-%d-%m-%Y")
-         eval_set_name = self._eval_set.name
-         self.result_file = results_dir / f"eval-{eval_set_name}-{timestamp}.json"
-
-         initial_results = EvaluationSetResult(
-             eval_set_id=self._eval_set.id,
-             eval_set_name=self._eval_set.name,
-             results=[],
-             average_score=0.0,
-         )
-
-         with open(self.result_file, "w", encoding="utf-8") as f:
-             f.write(initial_results.model_dump_json(indent=2))
-
-     def _load_eval_set(self, eval_ids: Optional[List[str]] = None) -> EvaluationSet:
-         """Load the evaluation set from file.
-
-         Returns:
-             The loaded evaluation set as EvaluationSet model
-         """
-         with open(self.eval_set_path, "r", encoding="utf-8") as f:
-             data = json.load(f)
-             eval_set = EvaluationSet(**data)
-             if eval_ids:
-                 eval_set.extract_selected_evals(eval_ids)
-             return eval_set
-
-     def _load_evaluators(self) -> List[EvaluatorBase]:
-         """Load evaluators referenced by the evaluation set."""
-         evaluators = []
-         evaluators_dir = self.eval_set_path.parent.parent / "evaluators"
-         evaluator_refs = set(self._eval_set.evaluatorRefs)
-         found_evaluator_ids = set()
-
-         # Load evaluators from JSON files
-         for file in evaluators_dir.glob("*.json"):
-             with open(file, "r", encoding="utf-8") as f:
-                 data = json.load(f)
-                 evaluator_id = data.get("id")
-
-                 if evaluator_id in evaluator_refs:
-                     evaluator = EvaluatorFactory.create_evaluator(data)
-                     evaluators.append(evaluator)
-                     found_evaluator_ids.add(evaluator_id)
-
-         # Check if all referenced evaluators were found
-         missing_evaluators = evaluator_refs - found_evaluator_ids
-         if missing_evaluators:
-             raise ValueError(
-                 f"Could not find evaluators with IDs: {missing_evaluators}"
-             )
-
-         return evaluators
-
-     async def _write_results(self, results: List[Any]) -> None:
-         """Write evaluation results to file with async lock.
-
-         Args:
-             results: List of evaluation results to write
-         """
-         async with self._results_lock:
-             # Read current results
-             with open(self.result_file, "r", encoding="utf-8") as f:
-                 current_results = EvaluationSetResult.model_validate_json(f.read())
-
-             # Add new results
-             current_results.results.extend(results)
-
-             if current_results.results:
-                 current_results.average_score = sum(
-                     r.score for r in current_results.results
-                 ) / len(current_results.results)
-
-             # Write updated results
-             with open(self.result_file, "w", encoding="utf-8") as f:
-                 f.write(current_results.model_dump_json(indent=2))
-
-     async def _results_queue_consumer(self, results_queue: asyncio.Queue[Any]) -> None:
-         """Consumer task for the results queue that writes to local file.
-
-         Args:
-             results_queue: Queue containing evaluation results to write to file
-         """
-         while True:
-             results: list[EvalItemResult] = await results_queue.get()
-             if results is None:
-                 # Sentinel value - consumer should stop
-                 results_queue.task_done()
-                 return
-
-             try:
-                 await self._write_results([eval_item.result for eval_item in results])
-                 results_queue.task_done()
-             except Exception as e:
-                 console.warning(f"Error writing results to file: {str(e)}")
-                 results_queue.task_done()
-
-     async def _sw_progress_reporter_queue_consumer(
-         self, sw_progress_reporter_queue: asyncio.Queue[Any]
-     ) -> None:
-         """Consumer task for the SW progress reporter.
-
-         Args:
-             sw_progress_reporter_queue: Queue containing evaluation results to report to StudioWeb
-         """
-         while True:
-             queue_item = await sw_progress_reporter_queue.get()
-             if queue_item is None:
-                 # Sentinel value - consumer should stop
-                 sw_progress_reporter_queue.task_done()
-                 return
-             eval_run_id: str
-             eval_results: list[EvalItemResult]
-             success: bool
-             execution_time: float
-
-             eval_run_id, eval_results, success, execution_time = queue_item
-
-             try:
-                 if self._progress_reporter:
-                     await self._progress_reporter.update_eval_run(
-                         eval_results, eval_run_id, execution_time
-                     )
-                 sw_progress_reporter_queue.task_done()
-             except Exception as e:
-                 console.warning(f"Error reporting progress to StudioWeb: {str(e)}")
-                 sw_progress_reporter_queue.task_done()
-
-     def _run_agent(self, input_json: str) -> tuple[Dict[str, Any], bool, float]:
-         """Run the agent with the given input.
-
-         Args:
-             input_json: JSON string containing input data
-
-         Returns:
-             Agent output as dictionary and success status
-         """
-         with tempfile.TemporaryDirectory() as tmpdir:
-             try:
-                 import time
-
-                 output_file = Path(tmpdir) / "output.json"
-                 logs_file = Path(tmpdir) / "execution.log"
-
-                 # Suppress LangChain deprecation warnings during agent execution
-                 with warnings.catch_warnings():
-                     warnings.filterwarnings(
-                         "ignore", category=UserWarning, module="langchain"
-                     )
-                     # Note: Progress reporting is handled outside this method since it's async
-                     start_time = time.time()
-                     success, error_message, info_message = run_core(
-                         entrypoint=self.entrypoint,
-                         input=input_json,
-                         resume=False,
-                         input_file=None,
-                         execution_output_file=output_file,
-                         logs_file=logs_file,
-                         runtime_dir=tmpdir,
-                         is_eval_run=True,
-                     )
-                     execution_time = time.time() - start_time
-                     if not success:
-                         console.warning(error_message)
-                         return {}, False, execution_time
-                     else:
-                         # Read the output file
-                         with open(output_file, "r", encoding="utf-8") as f:
-                             result = json.load(f)
-
-                         # uncomment the following lines to have access to the execution.logs (needed for some types of evals)
-                         # with open(logs_file, "r", encoding="utf-8") as f:
-                         # logs = f.read()
-                         if isinstance(result, str):
-                             try:
-                                 return json.loads(result), True, execution_time
-                             except json.JSONDecodeError as e:
-                                 raise Exception(f"Error parsing output: {e}") from e
-                         return result, True, 0.0
-
-             except Exception as e:
-                 console.warning(f"Error running agent: {str(e)}")
-                 return {"error": str(e)}, False, execution_time
-
-     async def _process_evaluation(
-         self,
-         eval_item: Dict[str, Any],
-         results_queue: asyncio.Queue[Any],
-         sw_progress_reporter_queue: asyncio.Queue[Any],
-     ) -> None:
-         """Process a single evaluation item.
-
-         Args:
-             eval_item: The evaluation item to process
-             results_queue: Queue for local file results
-             sw_progress_reporter_queue: Queue for StudioWeb progress reporting
-         """
-         eval_id = eval_item["id"]
-         eval_run_id: Optional[str] = None
-
-         try:
-             input_json = json.dumps(eval_item["inputs"])
-
-             if self._report_progress and self._progress_reporter:
-                 eval_run_id = await self._progress_reporter.create_eval_run(eval_item)
-
-             loop = asyncio.get_running_loop()
-             actual_output, success, execution_time = await loop.run_in_executor(
-                 None,
-                 self._run_agent,
-                 input_json,
-             )
-
-             if success:
-                 # Run each evaluator
-                 eval_results: list[EvalItemResult] = []
-                 for evaluator in self._evaluators:
-                     result = await evaluator.evaluate(
-                         evaluation_id=eval_item["id"],
-                         evaluation_name=eval_item["name"],
-                         input_data=eval_item["inputs"],
-                         expected_output=eval_item["expectedOutput"],
-                         actual_output=actual_output,
-                     )
-                     eval_results.append(
-                         EvalItemResult(evaluator_id=evaluator.id, result=result)
-                     )
-
-                 await results_queue.put(eval_results)
-                 if self._report_progress:
-                     # TODO: modify this, here we are only reporting for success
-                     await sw_progress_reporter_queue.put(
-                         (eval_run_id, eval_results, success, execution_time)
-                     )
-
-                 # Update progress to completed
-                 if self._progress_manager:
-                     self._progress_manager.complete_evaluation(eval_id)
-             else:
-                 # Mark as failed if agent execution failed
-                 if self._progress_manager:
-                     self._progress_manager.fail_evaluation(
-                         eval_id, "Agent execution failed"
-                     )
-
-         except Exception as e:
-             # Mark as failed with error message
-             if self._progress_manager:
-                 self._progress_manager.fail_evaluation(eval_id, str(e))
-             raise
-
-     async def _producer_task(self, task_queue: asyncio.Queue[Any]) -> None:
-         """Producer task that adds all evaluations to the queue.
-
-         Args:
-             task_queue: The asyncio queue to add tasks to
-         """
-         for eval_item in self._eval_set.evaluations:
-             await task_queue.put(eval_item.model_dump())
-
-         # Add sentinel values to signal workers to stop
-         for _ in range(self._num_workers):
-             await task_queue.put(None)
-
-     async def _consumer_task(
-         self,
-         task_queue: asyncio.Queue[Any],
-         worker_id: int,
-         results_queue: asyncio.Queue[Any],
-         sw_progress_reporter_queue: asyncio.Queue[Any],
-     ) -> None:
-         """Consumer task that processes evaluations from the queue.
-
-         Args:
-             task_queue: The asyncio queue to get tasks from
-             worker_id: ID of this worker for logging
-             results_queue: Queue for local file results
-             sw_progress_reporter_queue: Queue for StudioWeb progress reporting
-         """
-         while True:
-             eval_item = await task_queue.get()
-             if eval_item is None:
-                 # Sentinel value - worker should stop
-                 task_queue.task_done()
-                 return
-
-             try:
-                 await self._process_evaluation(
-                     eval_item, results_queue, sw_progress_reporter_queue
-                 )
-                 task_queue.task_done()
-             except Exception as e:
-                 # Log error and continue to next item
-                 task_queue.task_done()
-                 console.warning(
-                     f"Evaluation {eval_item.get('name', 'Unknown')} failed: {str(e)}"
-                 )
-
-     async def run_evaluation(self) -> None:
-         """Run the evaluation set using multiple worker tasks."""
-         console.info(
-             f"Starting evaluating {click.style(self._eval_set.name, fg='cyan')} evaluation set..."
-         )
-
-         if self._report_progress and self._progress_reporter:
-             await self._progress_reporter.create_eval_set_run()
-
-         # Prepare items for progress tracker
-         progress_items = [
-             {"id": eval_item.id, "name": eval_item.name}
-             for eval_item in self._eval_set.evaluations
-         ]
-
-         with console.evaluation_progress(progress_items) as progress_manager:
-             self._progress_manager = progress_manager
-
-             task_queue: asyncio.Queue[Any] = asyncio.Queue()
-             results_queue: asyncio.Queue[Any] = asyncio.Queue()
-             sw_progress_reporter_queue: asyncio.Queue[Any] = asyncio.Queue()
-
-             producer = asyncio.create_task(self._producer_task(task_queue))
-
-             consumers = []
-             for worker_id in range(self._num_workers):
-                 consumer = asyncio.create_task(
-                     self._consumer_task(
-                         task_queue, worker_id, results_queue, sw_progress_reporter_queue
-                     )
-                 )
-                 consumers.append(consumer)
-
-             # Create results queue consumer
-             results_consumer = asyncio.create_task(
-                 self._results_queue_consumer(results_queue)
-             )
-
-             # Create SW progress reporter queue consumer
-             sw_progress_consumer = None
-             if self._report_progress:
-                 sw_progress_consumer = asyncio.create_task(
-                     self._sw_progress_reporter_queue_consumer(
-                         sw_progress_reporter_queue
-                     )
-                 )
-
-             # Wait for producer to finish
-             await producer
-             await task_queue.join()
-
-             # Wait for all consumers to finish
-             await asyncio.gather(*consumers)
-
-             # Signal queue consumers to stop by sending sentinel values
-             await results_queue.put(None)
-             if self._report_progress:
-                 await sw_progress_reporter_queue.put(None)
-
-             await results_consumer
-             if sw_progress_consumer:
-                 await sw_progress_consumer
-
-             if self._progress_reporter:
-                 await self._progress_reporter.update_eval_set_run()
-
-         console.info(f"Results saved to {click.style(self.result_file, fg='cyan')}")