uipath 2.1.8__py3-none-any.whl → 2.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,356 @@
+ """Progress reporter for sending evaluation updates to StudioWeb."""
+
+ import json
+ import logging
+ import os
+ from typing import Any, List
+
+ from uipath import UiPath
+ from uipath._cli._evals._evaluators import EvaluatorBase
+ from uipath._cli._evals._models._evaluation_set import EvaluationStatus
+ from uipath._cli._evals._models._evaluators import EvalItemResult, ScoreType
+ from uipath._cli._utils._console import ConsoleLogger
+ from uipath._utils import Endpoint, RequestSpec
+ from uipath._utils.constants import ENV_TENANT_ID, HEADER_INTERNAL_TENANT_ID
+
+
+ class ProgressReporter:
+     """Handles reporting evaluation progress to StudioWeb via API calls."""
+
+     def __init__(
+         self,
+         eval_set_id: str,
+         agent_snapshot: str,
+         no_of_evals: int,
+         evaluators: List[EvaluatorBase],
+     ):
+         """Initialize the progress reporter.
+
+         Args:
+             eval_set_id: ID of the evaluation set
+             agent_snapshot: JSON snapshot of the agent configuration
+             no_of_evals: Number of evaluations in the set
+             evaluators: List of evaluator instances
+         """
+         self._eval_set_id = eval_set_id
+         self.agent_snapshot = agent_snapshot
+         self._no_of_evals = no_of_evals
+         self._evaluators = evaluators
+         self._evaluator_scores: dict[str, list[float]] = {
+             evaluator.id: [] for evaluator in evaluators
+         }
+
+         # Disable middleware logging and use the same console as ConsoleLogger
+         logging.getLogger("uipath._cli.middlewares").setLevel(logging.CRITICAL)
+
+         console_logger = ConsoleLogger.get_instance()
+
+         uipath = UiPath()
+
+         self._eval_set_run_id = None
+         self._client = uipath.api_client
+         self._console = console_logger
+         self._project_id = os.getenv("UIPATH_PROJECT_ID", None)
+         if not self._project_id:
+             self._console.warning(
+                 "Cannot report data to StudioWeb. Please set UIPATH_PROJECT_ID."
+             )
+
+     async def create_eval_run(self, eval_item: dict[str, Any]):
+         """Create a new evaluation run in StudioWeb.
+
+         Args:
+             eval_item: Dictionary containing evaluation data
+
+         Returns:
+             The ID of the created evaluation run
+         """
+         spec = self._create_eval_run_spec(eval_item)
+         response = await self._client.request_async(
+             method=spec.method,
+             url=spec.endpoint,
+             params=spec.params,
+             content=spec.content,
+             headers=spec.headers,
+             scoped="org",
+         )
+         return json.loads(response.content)["id"]
+
+     async def update_eval_run(
+         self,
+         eval_results: list[EvalItemResult],
+         eval_run_id: str,
+         success: bool,
+         execution_time: float,
+     ):
+         """Update an evaluation run with results.
+
+         Args:
+             eval_results: Dictionary mapping evaluator IDs to evaluation results
+             eval_run_id: ID of the evaluation run to update
+             success: Whether the evaluation was successful
+             execution_time: The agent execution time
+         """
+         assertion_runs, evaluator_scores, actual_output = self._collect_results(
+             eval_results
+         )
+         spec = self._update_eval_run_spec(
+             assertion_runs=assertion_runs,
+             evaluator_scores=evaluator_scores,
+             eval_run_id=eval_run_id,
+             execution_time=execution_time,
+             actual_output=actual_output,
+         )
+         await self._client.request_async(
+             method=spec.method,
+             url=spec.endpoint,
+             params=spec.params,
+             content=spec.content,
+             headers=spec.headers,
+             scoped="org",
+         )
+
+     async def create_eval_set_run(self):
+         """Create a new evaluation set run in StudioWeb."""
+         spec = self._create_eval_set_run_spec()
+         response = await self._client.request_async(
+             method=spec.method,
+             url=spec.endpoint,
+             params=spec.params,
+             content=spec.content,
+             headers=spec.headers,
+             scoped="org",
+         )
+         self._eval_set_run_id = json.loads(response.content)["id"]
+
+     async def update_eval_set_run(self):
+         """Update the evaluation set run status to complete."""
+         spec = self._update_eval_set_run_spec()
+         await self._client.request_async(
+             method=spec.method,
+             url=spec.endpoint,
+             params=spec.params,
+             content=spec.content,
+             headers=spec.headers,
+             scoped="org",
+         )
+
+     def _collect_results(
+         self, eval_results: list[EvalItemResult]
+     ) -> tuple[list[dict[str, Any]], list[dict[str, Any]], dict[str, Any]]:
+         assertion_runs: list[dict[str, Any]] = []
+         evaluator_scores: list[dict[str, Any]] = []
+         actual_output: dict[str, Any] = {}
+         for eval_result in eval_results:
+             # keep track of evaluator scores. this should be removed after this computation is done server-side
+             self._evaluator_scores[eval_result.evaluator_id].append(
+                 eval_result.result.score
+             )
+             evaluator_scores.append(
+                 {
+                     "type": ScoreType.NUMERICAL.value,
+                     "value": eval_result.result.score,
+                     "justification": eval_result.result.details,
+                     "evaluatorId": eval_result.evaluator_id,
+                 }
+             )
+             assertion_runs.append(
+                 {
+                     "status": EvaluationStatus.COMPLETED.value,
+                     "evaluatorId": eval_result.evaluator_id,
+                     "result": {
+                         "output": {"content": {**eval_result.result.actual_output}},
+                         "score": {
+                             "type": ScoreType.NUMERICAL.value,
+                             "value": eval_result.result.score,
+                             "justification": eval_result.result.details,
+                         },
+                     },
+                     "completionMetrics": {
+                         "duration": eval_result.result.evaluation_time,
+                         "cost": None,
+                         "tokens": 0,
+                         "completionTokens": 0,
+                         "promptTokens": 0,
+                     },
+                 }
+             )
+
+             # we extract the actual output here. we should have the same 'actual_output' for each 'EvalItemResult'
+             actual_output = eval_result.result.actual_output
+
+         return assertion_runs, evaluator_scores, actual_output
+
+     def _update_eval_run_spec(
+         self,
+         assertion_runs: list[dict[str, Any]],
+         evaluator_scores: list[dict[str, Any]],
+         eval_run_id: str,
+         actual_output: dict[str, Any],
+         execution_time: float,
+     ) -> RequestSpec:
+         return RequestSpec(
+             method="PUT",
+             endpoint=Endpoint(
+                 f"agents_/api/execution/agents/{self._project_id}/evalRun"
+             ),
+             content=json.dumps(
+                 {
+                     "evalRunId": eval_run_id,
+                     "status": EvaluationStatus.COMPLETED.value,
+                     "result": {
+                         "output": {"content": {**actual_output}},
+                         "evaluatorScores": evaluator_scores,
+                     },
+                     "completionMetrics": {"duration": int(execution_time)},
+                     "assertionRuns": assertion_runs,
+                 }
+             ),
+             headers=self._tenant_header(),
+         )
+
+     def _create_eval_run_spec(self, eval_item: dict[str, Any]) -> RequestSpec:
+         return RequestSpec(
+             method="POST",
+             endpoint=Endpoint(
+                 f"agents_/api/execution/agents/{self._project_id}/evalRun"
+             ),
+             content=json.dumps(
+                 {
+                     "evalSetRunId": self._eval_set_run_id,
+                     "evalSnapshot": {
+                         "id": eval_item["id"],
+                         "name": eval_item["name"],
+                         "assertionType": "unknown",
+                         "assertionProperties": {},
+                         "inputs": eval_item.get("inputs"),
+                         "outputKey": "*",
+                     },
+                     "status": EvaluationStatus.IN_PROGRESS.value,
+                     "assertionRuns": [
+                         # TODO: replace default values
+                         {
+                             "assertionSnapshot": {
+                                 "assertionProperties": {
+                                     "expectedOutput": eval_item.get(
+                                         "expectedOutput", {}
+                                     ),
+                                     "prompt": "No prompt for coded agents",
+                                     "simulationInstructions": "",
+                                     "expectedAgentBehavior": "",
+                                     "inputGenerationInstructions": "",
+                                     "simulateTools": False,
+                                     "simulateInput": False,
+                                     "toolsToSimulate": [],
+                                     **(
+                                         {"model": evaluator.model}
+                                         if hasattr(evaluator, "model")
+                                         else {}
+                                     ),
+                                 },
+                                 "assertionType": "Custom",
+                                 "outputKey": "*",
+                             },
+                             "status": 1,
+                             "evaluatorId": evaluator.id,
+                         }
+                         for evaluator in self._evaluators
+                     ],
+                 }
+             ),
+             headers=self._tenant_header(),
+         )
+
+     def _create_eval_set_run_spec(
+         self,
+     ) -> RequestSpec:
+         self._add_defaults_to_agent_snapshot()
+         agent_snapshot_dict = json.loads(self.agent_snapshot)
+
+         return RequestSpec(
+             method="POST",
+             endpoint=Endpoint(
+                 f"agents_/api/execution/agents/{self._project_id}/evalSetRun"
+             ),
+             content=json.dumps(
+                 {
+                     "agentId": self._project_id,
+                     "evalSetId": self._eval_set_id,
+                     "agentSnapshot": agent_snapshot_dict,
+                     "status": EvaluationStatus.IN_PROGRESS.value,
+                     "numberOfEvalsExecuted": self._no_of_evals,
+                 }
+             ),
+             headers=self._tenant_header(),
+         )
+
+     def _compute_evaluator_scores(self):
+         evaluator_scores = []
+         evaluator_averages = []
+
+         for evaluator in self._evaluators:
+             scores = self._evaluator_scores[evaluator.id]
+             if scores:
+                 avg_score = sum(scores) / len(scores)
+                 evaluator_scores.append(
+                     {"value": avg_score, "evaluatorId": evaluator.id}
+                 )
+                 evaluator_averages.append(avg_score)
+             else:
+                 # fallback to score 0
+                 evaluator_scores.append({"value": 0, "evaluatorId": evaluator.id})
+                 evaluator_averages.append(0)
+
+         overall_score = (
+             sum(evaluator_averages) / len(evaluator_averages)
+             if evaluator_averages
+             else 0
+         )
+         return evaluator_scores, overall_score
+
+     def _update_eval_set_run_spec(
+         self,
+     ) -> RequestSpec:
+         # this should be removed after computations are done server-side
+         evaluator_scores, overall_score = self._compute_evaluator_scores()
+         return RequestSpec(
+             method="PUT",
+             endpoint=Endpoint(
+                 f"agents_/api/execution/agents/{self._project_id}/evalSetRun"
+             ),
+             content=json.dumps(
+                 {
+                     ## TODO: send the actual data here (do we need to send those again? isn't it redundant?)
+                     "evalSetRunId": self._eval_set_run_id,
+                     ## this should be removed. not used but enforced by the API
+                     "score": overall_score,
+                     "status": EvaluationStatus.COMPLETED.value,
+                     "evaluatorScores": evaluator_scores,
+                 }
+             ),
+             headers=self._tenant_header(),
+         )
+
+     def _add_defaults_to_agent_snapshot(self):
+         ## TODO: remove this after properties are marked as optional at api level
+         agent_snapshot_dict = json.loads(self.agent_snapshot)
+         agent_snapshot_dict["tools"] = []
+         agent_snapshot_dict["contexts"] = []
+         agent_snapshot_dict["escalations"] = []
+         agent_snapshot_dict["systemPrompt"] = ""
+         agent_snapshot_dict["userPrompt"] = ""
+         agent_snapshot_dict["settings"] = {
+             "model": "",
+             "maxTokens": 0,
+             "temperature": 0,
+             "engine": "",
+         }
+         self.agent_snapshot = json.dumps(agent_snapshot_dict)
+
+     def _tenant_header(self) -> dict[str, str]:
+         tenant_id = os.getenv(ENV_TENANT_ID, None)
+         if not tenant_id:
+             self._console.error(
+                 f"{ENV_TENANT_ID} env var is not set. Please run 'uipath auth'."
+             )
+         return {HEADER_INTERNAL_TENANT_ID: tenant_id}  # type: ignore
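For orientation, here is a minimal sketch of the call order this class implies: open the eval set run, create one eval run per item, push that run's results, then close the set run. The orchestration loop, the run_single_eval coroutine, and the loose Any typing are assumptions for illustration only; the ProgressReporter methods and their arguments are the ones shown in the hunk above (the module path of ProgressReporter is not visible in this diff, so the instance is taken as a parameter).

from typing import Any, Awaitable, Callable


async def report_eval_set(
    reporter: Any,  # an already-constructed ProgressReporter (module path not shown in this diff)
    eval_items: list[dict[str, Any]],
    run_single_eval: Callable[[dict[str, Any]], Awaitable[tuple[list[Any], bool, float]]],
) -> None:
    # 1. Open the eval set run (stores the eval set run id on the reporter).
    await reporter.create_eval_set_run()

    for eval_item in eval_items:
        # 2. Register the evaluation as IN_PROGRESS and keep its run id.
        eval_run_id = await reporter.create_eval_run(eval_item)

        # 3. Execute the evaluation itself (hypothetical, outside the reporter's scope).
        eval_results, success, execution_time = await run_single_eval(eval_item)

        # 4. Push scores, assertion runs and the actual output for this run.
        await reporter.update_eval_run(eval_results, eval_run_id, success, execution_time)

    # 5. Close the eval set run with aggregated evaluator scores.
    await reporter.update_eval_set_run()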
@@ -158,15 +158,17 @@ class UiPathRuntimeContext(BaseModel):
      result: Optional[UiPathRuntimeResult] = None
      execution_output_file: Optional[str] = None
      input_file: Optional[str] = None
+     is_eval_run: bool = False

      model_config = {"arbitrary_types_allowed": True}

      @classmethod
-     def from_config(cls, config_path=None):
+     def from_config(cls, config_path=None, **kwargs):
          """Load configuration from uipath.json file.

          Args:
              config_path: Path to the configuration file. If None, uses the default "uipath.json"
+             **kwargs: Additional keyword arguments to use as fallback for configuration values

          Returns:
              An instance of the class with fields populated from the config file
@@ -184,20 +186,29 @@ class UiPathRuntimeContext(BaseModel):

          instance = cls()

+         mapping = {
+             "dir": "runtime_dir",
+             "outputFile": "output_file",
+             "stateFile": "state_file",
+             "logsFile": "logs_file",
+         }
+
+         attributes_set = set()
+         # set values from config file if available
          if "runtime" in config:
              runtime_config = config["runtime"]
-
-             mapping = {
-                 "dir": "runtime_dir",
-                 "outputFile": "output_file",
-                 "stateFile": "state_file",
-                 "logsFile": "logs_file",
-             }
-
              for config_key, attr_name in mapping.items():
                  if config_key in runtime_config and hasattr(instance, attr_name):
+                     attributes_set.add(attr_name)
                      setattr(instance, attr_name, runtime_config[config_key])

+         # fallback to kwargs for any values not set from config file
+         for _, attr_name in mapping.items():
+             if attr_name in kwargs and hasattr(instance, attr_name):
+                 # Only set from kwargs if not already set from config file
+                 if attr_name not in attributes_set:
+                     setattr(instance, attr_name, kwargs[attr_name])
+
          return instance

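The net effect of the from_config change is a simple precedence rule: values read from the "runtime" section of uipath.json are applied first, and keyword arguments only fill attributes the file left unset. A standalone sketch of that rule follows; the helper name resolve_runtime_options and its dict-based return are inventions for illustration, not part of the package.

def resolve_runtime_options(runtime_config: dict, **kwargs) -> dict:
    # Same key-to-attribute mapping as in from_config above.
    mapping = {
        "dir": "runtime_dir",
        "outputFile": "output_file",
        "stateFile": "state_file",
        "logsFile": "logs_file",
    }
    resolved: dict = {}
    for config_key, attr_name in mapping.items():
        if config_key in runtime_config:
            resolved[attr_name] = runtime_config[config_key]  # config file wins
        elif attr_name in kwargs:
            resolved[attr_name] = kwargs[attr_name]  # kwargs act as fallback
    return resolved


# prints {'runtime_dir': '__uipath', 'logs_file': 'eval.log'}
print(resolve_runtime_options({"dir": "__uipath"}, logs_file="eval.log"))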
@@ -310,12 +321,13 @@ class UiPathBaseRuntime(ABC):
          with open(self.context.input_file) as f:
              self.context.input = f.read()

-         # Intercept all stdout/stderr/logs and write them to a file (runtime), stdout (debug)
+         # Intercept all stdout/stderr/logs and write them to a file (runtime/evals), stdout (debug)
          self.logs_interceptor = LogsInterceptor(
              min_level=self.context.logs_min_level,
              dir=self.context.runtime_dir,
              file=self.context.logs_file,
              job_id=self.context.job_id,
+             is_debug_run=self.is_debug_run(),
          )
          self.logs_interceptor.setup()

@@ -437,6 +449,9 @@ class UiPathBaseRuntime(ABC):

          await self.cleanup()

+     def is_debug_run(self) -> bool:
+         return not self.context.is_eval_run and not self.context.job_id
+
      @cached_property
      def output_file_path(self) -> str:
          if self.context.runtime_dir and self.context.output_file:
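The new is_debug_run predicate treats a run as a debug run only when it is neither an eval run nor a job. A standalone restatement of that logic with the three cases spelled out (a plain function for illustration, not the runtime method itself):

from typing import Optional


def is_debug_run(is_eval_run: bool, job_id: Optional[str]) -> bool:
    # Mirrors the method above: debug only when not an eval run and no job id.
    return not is_eval_run and not job_id


assert is_debug_run(False, None)           # local debug run -> logs stream to stdout
assert not is_debug_run(True, None)        # eval run        -> logs go to file
assert not is_debug_run(False, "job-123")  # job run         -> logs go to file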
@@ -29,6 +29,7 @@ class LogsInterceptor:
          dir: Optional[str] = "__uipath",
          file: Optional[str] = "execution.log",
          job_id: Optional[str] = None,
+         is_debug_run: bool = False,
      ):
          """Initialize the log interceptor.

@@ -37,6 +38,7 @@ class LogsInterceptor:
              dir (str): The directory where logs should be stored.
              file (str): The log file name.
              job_id (str, optional): If provided, logs go to file; otherwise, to stdout.
+             is_debug_run (bool, optional): If True, log the output to stdout/stderr.
          """
          min_level = min_level or "INFO"
          self.job_id = job_id
@@ -58,18 +60,18 @@ class LogsInterceptor:
          self.log_handler: Union[PersistentLogsHandler, logging.StreamHandler[TextIO]]

          # Create either file handler (runtime) or stdout handler (debug)
-         if self.job_id:
+         if is_debug_run:
+             # Use stdout handler when not running as a job or eval
+             self.log_handler = logging.StreamHandler(sys.stdout)
+             formatter = logging.Formatter("%(message)s")
+             self.log_handler.setFormatter(formatter)
+         else:
              # Ensure directory exists for file logging
              dir = dir or "__uipath"
              file = file or "execution.log"
              os.makedirs(dir, exist_ok=True)
              log_file = os.path.join(dir, file)
              self.log_handler = PersistentLogsHandler(file=log_file)
-         else:
-             # Use stdout handler when not running as a job
-             self.log_handler = logging.StreamHandler(sys.stdout)
-             formatter = logging.Formatter("%(message)s")
-             self.log_handler.setFormatter(formatter)

          self.log_handler.setLevel(self.numeric_min_level)
          self.logger = logging.getLogger("runtime")
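Read together with is_debug_run above, LogsInterceptor now picks its handler based on how the process was started rather than on job_id alone. A minimal sketch of the same selection, with logging.FileHandler standing in for the package's PersistentLogsHandler:

import logging
import os
import sys


def build_log_handler(
    is_debug_run: bool, dir: str = "__uipath", file: str = "execution.log"
) -> logging.Handler:
    if is_debug_run:
        # Debug runs stream plain messages to stdout, as in the branch above.
        handler = logging.StreamHandler(sys.stdout)
        handler.setFormatter(logging.Formatter("%(message)s"))
        return handler
    # Job and eval runs log to a file; FileHandler stands in for PersistentLogsHandler here.
    os.makedirs(dir, exist_ok=True)
    return logging.FileHandler(os.path.join(dir, file))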
@@ -1,10 +1,17 @@
  from contextlib import contextmanager
  from enum import Enum
- from typing import Any, Iterator, List, Optional, Type, TypeVar
+ from typing import Any, Dict, Iterator, List, Optional, Type, TypeVar

  import click
  from rich.console import Console
  from rich.live import Live
+ from rich.progress import (
+     Progress,
+     SpinnerColumn,
+     TaskID,
+     TextColumn,
+     TimeElapsedColumn,
+ )
  from rich.spinner import Spinner as RichSpinner
  from rich.text import Text

@@ -50,6 +57,8 @@ class ConsoleLogger:
          self._console = Console()
          self._spinner_live: Optional[Live] = None
          self._spinner = RichSpinner("dots")
+         self._progress: Optional[Progress] = None
+         self._progress_tasks: Dict[str, TaskID] = {}
          self._initialized = True

      def _stop_spinner_if_active(self) -> None:
@@ -58,6 +67,13 @@ class ConsoleLogger:
              self._spinner_live.stop()
              self._spinner_live = None

+     def _stop_progress_if_active(self) -> None:
+         """Internal method to stop the progress if it's active."""
+         if self._progress:
+             self._progress.stop()
+             self._progress = None
+             self._progress_tasks.clear()
+
      def log(
          self, message: str, level: LogLevel = LogLevel.INFO, fg: Optional[str] = None
      ) -> None:
@@ -203,6 +219,44 @@ class ConsoleLogger:
          if self._spinner_live and self._spinner_live.is_started:
              self._spinner.text = Text(message)

+     @contextmanager
+     def evaluation_progress(
+         self, evaluations: List[Dict[str, str]]
+     ) -> Iterator["EvaluationProgressManager"]:
+         """Context manager for evaluation progress tracking.
+
+         Args:
+             evaluations: List of evaluation items with 'id' and 'name' keys
+
+         Yields:
+             EvaluationProgressManager instance
+         """
+         try:
+             # Stop any existing progress or spinner
+             self._stop_spinner_if_active()
+             self._stop_progress_if_active()
+
+             # Create progress with custom columns
+             self._progress = Progress(
+                 SpinnerColumn(),
+                 TextColumn("[bold blue]{task.description}"),
+                 TimeElapsedColumn(),
+                 console=self._console,
+                 transient=False,
+             )
+
+             # Add tasks for each evaluation
+             for eval_item in evaluations:
+                 task_id = self._progress.add_task(eval_item["name"], total=1)
+                 self._progress_tasks[eval_item["id"]] = task_id
+
+             self._progress.start()
+
+             yield EvaluationProgressManager(self._progress, self._progress_tasks)
+
+         finally:
+             self._stop_progress_if_active()
+
      @classmethod
      def get_instance(cls) -> "ConsoleLogger":
          """Get the singleton instance of ConsoleLogger.
@@ -213,3 +267,53 @@ class ConsoleLogger:
          if cls._instance is None:
              return cls()
          return cls._instance
+
+
+ class EvaluationProgressManager:
+     """Manager for evaluation progress updates."""
+
+     def __init__(self, progress: Progress, tasks: Dict[str, TaskID]):
+         """Initialize the progress manager.
+
+         Args:
+             progress: The Rich Progress instance
+             tasks: Mapping of evaluation IDs to task IDs
+         """
+         self.progress = progress
+         self.tasks = tasks
+
+     def complete_evaluation(self, eval_id: str) -> None:
+         """Mark an evaluation as completed.
+
+         Args:
+             eval_id: The evaluation ID
+         """
+         if eval_id in self.tasks:
+             task_id = self.tasks[eval_id]
+             # Update description to show completion
+             current_desc = self.progress.tasks[task_id].description
+             self.progress.update(
+                 task_id,
+                 completed=1,
+                 description=f"[green]✅ {current_desc}[/green]",
+             )
+
+     def fail_evaluation(self, eval_id: str, error_message: str) -> None:
+         """Mark an evaluation as failed.
+
+         Args:
+             eval_id: The evaluation ID
+             error_message: The error message
+         """
+         if eval_id in self.tasks:
+             task_id = self.tasks[eval_id]
+             # Truncate error message if too long
+             short_error = (
+                 error_message[:40] + "..." if len(error_message) > 40 else error_message
+             )
+             # Update the description to show failure
+             current_desc = self.progress.tasks[task_id].description
+             self.progress.update(
+                 task_id,
+                 description=f"[red]❌ {current_desc} - {short_error}[/red]",
+             )
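Finally, a hedged usage sketch of the new progress API, using the ConsoleLogger import path that appears in the first hunk; the evaluation ids, names, and outcomes below are invented for illustration.

from uipath._cli._utils._console import ConsoleLogger

evaluations = [
    {"id": "eval-1", "name": "happy path"},
    {"id": "eval-2", "name": "missing input"},
]

console = ConsoleLogger.get_instance()
with console.evaluation_progress(evaluations) as progress:
    # Each evaluation gets a spinner row with elapsed time; outcomes are
    # reported per evaluation id as the runs finish.
    progress.complete_evaluation("eval-1")
    progress.fail_evaluation("eval-2", "expected output did not match")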