PyPI - uipath - Versions diffs - 2.1.52__tar.gz → 2.1.53__tar.gz - Mend

uipath 2.1.52.tar.gz → 2.1.53.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (290) hide show

{uipath-2.1.52 → uipath-2.1.53}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: uipath
-Version: 2.1.52
+Version: 2.1.53
 Summary: Python SDK and CLI for UiPath Platform, enabling programmatic interaction with automation services, process management, and deployment tools.
 Project-URL: Homepage, https://uipath.com
 Project-URL: Repository, https://github.com/UiPath/uipath-python

{uipath-2.1.52 → uipath-2.1.53}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "uipath"
-version = "2.1.52"
+version = "2.1.53"
 description = "Python SDK and CLI for UiPath Platform, enabling programmatic interaction with automation services, process management, and deployment tools."
 readme = { file = "README.md", content-type = "text/markdown" }
 requires-python = ">=3.10"

{uipath-2.1.52/src/uipath/_cli/_evals/_evaluators → uipath-2.1.53/src/uipath/_cli/_evals}/_evaluator_factory.py RENAMED Viewed

@@ -1,18 +1,21 @@
 from typing import Any, Dict
-from .._models import EvaluatorCategory, EvaluatorType
-from ._evaluator_base import EvaluatorBase, EvaluatorBaseParams
-from ._exact_match_evaluator import ExactMatchEvaluator
-from ._json_similarity_evaluator import JsonSimilarityEvaluator
-from ._llm_as_judge_evaluator import LlmAsAJudgeEvaluator
-from ._trajectory_evaluator import TrajectoryEvaluator
+from uipath._cli._evals._models._evaluator_base_params import EvaluatorBaseParams
+from uipath.eval.evaluators import (
+    BaseEvaluator,
+    ExactMatchEvaluator,
+    JsonSimilarityEvaluator,
+    LlmAsAJudgeEvaluator,
+    TrajectoryEvaluator,
+)
+from uipath.eval.models.models import EvaluatorCategory, EvaluatorType
 class EvaluatorFactory:
     """Factory class for creating evaluator instances based on configuration."""
-    @staticmethod
-    def create_evaluator(data: Dict[str, Any]) -> EvaluatorBase:
+    @classmethod
+    def create_evaluator(cls, data: Dict[str, Any]) -> BaseEvaluator[Any]:
         """Create an evaluator instance from configuration data.
         Args:
@@ -25,13 +28,15 @@ class EvaluatorFactory:
             ValueError: If category is unknown or required fields are missing
         """
         # Extract common fields
-        evaluator_id = data.get("id")
-        if not evaluator_id:
+        name = data.get("name", "")
+        if not name:
+            raise ValueError("Evaluator configuration must include 'name' field")
+        id = data.get("id", "")
+        if not id:
             raise ValueError("Evaluator configuration must include 'id' field")
         category = EvaluatorCategory.from_int(data.get("category"))
         evaluator_type = EvaluatorType.from_int(data.get("type", EvaluatorType.Unknown))
-        name = data.get("name", "")
         description = data.get("description", "")
         created_at = data.get("createdAt", "")
         updated_at = data.get("updatedAt", "")
@@ -39,7 +44,7 @@ class EvaluatorFactory:
         # Create base parameters
         base_params = EvaluatorBaseParams(
-            evaluator_id=evaluator_id,
+            id=id,
             category=category,
             evaluator_type=evaluator_type,
             name=name,
@@ -49,7 +54,6 @@ class EvaluatorFactory:
             target_output_key=target_output_key,
         )
-        # Create evaluator based on category
         match category:
             case EvaluatorCategory.Deterministic:
                 if evaluator_type == evaluator_type.Equals:
@@ -80,9 +84,8 @@ class EvaluatorFactory:
         base_params: EvaluatorBaseParams, data: Dict[str, Any]
     ) -> ExactMatchEvaluator:
         """Create a deterministic evaluator."""
-        return ExactMatchEvaluator.from_params(
-            base_params,
-            target_output_key=data.get("targetOutputKey", "*"),
+        return ExactMatchEvaluator(
+            **base_params.model_dump(),
         )
     @staticmethod
@@ -90,9 +93,8 @@ class EvaluatorFactory:
         base_params: EvaluatorBaseParams, data: Dict[str, Any]
     ) -> JsonSimilarityEvaluator:
         """Create a deterministic evaluator."""
-        return JsonSimilarityEvaluator.from_params(
-            base_params,
-            target_output_key=data.get("targetOutputKey", "*"),
+        return JsonSimilarityEvaluator(
+            **base_params.model_dump(),
         )
     @staticmethod
@@ -112,16 +114,15 @@ class EvaluatorFactory:
                 "'same-as-agent' model option is not supported by coded agents evaluations. Please select a specific model for the evaluator."
             )
-        return LlmAsAJudgeEvaluator.from_params(
-            base_params,
+        return LlmAsAJudgeEvaluator(
+            **base_params.model_dump(),
             prompt=prompt,
             model=model,
-            target_output_key=data.get("targetOutputKey", "*"),
         )
     @staticmethod
     def _create_trajectory_evaluator(
         base_params: EvaluatorBaseParams, data: Dict[str, Any]
-    ) -> TrajectoryEvaluator:
+    ) -> TrajectoryEvaluator[Any]:
         """Create a trajectory evaluator."""
         raise NotImplementedError()

{uipath-2.1.52 → uipath-2.1.53}/src/uipath/_cli/_evals/_models/_evaluation_set.py RENAMED Viewed

@@ -1,40 +1,45 @@
 from enum import IntEnum
 from typing import Any, Dict, List
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field
+from pydantic.alias_generators import to_camel
 class EvaluationItem(BaseModel):
     """Individual evaluation item within an evaluation set."""
+    model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)
     id: str
     name: str
     inputs: Dict[str, Any]
-    expectedOutput: Dict[str, Any]
-    expectedAgentBehavior: str = ""
-    simulationInstructions: str = ""
-    simulateInput: bool = False
-    inputGenerationInstructions: str = ""
-    simulateTools: bool = False
-    toolsToSimulate: List[str] = Field(default_factory=list)
-    evalSetId: str
-    createdAt: str
-    updatedAt: str
+    expected_output: Dict[str, Any]
+    expected_agent_behavior: str = ""
+    simulation_instructions: str = ""
+    simulate_input: bool = False
+    input_generation_instructions: str = ""
+    simulate_tools: bool = False
+    tools_to_simulate: List[str] = Field(default_factory=list)
+    eval_set_id: str
+    created_at: str
+    updated_at: str
 class EvaluationSet(BaseModel):
     """Complete evaluation set model."""
+    model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)
     id: str
-    fileName: str
-    evaluatorRefs: List[str] = Field(default_factory=list)
+    file_name: str
+    evaluator_refs: List[str] = Field(default_factory=list)
     evaluations: List[EvaluationItem] = Field(default_factory=list)
     name: str
-    batchSize: int = 10
-    timeoutMinutes: int = 20
-    modelSettings: List[Dict[str, Any]] = Field(default_factory=list)
-    createdAt: str
-    updatedAt: str
+    batch_size: int = 10
+    timeout_minutes: int = 20
+    model_settings: List[Dict[str, Any]] = Field(default_factory=list)
+    created_at: str
+    updated_at: str
     def extract_selected_evals(self, eval_ids) -> None:
         selected_evals: list[EvaluationItem] = []

uipath-2.1.53/src/uipath/_cli/_evals/_models/_evaluator_base_params.py ADDED Viewed

@@ -0,0 +1,16 @@
+from pydantic import BaseModel
+from uipath.eval.models.models import EvaluatorCategory, EvaluatorType
+class EvaluatorBaseParams(BaseModel):
+    """Parameters for initializing the base evaluator."""
+    id: str
+    category: EvaluatorCategory
+    evaluator_type: EvaluatorType
+    name: str
+    description: str
+    created_at: str
+    updated_at: str
+    target_output_key: str

uipath-2.1.53/src/uipath/_cli/_evals/_models/_output.py ADDED Viewed

@@ -0,0 +1,85 @@
+from typing import List, Optional
+from opentelemetry.sdk.trace import ReadableSpan
+from pydantic import BaseModel, ConfigDict, model_serializer
+from pydantic.alias_generators import to_camel
+from uipath._cli._runtime._contracts import UiPathRuntimeResult
+from uipath.eval.models.models import EvaluationResult, ScoreType
+class UiPathEvalRunExecutionOutput(BaseModel):
+    """Result of a single agent response."""
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+    execution_time: float
+    spans: list[ReadableSpan]
+    result: UiPathRuntimeResult
+class EvaluationResultDto(BaseModel):
+    model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)
+    score: float
+    details: Optional[str] = None
+    evaluation_time: Optional[float] = None
+    @model_serializer(mode="wrap")
+    def serialize_model(self, serializer, info):
+        data = serializer(self)
+        if self.details is None and isinstance(data, dict):
+            data.pop("details", None)
+        return data
+    @classmethod
+    def from_evaluation_result(
+        cls, evaluation_result: EvaluationResult
+    ) -> "EvaluationResultDto":
+        score_type = evaluation_result.score_type
+        score: float
+        if score_type == ScoreType.BOOLEAN:
+            score = 100 if evaluation_result.score else 0
+        elif score_type == ScoreType.ERROR:
+            score = 0
+        else:
+            score = evaluation_result.score
+        return cls(
+            score=score,
+            details=evaluation_result.details,
+            evaluation_time=evaluation_result.evaluation_time,
+        )
+class EvaluationRunResultDto(BaseModel):
+    model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)
+    evaluator_name: str
+    result: EvaluationResultDto
+class EvaluationRunResult(BaseModel):
+    model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)
+    evaluation_name: str
+    evaluation_run_results: List[EvaluationRunResultDto]
+class UiPathEvalOutput(BaseModel):
+    model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)
+    evaluation_set_name: str
+    score: float
+    evaluation_set_results: List[EvaluationRunResult]
+    def compute_average_score(self) -> None:
+        total_score = 0.0
+        total_count = 0
+        for evaluation_set_result in self.evaluation_set_results:
+            for evaluation_run_result in evaluation_set_result.evaluation_run_results:
+                total_score += evaluation_run_result.result.score
+                total_count += 1
+        self.score = total_score / total_count if total_count > 0 else 0.0

{uipath-2.1.52 → uipath-2.1.53}/src/uipath/_cli/_evals/_runtime.py RENAMED Viewed

@@ -1,10 +1,15 @@
+import json
 from collections import defaultdict
+from pathlib import Path
 from time import time
-from typing import Dict, Generic, List, Optional, Sequence, TypeVar
+from typing import Any, Dict, Generic, List, Optional, Sequence, TypeVar
 from opentelemetry.sdk.trace import ReadableSpan
 from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
+from ...eval.evaluators import BaseEvaluator
+from ...eval.models import EvaluationResult
+from ...eval.models.models import AgentExecution
 from .._runtime._contracts import (
     UiPathBaseRuntime,
     UiPathRuntimeContext,
@@ -13,8 +18,15 @@ from .._runtime._contracts import (
     UiPathRuntimeStatus,
 )
 from .._utils._eval_set import EvalHelpers
-from ._models import EvaluationItem
-from ._models._agent_execution_output import UiPathEvalRunExecutionOutput
+from ._evaluator_factory import EvaluatorFactory
+from ._models._evaluation_set import EvaluationItem, EvaluationSet
+from ._models._output import (
+    EvaluationResultDto,
+    EvaluationRunResult,
+    EvaluationRunResultDto,
+    UiPathEvalOutput,
+    UiPathEvalRunExecutionOutput,
+)
 T = TypeVar("T", bound=UiPathBaseRuntime)
 C = TypeVar("C", bound=UiPathRuntimeContext)
@@ -86,15 +98,36 @@ class UiPathEvalRuntime(UiPathBaseRuntime, Generic[T, C]):
         evaluation_set = EvalHelpers.load_eval_set(
             self.context.eval_set, self.context.eval_ids
         )
-        execution_output_list: list[UiPathEvalRunExecutionOutput] = []
+        evaluators = self._load_evaluators(evaluation_set)
+        results = UiPathEvalOutput(
+            evaluation_set_name=evaluation_set.name, score=0, evaluation_set_results=[]
+        )
         for eval_item in evaluation_set.evaluations:
-            execution_output = await self.execute_runtime(eval_item)
-            execution_output_list.append(execution_output)
+            evaluation_run_results = EvaluationRunResult(
+                evaluation_name=eval_item.name, evaluation_run_results=[]
+            )
+            results.evaluation_set_results.append(evaluation_run_results)
+            agent_execution_output = await self.execute_runtime(eval_item)
+            # we run each evaluator on the agent_output
+            for evaluator in evaluators:
+                evaluation_result = await self.run_evaluator(
+                    evaluator=evaluator,
+                    execution_output=agent_execution_output,
+                    eval_item=eval_item,
+                )
+                evaluation_run_results.evaluation_run_results.append(
+                    EvaluationRunResultDto(
+                        evaluator_name=evaluator.name,
+                        result=EvaluationResultDto.from_evaluation_result(
+                            evaluation_result
+                        ),
+                    )
+                )
+        results.compute_average_score()
         self.context.result = UiPathRuntimeResult(
-            output={
-                "results": execution_output_list,
-            },
+            output={**results.model_dump(by_alias=True)},
             status=UiPathRuntimeStatus.SUCCESSFUL,
         )
@@ -128,6 +161,65 @@ class UiPathEvalRuntime(UiPathBaseRuntime, Generic[T, C]):
             result=result,
         )
+    async def run_evaluator(
+        self,
+        evaluator: BaseEvaluator[Any],
+        execution_output: UiPathEvalRunExecutionOutput,
+        eval_item: EvaluationItem,
+    ) -> EvaluationResult:
+        agent_execution = AgentExecution(
+            agent_input=eval_item.inputs,
+            agent_output=execution_output.result.output or {},
+            agent_trace=execution_output.spans,
+        )
+        result = await evaluator.evaluate(
+            agent_execution=agent_execution,
+            # at the moment evaluation_criteria is always the expected output
+            evaluation_criteria=eval_item.expected_output,
+        )
+        return result
+    def _load_evaluators(
+        self, evaluation_set: EvaluationSet
+    ) -> List[BaseEvaluator[Any]]:
+        """Load evaluators referenced by the evaluation set."""
+        evaluators = []
+        evaluators_dir = Path(self.context.eval_set).parent.parent / "evaluators"  # type: ignore
+        evaluator_refs = set(evaluation_set.evaluator_refs)
+        found_evaluator_ids = set()
+        for file in evaluators_dir.glob("*.json"):
+            try:
+                with open(file, "r", encoding="utf-8") as f:
+                    data = json.load(f)
+            except json.JSONDecodeError as e:
+                raise ValueError(
+                    f"Invalid JSON in evaluator file '{file}': {str(e)}. "
+                    f"Please check the file for syntax errors."
+                ) from e
+            try:
+                evaluator_id = data.get("id")
+                if evaluator_id in evaluator_refs:
+                    evaluator = EvaluatorFactory.create_evaluator(data)
+                    evaluators.append(evaluator)
+                    found_evaluator_ids.add(evaluator_id)
+            except Exception as e:
+                raise ValueError(
+                    f"Failed to create evaluator from file '{file}': {str(e)}. "
+                    f"Please verify the evaluator configuration."
+                ) from e
+        missing_evaluators = evaluator_refs - found_evaluator_ids
+        if missing_evaluators:
+            raise ValueError(
+                f"Could not find the following evaluators: {missing_evaluators}"
+            )
+        return evaluators
     async def cleanup(self) -> None:
         """Cleanup runtime resources."""
         pass

{uipath-2.1.52 → uipath-2.1.53}/src/uipath/_cli/_runtime/_contracts.py RENAMED Viewed

@@ -125,8 +125,12 @@ class UiPathRuntimeResult(BaseModel):
     def to_dict(self) -> Dict[str, Any]:
         """Convert to dictionary format for output."""
+        output_data = self.output or {}
+        if isinstance(self.output, BaseModel):
+            output_data = self.output.model_dump()
         result = {
-            "output": self.output or {},
+            "output": output_data,
             "status": self.status,
         }
@@ -315,7 +319,7 @@ class UiPathRuntimeContext(BaseModel):
     chat_handler: Optional[UiPathConversationHandler] = None
     is_conversational: Optional[bool] = None
-    model_config = {"arbitrary_types_allowed": True, "extra": "allow"}
+    model_config = {"arbitrary_types_allowed": True}
     @classmethod
     def with_defaults(cls: type[C], config_path: Optional[str] = None, **kwargs) -> C:
@@ -595,7 +599,12 @@ class UiPathBaseRuntime(ABC):
             # Write the execution output to file if requested
             if self.context.execution_output_file:
                 with open(self.context.execution_output_file, "w") as f:
-                    json.dump(execution_result.output or {}, f, indent=2, default=str)
+                    if isinstance(execution_result.output, BaseModel):
+                        f.write(execution_result.output.model_dump())
+                    else:
+                        json.dump(
+                            execution_result.output or {}, f, indent=2, default=str
+                        )
             # Don't suppress exceptions
             return False

{uipath-2.1.52 → uipath-2.1.53}/src/uipath/_cli/_utils/_eval_set.py RENAMED Viewed

@@ -4,7 +4,7 @@ from typing import List, Optional
 import click
-from uipath._cli._evals._models import EvaluationSet
+from uipath._cli._evals._models._evaluation_set import EvaluationSet
 from uipath._cli._utils._console import ConsoleLogger
 console = ConsoleLogger()

{uipath-2.1.52 → uipath-2.1.53}/src/uipath/_cli/cli_eval.py RENAMED Viewed

@@ -2,7 +2,6 @@
 import ast
 import asyncio
 import os
-from datetime import datetime, timezone
 from typing import List, Optional
 import click
@@ -13,7 +12,7 @@ from uipath._cli._runtime._contracts import (
     UiPathRuntimeFactory,
 )
 from uipath._cli._runtime._runtime import UiPathScriptRuntime
-from uipath._cli.middlewares import MiddlewareResult, Middlewares
+from uipath._cli.middlewares import Middlewares
 from uipath.eval._helpers import auto_discover_entrypoint
 from .._utils.constants import ENV_JOB_ID
@@ -32,55 +31,6 @@ class LiteralOption(click.Option):
             raise click.BadParameter(value) from e
-def eval_agent_middleware(
-    entrypoint: Optional[str] = None,
-    eval_set: Optional[str] = None,
-    eval_ids: Optional[List[str]] = None,
-    workers: int = 8,
-    no_report: bool = False,
-    **kwargs,
-) -> MiddlewareResult:
-    """Middleware to run an evaluation set against the agent."""
-    timestamp = datetime.now(timezone.utc).strftime("%M-%H-%d-%m-%Y")
-    eval_context = UiPathEvalContext.with_defaults()
-    eval_context.no_report = no_report
-    eval_context.workers = workers
-    eval_context.eval_set = eval_set or EvalHelpers.auto_discover_eval_set()
-    eval_context.eval_ids = eval_ids
-    eval_context.execution_output_file = (
-        f"evals/results/{timestamp}.json" if not os.getenv("UIPATH_JOB_KEY") else None
-    )
-    runtime_entrypoint = entrypoint or auto_discover_entrypoint()
-    def generate_runtime_context(**context_kwargs) -> UiPathRuntimeContext:
-        runtime_context = UiPathRuntimeContext.with_defaults(**context_kwargs)
-        runtime_context.entrypoint = runtime_entrypoint
-        return runtime_context
-    try:
-        runtime_factory = UiPathRuntimeFactory(
-            UiPathScriptRuntime,
-            UiPathRuntimeContext,
-            context_generator=generate_runtime_context,
-        )
-        async def execute():
-            async with UiPathEvalRuntime.from_eval_context(
-                factory=runtime_factory, context=eval_context
-            ) as eval_runtime:
-                await eval_runtime.execute()
-        asyncio.run(execute())
-        return MiddlewareResult(should_continue=False)
-    except Exception as e:
-        return MiddlewareResult(
-            should_continue=False, error_message=f"Error running evaluation: {str(e)}"
-        )
 @click.command()
 @click.argument("entrypoint", required=False)
 @click.argument("eval_set", required=False)
@@ -97,6 +47,12 @@ def eval_agent_middleware(
     default=8,
     help="Number of parallel workers for running evaluations (default: 8)",
 )
+@click.option(
+    "--output-file",
+    required=False,
+    type=click.Path(exists=False),
+    help="File path where the output will be written",
+)
 @track(when=lambda *_a, **_kw: os.getenv(ENV_JOB_ID) is None)
 def eval(
     entrypoint: Optional[str],
@@ -104,6 +60,7 @@ def eval(
     eval_ids: List[str],
     no_report: bool,
     workers: int,
+    output_file: Optional[str],
 ) -> None:
     """Run an evaluation set against the agent.
@@ -121,21 +78,49 @@ def eval(
         eval_ids,
         no_report=no_report,
         workers=workers,
+        execution_output_file=output_file,
     )
-    if result.should_continue:
-        result = eval_agent_middleware(
-            entrypoint=entrypoint,
-            eval_set=eval_set,
-            eval_ids=eval_ids,
-            workers=workers,
-            no_report=no_report,
-        )
-    if result.should_continue:
-        console.error("Could not process the request with any available handler.")
     if result.error_message:
         console.error(result.error_message)
+    if result.should_continue:
+        def generate_runtime_context(**context_kwargs) -> UiPathRuntimeContext:
+            runtime_context = UiPathRuntimeContext.with_defaults(**context_kwargs)
+            runtime_context.entrypoint = runtime_entrypoint
+            return runtime_context
+        eval_context = UiPathEvalContext.with_defaults(
+            execution_output_file=output_file
+        )
+        eval_context.no_report = no_report
+        eval_context.workers = workers
+        eval_context.eval_set = eval_set or EvalHelpers.auto_discover_eval_set()
+        eval_context.eval_ids = eval_ids
+        runtime_entrypoint = entrypoint or auto_discover_entrypoint()
+        try:
+            runtime_factory = UiPathRuntimeFactory(
+                UiPathScriptRuntime,
+                UiPathRuntimeContext,
+                context_generator=generate_runtime_context,
+            )
+            async def execute():
+                async with UiPathEvalRuntime.from_eval_context(
+                    factory=runtime_factory, context=eval_context
+                ) as eval_runtime:
+                    await eval_runtime.execute()
+            asyncio.run(execute())
+        except Exception as e:
+            console.error(
+                f"Error: Unexpected error occurred - {str(e)}", include_traceback=True
+            )
     console.success("Evaluation completed successfully")

uipath-2.1.53/src/uipath/eval/evaluators/__init__.py ADDED Viewed

@@ -0,0 +1,15 @@
+"""UiPath evaluator implementations for agent performance evaluation."""
+from .base_evaluator import BaseEvaluator
+from .exact_match_evaluator import ExactMatchEvaluator
+from .json_similarity_evaluator import JsonSimilarityEvaluator
+from .llm_as_judge_evaluator import LlmAsAJudgeEvaluator
+from .trajectory_evaluator import TrajectoryEvaluator
+__all__ = [
+    "BaseEvaluator",
+    "ExactMatchEvaluator",
+    "JsonSimilarityEvaluator",
+    "LlmAsAJudgeEvaluator",
+    "TrajectoryEvaluator",
+]

uipath 2.1.52__tar.gz → 2.1.53__tar.gz

uipath 2.1.52.tar.gz → 2.1.53.tar.gz