PyPI - uipath - Versions diffs - 2.1.66__py3-none-any.whl → 2.1.67__py3-none-any.whl - Mend

uipath 2.1.66__py3-none-any.whl → 2.1.67__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

uipath/_cli/_evals/_evaluator_factory.py CHANGED Viewed

@@ -123,6 +123,22 @@ class EvaluatorFactory:
     @staticmethod
     def _create_trajectory_evaluator(
         base_params: EvaluatorBaseParams, data: Dict[str, Any]
-    ) -> TrajectoryEvaluator[Any]:
+    ) -> TrajectoryEvaluator:
         """Create a trajectory evaluator."""
-        raise NotImplementedError()
+        prompt = data.get("prompt", "")
+        if not prompt:
+            raise ValueError("Trajectory evaluator must include 'prompt' field")
+        model = data.get("model", "")
+        if not model:
+            raise ValueError("LLM evaluator must include 'model' field")
+        if model == "same-as-agent":
+            raise ValueError(
+                "'same-as-agent' model option is not supported by coded agents evaluations. Please select a specific model for the evaluator."
+            )
+        return TrajectoryEvaluator(
+            **base_params.model_dump(),
+            prompt=prompt,
+            model=model,
+        )

uipath/_cli/_evals/_models/_trajectory_span.py ADDED Viewed

@@ -0,0 +1,115 @@
+"""Trajectory evaluation span model for serializing span data in evaluations."""
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional
+from opentelemetry.sdk.trace import ReadableSpan
+from pydantic import BaseModel
+@dataclass
+class TrajectoryEvaluationSpan:
+    """Simplified span representation for trajectory evaluation.
+    Contains span information needed for evaluating agent execution paths,
+    excluding timestamps which are not useful for trajectory analysis.
+    """
+    name: str
+    status: str
+    attributes: Dict[str, Any]
+    parent_name: Optional[str] = None
+    events: Optional[List[Dict[str, Any]]] = None
+    def __post_init__(self):
+        """Initialize default values."""
+        if self.events is None:
+            self.events = []
+    @classmethod
+    def from_readable_span(
+        cls, span: ReadableSpan, parent_spans: Optional[Dict[int, str]] = None
+    ) -> "TrajectoryEvaluationSpan":
+        """Convert a ReadableSpan to a TrajectoryEvaluationSpan.
+        Args:
+            span: The OpenTelemetry ReadableSpan to convert
+            parent_spans: Optional mapping of span IDs to names for parent lookup
+        Returns:
+            TrajectoryEvaluationSpan with relevant data extracted
+        """
+        # Extract status
+        status_map = {0: "unset", 1: "ok", 2: "error"}
+        status = status_map.get(span.status.status_code.value, "unknown")
+        # Extract attributes - keep all attributes for now
+        attributes = {}
+        if span.attributes:
+            attributes = dict(span.attributes)
+        # Get parent name if available
+        parent_name = None
+        if span.parent and parent_spans and span.parent.span_id in parent_spans:
+            parent_name = parent_spans[span.parent.span_id]
+        # Extract events (without timestamps)
+        events = []
+        if hasattr(span, "events") and span.events:
+            for event in span.events:
+                event_data = {
+                    "name": event.name,
+                    "attributes": dict(event.attributes) if event.attributes else {},
+                }
+                events.append(event_data)
+        return cls(
+            name=span.name,
+            status=status,
+            attributes=attributes,
+            parent_name=parent_name,
+            events=events,
+        )
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary for JSON serialization."""
+        return {
+            "name": self.name,
+            "status": self.status,
+            "parent_name": self.parent_name,
+            "attributes": self.attributes,
+            "events": self.events,
+        }
+class TrajectoryEvaluationTrace(BaseModel):
+    """Container for a collection of trajectory evaluation spans."""
+    spans: List[TrajectoryEvaluationSpan]
+    @classmethod
+    def from_readable_spans(
+        cls, spans: List[ReadableSpan]
+    ) -> "TrajectoryEvaluationTrace":
+        """Convert a list of ReadableSpans to TrajectoryEvaluationTrace.
+        Args:
+            spans: List of OpenTelemetry ReadableSpans to convert
+        Returns:
+            TrajectoryEvaluationTrace with converted spans
+        """
+        # Create a mapping of span IDs to names for parent lookup
+        span_id_to_name = {span.get_span_context().span_id: span.name for span in spans}
+        evaluation_spans = [
+            TrajectoryEvaluationSpan.from_readable_span(span, span_id_to_name)
+            for span in spans
+        ]
+        return cls(spans=evaluation_spans)
+    class Config:
+        """Pydantic configuration."""
+        arbitrary_types_allowed = True

uipath/_cli/_evals/_runtime.py CHANGED Viewed

@@ -264,6 +264,7 @@ class UiPathEvalRuntime(UiPathBaseRuntime, Generic[T, C]):
             agent_input=eval_item.inputs,
             agent_output=execution_output.result.output or {},
             agent_trace=execution_output.spans,
+            expected_agent_behavior=eval_item.expected_agent_behavior,
         )
         result = await evaluator.evaluate(

uipath/eval/evaluators/trajectory_evaluator.py CHANGED Viewed

@@ -1,31 +1,66 @@
 """Trajectory evaluator for analyzing execution paths and decision sequences."""
-from typing import TypeVar
+import json
+from typing import Any, Optional
+from opentelemetry.sdk.trace import ReadableSpan
+from pydantic import field_validator
+from uipath._cli._evals._models._trajectory_span import TrajectoryEvaluationTrace
 from uipath.eval.models import EvaluationResult
-from ..models.models import AgentExecution
+from ..._services import UiPathLlmChatService
+from ..._utils.constants import COMMUNITY_agents_SUFFIX
+from ..models.models import AgentExecution, LLMResponse, NumericEvaluationResult
 from .base_evaluator import BaseEvaluator
-T = TypeVar("T")
-class TrajectoryEvaluator(BaseEvaluator[T]):
+class TrajectoryEvaluator(BaseEvaluator[dict[str, Any]]):
     """Evaluator that analyzes the trajectory/path taken to reach outputs."""
+    prompt: str
+    model: str
+    expected_agent_behavior_placeholder: str = "{{ExpectedAgentBehavior}}"
+    agent_run_history_placeholder: str = "{{AgentRunHistory}}"
+    llm: Optional[UiPathLlmChatService] = None
+    @field_validator("prompt")
+    @classmethod
+    def validate_prompt_placeholder(cls, v: str) -> str:
+        """Validate that prompt contains required placeholders."""
+        if "{{ExpectedAgentBehavior}}" not in v or "{{AgentRunHistory}}" not in v:
+            raise ValueError(
+                "Prompt must contain {ExpectedAgentBehavior} and {{AgentRunHistory}} placeholders"
+            )
+        return v
+    def model_post_init(self, __context):
+        """Initialize the LLM service after model creation."""
+        super().model_post_init(__context)
+        self._initialize_llm()
+    def _initialize_llm(self):
+        """Initialize the LLM used for evaluation."""
+        from uipath import UiPath
+        uipath = UiPath()
+        self.llm = uipath.llm
     async def evaluate(
-        self, agent_execution: AgentExecution, evaluation_criteria: T
+        self,
+        agent_execution: AgentExecution,
+        evaluation_criteria: dict[str, Any],
     ) -> EvaluationResult:
         """Evaluate using trajectory analysis.
-        Analyzes the execution path and decision sequence taken by the agent
-        to assess the quality of the reasoning process.
+        Analyzes the execution path and decision sequence taken by the agent.
         Args:
             agent_execution: The execution details containing:
                 - agent_input: The input received by the agent
                 - actual_output: The actual output from the agent
-                - spans: The execution spans to use for the evaluation
+                - agent_trace: The execution spans to use for the evaluation
+                - expected_agent_behavior: The expected agent behavior
             evaluation_criteria: The criteria to evaluate
         Returns:
             EvaluationResult: Score based on trajectory analysis
@@ -33,4 +68,92 @@ class TrajectoryEvaluator(BaseEvaluator[T]):
         Raises:
             NotImplementedError: This evaluator is not yet implemented
         """
-        raise NotImplementedError()
+        evaluation_prompt = self._create_evaluation_prompt(
+            expected_agent_behavior=agent_execution.expected_agent_behavior,
+            agent_run_history=agent_execution.agent_trace,
+        )
+        llm_response = await self._get_llm_response(evaluation_prompt)
+        return NumericEvaluationResult(
+            score=llm_response.score,
+            details=llm_response.justification,
+        )
+    def _create_evaluation_prompt(
+        self,
+        expected_agent_behavior: Any,
+        agent_run_history: Any,
+    ) -> str:
+        """Create the evaluation prompt for the LLM."""
+        formatted_prompt = self.prompt.replace(
+            self.expected_agent_behavior_placeholder,
+            str(expected_agent_behavior),
+        )
+        # Trim extra properties from the spans (such as timestamps which are not relevant to the eval)
+        if (
+            isinstance(agent_run_history, list)
+            and agent_run_history
+            and isinstance(agent_run_history[0], ReadableSpan)
+        ):
+            trajectory_trace = TrajectoryEvaluationTrace.from_readable_spans(
+                agent_run_history
+            )
+            agent_run_history = str(trajectory_trace.spans)
+        else:
+            agent_run_history = str(agent_run_history)
+        formatted_prompt = formatted_prompt.replace(
+            self.agent_run_history_placeholder,
+            agent_run_history,
+        )
+        return formatted_prompt
+    async def _get_llm_response(self, evaluation_prompt: str) -> LLMResponse:
+        """Get response from the LLM.
+        Args:
+            evaluation_prompt: The formatted prompt to send to the LLM
+        Returns:
+            LLMResponse with score and justification
+        """
+        if not self.llm:
+            raise ValueError("LLM service not initialized")
+        model = self.model
+        if model.endswith(COMMUNITY_agents_SUFFIX):
+            model = model.replace(COMMUNITY_agents_SUFFIX, "")
+        # Prepare the request
+        request_data = {
+            "model": model,
+            "messages": [{"role": "user", "content": evaluation_prompt}],
+            "response_format": {
+                "type": "json_schema",
+                "json_schema": {
+                    "name": "evaluation_response",
+                    "schema": {
+                        "type": "object",
+                        "properties": {
+                            "score": {
+                                "type": "number",
+                                "minimum": 0,
+                                "maximum": 100,
+                                "description": "Score between 0 and 100",
+                            },
+                            "justification": {
+                                "type": "string",
+                                "description": "Explanation for the score",
+                            },
+                        },
+                        "required": ["score", "justification"],
+                    },
+                },
+            },
+        }
+        response = await self.llm.chat_completions(**request_data)
+        return LLMResponse(**json.loads(response.choices[-1].message.content))

uipath/eval/models/models.py CHANGED Viewed

@@ -15,6 +15,7 @@ class AgentExecution(BaseModel):
     agent_input: Optional[Dict[str, Any]]
     agent_output: Dict[str, Any]
     agent_trace: list[ReadableSpan]
+    expected_agent_behavior: Optional[str] = None
 class LLMResponse(BaseModel):

{uipath-2.1.66.dist-info → uipath-2.1.67.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: uipath
-Version: 2.1.66
+Version: 2.1.67
 Summary: Python SDK and CLI for UiPath Platform, enabling programmatic interaction with automation services, process management, and deployment tools.
 Project-URL: Homepage, https://uipath.com
 Project-URL: Repository, https://github.com/UiPath/uipath-python

{uipath-2.1.66.dist-info → uipath-2.1.67.dist-info}/RECORD RENAMED Viewed

@@ -44,13 +44,14 @@ uipath/_cli/_dev/_terminal/_styles/terminal.tcss,sha256=ktVpKwXIXw2VZp8KIZD6fO9i
 uipath/_cli/_dev/_terminal/_utils/_chat.py,sha256=YUZxYVdmEManwHDuZsczJT1dWIYE1dVBgABlurwMFcE,8493
 uipath/_cli/_dev/_terminal/_utils/_exporter.py,sha256=oI6D_eMwrh_2aqDYUh4GrJg8VLGrLYhDahR-_o0uJns,4144
 uipath/_cli/_dev/_terminal/_utils/_logger.py,sha256=jeNShEED27cNIHTe_NNx-2kUiXpSLTmi0onM6tVkqRM,888
-uipath/_cli/_evals/_evaluator_factory.py,sha256=2lOalabNSzmnnwr0SfoPWvFWXs0Ly857XBmPuOdhFBQ,4729
+uipath/_cli/_evals/_evaluator_factory.py,sha256=OWfLxPOEcDn4qv5m3n7LBfIBKcdTPml2ZCLcsqSymlU,5329
 uipath/_cli/_evals/_progress_reporter.py,sha256=hpSt0CXpIoFJGsbqZkqmwyGO_TBNesbWKlvDJUEDxd8,16455
-uipath/_cli/_evals/_runtime.py,sha256=n14A1gTFtZekcdpswB_plpbaB81Q44_mvHqXQEqrB8o,11347
+uipath/_cli/_evals/_runtime.py,sha256=WKcBT6DGzNRjgEOpmH0b7RoEbEsHMyAbcAMs8b_CAI0,11418
 uipath/_cli/_evals/_models/_evaluation_set.py,sha256=mwcTstHuyHd7ys_nLzgCNKBAsS4ns9UL2TF5Oq2Cc64,1758
 uipath/_cli/_evals/_models/_evaluator_base_params.py,sha256=lTYKOV66tcjW85KHTyOdtF1p1VDaBNemrMAvH8bFIFc,382
 uipath/_cli/_evals/_models/_output.py,sha256=LjwMBGI78sDFa2Dl8b9ReXJmjig57pdLWpuiwChrRLo,3096
 uipath/_cli/_evals/_models/_sw_reporting.py,sha256=tSBLQFAdOIun8eP0vsqt56K6bmCZz_uMaWI3hskg_24,536
+uipath/_cli/_evals/_models/_trajectory_span.py,sha256=8ukM8sB9rvzBMHfC_gnexAC3xlp4uMDevKZrRzcgrm4,3637
 uipath/_cli/_push/sw_file_handler.py,sha256=iE8Sk1Z-9hxmLFFj3j-k4kTK6TzNFP6hUCmxTudG6JQ,18251
 uipath/_cli/_runtime/_contracts.py,sha256=xIcKq0xRbenzmJkZQO8blKwZ3b72Ntm4YONSYwaI-kg,28880
 uipath/_cli/_runtime/_escalation.py,sha256=x3vI98qsfRA-fL_tNkRVTFXioM5Gv2w0GFcXJJ5eQtg,7981
@@ -126,9 +127,9 @@ uipath/eval/evaluators/deterministic_evaluator_base.py,sha256=yDWTMU1mG-93D6DscA
 uipath/eval/evaluators/exact_match_evaluator.py,sha256=Qfz-kIUf80PKjAuge1Tc1GvN6kDB6hHveBZ86w_2How,1512
 uipath/eval/evaluators/json_similarity_evaluator.py,sha256=cP4kpN-UIf690V5dq4LaCjJc2zFx-nEffUclCwDdlhM,6607
 uipath/eval/evaluators/llm_as_judge_evaluator.py,sha256=l0bbn8ZLi9ZTXcgr7tJ2tsCvHFqIIeGa7sobaAHgI2Y,4927
-uipath/eval/evaluators/trajectory_evaluator.py,sha256=7boiKzjLpQPs8M8y2PGnI3bZQ1MEwR6QRZpXyKQcR7Y,1244
+uipath/eval/evaluators/trajectory_evaluator.py,sha256=IylFm4yeNcVYgtmBzvzFn4Y2GXdSNnvAF8F4bCvPYdw,5774
 uipath/eval/models/__init__.py,sha256=x360CDZaRjUL3q3kh2CcXYYrQ47jwn6p6JnmhEIvMlA,419
-uipath/eval/models/models.py,sha256=rWwQaXLo3hJAgIdLoU2NMOI6vRW6fwWV7YfK_rnYHCc,2836
+uipath/eval/models/models.py,sha256=is2wo-i0ld8Y_oZpbw5nG4cTXBz4bDLNxN6IjrfRcyM,2886
 uipath/models/__init__.py,sha256=d_DkK1AtRUetM1t2NrH5UKgvJOBiynzaKnK5pMY7aIc,1289
 uipath/models/action_schema.py,sha256=tBn1qQ3NQLU5nwWlBIzIKIx3XK5pO_D1S51IjFlZ1FA,610
 uipath/models/actions.py,sha256=1vRsJ3JSmMdPkbiYAiHzY8K44vmW3VlMsmQUBAkSgrQ,3141
@@ -155,8 +156,8 @@ uipath/tracing/_traced.py,sha256=yBIY05PCCrYyx50EIHZnwJaKNdHPNx-YTR1sHQl0a98,199
 uipath/tracing/_utils.py,sha256=qd7N56tg6VXQ9pREh61esBgUWLNA0ssKsE0QlwrRWFM,11974
 uipath/utils/__init__.py,sha256=VD-KXFpF_oWexFg6zyiWMkxl2HM4hYJMIUDZ1UEtGx0,105
 uipath/utils/_endpoints_manager.py,sha256=iRTl5Q0XAm_YgcnMcJOXtj-8052sr6jpWuPNz6CgT0Q,8408
-uipath-2.1.66.dist-info/METADATA,sha256=z7MeB9RnfKf0m7T_AkTDc3jUJJt-yurcdkUwndUw7gM,6482
-uipath-2.1.66.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-uipath-2.1.66.dist-info/entry_points.txt,sha256=9C2_29U6Oq1ExFu7usihR-dnfIVNSKc-0EFbh0rskB4,43
-uipath-2.1.66.dist-info/licenses/LICENSE,sha256=-KBavWXepyDjimmzH5fVAsi-6jNVpIKFc2kZs0Ri4ng,1058
-uipath-2.1.66.dist-info/RECORD,,
+uipath-2.1.67.dist-info/METADATA,sha256=l-1OOoU7DUoo1PenVsoRIorYjP0wFq861zJeP4obFVs,6482
+uipath-2.1.67.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+uipath-2.1.67.dist-info/entry_points.txt,sha256=9C2_29U6Oq1ExFu7usihR-dnfIVNSKc-0EFbh0rskB4,43
+uipath-2.1.67.dist-info/licenses/LICENSE,sha256=-KBavWXepyDjimmzH5fVAsi-6jNVpIKFc2kZs0Ri4ng,1058
+uipath-2.1.67.dist-info/RECORD,,

{uipath-2.1.66.dist-info → uipath-2.1.67.dist-info}/WHEEL RENAMED Viewed

File without changes

{uipath-2.1.66.dist-info → uipath-2.1.67.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{uipath-2.1.66.dist-info → uipath-2.1.67.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

uipath 2.1.66__py3-none-any.whl → 2.1.67__py3-none-any.whl

uipath 2.1.66__py3-none-any.whl → 2.1.67__py3-none-any.whl