uipath 2.1.107__py3-none-any.whl → 2.1.109__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of uipath might be problematic.
- uipath/_cli/__init__.py +4 -0
- uipath/_cli/_evals/_console_progress_reporter.py +2 -2
- uipath/_cli/_evals/_evaluator_factory.py +314 -29
- uipath/_cli/_evals/_helpers.py +194 -0
- uipath/_cli/_evals/_models/_evaluation_set.py +73 -7
- uipath/_cli/_evals/_models/_evaluator.py +183 -9
- uipath/_cli/_evals/_models/_evaluator_base_params.py +3 -3
- uipath/_cli/_evals/_models/_output.py +87 -3
- uipath/_cli/_evals/_progress_reporter.py +288 -28
- uipath/_cli/_evals/_runtime.py +80 -26
- uipath/_cli/_evals/mocks/input_mocker.py +1 -3
- uipath/_cli/_evals/mocks/llm_mocker.py +2 -2
- uipath/_cli/_evals/mocks/mocker_factory.py +2 -2
- uipath/_cli/_evals/mocks/mockito_mocker.py +2 -2
- uipath/_cli/_evals/mocks/mocks.py +5 -3
- uipath/_cli/_push/models.py +17 -0
- uipath/_cli/_push/sw_file_handler.py +336 -3
- uipath/_cli/_runtime/_contracts.py +25 -5
- uipath/_cli/_templates/custom_evaluator.py.template +65 -0
- uipath/_cli/_utils/_eval_set.py +30 -9
- uipath/_cli/_utils/_resources.py +21 -0
- uipath/_cli/_utils/_studio_project.py +18 -0
- uipath/_cli/cli_add.py +114 -0
- uipath/_cli/cli_eval.py +5 -1
- uipath/_cli/cli_pull.py +11 -26
- uipath/_cli/cli_push.py +2 -0
- uipath/_cli/cli_register.py +45 -0
- uipath/_events/_events.py +6 -5
- uipath/_resources/SDK_REFERENCE.md +0 -97
- uipath/_uipath.py +10 -37
- uipath/_utils/constants.py +4 -0
- uipath/eval/_helpers/evaluators_helpers.py +494 -0
- uipath/eval/_helpers/helpers.py +30 -2
- uipath/eval/evaluators/__init__.py +60 -5
- uipath/eval/evaluators/base_evaluator.py +546 -44
- uipath/eval/evaluators/contains_evaluator.py +80 -0
- uipath/eval/evaluators/exact_match_evaluator.py +43 -12
- uipath/eval/evaluators/json_similarity_evaluator.py +41 -12
- uipath/eval/evaluators/legacy_base_evaluator.py +89 -0
- uipath/eval/evaluators/{deterministic_evaluator_base.py → legacy_deterministic_evaluator_base.py} +2 -2
- uipath/eval/evaluators/legacy_exact_match_evaluator.py +37 -0
- uipath/eval/evaluators/legacy_json_similarity_evaluator.py +151 -0
- uipath/eval/evaluators/legacy_llm_as_judge_evaluator.py +137 -0
- uipath/eval/evaluators/{trajectory_evaluator.py → legacy_trajectory_evaluator.py} +5 -6
- uipath/eval/evaluators/llm_as_judge_evaluator.py +143 -78
- uipath/eval/evaluators/llm_judge_output_evaluator.py +112 -0
- uipath/eval/evaluators/llm_judge_trajectory_evaluator.py +142 -0
- uipath/eval/evaluators/output_evaluator.py +117 -0
- uipath/eval/evaluators/tool_call_args_evaluator.py +82 -0
- uipath/eval/evaluators/tool_call_count_evaluator.py +87 -0
- uipath/eval/evaluators/tool_call_order_evaluator.py +84 -0
- uipath/eval/evaluators/tool_call_output_evaluator.py +87 -0
- uipath/eval/evaluators_types/ContainsEvaluator.json +73 -0
- uipath/eval/evaluators_types/ExactMatchEvaluator.json +89 -0
- uipath/eval/evaluators_types/JsonSimilarityEvaluator.json +81 -0
- uipath/eval/evaluators_types/LLMJudgeOutputEvaluator.json +110 -0
- uipath/eval/evaluators_types/LLMJudgeSimulationTrajectoryEvaluator.json +88 -0
- uipath/eval/evaluators_types/LLMJudgeStrictJSONSimilarityOutputEvaluator.json +110 -0
- uipath/eval/evaluators_types/LLMJudgeTrajectoryEvaluator.json +88 -0
- uipath/eval/evaluators_types/ToolCallArgsEvaluator.json +131 -0
- uipath/eval/evaluators_types/ToolCallCountEvaluator.json +104 -0
- uipath/eval/evaluators_types/ToolCallOrderEvaluator.json +100 -0
- uipath/eval/evaluators_types/ToolCallOutputEvaluator.json +124 -0
- uipath/eval/evaluators_types/generate_types.py +31 -0
- uipath/eval/models/__init__.py +16 -1
- uipath/eval/models/llm_judge_types.py +196 -0
- uipath/eval/models/models.py +109 -7
- {uipath-2.1.107.dist-info → uipath-2.1.109.dist-info}/METADATA +1 -1
- {uipath-2.1.107.dist-info → uipath-2.1.109.dist-info}/RECORD +72 -40
- {uipath-2.1.107.dist-info → uipath-2.1.109.dist-info}/WHEEL +0 -0
- {uipath-2.1.107.dist-info → uipath-2.1.109.dist-info}/entry_points.txt +0 -0
- {uipath-2.1.107.dist-info → uipath-2.1.109.dist-info}/licenses/LICENSE +0 -0
uipath/eval/evaluators/{trajectory_evaluator.py → legacy_trajectory_evaluator.py}

@@ -16,11 +16,11 @@ from ..models.models import (
     NumericEvaluationResult,
     TrajectoryEvaluationTrace,
 )
-from .
+from .legacy_base_evaluator import LegacyBaseEvaluator
 
 
-class TrajectoryEvaluator(BaseEvaluator[dict[str, Any]]):
-    """
+class LegacyTrajectoryEvaluator(LegacyBaseEvaluator[dict[str, Any]]):
+    """Legacy evaluator that analyzes the trajectory/path taken to reach outputs."""
 
     prompt: str
     model: str
@@ -38,7 +38,7 @@ class TrajectoryEvaluator(BaseEvaluator[dict[str, Any]]):
         )
         return v
 
-    def model_post_init(self, __context):
+    def model_post_init(self, __context: Any):
         """Initialize the LLM service after model creation."""
         super().model_post_init(__context)
         self._initialize_llm()
@@ -76,7 +76,6 @@ class TrajectoryEvaluator(BaseEvaluator[dict[str, Any]]):
             expected_agent_behavior=agent_execution.expected_agent_behavior,
             agent_run_history=agent_execution.agent_trace,
         )
-
         llm_response = await self._get_llm_response(evaluation_prompt)
 
         return NumericEvaluationResult(
@@ -160,4 +159,4 @@ class TrajectoryEvaluator(BaseEvaluator[dict[str, Any]]):
         }
 
         response = await self.llm.chat_completions(**request_data)
-        return LLMResponse(**json.loads(response.choices[-1].message.content))
+        return LLMResponse(**json.loads(response.choices[-1].message.content or "{}"))
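
The only behavioral change in the final hunk above is defensive: `message.content` on a chat-completion choice may be `None`, and `json.loads(None)` raises a `TypeError`, so the new code substitutes an empty JSON object. A minimal standalone sketch of the same guard (the `Message` dataclass below is a stand-in, not the SDK's type):

import json
from dataclasses import dataclass
from typing import Optional

@dataclass
class Message:
    content: Optional[str]  # chat-completion content may legitimately be None

def parse_payload(message: Message) -> dict:
    # Mirrors the new line in the diff: fall back to "{}" when the model
    # returned no content, so json.loads always receives a string.
    return json.loads(message.content or "{}")

assert parse_payload(Message(content='{"score": 80}')) == {"score": 80}
assert parse_payload(Message(content=None)) == {}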
uipath/eval/evaluators/llm_as_judge_evaluator.py

@@ -1,137 +1,202 @@
 """LLM-as-a-judge evaluator for subjective quality assessment of agent outputs."""
 
 import json
-from
+from abc import abstractmethod
+from collections.abc import Callable
+from typing import Any, TypeVar
+
+from pydantic import BaseModel, Field, model_validator
+
+from .._helpers.evaluators_helpers import COMMUNITY_agents_SUFFIX
+from ..models import (
+    AgentExecution,
+    EvaluationResult,
+    LLMResponse,
+    NumericEvaluationResult,
+)
+from ..models.llm_judge_types import (
+    LLMJudgeOutputSchema,
+    LLMJudgePromptTemplates,
+)
+from ..models.models import UiPathEvaluationError, UiPathEvaluationErrorCategory
+from .base_evaluator import (
+    BaseEvaluationCriteria,
+    BaseEvaluator,
+    BaseEvaluatorConfig,
+)
+
+T = TypeVar("T", bound=BaseEvaluationCriteria)
+
+
+class BaseLLMJudgeEvaluatorConfig(BaseEvaluatorConfig[T]):
+    """Base config for all LLM evaluators.
+
+    Generic over T (evaluation criteria type) to ensure type safety between
+    the config's default_evaluation_criteria and the evaluator's expected criteria type.
+    """
 
-
+    prompt: str
+    model: str = ""
+    temperature: float = 0.0
+    max_tokens: int | None = None
 
-from uipath.eval.models import NumericEvaluationResult
 
-
-from ..._utils.constants import COMMUNITY_agents_SUFFIX
-from ..models.models import AgentExecution, EvaluationResult, LLMResponse
-from .base_evaluator import BaseEvaluator
+C = TypeVar("C", bound=BaseLLMJudgeEvaluatorConfig[Any])
 
 
-class
-"""
+class LLMJudgeMixin(BaseEvaluator[T, C, str]):
+    """Mixin that provides common LLM judge functionality."""
 
-
-
+    system_prompt: str = LLMJudgePromptTemplates.LLM_JUDGE_SYSTEM_PROMPT
+    output_schema: type[BaseModel] = LLMJudgeOutputSchema
     actual_output_placeholder: str = "{{ActualOutput}}"
     expected_output_placeholder: str = "{{ExpectedOutput}}"
-
+    llm_service: Callable[..., Any] | None = Field(
+        default=None, exclude=True, description="The LLM service for evaluation"
+    )
 
-    @
-
-    def validate_prompt_placeholders(cls, v: str) -> str:
+    @model_validator(mode="after")
+    def validate_prompt_placeholders(self) -> "LLMJudgeMixin[T, C]":
         """Validate that prompt contains required placeholders."""
-        if
-
-
+        if (
+            self.actual_output_placeholder not in self.evaluator_config.prompt
+            or self.expected_output_placeholder not in self.evaluator_config.prompt
+        ):
+            raise UiPathEvaluationError(
+                code="INVALID_PROMPT_PLACEHOLDERS",
+                title="Prompt must contain both {ActualOutput} and {ExpectedOutput} placeholders",
+                detail="Prompt must contain both {ActualOutput} and {ExpectedOutput} placeholders",
+                category=UiPathEvaluationErrorCategory.USER,
             )
-        return
+        return self
 
-    def model_post_init(self, __context):
-        """Initialize the LLM service
+    def model_post_init(self, __context: Any) -> None:
+        """Initialize the LLM service if not provided."""
         super().model_post_init(__context)
-        self.
+        if self.llm_service is None:
+            self.llm_service = self._get_llm_service()
 
-    def
-        """
+    def _get_llm_service(self):
+        """Get the LLM service from the UiPath instance."""
         from uipath import UiPath
 
-
-
+        try:
+            uipath = UiPath()
+            return uipath.llm.chat_completions
+        except Exception as e:
+            raise UiPathEvaluationError(
+                code="FAILED_TO_GET_LLM_SERVICE",
+                title="Failed to get LLM service from the SDK and no otherLLM service provided",
+                detail=f"Error: {e}",
+                category=UiPathEvaluationErrorCategory.SYSTEM,
+            ) from e
+
+    @abstractmethod
+    def _get_actual_output(self, agent_execution: AgentExecution) -> Any:
+        """Get the actual output from the agent execution. Must be implemented by concrete evaluator classes."""
+        pass
+
+    @abstractmethod
+    def _get_expected_output(self, evaluation_criteria: T) -> Any:
+        """Get the expected output from the evaluation criteria. Must be implemented by concrete evaluator classes."""
+        pass
 
     async def evaluate(
         self,
         agent_execution: AgentExecution,
-        evaluation_criteria:
+        evaluation_criteria: T,
     ) -> EvaluationResult:
-        """Evaluate using an LLM as a judge.
-
-        Sends the formatted prompt to the configured LLM and expects a JSON response
-        with a numerical score (0-100) and justification.
-
-        agent_execution: The execution details containing:
-            - agent_input: The input received by the agent
-            - actual_output: The actual output from the agent
-            - spans: The execution spans to use for the evaluation
-        evaluation_criteria: The criteria to evaluate
-
-        Returns:
-            EvaluationResult: Numerical score with LLM justification as details
-        """
-        # Create the evaluation prompt
+        """Evaluate using an LLM as a judge."""
         evaluation_prompt = self._create_evaluation_prompt(
-
-
+            agent_execution=agent_execution,
+            evaluation_criteria=evaluation_criteria,
        )
 
         llm_response = await self._get_llm_response(evaluation_prompt)
+        validated_justification = self.validate_justification(
+            llm_response.justification
+        )
 
         return NumericEvaluationResult(
-            score=llm_response.score,
-            details=
+            score=max(0.0, min(1.0, round(llm_response.score / 100.0, 2))),
+            details=validated_justification,
         )
 
     def _create_evaluation_prompt(
-        self,
+        self,
+        agent_execution: AgentExecution,
+        evaluation_criteria: T,
     ) -> str:
         """Create the evaluation prompt for the LLM."""
-        formatted_prompt = self.prompt.replace(
+        formatted_prompt = self.evaluator_config.prompt.replace(
             self.actual_output_placeholder,
-            str(
+            str(self._get_actual_output(agent_execution)),
         )
         formatted_prompt = formatted_prompt.replace(
             self.expected_output_placeholder,
-            str(
+            str(self._get_expected_output(evaluation_criteria)),
         )
 
         return formatted_prompt
 
     async def _get_llm_response(self, evaluation_prompt: str) -> LLMResponse:
-        """Get response from the LLM.
-
-        Args:
-            evaluation_prompt: The formatted prompt to send to the LLM
-
-        Returns:
-            LLMResponse with score and justification
-        """
+        """Get response from the LLM."""
         # remove community-agents suffix from llm model name
-        model = self.model
+        model = self.evaluator_config.model
         if model.endswith(COMMUNITY_agents_SUFFIX):
             model = model.replace(COMMUNITY_agents_SUFFIX, "")
 
         # Prepare the request
         request_data = {
             "model": model,
-            "messages": [
+            "messages": [
+                {"role": "system", "content": self.system_prompt},
+                {"role": "user", "content": evaluation_prompt},
+            ],
             "response_format": {
                 "type": "json_schema",
                 "json_schema": {
                     "name": "evaluation_response",
-                    "schema":
-                        "type": "object",
-                        "properties": {
-                            "score": {
-                                "type": "number",
-                                "minimum": 0,
-                                "maximum": 100,
-                                "description": "Score between 0 and 100",
-                            },
-                            "justification": {
-                                "type": "string",
-                                "description": "Explanation for the score",
-                            },
-                        },
-                        "required": ["score", "justification"],
-                    },
+                    "schema": self.output_schema.model_json_schema(),
                 },
             },
+            "max_tokens": self.evaluator_config.max_tokens,
+            "temperature": self.evaluator_config.temperature,
         }
 
-
-
+        if self.llm_service is None:
+            raise UiPathEvaluationError(
+                code="LLM_SERVICE_NOT_INITIALIZED",
+                title="LLM service not initialized",
+                detail="LLM service not initialized",
+                category=UiPathEvaluationErrorCategory.SYSTEM,
+            )
+
+        try:
+            response = await self.llm_service(**request_data)
+        except Exception as e:
+            raise UiPathEvaluationError(
+                code="FAILED_TO_GET_LLM_RESPONSE",
+                title="Failed to get LLM response",
+                detail=f"Error: {e}",
+                category=UiPathEvaluationErrorCategory.SYSTEM,
+            ) from e
+
+        try:
+            content = response.choices[-1].message.content
+            if content is None:
+                raise UiPathEvaluationError(
+                    code="EMPTY_LLM_RESPONSE",
+                    title="Empty LLM response",
+                    detail="The LLM response message content was None.",
+                    category=UiPathEvaluationErrorCategory.SYSTEM,
+                )
+            parsed_response = json.loads(str(content))
+        except Exception as e:
+            raise UiPathEvaluationError(
+                code="FAILED_TO_PARSE_LLM_RESPONSE",
+                title="Failed to parse LLM response",
+                detail=f"Error: {e}",
+                category=UiPathEvaluationErrorCategory.SYSTEM,
+            ) from e
+        return LLMResponse(**parsed_response)
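
Two details of the rewritten mixin are easy to miss in the diff: the user prompt is built by literal replacement of the `{{ActualOutput}}` and `{{ExpectedOutput}}` placeholders, and the judge's 0-100 score is divided by 100, rounded, and clamped before being wrapped in a `NumericEvaluationResult`. A small self-contained sketch of both steps, using illustrative helper names that are not part of the SDK:

def fill_prompt(prompt: str, actual: object, expected: object) -> str:
    # Literal placeholder replacement, as in _create_evaluation_prompt.
    prompt = prompt.replace("{{ActualOutput}}", str(actual))
    return prompt.replace("{{ExpectedOutput}}", str(expected))

def normalize_score(raw_score: float) -> float:
    # Divide by 100, round to two decimals, clamp to [0, 1], as in evaluate().
    return max(0.0, min(1.0, round(raw_score / 100.0, 2)))

print(fill_prompt("Compare {{ActualOutput}} with {{ExpectedOutput}}.", {"a": 1}, {"a": 2}))
print(normalize_score(87))   # 0.87
print(normalize_score(130))  # clamped to 1.0

Clamping after rounding means an out-of-range judge score can never push the stored result outside the 0.0-1.0 range.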
uipath/eval/evaluators/llm_judge_output_evaluator.py

@@ -0,0 +1,112 @@
+"""LLM judge output evaluators for evaluating agent outputs."""
+
+from typing import TypeVar
+
+from pydantic import BaseModel
+
+from uipath.eval.models import EvaluatorType
+
+from ..models import AgentExecution, EvaluationResult
+from ..models.llm_judge_types import (
+    LLMJudgeOutputSchema,
+    LLMJudgePromptTemplates,
+    LLMJudgeStrictJSONSimilarityOutputSchema,
+)
+from .llm_as_judge_evaluator import (
+    BaseLLMJudgeEvaluatorConfig,
+    LLMJudgeMixin,
+)
+from .output_evaluator import (
+    OutputEvaluationCriteria,
+    OutputEvaluator,
+    OutputEvaluatorConfig,
+)
+
+
+class BaseLLMJudgeOutputCriteriaEvaluatorConfig(
+    OutputEvaluatorConfig[OutputEvaluationCriteria],
+    BaseLLMJudgeEvaluatorConfig[OutputEvaluationCriteria],
+):
+    """Base configuration for LLM judge output criteria evaluators."""
+
+    pass
+
+
+class LLMJudgeOutputEvaluatorConfig(BaseLLMJudgeOutputCriteriaEvaluatorConfig):
+    """Configuration for the LLM judge output evaluator."""
+
+    name: str = "LLMJudgeOutputEvaluator"
+    prompt: str = LLMJudgePromptTemplates.LLM_JUDGE_DEFAULT_USER_PROMPT
+
+
+class LLMJudgeStrictJSONSimilarityOutputEvaluatorConfig(LLMJudgeOutputEvaluatorConfig):
+    """Configuration for the LLM judge strict JSON similarity output evaluator."""
+
+    name: str = "LLMJudgeStrictJSONSimilarityOutputEvaluator"
+    prompt: str = (
+        LLMJudgePromptTemplates.LLM_JUDGE_STRICT_JSON_SIMILARITY_DEFAULT_USER_PROMPT
+    )
+
+
+OC = TypeVar("OC", bound=LLMJudgeOutputEvaluatorConfig)
+
+
+class BaseLLMOutputEvaluator(
+    OutputEvaluator[OutputEvaluationCriteria, OC, str],
+    LLMJudgeMixin[OutputEvaluationCriteria, OC],
+):
+    """Base class for LLM judge output evaluators that contains all shared functionality.
+
+    This class encapsulates the common evaluation logic for output-based LLM evaluators,
+    combining OutputEvaluator (for output extraction) with LLMJudgeMixin (for LLM functionality).
+    """
+
+    @classmethod
+    def get_evaluator_id(cls) -> str:
+        """Get the evaluator id."""
+        return EvaluatorType.LLM_JUDGE_OUTPUT.value
+
+    async def evaluate(
+        self,
+        agent_execution: AgentExecution,
+        evaluation_criteria: OutputEvaluationCriteria,
+    ) -> EvaluationResult:
+        """Evaluate using an LLM as a judge."""
+        # Explicitly delegate to LLMJudgeMixin's evaluate method to override BaseEvaluator
+        return await LLMJudgeMixin.evaluate(self, agent_execution, evaluation_criteria)
+
+
+class LLMJudgeOutputEvaluator(BaseLLMOutputEvaluator[LLMJudgeOutputEvaluatorConfig]):
+    """Evaluator that uses an LLM to judge the quality of agent output.
+
+    Inherits all functionality from BaseLLMOutputEvaluator but uses the standard
+    system prompt and output schema for general output evaluation.
+    """
+
+    system_prompt: str = LLMJudgePromptTemplates.LLM_JUDGE_SYSTEM_PROMPT
+    output_schema: type[BaseModel] = LLMJudgeOutputSchema
+
+    @classmethod
+    def get_evaluator_id(cls) -> str:
+        """Get the evaluator id."""
+        return EvaluatorType.LLM_JUDGE_OUTPUT_SEMANTIC_SIMILARITY.value
+
+
+class LLMJudgeStrictJSONSimilarityOutputEvaluator(
+    BaseLLMOutputEvaluator[LLMJudgeStrictJSONSimilarityOutputEvaluatorConfig]
+):
+    """Evaluator that uses an LLM to judge the quality of agent output with strict JSON similarity.
+
+    Inherits all functionality from BaseLLMOutputEvaluator but uses a different system prompt
+    and output schema specific to strict JSON similarity evaluation.
+    """
+
+    system_prompt: str = (
+        LLMJudgePromptTemplates.LLM_JUDGE_STRICT_JSON_SIMILARITY_SYSTEM_PROMPT
+    )
+    output_schema: type[BaseModel] = LLMJudgeStrictJSONSimilarityOutputSchema
+
+    @classmethod
+    def get_evaluator_id(cls) -> str:
+        """Get the evaluator id."""
+        return EvaluatorType.LLM_JUDGE_OUTPUT_STRICT_JSON_SIMILARITY.value
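
`BaseLLMOutputEvaluator.evaluate` delegates with an explicit unbound call, `LLMJudgeMixin.evaluate(self, ...)`, rather than `super().evaluate(...)`; with multiple inheritance this pins the LLM-judge implementation regardless of where the mixin sits in the method resolution order. A toy illustration of that pattern using stand-in classes (not the package's types):

class OutputExtractor:
    def evaluate(self) -> str:
        return "deterministic comparison"

class LLMJudge:
    def evaluate(self) -> str:
        return "LLM-as-judge scoring"

class CombinedEvaluator(OutputExtractor, LLMJudge):
    def evaluate(self) -> str:
        # super().evaluate() would resolve to OutputExtractor (first in the MRO);
        # the explicit unbound call forces the judge behavior instead.
        return LLMJudge.evaluate(self)

assert CombinedEvaluator().evaluate() == "LLM-as-judge scoring"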
uipath/eval/evaluators/llm_judge_trajectory_evaluator.py

@@ -0,0 +1,142 @@
+"""LLM judge trajectory evaluator for evaluating agent execution trajectories."""
+
+from typing import Any, TypeVar
+
+from pydantic import BaseModel
+
+from .._helpers.evaluators_helpers import trace_to_str
+from ..models import (
+    AgentExecution,
+    EvaluationResult,
+    EvaluatorType,
+)
+from ..models.llm_judge_types import (
+    LLMJudgePromptTemplates,
+    LLMJudgeTrajectoryOutputSchema,
+)
+from .base_evaluator import BaseEvaluationCriteria
+from .llm_as_judge_evaluator import (
+    BaseLLMJudgeEvaluatorConfig,
+    LLMJudgeMixin,
+)
+
+
+class TrajectoryEvaluationCriteria(BaseEvaluationCriteria):
+    """Evaluation criteria for trajectory-based evaluations."""
+
+    expected_agent_behavior: str
+
+
+class LLMJudgeTrajectoryEvaluatorConfig(
+    BaseLLMJudgeEvaluatorConfig[TrajectoryEvaluationCriteria]
+):
+    """Configuration for the llm judge trajectory evaluator."""
+
+    name: str = "LLMJudgeTrajectoryEvaluator"
+    prompt: str = LLMJudgePromptTemplates.LLM_JUDGE_TRAJECTORY_DEFAULT_USER_PROMPT
+
+
+class LLMJudgeTrajectorySimulationEvaluatorConfig(
+    BaseLLMJudgeEvaluatorConfig[TrajectoryEvaluationCriteria]
+):
+    """Configuration for the llm judge simulation trajectory evaluator."""
+
+    name: str = "LLMJudgeTrajectorySimulationEvaluator"
+    prompt: str = (
+        LLMJudgePromptTemplates.LLM_JUDGE_SIMULATION_TRAJECTORY_DEFAULT_USER_PROMPT
+    )
+
+
+TC = TypeVar("TC", bound=BaseLLMJudgeEvaluatorConfig[TrajectoryEvaluationCriteria])
+
+
+class BaseLLMTrajectoryEvaluator(LLMJudgeMixin[TrajectoryEvaluationCriteria, TC]):
+    """Base class for LLM trajectory evaluators that contains all shared functionality.
+
+    This class encapsulates the common evaluation logic for trajectory-based LLM evaluators,
+    including output extraction, prompt formatting, and evaluation criteria handling.
+    """
+
+    output_schema: type[BaseModel] = LLMJudgeTrajectoryOutputSchema
+    actual_output_placeholder: str = "{{AgentRunHistory}}"
+    expected_output_placeholder: str = "{{ExpectedAgentBehavior}}"
+    user_input_placeholder: str = "{{UserOrSyntheticInput}}"
+    simulation_instructions_placeholder: str = "{{SimulationInstructions}}"
+
+    @classmethod
+    def get_evaluator_id(cls) -> str:
+        """Get the evaluator id."""
+        return EvaluatorType.LLM_JUDGE_TRAJECTORY.value
+
+    async def evaluate(
+        self,
+        agent_execution: AgentExecution,
+        evaluation_criteria: TrajectoryEvaluationCriteria,
+    ) -> EvaluationResult:
+        """Evaluate using trajectory analysis."""
+        return await super().evaluate(agent_execution, evaluation_criteria)
+
+    def _get_actual_output(self, agent_execution: AgentExecution) -> Any:
+        """Get the actual output from the agent execution."""
+        return trace_to_str(agent_execution.agent_trace)
+
+    def _get_expected_output(
+        self, evaluation_criteria: TrajectoryEvaluationCriteria
+    ) -> Any:
+        """Get the expected agent behavior from the evaluation criteria."""
+        return evaluation_criteria.expected_agent_behavior
+
+    def _create_evaluation_prompt(
+        self,
+        agent_execution: AgentExecution,
+        evaluation_criteria: TrajectoryEvaluationCriteria,
+    ) -> str:
+        """Create the evaluation prompt for the LLM."""
+        formatted_prompt = super()._create_evaluation_prompt(
+            agent_execution, evaluation_criteria
+        )
+        formatted_prompt = formatted_prompt.replace(
+            self.user_input_placeholder,
+            str(agent_execution.agent_input),
+        )
+        formatted_prompt = formatted_prompt.replace(
+            self.simulation_instructions_placeholder,
+            agent_execution.simulation_instructions,
+        )
+        return formatted_prompt
+
+
+class LLMJudgeTrajectoryEvaluator(
+    BaseLLMTrajectoryEvaluator[LLMJudgeTrajectoryEvaluatorConfig]
+):
+    """Evaluator that uses an LLM to judge the quality of agent trajectory.
+
+    Inherits all functionality from BaseLLMTrajectoryEvaluator but uses the standard
+    system prompt and configuration for general trajectory evaluation.
+    """
+
+    system_prompt: str = LLMJudgePromptTemplates.LLM_JUDGE_TRAJECTORY_SYSTEM_PROMPT
+
+    @classmethod
+    def get_evaluator_id(cls) -> str:
+        """Get the evaluator id."""
+        return EvaluatorType.LLM_JUDGE_TRAJECTORY_SIMILARITY.value
+
+
+class LLMJudgeTrajectorySimulationEvaluator(
+    BaseLLMTrajectoryEvaluator[LLMJudgeTrajectorySimulationEvaluatorConfig]
+):
+    """Evaluator that uses an LLM to judge the quality of agent trajectory for simulations.
+
+    Inherits all functionality from BaseLLMTrajectoryEvaluator but uses a different system prompt
+    and configuration specific to simulation evaluation.
+    """
+
+    system_prompt: str = (
+        LLMJudgePromptTemplates.LLM_JUDGE_SIMULATION_TRAJECTORY_SYSTEM_PROMPT
+    )
+
+    @classmethod
+    def get_evaluator_id(cls) -> str:
+        """Get the evaluator id."""
+        return EvaluatorType.LLM_JUDGE_TRAJECTORY_SIMULATION.value
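
Concrete judges only have to tell the mixin what counts as "actual" and "expected" output; the trajectory variants answer with the serialized trace (`trace_to_str(agent_execution.agent_trace)`) and the criteria's `expected_agent_behavior`, and then layer the `{{UserOrSyntheticInput}}` and `{{SimulationInstructions}}` substitutions on top. A compact, generic sketch of that hook pattern with stand-in types (not the SDK's classes):

from abc import ABC, abstractmethod

class JudgeBase(ABC):
    template = "Actual: {{ActualOutput}}\nExpected: {{ExpectedOutput}}"

    @abstractmethod
    def _get_actual_output(self, execution: dict) -> str: ...

    @abstractmethod
    def _get_expected_output(self, criteria: dict) -> str: ...

    def build_prompt(self, execution: dict, criteria: dict) -> str:
        # The base class owns prompt assembly; subclasses only supply the two values.
        prompt = self.template.replace("{{ActualOutput}}", self._get_actual_output(execution))
        return prompt.replace("{{ExpectedOutput}}", self._get_expected_output(criteria))

class TrajectoryJudge(JudgeBase):
    def _get_actual_output(self, execution: dict) -> str:
        # Stand-in for trace_to_str(agent_execution.agent_trace).
        return " -> ".join(execution["trace"])

    def _get_expected_output(self, criteria: dict) -> str:
        return criteria["expected_agent_behavior"]

print(TrajectoryJudge().build_prompt(
    {"trace": ["plan", "call_search_tool", "answer"]},
    {"expected_agent_behavior": "Call the search tool before answering."},
))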