uipath-2.1.52-py3-none-any.whl → uipath-2.1.54-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. uipath/_cli/_evals/{_evaluators/_evaluator_factory.py → _evaluator_factory.py} +24 -23
  2. uipath/_cli/_evals/_models/_evaluation_set.py +23 -18
  3. uipath/_cli/_evals/_models/_evaluator_base_params.py +16 -0
  4. uipath/_cli/_evals/_models/_output.py +85 -0
  5. uipath/_cli/_evals/_runtime.py +102 -10
  6. uipath/_cli/_runtime/_contracts.py +11 -2
  7. uipath/_cli/_utils/_eval_set.py +1 -1
  8. uipath/_cli/_utils/_studio_project.py +30 -29
  9. uipath/_cli/cli_eval.py +46 -61
  10. uipath/eval/evaluators/__init__.py +15 -0
  11. uipath/eval/evaluators/base_evaluator.py +88 -0
  12. uipath/eval/evaluators/deterministic_evaluator_base.py +53 -0
  13. uipath/eval/evaluators/exact_match_evaluator.py +37 -0
  14. uipath/{_cli/_evals/_evaluators/_json_similarity_evaluator.py → eval/evaluators/json_similarity_evaluator.py} +23 -40
  15. uipath/eval/evaluators/llm_as_judge_evaluator.py +137 -0
  16. uipath/eval/evaluators/trajectory_evaluator.py +36 -0
  17. uipath/eval/models/__init__.py +19 -0
  18. uipath/{_cli/_evals/_models/_evaluators.py → eval/models/models.py} +67 -43
  19. {uipath-2.1.52.dist-info → uipath-2.1.54.dist-info}/METADATA +1 -1
  20. {uipath-2.1.52.dist-info → uipath-2.1.54.dist-info}/RECORD +23 -23
  21. uipath/_cli/_evals/_evaluators/__init__.py +0 -22
  22. uipath/_cli/_evals/_evaluators/_deterministic_evaluator_base.py +0 -46
  23. uipath/_cli/_evals/_evaluators/_evaluator_base.py +0 -124
  24. uipath/_cli/_evals/_evaluators/_exact_match_evaluator.py +0 -40
  25. uipath/_cli/_evals/_evaluators/_llm_as_judge_evaluator.py +0 -183
  26. uipath/_cli/_evals/_evaluators/_trajectory_evaluator.py +0 -48
  27. uipath/_cli/_evals/_models/__init__.py +0 -18
  28. uipath/_cli/_evals/_models/_agent_execution_output.py +0 -14
  29. uipath/_cli/_evals/progress_reporter.py +0 -304
  30. {uipath-2.1.52.dist-info → uipath-2.1.54.dist-info}/WHEEL +0 -0
  31. {uipath-2.1.52.dist-info → uipath-2.1.54.dist-info}/entry_points.txt +0 -0
  32. {uipath-2.1.52.dist-info → uipath-2.1.54.dist-info}/licenses/LICENSE +0 -0
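
The restructuring is easiest to read from the file list: the evaluator implementations and their result models move out of the private `uipath/_cli/_evals/_evaluators` and `uipath/_cli/_evals/_models` packages into the public `uipath/eval/evaluators` and `uipath/eval/models` packages, and `progress_reporter.py` plus `_agent_execution_output.py` are deleted outright. The notable hunks follow.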
uipath/{_cli/_evals/_models/_evaluators.py → eval/models/models.py}
@@ -1,15 +1,79 @@
- from datetime import datetime, timezone
+ """Models for evaluation framework including execution data and evaluation results."""
+
  from enum import IntEnum
- from typing import Any, Dict, List, Optional
+ from typing import Annotated, Any, Dict, Literal, Optional, Union
+
+ from opentelemetry.sdk.trace import ReadableSpan
+ from pydantic import BaseModel, ConfigDict, Field
+
+
+ class AgentExecution(BaseModel):
+     """Represents the execution data of an agent for evaluation purposes."""

- from pydantic import BaseModel
+     model_config = ConfigDict(arbitrary_types_allowed=True)
+
+     agent_input: Optional[Dict[str, Any]]
+     agent_output: Dict[str, Any]
+     agent_trace: list[ReadableSpan]


  class LLMResponse(BaseModel):
+     """Response from an LLM evaluator."""
+
      score: float
      justification: str


+ class ScoreType(IntEnum):
+     """Types of evaluation scores."""
+
+     BOOLEAN = 0
+     NUMERICAL = 1
+     ERROR = 2
+
+
+ class BaseEvaluationResult(BaseModel):
+     """Base class for evaluation results."""
+
+     details: Optional[str] = None
+     # this is marked as optional, as it is populated inside the 'measure_execution_time' decorator
+     evaluation_time: Optional[float] = None
+
+
+ class BooleanEvaluationResult(BaseEvaluationResult):
+     """Result of a boolean evaluation."""
+
+     score: bool
+     score_type: Literal[ScoreType.BOOLEAN] = ScoreType.BOOLEAN
+
+
+ class NumericEvaluationResult(BaseEvaluationResult):
+     """Result of a numerical evaluation."""
+
+     score: float
+     score_type: Literal[ScoreType.NUMERICAL] = ScoreType.NUMERICAL
+
+
+ class ErrorEvaluationResult(BaseEvaluationResult):
+     """Result of an error evaluation."""
+
+     score: float = 0.0
+     score_type: Literal[ScoreType.ERROR] = ScoreType.ERROR
+
+
+ EvaluationResult = Annotated[
+     Union[BooleanEvaluationResult, NumericEvaluationResult, ErrorEvaluationResult],
+     Field(discriminator="score_type"),
+ ]
+
+
+ class EvalItemResult(BaseModel):
+     """Result of a single evaluation item."""
+
+     evaluator_name: str
+     result: EvaluationResult
+
+
  class EvaluatorCategory(IntEnum):
      """Types of evaluators."""

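The headline change in this hunk: `EvaluationResult` stops being a single catch-all model and becomes a pydantic discriminated union keyed on `score_type`. A minimal sketch of how such a union validates (not from the package; the import path is an assumption based on the new `uipath/eval/models/models.py` above, and the package may also re-export these names from `uipath.eval.models`):

```python
from pydantic import TypeAdapter

# Assumed import path, per this diff's file list.
from uipath.eval.models.models import (
    BooleanEvaluationResult,
    EvaluationResult,
    ScoreType,
)

adapter = TypeAdapter(EvaluationResult)

# pydantic dispatches on the "score_type" discriminator to pick the
# concrete result class from the union.
result = adapter.validate_python({"score": True, "score_type": ScoreType.BOOLEAN})
assert isinstance(result, BooleanEvaluationResult)
```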
@@ -48,43 +112,3 @@ class EvaluatorType(IntEnum):
              return cls(value)
          else:
              raise ValueError(f"{value} is not a valid EvaluatorType value")
-
-
- class ScoreType(IntEnum):
-     BOOLEAN = 0
-     NUMERICAL = 1
-     ERROR = 2
-
-
- class EvaluationResult(BaseModel):
-     """Result of a single evaluation."""
-
-     evaluation_id: str
-     evaluation_name: str
-     evaluator_id: str
-     evaluator_name: str
-     score: float | bool
-     score_type: ScoreType
-     # this is marked as optional, as it is populated inside the 'measure_execution_time' decorator
-     evaluation_time: Optional[float] = None
-     input: Dict[str, Any]
-     expected_output: Dict[str, Any]
-     actual_output: Dict[str, Any]
-     timestamp: datetime = datetime.now(timezone.utc)
-     details: Optional[str] = None
-
-
- class EvaluationSetResult(BaseModel):
-     """Result of a complete evaluation set."""
-
-     eval_set_id: str
-     eval_set_name: str
-     results: List[EvaluationResult]
-     average_score: float
-
-
- class EvalItemResult(BaseModel):
-     """Result of a single evaluation item."""
-
-     evaluator_id: str
-     result: EvaluationResult
{uipath-2.1.52.dist-info → uipath-2.1.54.dist-info}/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: uipath
- Version: 2.1.52
+ Version: 2.1.54
  Summary: Python SDK and CLI for UiPath Platform, enabling programmatic interaction with automation services, process management, and deployment tools.
  Project-URL: Homepage, https://uipath.com
  Project-URL: Repository, https://github.com/UiPath/uipath-python
{uipath-2.1.52.dist-info → uipath-2.1.54.dist-info}/RECORD
@@ -9,7 +9,7 @@ uipath/_cli/__init__.py,sha256=tscKceSouYcEOxUbGjoyHi4qGi74giBFeXG1I-ut1hs,2308
  uipath/_cli/cli_auth.py,sha256=i3ykLlCg68xgPXHHaa0agHwGFIiLiTLzOiF6Su8XaEo,2436
  uipath/_cli/cli_deploy.py,sha256=KPCmQ0c_NYD5JofSDao5r6QYxHshVCRxlWDVnQvlp5w,645
  uipath/_cli/cli_dev.py,sha256=nEfpjw1PZ72O6jmufYWVrueVwihFxDPOeJakdvNHdOA,2146
- uipath/_cli/cli_eval.py,sha256=fYJWQlyiIc8SpTzY9QPNQWOx40PagMEKdsGZIu9As2A,4402
+ uipath/_cli/cli_eval.py,sha256=uiisQ3Wpalu8e9gHRMmn_9Gqus_t4brbjTegMQOhLa0,3831
  uipath/_cli/cli_init.py,sha256=Ac3-9tIH3rpikIX1ehWTo7InW5tjVNoz_w6fjvgLK4w,7052
  uipath/_cli/cli_invoke.py,sha256=4jyhqcy7tPrpxvaUhW-9gut6ddsCGMdJJcpOXXmIe8g,4348
  uipath/_cli/cli_new.py,sha256=9378NYUBc9j-qKVXV7oja-jahfJhXBg8zKVyaon7ctY,2102
@@ -44,22 +44,13 @@ uipath/_cli/_dev/_terminal/_styles/terminal.tcss,sha256=ktVpKwXIXw2VZp8KIZD6fO9i
  uipath/_cli/_dev/_terminal/_utils/_chat.py,sha256=YUZxYVdmEManwHDuZsczJT1dWIYE1dVBgABlurwMFcE,8493
  uipath/_cli/_dev/_terminal/_utils/_exporter.py,sha256=oI6D_eMwrh_2aqDYUh4GrJg8VLGrLYhDahR-_o0uJns,4144
  uipath/_cli/_dev/_terminal/_utils/_logger.py,sha256=jeNShEED27cNIHTe_NNx-2kUiXpSLTmi0onM6tVkqRM,888
- uipath/_cli/_evals/_runtime.py,sha256=q4h3zp_7Ygkhj1zE_YTKKXRp3BhkHaPj8CWqjkzerTk,4748
- uipath/_cli/_evals/progress_reporter.py,sha256=PGt1rs7IH1C6HPw8fWUwb98GB3UBuM6eUiiqGthfCIk,11174
- uipath/_cli/_evals/_evaluators/__init__.py,sha256=jD7KNLjbsUpsESFXX11eW2MEPXDNuPp2-t-IPB-inlM,734
- uipath/_cli/_evals/_evaluators/_deterministic_evaluator_base.py,sha256=BTl0puBjp9iCsU3YFfYWqk4TOz4iE19O3q1-dK6qUOI,1723
- uipath/_cli/_evals/_evaluators/_evaluator_base.py,sha256=knHUwYFt0gMG1uJhq5TGEab6M_YevxX019yT3yYwZsw,3787
- uipath/_cli/_evals/_evaluators/_evaluator_factory.py,sha256=cURShn17X6BW-_G3rknJXWtlgpeh5UdioLUV6oGCGAU,4912
- uipath/_cli/_evals/_evaluators/_exact_match_evaluator.py,sha256=lvEtAitrZy9myoZLMXLqlBWBPX06Msu67kuFMGSbikM,1319
- uipath/_cli/_evals/_evaluators/_json_similarity_evaluator.py,sha256=HpmkvuwU4Az3IIqFVLUmDvzkqb21pFMxY0sg2biZOMM,7093
- uipath/_cli/_evals/_evaluators/_llm_as_judge_evaluator.py,sha256=nSLZ29xWqALEI53ifr79JPXjyx0T4sr7p-4NygwgAio,6594
- uipath/_cli/_evals/_evaluators/_trajectory_evaluator.py,sha256=dnogQTOskpI4_cNF0Ge3hBceJJocvOgxBWAwaCWnzB0,1595
- uipath/_cli/_evals/_models/__init__.py,sha256=Ewjp3u2YeTH2MmzY9LWf7EIbAoIf_nW9fMYbj7pGlPs,420
- uipath/_cli/_evals/_models/_agent_execution_output.py,sha256=llvApU4JkTnNgQ5DvHPt8ee3bnV6cCANyeiebWKE07E,401
- uipath/_cli/_evals/_models/_evaluation_set.py,sha256=tVHykSget-G3sOCs9bSchMYUTpFqzXVlYYbY8L9SI0c,1518
- uipath/_cli/_evals/_models/_evaluators.py,sha256=l57NEVyYmzSKuoIXuGkE94Br01hAMg35fiS2MlTkaQM,2115
+ uipath/_cli/_evals/_evaluator_factory.py,sha256=2lOalabNSzmnnwr0SfoPWvFWXs0Ly857XBmPuOdhFBQ,4729
+ uipath/_cli/_evals/_runtime.py,sha256=KFGl2we1RH0omuD2HWw5thIK6DDZxVGtqx_G9T4DM_A,8332
+ uipath/_cli/_evals/_models/_evaluation_set.py,sha256=mwcTstHuyHd7ys_nLzgCNKBAsS4ns9UL2TF5Oq2Cc64,1758
+ uipath/_cli/_evals/_models/_evaluator_base_params.py,sha256=lTYKOV66tcjW85KHTyOdtF1p1VDaBNemrMAvH8bFIFc,382
+ uipath/_cli/_evals/_models/_output.py,sha256=TTQ0hhmD3dTkIbj_Ly_rDCGSnpZsHwdmCsl7FLdoZD0,2634
  uipath/_cli/_push/sw_file_handler.py,sha256=AX4TKM-q6CNGw3JyBW02M8ktPZuFMcAU9LN3Ii0Q2QI,18202
- uipath/_cli/_runtime/_contracts.py,sha256=Mwdb11OULGg9xGSu_A6KCl7A5jw-fKe5tBiv62MZNVM,28424
+ uipath/_cli/_runtime/_contracts.py,sha256=ZK572viY3Ydnip21rzmJ3R6F3cXpHVtDTuvwcEciy3I,28782
  uipath/_cli/_runtime/_escalation.py,sha256=x3vI98qsfRA-fL_tNkRVTFXioM5Gv2w0GFcXJJ5eQtg,7981
  uipath/_cli/_runtime/_hitl.py,sha256=VKbM021nVg1HEDnTfucSLJ0LsDn83CKyUtVzofS2qTU,11369
  uipath/_cli/_runtime/_logging.py,sha256=MGklGKPjYKjs7J5Jy9eplA9zCDsdtEbkZdCbTwgut_4,8311
@@ -74,13 +65,13 @@ uipath/_cli/_utils/_common.py,sha256=CzhhkIRfCuQ1-5HLDtjzOyt8KFs1jm6wzrBeU_v2B7c
  uipath/_cli/_utils/_console.py,sha256=scvnrrFoFX6CE451K-PXKV7UN0DUkInbOtDZ5jAdPP0,10070
  uipath/_cli/_utils/_constants.py,sha256=rS8lQ5Nzull8ytajK6lBsz398qiCp1REoAwlHtyBwF0,1415
  uipath/_cli/_utils/_debug.py,sha256=zamzIR4VgbdKADAE4gbmjxDsbgF7wvdr7C5Dqp744Oc,1739
- uipath/_cli/_utils/_eval_set.py,sha256=z0sTEj4lGkLZXfj9vUpMwFPL6LNMs1MSCZ43Efzoc6A,2750
+ uipath/_cli/_utils/_eval_set.py,sha256=4aP8yAC-jMrNYaC62Yj8fHD2hNlotGwy63bciQrpdc4,2766
  uipath/_cli/_utils/_folders.py,sha256=UVJcKPfPAVR5HF4AP6EXdlNVcfEF1v5pwGCpoAgBY34,1155
  uipath/_cli/_utils/_input_args.py,sha256=3LGNqVpJItvof75VGm-ZNTUMUH9-c7-YgleM5b2YgRg,5088
  uipath/_cli/_utils/_parse_ast.py,sha256=8Iohz58s6bYQ7rgWtOTjrEInLJ-ETikmOMZzZdIY2Co,20072
  uipath/_cli/_utils/_processes.py,sha256=q7DfEKHISDWf3pngci5za_z0Pbnf_shWiYEcTOTCiyk,1855
  uipath/_cli/_utils/_project_files.py,sha256=sulh3xZhDDw_rBOrn_XSUfVSD6sUu47ZK4n_lF5BKkQ,13197
- uipath/_cli/_utils/_studio_project.py,sha256=HvzcpIIIA4hUIvMbId1dsAhmFLMuhnS2ZtyNdcpXJ8c,15422
+ uipath/_cli/_utils/_studio_project.py,sha256=4aoRFj5FazUPpPltfr3jvyjoLsUd4hyl9We4SAuKFh4,15376
  uipath/_cli/_utils/_tracing.py,sha256=2igb03j3EHjF_A406UhtCKkPfudVfFPjUq5tXUEG4oo,1541
  uipath/_cli/_utils/_uv_helpers.py,sha256=6SvoLnZPoKIxW0sjMvD1-ENV_HOXDYzH34GjBqwT138,3450
  uipath/_resources/AGENTS.md,sha256=YWhWuX9XIbyVhVT3PnPc4Of3_q6bsNJcuzYu3N8f_Ug,25850
@@ -122,6 +113,15 @@ uipath/agent/conversation/meta.py,sha256=3t0eS9UHoAPHre97QTUeVbjDhnMX4zj4-qG6ju0
  uipath/agent/conversation/tool.py,sha256=ol8XI8AVd-QNn5auXNBPcCzOkh9PPFtL7hTK3kqInkU,2191
  uipath/eval/_helpers/__init__.py,sha256=GSmZMryjuO3Wo_zdxZdrHCRRsgOxsVFYkYgJ15YNC3E,86
  uipath/eval/_helpers/helpers.py,sha256=iE2HHdMiAdAMLqxHkPKHpfecEtAuN5BTBqvKFTI8ciE,1315
+ uipath/eval/evaluators/__init__.py,sha256=DJAAhgv0I5UfBod4sGnSiKerfrz1iMmk7GNFb71V8eI,494
+ uipath/eval/evaluators/base_evaluator.py,sha256=gryaN7WMV__NGorwu4WPRL5A5RlJ1exQ9jDJ6ZrXDB8,2679
+ uipath/eval/evaluators/deterministic_evaluator_base.py,sha256=yDWTMU1mG-93D6DscAUHmaVUc1rhGYtNjGXgevzAObM,1723
+ uipath/eval/evaluators/exact_match_evaluator.py,sha256=Qfz-kIUf80PKjAuge1Tc1GvN6kDB6hHveBZ86w_2How,1512
+ uipath/eval/evaluators/json_similarity_evaluator.py,sha256=cP4kpN-UIf690V5dq4LaCjJc2zFx-nEffUclCwDdlhM,6607
+ uipath/eval/evaluators/llm_as_judge_evaluator.py,sha256=l0bbn8ZLi9ZTXcgr7tJ2tsCvHFqIIeGa7sobaAHgI2Y,4927
+ uipath/eval/evaluators/trajectory_evaluator.py,sha256=7boiKzjLpQPs8M8y2PGnI3bZQ1MEwR6QRZpXyKQcR7Y,1244
+ uipath/eval/models/__init__.py,sha256=x360CDZaRjUL3q3kh2CcXYYrQ47jwn6p6JnmhEIvMlA,419
+ uipath/eval/models/models.py,sha256=9IraD5C2KfKK1ZLMZ7jBOJzzHW4X1Dp2k41abqmPMnA,2838
  uipath/models/__init__.py,sha256=d_DkK1AtRUetM1t2NrH5UKgvJOBiynzaKnK5pMY7aIc,1289
  uipath/models/action_schema.py,sha256=tBn1qQ3NQLU5nwWlBIzIKIx3XK5pO_D1S51IjFlZ1FA,610
  uipath/models/actions.py,sha256=1vRsJ3JSmMdPkbiYAiHzY8K44vmW3VlMsmQUBAkSgrQ,3141
@@ -148,8 +148,8 @@ uipath/tracing/_traced.py,sha256=qeVDrds2OUnpdUIA0RhtF0kg2dlAZhyC1RRkI-qivTM,185
  uipath/tracing/_utils.py,sha256=wJRELaPu69iY0AhV432Dk5QYf_N_ViRU4kAUG1BI1ew,10384
  uipath/utils/__init__.py,sha256=VD-KXFpF_oWexFg6zyiWMkxl2HM4hYJMIUDZ1UEtGx0,105
  uipath/utils/_endpoints_manager.py,sha256=iRTl5Q0XAm_YgcnMcJOXtj-8052sr6jpWuPNz6CgT0Q,8408
- uipath-2.1.52.dist-info/METADATA,sha256=zNHWskIn1OPB0hrLbZBoX6qmd-U-52O_LTHBZ06FvdQ,6482
- uipath-2.1.52.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- uipath-2.1.52.dist-info/entry_points.txt,sha256=9C2_29U6Oq1ExFu7usihR-dnfIVNSKc-0EFbh0rskB4,43
- uipath-2.1.52.dist-info/licenses/LICENSE,sha256=-KBavWXepyDjimmzH5fVAsi-6jNVpIKFc2kZs0Ri4ng,1058
- uipath-2.1.52.dist-info/RECORD,,
+ uipath-2.1.54.dist-info/METADATA,sha256=Yzw9AxJ3oi5FRDQ3ISjPi55QIWeKyNYqFSVzNnN-FXo,6482
+ uipath-2.1.54.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ uipath-2.1.54.dist-info/entry_points.txt,sha256=9C2_29U6Oq1ExFu7usihR-dnfIVNSKc-0EFbh0rskB4,43
+ uipath-2.1.54.dist-info/licenses/LICENSE,sha256=-KBavWXepyDjimmzH5fVAsi-6jNVpIKFc2kZs0Ri4ng,1058
+ uipath-2.1.54.dist-info/RECORD,,
uipath/_cli/_evals/_evaluators/__init__.py (deleted)
@@ -1,22 +0,0 @@
- """Evaluators package for the evaluation system.
-
- This package contains all evaluator types and the factory for creating them.
- """
-
- from ._deterministic_evaluator_base import DeterministicEvaluatorBase
- from ._evaluator_base import EvaluatorBase
- from ._evaluator_factory import EvaluatorFactory
- from ._exact_match_evaluator import ExactMatchEvaluator
- from ._json_similarity_evaluator import JsonSimilarityEvaluator
- from ._llm_as_judge_evaluator import LlmAsAJudgeEvaluator
- from ._trajectory_evaluator import TrajectoryEvaluator
-
- __all__ = [
-     "EvaluatorBase",
-     "DeterministicEvaluatorBase",
-     "EvaluatorFactory",
-     "JsonSimilarityEvaluator",
-     "ExactMatchEvaluator",
-     "LlmAsAJudgeEvaluator",
-     "TrajectoryEvaluator",
- ]
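
Per the file list, the replacement `uipath/eval/evaluators/__init__.py` is 15 lines, so presumably the same classes are now imported as, e.g., `from uipath.eval.evaluators import ExactMatchEvaluator`; the exact re-export list is not shown in this diff.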
uipath/_cli/_evals/_evaluators/_deterministic_evaluator_base.py (deleted)
@@ -1,46 +0,0 @@
- import copy
- import json
- from abc import ABC
- from typing import Any, Dict, Tuple
-
- from ._evaluator_base import EvaluatorBase
-
-
- class DeterministicEvaluatorBase(EvaluatorBase, ABC):
-     def __init__(self, target_output_key: str = "*"):
-         super().__init__()
-         self.target_output_key = target_output_key
-
-     def _select_targets(
-         self, expected_output: Dict[str, Any], actual_output: Dict[str, Any]
-     ) -> Tuple[Any, Any]:
-         actual_output_copy = copy.deepcopy(actual_output)
-         expected_output_copy = copy.deepcopy(expected_output)
-         if self.target_output_key != "*":
-             if (
-                 self.target_output_key not in actual_output
-                 or self.target_output_key not in expected_output
-             ):
-                 raise ValueError(
-                     f"Field '{self.target_output_key}' missing from expected or actual output"
-                 )
-             actual_output_copy = actual_output_copy[self.target_output_key]
-             expected_output_copy = expected_output[self.target_output_key]
-         return actual_output_copy, expected_output_copy
-
-     def _canonical_json(self, obj: Any) -> str:
-         return json.dumps(
-             self._normalize_numbers(obj),
-             sort_keys=True,
-             separators=(",", ":"),
-             ensure_ascii=False,
-         )
-
-     def _normalize_numbers(self, obj: Any) -> Any:
-         if isinstance(obj, dict):
-             return {k: self._normalize_numbers(v) for k, v in obj.items()}
-         if isinstance(obj, (list, tuple)):
-             return [self._normalize_numbers(v) for v in obj]
-         if isinstance(obj, (int, float)) and not isinstance(obj, bool):
-             return float(obj)
-         return obj
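
The canonicalization trick in this deleted base class is worth spelling out: serializing with sorted keys after coercing every non-bool number to float makes equality checks insensitive to key order and to int-vs-float representation. A standalone sketch of the same idea (re-implemented here for illustration, not imported from the package):

```python
import json
from typing import Any


def normalize_numbers(obj: Any) -> Any:
    # Coerce all non-bool numbers to float so 1 and 1.0 serialize identically.
    if isinstance(obj, dict):
        return {k: normalize_numbers(v) for k, v in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [normalize_numbers(v) for v in obj]
    if isinstance(obj, (int, float)) and not isinstance(obj, bool):
        return float(obj)
    return obj


def canonical_json(obj: Any) -> str:
    # Sorted keys + compact separators give a stable, comparable string.
    return json.dumps(
        normalize_numbers(obj), sort_keys=True, separators=(",", ":"), ensure_ascii=False
    )


assert canonical_json({"b": 2, "a": 1}) == canonical_json({"a": 1.0, "b": 2.0})
```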
uipath/_cli/_evals/_evaluators/_evaluator_base.py (deleted)
@@ -1,124 +0,0 @@
- import functools
- import time
- from abc import ABC, abstractmethod
- from dataclasses import dataclass
- from typing import Any, Dict
-
- from uipath._cli._evals._models import (
-     EvaluationResult,
-     EvaluatorCategory,
-     EvaluatorType,
- )
-
-
- def measure_execution_time(func):
-     """Decorator to measure execution time and update EvaluationResult.evaluation_time."""
-
-     @functools.wraps(func)
-     async def wrapper(*args, **kwargs) -> EvaluationResult:
-         start_time = time.time()
-         result = await func(*args, **kwargs)
-         end_time = time.time()
-         execution_time = end_time - start_time
-
-         result.evaluation_time = execution_time
-         return result
-
-     return wrapper
-
-
- @dataclass
- class EvaluatorBaseParams:
-     """Parameters for initializing the base evaluator."""
-
-     evaluator_id: str
-     category: EvaluatorCategory
-     evaluator_type: EvaluatorType
-     name: str
-     description: str
-     created_at: str
-     updated_at: str
-     target_output_key: str
-
-
- class EvaluatorBase(ABC):
-     """Abstract base class for all evaluators."""
-
-     def __init__(self):
-         # initialization done via 'from_params' function
-         self.id: str
-         self.name: str
-         self.description: str
-         self.created_at: str
-         self.updated_at: str
-         self.category: EvaluatorCategory
-         self.type: EvaluatorType
-         self.target_output_key: str
-         pass
-
-     @classmethod
-     def from_params(cls, params: EvaluatorBaseParams, **kwargs):
-         """Initialize the base evaluator from parameters.
-
-         Args:
-             params: EvaluatorBaseParams containing base configuration
-             **kwargs: Additional specific parameters for concrete evaluators
-
-         Returns:
-             Initialized evaluator instance
-         """
-         instance = cls(**kwargs)
-         instance.id = params.evaluator_id
-         instance.category = params.category
-         instance.type = params.evaluator_type
-         instance.name = params.name
-         instance.description = params.description
-         instance.created_at = params.created_at
-         instance.updated_at = params.updated_at
-         instance.target_output_key = params.target_output_key
-         return instance
-
-     @measure_execution_time
-     @abstractmethod
-     async def evaluate(
-         self,
-         evaluation_id: str,
-         evaluation_name: str,
-         input_data: Dict[str, Any],
-         expected_output: Dict[str, Any],
-         actual_output: Dict[str, Any],
-     ) -> EvaluationResult:
-         """Evaluate the given data and return a result.
-
-         Args:
-             evaluation_id: The ID of the evaluation being processed
-             evaluation_name: The name of the evaluation
-             input_data: The input data for the evaluation
-             expected_output: The expected output
-             actual_output: The actual output from the agent
-
-         Returns:
-             EvaluationResult containing the score and details
-         """
-         pass
-
-     def to_dict(self) -> Dict[str, Any]:
-         """Convert the evaluator instance to a dictionary representation.
-
-         Returns:
-             Dict[str, Any]: Dictionary containing all evaluator properties
-         """
-         return {
-             "id": self.id,
-             "name": self.name,
-             "description": self.description,
-             "created_at": self.created_at,
-             "updated_at": self.updated_at,
-             "category": self.category.name if self.category else None,
-             "type": self.type.name if self.type else None,
-             "target_output_key": self.target_output_key,
-         }
-
-     def __repr__(self) -> str:
-         """String representation of the evaluator."""
-         return f"{self.__class__.__name__}(id='{self.id}', name='{self.name}', category={self.category.name})"
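
Note the interplay between `measure_execution_time` and the result models: the decorator writes the elapsed wall-clock time onto the returned result after the coroutine finishes, which is why `evaluation_time` is declared `Optional` with a `None` default. A self-contained sketch of the pattern (the `Result` class here is hypothetical, for illustration only):

```python
import asyncio
import functools
import time
from dataclasses import dataclass
from typing import Optional


@dataclass
class Result:
    score: float
    evaluation_time: Optional[float] = None  # filled in by the decorator


def measure_execution_time(func):
    @functools.wraps(func)
    async def wrapper(*args, **kwargs) -> Result:
        start = time.time()
        result = await func(*args, **kwargs)
        # Populate the timing field on the object the coroutine returned.
        result.evaluation_time = time.time() - start
        return result

    return wrapper


@measure_execution_time
async def evaluate() -> Result:
    await asyncio.sleep(0.1)
    return Result(score=1.0)


print(asyncio.run(evaluate()).evaluation_time)  # ~0.1
```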
uipath/_cli/_evals/_evaluators/_exact_match_evaluator.py (deleted)
@@ -1,40 +0,0 @@
- import copy
- from typing import Any, Dict
-
- from uipath._cli._evals._evaluators._deterministic_evaluator_base import (
-     DeterministicEvaluatorBase,
- )
- from uipath._cli._evals._models import EvaluationResult
- from uipath._cli._evals._models._evaluators import ScoreType
-
-
- class ExactMatchEvaluator(DeterministicEvaluatorBase):
-     async def evaluate(
-         self,
-         evaluation_id: str,
-         evaluation_name: str,
-         input_data: Dict[str, Any],
-         expected_output: Dict[str, Any],
-         actual_output: Dict[str, Any],
-     ) -> EvaluationResult:
-         actual_output_copy = copy.deepcopy(actual_output)
-         expected_output_copy = copy.deepcopy(expected_output)
-
-         actual_output, expected_output = self._select_targets(
-             expected_output, actual_output
-         )
-         are_equal = self._canonical_json(actual_output) == self._canonical_json(
-             expected_output
-         )
-
-         return EvaluationResult(
-             evaluation_id=evaluation_id,
-             evaluation_name=evaluation_name,
-             evaluator_id=self.id,
-             evaluator_name=self.name,
-             score=are_equal,
-             input=input_data,
-             expected_output=expected_output_copy,
-             actual_output=actual_output_copy,
-             score_type=ScoreType.BOOLEAN,
-         )
uipath/_cli/_evals/_evaluators/_llm_as_judge_evaluator.py (deleted)
@@ -1,183 +0,0 @@
- import json
- from typing import Any, Dict
-
- from ...._config import Config
- from ...._execution_context import ExecutionContext
- from ...._services.llm_gateway_service import UiPathLlmChatService
- from ...._utils.constants import (
-     ENV_BASE_URL,
-     ENV_UIPATH_ACCESS_TOKEN,
-     ENV_UNATTENDED_USER_ACCESS_TOKEN,
-     COMMUNITY_agents_SUFFIX,
- )
- from .._models import EvaluationResult, LLMResponse
- from .._models._evaluators import ScoreType
- from ._evaluator_base import EvaluatorBase
-
-
- class LlmAsAJudgeEvaluator(EvaluatorBase):
-     """Evaluator that uses an LLM to judge the quality of outputs."""
-
-     def __init__(self, prompt: str = "", model: str = "", target_output_key: str = "*"):
-         """Initialize the LLM-as-a-judge evaluator.
-
-         Args:
-             prompt: The prompt template for the LLM
-             model: The model to use for evaluation
-             target_output_key: Key in output to evaluate ("*" for entire output)
-         """
-         super().__init__()
-         self.actual_output_placeholder = "{{ActualOutput}}"
-         self.expected_output_placeholder = "{{ExpectedOutput}}"
-         self._initialize_llm()
-         self.prompt = prompt
-         self.model = model
-         self.target_output_key: str = target_output_key
-
-     def _initialize_llm(self):
-         """Initialize the LLM used for evaluation."""
-         import os
-
-         base_url_value: str = os.getenv(ENV_BASE_URL)  # type: ignore
-         secret_value: str = os.getenv(ENV_UNATTENDED_USER_ACCESS_TOKEN) or os.getenv(
-             ENV_UIPATH_ACCESS_TOKEN
-         )  # type: ignore
-         config = Config(
-             base_url=base_url_value,
-             secret=secret_value,
-         )
-         self.llm = UiPathLlmChatService(config, ExecutionContext())
-
-     async def evaluate(
-         self,
-         evaluation_id: str,
-         evaluation_name: str,
-         input_data: Dict[str, Any],
-         expected_output: Dict[str, Any],
-         actual_output: Dict[str, Any],
-     ) -> EvaluationResult:
-         """Evaluate using an LLM as a judge.
-
-         Args:
-             evaluation_id: The ID of the evaluation being processed
-             evaluation_name: The name of the evaluation
-             input_data: The input data for the evaluation
-             expected_output: The expected output
-             actual_output: The actual output from the agent
-
-         Returns:
-             EvaluationResult containing the score and details
-         """
-         # Extract the target value to evaluate
-         target_value = self._extract_target_value(actual_output)
-         expected_value = self._extract_target_value(expected_output)
-
-         # Create the evaluation prompt
-         evaluation_prompt = self._create_evaluation_prompt(expected_value, target_value)
-
-         llm_response = await self._get_llm_response(evaluation_prompt)
-
-         return EvaluationResult(
-             evaluation_id=evaluation_id,
-             evaluation_name=evaluation_name,
-             evaluator_id=self.id,
-             evaluator_name=self.name,
-             score=llm_response.score,
-             input=input_data,
-             expected_output=expected_output,
-             actual_output=actual_output,
-             details=llm_response.justification,
-             score_type=ScoreType.NUMERICAL,
-         )
-
-     def _extract_target_value(self, output: Dict[str, Any]) -> Any:
-         """Extract the target value from output based on target_output_key."""
-         if self.target_output_key == "*":
-             return output
-
-         # Handle nested keys
-         keys = self.target_output_key.split(".")
-         value = output
-
-         try:
-             for key in keys:
-                 if isinstance(value, dict):
-                     value = value[key]
-                 else:
-                     return None
-             return value
-         except (KeyError, TypeError):
-             return None
-
-     def _create_evaluation_prompt(
-         self, expected_output: Any, actual_output: Any
-     ) -> str:
-         """Create the evaluation prompt for the LLM."""
-         formatted_prompt = self.prompt.replace(
-             self.actual_output_placeholder,
-             str(actual_output),
-         )
-         formatted_prompt = formatted_prompt.replace(
-             self.expected_output_placeholder,
-             str(expected_output),
-         )
-
-         return formatted_prompt
-
-     async def _get_llm_response(self, evaluation_prompt: str) -> LLMResponse:
-         """Get response from the LLM.
-
-         Args:
-             evaluation_prompt: The formatted prompt to send to the LLM
-
-         Returns:
-             LLMResponse with score and justification
-         """
-         try:
-             # remove community-agents suffix from llm model name
-             model = self.model
-             if model.endswith(COMMUNITY_agents_SUFFIX):
-                 model = model.replace(COMMUNITY_agents_SUFFIX, "")
-
-             # Prepare the request
-             request_data = {
-                 "model": model,
-                 "messages": [{"role": "user", "content": evaluation_prompt}],
-                 "response_format": {
-                     "type": "json_schema",
-                     "json_schema": {
-                         "name": "evaluation_response",
-                         "schema": {
-                             "type": "object",
-                             "properties": {
-                                 "score": {
-                                     "type": "number",
-                                     "minimum": 0,
-                                     "maximum": 100,
-                                     "description": "Score between 0 and 100",
-                                 },
-                                 "justification": {
-                                     "type": "string",
-                                     "description": "Explanation for the score",
-                                 },
-                             },
-                             "required": ["score", "justification"],
-                         },
-                     },
-                 },
-             }
-
-             response = await self.llm.chat_completions(**request_data)
-
-             try:
-                 return LLMResponse(**json.loads(response.choices[-1].message.content))
-             except (json.JSONDecodeError, ValueError) as e:
-                 return LLMResponse(
-                     score=0.0, justification=f"Error parsing LLM response: {str(e)}"
-                 )
-
-         except Exception as e:
-             # Fallback in case of any errors
-             return LLMResponse(
-                 score=0.0, justification=f"Error during LLM evaluation: {str(e)}"
-             )
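
The removed evaluator constrained the LLM with a JSON-schema `response_format` and then parsed the last choice into `LLMResponse`. A minimal sketch of just that parsing contract (standalone; the `raw` payload is a made-up example of what the "evaluation_response" schema above permits):

```python
import json

from pydantic import BaseModel


class LLMResponse(BaseModel):
    score: float
    justification: str


# Example payload shaped by the JSON schema: a 0-100 score plus a rationale.
raw = '{"score": 87, "justification": "Outputs agree on all required fields."}'
resp = LLMResponse(**json.loads(raw))
assert 0 <= resp.score <= 100
```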