uipath 2.1.16__py3-none-any.whl → 2.1.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uipath/_cli/_evals/_evaluators/__init__.py +6 -4
- uipath/_cli/_evals/_evaluators/_deterministic_evaluator_base.py +46 -0
- uipath/_cli/_evals/_evaluators/_evaluator_factory.py +42 -22
- uipath/_cli/_evals/_evaluators/_exact_match_evaluator.py +40 -0
- uipath/_cli/_evals/_evaluators/_json_similarity_evaluator.py +168 -0
- uipath/_cli/_evals/_evaluators/_llm_as_judge_evaluator.py +2 -0
- uipath/_cli/_evals/_models/_evaluators.py +12 -11
- uipath/_cli/_evals/evaluation_service.py +1 -1
- uipath/_cli/_evals/progress_reporter.py +44 -88
- uipath/models/exceptions.py +6 -0
- {uipath-2.1.16.dist-info → uipath-2.1.18.dist-info}/METADATA +1 -1
- {uipath-2.1.16.dist-info → uipath-2.1.18.dist-info}/RECORD +15 -14
- uipath/_cli/_evals/_evaluators/_agent_scorer_evaluator.py +0 -48
- uipath/_cli/_evals/_evaluators/_deterministic_evaluator.py +0 -41
- {uipath-2.1.16.dist-info → uipath-2.1.18.dist-info}/WHEEL +0 -0
- {uipath-2.1.16.dist-info → uipath-2.1.18.dist-info}/entry_points.txt +0 -0
- {uipath-2.1.16.dist-info → uipath-2.1.18.dist-info}/licenses/LICENSE +0 -0

uipath/_cli/_evals/_evaluators/__init__.py
CHANGED
@@ -3,18 +3,20 @@
 This package contains all evaluator types and the factory for creating them.
 """
 
-from ._agent_scorer_evaluator import AgentScorerEvaluator
-from ._deterministic_evaluator import DeterministicEvaluator
+from ._deterministic_evaluator_base import DeterministicEvaluatorBase
 from ._evaluator_base import EvaluatorBase
 from ._evaluator_factory import EvaluatorFactory
+from ._exact_match_evaluator import ExactMatchEvaluator
+from ._json_similarity_evaluator import JsonSimilarityEvaluator
 from ._llm_as_judge_evaluator import LlmAsAJudgeEvaluator
 from ._trajectory_evaluator import TrajectoryEvaluator
 
 __all__ = [
     "EvaluatorBase",
+    "DeterministicEvaluatorBase",
     "EvaluatorFactory",
-    "DeterministicEvaluator",
+    "JsonSimilarityEvaluator",
+    "ExactMatchEvaluator",
     "LlmAsAJudgeEvaluator",
-    "AgentScorerEvaluator",
     "TrajectoryEvaluator",
 ]

uipath/_cli/_evals/_evaluators/_deterministic_evaluator_base.py
@@ -0,0 +1,46 @@
+import copy
+import json
+from abc import ABC
+from typing import Any, Dict, Tuple
+
+from ._evaluator_base import EvaluatorBase
+
+
+class DeterministicEvaluatorBase(EvaluatorBase, ABC):
+    def __init__(self, target_output_key: str = "*"):
+        super().__init__()
+        self.target_output_key = target_output_key
+
+    def _select_targets(
+        self, expected_output: Dict[str, Any], actual_output: Dict[str, Any]
+    ) -> Tuple[Any, Any]:
+        actual_output_copy = copy.deepcopy(actual_output)
+        expected_output_copy = copy.deepcopy(expected_output)
+        if self.target_output_key != "*":
+            if (
+                self.target_output_key not in actual_output
+                or self.target_output_key not in expected_output
+            ):
+                raise ValueError(
+                    f"Field '{self.target_output_key}' missing from expected or actual output"
+                )
+            actual_output_copy = actual_output_copy[self.target_output_key]
+            expected_output_copy = expected_output[self.target_output_key]
+        return actual_output_copy, expected_output_copy
+
+    def _canonical_json(self, obj: Any) -> str:
+        return json.dumps(
+            self._normalize_numbers(obj),
+            sort_keys=True,
+            separators=(",", ":"),
+            ensure_ascii=False,
+        )
+
+    def _normalize_numbers(self, obj: Any) -> Any:
+        if isinstance(obj, dict):
+            return {k: self._normalize_numbers(v) for k, v in obj.items()}
+        if isinstance(obj, (list, tuple)):
+            return [self._normalize_numbers(v) for v in obj]
+        if isinstance(obj, (int, float)) and not isinstance(obj, bool):
+            return float(obj)
+        return obj
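
Note on the helpers above: _canonical_json plus _normalize_numbers is what later lets ExactMatchEvaluator treat 1 and 1.0, or differently ordered keys, as the same output. A minimal standalone sketch of the same idea (illustrative only, not the packaged code; the function names here are ours):

    import json

    def normalize_numbers(obj):
        # Coerce ints to floats (bools excluded) so 1 and 1.0 canonicalize identically.
        if isinstance(obj, dict):
            return {k: normalize_numbers(v) for k, v in obj.items()}
        if isinstance(obj, (list, tuple)):
            return [normalize_numbers(v) for v in obj]
        if isinstance(obj, (int, float)) and not isinstance(obj, bool):
            return float(obj)
        return obj

    def canonical_json(obj):
        # Sorted keys and fixed separators give an order-insensitive, compact string form.
        return json.dumps(normalize_numbers(obj), sort_keys=True, separators=(",", ":"), ensure_ascii=False)

    # Key order and int/float differences disappear after canonicalization.
    assert canonical_json({"a": 1, "b": [2, 3]}) == canonical_json({"b": [2.0, 3.0], "a": 1.0})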

uipath/_cli/_evals/_evaluators/_evaluator_factory.py
CHANGED
@@ -1,9 +1,9 @@
 from typing import Any, Dict
 
 from .._models import EvaluatorCategory, EvaluatorType
-from ._agent_scorer_evaluator import AgentScorerEvaluator
-from ._deterministic_evaluator import DeterministicEvaluator
 from ._evaluator_base import EvaluatorBase, EvaluatorBaseParams
+from ._exact_match_evaluator import ExactMatchEvaluator
+from ._json_similarity_evaluator import JsonSimilarityEvaluator
 from ._llm_as_judge_evaluator import LlmAsAJudgeEvaluator
 from ._trajectory_evaluator import TrajectoryEvaluator
 
@@ -50,23 +50,50 @@ class EvaluatorFactory:
         )
 
         # Create evaluator based on category
-        …
+        match category:
+            case EvaluatorCategory.Deterministic:
+                if evaluator_type == evaluator_type.Equals:
+                    return EvaluatorFactory._create_exact_match_evaluator(
+                        base_params, data
+                    )
+                elif evaluator_type == evaluator_type.JsonSimilarity:
+                    return EvaluatorFactory._create_json_similarity_evaluator(
+                        base_params, data
+                    )
+                else:
+                    raise ValueError(
+                        f"Unknown evaluator type {evaluator_type} for category {category}"
+                    )
+            case EvaluatorCategory.LlmAsAJudge:
+                return EvaluatorFactory._create_llm_as_judge_evaluator(
+                    base_params, data
+                )
+            case EvaluatorCategory.AgentScorer:
+                raise NotImplementedError()
+            case EvaluatorCategory.Trajectory:
+                return EvaluatorFactory._create_trajectory_evaluator(base_params, data)
+            case _:
+                raise ValueError(f"Unknown evaluator category: {category}")
 
     @staticmethod
-    def …
+    def _create_exact_match_evaluator(
         base_params: EvaluatorBaseParams, data: Dict[str, Any]
-    ) -> …
+    ) -> ExactMatchEvaluator:
         """Create a deterministic evaluator."""
-        …
+        return ExactMatchEvaluator.from_params(
+            base_params,
+            target_output_key=data.get("targetOutputKey", "*"),
+        )
+
+    @staticmethod
+    def _create_json_similarity_evaluator(
+        base_params: EvaluatorBaseParams, data: Dict[str, Any]
+    ) -> JsonSimilarityEvaluator:
+        """Create a deterministic evaluator."""
+        return JsonSimilarityEvaluator.from_params(
+            base_params,
+            target_output_key=data.get("targetOutputKey", "*"),
+        )
 
     @staticmethod
     def _create_llm_as_judge_evaluator(
@@ -88,13 +115,6 @@ class EvaluatorFactory:
             target_output_key=data.get("targetOutputKey", "*"),
         )
 
-    @staticmethod
-    def _create_agent_scorer_evaluator(
-        base_params: EvaluatorBaseParams, data: Dict[str, Any]
-    ) -> AgentScorerEvaluator:
-        """Create an agent scorer evaluator."""
-        raise NotImplementedError()
-
     @staticmethod
     def _create_trajectory_evaluator(
         base_params: EvaluatorBaseParams, data: Dict[str, Any]

uipath/_cli/_evals/_evaluators/_exact_match_evaluator.py
@@ -0,0 +1,40 @@
+import copy
+from typing import Any, Dict
+
+from uipath._cli._evals._evaluators._deterministic_evaluator_base import (
+    DeterministicEvaluatorBase,
+)
+from uipath._cli._evals._models import EvaluationResult
+from uipath._cli._evals._models._evaluators import ScoreType
+
+
+class ExactMatchEvaluator(DeterministicEvaluatorBase):
+    async def evaluate(
+        self,
+        evaluation_id: str,
+        evaluation_name: str,
+        input_data: Dict[str, Any],
+        expected_output: Dict[str, Any],
+        actual_output: Dict[str, Any],
+    ) -> EvaluationResult:
+        actual_output_copy = copy.deepcopy(actual_output)
+        expected_output_copy = copy.deepcopy(expected_output)
+
+        actual_output, expected_output = self._select_targets(
+            expected_output, actual_output
+        )
+        are_equal = self._canonical_json(actual_output) == self._canonical_json(
+            expected_output
+        )
+
+        return EvaluationResult(
+            evaluation_id=evaluation_id,
+            evaluation_name=evaluation_name,
+            evaluator_id=self.id,
+            evaluator_name=self.name,
+            score=are_equal,
+            input=input_data,
+            expected_output=expected_output_copy,
+            actual_output=actual_output_copy,
+            score_type=ScoreType.BOOLEAN,
+        )

uipath/_cli/_evals/_evaluators/_json_similarity_evaluator.py
@@ -0,0 +1,168 @@
+import copy
+import math
+from typing import Any, Dict, Tuple
+
+from uipath._cli._evals._evaluators._deterministic_evaluator_base import (
+    DeterministicEvaluatorBase,
+)
+from uipath._cli._evals._models import EvaluationResult
+from uipath._cli._evals._models._evaluators import ScoreType
+
+
+class JsonSimilarityEvaluator(DeterministicEvaluatorBase):
+    """Deterministic evaluator that scores structural JSON similarity.
+
+    Compares expected versus actual JSON-like structures and returns a
+    numerical score in the range [0, 100]. The comparison is token-based
+    and tolerant for numbers and strings (via Levenshtein distance).
+    """
+
+    async def evaluate(
+        self,
+        evaluation_id: str,
+        evaluation_name: str,
+        input_data: Dict[str, Any],
+        expected_output: Dict[str, Any],
+        actual_output: Dict[str, Any],
+    ) -> EvaluationResult:
+        """Evaluate similarity between expected and actual JSON outputs.
+
+        Args:
+            evaluation_id: Unique identifier for this evaluation run.
+            evaluation_name: Human friendly evaluation name.
+            input_data: Input payload used to produce the outputs.
+            expected_output: Ground-truth JSON structure.
+            actual_output: Produced JSON structure to compare against the ground truth.
+
+        Returns:
+            EvaluationResult: Structured result with the numerical similarity score.
+        """
+        actual_output_copy = copy.deepcopy(actual_output)
+        expected_output_copy = copy.deepcopy(expected_output)
+
+        actual_output, expected_output = self._select_targets(
+            expected_output, actual_output
+        )
+        similarity = self._compare_json(expected_output, actual_output)
+
+        return EvaluationResult(
+            evaluation_id=evaluation_id,
+            evaluation_name=evaluation_name,
+            evaluator_id=self.id,
+            evaluator_name=self.name,
+            score=similarity,
+            input=input_data,
+            expected_output=expected_output_copy,
+            actual_output=actual_output_copy,
+            score_type=ScoreType.NUMERICAL,
+        )
+
+    def _compare_json(self, expected: Any, actual: Any) -> float:
+        matched_leaves, total_leaves = self._compare_tokens(expected, actual)
+        if total_leaves == 0:
+            return 100.0
+        sim = (matched_leaves / total_leaves) * 100.0
+        return max(0.0, min(100.0, sim))
+
+    def _compare_tokens(
+        self, expected_token: Any, actual_token: Any
+    ) -> Tuple[float, float]:
+        if self._is_number(expected_token) and self._is_number(actual_token):
+            return self._compare_numbers(float(expected_token), float(actual_token))
+
+        if type(expected_token) is not type(actual_token):
+            return 0.0, self._count_leaves(expected_token)
+
+        if isinstance(expected_token, dict):
+            matched_leaves = total_leaves = 0.0
+            # Only expected keys count
+            for expected_key, expected_value in expected_token.items():
+                if isinstance(actual_token, dict) and expected_key in actual_token:
+                    matched, total = self._compare_tokens(
+                        expected_value, actual_token[expected_key]
+                    )
+                else:
+                    matched, total = (0.0, self._count_leaves(expected_value))
+                matched_leaves += matched
+                total_leaves += total
+            return matched_leaves, total_leaves
+
+        if isinstance(expected_token, list):
+            matched_leaves = total_leaves = 0.0
+            common_length = min(len(expected_token), len(actual_token))
+            for index in range(common_length):
+                matched, total = self._compare_tokens(
+                    expected_token[index], actual_token[index]
+                )
+                matched_leaves += matched
+                total_leaves += total
+            for index in range(common_length, len(expected_token)):
+                total_leaves += self._count_leaves(expected_token[index])
+            return (matched_leaves, total_leaves)
+
+        if isinstance(expected_token, bool):
+            return (1.0, 1.0) if expected_token == actual_token else (0.0, 1.0)
+
+        if isinstance(expected_token, str):
+            return self._compare_strings(expected_token, actual_token)
+
+        return (1.0, 1.0) if str(expected_token) == str(actual_token) else (0.0, 1.0)
+
+    def _compare_numbers(
+        self, expected_number: float, actual_number: float
+    ) -> Tuple[float, float]:
+        total = 1.0
+        if math.isclose(expected_number, 0.0, abs_tol=1e-12):
+            matched = 1.0 if math.isclose(actual_number, 0.0, abs_tol=1e-12) else 0.0
+        else:
+            ratio = abs(expected_number - actual_number) / abs(expected_number)
+            matched = max(0.0, min(1.0, 1.0 - ratio))
+        return matched, total
+
+    def _compare_strings(
+        self, expected_string: str, actual_string: str
+    ) -> Tuple[float, float]:
+        total = 1.0
+        if not expected_string and not actual_string:
+            return 1.0, total
+        distance = self._levenshtein(expected_string, actual_string)
+        max_length = max(len(expected_string), len(actual_string))
+        similarity = 1.0 - (distance / max_length) if max_length else 1.0
+        similarity = max(0.0, min(1.0, similarity))
+        return similarity, total
+
+    def _count_leaves(self, token_node: Any) -> float:
+        if isinstance(token_node, dict):
+            return sum(
+                self._count_leaves(child_value) for child_value in token_node.values()
+            )
+        if isinstance(token_node, list):
+            return sum(self._count_leaves(child_value) for child_value in token_node)
+        return 1.0
+
+    def _levenshtein(self, source_text: str, target_text: str) -> int:
+        if not source_text:
+            return len(target_text)
+        if not target_text:
+            return len(source_text)
+        source_len, target_len = len(source_text), len(target_text)
+        distance_matrix = [[0] * (target_len + 1) for _ in range(source_len + 1)]
+        for row_idx in range(source_len + 1):
+            distance_matrix[row_idx][0] = row_idx
+        for col_idx in range(target_len + 1):
+            distance_matrix[0][col_idx] = col_idx
+        for row_idx in range(1, source_len + 1):
+            for col_idx in range(1, target_len + 1):
+                substitution_cost = (
+                    0 if source_text[row_idx - 1] == target_text[col_idx - 1] else 1
+                )
+                distance_matrix[row_idx][col_idx] = min(
+                    distance_matrix[row_idx - 1][col_idx] + 1,  # deletion
+                    distance_matrix[row_idx][col_idx - 1] + 1,  # insertion
+                    distance_matrix[row_idx - 1][col_idx - 1]
+                    + substitution_cost,  # substitution
+                )
+        return distance_matrix[source_len][target_len]
+
+    def _is_number(self, value: Any) -> bool:
+        return isinstance(value, (int, float)) and not isinstance(value, bool)
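
For orientation, a hand-worked example of this scoring (illustrative only, not taken from the package or its tests): comparing expected {"name": "Alice", "age": 30} against actual {"name": "Alicia", "age": 33, "extra": true} walks only the expected keys. The string leaf scores 1 - 2/6 ≈ 0.67 (Levenshtein distance 2 over a maximum length of 6), the number leaf scores 1 - |30 - 33|/30 = 0.9, and the extra key in the actual output is ignored, so the overall score is (0.67 + 0.9) / 2 × 100 ≈ 78.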

uipath/_cli/_evals/_evaluators/_llm_as_judge_evaluator.py
CHANGED
@@ -11,6 +11,7 @@ from ...._utils.constants import (
     COMMUNITY_agents_SUFFIX,
 )
 from .._models import EvaluationResult, LLMResponse
+from .._models._evaluators import ScoreType
 from ._evaluator_base import EvaluatorBase
 
 
@@ -86,6 +87,7 @@ class LlmAsAJudgeEvaluator(EvaluatorBase):
             expected_output=expected_output,
             actual_output=actual_output,
             details=llm_response.justification,
+            score_type=ScoreType.NUMERICAL,
         )
 
     def _extract_target_value(self, output: Dict[str, Any]) -> Any:

uipath/_cli/_evals/_models/_evaluators.py
CHANGED
@@ -1,8 +1,8 @@
-from datetime import datetime
+from datetime import datetime, timezone
 from enum import IntEnum
 from typing import Any, Dict, List, Optional
 
-from pydantic import BaseModel
+from pydantic import BaseModel
 
 
 class LLMResponse(BaseModel):
@@ -50,6 +50,12 @@ class EvaluatorType(IntEnum):
         raise ValueError(f"{value} is not a valid EvaluatorType value")
 
 
+class ScoreType(IntEnum):
+    BOOLEAN = 0
+    NUMERICAL = 1
+    ERROR = 2
+
+
 class EvaluationResult(BaseModel):
     """Result of a single evaluation."""
 
@@ -57,13 +63,14 @@ class EvaluationResult(BaseModel):
     evaluation_name: str
     evaluator_id: str
     evaluator_name: str
-    score: float
-    …
+    score: float | bool
+    score_type: ScoreType
+    # this is marked as optional, as it is populated inside the 'measure_execution_time' decorator
     evaluation_time: Optional[float] = None
     input: Dict[str, Any]
     expected_output: Dict[str, Any]
     actual_output: Dict[str, Any]
-    timestamp: datetime = …
+    timestamp: datetime = datetime.now(timezone.utc)
     details: Optional[str] = None
 
 
@@ -76,12 +83,6 @@ class EvaluationSetResult(BaseModel):
     average_score: float
 
 
-class ScoreType(IntEnum):
-    BOOLEAN = 0
-    NUMERICAL = 1
-    ERROR = 2
-
-
 class EvalItemResult(BaseModel):
     """Result of a single evaluation item."""
 
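
A minimal sketch of constructing the updated result model, using only the fields visible in this hunk (the identifiers and values below are made up):

    from uipath._cli._evals._models._evaluators import EvaluationResult, ScoreType

    result = EvaluationResult(
        evaluation_id="eval-1",
        evaluation_name="refund amount matches",
        evaluator_id="exact-match",
        evaluator_name="Exact Match",
        score=True,                    # a bool is accepted now that score is float | bool
        score_type=ScoreType.BOOLEAN,
        input={"ticket": 42},
        expected_output={"refund": 10.0},
        actual_output={"refund": 10},
    )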

uipath/_cli/_evals/evaluation_service.py
CHANGED
@@ -337,7 +337,7 @@ class EvaluationService:
             try:
                 if self._progress_reporter:
                     await self._progress_reporter.update_eval_run(
-                        eval_results, eval_run_id, …
+                        eval_results, eval_run_id, execution_time
                     )
                 sw_progress_reporter_queue.task_done()
             except Exception as e:

uipath/_cli/_evals/progress_reporter.py
CHANGED
@@ -35,7 +35,9 @@ class ProgressReporter:
         self._eval_set_id = eval_set_id
         self.agent_snapshot = agent_snapshot
         self._no_of_evals = no_of_evals
-        self._evaluators = …
+        self._evaluators: dict[str, EvaluatorBase] = {
+            evaluator.id: evaluator for evaluator in evaluators
+        }
         self._evaluator_scores: dict[str, list[float]] = {
             evaluator.id: [] for evaluator in evaluators
         }
@@ -56,6 +58,18 @@ class ProgressReporter:
                 "Cannot report data to StudioWeb. Please set UIPATH_PROJECT_ID."
             )
 
+    async def create_eval_set_run(self):
+        """Create a new evaluation set run in StudioWeb."""
+        spec = self._create_eval_set_run_spec()
+        response = await self._client.request_async(
+            method=spec.method,
+            url=spec.endpoint,
+            params=spec.params,
+            content=spec.content,
+            headers=spec.headers,
+        )
+        self._eval_set_run_id = json.loads(response.content)["id"]
+
     async def create_eval_run(self, eval_item: dict[str, Any]):
         """Create a new evaluation run in StudioWeb.
 
@@ -72,7 +86,6 @@ class ProgressReporter:
             params=spec.params,
             content=spec.content,
             headers=spec.headers,
-            scoped="org",
         )
         return json.loads(response.content)["id"]
 
@@ -80,7 +93,6 @@ class ProgressReporter:
         self,
         eval_results: list[EvalItemResult],
         eval_run_id: str,
-        success: bool,
         execution_time: float,
     ):
         """Update an evaluation run with results.
@@ -88,7 +100,6 @@ class ProgressReporter:
         Args:
             eval_results: Dictionary mapping evaluator IDs to evaluation results
             eval_run_id: ID of the evaluation run to update
-            success: Whether the evaluation was successful
             execution_time: The agent execution time
         """
         assertion_runs, evaluator_scores, actual_output = self._collect_results(
@@ -107,21 +118,7 @@ class ProgressReporter:
             params=spec.params,
             content=spec.content,
             headers=spec.headers,
-            scoped="org",
-        )
-
-    async def create_eval_set_run(self):
-        """Create a new evaluation set run in StudioWeb."""
-        spec = self._create_eval_set_run_spec()
-        response = await self._client.request_async(
-            method=spec.method,
-            url=spec.endpoint,
-            params=spec.params,
-            content=spec.content,
-            headers=spec.headers,
-            scoped="org",
         )
-        self._eval_set_run_id = json.loads(response.content)["id"]
 
     async def update_eval_set_run(self):
         """Update the evaluation set run status to complete."""
@@ -132,7 +129,6 @@ class ProgressReporter:
             params=spec.params,
             content=spec.content,
             headers=spec.headers,
-            scoped="org",
         )
 
     def _collect_results(
@@ -143,12 +139,23 @@ class ProgressReporter:
         actual_output: dict[str, Any] = {}
         for eval_result in eval_results:
             # keep track of evaluator scores. this should be removed after this computation is done server-side
-            …
+
+            # check the evaluator score type
+            match eval_result.result.score_type:
+                case ScoreType.NUMERICAL:
+                    self._evaluator_scores[eval_result.evaluator_id].append(
+                        eval_result.result.score
+                    )
+                case ScoreType.BOOLEAN:
+                    self._evaluator_scores[eval_result.evaluator_id].append(
+                        100 if eval_result.result.score else 0
+                    )
+                case ScoreType.ERROR:
+                    self._evaluator_scores[eval_result.evaluator_id].append(0)
+
             evaluator_scores.append(
                 {
-                    "type": …
+                    "type": eval_result.result.score_type.value,
                     "value": eval_result.result.score,
                     "justification": eval_result.result.details,
                     "evaluatorId": eval_result.evaluator_id,
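
Taken on its own, the new match block normalizes every evaluator's score onto a single 0 to 100 scale before averaging: numerical scores pass through, boolean results become 100 or 0, and errors count as 0. An equivalent standalone restatement (the helper name is ours, not part of the SDK):

    from uipath._cli._evals._models._evaluators import ScoreType

    def to_percentage(score, score_type):
        # Mirrors the branch above: NUMERICAL passes through, BOOLEAN maps to 100/0, ERROR to 0.
        if score_type == ScoreType.NUMERICAL:
            return score
        if score_type == ScoreType.BOOLEAN:
            return 100 if score else 0
        return 0  # ScoreType.ERROR

    assert to_percentage(78.3, ScoreType.NUMERICAL) == 78.3
    assert to_percentage(True, ScoreType.BOOLEAN) == 100
    assert to_percentage(False, ScoreType.ERROR) == 0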
@@ -158,14 +165,6 @@ class ProgressReporter:
                 {
                     "status": EvaluationStatus.COMPLETED.value,
                     "evaluatorId": eval_result.evaluator_id,
-                    "result": {
-                        "output": {"content": {**eval_result.result.actual_output}},
-                        "score": {
-                            "type": ScoreType.NUMERICAL.value,
-                            "value": eval_result.result.score,
-                            "justification": eval_result.result.details,
-                        },
-                    },
                     "completionMetrics": {
                         "duration": eval_result.result.evaluation_time,
                         "cost": None,
@@ -173,6 +172,14 @@ class ProgressReporter:
                         "completionTokens": 0,
                         "promptTokens": 0,
                     },
+                    "assertionSnapshot": {
+                        "assertionType": self._evaluators[
+                            eval_result.evaluator_id
+                        ].type.name,
+                        "outputKey": self._evaluators[
+                            eval_result.evaluator_id
+                        ].target_output_key,
+                    },
                 }
             )
 
@@ -192,7 +199,7 @@ class ProgressReporter:
         return RequestSpec(
             method="PUT",
             endpoint=Endpoint(
-                f"…"
+                f"agentsruntime_/api/execution/agents/{self._project_id}/evalRun"
             ),
             content=json.dumps(
                 {
@@ -213,7 +220,7 @@ class ProgressReporter:
         return RequestSpec(
             method="POST",
             endpoint=Endpoint(
-                f"…"
+                f"agentsruntime_/api/execution/agents/{self._project_id}/evalRun"
             ),
             content=json.dumps(
                 {
@@ -221,41 +228,10 @@ class ProgressReporter:
                     "evalSnapshot": {
                         "id": eval_item["id"],
                         "name": eval_item["name"],
-                        "assertionType": "unknown",
-                        "assertionProperties": {},
                         "inputs": eval_item.get("inputs"),
-                        …
+                        "expectedOutput": eval_item.get("expectedOutput", {}),
                     },
                     "status": EvaluationStatus.IN_PROGRESS.value,
-                    "assertionRuns": [
-                        # TODO: replace default values
-                        {
-                            "assertionSnapshot": {
-                                "assertionProperties": {
-                                    "expectedOutput": eval_item.get(
-                                        "expectedOutput", {}
-                                    ),
-                                    "prompt": "No prompt for coded agents",
-                                    "simulationInstructions": "",
-                                    "expectedAgentBehavior": "",
-                                    "inputGenerationInstructions": "",
-                                    "simulateTools": False,
-                                    "simulateInput": False,
-                                    "toolsToSimulate": [],
-                                    **(
-                                        {"model": evaluator.model}
-                                        if hasattr(evaluator, "model")
-                                        else {}
-                                    ),
-                                },
-                                "assertionType": "Custom",
-                                "outputKey": "*",
-                            },
-                            "status": 1,
-                            "evaluatorId": evaluator.id,
-                        }
-                        for evaluator in self._evaluators
-                    ],
                 }
             ),
             headers=self._tenant_header(),
@@ -264,13 +240,12 @@ class ProgressReporter:
     def _create_eval_set_run_spec(
         self,
     ) -> RequestSpec:
-        self._add_defaults_to_agent_snapshot()
         agent_snapshot_dict = json.loads(self.agent_snapshot)
 
         return RequestSpec(
             method="POST",
             endpoint=Endpoint(
-                f"…"
+                f"agentsruntime_/api/execution/agents/{self._project_id}/evalSetRun"
            ),
             content=json.dumps(
                 {
@@ -288,7 +263,7 @@ class ProgressReporter:
         evaluator_scores = []
         evaluator_averages = []
 
-        for evaluator in self._evaluators:
+        for evaluator in self._evaluators.values():
             scores = self._evaluator_scores[evaluator.id]
             if scores:
                 avg_score = sum(scores) / len(scores)
@@ -316,14 +291,11 @@ class ProgressReporter:
         return RequestSpec(
             method="PUT",
             endpoint=Endpoint(
-                f"…"
+                f"agentsruntime_/api/execution/agents/{self._project_id}/evalSetRun"
             ),
             content=json.dumps(
                 {
-                    ## TODO: send the actual data here (do we need to send those again? isn't it redundant?)
                     "evalSetRunId": self._eval_set_run_id,
-                    ## this should be removed. not used but enforced by the API
-                    "score": overall_score,
                     "status": EvaluationStatus.COMPLETED.value,
                     "evaluatorScores": evaluator_scores,
                 }
@@ -331,22 +303,6 @@ class ProgressReporter:
             headers=self._tenant_header(),
         )
 
-    def _add_defaults_to_agent_snapshot(self):
-        ## TODO: remove this after properties are marked as optional at api level
-        agent_snapshot_dict = json.loads(self.agent_snapshot)
-        agent_snapshot_dict["tools"] = []
-        agent_snapshot_dict["contexts"] = []
-        agent_snapshot_dict["escalations"] = []
-        agent_snapshot_dict["systemPrompt"] = ""
-        agent_snapshot_dict["userPrompt"] = ""
-        agent_snapshot_dict["settings"] = {
-            "model": "",
-            "maxTokens": 0,
-            "temperature": 0,
-            "engine": "",
-        }
-        self.agent_snapshot = json.dumps(agent_snapshot_dict)
-
     def _tenant_header(self) -> dict[str, str]:
         tenant_id = os.getenv(ENV_TENANT_ID, None)
         if not tenant_id:
uipath/models/exceptions.py
CHANGED
@@ -20,6 +20,11 @@ class EnrichedException(Exception):
         # Extract the relevant details from the HTTPStatusError
         self.status_code = error.response.status_code if error.response else "Unknown"
         self.url = str(error.request.url) if error.request else "Unknown"
+        self.http_method = (
+            error.request.method
+            if error.request and error.request.method
+            else "Unknown"
+        )
         self.response_content = (
             error.response.content.decode("utf-8")
             if error.response and error.response.content
@@ -28,6 +33,7 @@ class EnrichedException(Exception):
 
         enriched_message = (
             f"\nRequest URL: {self.url}"
+            f"\nHTTP Method: {self.http_method}"
             f"\nStatus Code: {self.status_code}"
             f"\nResponse Content: {self.response_content}"
         )
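
A short sketch of the enriched error in use, assuming EnrichedException is constructed from an httpx.HTTPStatusError as the surrounding __init__ suggests (the full constructor signature is not shown in this hunk, and the URL is a placeholder):

    import httpx
    from uipath.models.exceptions import EnrichedException

    request = httpx.Request("POST", "https://example.com/api/evalRun")
    response = httpx.Response(500, request=request, content=b'{"message": "boom"}')
    error = httpx.HTTPStatusError("server error", request=request, response=response)

    enriched = EnrichedException(error)
    print(enriched.http_method)  # "POST", also surfaced in the message as "HTTP Method: POST"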

{uipath-2.1.16.dist-info → uipath-2.1.18.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: uipath
-Version: 2.1.16
+Version: 2.1.18
 Summary: Python SDK and CLI for UiPath Platform, enabling programmatic interaction with automation services, process management, and deployment tools.
 Project-URL: Homepage, https://uipath.com
 Project-URL: Repository, https://github.com/UiPath/uipath-python

{uipath-2.1.16.dist-info → uipath-2.1.18.dist-info}/RECORD
CHANGED
@@ -29,18 +29,19 @@ uipath/_cli/_auth/auth_config.json,sha256=UnAhdum8phjuZaZKE5KLp0IcPCbIltDEU1M_G8
 uipath/_cli/_auth/index.html,sha256=_Q2OtqPfapG_6vumbQYqtb2PfFe0smk7TlGERKEBvB4,22518
 uipath/_cli/_auth/localhost.crt,sha256=oGl9oLLOiouHubAt39B4zEfylFvKEtbtr_43SIliXJc,1226
 uipath/_cli/_auth/localhost.key,sha256=X31VYXD8scZtmGA837dGX5l6G-LXHLo5ItWJhZXaz3c,1679
-uipath/_cli/_evals/evaluation_service.py,sha256=…
-uipath/_cli/_evals/progress_reporter.py,sha256=…
-uipath/_cli/_evals/_evaluators/__init__.py,sha256=…
-uipath/_cli/_evals/_evaluators/…
-uipath/_cli/_evals/_evaluators/_deterministic_evaluator.py,sha256=P0du9KWz5MP5Pw70Ze7piqeBfFq7w0aU7DLeEiNC3k4,1398
+uipath/_cli/_evals/evaluation_service.py,sha256=VVxZxoCJoB2SUhej_c0DzC9AlnIlWMKnug7z5weNSoE,22077
+uipath/_cli/_evals/progress_reporter.py,sha256=m1Dio1vG-04nFTFz5ijM_j1dhudlgOzQukmTkkg6wS4,11490
+uipath/_cli/_evals/_evaluators/__init__.py,sha256=jD7KNLjbsUpsESFXX11eW2MEPXDNuPp2-t-IPB-inlM,734
+uipath/_cli/_evals/_evaluators/_deterministic_evaluator_base.py,sha256=BTl0puBjp9iCsU3YFfYWqk4TOz4iE19O3q1-dK6qUOI,1723
 uipath/_cli/_evals/_evaluators/_evaluator_base.py,sha256=knHUwYFt0gMG1uJhq5TGEab6M_YevxX019yT3yYwZsw,3787
-uipath/_cli/_evals/_evaluators/_evaluator_factory.py,sha256=…
-uipath/_cli/_evals/_evaluators/…
+uipath/_cli/_evals/_evaluators/_evaluator_factory.py,sha256=RJtCuFREZ8Ijlldpa0521poZLmcR7vTU3WyYOmhJOkc,4688
+uipath/_cli/_evals/_evaluators/_exact_match_evaluator.py,sha256=lvEtAitrZy9myoZLMXLqlBWBPX06Msu67kuFMGSbikM,1319
+uipath/_cli/_evals/_evaluators/_json_similarity_evaluator.py,sha256=HpmkvuwU4Az3IIqFVLUmDvzkqb21pFMxY0sg2biZOMM,7093
+uipath/_cli/_evals/_evaluators/_llm_as_judge_evaluator.py,sha256=nSLZ29xWqALEI53ifr79JPXjyx0T4sr7p-4NygwgAio,6594
 uipath/_cli/_evals/_evaluators/_trajectory_evaluator.py,sha256=dnogQTOskpI4_cNF0Ge3hBceJJocvOgxBWAwaCWnzB0,1595
 uipath/_cli/_evals/_models/__init__.py,sha256=Ewjp3u2YeTH2MmzY9LWf7EIbAoIf_nW9fMYbj7pGlPs,420
 uipath/_cli/_evals/_models/_evaluation_set.py,sha256=UIapFwn_Ti9zHUIcL3xyHDcLZ4lq4sHJ3JXLvY5OYI0,1080
-uipath/_cli/_evals/_models/_evaluators.py,sha256=…
+uipath/_cli/_evals/_models/_evaluators.py,sha256=l57NEVyYmzSKuoIXuGkE94Br01hAMg35fiS2MlTkaQM,2115
 uipath/_cli/_push/sw_file_handler.py,sha256=tRE9n68xv0r20ulwOyALHtYwzbjGneiASwzNm8xtBN0,16372
 uipath/_cli/_runtime/_contracts.py,sha256=WlpaiQAMWCo-JFHjee35Klf49A3GsKjOU1Mf2IpUGHY,16033
 uipath/_cli/_runtime/_escalation.py,sha256=x3vI98qsfRA-fL_tNkRVTFXioM5Gv2w0GFcXJJ5eQtg,7981
@@ -99,7 +100,7 @@ uipath/models/connections.py,sha256=perIqW99YEg_0yWZPdpZlmNpZcwY_toR1wkqDUBdAN0,
 uipath/models/context_grounding.py,sha256=S9PeOlFlw7VxzzJVR_Fs28OObW3MLHUPCFqNgkEz24k,1315
 uipath/models/context_grounding_index.py,sha256=0ADlH8fC10qIbakgwU89pRVawzJ36TiSDKIqOhUdhuA,2580
 uipath/models/errors.py,sha256=gPyU4sKYn57v03aOVqm97mnU9Do2e7bwMQwiSQVp9qc,461
-uipath/models/exceptions.py,sha256=…
+uipath/models/exceptions.py,sha256=F0ITAhJsl6Agvmnv4nxvgY5oC_lrYIlxWTLs0yx859M,1636
 uipath/models/interrupt_models.py,sha256=UzuVTMVesI204YQ4qFQFaN-gN3kksddkrujofcaC7zQ,881
 uipath/models/job.py,sha256=f9L6_kg_VP0dAYvdcz1DWEWzy4NZPdlpHREod0uNK1E,3099
 uipath/models/llm_gateway.py,sha256=rUIus7BrUuuRriXqSJUE9FnjOyQ7pYpaX6hWEYvA6AA,1923
@@ -114,8 +115,8 @@ uipath/tracing/_traced.py,sha256=qeVDrds2OUnpdUIA0RhtF0kg2dlAZhyC1RRkI-qivTM,185
 uipath/tracing/_utils.py,sha256=ZeensQexnw69jVcsVrGyED7mPlAU-L1agDGm6_1A3oc,10388
 uipath/utils/__init__.py,sha256=VD-KXFpF_oWexFg6zyiWMkxl2HM4hYJMIUDZ1UEtGx0,105
 uipath/utils/_endpoints_manager.py,sha256=hiGEu6vyfQJoeiiql6w21TNiG6tADUfXlVBimxPU1-Q,4160
-uipath-2.1.…
-uipath-2.1.…
-uipath-2.1.…
-uipath-2.1.…
-uipath-2.1.…
+uipath-2.1.18.dist-info/METADATA,sha256=V5bxB_ENxsAgMRKGPz3Kx3gvmmgnrRxRDVAbILiBTtY,6367
+uipath-2.1.18.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+uipath-2.1.18.dist-info/entry_points.txt,sha256=9C2_29U6Oq1ExFu7usihR-dnfIVNSKc-0EFbh0rskB4,43
+uipath-2.1.18.dist-info/licenses/LICENSE,sha256=-KBavWXepyDjimmzH5fVAsi-6jNVpIKFc2kZs0Ri4ng,1058
+uipath-2.1.18.dist-info/RECORD,,

uipath/_cli/_evals/_evaluators/_agent_scorer_evaluator.py
@@ -1,48 +0,0 @@
-from typing import Any, Dict
-
-from .._models import EvaluationResult
-from ._evaluator_base import EvaluatorBase
-
-
-class AgentScorerEvaluator(EvaluatorBase):
-    """Evaluator that uses an agent to score outputs."""
-
-    def __init__(
-        self,
-        agent_config: Dict[str, Any],
-        scoring_criteria: Dict[str, Any],
-        target_output_key: str = "*",
-    ):
-        """Initialize the agent scorer evaluator.
-
-        Args:
-            agent_config: Configuration for the scoring agent
-            scoring_criteria: Criteria used for scoring
-            target_output_key: Key in output to evaluate ("*" for entire output)
-        """
-        super().__init__()
-        self.agent_config = agent_config or {}
-        self.scoring_criteria = scoring_criteria or {}
-        self.target_output_key = target_output_key
-
-    async def evaluate(
-        self,
-        evaluation_id: str,
-        evaluation_name: str,
-        input_data: Dict[str, Any],
-        expected_output: Dict[str, Any],
-        actual_output: Dict[str, Any],
-    ) -> EvaluationResult:
-        """Evaluate using an agent scorer.
-
-        Args:
-            evaluation_id: The ID of the evaluation being processed
-            evaluation_name: The name of the evaluation
-            input_data: The input data for the evaluation
-            expected_output: The expected output
-            actual_output: The actual output from the agent
-
-        Returns:
-            EvaluationResult containing the score and details
-        """
-        raise NotImplementedError()

uipath/_cli/_evals/_evaluators/_deterministic_evaluator.py
@@ -1,41 +0,0 @@
-from typing import Any, Dict
-
-from .._models import EvaluationResult
-from ._evaluator_base import EvaluatorBase
-
-
-class DeterministicEvaluator(EvaluatorBase):
-    """Evaluator for deterministic/rule-based evaluations."""
-
-    def __init__(self, rule_config: Dict[str, Any], target_output_key: str = "*"):
-        """Initialize the deterministic evaluator.
-
-        Args:
-            rule_config: Configuration for the rule (expected_value, regex_pattern, etc.)
-            target_output_key: Key in output to evaluate ("*" for entire output)
-        """
-        super().__init__()
-        self.rule_config = rule_config or {}
-        self.target_output_key = target_output_key
-
-    async def evaluate(
-        self,
-        evaluation_id: str,
-        evaluation_name: str,
-        input_data: Dict[str, Any],
-        expected_output: Dict[str, Any],
-        actual_output: Dict[str, Any],
-    ) -> EvaluationResult:
-        """Evaluate using deterministic rules.
-
-        Args:
-            evaluation_id: The ID of the evaluation being processed
-            evaluation_name: The name of the evaluation
-            input_data: The input data for the evaluation
-            expected_output: The expected output
-            actual_output: The actual output from the agent
-
-        Returns:
-            EvaluationResult containing the score and details
-        """
-        raise NotImplementedError()

{uipath-2.1.16.dist-info → uipath-2.1.18.dist-info}/WHEEL
File without changes

{uipath-2.1.16.dist-info → uipath-2.1.18.dist-info}/entry_points.txt
File without changes

{uipath-2.1.16.dist-info → uipath-2.1.18.dist-info}/licenses/LICENSE
File without changes