uipath 2.1.59__py3-none-any.whl → 2.1.61__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uipath/_cli/_evals/_models/_output.py +22 -9
- uipath/_cli/_evals/_models/_sw_reporting.py +21 -0
- uipath/_cli/_evals/_progress_reporter.py +436 -0
- uipath/_cli/_evals/_runtime.py +103 -10
- uipath/_cli/_runtime/_contracts.py +11 -4
- uipath/_cli/_utils/_folders.py +30 -24
- uipath/_cli/cli_eval.py +28 -6
- uipath/_cli/cli_invoke.py +5 -2
- uipath/_cli/cli_publish.py +4 -3
- uipath/_events/__init__.py +0 -0
- uipath/_events/_event_bus.py +157 -0
- uipath/_events/_events.py +53 -0
- uipath/agent/models/agent.py +13 -0
- uipath/eval/models/models.py +1 -1
- uipath/tracing/_otel_exporters.py +95 -91
- uipath/tracing/_traced.py +16 -0
- uipath/tracing/_utils.py +9 -2
- {uipath-2.1.59.dist-info → uipath-2.1.61.dist-info}/METADATA +1 -1
- {uipath-2.1.59.dist-info → uipath-2.1.61.dist-info}/RECORD +22 -17
- {uipath-2.1.59.dist-info → uipath-2.1.61.dist-info}/WHEEL +0 -0
- {uipath-2.1.59.dist-info → uipath-2.1.61.dist-info}/entry_points.txt +0 -0
- {uipath-2.1.59.dist-info → uipath-2.1.61.dist-info}/licenses/LICENSE +0 -0
uipath/_cli/_evals/_models/_output.py
CHANGED
@@ -62,9 +62,19 @@ class EvaluationRunResultDto(BaseModel):
 class EvaluationRunResult(BaseModel):
     model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)

+    score: float = 0.0
     evaluation_name: str
     evaluation_run_results: List[EvaluationRunResultDto]

+    def compute_average_score(self) -> None:
+        """Compute average score for this single eval_item."""
+        if not self.evaluation_run_results:
+            self.score = 0.0
+            return
+
+        total_score = sum(dto.result.score for dto in self.evaluation_run_results)
+        self.score = total_score / len(self.evaluation_run_results)
+

 class UiPathEvalOutput(BaseModel):
     model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)
@@ -74,12 +84,15 @@ class UiPathEvalOutput(BaseModel):
     evaluation_set_results: List[EvaluationRunResult]

     def compute_average_score(self) -> None:
-        [previous implementation, old lines 77-85, not captured in this extract]
+        """Compute overall average by calling eval_item.compute_average_score()."""
+        if not self.evaluation_set_results:
+            self.score = 0.0
+            return
+
+        for eval_result in self.evaluation_set_results:
+            eval_result.compute_average_score()
+
+        eval_item_scores = [
+            eval_result.score for eval_result in self.evaluation_set_results
+        ]
+        self.score = sum(eval_item_scores) / len(eval_item_scores)
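For orientation, scoring is now computed in two stages: each EvaluationRunResult averages the scores of its own evaluator results, and UiPathEvalOutput then averages those per-item averages. The sketch below mirrors that behavior with plain dataclass stand-ins rather than the actual uipath models (whose DTO constructors are not shown in this diff); the scores are made up.

from dataclasses import dataclass
from typing import List


@dataclass
class ResultStub:
    """Stand-in for EvaluationResultDto; only .score matters here."""

    score: float


@dataclass
class RunResultStub:
    """Mirrors EvaluationRunResult: averages its own evaluator results."""

    evaluation_run_results: List[ResultStub]
    score: float = 0.0

    def compute_average_score(self) -> None:
        if not self.evaluation_run_results:
            self.score = 0.0
            return
        total = sum(dto.score for dto in self.evaluation_run_results)
        self.score = total / len(self.evaluation_run_results)


@dataclass
class EvalOutputStub:
    """Mirrors UiPathEvalOutput: averages the per-item averages."""

    evaluation_set_results: List[RunResultStub]
    score: float = 0.0

    def compute_average_score(self) -> None:
        if not self.evaluation_set_results:
            self.score = 0.0
            return
        for item in self.evaluation_set_results:
            item.compute_average_score()
        self.score = sum(i.score for i in self.evaluation_set_results) / len(
            self.evaluation_set_results
        )


output = EvalOutputStub(
    evaluation_set_results=[
        RunResultStub([ResultStub(80.0), ResultStub(100.0)]),  # item average: 90.0
        RunResultStub([ResultStub(60.0)]),                     # item average: 60.0
    ]
)
output.compute_average_score()
print(output.score)  # 75.0: the mean of the per-item averages, not of all raw scores (80.0)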
uipath/_cli/_evals/_models/_sw_reporting.py
NEW
@@ -0,0 +1,21 @@
+from typing import Any
+
+from pydantic import BaseModel, ConfigDict
+from pydantic.alias_generators import to_camel
+
+from uipath.eval.models import EvalItemResult
+
+
+class StudioWebProgressItem(BaseModel):
+    eval_run_id: str
+    eval_results: list[EvalItemResult]
+    success: bool
+    agent_output: dict[str, Any]
+    agent_execution_time: float
+
+
+class StudioWebAgentSnapshot(BaseModel):
+    model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)
+
+    input_schema: dict[str, Any]
+    output_schema: dict[str, Any]
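Both models rely on pydantic's camelCase alias generation; StudioWebAgentSnapshot in particular is later serialized with model_dump(by_alias=True) when it is embedded in the evalSetRun payload. A small sketch of that round trip, duplicating the model locally instead of importing it from the package:

from typing import Any

from pydantic import BaseModel, ConfigDict
from pydantic.alias_generators import to_camel


class StudioWebAgentSnapshot(BaseModel):
    model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)

    input_schema: dict[str, Any]
    output_schema: dict[str, Any]


# populate_by_name=True lets callers use the snake_case field names directly...
snapshot = StudioWebAgentSnapshot(
    input_schema={"type": "object", "properties": {"query": {"type": "string"}}},
    output_schema={"type": "object"},
)

# ...while by_alias=True emits the camelCase keys expected by the StudioWeb API.
print(snapshot.model_dump(by_alias=True))
# {'inputSchema': {'type': 'object', 'properties': {...}}, 'outputSchema': {'type': 'object'}}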
uipath/_cli/_evals/_progress_reporter.py
NEW
@@ -0,0 +1,436 @@
+"""Progress reporter for sending evaluation updates to StudioWeb."""
+
+import functools
+import json
+import logging
+import os
+from typing import Any, Dict, List
+
+from opentelemetry import trace
+
+from uipath import UiPath
+from uipath._cli._evals._models._evaluation_set import EvaluationItem, EvaluationStatus
+from uipath._cli._evals._models._sw_reporting import (
+    StudioWebAgentSnapshot,
+    StudioWebProgressItem,
+)
+from uipath._cli._utils._console import ConsoleLogger
+from uipath._cli._utils._project_files import (  # type: ignore
+    get_project_config,
+)
+from uipath._events._event_bus import EventBus
+from uipath._events._events import (
+    EvalRunCreatedEvent,
+    EvalRunUpdatedEvent,
+    EvalSetRunCreatedEvent,
+    EvalSetRunUpdatedEvent,
+    EvaluationEvents,
+)
+from uipath._utils import Endpoint, RequestSpec
+from uipath._utils.constants import ENV_TENANT_ID, HEADER_INTERNAL_TENANT_ID
+from uipath.eval.evaluators import BaseEvaluator
+from uipath.eval.models import EvalItemResult, ScoreType
+from uipath.tracing import LlmOpsHttpExporter
+
+logger = logging.getLogger(__name__)
+
+
+def gracefully_handle_errors(func):
+    """Decorator to catch and log errors without stopping execution."""
+
+    @functools.wraps(func)
+    async def wrapper(self, *args, **kwargs):
+        try:
+            return await func(self, *args, **kwargs)
+        except Exception as e:
+            if hasattr(self, "_console"):
+                error_type = type(e).__name__
+                logger.warning(
+                    f"Cannot report progress to SW. "
+                    f"Function: {func.__name__}, "
+                    f"Error type: {error_type}, "
+                    f"Details: {e}"
+                )
+            return None
+
+    return wrapper
+
+
+class StudioWebProgressReporter:
+    """Handles reporting evaluation progress to StudioWeb."""
+
+    def __init__(self):
+        logging.getLogger("uipath._cli.middlewares").setLevel(logging.CRITICAL)
+        console_logger = ConsoleLogger.get_instance()
+        uipath = UiPath()
+
+        self._client = uipath.api_client
+        self._console = console_logger
+        self._project_id = os.getenv("UIPATH_PROJECT_ID", None)
+        if not self._project_id:
+            logger.warning(
+                "Cannot report data to StudioWeb. Please set UIPATH_PROJECT_ID."
+            )
+
+        self.eval_set_run_ids: Dict[str, str] = {}
+        self.evaluators: Dict[str, Any] = {}
+        self.evaluator_scores: Dict[str, List[float]] = {}
+        self.eval_run_ids: Dict[str, str] = {}
+
+    @gracefully_handle_errors
+    async def create_eval_set_run(
+        self,
+        eval_set_id: str,
+        agent_snapshot: StudioWebAgentSnapshot,
+        no_of_evals: int,
+        evaluators: List[BaseEvaluator[Any]],
+    ) -> str:
+        """Create a new evaluation set run in StudioWeb."""
+        spec = self._create_eval_set_run_spec(eval_set_id, agent_snapshot, no_of_evals)
+        response = await self._client.request_async(
+            method=spec.method,
+            url=spec.endpoint,
+            params=spec.params,
+            json=spec.json,
+            headers=spec.headers,
+        )
+        eval_set_run_id = json.loads(response.content)["id"]
+        return eval_set_run_id
+
+    @gracefully_handle_errors
+    async def create_eval_run(
+        self, eval_item: EvaluationItem, eval_set_run_id: str
+    ) -> str:
+        """Create a new evaluation run in StudioWeb.
+
+        Args:
+            eval_item: Dictionary containing evaluation data
+            eval_set_run_id: The ID of the evaluation set run
+
+        Returns:
+            The ID of the created evaluation run
+        """
+        spec = self._create_eval_run_spec(eval_item, eval_set_run_id)
+        response = await self._client.request_async(
+            method=spec.method,
+            url=spec.endpoint,
+            params=spec.params,
+            json=spec.json,
+            headers=spec.headers,
+        )
+        return json.loads(response.content)["id"]
+
+    @gracefully_handle_errors
+    async def update_eval_run(
+        self,
+        sw_progress_item: StudioWebProgressItem,
+        evaluators: dict[str, BaseEvaluator[Any]],
+    ):
+        """Update an evaluation run with results."""
+        assertion_runs, evaluator_scores = self._collect_results(
+            sw_progress_item.eval_results, evaluators
+        )
+        spec = self._update_eval_run_spec(
+            assertion_runs=assertion_runs,
+            evaluator_scores=evaluator_scores,
+            eval_run_id=sw_progress_item.eval_run_id,
+            execution_time=sw_progress_item.agent_execution_time,
+            actual_output=sw_progress_item.agent_output,
+        )
+        await self._client.request_async(
+            method=spec.method,
+            url=spec.endpoint,
+            params=spec.params,
+            json=spec.json,
+            headers=spec.headers,
+        )
+
+    @gracefully_handle_errors
+    async def update_eval_set_run(
+        self,
+        eval_set_run_id: str,
+        evaluator_scores: dict[str, float],
+    ):
+        """Update the evaluation set run status to complete."""
+        spec = self._update_eval_set_run_spec(eval_set_run_id, evaluator_scores)
+        await self._client.request_async(
+            method=spec.method,
+            url=spec.endpoint,
+            params=spec.params,
+            json=spec.json,
+            headers=spec.headers,
+        )
+
+    async def handle_create_eval_set_run(self, payload: EvalSetRunCreatedEvent) -> None:
+        try:
+            self.evaluators = {eval.id: eval for eval in payload.evaluators}
+            self.evaluator_scores = {eval.id: [] for eval in payload.evaluators}
+
+            eval_set_run_id = await self.create_eval_set_run(
+                eval_set_id=payload.eval_set_id,
+                agent_snapshot=self._extract_agent_snapshot(payload.entrypoint),
+                no_of_evals=payload.no_of_evals,
+                evaluators=payload.evaluators,
+            )
+            self.eval_set_run_ids[payload.execution_id] = eval_set_run_id
+            current_span = trace.get_current_span()
+            if current_span.is_recording():
+                current_span.set_attribute("eval_set_run_id", eval_set_run_id)
+
+            logger.debug(f"Created eval set run with ID: {eval_set_run_id}")
+
+        except Exception as e:
+            logger.error(f"Failed to handle create eval set run event: {e}")
+
+    async def handle_create_eval_run(self, payload: EvalRunCreatedEvent) -> None:
+        try:
+            if eval_set_run_id := self.eval_set_run_ids.get(payload.execution_id):
+                eval_run_id = await self.create_eval_run(
+                    payload.eval_item, eval_set_run_id
+                )
+                if eval_run_id:
+                    self.eval_run_ids[payload.execution_id] = eval_run_id
+                    logger.debug(f"Created eval run with ID: {eval_run_id}")
+            else:
+                logger.warning("Cannot create eval run: eval_set_run_id not available")
+
+        except Exception as e:
+            logger.error(f"Failed to handle create eval run event: {e}")
+
+    async def handle_update_eval_run(self, payload: EvalRunUpdatedEvent) -> None:
+        try:
+            spans_exporter = LlmOpsHttpExporter(
+                trace_id=self.eval_set_run_ids.get(payload.execution_id),
+            )
+
+            spans_exporter.export(payload.spans)
+
+            for eval_result in payload.eval_results:
+                evaluator_id = eval_result.evaluator_id
+                if evaluator_id in self.evaluator_scores:
+                    match eval_result.result.score_type:
+                        case ScoreType.NUMERICAL:
+                            self.evaluator_scores[evaluator_id].append(
+                                eval_result.result.score
+                            )
+                        case ScoreType.BOOLEAN:
+                            self.evaluator_scores[evaluator_id].append(
+                                100 if eval_result.result.score else 0
+                            )
+                        case ScoreType.ERROR:
+                            self.evaluator_scores[evaluator_id].append(0)
+
+            eval_run_id = self.eval_run_ids[payload.execution_id]
+            if eval_run_id:
+                await self.update_eval_run(
+                    StudioWebProgressItem(
+                        eval_run_id=eval_run_id,
+                        eval_results=payload.eval_results,
+                        success=payload.success,
+                        agent_output=payload.agent_output,
+                        agent_execution_time=payload.agent_execution_time,
+                    ),
+                    self.evaluators,
+                )
+
+                logger.debug(f"Updated eval run with ID: {eval_run_id}")
+
+        except Exception as e:
+            logger.error(f"Failed to handle update eval run event: {e}")
+
+    async def handle_update_eval_set_run(self, payload: EvalSetRunUpdatedEvent) -> None:
+        try:
+            if eval_set_run_id := self.eval_set_run_ids.get(payload.execution_id):
+                await self.update_eval_set_run(
+                    eval_set_run_id,
+                    payload.evaluator_scores,
+                )
+                logger.debug(f"Updated eval set run with ID: {eval_set_run_id}")
+            else:
+                logger.warning(
+                    "Cannot update eval set run: eval_set_run_id not available"
+                )
+
+        except Exception as e:
+            logger.error(f"Failed to handle update eval set run event: {e}")
+
+    async def subscribe_to_eval_runtime_events(self, event_bus: EventBus) -> None:
+        event_bus.subscribe(
+            EvaluationEvents.CREATE_EVAL_SET_RUN, self.handle_create_eval_set_run
+        )
+        event_bus.subscribe(
+            EvaluationEvents.CREATE_EVAL_RUN, self.handle_create_eval_run
+        )
+        event_bus.subscribe(
+            EvaluationEvents.UPDATE_EVAL_RUN, self.handle_update_eval_run
+        )
+        event_bus.subscribe(
+            EvaluationEvents.UPDATE_EVAL_SET_RUN, self.handle_update_eval_set_run
+        )
+
+        logger.info("StudioWeb progress reporter subscribed to evaluation events")
+
+    def _extract_agent_snapshot(self, entrypoint: str) -> StudioWebAgentSnapshot:
+        try:
+            project_config = get_project_config(os.getcwd())
+            ep = None
+            for entry_point in project_config.get("entryPoints", []):
+                if entry_point.get("filePath") == entrypoint:
+                    ep = entry_point
+                    break
+
+            if not ep:
+                logger.warning(
+                    f"Entrypoint {entrypoint} not found in configuration file"
+                )
+                return StudioWebAgentSnapshot(input_schema={}, output_schema={})
+
+            input_schema = ep.get("input", {})
+            output_schema = ep.get("output", {})
+
+            return StudioWebAgentSnapshot(
+                input_schema=input_schema, output_schema=output_schema
+            )
+        except Exception as e:
+            logger.warning(f"Failed to extract agent snapshot: {e}")
+            return StudioWebAgentSnapshot(input_schema={}, output_schema={})
+
+    def _collect_results(
+        self,
+        eval_results: list[EvalItemResult],
+        evaluators: dict[str, BaseEvaluator[Any]],
+    ) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
+        assertion_runs: list[dict[str, Any]] = []
+        evaluator_scores_list: list[dict[str, Any]] = []
+        for eval_result in eval_results:
+            evaluator_scores_list.append(
+                {
+                    "type": eval_result.result.score_type.value,
+                    "value": eval_result.result.score,
+                    "justification": eval_result.result.details,
+                    "evaluatorId": eval_result.evaluator_id,
+                }
+            )
+            assertion_runs.append(
+                {
+                    "status": EvaluationStatus.COMPLETED.value,
+                    "evaluatorId": eval_result.evaluator_id,
+                    "completionMetrics": {
+                        "duration": int(eval_result.result.evaluation_time)
+                        if eval_result.result.evaluation_time
+                        else 0,
+                        "cost": None,
+                        "tokens": 0,
+                        "completionTokens": 0,
+                        "promptTokens": 0,
+                    },
+                    "assertionSnapshot": {
+                        "assertionType": evaluators[
+                            eval_result.evaluator_id
+                        ].evaluator_type.name,
+                        "outputKey": evaluators[
+                            eval_result.evaluator_id
+                        ].target_output_key,
+                    },
+                }
+            )
+        return assertion_runs, evaluator_scores_list
+
+    def _update_eval_run_spec(
+        self,
+        assertion_runs: list[dict[str, Any]],
+        evaluator_scores: list[dict[str, Any]],
+        eval_run_id: str,
+        actual_output: dict[str, Any],
+        execution_time: float,
+    ) -> RequestSpec:
+        return RequestSpec(
+            method="PUT",
+            endpoint=Endpoint(
+                f"agentsruntime_/api/execution/agents/{self._project_id}/evalRun"
+            ),
+            json={
+                "evalRunId": eval_run_id,
+                "status": EvaluationStatus.COMPLETED.value,
+                "result": {
+                    "output": {"content": {**actual_output}},
+                    "evaluatorScores": evaluator_scores,
+                },
+                "completionMetrics": {"duration": int(execution_time)},
+                "assertionRuns": assertion_runs,
+            },
+            headers=self._tenant_header(),
+        )
+
+    def _create_eval_run_spec(
+        self, eval_item: EvaluationItem, eval_set_run_id: str
+    ) -> RequestSpec:
+        return RequestSpec(
+            method="POST",
+            endpoint=Endpoint(
+                f"agentsruntime_/api/execution/agents/{self._project_id}/evalRun"
+            ),
+            json={
+                "evalSetRunId": eval_set_run_id,
+                "evalSnapshot": {
+                    "id": eval_item.id,
+                    "name": eval_item.name,
+                    "inputs": eval_item.inputs,
+                    "expectedOutput": eval_item.expected_output,
+                },
+                "status": EvaluationStatus.IN_PROGRESS.value,
+            },
+            headers=self._tenant_header(),
+        )
+
+    def _create_eval_set_run_spec(
+        self,
+        eval_set_id: str,
+        agent_snapshot: StudioWebAgentSnapshot,
+        no_of_evals: int,
+    ) -> RequestSpec:
+        return RequestSpec(
+            method="POST",
+            endpoint=Endpoint(
+                f"agentsruntime_/api/execution/agents/{self._project_id}/evalSetRun"
+            ),
+            json={
+                "agentId": self._project_id,
+                "evalSetId": eval_set_id,
+                "agentSnapshot": agent_snapshot.model_dump(by_alias=True),
+                "status": EvaluationStatus.IN_PROGRESS.value,
+                "numberOfEvalsExecuted": no_of_evals,
+            },
+            headers=self._tenant_header(),
+        )
+
+    def _update_eval_set_run_spec(
+        self,
+        eval_set_run_id: str,
+        evaluator_scores: dict[str, float],
+    ) -> RequestSpec:
+        evaluator_scores_list = [
+            {"value": avg_score, "evaluatorId": evaluator_id}
+            for evaluator_id, avg_score in evaluator_scores.items()
+        ]
+
+        return RequestSpec(
+            method="PUT",
+            endpoint=Endpoint(
+                f"agentsruntime_/api/execution/agents/{self._project_id}/evalSetRun"
+            ),
+            json={
+                "evalSetRunId": eval_set_run_id,
+                "status": EvaluationStatus.COMPLETED.value,
+                "evaluatorScores": evaluator_scores_list,
+            },
+            headers=self._tenant_header(),
+        )
+
+    def _tenant_header(self) -> dict[str, str]:
+        tenant_id = os.getenv(ENV_TENANT_ID, None)
+        if not tenant_id:
+            self._console.error(
+                f"{ENV_TENANT_ID} env var is not set. Please run 'uipath auth'."
+            )
+        return {HEADER_INTERNAL_TENANT_ID: tenant_id}  # type: ignore
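The gracefully_handle_errors decorator above is what keeps a failed StudioWeb call from aborting the evaluation itself: the wrapped coroutine's exception is logged as a warning and swallowed, and the caller simply receives None. Below is a self-contained sketch of the same pattern, slightly simplified (the hasattr(self, "_console") guard is dropped) and using a hypothetical FlakyReporter in place of the real class:

import asyncio
import functools
import logging

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)


def gracefully_handle_errors(func):
    """Catch and log errors from an async method without stopping execution."""

    @functools.wraps(func)
    async def wrapper(self, *args, **kwargs):
        try:
            return await func(self, *args, **kwargs)
        except Exception as e:
            logger.warning(
                f"Cannot report progress to SW. "
                f"Function: {func.__name__}, "
                f"Error type: {type(e).__name__}, "
                f"Details: {e}"
            )
            return None

    return wrapper


class FlakyReporter:
    """Hypothetical stand-in for StudioWebProgressReporter."""

    @gracefully_handle_errors
    async def create_eval_run(self) -> str:
        raise ConnectionError("StudioWeb unreachable")


async def main() -> None:
    # The error is logged, not raised; the evaluation loop would carry on with None.
    run_id = await FlakyReporter().create_eval_run()
    print(run_id)  # None


asyncio.run(main())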
uipath/_cli/_evals/_runtime.py
CHANGED
@@ -7,9 +7,17 @@ from typing import Any, Dict, Generic, List, Optional, Sequence, TypeVar
 from opentelemetry.sdk.trace import ReadableSpan
 from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult

+from ..._events._event_bus import EventBus
+from ..._events._events import (
+    EvalRunCreatedEvent,
+    EvalRunUpdatedEvent,
+    EvalSetRunCreatedEvent,
+    EvalSetRunUpdatedEvent,
+    EvaluationEvents,
+)
 from ...eval.evaluators import BaseEvaluator
 from ...eval.models import EvaluationResult
-from ...eval.models.models import AgentExecution
+from ...eval.models.models import AgentExecution, EvalItemResult
 from .._runtime._contracts import (
     UiPathBaseRuntime,
     UiPathRuntimeContext,
@@ -75,10 +83,16 @@ class UiPathEvalContext(UiPathRuntimeContext):
 class UiPathEvalRuntime(UiPathBaseRuntime, Generic[T, C]):
     """Specialized runtime for evaluation runs, with access to the factory."""

-    def __init__(self, context: UiPathEvalContext, factory: UiPathRuntimeFactory[T, C]):
+    def __init__(
+        self,
+        context: UiPathEvalContext,
+        factory: UiPathRuntimeFactory[T, C],
+        event_bus: EventBus,
+    ):
         super().__init__(context)
         self.context: UiPathEvalContext = context
         self.factory: UiPathRuntimeFactory[T, C] = factory
+        self.event_bus: EventBus = event_bus
         self.span_exporter: ExecutionSpanExporter = ExecutionSpanExporter()
         self.factory.add_span_exporter(self.span_exporter)
@@ -87,50 +101,119 @@ class UiPathEvalRuntime(UiPathBaseRuntime, Generic[T, C]):
         cls,
         context: UiPathEvalContext,
         factory: UiPathRuntimeFactory[T, C],
+        event_bus: EventBus,
     ) -> "UiPathEvalRuntime[T, C]":
-        return cls(context, factory)
+        return cls(context, factory, event_bus)

     async def execute(self) -> Optional[UiPathRuntimeResult]:
-        """Evaluation logic. Can spawn other runtimes through the factory."""
         if self.context.eval_set is None:
             raise ValueError("eval_set must be provided for evaluation runs")

+        if not self.context.execution_id:
+            raise ValueError("execution_id must be provided for evaluation runs")
+
+        event_bus = self.event_bus
+
         evaluation_set = EvalHelpers.load_eval_set(
             self.context.eval_set, self.context.eval_ids
         )
         evaluators = self._load_evaluators(evaluation_set)
+
+        evaluator_averages = {evaluator.id: 0.0 for evaluator in evaluators}
+        evaluator_counts = {evaluator.id: 0 for evaluator in evaluators}
+
+        await event_bus.publish(
+            EvaluationEvents.CREATE_EVAL_SET_RUN,
+            EvalSetRunCreatedEvent(
+                execution_id=self.context.execution_id,
+                entrypoint=self.context.entrypoint or "",
+                eval_set_id=evaluation_set.id,
+                no_of_evals=len(evaluation_set.evaluations),
+                evaluators=evaluators,
+            ),
+        )
+
         results = UiPathEvalOutput(
             evaluation_set_name=evaluation_set.name, score=0, evaluation_set_results=[]
         )
         for eval_item in evaluation_set.evaluations:
+            await event_bus.publish(
+                EvaluationEvents.CREATE_EVAL_RUN,
+                EvalRunCreatedEvent(
+                    execution_id=self.context.execution_id,
+                    eval_item=eval_item,
+                ),
+            )
+
             evaluation_run_results = EvaluationRunResult(
                 evaluation_name=eval_item.name, evaluation_run_results=[]
             )

             results.evaluation_set_results.append(evaluation_run_results)
+
             agent_execution_output = await self.execute_runtime(eval_item)
-
+            evaluation_item_results: list[EvalItemResult] = []
+
             for evaluator in evaluators:
                 evaluation_result = await self.run_evaluator(
                     evaluator=evaluator,
                     execution_output=agent_execution_output,
                     eval_item=eval_item,
                 )
+
+                dto_result = EvaluationResultDto.from_evaluation_result(
+                    evaluation_result
+                )
+                evaluator_counts[evaluator.id] += 1
+                count = evaluator_counts[evaluator.id]
+                evaluator_averages[evaluator.id] += (
+                    dto_result.score - evaluator_averages[evaluator.id]
+                ) / count
+
                 evaluation_run_results.evaluation_run_results.append(
                     EvaluationRunResultDto(
                         evaluator_name=evaluator.name,
-                        result=
-                        [old lines 123-124 not captured in this extract]
+                        result=dto_result,
+                    )
+                )
+                evaluation_item_results.append(
+                    EvalItemResult(
+                        evaluator_id=evaluator.id,
+                        result=evaluation_result,
                     )
                 )

+            evaluation_run_results.compute_average_score()
+
+            await event_bus.publish(
+                EvaluationEvents.UPDATE_EVAL_RUN,
+                EvalRunUpdatedEvent(
+                    execution_id=self.context.execution_id,
+                    eval_item=eval_item,
+                    eval_results=evaluation_item_results,
+                    success=not agent_execution_output.result.error,
+                    agent_output=agent_execution_output.result.output,
+                    agent_execution_time=agent_execution_output.execution_time,
+                    spans=agent_execution_output.spans,
+                ),
+                wait_for_completion=False,
+            )
+
         results.compute_average_score()
+
+        await event_bus.publish(
+            EvaluationEvents.UPDATE_EVAL_SET_RUN,
+            EvalSetRunUpdatedEvent(
+                execution_id=self.context.execution_id,
+                evaluator_scores=evaluator_averages,
+            ),
+            wait_for_completion=False,
+        )
+
         self.context.result = UiPathRuntimeResult(
             output={**results.model_dump(by_alias=True)},
             status=UiPathRuntimeStatus.SUCCESSFUL,
         )
-
         return self.context.result

     async def execute_runtime(
@@ -141,11 +224,21 @@ class UiPathEvalRuntime(UiPathBaseRuntime, Generic[T, C]):
             input_json=eval_item.inputs,
             is_eval_run=True,
         )
+        attributes = {
+            "evalId": eval_item.id,
+            "span_type": "eval",
+        }
+        if runtime_context.execution_id:
+            attributes["execution.id"] = runtime_context.execution_id
+
         start_time = time()
+
         result = await self.factory.execute_in_root_span(
-            runtime_context, root_span=eval_item.name
+            runtime_context, root_span=eval_item.name, attributes=attributes
        )
+
         end_time = time()
+
         if runtime_context.execution_id is None:
             raise ValueError("execution_id must be set for eval runs")

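One detail worth noting in the new execute() bookkeeping: evaluator_averages is maintained as a running mean, updated per evaluator with avg += (score - avg) / count instead of storing every score and dividing at the end. A quick standalone check that the incremental form equals the plain mean (the scores are illustrative):

scores = [80.0, 100.0, 55.0, 90.0]

average = 0.0
count = 0
for score in scores:
    count += 1
    # Same per-evaluator update the runtime applies after each eval item.
    average += (score - average) / count

assert abs(average - sum(scores) / len(scores)) < 1e-9
print(average)  # 81.25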