uipath 2.1.66__py3-none-any.whl → 2.1.67__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uipath/_cli/_evals/_evaluator_factory.py +18 -2
- uipath/_cli/_evals/_models/_trajectory_span.py +115 -0
- uipath/_cli/_evals/_runtime.py +1 -0
- uipath/eval/evaluators/trajectory_evaluator.py +133 -10
- uipath/eval/models/models.py +1 -0
- {uipath-2.1.66.dist-info → uipath-2.1.67.dist-info}/METADATA +1 -1
- {uipath-2.1.66.dist-info → uipath-2.1.67.dist-info}/RECORD +10 -9
- {uipath-2.1.66.dist-info → uipath-2.1.67.dist-info}/WHEEL +0 -0
- {uipath-2.1.66.dist-info → uipath-2.1.67.dist-info}/entry_points.txt +0 -0
- {uipath-2.1.66.dist-info → uipath-2.1.67.dist-info}/licenses/LICENSE +0 -0
@@ -123,6 +123,22 @@ class EvaluatorFactory:
|
|
123
123
|
@staticmethod
def _create_trajectory_evaluator(
    base_params: EvaluatorBaseParams, data: Dict[str, Any]
) -> TrajectoryEvaluator:
    """Create a trajectory evaluator.

    Args:
        base_params: Shared evaluator parameters; expanded into the
            evaluator constructor via ``model_dump()``.
        data: Raw evaluator definition. Must contain non-empty ``prompt``
            and ``model`` entries.

    Returns:
        A ``TrajectoryEvaluator`` configured with the given prompt and model.

    Raises:
        ValueError: If ``prompt`` or ``model`` is missing or empty, or if
            ``model`` is ``"same-as-agent"``.
    """
    prompt = data.get("prompt", "")
    if not prompt:
        raise ValueError("Trajectory evaluator must include 'prompt' field")

    model = data.get("model", "")
    if not model:
        # Name the evaluator kind consistently with the prompt check above.
        raise ValueError("Trajectory evaluator must include 'model' field")
    if model == "same-as-agent":
        raise ValueError(
            "'same-as-agent' model option is not supported by coded agents evaluations. Please select a specific model for the evaluator."
        )

    return TrajectoryEvaluator(
        **base_params.model_dump(),
        prompt=prompt,
        model=model,
    )
|
@@ -0,0 +1,115 @@
|
|
1
|
+
"""Trajectory evaluation span model for serializing span data in evaluations."""
|
2
|
+
|
3
|
+
from dataclasses import dataclass
|
4
|
+
from typing import Any, Dict, List, Optional
|
5
|
+
|
6
|
+
from opentelemetry.sdk.trace import ReadableSpan
|
7
|
+
from pydantic import BaseModel
|
8
|
+
|
9
|
+
|
10
|
+
@dataclass
class TrajectoryEvaluationSpan:
    """Simplified span representation for trajectory evaluation.

    Contains span information needed for evaluating agent execution paths,
    excluding timestamps which are not useful for trajectory analysis.
    """

    name: str
    status: str
    attributes: Dict[str, Any]
    parent_name: Optional[str] = None
    events: Optional[List[Dict[str, Any]]] = None

    # Numeric status code -> label. Deliberately unannotated so the dataclass
    # machinery treats it as a plain class attribute, not an instance field.
    _STATUS_NAMES = {0: "unset", 1: "ok", 2: "error"}

    def __post_init__(self):
        """Replace a missing ``events`` value with a fresh empty list."""
        if self.events is None:
            self.events = []

    @classmethod
    def from_readable_span(
        cls, span: ReadableSpan, parent_spans: Optional[Dict[int, str]] = None
    ) -> "TrajectoryEvaluationSpan":
        """Convert a ReadableSpan to a TrajectoryEvaluationSpan.

        Args:
            span: The OpenTelemetry ReadableSpan to convert
            parent_spans: Optional mapping of span IDs to names for parent lookup

        Returns:
            TrajectoryEvaluationSpan with relevant data extracted. The status
            is "unknown" for unmapped status codes, and ``parent_name`` is
            None when the parent is absent or not in ``parent_spans``.
        """
        status = cls._STATUS_NAMES.get(span.status.status_code.value, "unknown")

        # Copy so the result does not alias the span's own attribute mapping.
        attributes = dict(span.attributes) if span.attributes else {}

        parent_name = None
        if span.parent and parent_spans:
            parent_name = parent_spans.get(span.parent.span_id)

        # Keep only event name and attributes; timestamps are dropped on purpose.
        events = [
            {
                "name": event.name,
                "attributes": dict(event.attributes) if event.attributes else {},
            }
            for event in (getattr(span, "events", None) or ())
        ]

        return cls(
            name=span.name,
            status=status,
            attributes=attributes,
            parent_name=parent_name,
            events=events,
        )

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for JSON serialization."""
        return {
            "name": self.name,
            "status": self.status,
            "parent_name": self.parent_name,
            "attributes": self.attributes,
            "events": self.events,
        }
|
83
|
+
|
84
|
+
|
85
|
+
class TrajectoryEvaluationTrace(BaseModel):
    """Container for a collection of trajectory evaluation spans."""

    # Pydantic v2 configuration. The inner ``class Config`` form is deprecated
    # in v2; a plain dict is accepted here and needs no extra import.
    model_config = {"arbitrary_types_allowed": True}

    spans: List[TrajectoryEvaluationSpan]

    @classmethod
    def from_readable_spans(
        cls, spans: List[ReadableSpan]
    ) -> "TrajectoryEvaluationTrace":
        """Convert a list of ReadableSpans to TrajectoryEvaluationTrace.

        Args:
            spans: List of OpenTelemetry ReadableSpans to convert

        Returns:
            TrajectoryEvaluationTrace with converted spans, in input order
        """
        # Index every span name by its span ID so each span can resolve the
        # name of its parent, provided the parent is part of the same list.
        span_id_to_name = {
            span.get_span_context().span_id: span.name for span in spans
        }

        return cls(
            spans=[
                TrajectoryEvaluationSpan.from_readable_span(span, span_id_to_name)
                for span in spans
            ]
        )
|
uipath/_cli/_evals/_runtime.py
CHANGED
@@ -264,6 +264,7 @@ class UiPathEvalRuntime(UiPathBaseRuntime, Generic[T, C]):
|
|
264
264
|
agent_input=eval_item.inputs,
|
265
265
|
agent_output=execution_output.result.output or {},
|
266
266
|
agent_trace=execution_output.spans,
|
267
|
+
expected_agent_behavior=eval_item.expected_agent_behavior,
|
267
268
|
)
|
268
269
|
|
269
270
|
result = await evaluator.evaluate(
|
@@ -1,31 +1,66 @@
|
|
1
1
|
"""Trajectory evaluator for analyzing execution paths and decision sequences."""
|
2
2
|
|
3
|
-
|
3
|
+
import json
|
4
|
+
from typing import Any, Optional
|
4
5
|
|
6
|
+
from opentelemetry.sdk.trace import ReadableSpan
|
7
|
+
from pydantic import field_validator
|
8
|
+
|
9
|
+
from uipath._cli._evals._models._trajectory_span import TrajectoryEvaluationTrace
|
5
10
|
from uipath.eval.models import EvaluationResult
|
6
11
|
|
7
|
-
from
|
12
|
+
from ..._services import UiPathLlmChatService
|
13
|
+
from ..._utils.constants import COMMUNITY_agents_SUFFIX
|
14
|
+
from ..models.models import AgentExecution, LLMResponse, NumericEvaluationResult
|
8
15
|
from .base_evaluator import BaseEvaluator
|
9
16
|
|
10
|
-
T = TypeVar("T")
|
11
|
-
|
12
17
|
|
13
|
-
class TrajectoryEvaluator(BaseEvaluator[T]):
|
18
|
+
class TrajectoryEvaluator(BaseEvaluator[dict[str, Any]]):
|
14
19
|
"""Evaluator that analyzes the trajectory/path taken to reach outputs."""
|
15
20
|
|
21
|
+
prompt: str
|
22
|
+
model: str
|
23
|
+
expected_agent_behavior_placeholder: str = "{{ExpectedAgentBehavior}}"
|
24
|
+
agent_run_history_placeholder: str = "{{AgentRunHistory}}"
|
25
|
+
llm: Optional[UiPathLlmChatService] = None
|
26
|
+
|
27
|
+
@field_validator("prompt")
@classmethod
def validate_prompt_placeholder(cls, v: str) -> str:
    """Validate that prompt contains required placeholders.

    Args:
        v: The prompt template to validate.

    Returns:
        The prompt unchanged when both placeholders are present.

    Raises:
        ValueError: If ``{{ExpectedAgentBehavior}}`` or
            ``{{AgentRunHistory}}`` is missing from the prompt.
    """
    if "{{ExpectedAgentBehavior}}" not in v or "{{AgentRunHistory}}" not in v:
        # Quote both placeholders exactly as they must appear in the prompt
        # (double braces), so the message can be copied verbatim.
        raise ValueError(
            "Prompt must contain {{ExpectedAgentBehavior}} and {{AgentRunHistory}} placeholders"
        )
    return v
|
36
|
+
|
37
|
+
def model_post_init(self, __context):
    """Initialize the LLM service after model creation.

    Runs the parent class's ``model_post_init`` with the same context and
    then calls ``_initialize_llm``.

    Args:
        __context: Context value forwarded unchanged to the parent hook.
    """
    super().model_post_init(__context)
    self._initialize_llm()
|
41
|
+
|
42
|
+
def _initialize_llm(self):
    """Attach the LLM service of a newly created ``UiPath`` client to ``self.llm``."""
    # Imported inside the method rather than at module level, as in the original.
    from uipath import UiPath

    self.llm = UiPath().llm
|
48
|
+
|
16
49
|
async def evaluate(
|
17
|
-
self,
|
50
|
+
self,
|
51
|
+
agent_execution: AgentExecution,
|
52
|
+
evaluation_criteria: dict[str, Any],
|
18
53
|
) -> EvaluationResult:
|
19
54
|
"""Evaluate using trajectory analysis.
|
20
55
|
|
21
|
-
Analyzes the execution path and decision sequence taken by the agent
|
22
|
-
to assess the quality of the reasoning process.
|
56
|
+
Analyzes the execution path and decision sequence taken by the agent.
|
23
57
|
|
24
58
|
Args:
|
25
59
|
agent_execution: The execution details containing:
|
26
60
|
- agent_input: The input received by the agent
|
27
61
|
- actual_output: The actual output from the agent
|
28
|
-
-
|
62
|
+
- agent_trace: The execution spans to use for the evaluation
|
63
|
+
- expected_agent_behavior: The expected agent behavior
|
29
64
|
evaluation_criteria: The criteria to evaluate
|
30
65
|
Returns:
|
31
66
|
EvaluationResult: Score based on trajectory analysis
|
@@ -33,4 +68,92 @@ class TrajectoryEvaluator(BaseEvaluator[T]):
|
|
33
68
|
Raises:
|
34
69
|
ValueError: If the LLM service has not been initialized
|
35
70
|
"""
|
36
|
-
|
71
|
+
evaluation_prompt = self._create_evaluation_prompt(
|
72
|
+
expected_agent_behavior=agent_execution.expected_agent_behavior,
|
73
|
+
agent_run_history=agent_execution.agent_trace,
|
74
|
+
)
|
75
|
+
|
76
|
+
llm_response = await self._get_llm_response(evaluation_prompt)
|
77
|
+
|
78
|
+
return NumericEvaluationResult(
|
79
|
+
score=llm_response.score,
|
80
|
+
details=llm_response.justification,
|
81
|
+
)
|
82
|
+
|
83
|
+
def _create_evaluation_prompt(
    self,
    expected_agent_behavior: Any,
    agent_run_history: Any,
) -> str:
    """Fill the prompt template with the expected behavior and the run history.

    Args:
        expected_agent_behavior: Value substituted (via ``str``) for the
            expected-behavior placeholder.
        agent_run_history: Value substituted for the run-history placeholder.
            A non-empty list whose first item is a ``ReadableSpan`` is first
            converted to trimmed trajectory spans; anything else is passed
            through ``str`` as-is.

    Returns:
        The prompt with both placeholders replaced.
    """
    with_behavior = self.prompt.replace(
        self.expected_agent_behavior_placeholder, str(expected_agent_behavior)
    )

    # Only raw OpenTelemetry spans are trimmed (timestamps and similar fields
    # are not relevant to the evaluation).
    holds_readable_spans = (
        isinstance(agent_run_history, list)
        and bool(agent_run_history)
        and isinstance(agent_run_history[0], ReadableSpan)
    )
    if holds_readable_spans:
        trimmed = TrajectoryEvaluationTrace.from_readable_spans(agent_run_history)
        history_text = str(trimmed.spans)
    else:
        history_text = str(agent_run_history)

    return with_behavior.replace(self.agent_run_history_placeholder, history_text)
|
113
|
+
|
114
|
+
async def _get_llm_response(self, evaluation_prompt: str) -> LLMResponse:
    """Get response from the LLM.

    Args:
        evaluation_prompt: The formatted prompt to send to the LLM

    Returns:
        LLMResponse with score and justification, built from the JSON
        content of the last choice in the chat completion.

    Raises:
        ValueError: If the LLM service has not been initialized.
    """
    if not self.llm:
        raise ValueError("LLM service not initialized")

    # Strip the marker only when it is a trailing suffix. str.replace would
    # also remove any earlier occurrence inside the model name.
    model = self.model.removesuffix(COMMUNITY_agents_SUFFIX)

    # Prepare the request: one user message, with the reply constrained to a
    # JSON object holding a 0-100 score and a textual justification.
    request_data = {
        "model": model,
        "messages": [{"role": "user", "content": evaluation_prompt}],
        "response_format": {
            "type": "json_schema",
            "json_schema": {
                "name": "evaluation_response",
                "schema": {
                    "type": "object",
                    "properties": {
                        "score": {
                            "type": "number",
                            "minimum": 0,
                            "maximum": 100,
                            "description": "Score between 0 and 100",
                        },
                        "justification": {
                            "type": "string",
                            "description": "Explanation for the score",
                        },
                    },
                    "required": ["score", "justification"],
                },
            },
        },
    }

    response = await self.llm.chat_completions(**request_data)
    return LLMResponse(**json.loads(response.choices[-1].message.content))
|
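uipath/eval/models/models.py
CHANGED
(the hunk for this file is missing from this extract; the file list reports +1 -0)
{uipath-2.1.66.dist-info → uipath-2.1.67.dist-info}/METADATA
CHANGED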
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: uipath
|
3
|
-
Version: 2.1.66
|
3
|
+
Version: 2.1.67
|
4
4
|
Summary: Python SDK and CLI for UiPath Platform, enabling programmatic interaction with automation services, process management, and deployment tools.
|
5
5
|
Project-URL: Homepage, https://uipath.com
|
6
6
|
Project-URL: Repository, https://github.com/UiPath/uipath-python
|
@@ -44,13 +44,14 @@ uipath/_cli/_dev/_terminal/_styles/terminal.tcss,sha256=ktVpKwXIXw2VZp8KIZD6fO9i
|
|
44
44
|
uipath/_cli/_dev/_terminal/_utils/_chat.py,sha256=YUZxYVdmEManwHDuZsczJT1dWIYE1dVBgABlurwMFcE,8493
|
45
45
|
uipath/_cli/_dev/_terminal/_utils/_exporter.py,sha256=oI6D_eMwrh_2aqDYUh4GrJg8VLGrLYhDahR-_o0uJns,4144
|
46
46
|
uipath/_cli/_dev/_terminal/_utils/_logger.py,sha256=jeNShEED27cNIHTe_NNx-2kUiXpSLTmi0onM6tVkqRM,888
|
47
|
-
uipath/_cli/_evals/_evaluator_factory.py,sha256=
|
47
|
+
uipath/_cli/_evals/_evaluator_factory.py,sha256=OWfLxPOEcDn4qv5m3n7LBfIBKcdTPml2ZCLcsqSymlU,5329
|
48
48
|
uipath/_cli/_evals/_progress_reporter.py,sha256=hpSt0CXpIoFJGsbqZkqmwyGO_TBNesbWKlvDJUEDxd8,16455
|
49
|
-
uipath/_cli/_evals/_runtime.py,sha256=
|
49
|
+
uipath/_cli/_evals/_runtime.py,sha256=WKcBT6DGzNRjgEOpmH0b7RoEbEsHMyAbcAMs8b_CAI0,11418
|
50
50
|
uipath/_cli/_evals/_models/_evaluation_set.py,sha256=mwcTstHuyHd7ys_nLzgCNKBAsS4ns9UL2TF5Oq2Cc64,1758
|
51
51
|
uipath/_cli/_evals/_models/_evaluator_base_params.py,sha256=lTYKOV66tcjW85KHTyOdtF1p1VDaBNemrMAvH8bFIFc,382
|
52
52
|
uipath/_cli/_evals/_models/_output.py,sha256=LjwMBGI78sDFa2Dl8b9ReXJmjig57pdLWpuiwChrRLo,3096
|
53
53
|
uipath/_cli/_evals/_models/_sw_reporting.py,sha256=tSBLQFAdOIun8eP0vsqt56K6bmCZz_uMaWI3hskg_24,536
|
54
|
+
uipath/_cli/_evals/_models/_trajectory_span.py,sha256=8ukM8sB9rvzBMHfC_gnexAC3xlp4uMDevKZrRzcgrm4,3637
|
54
55
|
uipath/_cli/_push/sw_file_handler.py,sha256=iE8Sk1Z-9hxmLFFj3j-k4kTK6TzNFP6hUCmxTudG6JQ,18251
|
55
56
|
uipath/_cli/_runtime/_contracts.py,sha256=xIcKq0xRbenzmJkZQO8blKwZ3b72Ntm4YONSYwaI-kg,28880
|
56
57
|
uipath/_cli/_runtime/_escalation.py,sha256=x3vI98qsfRA-fL_tNkRVTFXioM5Gv2w0GFcXJJ5eQtg,7981
|
@@ -126,9 +127,9 @@ uipath/eval/evaluators/deterministic_evaluator_base.py,sha256=yDWTMU1mG-93D6DscA
|
|
126
127
|
uipath/eval/evaluators/exact_match_evaluator.py,sha256=Qfz-kIUf80PKjAuge1Tc1GvN6kDB6hHveBZ86w_2How,1512
|
127
128
|
uipath/eval/evaluators/json_similarity_evaluator.py,sha256=cP4kpN-UIf690V5dq4LaCjJc2zFx-nEffUclCwDdlhM,6607
|
128
129
|
uipath/eval/evaluators/llm_as_judge_evaluator.py,sha256=l0bbn8ZLi9ZTXcgr7tJ2tsCvHFqIIeGa7sobaAHgI2Y,4927
|
129
|
-
uipath/eval/evaluators/trajectory_evaluator.py,sha256=
|
130
|
+
uipath/eval/evaluators/trajectory_evaluator.py,sha256=IylFm4yeNcVYgtmBzvzFn4Y2GXdSNnvAF8F4bCvPYdw,5774
|
130
131
|
uipath/eval/models/__init__.py,sha256=x360CDZaRjUL3q3kh2CcXYYrQ47jwn6p6JnmhEIvMlA,419
|
131
|
-
uipath/eval/models/models.py,sha256=
|
132
|
+
uipath/eval/models/models.py,sha256=is2wo-i0ld8Y_oZpbw5nG4cTXBz4bDLNxN6IjrfRcyM,2886
|
132
133
|
uipath/models/__init__.py,sha256=d_DkK1AtRUetM1t2NrH5UKgvJOBiynzaKnK5pMY7aIc,1289
|
133
134
|
uipath/models/action_schema.py,sha256=tBn1qQ3NQLU5nwWlBIzIKIx3XK5pO_D1S51IjFlZ1FA,610
|
134
135
|
uipath/models/actions.py,sha256=1vRsJ3JSmMdPkbiYAiHzY8K44vmW3VlMsmQUBAkSgrQ,3141
|
@@ -155,8 +156,8 @@ uipath/tracing/_traced.py,sha256=yBIY05PCCrYyx50EIHZnwJaKNdHPNx-YTR1sHQl0a98,199
|
|
155
156
|
uipath/tracing/_utils.py,sha256=qd7N56tg6VXQ9pREh61esBgUWLNA0ssKsE0QlwrRWFM,11974
|
156
157
|
uipath/utils/__init__.py,sha256=VD-KXFpF_oWexFg6zyiWMkxl2HM4hYJMIUDZ1UEtGx0,105
|
157
158
|
uipath/utils/_endpoints_manager.py,sha256=iRTl5Q0XAm_YgcnMcJOXtj-8052sr6jpWuPNz6CgT0Q,8408
|
158
|
-
uipath-2.1.
|
159
|
-
uipath-2.1.
|
160
|
-
uipath-2.1.
|
161
|
-
uipath-2.1.
|
162
|
-
uipath-2.1.
|
159
|
+
uipath-2.1.67.dist-info/METADATA,sha256=l-1OOoU7DUoo1PenVsoRIorYjP0wFq861zJeP4obFVs,6482
|
160
|
+
uipath-2.1.67.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
161
|
+
uipath-2.1.67.dist-info/entry_points.txt,sha256=9C2_29U6Oq1ExFu7usihR-dnfIVNSKc-0EFbh0rskB4,43
|
162
|
+
uipath-2.1.67.dist-info/licenses/LICENSE,sha256=-KBavWXepyDjimmzH5fVAsi-6jNVpIKFc2kZs0Ri4ng,1058
|
163
|
+
uipath-2.1.67.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|