uipath-2.1.52-py3-none-any.whl → uipath-2.1.54-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. uipath/_cli/_evals/{_evaluators/_evaluator_factory.py → _evaluator_factory.py} +24 -23
  2. uipath/_cli/_evals/_models/_evaluation_set.py +23 -18
  3. uipath/_cli/_evals/_models/_evaluator_base_params.py +16 -0
  4. uipath/_cli/_evals/_models/_output.py +85 -0
  5. uipath/_cli/_evals/_runtime.py +102 -10
  6. uipath/_cli/_runtime/_contracts.py +11 -2
  7. uipath/_cli/_utils/_eval_set.py +1 -1
  8. uipath/_cli/_utils/_studio_project.py +30 -29
  9. uipath/_cli/cli_eval.py +46 -61
  10. uipath/eval/evaluators/__init__.py +15 -0
  11. uipath/eval/evaluators/base_evaluator.py +88 -0
  12. uipath/eval/evaluators/deterministic_evaluator_base.py +53 -0
  13. uipath/eval/evaluators/exact_match_evaluator.py +37 -0
  14. uipath/{_cli/_evals/_evaluators/_json_similarity_evaluator.py → eval/evaluators/json_similarity_evaluator.py} +23 -40
  15. uipath/eval/evaluators/llm_as_judge_evaluator.py +137 -0
  16. uipath/eval/evaluators/trajectory_evaluator.py +36 -0
  17. uipath/eval/models/__init__.py +19 -0
  18. uipath/{_cli/_evals/_models/_evaluators.py → eval/models/models.py} +67 -43
  19. {uipath-2.1.52.dist-info → uipath-2.1.54.dist-info}/METADATA +1 -1
  20. {uipath-2.1.52.dist-info → uipath-2.1.54.dist-info}/RECORD +23 -23
  21. uipath/_cli/_evals/_evaluators/__init__.py +0 -22
  22. uipath/_cli/_evals/_evaluators/_deterministic_evaluator_base.py +0 -46
  23. uipath/_cli/_evals/_evaluators/_evaluator_base.py +0 -124
  24. uipath/_cli/_evals/_evaluators/_exact_match_evaluator.py +0 -40
  25. uipath/_cli/_evals/_evaluators/_llm_as_judge_evaluator.py +0 -183
  26. uipath/_cli/_evals/_evaluators/_trajectory_evaluator.py +0 -48
  27. uipath/_cli/_evals/_models/__init__.py +0 -18
  28. uipath/_cli/_evals/_models/_agent_execution_output.py +0 -14
  29. uipath/_cli/_evals/progress_reporter.py +0 -304
  30. {uipath-2.1.52.dist-info → uipath-2.1.54.dist-info}/WHEEL +0 -0
  31. {uipath-2.1.52.dist-info → uipath-2.1.54.dist-info}/entry_points.txt +0 -0
  32. {uipath-2.1.52.dist-info → uipath-2.1.54.dist-info}/licenses/LICENSE +0 -0
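
The restructuring is easiest to read from the file list: the evaluator implementations and their result models move out of the private `uipath/_cli/_evals/_evaluators` and `uipath/_cli/_evals/_models` packages into the public `uipath/eval/evaluators` and `uipath/eval/models` packages, and `progress_reporter.py` plus `_agent_execution_output.py` are deleted outright. The notable hunks follow.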
uipath/{_cli/_evals/_models/_evaluators.py → eval/models/models.py}
@@ -1,15 +1,79 @@
- from datetime import datetime, timezone
+ """Models for evaluation framework including execution data and evaluation results."""
+
  from enum import IntEnum
- from typing import Any, Dict, List, Optional
+ from typing import Annotated, Any, Dict, Literal, Optional, Union
+
+ from opentelemetry.sdk.trace import ReadableSpan
+ from pydantic import BaseModel, ConfigDict, Field
+
+
+ class AgentExecution(BaseModel):
+     """Represents the execution data of an agent for evaluation purposes."""

- from pydantic import BaseModel
+     model_config = ConfigDict(arbitrary_types_allowed=True)
+
+     agent_input: Optional[Dict[str, Any]]
+     agent_output: Dict[str, Any]
+     agent_trace: list[ReadableSpan]


  class LLMResponse(BaseModel):
+     """Response from an LLM evaluator."""
+
      score: float
      justification: str


+ class ScoreType(IntEnum):
+     """Types of evaluation scores."""
+
+     BOOLEAN = 0
+     NUMERICAL = 1
+     ERROR = 2
+
+
+ class BaseEvaluationResult(BaseModel):
+     """Base class for evaluation results."""
+
+     details: Optional[str] = None
+     # this is marked as optional, as it is populated inside the 'measure_execution_time' decorator
+     evaluation_time: Optional[float] = None
+
+
+ class BooleanEvaluationResult(BaseEvaluationResult):
+     """Result of a boolean evaluation."""
+
+     score: bool
+     score_type: Literal[ScoreType.BOOLEAN] = ScoreType.BOOLEAN
+
+
+ class NumericEvaluationResult(BaseEvaluationResult):
+     """Result of a numerical evaluation."""
+
+     score: float
+     score_type: Literal[ScoreType.NUMERICAL] = ScoreType.NUMERICAL
+
+
+ class ErrorEvaluationResult(BaseEvaluationResult):
+     """Result of an error evaluation."""
+
+     score: float = 0.0
+     score_type: Literal[ScoreType.ERROR] = ScoreType.ERROR
+
+
+ EvaluationResult = Annotated[
+     Union[BooleanEvaluationResult, NumericEvaluationResult, ErrorEvaluationResult],
+     Field(discriminator="score_type"),
+ ]
+
+
+ class EvalItemResult(BaseModel):
+     """Result of a single evaluation item."""
+
+     evaluator_name: str
+     result: EvaluationResult
+
+
  class EvaluatorCategory(IntEnum):
      """Types of evaluators."""

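The headline change in this hunk: `EvaluationResult` stops being a single catch-all model and becomes a pydantic discriminated union keyed on `score_type`. A minimal sketch of how such a union validates (not from the package; the import path is an assumption based on the new `uipath/eval/models/models.py` above, and the package may also re-export these names from `uipath.eval.models`):

```python
from pydantic import TypeAdapter

# Assumed import path, per this diff's file list.
from uipath.eval.models.models import (
    BooleanEvaluationResult,
    EvaluationResult,
    ScoreType,
)

adapter = TypeAdapter(EvaluationResult)

# pydantic dispatches on the "score_type" discriminator to pick the
# concrete result class from the union.
result = adapter.validate_python({"score": True, "score_type": ScoreType.BOOLEAN})
assert isinstance(result, BooleanEvaluationResult)
```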
@@ -48,43 +112,3 @@ class EvaluatorType(IntEnum):
              return cls(value)
          else:
              raise ValueError(f"{value} is not a valid EvaluatorType value")
-
-
- class ScoreType(IntEnum):
-     BOOLEAN = 0
-     NUMERICAL = 1
-     ERROR = 2
-
-
- class EvaluationResult(BaseModel):
-     """Result of a single evaluation."""
-
-     evaluation_id: str
-     evaluation_name: str
-     evaluator_id: str
-     evaluator_name: str
-     score: float | bool
-     score_type: ScoreType
-     # this is marked as optional, as it is populated inside the 'measure_execution_time' decorator
-     evaluation_time: Optional[float] = None
-     input: Dict[str, Any]
-     expected_output: Dict[str, Any]
-     actual_output: Dict[str, Any]
-     timestamp: datetime = datetime.now(timezone.utc)
-     details: Optional[str] = None
-
-
- class EvaluationSetResult(BaseModel):
-     """Result of a complete evaluation set."""
-
-     eval_set_id: str
-     eval_set_name: str
-     results: List[EvaluationResult]
-     average_score: float
-
-
- class EvalItemResult(BaseModel):
-     """Result of a single evaluation item."""
-
-     evaluator_id: str
-     result: EvaluationResult
{uipath-2.1.52.dist-info → uipath-2.1.54.dist-info}/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: uipath
- Version: 2.1.52
+ Version: 2.1.54
  Summary: Python SDK and CLI for UiPath Platform, enabling programmatic interaction with automation services, process management, and deployment tools.
  Project-URL: Homepage, https://uipath.com
  Project-URL: Repository, https://github.com/UiPath/uipath-python
{uipath-2.1.52.dist-info → uipath-2.1.54.dist-info}/RECORD
@@ -9,7 +9,7 @@ uipath/_cli/__init__.py,sha256=tscKceSouYcEOxUbGjoyHi4qGi74giBFeXG1I-ut1hs,2308
  uipath/_cli/cli_auth.py,sha256=i3ykLlCg68xgPXHHaa0agHwGFIiLiTLzOiF6Su8XaEo,2436
  uipath/_cli/cli_deploy.py,sha256=KPCmQ0c_NYD5JofSDao5r6QYxHshVCRxlWDVnQvlp5w,645
  uipath/_cli/cli_dev.py,sha256=nEfpjw1PZ72O6jmufYWVrueVwihFxDPOeJakdvNHdOA,2146
- uipath/_cli/cli_eval.py,sha256=fYJWQlyiIc8SpTzY9QPNQWOx40PagMEKdsGZIu9As2A,4402
+ uipath/_cli/cli_eval.py,sha256=uiisQ3Wpalu8e9gHRMmn_9Gqus_t4brbjTegMQOhLa0,3831
  uipath/_cli/cli_init.py,sha256=Ac3-9tIH3rpikIX1ehWTo7InW5tjVNoz_w6fjvgLK4w,7052
  uipath/_cli/cli_invoke.py,sha256=4jyhqcy7tPrpxvaUhW-9gut6ddsCGMdJJcpOXXmIe8g,4348
  uipath/_cli/cli_new.py,sha256=9378NYUBc9j-qKVXV7oja-jahfJhXBg8zKVyaon7ctY,2102
@@ -44,22 +44,13 @@ uipath/_cli/_dev/_terminal/_styles/terminal.tcss,sha256=ktVpKwXIXw2VZp8KIZD6fO9i
  uipath/_cli/_dev/_terminal/_utils/_chat.py,sha256=YUZxYVdmEManwHDuZsczJT1dWIYE1dVBgABlurwMFcE,8493
  uipath/_cli/_dev/_terminal/_utils/_exporter.py,sha256=oI6D_eMwrh_2aqDYUh4GrJg8VLGrLYhDahR-_o0uJns,4144
  uipath/_cli/_dev/_terminal/_utils/_logger.py,sha256=jeNShEED27cNIHTe_NNx-2kUiXpSLTmi0onM6tVkqRM,888
- uipath/_cli/_evals/_runtime.py,sha256=q4h3zp_7Ygkhj1zE_YTKKXRp3BhkHaPj8CWqjkzerTk,4748
- uipath/_cli/_evals/progress_reporter.py,sha256=PGt1rs7IH1C6HPw8fWUwb98GB3UBuM6eUiiqGthfCIk,11174
- uipath/_cli/_evals/_evaluators/__init__.py,sha256=jD7KNLjbsUpsESFXX11eW2MEPXDNuPp2-t-IPB-inlM,734
- uipath/_cli/_evals/_evaluators/_deterministic_evaluator_base.py,sha256=BTl0puBjp9iCsU3YFfYWqk4TOz4iE19O3q1-dK6qUOI,1723
- uipath/_cli/_evals/_evaluators/_evaluator_base.py,sha256=knHUwYFt0gMG1uJhq5TGEab6M_YevxX019yT3yYwZsw,3787
- uipath/_cli/_evals/_evaluators/_evaluator_factory.py,sha256=cURShn17X6BW-_G3rknJXWtlgpeh5UdioLUV6oGCGAU,4912
- uipath/_cli/_evals/_evaluators/_exact_match_evaluator.py,sha256=lvEtAitrZy9myoZLMXLqlBWBPX06Msu67kuFMGSbikM,1319
- uipath/_cli/_evals/_evaluators/_json_similarity_evaluator.py,sha256=HpmkvuwU4Az3IIqFVLUmDvzkqb21pFMxY0sg2biZOMM,7093
- uipath/_cli/_evals/_evaluators/_llm_as_judge_evaluator.py,sha256=nSLZ29xWqALEI53ifr79JPXjyx0T4sr7p-4NygwgAio,6594
- uipath/_cli/_evals/_evaluators/_trajectory_evaluator.py,sha256=dnogQTOskpI4_cNF0Ge3hBceJJocvOgxBWAwaCWnzB0,1595
- uipath/_cli/_evals/_models/__init__.py,sha256=Ewjp3u2YeTH2MmzY9LWf7EIbAoIf_nW9fMYbj7pGlPs,420
- uipath/_cli/_evals/_models/_agent_execution_output.py,sha256=llvApU4JkTnNgQ5DvHPt8ee3bnV6cCANyeiebWKE07E,401
- uipath/_cli/_evals/_models/_evaluation_set.py,sha256=tVHykSget-G3sOCs9bSchMYUTpFqzXVlYYbY8L9SI0c,1518
- uipath/_cli/_evals/_models/_evaluators.py,sha256=l57NEVyYmzSKuoIXuGkE94Br01hAMg35fiS2MlTkaQM,2115
+ uipath/_cli/_evals/_evaluator_factory.py,sha256=2lOalabNSzmnnwr0SfoPWvFWXs0Ly857XBmPuOdhFBQ,4729
+ uipath/_cli/_evals/_runtime.py,sha256=KFGl2we1RH0omuD2HWw5thIK6DDZxVGtqx_G9T4DM_A,8332
+ uipath/_cli/_evals/_models/_evaluation_set.py,sha256=mwcTstHuyHd7ys_nLzgCNKBAsS4ns9UL2TF5Oq2Cc64,1758
+ uipath/_cli/_evals/_models/_evaluator_base_params.py,sha256=lTYKOV66tcjW85KHTyOdtF1p1VDaBNemrMAvH8bFIFc,382
+ uipath/_cli/_evals/_models/_output.py,sha256=TTQ0hhmD3dTkIbj_Ly_rDCGSnpZsHwdmCsl7FLdoZD0,2634
  uipath/_cli/_push/sw_file_handler.py,sha256=AX4TKM-q6CNGw3JyBW02M8ktPZuFMcAU9LN3Ii0Q2QI,18202
- uipath/_cli/_runtime/_contracts.py,sha256=Mwdb11OULGg9xGSu_A6KCl7A5jw-fKe5tBiv62MZNVM,28424
+ uipath/_cli/_runtime/_contracts.py,sha256=ZK572viY3Ydnip21rzmJ3R6F3cXpHVtDTuvwcEciy3I,28782
  uipath/_cli/_runtime/_escalation.py,sha256=x3vI98qsfRA-fL_tNkRVTFXioM5Gv2w0GFcXJJ5eQtg,7981
  uipath/_cli/_runtime/_hitl.py,sha256=VKbM021nVg1HEDnTfucSLJ0LsDn83CKyUtVzofS2qTU,11369
  uipath/_cli/_runtime/_logging.py,sha256=MGklGKPjYKjs7J5Jy9eplA9zCDsdtEbkZdCbTwgut_4,8311
@@ -74,13 +65,13 @@ uipath/_cli/_utils/_common.py,sha256=CzhhkIRfCuQ1-5HLDtjzOyt8KFs1jm6wzrBeU_v2B7c
  uipath/_cli/_utils/_console.py,sha256=scvnrrFoFX6CE451K-PXKV7UN0DUkInbOtDZ5jAdPP0,10070
  uipath/_cli/_utils/_constants.py,sha256=rS8lQ5Nzull8ytajK6lBsz398qiCp1REoAwlHtyBwF0,1415
  uipath/_cli/_utils/_debug.py,sha256=zamzIR4VgbdKADAE4gbmjxDsbgF7wvdr7C5Dqp744Oc,1739
- uipath/_cli/_utils/_eval_set.py,sha256=z0sTEj4lGkLZXfj9vUpMwFPL6LNMs1MSCZ43Efzoc6A,2750
+ uipath/_cli/_utils/_eval_set.py,sha256=4aP8yAC-jMrNYaC62Yj8fHD2hNlotGwy63bciQrpdc4,2766
  uipath/_cli/_utils/_folders.py,sha256=UVJcKPfPAVR5HF4AP6EXdlNVcfEF1v5pwGCpoAgBY34,1155
  uipath/_cli/_utils/_input_args.py,sha256=3LGNqVpJItvof75VGm-ZNTUMUH9-c7-YgleM5b2YgRg,5088
  uipath/_cli/_utils/_parse_ast.py,sha256=8Iohz58s6bYQ7rgWtOTjrEInLJ-ETikmOMZzZdIY2Co,20072
  uipath/_cli/_utils/_processes.py,sha256=q7DfEKHISDWf3pngci5za_z0Pbnf_shWiYEcTOTCiyk,1855
  uipath/_cli/_utils/_project_files.py,sha256=sulh3xZhDDw_rBOrn_XSUfVSD6sUu47ZK4n_lF5BKkQ,13197
- uipath/_cli/_utils/_studio_project.py,sha256=HvzcpIIIA4hUIvMbId1dsAhmFLMuhnS2ZtyNdcpXJ8c,15422
+ uipath/_cli/_utils/_studio_project.py,sha256=4aoRFj5FazUPpPltfr3jvyjoLsUd4hyl9We4SAuKFh4,15376
  uipath/_cli/_utils/_tracing.py,sha256=2igb03j3EHjF_A406UhtCKkPfudVfFPjUq5tXUEG4oo,1541
  uipath/_cli/_utils/_uv_helpers.py,sha256=6SvoLnZPoKIxW0sjMvD1-ENV_HOXDYzH34GjBqwT138,3450
  uipath/_resources/AGENTS.md,sha256=YWhWuX9XIbyVhVT3PnPc4Of3_q6bsNJcuzYu3N8f_Ug,25850
@@ -122,6 +113,15 @@ uipath/agent/conversation/meta.py,sha256=3t0eS9UHoAPHre97QTUeVbjDhnMX4zj4-qG6ju0
  uipath/agent/conversation/tool.py,sha256=ol8XI8AVd-QNn5auXNBPcCzOkh9PPFtL7hTK3kqInkU,2191
  uipath/eval/_helpers/__init__.py,sha256=GSmZMryjuO3Wo_zdxZdrHCRRsgOxsVFYkYgJ15YNC3E,86
  uipath/eval/_helpers/helpers.py,sha256=iE2HHdMiAdAMLqxHkPKHpfecEtAuN5BTBqvKFTI8ciE,1315
+ uipath/eval/evaluators/__init__.py,sha256=DJAAhgv0I5UfBod4sGnSiKerfrz1iMmk7GNFb71V8eI,494
+ uipath/eval/evaluators/base_evaluator.py,sha256=gryaN7WMV__NGorwu4WPRL5A5RlJ1exQ9jDJ6ZrXDB8,2679
+ uipath/eval/evaluators/deterministic_evaluator_base.py,sha256=yDWTMU1mG-93D6DscAUHmaVUc1rhGYtNjGXgevzAObM,1723
+ uipath/eval/evaluators/exact_match_evaluator.py,sha256=Qfz-kIUf80PKjAuge1Tc1GvN6kDB6hHveBZ86w_2How,1512
+ uipath/eval/evaluators/json_similarity_evaluator.py,sha256=cP4kpN-UIf690V5dq4LaCjJc2zFx-nEffUclCwDdlhM,6607
+ uipath/eval/evaluators/llm_as_judge_evaluator.py,sha256=l0bbn8ZLi9ZTXcgr7tJ2tsCvHFqIIeGa7sobaAHgI2Y,4927
+ uipath/eval/evaluators/trajectory_evaluator.py,sha256=7boiKzjLpQPs8M8y2PGnI3bZQ1MEwR6QRZpXyKQcR7Y,1244
+ uipath/eval/models/__init__.py,sha256=x360CDZaRjUL3q3kh2CcXYYrQ47jwn6p6JnmhEIvMlA,419
+ uipath/eval/models/models.py,sha256=9IraD5C2KfKK1ZLMZ7jBOJzzHW4X1Dp2k41abqmPMnA,2838
  uipath/models/__init__.py,sha256=d_DkK1AtRUetM1t2NrH5UKgvJOBiynzaKnK5pMY7aIc,1289
  uipath/models/action_schema.py,sha256=tBn1qQ3NQLU5nwWlBIzIKIx3XK5pO_D1S51IjFlZ1FA,610
  uipath/models/actions.py,sha256=1vRsJ3JSmMdPkbiYAiHzY8K44vmW3VlMsmQUBAkSgrQ,3141
@@ -148,8 +148,8 @@ uipath/tracing/_traced.py,sha256=qeVDrds2OUnpdUIA0RhtF0kg2dlAZhyC1RRkI-qivTM,185
  uipath/tracing/_utils.py,sha256=wJRELaPu69iY0AhV432Dk5QYf_N_ViRU4kAUG1BI1ew,10384
  uipath/utils/__init__.py,sha256=VD-KXFpF_oWexFg6zyiWMkxl2HM4hYJMIUDZ1UEtGx0,105
  uipath/utils/_endpoints_manager.py,sha256=iRTl5Q0XAm_YgcnMcJOXtj-8052sr6jpWuPNz6CgT0Q,8408
- uipath-2.1.52.dist-info/METADATA,sha256=zNHWskIn1OPB0hrLbZBoX6qmd-U-52O_LTHBZ06FvdQ,6482
- uipath-2.1.52.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- uipath-2.1.52.dist-info/entry_points.txt,sha256=9C2_29U6Oq1ExFu7usihR-dnfIVNSKc-0EFbh0rskB4,43
- uipath-2.1.52.dist-info/licenses/LICENSE,sha256=-KBavWXepyDjimmzH5fVAsi-6jNVpIKFc2kZs0Ri4ng,1058
- uipath-2.1.52.dist-info/RECORD,,
+ uipath-2.1.54.dist-info/METADATA,sha256=Yzw9AxJ3oi5FRDQ3ISjPi55QIWeKyNYqFSVzNnN-FXo,6482
+ uipath-2.1.54.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ uipath-2.1.54.dist-info/entry_points.txt,sha256=9C2_29U6Oq1ExFu7usihR-dnfIVNSKc-0EFbh0rskB4,43
+ uipath-2.1.54.dist-info/licenses/LICENSE,sha256=-KBavWXepyDjimmzH5fVAsi-6jNVpIKFc2kZs0Ri4ng,1058
+ uipath-2.1.54.dist-info/RECORD,,
uipath/_cli/_evals/_evaluators/__init__.py (deleted)
@@ -1,22 +0,0 @@
- """Evaluators package for the evaluation system.
-
- This package contains all evaluator types and the factory for creating them.
- """
-
- from ._deterministic_evaluator_base import DeterministicEvaluatorBase
- from ._evaluator_base import EvaluatorBase
- from ._evaluator_factory import EvaluatorFactory
- from ._exact_match_evaluator import ExactMatchEvaluator
- from ._json_similarity_evaluator import JsonSimilarityEvaluator
- from ._llm_as_judge_evaluator import LlmAsAJudgeEvaluator
- from ._trajectory_evaluator import TrajectoryEvaluator
-
- __all__ = [
-     "EvaluatorBase",
-     "DeterministicEvaluatorBase",
-     "EvaluatorFactory",
-     "JsonSimilarityEvaluator",
-     "ExactMatchEvaluator",
-     "LlmAsAJudgeEvaluator",
-     "TrajectoryEvaluator",
- ]
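
Per the file list, the replacement `uipath/eval/evaluators/__init__.py` is 15 lines, so presumably the same classes are now imported as, e.g., `from uipath.eval.evaluators import ExactMatchEvaluator`; the exact re-export list is not shown in this diff.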
uipath/_cli/_evals/_evaluators/_deterministic_evaluator_base.py (deleted)
@@ -1,46 +0,0 @@
- import copy
- import json
- from abc import ABC
- from typing import Any, Dict, Tuple
-
- from ._evaluator_base import EvaluatorBase
-
-
- class DeterministicEvaluatorBase(EvaluatorBase, ABC):
-     def __init__(self, target_output_key: str = "*"):
-         super().__init__()
-         self.target_output_key = target_output_key
-
-     def _select_targets(
-         self, expected_output: Dict[str, Any], actual_output: Dict[str, Any]
-     ) -> Tuple[Any, Any]:
-         actual_output_copy = copy.deepcopy(actual_output)
-         expected_output_copy = copy.deepcopy(expected_output)
-         if self.target_output_key != "*":
-             if (
-                 self.target_output_key not in actual_output
-                 or self.target_output_key not in expected_output
-             ):
-                 raise ValueError(
-                     f"Field '{self.target_output_key}' missing from expected or actual output"
-                 )
-             actual_output_copy = actual_output_copy[self.target_output_key]
-             expected_output_copy = expected_output[self.target_output_key]
-         return actual_output_copy, expected_output_copy
-
-     def _canonical_json(self, obj: Any) -> str:
-         return json.dumps(
-             self._normalize_numbers(obj),
-             sort_keys=True,
-             separators=(",", ":"),
-             ensure_ascii=False,
-         )
-
-     def _normalize_numbers(self, obj: Any) -> Any:
-         if isinstance(obj, dict):
-             return {k: self._normalize_numbers(v) for k, v in obj.items()}
-         if isinstance(obj, (list, tuple)):
-             return [self._normalize_numbers(v) for v in obj]
-         if isinstance(obj, (int, float)) and not isinstance(obj, bool):
-             return float(obj)
-         return obj
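
The canonicalization trick in this deleted base class is worth spelling out: serializing with sorted keys after coercing every non-bool number to float makes equality checks insensitive to key order and to int-vs-float representation. A standalone sketch of the same idea (re-implemented here for illustration, not imported from the package):

```python
import json
from typing import Any


def normalize_numbers(obj: Any) -> Any:
    # Coerce all non-bool numbers to float so 1 and 1.0 serialize identically.
    if isinstance(obj, dict):
        return {k: normalize_numbers(v) for k, v in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [normalize_numbers(v) for v in obj]
    if isinstance(obj, (int, float)) and not isinstance(obj, bool):
        return float(obj)
    return obj


def canonical_json(obj: Any) -> str:
    # Sorted keys + compact separators give a stable, comparable string.
    return json.dumps(
        normalize_numbers(obj), sort_keys=True, separators=(",", ":"), ensure_ascii=False
    )


assert canonical_json({"b": 2, "a": 1}) == canonical_json({"a": 1.0, "b": 2.0})
```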
uipath/_cli/_evals/_evaluators/_evaluator_base.py (deleted)
@@ -1,124 +0,0 @@
- import functools
- import time
- from abc import ABC, abstractmethod
- from dataclasses import dataclass
- from typing import Any, Dict
-
- from uipath._cli._evals._models import (
-     EvaluationResult,
-     EvaluatorCategory,
-     EvaluatorType,
- )
-
-
- def measure_execution_time(func):
-     """Decorator to measure execution time and update EvaluationResult.evaluation_time."""
-
-     @functools.wraps(func)
-     async def wrapper(*args, **kwargs) -> EvaluationResult:
-         start_time = time.time()
-         result = await func(*args, **kwargs)
-         end_time = time.time()
-         execution_time = end_time - start_time
-
-         result.evaluation_time = execution_time
-         return result
-
-     return wrapper
-
-
- @dataclass
- class EvaluatorBaseParams:
-     """Parameters for initializing the base evaluator."""
-
-     evaluator_id: str
-     category: EvaluatorCategory
-     evaluator_type: EvaluatorType
-     name: str
-     description: str
-     created_at: str
-     updated_at: str
-     target_output_key: str
-
-
- class EvaluatorBase(ABC):
-     """Abstract base class for all evaluators."""
-
-     def __init__(self):
-         # initialization done via 'from_params' function
-         self.id: str
-         self.name: str
-         self.description: str
-         self.created_at: str
-         self.updated_at: str
-         self.category: EvaluatorCategory
-         self.type: EvaluatorType
-         self.target_output_key: str
-         pass
-
-     @classmethod
-     def from_params(cls, params: EvaluatorBaseParams, **kwargs):
-         """Initialize the base evaluator from parameters.
-
-         Args:
-             params: EvaluatorBaseParams containing base configuration
-             **kwargs: Additional specific parameters for concrete evaluators
-
-         Returns:
-             Initialized evaluator instance
-         """
-         instance = cls(**kwargs)
-         instance.id = params.evaluator_id
-         instance.category = params.category
-         instance.type = params.evaluator_type
-         instance.name = params.name
-         instance.description = params.description
-         instance.created_at = params.created_at
-         instance.updated_at = params.updated_at
-         instance.target_output_key = params.target_output_key
-         return instance
-
-     @measure_execution_time
-     @abstractmethod
-     async def evaluate(
-         self,
-         evaluation_id: str,
-         evaluation_name: str,
-         input_data: Dict[str, Any],
-         expected_output: Dict[str, Any],
-         actual_output: Dict[str, Any],
-     ) -> EvaluationResult:
-         """Evaluate the given data and return a result.
-
-         Args:
-             evaluation_id: The ID of the evaluation being processed
-             evaluation_name: The name of the evaluation
-             input_data: The input data for the evaluation
-             expected_output: The expected output
-             actual_output: The actual output from the agent
-
-         Returns:
-             EvaluationResult containing the score and details
-         """
-         pass
-
-     def to_dict(self) -> Dict[str, Any]:
-         """Convert the evaluator instance to a dictionary representation.
-
-         Returns:
-             Dict[str, Any]: Dictionary containing all evaluator properties
-         """
-         return {
-             "id": self.id,
-             "name": self.name,
-             "description": self.description,
-             "created_at": self.created_at,
-             "updated_at": self.updated_at,
-             "category": self.category.name if self.category else None,
-             "type": self.type.name if self.type else None,
-             "target_output_key": self.target_output_key,
-         }
-
-     def __repr__(self) -> str:
-         """String representation of the evaluator."""
-         return f"{self.__class__.__name__}(id='{self.id}', name='{self.name}', category={self.category.name})"
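
Note the interplay between `measure_execution_time` and the result models: the decorator writes the elapsed wall-clock time onto the returned result after the coroutine finishes, which is why `evaluation_time` is declared `Optional` with a `None` default. A self-contained sketch of the pattern (the `Result` class here is hypothetical, for illustration only):

```python
import asyncio
import functools
import time
from dataclasses import dataclass
from typing import Optional


@dataclass
class Result:
    score: float
    evaluation_time: Optional[float] = None  # filled in by the decorator


def measure_execution_time(func):
    @functools.wraps(func)
    async def wrapper(*args, **kwargs) -> Result:
        start = time.time()
        result = await func(*args, **kwargs)
        # Populate the timing field on the object the coroutine returned.
        result.evaluation_time = time.time() - start
        return result

    return wrapper


@measure_execution_time
async def evaluate() -> Result:
    await asyncio.sleep(0.1)
    return Result(score=1.0)


print(asyncio.run(evaluate()).evaluation_time)  # ~0.1
```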
uipath/_cli/_evals/_evaluators/_exact_match_evaluator.py (deleted)
@@ -1,40 +0,0 @@
- import copy
- from typing import Any, Dict
-
- from uipath._cli._evals._evaluators._deterministic_evaluator_base import (
-     DeterministicEvaluatorBase,
- )
- from uipath._cli._evals._models import EvaluationResult
- from uipath._cli._evals._models._evaluators import ScoreType
-
-
- class ExactMatchEvaluator(DeterministicEvaluatorBase):
-     async def evaluate(
-         self,
-         evaluation_id: str,
-         evaluation_name: str,
-         input_data: Dict[str, Any],
-         expected_output: Dict[str, Any],
-         actual_output: Dict[str, Any],
-     ) -> EvaluationResult:
-         actual_output_copy = copy.deepcopy(actual_output)
-         expected_output_copy = copy.deepcopy(expected_output)
-
-         actual_output, expected_output = self._select_targets(
-             expected_output, actual_output
-         )
-         are_equal = self._canonical_json(actual_output) == self._canonical_json(
-             expected_output
-         )
-
-         return EvaluationResult(
-             evaluation_id=evaluation_id,
-             evaluation_name=evaluation_name,
-             evaluator_id=self.id,
-             evaluator_name=self.name,
-             score=are_equal,
-             input=input_data,
-             expected_output=expected_output_copy,
-             actual_output=actual_output_copy,
-             score_type=ScoreType.BOOLEAN,
-         )
uipath/_cli/_evals/_evaluators/_llm_as_judge_evaluator.py (deleted)
@@ -1,183 +0,0 @@
- import json
- from typing import Any, Dict
-
- from ...._config import Config
- from ...._execution_context import ExecutionContext
- from ...._services.llm_gateway_service import UiPathLlmChatService
- from ...._utils.constants import (
-     ENV_BASE_URL,
-     ENV_UIPATH_ACCESS_TOKEN,
-     ENV_UNATTENDED_USER_ACCESS_TOKEN,
-     COMMUNITY_agents_SUFFIX,
- )
- from .._models import EvaluationResult, LLMResponse
- from .._models._evaluators import ScoreType
- from ._evaluator_base import EvaluatorBase
-
-
- class LlmAsAJudgeEvaluator(EvaluatorBase):
-     """Evaluator that uses an LLM to judge the quality of outputs."""
-
-     def __init__(self, prompt: str = "", model: str = "", target_output_key: str = "*"):
-         """Initialize the LLM-as-a-judge evaluator.
-
-         Args:
-             prompt: The prompt template for the LLM
-             model: The model to use for evaluation
-             target_output_key: Key in output to evaluate ("*" for entire output)
-         """
-         super().__init__()
-         self.actual_output_placeholder = "{{ActualOutput}}"
-         self.expected_output_placeholder = "{{ExpectedOutput}}"
-         self._initialize_llm()
-         self.prompt = prompt
-         self.model = model
-         self.target_output_key: str = target_output_key
-
-     def _initialize_llm(self):
-         """Initialize the LLM used for evaluation."""
-         import os
-
-         base_url_value: str = os.getenv(ENV_BASE_URL)  # type: ignore
-         secret_value: str = os.getenv(ENV_UNATTENDED_USER_ACCESS_TOKEN) or os.getenv(
-             ENV_UIPATH_ACCESS_TOKEN
-         )  # type: ignore
-         config = Config(
-             base_url=base_url_value,
-             secret=secret_value,
-         )
-         self.llm = UiPathLlmChatService(config, ExecutionContext())
-
-     async def evaluate(
-         self,
-         evaluation_id: str,
-         evaluation_name: str,
-         input_data: Dict[str, Any],
-         expected_output: Dict[str, Any],
-         actual_output: Dict[str, Any],
-     ) -> EvaluationResult:
-         """Evaluate using an LLM as a judge.
-
-         Args:
-             evaluation_id: The ID of the evaluation being processed
-             evaluation_name: The name of the evaluation
-             input_data: The input data for the evaluation
-             expected_output: The expected output
-             actual_output: The actual output from the agent
-
-         Returns:
-             EvaluationResult containing the score and details
-         """
-         # Extract the target value to evaluate
-         target_value = self._extract_target_value(actual_output)
-         expected_value = self._extract_target_value(expected_output)
-
-         # Create the evaluation prompt
-         evaluation_prompt = self._create_evaluation_prompt(expected_value, target_value)
-
-         llm_response = await self._get_llm_response(evaluation_prompt)
-
-         return EvaluationResult(
-             evaluation_id=evaluation_id,
-             evaluation_name=evaluation_name,
-             evaluator_id=self.id,
-             evaluator_name=self.name,
-             score=llm_response.score,
-             input=input_data,
-             expected_output=expected_output,
-             actual_output=actual_output,
-             details=llm_response.justification,
-             score_type=ScoreType.NUMERICAL,
-         )
-
-     def _extract_target_value(self, output: Dict[str, Any]) -> Any:
-         """Extract the target value from output based on target_output_key."""
-         if self.target_output_key == "*":
-             return output
-
-         # Handle nested keys
-         keys = self.target_output_key.split(".")
-         value = output
-
-         try:
-             for key in keys:
-                 if isinstance(value, dict):
-                     value = value[key]
-                 else:
-                     return None
-             return value
-         except (KeyError, TypeError):
-             return None
-
-     def _create_evaluation_prompt(
-         self, expected_output: Any, actual_output: Any
-     ) -> str:
-         """Create the evaluation prompt for the LLM."""
-         formatted_prompt = self.prompt.replace(
-             self.actual_output_placeholder,
-             str(actual_output),
-         )
-         formatted_prompt = formatted_prompt.replace(
-             self.expected_output_placeholder,
-             str(expected_output),
-         )
-
-         return formatted_prompt
-
-     async def _get_llm_response(self, evaluation_prompt: str) -> LLMResponse:
-         """Get response from the LLM.
-
-         Args:
-             evaluation_prompt: The formatted prompt to send to the LLM
-
-         Returns:
-             LLMResponse with score and justification
-         """
-         try:
-             # remove community-agents suffix from llm model name
-             model = self.model
-             if model.endswith(COMMUNITY_agents_SUFFIX):
-                 model = model.replace(COMMUNITY_agents_SUFFIX, "")
-
-             # Prepare the request
-             request_data = {
-                 "model": model,
-                 "messages": [{"role": "user", "content": evaluation_prompt}],
-                 "response_format": {
-                     "type": "json_schema",
-                     "json_schema": {
-                         "name": "evaluation_response",
-                         "schema": {
-                             "type": "object",
-                             "properties": {
-                                 "score": {
-                                     "type": "number",
-                                     "minimum": 0,
-                                     "maximum": 100,
-                                     "description": "Score between 0 and 100",
-                                 },
-                                 "justification": {
-                                     "type": "string",
-                                     "description": "Explanation for the score",
-                                 },
-                             },
-                             "required": ["score", "justification"],
-                         },
-                     },
-                 },
-             }
-
-             response = await self.llm.chat_completions(**request_data)
-
-             try:
-                 return LLMResponse(**json.loads(response.choices[-1].message.content))
-             except (json.JSONDecodeError, ValueError) as e:
-                 return LLMResponse(
-                     score=0.0, justification=f"Error parsing LLM response: {str(e)}"
-                 )
-
-         except Exception as e:
-             # Fallback in case of any errors
-             return LLMResponse(
-                 score=0.0, justification=f"Error during LLM evaluation: {str(e)}"
-             )
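
The removed evaluator constrained the LLM with a JSON-schema `response_format` and then parsed the last choice into `LLMResponse`. A minimal sketch of just that parsing contract (standalone; the `raw` payload is a made-up example of what the "evaluation_response" schema above permits):

```python
import json

from pydantic import BaseModel


class LLMResponse(BaseModel):
    score: float
    justification: str


# Example payload shaped by the JSON schema: a 0-100 score plus a rationale.
raw = '{"score": 87, "justification": "Outputs agree on all required fields."}'
resp = LLMResponse(**json.loads(raw))
assert 0 <= resp.score <= 100
```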