uipath: uipath-2.1.108-py3-none-any.whl → uipath-2.1.109-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of uipath might be problematic. Click here for more details.
- uipath/_cli/__init__.py +4 -0
- uipath/_cli/_evals/_console_progress_reporter.py +2 -2
- uipath/_cli/_evals/_evaluator_factory.py +314 -29
- uipath/_cli/_evals/_helpers.py +194 -0
- uipath/_cli/_evals/_models/_evaluation_set.py +73 -7
- uipath/_cli/_evals/_models/_evaluator.py +183 -9
- uipath/_cli/_evals/_models/_evaluator_base_params.py +3 -3
- uipath/_cli/_evals/_models/_output.py +87 -3
- uipath/_cli/_evals/_progress_reporter.py +288 -28
- uipath/_cli/_evals/_runtime.py +80 -26
- uipath/_cli/_evals/mocks/input_mocker.py +1 -3
- uipath/_cli/_evals/mocks/llm_mocker.py +2 -2
- uipath/_cli/_evals/mocks/mocker_factory.py +2 -2
- uipath/_cli/_evals/mocks/mockito_mocker.py +2 -2
- uipath/_cli/_evals/mocks/mocks.py +5 -3
- uipath/_cli/_push/models.py +17 -0
- uipath/_cli/_push/sw_file_handler.py +336 -3
- uipath/_cli/_templates/custom_evaluator.py.template +65 -0
- uipath/_cli/_utils/_eval_set.py +30 -9
- uipath/_cli/_utils/_resources.py +21 -0
- uipath/_cli/_utils/_studio_project.py +18 -0
- uipath/_cli/cli_add.py +114 -0
- uipath/_cli/cli_eval.py +5 -1
- uipath/_cli/cli_pull.py +11 -26
- uipath/_cli/cli_push.py +2 -0
- uipath/_cli/cli_register.py +45 -0
- uipath/_events/_events.py +6 -5
- uipath/_utils/constants.py +4 -0
- uipath/eval/_helpers/evaluators_helpers.py +494 -0
- uipath/eval/_helpers/helpers.py +30 -2
- uipath/eval/evaluators/__init__.py +60 -5
- uipath/eval/evaluators/base_evaluator.py +546 -44
- uipath/eval/evaluators/contains_evaluator.py +80 -0
- uipath/eval/evaluators/exact_match_evaluator.py +43 -12
- uipath/eval/evaluators/json_similarity_evaluator.py +41 -12
- uipath/eval/evaluators/legacy_base_evaluator.py +89 -0
- uipath/eval/evaluators/{deterministic_evaluator_base.py → legacy_deterministic_evaluator_base.py} +2 -2
- uipath/eval/evaluators/legacy_exact_match_evaluator.py +37 -0
- uipath/eval/evaluators/legacy_json_similarity_evaluator.py +151 -0
- uipath/eval/evaluators/legacy_llm_as_judge_evaluator.py +137 -0
- uipath/eval/evaluators/{trajectory_evaluator.py → legacy_trajectory_evaluator.py} +5 -6
- uipath/eval/evaluators/llm_as_judge_evaluator.py +143 -78
- uipath/eval/evaluators/llm_judge_output_evaluator.py +112 -0
- uipath/eval/evaluators/llm_judge_trajectory_evaluator.py +142 -0
- uipath/eval/evaluators/output_evaluator.py +117 -0
- uipath/eval/evaluators/tool_call_args_evaluator.py +82 -0
- uipath/eval/evaluators/tool_call_count_evaluator.py +87 -0
- uipath/eval/evaluators/tool_call_order_evaluator.py +84 -0
- uipath/eval/evaluators/tool_call_output_evaluator.py +87 -0
- uipath/eval/evaluators_types/ContainsEvaluator.json +73 -0
- uipath/eval/evaluators_types/ExactMatchEvaluator.json +89 -0
- uipath/eval/evaluators_types/JsonSimilarityEvaluator.json +81 -0
- uipath/eval/evaluators_types/LLMJudgeOutputEvaluator.json +110 -0
- uipath/eval/evaluators_types/LLMJudgeSimulationTrajectoryEvaluator.json +88 -0
- uipath/eval/evaluators_types/LLMJudgeStrictJSONSimilarityOutputEvaluator.json +110 -0
- uipath/eval/evaluators_types/LLMJudgeTrajectoryEvaluator.json +88 -0
- uipath/eval/evaluators_types/ToolCallArgsEvaluator.json +131 -0
- uipath/eval/evaluators_types/ToolCallCountEvaluator.json +104 -0
- uipath/eval/evaluators_types/ToolCallOrderEvaluator.json +100 -0
- uipath/eval/evaluators_types/ToolCallOutputEvaluator.json +124 -0
- uipath/eval/evaluators_types/generate_types.py +31 -0
- uipath/eval/models/__init__.py +16 -1
- uipath/eval/models/llm_judge_types.py +196 -0
- uipath/eval/models/models.py +109 -7
- {uipath-2.1.108.dist-info → uipath-2.1.109.dist-info}/METADATA +1 -1
- {uipath-2.1.108.dist-info → uipath-2.1.109.dist-info}/RECORD +69 -37
- {uipath-2.1.108.dist-info → uipath-2.1.109.dist-info}/WHEEL +0 -0
- {uipath-2.1.108.dist-info → uipath-2.1.109.dist-info}/entry_points.txt +0 -0
- {uipath-2.1.108.dist-info → uipath-2.1.109.dist-info}/licenses/LICENSE +0 -0
uipath/eval/_helpers/helpers.py
CHANGED
|
@@ -1,10 +1,13 @@
|
|
|
1
|
+
import functools
|
|
1
2
|
import json
|
|
2
3
|
import os
|
|
4
|
+
import time
|
|
5
|
+
from collections.abc import Callable
|
|
6
|
+
from typing import Any
|
|
3
7
|
|
|
4
8
|
import click
|
|
5
9
|
|
|
6
|
-
from uipath._cli._utils._console import ConsoleLogger
|
|
7
|
-
from uipath._utils.constants import UIPATH_CONFIG_FILE
|
|
10
|
+
from ..models import ErrorEvaluationResult, EvaluationResult
|
|
8
11
|
|
|
9
12
|
|
|
10
13
|
def auto_discover_entrypoint() -> str:
|
|
@@ -16,6 +19,9 @@ def auto_discover_entrypoint() -> str:
|
|
|
16
19
|
Raises:
|
|
17
20
|
ValueError: If no entrypoint found or multiple entrypoints exist
|
|
18
21
|
"""
|
|
22
|
+
from uipath._cli._utils._console import ConsoleLogger
|
|
23
|
+
from uipath._utils.constants import UIPATH_CONFIG_FILE
|
|
24
|
+
|
|
19
25
|
console = ConsoleLogger()
|
|
20
26
|
|
|
21
27
|
if not os.path.isfile(UIPATH_CONFIG_FILE):
|
|
@@ -45,3 +51,25 @@ def auto_discover_entrypoint() -> str:
|
|
|
45
51
|
f"Auto-discovered agent entrypoint: {click.style(entrypoint, fg='cyan')}"
|
|
46
52
|
)
|
|
47
53
|
return entrypoint
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def track_evaluation_metrics(func: Callable[..., Any]) -> Callable[..., Any]:
|
|
57
|
+
"""Decorator to track evaluation metrics and handle errors gracefully."""
|
|
58
|
+
|
|
59
|
+
@functools.wraps(func)
|
|
60
|
+
async def wrapper(*args: Any, **kwargs: Any) -> EvaluationResult:
|
|
61
|
+
start_time = time.time()
|
|
62
|
+
try:
|
|
63
|
+
result = await func(*args, **kwargs)
|
|
64
|
+
except Exception as e:
|
|
65
|
+
result = ErrorEvaluationResult(
|
|
66
|
+
details="Exception thrown by evaluator: {}".format(e),
|
|
67
|
+
evaluation_time=time.time() - start_time,
|
|
68
|
+
)
|
|
69
|
+
end_time = time.time()
|
|
70
|
+
execution_time = end_time - start_time
|
|
71
|
+
|
|
72
|
+
result.evaluation_time = execution_time
|
|
73
|
+
return result
|
|
74
|
+
|
|
75
|
+
return wrapper
|
|
uipath/eval/evaluators/__init__.py
CHANGED
|
@@ -1,15 +1,70 @@
|
|
|
1
1
|
"""UiPath evaluator implementations for agent performance evaluation."""
|
|
2
2
|
|
|
3
|
-
from .base_evaluator import BaseEvaluator
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
# Current coded evaluators
|
|
6
|
+
from .base_evaluator import BaseEvaluationCriteria, BaseEvaluator, BaseEvaluatorConfig
|
|
7
|
+
from .contains_evaluator import ContainsEvaluator
|
|
4
8
|
from .exact_match_evaluator import ExactMatchEvaluator
|
|
5
9
|
from .json_similarity_evaluator import JsonSimilarityEvaluator
|
|
6
|
-
|
|
7
|
-
|
|
10
|
+
|
|
11
|
+
# Legacy evaluators
|
|
12
|
+
from .legacy_base_evaluator import LegacyBaseEvaluator
|
|
13
|
+
from .legacy_exact_match_evaluator import LegacyExactMatchEvaluator
|
|
14
|
+
from .legacy_json_similarity_evaluator import LegacyJsonSimilarityEvaluator
|
|
15
|
+
from .legacy_llm_as_judge_evaluator import LegacyLlmAsAJudgeEvaluator
|
|
16
|
+
from .legacy_trajectory_evaluator import LegacyTrajectoryEvaluator
|
|
17
|
+
from .llm_judge_output_evaluator import (
|
|
18
|
+
BaseLLMOutputEvaluator,
|
|
19
|
+
LLMJudgeOutputEvaluator,
|
|
20
|
+
LLMJudgeStrictJSONSimilarityOutputEvaluator,
|
|
21
|
+
)
|
|
22
|
+
from .llm_judge_trajectory_evaluator import (
|
|
23
|
+
BaseLLMTrajectoryEvaluator,
|
|
24
|
+
LLMJudgeTrajectoryEvaluator,
|
|
25
|
+
LLMJudgeTrajectorySimulationEvaluator,
|
|
26
|
+
)
|
|
27
|
+
from .tool_call_args_evaluator import ToolCallArgsEvaluator
|
|
28
|
+
from .tool_call_count_evaluator import ToolCallCountEvaluator
|
|
29
|
+
from .tool_call_order_evaluator import ToolCallOrderEvaluator
|
|
30
|
+
from .tool_call_output_evaluator import ToolCallOutputEvaluator
|
|
31
|
+
|
|
32
|
+
EVALUATORS: list[type[BaseEvaluator[Any, Any, Any]]] = [
|
|
33
|
+
ExactMatchEvaluator,
|
|
34
|
+
ContainsEvaluator,
|
|
35
|
+
JsonSimilarityEvaluator,
|
|
36
|
+
LLMJudgeOutputEvaluator,
|
|
37
|
+
LLMJudgeStrictJSONSimilarityOutputEvaluator,
|
|
38
|
+
LLMJudgeTrajectoryEvaluator,
|
|
39
|
+
LLMJudgeTrajectorySimulationEvaluator,
|
|
40
|
+
ToolCallOrderEvaluator,
|
|
41
|
+
ToolCallArgsEvaluator,
|
|
42
|
+
ToolCallCountEvaluator,
|
|
43
|
+
ToolCallOutputEvaluator,
|
|
44
|
+
]
|
|
8
45
|
|
|
9
46
|
__all__ = [
|
|
47
|
+
# Legacy evaluators
|
|
48
|
+
"LegacyBaseEvaluator",
|
|
49
|
+
"LegacyExactMatchEvaluator",
|
|
50
|
+
"LegacyJsonSimilarityEvaluator",
|
|
51
|
+
"LegacyLlmAsAJudgeEvaluator",
|
|
52
|
+
"LegacyTrajectoryEvaluator",
|
|
53
|
+
# Current coded evaluators
|
|
10
54
|
"BaseEvaluator",
|
|
55
|
+
"ContainsEvaluator",
|
|
11
56
|
"ExactMatchEvaluator",
|
|
12
57
|
"JsonSimilarityEvaluator",
|
|
13
|
-
"LlmAsAJudgeEvaluator",
|
|
14
|
-
"TrajectoryEvaluator",
|
|
58
|
+
"BaseLLMOutputEvaluator",
|
|
59
|
+
"LLMJudgeOutputEvaluator",
|
|
60
|
+
"LLMJudgeStrictJSONSimilarityOutputEvaluator",
|
|
61
|
+
"BaseLLMTrajectoryEvaluator",
|
|
62
|
+
"LLMJudgeTrajectoryEvaluator",
|
|
63
|
+
"LLMJudgeTrajectorySimulationEvaluator",
|
|
64
|
+
"ToolCallOrderEvaluator",
|
|
65
|
+
"ToolCallArgsEvaluator",
|
|
66
|
+
"ToolCallCountEvaluator",
|
|
67
|
+
"ToolCallOutputEvaluator",
|
|
68
|
+
"BaseEvaluationCriteria",
|
|
69
|
+
"BaseEvaluatorConfig",
|
|
15
70
|
]
|