uipath-2.1.108-py3-none-any.whl → uipath-2.1.109-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of uipath might be problematic. Click here for more details.

Files changed (69)
  1. uipath/_cli/__init__.py +4 -0
  2. uipath/_cli/_evals/_console_progress_reporter.py +2 -2
  3. uipath/_cli/_evals/_evaluator_factory.py +314 -29
  4. uipath/_cli/_evals/_helpers.py +194 -0
  5. uipath/_cli/_evals/_models/_evaluation_set.py +73 -7
  6. uipath/_cli/_evals/_models/_evaluator.py +183 -9
  7. uipath/_cli/_evals/_models/_evaluator_base_params.py +3 -3
  8. uipath/_cli/_evals/_models/_output.py +87 -3
  9. uipath/_cli/_evals/_progress_reporter.py +288 -28
  10. uipath/_cli/_evals/_runtime.py +80 -26
  11. uipath/_cli/_evals/mocks/input_mocker.py +1 -3
  12. uipath/_cli/_evals/mocks/llm_mocker.py +2 -2
  13. uipath/_cli/_evals/mocks/mocker_factory.py +2 -2
  14. uipath/_cli/_evals/mocks/mockito_mocker.py +2 -2
  15. uipath/_cli/_evals/mocks/mocks.py +5 -3
  16. uipath/_cli/_push/models.py +17 -0
  17. uipath/_cli/_push/sw_file_handler.py +336 -3
  18. uipath/_cli/_templates/custom_evaluator.py.template +65 -0
  19. uipath/_cli/_utils/_eval_set.py +30 -9
  20. uipath/_cli/_utils/_resources.py +21 -0
  21. uipath/_cli/_utils/_studio_project.py +18 -0
  22. uipath/_cli/cli_add.py +114 -0
  23. uipath/_cli/cli_eval.py +5 -1
  24. uipath/_cli/cli_pull.py +11 -26
  25. uipath/_cli/cli_push.py +2 -0
  26. uipath/_cli/cli_register.py +45 -0
  27. uipath/_events/_events.py +6 -5
  28. uipath/_utils/constants.py +4 -0
  29. uipath/eval/_helpers/evaluators_helpers.py +494 -0
  30. uipath/eval/_helpers/helpers.py +30 -2
  31. uipath/eval/evaluators/__init__.py +60 -5
  32. uipath/eval/evaluators/base_evaluator.py +546 -44
  33. uipath/eval/evaluators/contains_evaluator.py +80 -0
  34. uipath/eval/evaluators/exact_match_evaluator.py +43 -12
  35. uipath/eval/evaluators/json_similarity_evaluator.py +41 -12
  36. uipath/eval/evaluators/legacy_base_evaluator.py +89 -0
  37. uipath/eval/evaluators/{deterministic_evaluator_base.py → legacy_deterministic_evaluator_base.py} +2 -2
  38. uipath/eval/evaluators/legacy_exact_match_evaluator.py +37 -0
  39. uipath/eval/evaluators/legacy_json_similarity_evaluator.py +151 -0
  40. uipath/eval/evaluators/legacy_llm_as_judge_evaluator.py +137 -0
  41. uipath/eval/evaluators/{trajectory_evaluator.py → legacy_trajectory_evaluator.py} +5 -6
  42. uipath/eval/evaluators/llm_as_judge_evaluator.py +143 -78
  43. uipath/eval/evaluators/llm_judge_output_evaluator.py +112 -0
  44. uipath/eval/evaluators/llm_judge_trajectory_evaluator.py +142 -0
  45. uipath/eval/evaluators/output_evaluator.py +117 -0
  46. uipath/eval/evaluators/tool_call_args_evaluator.py +82 -0
  47. uipath/eval/evaluators/tool_call_count_evaluator.py +87 -0
  48. uipath/eval/evaluators/tool_call_order_evaluator.py +84 -0
  49. uipath/eval/evaluators/tool_call_output_evaluator.py +87 -0
  50. uipath/eval/evaluators_types/ContainsEvaluator.json +73 -0
  51. uipath/eval/evaluators_types/ExactMatchEvaluator.json +89 -0
  52. uipath/eval/evaluators_types/JsonSimilarityEvaluator.json +81 -0
  53. uipath/eval/evaluators_types/LLMJudgeOutputEvaluator.json +110 -0
  54. uipath/eval/evaluators_types/LLMJudgeSimulationTrajectoryEvaluator.json +88 -0
  55. uipath/eval/evaluators_types/LLMJudgeStrictJSONSimilarityOutputEvaluator.json +110 -0
  56. uipath/eval/evaluators_types/LLMJudgeTrajectoryEvaluator.json +88 -0
  57. uipath/eval/evaluators_types/ToolCallArgsEvaluator.json +131 -0
  58. uipath/eval/evaluators_types/ToolCallCountEvaluator.json +104 -0
  59. uipath/eval/evaluators_types/ToolCallOrderEvaluator.json +100 -0
  60. uipath/eval/evaluators_types/ToolCallOutputEvaluator.json +124 -0
  61. uipath/eval/evaluators_types/generate_types.py +31 -0
  62. uipath/eval/models/__init__.py +16 -1
  63. uipath/eval/models/llm_judge_types.py +196 -0
  64. uipath/eval/models/models.py +109 -7
  65. {uipath-2.1.108.dist-info → uipath-2.1.109.dist-info}/METADATA +1 -1
  66. {uipath-2.1.108.dist-info → uipath-2.1.109.dist-info}/RECORD +69 -37
  67. {uipath-2.1.108.dist-info → uipath-2.1.109.dist-info}/WHEEL +0 -0
  68. {uipath-2.1.108.dist-info → uipath-2.1.109.dist-info}/entry_points.txt +0 -0
  69. {uipath-2.1.108.dist-info → uipath-2.1.109.dist-info}/licenses/LICENSE +0 -0
@@ -1,10 +1,13 @@
1
+ import functools
1
2
  import json
2
3
  import os
4
+ import time
5
+ from collections.abc import Callable
6
+ from typing import Any
3
7
 
4
8
  import click
5
9
 
6
- from uipath._cli._utils._console import ConsoleLogger
7
- from uipath._utils.constants import UIPATH_CONFIG_FILE
10
+ from ..models import ErrorEvaluationResult, EvaluationResult
8
11
 
9
12
 
10
13
  def auto_discover_entrypoint() -> str:
@@ -16,6 +19,9 @@ def auto_discover_entrypoint() -> str:
16
19
  Raises:
17
20
  ValueError: If no entrypoint found or multiple entrypoints exist
18
21
  """
22
+ from uipath._cli._utils._console import ConsoleLogger
23
+ from uipath._utils.constants import UIPATH_CONFIG_FILE
24
+
19
25
  console = ConsoleLogger()
20
26
 
21
27
  if not os.path.isfile(UIPATH_CONFIG_FILE):
@@ -45,3 +51,25 @@ def auto_discover_entrypoint() -> str:
45
51
  f"Auto-discovered agent entrypoint: {click.style(entrypoint, fg='cyan')}"
46
52
  )
47
53
  return entrypoint
54
+
55
+
56
+ def track_evaluation_metrics(func: Callable[..., Any]) -> Callable[..., Any]:
57
+ """Decorator to track evaluation metrics and handle errors gracefully."""
58
+
59
+ @functools.wraps(func)
60
+ async def wrapper(*args: Any, **kwargs: Any) -> EvaluationResult:
61
+ start_time = time.time()
62
+ try:
63
+ result = await func(*args, **kwargs)
64
+ except Exception as e:
65
+ result = ErrorEvaluationResult(
66
+ details="Exception thrown by evaluator: {}".format(e),
67
+ evaluation_time=time.time() - start_time,
68
+ )
69
+ end_time = time.time()
70
+ execution_time = end_time - start_time
71
+
72
+ result.evaluation_time = execution_time
73
+ return result
74
+
75
+ return wrapper
@@ -1,15 +1,70 @@
1
1
  """UiPath evaluator implementations for agent performance evaluation."""
2
2
 
3
- from .base_evaluator import BaseEvaluator
3
+ from typing import Any
4
+
5
+ # Current coded evaluators
6
+ from .base_evaluator import BaseEvaluationCriteria, BaseEvaluator, BaseEvaluatorConfig
7
+ from .contains_evaluator import ContainsEvaluator
4
8
  from .exact_match_evaluator import ExactMatchEvaluator
5
9
  from .json_similarity_evaluator import JsonSimilarityEvaluator
6
- from .llm_as_judge_evaluator import LlmAsAJudgeEvaluator
7
- from .trajectory_evaluator import TrajectoryEvaluator
10
+
11
+ # Legacy evaluators
12
+ from .legacy_base_evaluator import LegacyBaseEvaluator
13
+ from .legacy_exact_match_evaluator import LegacyExactMatchEvaluator
14
+ from .legacy_json_similarity_evaluator import LegacyJsonSimilarityEvaluator
15
+ from .legacy_llm_as_judge_evaluator import LegacyLlmAsAJudgeEvaluator
16
+ from .legacy_trajectory_evaluator import LegacyTrajectoryEvaluator
17
+ from .llm_judge_output_evaluator import (
18
+ BaseLLMOutputEvaluator,
19
+ LLMJudgeOutputEvaluator,
20
+ LLMJudgeStrictJSONSimilarityOutputEvaluator,
21
+ )
22
+ from .llm_judge_trajectory_evaluator import (
23
+ BaseLLMTrajectoryEvaluator,
24
+ LLMJudgeTrajectoryEvaluator,
25
+ LLMJudgeTrajectorySimulationEvaluator,
26
+ )
27
+ from .tool_call_args_evaluator import ToolCallArgsEvaluator
28
+ from .tool_call_count_evaluator import ToolCallCountEvaluator
29
+ from .tool_call_order_evaluator import ToolCallOrderEvaluator
30
+ from .tool_call_output_evaluator import ToolCallOutputEvaluator
31
+
32
+ EVALUATORS: list[type[BaseEvaluator[Any, Any, Any]]] = [
33
+ ExactMatchEvaluator,
34
+ ContainsEvaluator,
35
+ JsonSimilarityEvaluator,
36
+ LLMJudgeOutputEvaluator,
37
+ LLMJudgeStrictJSONSimilarityOutputEvaluator,
38
+ LLMJudgeTrajectoryEvaluator,
39
+ LLMJudgeTrajectorySimulationEvaluator,
40
+ ToolCallOrderEvaluator,
41
+ ToolCallArgsEvaluator,
42
+ ToolCallCountEvaluator,
43
+ ToolCallOutputEvaluator,
44
+ ]
8
45
 
9
46
  __all__ = [
47
+ # Legacy evaluators
48
+ "LegacyBaseEvaluator",
49
+ "LegacyExactMatchEvaluator",
50
+ "LegacyJsonSimilarityEvaluator",
51
+ "LegacyLlmAsAJudgeEvaluator",
52
+ "LegacyTrajectoryEvaluator",
53
+ # Current coded evaluators
10
54
  "BaseEvaluator",
55
+ "ContainsEvaluator",
11
56
  "ExactMatchEvaluator",
12
57
  "JsonSimilarityEvaluator",
13
- "LlmAsAJudgeEvaluator",
14
- "TrajectoryEvaluator",
58
+ "BaseLLMOutputEvaluator",
59
+ "LLMJudgeOutputEvaluator",
60
+ "LLMJudgeStrictJSONSimilarityOutputEvaluator",
61
+ "BaseLLMTrajectoryEvaluator",
62
+ "LLMJudgeTrajectoryEvaluator",
63
+ "LLMJudgeTrajectorySimulationEvaluator",
64
+ "ToolCallOrderEvaluator",
65
+ "ToolCallArgsEvaluator",
66
+ "ToolCallCountEvaluator",
67
+ "ToolCallOutputEvaluator",
68
+ "BaseEvaluationCriteria",
69
+ "BaseEvaluatorConfig",
15
70
  ]