uipath 2.1.108__py3-none-any.whl → 2.1.110__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of uipath might be problematic. Click here for more details.
- uipath/_cli/__init__.py +4 -0
- uipath/_cli/_evals/_console_progress_reporter.py +2 -2
- uipath/_cli/_evals/_evaluator_factory.py +314 -29
- uipath/_cli/_evals/_helpers.py +194 -0
- uipath/_cli/_evals/_models/_evaluation_set.py +73 -7
- uipath/_cli/_evals/_models/_evaluator.py +183 -9
- uipath/_cli/_evals/_models/_evaluator_base_params.py +3 -3
- uipath/_cli/_evals/_models/_output.py +87 -3
- uipath/_cli/_evals/_progress_reporter.py +288 -28
- uipath/_cli/_evals/_runtime.py +80 -26
- uipath/_cli/_evals/mocks/input_mocker.py +1 -3
- uipath/_cli/_evals/mocks/llm_mocker.py +2 -2
- uipath/_cli/_evals/mocks/mocker_factory.py +2 -2
- uipath/_cli/_evals/mocks/mockito_mocker.py +2 -2
- uipath/_cli/_evals/mocks/mocks.py +5 -3
- uipath/_cli/_push/models.py +17 -0
- uipath/_cli/_push/sw_file_handler.py +336 -3
- uipath/_cli/_runtime/_contracts.py +2 -4
- uipath/_cli/_runtime/_runtime.py +2 -5
- uipath/_cli/_templates/custom_evaluator.py.template +65 -0
- uipath/_cli/_utils/_eval_set.py +30 -9
- uipath/_cli/_utils/_resources.py +21 -0
- uipath/_cli/_utils/_studio_project.py +18 -0
- uipath/_cli/cli_add.py +114 -0
- uipath/_cli/cli_eval.py +5 -1
- uipath/_cli/cli_init.py +5 -4
- uipath/_cli/cli_pull.py +11 -26
- uipath/_cli/cli_push.py +2 -0
- uipath/_cli/cli_register.py +45 -0
- uipath/_events/_events.py +6 -5
- uipath/_utils/constants.py +4 -0
- uipath/eval/_helpers/evaluators_helpers.py +494 -0
- uipath/eval/_helpers/helpers.py +30 -2
- uipath/eval/evaluators/__init__.py +60 -5
- uipath/eval/evaluators/base_evaluator.py +546 -44
- uipath/eval/evaluators/contains_evaluator.py +80 -0
- uipath/eval/evaluators/exact_match_evaluator.py +43 -12
- uipath/eval/evaluators/json_similarity_evaluator.py +41 -12
- uipath/eval/evaluators/legacy_base_evaluator.py +89 -0
- uipath/eval/evaluators/{deterministic_evaluator_base.py → legacy_deterministic_evaluator_base.py} +2 -2
- uipath/eval/evaluators/legacy_exact_match_evaluator.py +37 -0
- uipath/eval/evaluators/legacy_json_similarity_evaluator.py +151 -0
- uipath/eval/evaluators/legacy_llm_as_judge_evaluator.py +137 -0
- uipath/eval/evaluators/{trajectory_evaluator.py → legacy_trajectory_evaluator.py} +5 -6
- uipath/eval/evaluators/llm_as_judge_evaluator.py +143 -78
- uipath/eval/evaluators/llm_judge_output_evaluator.py +112 -0
- uipath/eval/evaluators/llm_judge_trajectory_evaluator.py +142 -0
- uipath/eval/evaluators/output_evaluator.py +117 -0
- uipath/eval/evaluators/tool_call_args_evaluator.py +82 -0
- uipath/eval/evaluators/tool_call_count_evaluator.py +87 -0
- uipath/eval/evaluators/tool_call_order_evaluator.py +84 -0
- uipath/eval/evaluators/tool_call_output_evaluator.py +87 -0
- uipath/eval/evaluators_types/ContainsEvaluator.json +73 -0
- uipath/eval/evaluators_types/ExactMatchEvaluator.json +89 -0
- uipath/eval/evaluators_types/JsonSimilarityEvaluator.json +81 -0
- uipath/eval/evaluators_types/LLMJudgeOutputEvaluator.json +110 -0
- uipath/eval/evaluators_types/LLMJudgeSimulationTrajectoryEvaluator.json +88 -0
- uipath/eval/evaluators_types/LLMJudgeStrictJSONSimilarityOutputEvaluator.json +110 -0
- uipath/eval/evaluators_types/LLMJudgeTrajectoryEvaluator.json +88 -0
- uipath/eval/evaluators_types/ToolCallArgsEvaluator.json +131 -0
- uipath/eval/evaluators_types/ToolCallCountEvaluator.json +104 -0
- uipath/eval/evaluators_types/ToolCallOrderEvaluator.json +100 -0
- uipath/eval/evaluators_types/ToolCallOutputEvaluator.json +124 -0
- uipath/eval/evaluators_types/generate_types.py +31 -0
- uipath/eval/models/__init__.py +16 -1
- uipath/eval/models/llm_judge_types.py +196 -0
- uipath/eval/models/models.py +109 -7
- {uipath-2.1.108.dist-info → uipath-2.1.110.dist-info}/METADATA +1 -1
- {uipath-2.1.108.dist-info → uipath-2.1.110.dist-info}/RECORD +72 -40
- {uipath-2.1.108.dist-info → uipath-2.1.110.dist-info}/WHEEL +0 -0
- {uipath-2.1.108.dist-info → uipath-2.1.110.dist-info}/entry_points.txt +0 -0
- {uipath-2.1.108.dist-info → uipath-2.1.110.dist-info}/licenses/LICENSE +0 -0
|
@@ -6,7 +6,14 @@ import os
|
|
|
6
6
|
from datetime import datetime, timezone
|
|
7
7
|
from typing import Any, AsyncIterator, Dict, Optional, Set
|
|
8
8
|
|
|
9
|
+
import click
|
|
10
|
+
|
|
9
11
|
from ...models.exceptions import EnrichedException
|
|
12
|
+
from .._evals._helpers import ( # type: ignore
|
|
13
|
+
register_evaluator,
|
|
14
|
+
try_extract_file_and_class_name,
|
|
15
|
+
)
|
|
16
|
+
from .._utils._console import ConsoleLogger
|
|
10
17
|
from .._utils._constants import (
|
|
11
18
|
AGENT_INITIAL_CODE_VERSION,
|
|
12
19
|
AGENT_STORAGE_VERSION,
|
|
@@ -28,6 +35,7 @@ from .._utils._studio_project import (
|
|
|
28
35
|
StructuralMigration,
|
|
29
36
|
StudioClient,
|
|
30
37
|
)
|
|
38
|
+
from .models import EvaluatorFileDetails
|
|
31
39
|
|
|
32
40
|
logger = logging.getLogger(__name__)
|
|
33
41
|
|
|
@@ -58,6 +66,7 @@ class SwFileHandler:
|
|
|
58
66
|
"""
|
|
59
67
|
self.directory = directory
|
|
60
68
|
self.include_uv_lock = include_uv_lock
|
|
69
|
+
self.console = ConsoleLogger()
|
|
61
70
|
self._studio_client = StudioClient(project_id)
|
|
62
71
|
self._project_structure: Optional[ProjectStructure] = None
|
|
63
72
|
|
|
@@ -175,6 +184,7 @@ class SwFileHandler:
|
|
|
175
184
|
remote_file = source_code_files.get(
|
|
176
185
|
local_file.relative_path.replace("\\", "/"), None
|
|
177
186
|
)
|
|
187
|
+
|
|
178
188
|
if remote_file:
|
|
179
189
|
# File exists remotely - mark for update
|
|
180
190
|
processed_source_files.add(remote_file.id)
|
|
@@ -185,7 +195,7 @@ class SwFileHandler:
|
|
|
185
195
|
)
|
|
186
196
|
updates.append(
|
|
187
197
|
FileOperationUpdate(
|
|
188
|
-
file_path=local_file.
|
|
198
|
+
file_path=local_file.file_path,
|
|
189
199
|
status="updating",
|
|
190
200
|
message=f"Updating '{local_file.file_name}'",
|
|
191
201
|
)
|
|
@@ -203,9 +213,9 @@ class SwFileHandler:
|
|
|
203
213
|
)
|
|
204
214
|
updates.append(
|
|
205
215
|
FileOperationUpdate(
|
|
206
|
-
file_path=local_file.
|
|
216
|
+
file_path=local_file.file_path,
|
|
207
217
|
status="uploading",
|
|
208
|
-
message=f"Uploading '{local_file.
|
|
218
|
+
message=f"Uploading '{local_file.file_name}'",
|
|
209
219
|
)
|
|
210
220
|
)
|
|
211
221
|
|
|
@@ -610,3 +620,326 @@ class SwFileHandler:
|
|
|
610
620
|
# Yield all updates
|
|
611
621
|
for update in updates:
|
|
612
622
|
yield update
|
|
623
|
+
|
|
624
|
+
def _extract_evaluator_details(self, file_path: str) -> tuple[bool, str]:
|
|
625
|
+
"""Return whether an evaluator JSON file has a version property and the custom-evaluator python file (if exists).
|
|
626
|
+
|
|
627
|
+
Args:
|
|
628
|
+
file_path: Path to the file to check
|
|
629
|
+
|
|
630
|
+
Returns:
|
|
631
|
+
tuple[bool, str]: A tuple containing:
|
|
632
|
+
- A boolean indicating whether the JSON file contains a "version" property.
|
|
633
|
+
- The path to the custom-evaluator Python file, if it exists; otherwise, an empty string.
|
|
634
|
+
"""
|
|
635
|
+
try:
|
|
636
|
+
with open(file_path, "r", encoding="utf-8") as f:
|
|
637
|
+
data = json.load(f)
|
|
638
|
+
_, file_name, _ = try_extract_file_and_class_name(
|
|
639
|
+
data.get("evaluatorSchema", "")
|
|
640
|
+
)
|
|
641
|
+
return "version" in data, file_name
|
|
642
|
+
except (json.JSONDecodeError, FileNotFoundError):
|
|
643
|
+
return False, ""
|
|
644
|
+
|
|
645
|
+
def _get_coded_evals_files(self) -> tuple[list[EvaluatorFileDetails], list[str]]:
|
|
646
|
+
"""Get coded-evals files from local evals directory.
|
|
647
|
+
|
|
648
|
+
Returns:
|
|
649
|
+
Tuple of (evaluator_files, eval_set_files) with version property
|
|
650
|
+
"""
|
|
651
|
+
evaluator_files: list[EvaluatorFileDetails] = []
|
|
652
|
+
eval_set_files = []
|
|
653
|
+
|
|
654
|
+
# Check {self.directory}/evals/evaluators/ for files with version property
|
|
655
|
+
evaluators_dir = os.path.join(self.directory, "evals", "evaluators")
|
|
656
|
+
if os.path.exists(evaluators_dir):
|
|
657
|
+
for file_name in os.listdir(evaluators_dir):
|
|
658
|
+
if file_name.endswith(".json"):
|
|
659
|
+
file_path = os.path.join(evaluators_dir, file_name)
|
|
660
|
+
version, file_name = self._extract_evaluator_details(file_path)
|
|
661
|
+
if version:
|
|
662
|
+
evaluator_files.append(
|
|
663
|
+
EvaluatorFileDetails(
|
|
664
|
+
path=file_path, custom_evaluator_file_name=file_name
|
|
665
|
+
)
|
|
666
|
+
)
|
|
667
|
+
|
|
668
|
+
# Check {self.directory}/evals/eval-sets/ for files with version property
|
|
669
|
+
eval_sets_dir = os.path.join(self.directory, "evals", "eval-sets")
|
|
670
|
+
if os.path.exists(eval_sets_dir):
|
|
671
|
+
for file_name in os.listdir(eval_sets_dir):
|
|
672
|
+
if file_name.endswith(".json"):
|
|
673
|
+
file_path = os.path.join(eval_sets_dir, file_name)
|
|
674
|
+
version, _ = self._extract_evaluator_details(file_path)
|
|
675
|
+
if version:
|
|
676
|
+
eval_set_files.append(file_path)
|
|
677
|
+
|
|
678
|
+
return evaluator_files, eval_set_files
|
|
679
|
+
|
|
680
|
+
def _get_subfolder_by_name(
|
|
681
|
+
self, parent_folder: ProjectFolder, subfolder_name: str
|
|
682
|
+
) -> Optional[ProjectFolder]:
|
|
683
|
+
"""Get a subfolder from within a parent folder by name.
|
|
684
|
+
|
|
685
|
+
Args:
|
|
686
|
+
parent_folder: The parent folder to search within
|
|
687
|
+
subfolder_name: Name of the subfolder to find
|
|
688
|
+
|
|
689
|
+
Returns:
|
|
690
|
+
Optional[ProjectFolder]: The found subfolder or None
|
|
691
|
+
"""
|
|
692
|
+
for folder in parent_folder.folders:
|
|
693
|
+
if folder.name == subfolder_name:
|
|
694
|
+
return folder
|
|
695
|
+
return None
|
|
696
|
+
|
|
697
|
+
async def _ensure_coded_evals_structure(
    self, structure: ProjectStructure
) -> ProjectFolder:
    """Ensure coded-evals folder structure exists in remote project.

    When ``structure`` already has a "coded-evals" folder it is returned
    as-is. Otherwise "coded-evals" and its "evaluators" and "eval-sets"
    subfolders are created through the Studio client, the project structure
    is fetched again, and the freshly created folder is returned.

    Args:
        structure: Current project structure

    Returns:
        ProjectFolder: The coded-evals folder

    Raises:
        AssertionError: If the folder still cannot be found after creation.
    """
    coded_evals_folder = self._get_folder_by_name(structure, "coded-evals")
    if coded_evals_folder:
        return coded_evals_folder

    coded_evals_id = await self._studio_client.create_folder_async("coded-evals")
    self.console.success(f"Created {click.style('coded-evals', fg='cyan')} folder")

    for subfolder in ("evaluators", "eval-sets"):
        await self._studio_client.create_folder_async(subfolder, coded_evals_id)
        label = f"coded-evals/{subfolder}"
        self.console.success(f"Created {click.style(label, fg='cyan')} folder")

    # Refresh structure to get the new folders
    structure = await self._studio_client.get_project_structure_async()
    coded_evals_folder = self._get_folder_by_name(structure, "coded-evals")

    # Raised explicitly instead of using an ``assert`` statement: asserts are
    # removed under ``python -O``, which would let None escape despite the
    # declared return type. The exception type is kept for compatibility.
    if not coded_evals_folder:
        raise AssertionError("Coded-evals folder uploaded but not found.")

    return coded_evals_folder
|
|
734
|
+
|
|
735
|
+
def _collect_files_from_folder(
|
|
736
|
+
self, folder: Optional[ProjectFolder]
|
|
737
|
+
) -> Dict[str, ProjectFile]:
|
|
738
|
+
files: Dict[str, ProjectFile] = {}
|
|
739
|
+
if folder:
|
|
740
|
+
for file in folder.files:
|
|
741
|
+
files[file.name] = file
|
|
742
|
+
return files
|
|
743
|
+
|
|
744
|
+
def _process_file_sync(
    self,
    local_file_path: str,
    remote_files: Dict[str, ProjectFile],
    parent_path: str,
    destination_prefix: str,
    structural_migration: StructuralMigration,
    processed_ids: Set[str],
) -> None:
    """Queue one local file as either an update or a new upload.

    The file is matched against ``remote_files`` by its base name. A match is
    recorded in ``processed_ids`` and appended to the migration's modified
    resources; otherwise the file is appended to the added resources. Nothing
    is sent to the server here, the migration object is only filled in.

    Args:
        local_file_path: Path to the local file to sync
        remote_files: Dictionary of remote files indexed by filename
        parent_path: Parent path for new file creation
        destination_prefix: Prefix for destination path in console output
        structural_migration: Migration object to append resources to
        processed_ids: Set to track processed remote file IDs
    """
    file_name = os.path.basename(local_file_path)
    destination = f"{destination_prefix}/{file_name}"
    remote_file = remote_files.get(file_name)

    if not remote_file:
        # No remote counterpart: schedule a brand-new resource.
        new_resource = AddedResource(
            content_file_path=local_file_path, parent_path=parent_path
        )
        structural_migration.added_resources.append(new_resource)
        self.console.info(f"Uploading to {click.style(destination, fg='cyan')}")
        return

    # Remote counterpart exists: mark it as handled and schedule an update.
    processed_ids.add(remote_file.id)
    changed_resource = ModifiedResource(
        id=remote_file.id, content_file_path=local_file_path
    )
    structural_migration.modified_resources.append(changed_resource)
    self.console.info(f"Updating {click.style(destination, fg='yellow')}")
|
|
780
|
+
|
|
781
|
+
def _collect_deleted_remote_files(
|
|
782
|
+
self,
|
|
783
|
+
remote_files: Dict[str, ProjectFile],
|
|
784
|
+
processed_ids: Set[str],
|
|
785
|
+
destination_prefix: str,
|
|
786
|
+
structural_migration: StructuralMigration,
|
|
787
|
+
) -> None:
|
|
788
|
+
"""Collect remote files that no longer exist locally for deletion.
|
|
789
|
+
|
|
790
|
+
Args:
|
|
791
|
+
remote_files: Dictionary of remote files indexed by filename
|
|
792
|
+
processed_ids: Set of remote file IDs that were processed
|
|
793
|
+
destination_prefix: Prefix for destination path in console output
|
|
794
|
+
structural_migration: Migration object to append deleted resources to
|
|
795
|
+
"""
|
|
796
|
+
for file_name, remote_file in remote_files.items():
|
|
797
|
+
if remote_file.id not in processed_ids:
|
|
798
|
+
structural_migration.deleted_resources.append(remote_file.id)
|
|
799
|
+
destination = f"{destination_prefix}/{file_name}"
|
|
800
|
+
self.console.info(
|
|
801
|
+
f"Deleting {click.style(destination, fg='bright_red')}"
|
|
802
|
+
)
|
|
803
|
+
|
|
804
|
+
async def upload_coded_evals_files(self) -> None:
    """Upload coded-evals files (files with version property) to Studio Web.

    This method:
    1. Scans local evals/evaluators and evals/eval-sets for files with version property
    2. Ensures coded-evals folder structure exists in remote project (only
       when there is at least one local file)
    3. Queues the files for coded-evals/evaluators and coded-evals/eval-sets
       respectively; custom evaluators additionally queue the two files
       returned by ``register_evaluator`` for coded-evals/evaluators/custom
       and coded-evals/evaluators/custom/types
    4. Queues deletion of remote files that no longer exist locally (consistent with source file behavior)
    5. Performs one structural migration if anything was queued
    """
    evaluators_path = "coded-evals/evaluators"
    eval_sets_path = "coded-evals/eval-sets"
    custom_path = "coded-evals/evaluators/custom"
    custom_types_path = "coded-evals/evaluators/custom/types"

    evaluator_details, eval_set_files = self._get_coded_evals_files()

    structure = await self._studio_client.get_project_structure_async()
    if evaluator_details or eval_set_files:
        # Creates the folders when missing and returns the (possibly
        # refreshed) coded-evals folder, so no second fetch is needed here.
        coded_evals_folder = await self._ensure_coded_evals_structure(structure)
    else:
        coded_evals_folder = self._get_folder_by_name(structure, "coded-evals")

    if not coded_evals_folder:
        return  # Nothing to sync

    # Resolve every remote folder up front. Each lookup defaults to None so
    # that a missing "evaluators" folder cannot leave the later names
    # unbound (previously an UnboundLocalError).
    evaluators_folder = self._get_subfolder_by_name(
        coded_evals_folder, "evaluators"
    )
    eval_sets_folder = self._get_subfolder_by_name(coded_evals_folder, "eval-sets")
    custom_evaluators_folder = None
    evaluator_types_folder = None
    if evaluators_folder:
        custom_evaluators_folder = self._get_subfolder_by_name(
            evaluators_folder, "custom"
        )
    if custom_evaluators_folder:
        evaluator_types_folder = self._get_subfolder_by_name(
            custom_evaluators_folder, "types"
        )

    remote_evaluator_files = self._collect_files_from_folder(evaluators_folder)
    remote_eval_set_files = self._collect_files_from_folder(eval_sets_folder)
    remote_custom_evaluator_files = self._collect_files_from_folder(
        custom_evaluators_folder
    )
    remote_custom_evaluator_type_files = self._collect_files_from_folder(
        evaluator_types_folder
    )

    # Create structural migration for coded-evals files
    structural_migration = StructuralMigration(
        deleted_resources=[], added_resources=[], modified_resources=[]
    )

    processed_evaluator_ids: Set[str] = set()
    processed_eval_set_ids: Set[str] = set()
    processed_custom_evaluator_ids: Set[str] = set()
    processed_evaluator_type_ids: Set[str] = set()

    for evaluator in evaluator_details:
        if evaluator.is_custom:
            evaluator_schema_file_path, evaluator_types_file_path = (
                register_evaluator(evaluator.custom_evaluator_file_name)
            )

            self._process_file_sync(
                evaluator_schema_file_path,
                remote_custom_evaluator_files,
                custom_path,
                custom_path,
                structural_migration,
                processed_custom_evaluator_ids,
            )

            self._process_file_sync(
                evaluator_types_file_path,
                remote_custom_evaluator_type_files,
                custom_types_path,
                custom_types_path,
                structural_migration,
                processed_evaluator_type_ids,
            )

        self._process_file_sync(
            evaluator.path,
            remote_evaluator_files,
            evaluators_path,
            evaluators_path,
            structural_migration,
            processed_evaluator_ids,
        )

    for eval_set_file in eval_set_files:
        self._process_file_sync(
            eval_set_file,
            remote_eval_set_files,
            eval_sets_path,
            eval_sets_path,
            structural_migration,
            processed_eval_set_ids,
        )

    # Anything remote that was not matched by a local file gets deleted.
    deletion_passes = (
        (remote_evaluator_files, processed_evaluator_ids, evaluators_path),
        (remote_eval_set_files, processed_eval_set_ids, eval_sets_path),
        (remote_custom_evaluator_files, processed_custom_evaluator_ids, custom_path),
        (
            remote_custom_evaluator_type_files,
            processed_evaluator_type_ids,
            custom_types_path,
        ),
    )
    for remote_files, processed_ids, destination_prefix in deletion_passes:
        self._collect_deleted_remote_files(
            remote_files,
            processed_ids,
            destination_prefix,
            structural_migration,
        )

    if (
        structural_migration.added_resources
        or structural_migration.modified_resources
        or structural_migration.deleted_resources
    ):
        await self._studio_client.perform_structural_migration_async(
            structural_migration
        )
|
|
@@ -577,16 +577,14 @@ class UiPathBaseRuntime(ABC):
|
|
|
577
577
|
runtime = cls(context)
|
|
578
578
|
return runtime
|
|
579
579
|
|
|
580
|
-
|
|
581
|
-
def get_binding_resources(self) -> List[BindingResource]:
|
|
580
|
+
async def get_binding_resources(self) -> List[BindingResource]:
    """Get binding resources for this runtime.

    This base implementation has no behavior of its own and always raises;
    concrete runtimes are presumably expected to override it.

    Returns: A list of binding resources.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError()
|
|
587
586
|
|
|
588
|
-
|
|
589
|
-
def get_entrypoint(self) -> Entrypoint:
|
|
587
|
+
async def get_entrypoint(self) -> Entrypoint:
|
|
590
588
|
"""Get entrypoint for this runtime.
|
|
591
589
|
|
|
592
590
|
Returns: A entrypoint for this runtime.
|
uipath/_cli/_runtime/_runtime.py
CHANGED
|
@@ -3,7 +3,6 @@
|
|
|
3
3
|
import logging
|
|
4
4
|
import os
|
|
5
5
|
import uuid
|
|
6
|
-
from functools import cached_property
|
|
7
6
|
from pathlib import Path
|
|
8
7
|
from typing import Any, Awaitable, Callable, List, Optional, TypeVar
|
|
9
8
|
|
|
@@ -116,9 +115,8 @@ class UiPathScriptRuntime(UiPathRuntime):
|
|
|
116
115
|
"""Create runtime instance from context."""
|
|
117
116
|
return UiPathScriptRuntime(context, context.entrypoint or "")
|
|
118
117
|
|
|
119
|
-
@cached_property
|
|
120
118
|
@override
|
|
121
|
-
def get_binding_resources(self) -> List[BindingResource]:
|
|
119
|
+
async def get_binding_resources(self) -> List[BindingResource]:
|
|
122
120
|
"""Get binding resources for script runtime.
|
|
123
121
|
|
|
124
122
|
Returns: A list of binding resources.
|
|
@@ -128,9 +126,8 @@ class UiPathScriptRuntime(UiPathRuntime):
|
|
|
128
126
|
bindings = generate_bindings(script_path)
|
|
129
127
|
return bindings.resources
|
|
130
128
|
|
|
131
|
-
@cached_property
|
|
132
129
|
@override
|
|
133
|
-
def get_entrypoint(self) -> Entrypoint:
|
|
130
|
+
async def get_entrypoint(self) -> Entrypoint:
|
|
134
131
|
working_dir = self.context.runtime_dir or os.getcwd()
|
|
135
132
|
script_path = get_user_script(working_dir, entrypoint=self.context.entrypoint)
|
|
136
133
|
if not script_path:
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
from uipath.eval.evaluators import BaseEvaluator, BaseEvaluationCriteria, BaseEvaluatorConfig
|
|
2
|
+
from uipath.eval.models import AgentExecution, EvaluationResult, NumericEvaluationResult, BooleanEvaluationResult, ErrorEvaluationResult
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class $criteria_class(BaseEvaluationCriteria):
    """Evaluation criteria for the $evaluator_name evaluator."""

    # Define your evaluation criteria fields here
    # Example: expected_value: str
    # `pass` only keeps the class body valid while no fields are declared.
    pass
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class $config_class(BaseEvaluatorConfig[$criteria_class]):
    """Configuration for the $evaluator_name evaluator."""

    # Name of the evaluator; defaults to the evaluator class name.
    name: str = "$class_name"
    # Set default evaluation criteria if needed
    # default_evaluation_criteria: $criteria_class | None = $criteria_class(expected_value="example")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class $class_name(BaseEvaluator[$criteria_class, $config_class, type(None)]):
|
|
22
|
+
"""Description for $class_name"""
|
|
23
|
+
|
|
24
|
+
@classmethod
|
|
25
|
+
def get_evaluator_id(cls) -> str:
|
|
26
|
+
"""Get the evaluator ID."""
|
|
27
|
+
return "$class_name"
|
|
28
|
+
|
|
29
|
+
async def evaluate(
|
|
30
|
+
self,
|
|
31
|
+
agent_execution: AgentExecution,
|
|
32
|
+
evaluation_criteria: $criteria_class
|
|
33
|
+
) -> EvaluationResult:
|
|
34
|
+
"""Evaluate the agent execution against the criteria.
|
|
35
|
+
|
|
36
|
+
Args:
|
|
37
|
+
agent_execution: The execution details containing:
|
|
38
|
+
- agent_input: The input received by the agent
|
|
39
|
+
- agent_output: The actual output from the agent
|
|
40
|
+
- agent_trace: The execution trace from the agent (list of OpenTelemetry spans)
|
|
41
|
+
- simulation_instructions: The simulation instructions for the agent
|
|
42
|
+
evaluation_criteria: The criteria to evaluate against
|
|
43
|
+
|
|
44
|
+
Returns:
|
|
45
|
+
EvaluationResult containing the score and details
|
|
46
|
+
"""
|
|
47
|
+
|
|
48
|
+
'''
|
|
49
|
+
# TODO: Implement your evaluation logic here
|
|
50
|
+
Example: Check if the agent output matches expected criteria
|
|
51
|
+
|
|
52
|
+
Access agent execution data:
|
|
53
|
+
agent_input = agent_execution.agent_input
|
|
54
|
+
agent_output = agent_execution.agent_output
|
|
55
|
+
agent_trace = agent_execution.agent_trace
|
|
56
|
+
|
|
57
|
+
# Perform your evaluation
|
|
58
|
+
score = 0.0 # Replace with your scoring logic
|
|
59
|
+
|
|
60
|
+
return NumericEvaluationResult(
|
|
61
|
+
score=score,
|
|
62
|
+
)
|
|
63
|
+
'''
|
|
64
|
+
|
|
65
|
+
raise NotImplementedError(f"evaluate method not implemented")
|
uipath/_cli/_utils/_eval_set.py
CHANGED
|
@@ -3,8 +3,9 @@ from pathlib import Path
|
|
|
3
3
|
from typing import List, Optional
|
|
4
4
|
|
|
5
5
|
import click
|
|
6
|
+
from pydantic import TypeAdapter, ValidationError
|
|
6
7
|
|
|
7
|
-
from uipath._cli._evals._models._evaluation_set import
|
|
8
|
+
from uipath._cli._evals._models._evaluation_set import AnyEvaluationSet
|
|
8
9
|
from uipath._cli._utils._console import ConsoleLogger
|
|
9
10
|
|
|
10
11
|
console = ConsoleLogger()
|
|
@@ -57,28 +58,48 @@ class EvalHelpers:
|
|
|
57
58
|
@staticmethod
|
|
58
59
|
def load_eval_set(
|
|
59
60
|
eval_set_path: str, eval_ids: Optional[List[str]] = None
|
|
60
|
-
) ->
|
|
61
|
+
) -> tuple[AnyEvaluationSet, str]:
|
|
61
62
|
"""Load the evaluation set from file.
|
|
62
63
|
|
|
64
|
+
Args:
|
|
65
|
+
eval_set_path: Path to the evaluation set file
|
|
66
|
+
eval_ids: Optional list of evaluation IDs to filter
|
|
67
|
+
|
|
63
68
|
Returns:
|
|
64
|
-
|
|
69
|
+
Tuple of (AnyEvaluationSet, resolved_path)
|
|
65
70
|
"""
|
|
71
|
+
# If the file doesn't exist at the given path, try looking in evals/eval-sets/
|
|
72
|
+
resolved_path = eval_set_path
|
|
73
|
+
if not Path(eval_set_path).exists():
|
|
74
|
+
# Check if it's just a filename, then search in evals/eval-sets/
|
|
75
|
+
if Path(eval_set_path).name == eval_set_path:
|
|
76
|
+
eval_sets_path = Path("evals/eval-sets") / eval_set_path
|
|
77
|
+
if eval_sets_path.exists():
|
|
78
|
+
resolved_path = str(eval_sets_path)
|
|
79
|
+
|
|
66
80
|
try:
|
|
67
|
-
with open(
|
|
81
|
+
with open(resolved_path, "r", encoding="utf-8") as f:
|
|
68
82
|
data = json.load(f)
|
|
83
|
+
except FileNotFoundError as e:
|
|
84
|
+
raise ValueError(
|
|
85
|
+
f"Evaluation set file not found: '{eval_set_path}'. "
|
|
86
|
+
f"Searched in current directory and evals/eval-sets/ directory."
|
|
87
|
+
) from e
|
|
69
88
|
except json.JSONDecodeError as e:
|
|
70
89
|
raise ValueError(
|
|
71
|
-
f"Invalid JSON in evaluation set file '{
|
|
90
|
+
f"Invalid JSON in evaluation set file '{resolved_path}': {str(e)}. "
|
|
72
91
|
f"Please check the file for syntax errors."
|
|
73
92
|
) from e
|
|
74
93
|
|
|
75
94
|
try:
|
|
76
|
-
eval_set =
|
|
77
|
-
|
|
95
|
+
eval_set: AnyEvaluationSet = TypeAdapter(AnyEvaluationSet).validate_python(
|
|
96
|
+
data
|
|
97
|
+
)
|
|
98
|
+
except ValidationError as e:
|
|
78
99
|
raise ValueError(
|
|
79
|
-
f"Invalid evaluation set format in '{
|
|
100
|
+
f"Invalid evaluation set format in '{resolved_path}': {str(e)}. "
|
|
80
101
|
f"Please verify the evaluation set structure."
|
|
81
102
|
) from e
|
|
82
103
|
if eval_ids:
|
|
83
104
|
eval_set.extract_selected_evals(eval_ids)
|
|
84
|
-
return eval_set
|
|
105
|
+
return eval_set, resolved_path
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import enum
|
|
2
|
+
|
|
3
|
+
from ._console import ConsoleLogger
|
|
4
|
+
|
|
5
|
+
console = ConsoleLogger().get_instance()
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Resources(str, enum.Enum):
    """Available resources that can be created."""

    EVALUATOR = "evaluator"

    @classmethod
    def from_string(cls, resource: str) -> "Resources":  # type: ignore
        """Convert a resource name into the matching enum member.

        Args:
            resource: The resource value to look up, e.g. "evaluator".

        Returns:
            The member whose value equals ``resource``. For an unknown value
            the problem is reported through ``console.error`` together with
            the list of valid values; nothing is returned explicitly on that
            path.
        """
        try:
            member = cls(resource)
        except ValueError:
            valid_resources = ", ".join(item.value for item in cls)
            console.error(
                f"Invalid resource type: '{resource}'. Valid types are: {valid_resources}"
            )
        else:
            return member
|
|
@@ -149,6 +149,24 @@ def get_folder_by_name(
|
|
|
149
149
|
return None
|
|
150
150
|
|
|
151
151
|
|
|
152
|
+
def get_subfolder_by_name(
    parent_folder: ProjectFolder, subfolder_name: str
) -> Optional[ProjectFolder]:
    """Find a direct child of ``parent_folder`` with the given name.

    Args:
        parent_folder: The parent folder to search within
        subfolder_name: Name of the subfolder to find

    Returns:
        Optional[ProjectFolder]: The first entry of ``parent_folder.folders``
        whose ``name`` equals ``subfolder_name``, or None if none matches.
    """
    return next(
        (
            candidate
            for candidate in parent_folder.folders
            if candidate.name == subfolder_name
        ),
        None,
    )
|
|
168
|
+
|
|
169
|
+
|
|
152
170
|
def resolve_path(
|
|
153
171
|
folder: ProjectFolder,
|
|
154
172
|
path: PurePath,
|