uipath 2.1.107__py3-none-any.whl → 2.1.109__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of uipath might be problematic. Click here for more details.

Files changed (72)
  1. uipath/_cli/__init__.py +4 -0
  2. uipath/_cli/_evals/_console_progress_reporter.py +2 -2
  3. uipath/_cli/_evals/_evaluator_factory.py +314 -29
  4. uipath/_cli/_evals/_helpers.py +194 -0
  5. uipath/_cli/_evals/_models/_evaluation_set.py +73 -7
  6. uipath/_cli/_evals/_models/_evaluator.py +183 -9
  7. uipath/_cli/_evals/_models/_evaluator_base_params.py +3 -3
  8. uipath/_cli/_evals/_models/_output.py +87 -3
  9. uipath/_cli/_evals/_progress_reporter.py +288 -28
  10. uipath/_cli/_evals/_runtime.py +80 -26
  11. uipath/_cli/_evals/mocks/input_mocker.py +1 -3
  12. uipath/_cli/_evals/mocks/llm_mocker.py +2 -2
  13. uipath/_cli/_evals/mocks/mocker_factory.py +2 -2
  14. uipath/_cli/_evals/mocks/mockito_mocker.py +2 -2
  15. uipath/_cli/_evals/mocks/mocks.py +5 -3
  16. uipath/_cli/_push/models.py +17 -0
  17. uipath/_cli/_push/sw_file_handler.py +336 -3
  18. uipath/_cli/_runtime/_contracts.py +25 -5
  19. uipath/_cli/_templates/custom_evaluator.py.template +65 -0
  20. uipath/_cli/_utils/_eval_set.py +30 -9
  21. uipath/_cli/_utils/_resources.py +21 -0
  22. uipath/_cli/_utils/_studio_project.py +18 -0
  23. uipath/_cli/cli_add.py +114 -0
  24. uipath/_cli/cli_eval.py +5 -1
  25. uipath/_cli/cli_pull.py +11 -26
  26. uipath/_cli/cli_push.py +2 -0
  27. uipath/_cli/cli_register.py +45 -0
  28. uipath/_events/_events.py +6 -5
  29. uipath/_resources/SDK_REFERENCE.md +0 -97
  30. uipath/_uipath.py +10 -37
  31. uipath/_utils/constants.py +4 -0
  32. uipath/eval/_helpers/evaluators_helpers.py +494 -0
  33. uipath/eval/_helpers/helpers.py +30 -2
  34. uipath/eval/evaluators/__init__.py +60 -5
  35. uipath/eval/evaluators/base_evaluator.py +546 -44
  36. uipath/eval/evaluators/contains_evaluator.py +80 -0
  37. uipath/eval/evaluators/exact_match_evaluator.py +43 -12
  38. uipath/eval/evaluators/json_similarity_evaluator.py +41 -12
  39. uipath/eval/evaluators/legacy_base_evaluator.py +89 -0
  40. uipath/eval/evaluators/{deterministic_evaluator_base.py → legacy_deterministic_evaluator_base.py} +2 -2
  41. uipath/eval/evaluators/legacy_exact_match_evaluator.py +37 -0
  42. uipath/eval/evaluators/legacy_json_similarity_evaluator.py +151 -0
  43. uipath/eval/evaluators/legacy_llm_as_judge_evaluator.py +137 -0
  44. uipath/eval/evaluators/{trajectory_evaluator.py → legacy_trajectory_evaluator.py} +5 -6
  45. uipath/eval/evaluators/llm_as_judge_evaluator.py +143 -78
  46. uipath/eval/evaluators/llm_judge_output_evaluator.py +112 -0
  47. uipath/eval/evaluators/llm_judge_trajectory_evaluator.py +142 -0
  48. uipath/eval/evaluators/output_evaluator.py +117 -0
  49. uipath/eval/evaluators/tool_call_args_evaluator.py +82 -0
  50. uipath/eval/evaluators/tool_call_count_evaluator.py +87 -0
  51. uipath/eval/evaluators/tool_call_order_evaluator.py +84 -0
  52. uipath/eval/evaluators/tool_call_output_evaluator.py +87 -0
  53. uipath/eval/evaluators_types/ContainsEvaluator.json +73 -0
  54. uipath/eval/evaluators_types/ExactMatchEvaluator.json +89 -0
  55. uipath/eval/evaluators_types/JsonSimilarityEvaluator.json +81 -0
  56. uipath/eval/evaluators_types/LLMJudgeOutputEvaluator.json +110 -0
  57. uipath/eval/evaluators_types/LLMJudgeSimulationTrajectoryEvaluator.json +88 -0
  58. uipath/eval/evaluators_types/LLMJudgeStrictJSONSimilarityOutputEvaluator.json +110 -0
  59. uipath/eval/evaluators_types/LLMJudgeTrajectoryEvaluator.json +88 -0
  60. uipath/eval/evaluators_types/ToolCallArgsEvaluator.json +131 -0
  61. uipath/eval/evaluators_types/ToolCallCountEvaluator.json +104 -0
  62. uipath/eval/evaluators_types/ToolCallOrderEvaluator.json +100 -0
  63. uipath/eval/evaluators_types/ToolCallOutputEvaluator.json +124 -0
  64. uipath/eval/evaluators_types/generate_types.py +31 -0
  65. uipath/eval/models/__init__.py +16 -1
  66. uipath/eval/models/llm_judge_types.py +196 -0
  67. uipath/eval/models/models.py +109 -7
  68. {uipath-2.1.107.dist-info → uipath-2.1.109.dist-info}/METADATA +1 -1
  69. {uipath-2.1.107.dist-info → uipath-2.1.109.dist-info}/RECORD +72 -40
  70. {uipath-2.1.107.dist-info → uipath-2.1.109.dist-info}/WHEEL +0 -0
  71. {uipath-2.1.107.dist-info → uipath-2.1.109.dist-info}/entry_points.txt +0 -0
  72. {uipath-2.1.107.dist-info → uipath-2.1.109.dist-info}/licenses/LICENSE +0 -0
@@ -6,7 +6,14 @@ import os
6
6
  from datetime import datetime, timezone
7
7
  from typing import Any, AsyncIterator, Dict, Optional, Set
8
8
 
9
+ import click
10
+
9
11
  from ...models.exceptions import EnrichedException
12
+ from .._evals._helpers import ( # type: ignore
13
+ register_evaluator,
14
+ try_extract_file_and_class_name,
15
+ )
16
+ from .._utils._console import ConsoleLogger
10
17
  from .._utils._constants import (
11
18
  AGENT_INITIAL_CODE_VERSION,
12
19
  AGENT_STORAGE_VERSION,
@@ -28,6 +35,7 @@ from .._utils._studio_project import (
28
35
  StructuralMigration,
29
36
  StudioClient,
30
37
  )
38
+ from .models import EvaluatorFileDetails
31
39
 
32
40
  logger = logging.getLogger(__name__)
33
41
 
@@ -58,6 +66,7 @@ class SwFileHandler:
58
66
  """
59
67
  self.directory = directory
60
68
  self.include_uv_lock = include_uv_lock
69
+ self.console = ConsoleLogger()
61
70
  self._studio_client = StudioClient(project_id)
62
71
  self._project_structure: Optional[ProjectStructure] = None
63
72
 
@@ -175,6 +184,7 @@ class SwFileHandler:
175
184
  remote_file = source_code_files.get(
176
185
  local_file.relative_path.replace("\\", "/"), None
177
186
  )
187
+
178
188
  if remote_file:
179
189
  # File exists remotely - mark for update
180
190
  processed_source_files.add(remote_file.id)
@@ -185,7 +195,7 @@ class SwFileHandler:
185
195
  )
186
196
  updates.append(
187
197
  FileOperationUpdate(
188
- file_path=local_file.file_name,
198
+ file_path=local_file.file_path,
189
199
  status="updating",
190
200
  message=f"Updating '{local_file.file_name}'",
191
201
  )
@@ -203,9 +213,9 @@ class SwFileHandler:
203
213
  )
204
214
  updates.append(
205
215
  FileOperationUpdate(
206
- file_path=local_file.relative_path,
216
+ file_path=local_file.file_path,
207
217
  status="uploading",
208
- message=f"Uploading '{local_file.relative_path}'",
218
+ message=f"Uploading '{local_file.file_name}'",
209
219
  )
210
220
  )
211
221
 
@@ -610,3 +620,326 @@ class SwFileHandler:
610
620
  # Yield all updates
611
621
  for update in updates:
612
622
  yield update
623
+
624
def _extract_evaluator_details(self, file_path: str) -> tuple[bool, str]:
    """Inspect a coded-evals JSON file for a version marker and a custom evaluator file.

    Args:
        file_path: Path to the JSON file to inspect.

    Returns:
        tuple[bool, str]: A tuple containing:
            - Whether the top-level JSON object has a "version" property.
            - The file-name element returned by
              ``try_extract_file_and_class_name`` for the "evaluatorSchema"
              value (presumably the custom evaluator's Python file, empty
              when there is none -- confirm against that helper).
        ``(False, "")`` is returned when the file is missing, is not valid
        JSON, or its top-level value is not a JSON object.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        # Valid JSON that is not an object (list, string, number, null) has
        # no properties to look up; calling .get() on it would raise
        # AttributeError, so treat it like any other unusable file.
        if not isinstance(data, dict):
            return False, ""

        _, file_name, _ = try_extract_file_and_class_name(
            data.get("evaluatorSchema", "")
        )
        return "version" in data, file_name
    except (json.JSONDecodeError, FileNotFoundError):
        return False, ""
644
+
645
def _get_coded_evals_files(self) -> tuple[list[EvaluatorFileDetails], list[str]]:
    """Collect the local coded-evals JSON files under ``<directory>/evals``.

    A ``*.json`` file counts as a coded-evals file when
    ``_extract_evaluator_details`` reports that it has a "version" property.

    Returns:
        Tuple of (evaluator_files, eval_set_files):
            - evaluator_files: one ``EvaluatorFileDetails`` per matching file
              in ``evals/evaluators``.
            - eval_set_files: paths of the matching files in
              ``evals/eval-sets``.
    """

    def versioned_json_files(folder: str):
        """Yield (path, custom_evaluator_file_name) for versioned JSON files in folder."""
        if not os.path.exists(folder):
            return
        for entry in os.listdir(folder):
            if not entry.endswith(".json"):
                continue
            candidate = os.path.join(folder, entry)
            has_version, custom_file = self._extract_evaluator_details(candidate)
            if has_version:
                yield candidate, custom_file

    evaluator_files: list[EvaluatorFileDetails] = [
        EvaluatorFileDetails(path=candidate, custom_evaluator_file_name=custom_file)
        for candidate, custom_file in versioned_json_files(
            os.path.join(self.directory, "evals", "evaluators")
        )
    ]

    eval_set_files = [
        candidate
        for candidate, _ in versioned_json_files(
            os.path.join(self.directory, "evals", "eval-sets")
        )
    ]

    return evaluator_files, eval_set_files
679
+
680
def _get_subfolder_by_name(
    self, parent_folder: ProjectFolder, subfolder_name: str
) -> Optional[ProjectFolder]:
    """Look up a direct child folder of ``parent_folder`` by name.

    Args:
        parent_folder: Folder whose ``folders`` are searched.
        subfolder_name: Exact name to match.

    Returns:
        Optional[ProjectFolder]: The first child whose ``name`` equals
        ``subfolder_name``, or None when there is no match.
    """
    matches = (
        child for child in parent_folder.folders if child.name == subfolder_name
    )
    return next(matches, None)
696
+
697
async def _ensure_coded_evals_structure(
    self, structure: ProjectStructure
) -> ProjectFolder:
    """Return the remote ``coded-evals`` folder, creating it when absent.

    When the folder is missing from ``structure`` it is created together
    with its ``evaluators`` and ``eval-sets`` subfolders, and the project
    structure is fetched again to obtain the new folder object.

    Args:
        structure: Current project structure.

    Returns:
        ProjectFolder: The coded-evals folder.
    """
    existing = self._get_folder_by_name(structure, "coded-evals")
    if existing:
        return existing

    root_id = await self._studio_client.create_folder_async("coded-evals")
    self.console.success(f"Created {click.style('coded-evals', fg='cyan')} folder")

    # (folder name, path shown in the console) for each child to create.
    children = (
        ("evaluators", "coded-evals/evaluators"),
        ("eval-sets", "coded-evals/eval-sets"),
    )
    for child_name, display_path in children:
        await self._studio_client.create_folder_async(child_name, root_id)
        self.console.success(
            f"Created {click.style(display_path, fg='cyan')} folder"
        )

    # Re-read the structure so the returned folder includes the new children.
    refreshed = await self._studio_client.get_project_structure_async()
    created = self._get_folder_by_name(refreshed, "coded-evals")
    assert created, "Coded-evals folder uploaded but not found."
    return created
734
+
735
def _collect_files_from_folder(
    self, folder: Optional[ProjectFolder]
) -> Dict[str, ProjectFile]:
    """Index the files directly inside ``folder`` by file name.

    Args:
        folder: Folder to read, or None.

    Returns:
        Dict[str, ProjectFile]: Mapping of ``file.name`` to the file; empty
        when ``folder`` is falsy. A later file with the same name replaces
        an earlier one.
    """
    if not folder:
        return {}
    return {entry.name: entry for entry in folder.files}
743
+
744
def _process_file_sync(
    self,
    local_file_path: str,
    remote_files: Dict[str, ProjectFile],
    parent_path: str,
    destination_prefix: str,
    structural_migration: StructuralMigration,
    processed_ids: Set[str],
) -> None:
    """Queue one local file as either an update or an upload.

    The local file is matched to a remote file by base name. A match is
    recorded as a modified resource and its id is added to
    ``processed_ids``; otherwise the file is recorded as an added resource.

    Args:
        local_file_path: Path to the local file to sync.
        remote_files: Remote files indexed by file name.
        parent_path: Parent path used when the file is new.
        destination_prefix: Prefix of the destination shown in console output.
        structural_migration: Migration object the resource is appended to.
        processed_ids: Set collecting the ids of matched remote files.
    """
    base_name = os.path.basename(local_file_path)
    shown_path = f"{destination_prefix}/{base_name}"
    match = remote_files.get(base_name)

    if not match:
        # Nothing with this name exists remotely yet.
        structural_migration.added_resources.append(
            AddedResource(content_file_path=local_file_path, parent_path=parent_path)
        )
        self.console.info(f"Uploading to {click.style(shown_path, fg='cyan')}")
        return

    processed_ids.add(match.id)
    structural_migration.modified_resources.append(
        ModifiedResource(id=match.id, content_file_path=local_file_path)
    )
    self.console.info(f"Updating {click.style(shown_path, fg='yellow')}")
780
+
781
def _collect_deleted_remote_files(
    self,
    remote_files: Dict[str, ProjectFile],
    processed_ids: Set[str],
    destination_prefix: str,
    structural_migration: StructuralMigration,
) -> None:
    """Queue for deletion every remote file whose id was not processed.

    Args:
        remote_files: Remote files indexed by file name.
        processed_ids: Ids of remote files that were matched to a local file.
        destination_prefix: Prefix of the destination shown in console output.
        structural_migration: Migration object the deleted ids are appended to.
    """
    for remote_name, remote in remote_files.items():
        if remote.id in processed_ids:
            continue

        structural_migration.deleted_resources.append(remote.id)
        shown_path = f"{destination_prefix}/{remote_name}"
        self.console.info(f"Deleting {click.style(shown_path, fg='bright_red')}")
803
+
804
async def upload_coded_evals_files(self) -> None:
    """Sync local coded-evals files (JSON files with a "version" property) to Studio Web.

    This method:
        1. Collects local evaluator and eval-set files via
           ``_get_coded_evals_files``.
        2. Ensures the remote ``coded-evals`` folder exists when there is at
           least one local file.
        3. Queues an upload or update for every local file; for custom
           evaluators it also queues the two files returned by
           ``register_evaluator`` under ``coded-evals/evaluators/custom`` and
           ``coded-evals/evaluators/custom/types``.
        4. Queues a deletion for every remote file in those folders that was
           not matched by a local file.
        5. Sends a single structural migration if anything was queued.

    Nothing is sent when the remote project has no ``coded-evals`` folder
    and there are no local files, or when ``coded-evals`` has no
    ``evaluators`` subfolder.
    """
    evaluator_details, eval_set_files = self._get_coded_evals_files()

    structure = await self._studio_client.get_project_structure_async()
    coded_evals_folder = self._get_folder_by_name(structure, "coded-evals")

    if evaluator_details or eval_set_files:
        # _ensure_coded_evals_structure already returns the (freshly
        # re-read, when created) folder, so no further structure fetch is
        # needed here.
        coded_evals_folder = await self._ensure_coded_evals_structure(structure)

    if not coded_evals_folder:
        return  # No remote folder and no local files: nothing to sync

    evaluators_folder = self._get_subfolder_by_name(coded_evals_folder, "evaluators")
    if not evaluators_folder:
        # NOTE(review): without an evaluators subfolder there is nothing to
        # reconcile against; bail out rather than touch undefined state.
        return

    eval_sets_folder = self._get_subfolder_by_name(coded_evals_folder, "eval-sets")
    custom_evaluators_folder = self._get_subfolder_by_name(
        evaluators_folder, "custom"
    )
    evaluator_types_folder = None
    if custom_evaluators_folder:
        evaluator_types_folder = self._get_subfolder_by_name(
            custom_evaluators_folder, "types"
        )

    evaluators_path = "coded-evals/evaluators"
    eval_sets_path = "coded-evals/eval-sets"
    custom_path = "coded-evals/evaluators/custom"
    types_path = "coded-evals/evaluators/custom/types"

    remote_evaluator_files = self._collect_files_from_folder(evaluators_folder)
    remote_eval_set_files = self._collect_files_from_folder(eval_sets_folder)
    remote_custom_evaluator_files = self._collect_files_from_folder(
        custom_evaluators_folder
    )
    remote_custom_evaluator_type_files = self._collect_files_from_folder(
        evaluator_types_folder
    )

    # One migration carries every add/update/delete for coded-evals files.
    structural_migration = StructuralMigration(
        deleted_resources=[], added_resources=[], modified_resources=[]
    )

    processed_evaluator_ids: Set[str] = set()
    processed_eval_set_ids: Set[str] = set()
    processed_custom_evaluator_ids: Set[str] = set()
    processed_evaluator_type_ids: Set[str] = set()

    for evaluator in evaluator_details:
        if evaluator.is_custom:
            evaluator_schema_file_path, evaluator_types_file_path = (
                register_evaluator(evaluator.custom_evaluator_file_name)
            )

            self._process_file_sync(
                evaluator_schema_file_path,
                remote_custom_evaluator_files,
                custom_path,
                custom_path,
                structural_migration,
                processed_custom_evaluator_ids,
            )

            self._process_file_sync(
                evaluator_types_file_path,
                remote_custom_evaluator_type_files,
                types_path,
                types_path,
                structural_migration,
                processed_evaluator_type_ids,
            )

        self._process_file_sync(
            evaluator.path,
            remote_evaluator_files,
            evaluators_path,
            evaluators_path,
            structural_migration,
            processed_evaluator_ids,
        )

    for eval_set_file in eval_set_files:
        self._process_file_sync(
            eval_set_file,
            remote_eval_set_files,
            eval_sets_path,
            eval_sets_path,
            structural_migration,
            processed_eval_set_ids,
        )

    # Remote files left unmatched in each folder no longer exist locally.
    deletion_passes = (
        (remote_evaluator_files, processed_evaluator_ids, evaluators_path),
        (remote_eval_set_files, processed_eval_set_ids, eval_sets_path),
        (remote_custom_evaluator_files, processed_custom_evaluator_ids, custom_path),
        (
            remote_custom_evaluator_type_files,
            processed_evaluator_type_ids,
            types_path,
        ),
    )
    for remote_files, processed_ids, destination_prefix in deletion_passes:
        self._collect_deleted_remote_files(
            remote_files,
            processed_ids,
            destination_prefix,
            structural_migration,
        )

    if (
        structural_migration.added_resources
        or structural_migration.modified_resources
        or structural_migration.deleted_resources
    ):
        await self._studio_client.perform_structural_migration_async(
            structural_migration
        )
@@ -460,12 +460,12 @@ class UiPathRuntimeContext(BaseModel):
460
460
  return instance
461
461
 
462
462
 
463
- class UiPathRuntimeError(Exception):
463
+ class UiPathBaseRuntimeError(Exception):
464
464
  """Base exception class for UiPath runtime errors with structured error information."""
465
465
 
466
466
  def __init__(
467
467
  self,
468
- code: UiPathErrorCode,
468
+ code: str,
469
469
  title: str,
470
470
  detail: str,
471
471
  category: UiPathErrorCategory = UiPathErrorCategory.UNKNOWN,
@@ -484,10 +484,8 @@ class UiPathRuntimeError(Exception):
484
484
  if status is None:
485
485
  status = self._extract_http_status()
486
486
 
487
- code_value = code.value
488
-
489
487
  self.error_info = UiPathErrorContract(
490
- code=f"{prefix}.{code_value}",
488
+ code=f"{prefix}.{code}",
491
489
  title=title,
492
490
  detail=detail,
493
491
  category=category,
@@ -529,6 +527,28 @@ class UiPathRuntimeError(Exception):
529
527
  return self.error_info.model_dump()
530
528
 
531
529
 
530
class UiPathRuntimeError(UiPathBaseRuntimeError):
    """Runtime error whose code is given as a ``UiPathErrorCode`` member.

    Forwards everything to ``UiPathBaseRuntimeError``, passing the enum's
    ``value`` as the code.
    """

    def __init__(
        self,
        code: UiPathErrorCode,
        title: str,
        detail: str,
        category: UiPathErrorCategory = UiPathErrorCategory.UNKNOWN,
        prefix: str = "Python",
        include_traceback: bool = True,
    ):
        """Initialize the error.

        Args:
            code: Error code enum member; its ``value`` is forwarded.
            title: Error title.
            detail: Error detail text.
            category: Error category.
            prefix: Prefix forwarded to the base class.
            include_traceback: Forwarded to the base class unchanged.
        """
        # The base class takes the code as plain text, so unwrap the enum here.
        code_value = code.value
        super().__init__(
            code=code_value,
            title=title,
            detail=detail,
            category=category,
            prefix=prefix,
            include_traceback=include_traceback,
        )
550
+
551
+
532
552
  class UiPathRuntimeStreamNotSupportedError(NotImplementedError):
533
553
  """Raised when a runtime does not support streaming."""
534
554
 
@@ -0,0 +1,65 @@
1
+ from uipath.eval.evaluators import BaseEvaluator, BaseEvaluationCriteria, BaseEvaluatorConfig
2
+ from uipath.eval.models import AgentExecution, EvaluationResult, NumericEvaluationResult, BooleanEvaluationResult, ErrorEvaluationResult
3
+
4
+
5
+ class $criteria_class(BaseEvaluationCriteria):
6
+ """Evaluation criteria for the $evaluator_name evaluator."""
7
+
8
+ # Define your evaluation criteria fields here
9
+ # Example: expected_value: str
10
+ pass
11
+
12
+
13
+ class $config_class(BaseEvaluatorConfig[$criteria_class]):
14
+ """Configuration for the $evaluator_name evaluator."""
15
+
16
+ name: str = "$class_name"
17
+ # Set default evaluation criteria if needed
18
+ # default_evaluation_criteria: $criteria_class | None = $criteria_class(expected_value="example")
19
+
20
+
21
+ class $class_name(BaseEvaluator[$criteria_class, $config_class, type(None)]):
22
+ """Description for $class_name"""
23
+
24
+ @classmethod
25
+ def get_evaluator_id(cls) -> str:
26
+ """Get the evaluator ID."""
27
+ return "$class_name"
28
+
29
+ async def evaluate(
30
+ self,
31
+ agent_execution: AgentExecution,
32
+ evaluation_criteria: $criteria_class
33
+ ) -> EvaluationResult:
34
+ """Evaluate the agent execution against the criteria.
35
+
36
+ Args:
37
+ agent_execution: The execution details containing:
38
+ - agent_input: The input received by the agent
39
+ - agent_output: The actual output from the agent
40
+ - agent_trace: The execution trace from the agent (list of OpenTelemetry spans)
41
+ - simulation_instructions: The simulation instructions for the agent
42
+ evaluation_criteria: The criteria to evaluate against
43
+
44
+ Returns:
45
+ EvaluationResult containing the score and details
46
+ """
47
+
48
+ '''
49
+ # TODO: Implement your evaluation logic here
50
+ Example: Check if the agent output matches expected criteria
51
+
52
+ Access agent execution data:
53
+ agent_input = agent_execution.agent_input
54
+ agent_output = agent_execution.agent_output
55
+ agent_trace = agent_execution.agent_trace
56
+
57
+ # Perform your evaluation
58
+ score = 0.0 # Replace with your scoring logic
59
+
60
+ return NumericEvaluationResult(
61
+ score=score,
62
+ )
63
+ '''
64
+
65
+ raise NotImplementedError(f"evaluate method not implemented")
@@ -3,8 +3,9 @@ from pathlib import Path
3
3
  from typing import List, Optional
4
4
 
5
5
  import click
6
+ from pydantic import TypeAdapter, ValidationError
6
7
 
7
- from uipath._cli._evals._models._evaluation_set import EvaluationSet
8
+ from uipath._cli._evals._models._evaluation_set import AnyEvaluationSet
8
9
  from uipath._cli._utils._console import ConsoleLogger
9
10
 
10
11
  console = ConsoleLogger()
@@ -57,28 +58,48 @@ class EvalHelpers:
57
58
  @staticmethod
58
59
  def load_eval_set(
59
60
  eval_set_path: str, eval_ids: Optional[List[str]] = None
60
- ) -> EvaluationSet:
61
+ ) -> tuple[AnyEvaluationSet, str]:
61
62
  """Load the evaluation set from file.
62
63
 
64
+ Args:
65
+ eval_set_path: Path to the evaluation set file
66
+ eval_ids: Optional list of evaluation IDs to filter
67
+
63
68
  Returns:
64
- The loaded evaluation set as EvaluationSet model
69
+ Tuple of (AnyEvaluationSet, resolved_path)
65
70
  """
71
+ # If the file doesn't exist at the given path, try looking in evals/eval-sets/
72
+ resolved_path = eval_set_path
73
+ if not Path(eval_set_path).exists():
74
+ # Check if it's just a filename, then search in evals/eval-sets/
75
+ if Path(eval_set_path).name == eval_set_path:
76
+ eval_sets_path = Path("evals/eval-sets") / eval_set_path
77
+ if eval_sets_path.exists():
78
+ resolved_path = str(eval_sets_path)
79
+
66
80
  try:
67
- with open(eval_set_path, "r", encoding="utf-8") as f:
81
+ with open(resolved_path, "r", encoding="utf-8") as f:
68
82
  data = json.load(f)
83
+ except FileNotFoundError as e:
84
+ raise ValueError(
85
+ f"Evaluation set file not found: '{eval_set_path}'. "
86
+ f"Searched in current directory and evals/eval-sets/ directory."
87
+ ) from e
69
88
  except json.JSONDecodeError as e:
70
89
  raise ValueError(
71
- f"Invalid JSON in evaluation set file '{eval_set_path}': {str(e)}. "
90
+ f"Invalid JSON in evaluation set file '{resolved_path}': {str(e)}. "
72
91
  f"Please check the file for syntax errors."
73
92
  ) from e
74
93
 
75
94
  try:
76
- eval_set = EvaluationSet(**data)
77
- except (TypeError, ValueError) as e:
95
+ eval_set: AnyEvaluationSet = TypeAdapter(AnyEvaluationSet).validate_python(
96
+ data
97
+ )
98
+ except ValidationError as e:
78
99
  raise ValueError(
79
- f"Invalid evaluation set format in '{eval_set_path}': {str(e)}. "
100
+ f"Invalid evaluation set format in '{resolved_path}': {str(e)}. "
80
101
  f"Please verify the evaluation set structure."
81
102
  ) from e
82
103
  if eval_ids:
83
104
  eval_set.extract_selected_evals(eval_ids)
84
- return eval_set
105
+ return eval_set, resolved_path
@@ -0,0 +1,21 @@
1
+ import enum
2
+
3
+ from ._console import ConsoleLogger
4
+
5
+ console = ConsoleLogger().get_instance()
6
+
7
+
8
class Resources(str, enum.Enum):
    """Available resources that can be created."""

    EVALUATOR = "evaluator"

    @classmethod
    def from_string(cls, resource: str) -> "Resources":  # type: ignore
        """Convert a resource name to its ``Resources`` member.

        Args:
            resource: Resource name, e.g. "evaluator".

        Returns:
            The member whose value equals ``resource``. For an unknown name
            the error is reported through ``console.error``.
        """
        try:
            return cls(resource)
        except ValueError:
            allowed = ", ".join(member.value for member in cls)
            # NOTE(review): nothing is returned after this call; presumably
            # console.error terminates the command -- confirm.
            console.error(
                f"Invalid resource type: '{resource}'. Valid types are: {allowed}"
            )
@@ -149,6 +149,24 @@ def get_folder_by_name(
149
149
  return None
150
150
 
151
151
 
152
def get_subfolder_by_name(
    parent_folder: ProjectFolder, subfolder_name: str
) -> Optional[ProjectFolder]:
    """Look up a direct child folder of ``parent_folder`` by name.

    Args:
        parent_folder: Folder whose ``folders`` are searched.
        subfolder_name: Exact name to match.

    Returns:
        Optional[ProjectFolder]: The first child whose ``name`` equals
        ``subfolder_name``, or None when there is no match.
    """
    matches = (
        child for child in parent_folder.folders if child.name == subfolder_name
    )
    return next(matches, None)
168
+
169
+
152
170
  def resolve_path(
153
171
  folder: ProjectFolder,
154
172
  path: PurePath,