uipath 2.1.8__tar.gz → 2.1.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231)
  1. {uipath-2.1.8 → uipath-2.1.10}/PKG-INFO +1 -1
  2. {uipath-2.1.8 → uipath-2.1.10}/pyproject.toml +1 -2
  3. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/__init__.py +2 -0
  4. uipath-2.1.10/src/uipath/_cli/_evals/_evaluators/__init__.py +20 -0
  5. uipath-2.1.10/src/uipath/_cli/_evals/_evaluators/_agent_scorer_evaluator.py +48 -0
  6. uipath-2.1.10/src/uipath/_cli/_evals/_evaluators/_deterministic_evaluator.py +41 -0
  7. uipath-2.1.10/src/uipath/_cli/_evals/_evaluators/_evaluator_base.py +124 -0
  8. uipath-2.1.10/src/uipath/_cli/_evals/_evaluators/_evaluator_factory.py +103 -0
  9. uipath-2.1.10/src/uipath/_cli/_evals/_evaluators/_llm_as_judge_evaluator.py +181 -0
  10. uipath-2.1.10/src/uipath/_cli/_evals/_evaluators/_trajectory_evaluator.py +48 -0
  11. uipath-2.1.10/src/uipath/_cli/_evals/_models/__init__.py +18 -0
  12. uipath-2.1.10/src/uipath/_cli/_evals/_models/_evaluation_set.py +43 -0
  13. uipath-2.1.10/src/uipath/_cli/_evals/_models/_evaluators.py +89 -0
  14. uipath-2.1.10/src/uipath/_cli/_evals/evaluation_service.py +583 -0
  15. uipath-2.1.10/src/uipath/_cli/_evals/progress_reporter.py +356 -0
  16. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/_runtime/_contracts.py +25 -10
  17. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/_runtime/_logging.py +8 -6
  18. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/_utils/_console.py +105 -1
  19. uipath-2.1.10/src/uipath/_cli/cli_eval.py +95 -0
  20. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/cli_run.py +74 -32
  21. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_services/api_client.py +5 -3
  22. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_services/llm_gateway_service.py +4 -4
  23. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_utils/constants.py +4 -0
  24. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/telemetry/_constants.py +3 -3
  25. {uipath-2.1.8 → uipath-2.1.10}/tests/cli/test_run.py +2 -1
  26. {uipath-2.1.8 → uipath-2.1.10}/uv.lock +1 -1
  27. {uipath-2.1.8 → uipath-2.1.10}/.cursorrules +0 -0
  28. {uipath-2.1.8 → uipath-2.1.10}/.editorconfig +0 -0
  29. {uipath-2.1.8 → uipath-2.1.10}/.gitattributes +0 -0
  30. {uipath-2.1.8 → uipath-2.1.10}/.github/workflows/cd.yml +0 -0
  31. {uipath-2.1.8 → uipath-2.1.10}/.github/workflows/ci.yml +0 -0
  32. {uipath-2.1.8 → uipath-2.1.10}/.github/workflows/commitlint.yml +0 -0
  33. {uipath-2.1.8 → uipath-2.1.10}/.github/workflows/lint.yml +0 -0
  34. {uipath-2.1.8 → uipath-2.1.10}/.github/workflows/publish-dev.yml +0 -0
  35. {uipath-2.1.8 → uipath-2.1.10}/.github/workflows/publish-docs.yml +0 -0
  36. {uipath-2.1.8 → uipath-2.1.10}/.github/workflows/slack.yml +0 -0
  37. {uipath-2.1.8 → uipath-2.1.10}/.github/workflows/test.yml +0 -0
  38. {uipath-2.1.8 → uipath-2.1.10}/.gitignore +0 -0
  39. {uipath-2.1.8 → uipath-2.1.10}/.pre-commit-config.yaml +0 -0
  40. {uipath-2.1.8 → uipath-2.1.10}/.python-version +0 -0
  41. {uipath-2.1.8 → uipath-2.1.10}/.vscode/extensions.json +0 -0
  42. {uipath-2.1.8 → uipath-2.1.10}/.vscode/launch.json +0 -0
  43. {uipath-2.1.8 → uipath-2.1.10}/.vscode/settings.json +0 -0
  44. {uipath-2.1.8 → uipath-2.1.10}/CONTRIBUTING.md +0 -0
  45. {uipath-2.1.8 → uipath-2.1.10}/LICENSE +0 -0
  46. {uipath-2.1.8 → uipath-2.1.10}/README.md +0 -0
  47. {uipath-2.1.8 → uipath-2.1.10}/docs/CONTRIBUTING.md +0 -0
  48. {uipath-2.1.8 → uipath-2.1.10}/docs/FAQ.md +0 -0
  49. {uipath-2.1.8 → uipath-2.1.10}/docs/assets/env-preparation-failed-dark.png +0 -0
  50. {uipath-2.1.8 → uipath-2.1.10}/docs/assets/env-preparation-failed-light.png +0 -0
  51. {uipath-2.1.8 → uipath-2.1.10}/docs/assets/favicon.png +0 -0
  52. {uipath-2.1.8 → uipath-2.1.10}/docs/assets/logo-dark.svg +0 -0
  53. {uipath-2.1.8 → uipath-2.1.10}/docs/assets/logo-light.svg +0 -0
  54. {uipath-2.1.8 → uipath-2.1.10}/docs/cli/index.md +0 -0
  55. {uipath-2.1.8 → uipath-2.1.10}/docs/core/actions.md +0 -0
  56. {uipath-2.1.8 → uipath-2.1.10}/docs/core/assets/cloud_env_var_dark.gif +0 -0
  57. {uipath-2.1.8 → uipath-2.1.10}/docs/core/assets/cloud_env_var_light.gif +0 -0
  58. {uipath-2.1.8 → uipath-2.1.10}/docs/core/assets/cloud_env_var_secret_dark.png +0 -0
  59. {uipath-2.1.8 → uipath-2.1.10}/docs/core/assets/cloud_env_var_secret_light.png +0 -0
  60. {uipath-2.1.8 → uipath-2.1.10}/docs/core/assets/copy_path_dark.png +0 -0
  61. {uipath-2.1.8 → uipath-2.1.10}/docs/core/assets/copy_path_light.png +0 -0
  62. {uipath-2.1.8 → uipath-2.1.10}/docs/core/assets.md +0 -0
  63. {uipath-2.1.8 → uipath-2.1.10}/docs/core/attachments.md +0 -0
  64. {uipath-2.1.8 → uipath-2.1.10}/docs/core/buckets.md +0 -0
  65. {uipath-2.1.8 → uipath-2.1.10}/docs/core/connections.md +0 -0
  66. {uipath-2.1.8 → uipath-2.1.10}/docs/core/context_grounding.md +0 -0
  67. {uipath-2.1.8 → uipath-2.1.10}/docs/core/environment_variables.md +0 -0
  68. {uipath-2.1.8 → uipath-2.1.10}/docs/core/getting_started.md +0 -0
  69. {uipath-2.1.8 → uipath-2.1.10}/docs/core/jobs.md +0 -0
  70. {uipath-2.1.8 → uipath-2.1.10}/docs/core/llm_gateway.md +0 -0
  71. {uipath-2.1.8 → uipath-2.1.10}/docs/core/processes.md +0 -0
  72. {uipath-2.1.8 → uipath-2.1.10}/docs/core/queues.md +0 -0
  73. {uipath-2.1.8 → uipath-2.1.10}/docs/core/traced.md +0 -0
  74. {uipath-2.1.8 → uipath-2.1.10}/docs/hooks.py +0 -0
  75. {uipath-2.1.8 → uipath-2.1.10}/docs/index.md +0 -0
  76. {uipath-2.1.8 → uipath-2.1.10}/docs/javascripts/extra.js +0 -0
  77. {uipath-2.1.8 → uipath-2.1.10}/docs/overrides/main.html +0 -0
  78. {uipath-2.1.8 → uipath-2.1.10}/docs/overrides/partials/actions.html +0 -0
  79. {uipath-2.1.8 → uipath-2.1.10}/docs/overrides/partials/logo.html +0 -0
  80. {uipath-2.1.8 → uipath-2.1.10}/docs/release_policy.md +0 -0
  81. {uipath-2.1.8 → uipath-2.1.10}/docs/sample_images/google-ADK-agent/agent-output.png +0 -0
  82. {uipath-2.1.8 → uipath-2.1.10}/docs/stylesheets/extra.css +0 -0
  83. {uipath-2.1.8 → uipath-2.1.10}/justfile +0 -0
  84. {uipath-2.1.8 → uipath-2.1.10}/mkdocs.yml +0 -0
  85. {uipath-2.1.8 → uipath-2.1.10}/py.typed +0 -0
  86. {uipath-2.1.8 → uipath-2.1.10}/samples/google-ADK-agent/.env.example +0 -0
  87. {uipath-2.1.8 → uipath-2.1.10}/samples/google-ADK-agent/README.md +0 -0
  88. {uipath-2.1.8 → uipath-2.1.10}/samples/google-ADK-agent/input.json +0 -0
  89. {uipath-2.1.8 → uipath-2.1.10}/samples/google-ADK-agent/multi_tool_agent/__init__.py +0 -0
  90. {uipath-2.1.8 → uipath-2.1.10}/samples/google-ADK-agent/multi_tool_agent/agent.py +0 -0
  91. {uipath-2.1.8 → uipath-2.1.10}/samples/google-ADK-agent/pyproject.toml +0 -0
  92. {uipath-2.1.8 → uipath-2.1.10}/samples/google-ADK-agent/uv.lock +0 -0
  93. {uipath-2.1.8 → uipath-2.1.10}/scripts/debug_test.py +0 -0
  94. {uipath-2.1.8 → uipath-2.1.10}/scripts/lint_httpx_client.py +0 -0
  95. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/__init__.py +0 -0
  96. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/README.md +0 -0
  97. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/_auth/_auth_server.py +0 -0
  98. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/_auth/_client_credentials.py +0 -0
  99. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/_auth/_models.py +0 -0
  100. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/_auth/_oidc_utils.py +0 -0
  101. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/_auth/_portal_service.py +0 -0
  102. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/_auth/_utils.py +0 -0
  103. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/_auth/auth_config.json +0 -0
  104. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/_auth/index.html +0 -0
  105. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/_auth/localhost.crt +0 -0
  106. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/_auth/localhost.key +0 -0
  107. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/_runtime/_escalation.py +0 -0
  108. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/_runtime/_hitl.py +0 -0
  109. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/_runtime/_runtime.py +0 -0
  110. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/_templates/.psmdcp.template +0 -0
  111. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/_templates/.rels.template +0 -0
  112. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/_templates/[Content_Types].xml.template +0 -0
  113. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/_templates/main.py.template +0 -0
  114. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/_templates/package.nuspec.template +0 -0
  115. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/_utils/_common.py +0 -0
  116. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/_utils/_constants.py +0 -0
  117. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/_utils/_debug.py +0 -0
  118. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/_utils/_folders.py +0 -0
  119. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/_utils/_input_args.py +0 -0
  120. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/_utils/_parse_ast.py +0 -0
  121. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/_utils/_processes.py +0 -0
  122. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/_utils/_project_files.py +0 -0
  123. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/_utils/_studio_project.py +0 -0
  124. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/_utils/_tracing.py +0 -0
  125. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/_utils/_uv_helpers.py +0 -0
  126. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/cli_auth.py +0 -0
  127. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/cli_deploy.py +0 -0
  128. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/cli_init.py +0 -0
  129. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/cli_invoke.py +0 -0
  130. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/cli_new.py +0 -0
  131. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/cli_pack.py +0 -0
  132. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/cli_publish.py +0 -0
  133. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/cli_pull.py +0 -0
  134. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/cli_push.py +0 -0
  135. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/middlewares.py +0 -0
  136. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_cli/spinner.py +0 -0
  137. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_config.py +0 -0
  138. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_execution_context.py +0 -0
  139. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_folder_context.py +0 -0
  140. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_services/__init__.py +0 -0
  141. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_services/_base_service.py +0 -0
  142. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_services/actions_service.py +0 -0
  143. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_services/assets_service.py +0 -0
  144. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_services/attachments_service.py +0 -0
  145. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_services/buckets_service.py +0 -0
  146. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_services/connections_service.py +0 -0
  147. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_services/context_grounding_service.py +0 -0
  148. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_services/folder_service.py +0 -0
  149. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_services/jobs_service.py +0 -0
  150. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_services/processes_service.py +0 -0
  151. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_services/queues_service.py +0 -0
  152. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_uipath.py +0 -0
  153. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_utils/__init__.py +0 -0
  154. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_utils/_endpoint.py +0 -0
  155. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_utils/_infer_bindings.py +0 -0
  156. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_utils/_logs.py +0 -0
  157. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_utils/_read_overwrites.py +0 -0
  158. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_utils/_request_override.py +0 -0
  159. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_utils/_request_spec.py +0 -0
  160. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_utils/_ssl_context.py +0 -0
  161. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_utils/_url.py +0 -0
  162. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/_utils/_user_agent.py +0 -0
  163. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/models/__init__.py +0 -0
  164. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/models/action_schema.py +0 -0
  165. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/models/actions.py +0 -0
  166. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/models/assets.py +0 -0
  167. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/models/attachment.py +0 -0
  168. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/models/buckets.py +0 -0
  169. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/models/connections.py +0 -0
  170. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/models/context_grounding.py +0 -0
  171. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/models/context_grounding_index.py +0 -0
  172. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/models/errors.py +0 -0
  173. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/models/exceptions.py +0 -0
  174. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/models/interrupt_models.py +0 -0
  175. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/models/job.py +0 -0
  176. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/models/llm_gateway.py +0 -0
  177. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/models/processes.py +0 -0
  178. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/models/queues.py +0 -0
  179. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/py.typed +0 -0
  180. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/telemetry/__init__.py +0 -0
  181. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/telemetry/_track.py +0 -0
  182. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/tracing/__init__.py +0 -0
  183. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/tracing/_otel_exporters.py +0 -0
  184. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/tracing/_traced.py +0 -0
  185. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/tracing/_utils.py +0 -0
  186. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/utils/__init__.py +0 -0
  187. {uipath-2.1.8 → uipath-2.1.10}/src/uipath/utils/_endpoints_manager.py +0 -0
  188. {uipath-2.1.8 → uipath-2.1.10}/tests/__init__.py +0 -0
  189. {uipath-2.1.8 → uipath-2.1.10}/tests/cli/conftest.py +0 -0
  190. {uipath-2.1.8 → uipath-2.1.10}/tests/cli/mocks/bindings_script.py +0 -0
  191. {uipath-2.1.8 → uipath-2.1.10}/tests/cli/mocks/pyproject.toml +0 -0
  192. {uipath-2.1.8 → uipath-2.1.10}/tests/cli/mocks/simple_script.py +0 -0
  193. {uipath-2.1.8 → uipath-2.1.10}/tests/cli/mocks/uipath-mock.json +0 -0
  194. {uipath-2.1.8 → uipath-2.1.10}/tests/cli/mocks/uipath-simple-script-mock.json +0 -0
  195. {uipath-2.1.8 → uipath-2.1.10}/tests/cli/test_hitl.py +0 -0
  196. {uipath-2.1.8 → uipath-2.1.10}/tests/cli/test_init.py +0 -0
  197. {uipath-2.1.8 → uipath-2.1.10}/tests/cli/test_invoke.py +0 -0
  198. {uipath-2.1.8 → uipath-2.1.10}/tests/cli/test_new.py +0 -0
  199. {uipath-2.1.8 → uipath-2.1.10}/tests/cli/test_pack.py +0 -0
  200. {uipath-2.1.8 → uipath-2.1.10}/tests/cli/test_publish.py +0 -0
  201. {uipath-2.1.8 → uipath-2.1.10}/tests/cli/test_pull.py +0 -0
  202. {uipath-2.1.8 → uipath-2.1.10}/tests/cli/test_push.py +0 -0
  203. {uipath-2.1.8 → uipath-2.1.10}/tests/cli/test_utils.py +0 -0
  204. {uipath-2.1.8 → uipath-2.1.10}/tests/cli/utils/common.py +0 -0
  205. {uipath-2.1.8 → uipath-2.1.10}/tests/cli/utils/project_details.py +0 -0
  206. {uipath-2.1.8 → uipath-2.1.10}/tests/cli/utils/uipath_json.py +0 -0
  207. {uipath-2.1.8 → uipath-2.1.10}/tests/conftest.py +0 -0
  208. {uipath-2.1.8 → uipath-2.1.10}/tests/sdk/services/conftest.py +0 -0
  209. {uipath-2.1.8 → uipath-2.1.10}/tests/sdk/services/test_actions_service.py +0 -0
  210. {uipath-2.1.8 → uipath-2.1.10}/tests/sdk/services/test_api_client.py +0 -0
  211. {uipath-2.1.8 → uipath-2.1.10}/tests/sdk/services/test_assets_service.py +0 -0
  212. {uipath-2.1.8 → uipath-2.1.10}/tests/sdk/services/test_attachments_service.py +0 -0
  213. {uipath-2.1.8 → uipath-2.1.10}/tests/sdk/services/test_base_service.py +0 -0
  214. {uipath-2.1.8 → uipath-2.1.10}/tests/sdk/services/test_buckets_service.py +0 -0
  215. {uipath-2.1.8 → uipath-2.1.10}/tests/sdk/services/test_connections_service.py +0 -0
  216. {uipath-2.1.8 → uipath-2.1.10}/tests/sdk/services/test_context_grounding_service.py +0 -0
  217. {uipath-2.1.8 → uipath-2.1.10}/tests/sdk/services/test_folder_service.py +0 -0
  218. {uipath-2.1.8 → uipath-2.1.10}/tests/sdk/services/test_jobs_service.py +0 -0
  219. {uipath-2.1.8 → uipath-2.1.10}/tests/sdk/services/test_llm_integration.py +0 -0
  220. {uipath-2.1.8 → uipath-2.1.10}/tests/sdk/services/test_llm_schema_cleanup.py +0 -0
  221. {uipath-2.1.8 → uipath-2.1.10}/tests/sdk/services/test_llm_service.py +0 -0
  222. {uipath-2.1.8 → uipath-2.1.10}/tests/sdk/services/test_processes_service.py +0 -0
  223. {uipath-2.1.8 → uipath-2.1.10}/tests/sdk/services/test_queues_service.py +0 -0
  224. {uipath-2.1.8 → uipath-2.1.10}/tests/sdk/services/test_uipath_llm_integration.py +0 -0
  225. {uipath-2.1.8 → uipath-2.1.10}/tests/sdk/test_bindings_inference.py +0 -0
  226. {uipath-2.1.8 → uipath-2.1.10}/tests/sdk/test_config.py +0 -0
  227. {uipath-2.1.8 → uipath-2.1.10}/tests/sdk/test_overwrites.py +0 -0
  228. {uipath-2.1.8 → uipath-2.1.10}/tests/tracing/test_otel_exporters.py +0 -0
  229. {uipath-2.1.8 → uipath-2.1.10}/tests/tracing/test_span_utils.py +0 -0
  230. {uipath-2.1.8 → uipath-2.1.10}/tests/tracing/test_traced.py +0 -0
  231. {uipath-2.1.8 → uipath-2.1.10}/tests/tracing/test_tracing_manager.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: uipath
3
- Version: 2.1.8
3
+ Version: 2.1.10
4
4
  Summary: Python SDK and CLI for UiPath Platform, enabling programmatic interaction with automation services, process management, and deployment tools.
5
5
  Project-URL: Homepage, https://uipath.com
6
6
  Project-URL: Repository, https://github.com/UiPath/uipath-python
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "uipath"
3
- version = "2.1.8"
3
+ version = "2.1.10"
4
4
  description = "Python SDK and CLI for UiPath Platform, enabling programmatic interaction with automation services, process management, and deployment tools."
5
5
  readme = { file = "README.md", content-type = "text/markdown" }
6
6
  requires-python = ">=3.10"
@@ -103,7 +103,6 @@ line-ending = "auto"
103
103
  plugins = ["pydantic.mypy"]
104
104
  exclude = ["samples/.*"]
105
105
 
106
-
107
106
  follow_imports = "silent"
108
107
  warn_redundant_casts = true
109
108
  warn_unused_ignores = true
@@ -5,6 +5,7 @@ import click
5
5
 
6
6
  from .cli_auth import auth as auth # type: ignore
7
7
  from .cli_deploy import deploy as deploy # type: ignore
8
+ from .cli_eval import eval as eval # type: ignore
8
9
  from .cli_init import init as init # type: ignore
9
10
  from .cli_invoke import invoke as invoke # type: ignore
10
11
  from .cli_new import new as new # type: ignore
@@ -67,3 +68,4 @@ cli.add_command(auth)
67
68
  cli.add_command(invoke)
68
69
  cli.add_command(push)
69
70
  cli.add_command(pull)
71
+ cli.add_command(eval)
@@ -0,0 +1,20 @@
1
+ """Evaluators package for the evaluation system.
2
+
3
+ This package contains all evaluator types and the factory for creating them.
4
+ """
5
+
6
+ from ._agent_scorer_evaluator import AgentScorerEvaluator
7
+ from ._deterministic_evaluator import DeterministicEvaluator
8
+ from ._evaluator_base import EvaluatorBase
9
+ from ._evaluator_factory import EvaluatorFactory
10
+ from ._llm_as_judge_evaluator import LlmAsAJudgeEvaluator
11
+ from ._trajectory_evaluator import TrajectoryEvaluator
12
+
13
+ __all__ = [
14
+ "EvaluatorBase",
15
+ "EvaluatorFactory",
16
+ "DeterministicEvaluator",
17
+ "LlmAsAJudgeEvaluator",
18
+ "AgentScorerEvaluator",
19
+ "TrajectoryEvaluator",
20
+ ]
@@ -0,0 +1,48 @@
1
+ from typing import Any, Dict
2
+
3
+ from .._models import EvaluationResult
4
+ from ._evaluator_base import EvaluatorBase
5
+
6
+
7
class AgentScorerEvaluator(EvaluatorBase):
    """Evaluator that delegates the scoring of outputs to an agent."""

    def __init__(
        self,
        agent_config: Dict[str, Any],
        scoring_criteria: Dict[str, Any],
        target_output_key: str = "*",
    ):
        """Store the scorer configuration on the new instance.

        Args:
            agent_config: Configuration for the scoring agent; a falsy value
                is replaced by an empty dict.
            scoring_criteria: Criteria used for scoring; a falsy value is
                replaced by an empty dict.
            target_output_key: Key in output to evaluate ("*" for entire output)
        """
        super().__init__()
        self.target_output_key = target_output_key
        self.scoring_criteria = scoring_criteria if scoring_criteria else {}
        self.agent_config = agent_config if agent_config else {}

    async def evaluate(
        self,
        evaluation_id: str,
        evaluation_name: str,
        input_data: Dict[str, Any],
        expected_output: Dict[str, Any],
        actual_output: Dict[str, Any],
    ) -> EvaluationResult:
        """Score an agent run with the agent scorer (not implemented yet).

        Args:
            evaluation_id: The ID of the evaluation being processed
            evaluation_name: The name of the evaluation
            input_data: The input data for the evaluation
            expected_output: The expected output
            actual_output: The actual output from the agent

        Returns:
            EvaluationResult containing the score and details

        Raises:
            NotImplementedError: Always; the scorer has no implementation.
        """
        raise NotImplementedError()
@@ -0,0 +1,41 @@
1
+ from typing import Any, Dict
2
+
3
+ from .._models import EvaluationResult
4
+ from ._evaluator_base import EvaluatorBase
5
+
6
+
7
class DeterministicEvaluator(EvaluatorBase):
    """Evaluator that applies deterministic, rule-based checks."""

    def __init__(self, rule_config: Dict[str, Any], target_output_key: str = "*"):
        """Store the rule configuration on the new instance.

        Args:
            rule_config: Configuration for the rule (expected_value,
                regex_pattern, etc.); a falsy value is replaced by an
                empty dict.
            target_output_key: Key in output to evaluate ("*" for entire output)
        """
        super().__init__()
        self.target_output_key = target_output_key
        self.rule_config = rule_config if rule_config else {}

    async def evaluate(
        self,
        evaluation_id: str,
        evaluation_name: str,
        input_data: Dict[str, Any],
        expected_output: Dict[str, Any],
        actual_output: Dict[str, Any],
    ) -> EvaluationResult:
        """Evaluate an agent run with deterministic rules (not implemented yet).

        Args:
            evaluation_id: The ID of the evaluation being processed
            evaluation_name: The name of the evaluation
            input_data: The input data for the evaluation
            expected_output: The expected output
            actual_output: The actual output from the agent

        Returns:
            EvaluationResult containing the score and details

        Raises:
            NotImplementedError: Always; the rules have no implementation.
        """
        raise NotImplementedError()
@@ -0,0 +1,124 @@
1
+ import functools
2
+ import time
3
+ from abc import ABC, abstractmethod
4
+ from dataclasses import dataclass
5
+ from typing import Any, Dict
6
+
7
+ from uipath._cli._evals._models import (
8
+ EvaluationResult,
9
+ EvaluatorCategory,
10
+ EvaluatorType,
11
+ )
12
+
13
+
14
def measure_execution_time(func):
    """Wrap an async callable so its result records how long the call took.

    The wrapper awaits ``func``, measures the elapsed time and stores it, in
    seconds, on the ``evaluation_time`` attribute of the returned object.

    Args:
        func: Coroutine function whose awaited result allows assignment to
            an ``evaluation_time`` attribute.

    Returns:
        An async wrapper that takes the same arguments as ``func`` and
        returns the same result object.
    """

    @functools.wraps(func)
    async def wrapper(*args, **kwargs) -> "EvaluationResult":
        # perf_counter is monotonic: unlike the wall clock, the measured
        # duration cannot go negative or jump when the system time changes.
        started = time.perf_counter()
        result = await func(*args, **kwargs)
        result.evaluation_time = time.perf_counter() - started
        return result

    return wrapper
28
+
29
+
30
@dataclass
class EvaluatorBaseParams:
    """Common configuration values shared by every evaluator.

    ``EvaluatorBase.from_params`` copies each field onto the evaluator
    instance it creates.
    """

    # Stored on the evaluator as ``id``.
    evaluator_id: str
    category: EvaluatorCategory
    # Stored on the evaluator as ``type``.
    evaluator_type: EvaluatorType
    name: str
    description: str
    created_at: str
    updated_at: str
    target_output_key: str
42
+
43
+
44
class EvaluatorBase(ABC):
    """Abstract base class for all evaluators.

    ``__init__`` only declares the common attributes; their values are
    assigned by ``from_params``.
    """

    def __init__(self):
        # Annotation-only declarations: nothing is assigned here, the values
        # are set afterwards by 'from_params'.
        self.id: str
        self.name: str
        self.description: str
        self.created_at: str
        self.updated_at: str
        self.category: EvaluatorCategory
        self.type: EvaluatorType
        self.target_output_key: str

    @classmethod
    def from_params(cls, params: EvaluatorBaseParams, **kwargs):
        """Create an evaluator and populate its common attributes.

        Args:
            params: EvaluatorBaseParams containing base configuration
            **kwargs: Additional specific parameters forwarded to the
                concrete evaluator's constructor

        Returns:
            Initialized evaluator instance
        """
        instance = cls(**kwargs)
        instance.id = params.evaluator_id
        instance.category = params.category
        instance.type = params.evaluator_type
        instance.name = params.name
        instance.description = params.description
        instance.created_at = params.created_at
        instance.updated_at = params.updated_at
        # Assigned after construction, so this value replaces any
        # target_output_key the constructor set from **kwargs.
        instance.target_output_key = params.target_output_key
        return instance

    # NOTE(review): the timing wrapper is attached to this function object
    # only. A subclass that overrides ``evaluate`` replaces it, so the
    # override is timed only if it applies the decorator itself.
    @measure_execution_time
    @abstractmethod
    async def evaluate(
        self,
        evaluation_id: str,
        evaluation_name: str,
        input_data: Dict[str, Any],
        expected_output: Dict[str, Any],
        actual_output: Dict[str, Any],
    ) -> EvaluationResult:
        """Evaluate the given data and return a result.

        Args:
            evaluation_id: The ID of the evaluation being processed
            evaluation_name: The name of the evaluation
            input_data: The input data for the evaluation
            expected_output: The expected output
            actual_output: The actual output from the agent

        Returns:
            EvaluationResult containing the score and details
        """

    def to_dict(self) -> Dict[str, Any]:
        """Convert the evaluator instance to a dictionary representation.

        Returns:
            Dict[str, Any]: Dictionary containing all evaluator properties;
            ``category`` and ``type`` are given by member name, or ``None``
            when the attribute is ``None``.
        """
        # Compare against None instead of relying on truthiness, so that a
        # member whose value is falsy (e.g. an int-backed member equal to 0)
        # is still serialized. NOTE(review): the enum definitions are not
        # visible here - confirm whether they are int-backed.
        return {
            "id": self.id,
            "name": self.name,
            "description": self.description,
            "created_at": self.created_at,
            "updated_at": self.updated_at,
            "category": self.category.name if self.category is not None else None,
            "type": self.type.name if self.type is not None else None,
            "target_output_key": self.target_output_key,
        }

    def __repr__(self) -> str:
        """Return a short description with the class name, id, name and category."""
        # Same None guard as to_dict, so repr() does not raise
        # AttributeError when category is None.
        category_name = self.category.name if self.category is not None else None
        return (
            f"{self.__class__.__name__}(id='{self.id}', name='{self.name}', "
            f"category={category_name})"
        )
@@ -0,0 +1,103 @@
1
+ from typing import Any, Dict
2
+
3
+ from .._models import EvaluatorCategory, EvaluatorType
4
+ from ._agent_scorer_evaluator import AgentScorerEvaluator
5
+ from ._deterministic_evaluator import DeterministicEvaluator
6
+ from ._evaluator_base import EvaluatorBase, EvaluatorBaseParams
7
+ from ._llm_as_judge_evaluator import LlmAsAJudgeEvaluator
8
+ from ._trajectory_evaluator import TrajectoryEvaluator
9
+
10
+
11
class EvaluatorFactory:
    """Factory class for creating evaluator instances based on configuration."""

    @staticmethod
    def create_evaluator(data: Dict[str, Any]) -> EvaluatorBase:
        """Create an evaluator instance from configuration data.

        Args:
            data: Dictionary containing evaluator configuration from JSON file

        Returns:
            Appropriate evaluator instance based on category

        Raises:
            ValueError: If category is unknown or required fields are missing
            NotImplementedError: For the deterministic, agent scorer and
                trajectory categories, whose creators are not implemented
        """
        # Extract common fields
        evaluator_id = data.get("id")
        if not evaluator_id:
            raise ValueError("Evaluator configuration must include 'id' field")

        category = EvaluatorCategory.from_int(data.get("category"))
        evaluator_type = EvaluatorType.from_int(data.get("type", EvaluatorType.Unknown))

        # 'from_params' overwrites the evaluator's target_output_key with the
        # value from the base parameters, so a missing key must default to
        # "*" (entire output) here as well, matching the evaluator
        # constructors, rather than to an empty string.
        base_params = EvaluatorBaseParams(
            evaluator_id=evaluator_id,
            category=category,
            evaluator_type=evaluator_type,
            name=data.get("name", ""),
            description=data.get("description", ""),
            created_at=data.get("createdAt", ""),
            updated_at=data.get("updatedAt", ""),
            target_output_key=data.get("targetOutputKey", "*"),
        )

        # Create evaluator based on category
        if category == EvaluatorCategory.Deterministic:
            return EvaluatorFactory._create_deterministic_evaluator(base_params, data)
        if category == EvaluatorCategory.LlmAsAJudge:
            return EvaluatorFactory._create_llm_as_judge_evaluator(base_params, data)
        if category == EvaluatorCategory.AgentScorer:
            return EvaluatorFactory._create_agent_scorer_evaluator(base_params, data)
        if category == EvaluatorCategory.Trajectory:
            return EvaluatorFactory._create_trajectory_evaluator(base_params, data)
        raise ValueError(f"Unknown evaluator category: {category}")

    @staticmethod
    def _create_deterministic_evaluator(
        base_params: EvaluatorBaseParams, data: Dict[str, Any]
    ) -> DeterministicEvaluator:
        """Create a deterministic evaluator (not implemented yet)."""
        raise NotImplementedError()

    @staticmethod
    def _create_llm_as_judge_evaluator(
        base_params: EvaluatorBaseParams, data: Dict[str, Any]
    ) -> LlmAsAJudgeEvaluator:
        """Create an LLM-as-a-judge evaluator.

        Args:
            base_params: Common evaluator fields
            data: Evaluator configuration; must contain non-empty 'prompt'
                and 'model' entries

        Returns:
            The configured LlmAsAJudgeEvaluator

        Raises:
            ValueError: If 'prompt' or 'model' is missing or empty
        """
        prompt = data.get("prompt", "")
        if not prompt:
            raise ValueError("LLM evaluator must include 'prompt' field")

        model = data.get("model", "")
        if not model:
            raise ValueError("LLM evaluator must include 'model' field")

        return LlmAsAJudgeEvaluator.from_params(
            base_params,
            prompt=prompt,
            model=model,
            target_output_key=data.get("targetOutputKey", "*"),
        )

    @staticmethod
    def _create_agent_scorer_evaluator(
        base_params: EvaluatorBaseParams, data: Dict[str, Any]
    ) -> AgentScorerEvaluator:
        """Create an agent scorer evaluator (not implemented yet)."""
        raise NotImplementedError()

    @staticmethod
    def _create_trajectory_evaluator(
        base_params: EvaluatorBaseParams, data: Dict[str, Any]
    ) -> TrajectoryEvaluator:
        """Create a trajectory evaluator (not implemented yet)."""
        raise NotImplementedError()
@@ -0,0 +1,181 @@
1
+ import json
2
+ from typing import Any, Dict
3
+
4
+ from ...._config import Config
5
+ from ...._execution_context import ExecutionContext
6
+ from ...._services.llm_gateway_service import UiPathLlmChatService
7
+ from ...._utils.constants import (
8
+ ENV_BASE_URL,
9
+ ENV_UIPATH_ACCESS_TOKEN,
10
+ ENV_UNATTENDED_USER_ACCESS_TOKEN,
11
+ COMMUNITY_agents_SUFFIX,
12
+ )
13
+ from .._models import EvaluationResult, LLMResponse
14
+ from ._evaluator_base import EvaluatorBase
15
+
16
+
17
class LlmAsAJudgeEvaluator(EvaluatorBase):
    """Evaluator that uses an LLM to judge the quality of outputs.

    The configured prompt is a template: every ``{{ActualOutput}}`` and
    ``{{ExpectedOutput}}`` placeholder is replaced with the string form of
    the value under evaluation. The LLM is asked to answer with a JSON
    object holding a numeric ``score`` and a ``justification``. Failures
    while calling the LLM or parsing its answer are reported as a score of
    0.0 with the error text as justification instead of being raised.
    """

    def __init__(self, prompt: str = "", model: str = "", target_output_key: str = "*"):
        """Initialize the LLM-as-a-judge evaluator.

        Args:
            prompt: The prompt template for the LLM
            model: The model to use for evaluation
            target_output_key: Key in output to evaluate ("*" for entire output)
        """
        super().__init__()
        self.actual_output_placeholder = "{{ActualOutput}}"
        self.expected_output_placeholder = "{{ExpectedOutput}}"
        self._initialize_llm()
        self.prompt = prompt
        self.model = model
        self.target_output_key: str = target_output_key

    def _initialize_llm(self):
        """Create the chat service used for evaluation from environment variables.

        The base URL is read from ``ENV_BASE_URL``. The secret is taken from
        ``ENV_UNATTENDED_USER_ACCESS_TOKEN`` and falls back to
        ``ENV_UIPATH_ACCESS_TOKEN`` when the former is unset or empty.
        """
        import os

        # os.getenv may return None; the values are handed to Config as-is.
        base_url_value: str = os.getenv(ENV_BASE_URL)  # type: ignore
        secret_value: str = os.getenv(ENV_UNATTENDED_USER_ACCESS_TOKEN) or os.getenv(
            ENV_UIPATH_ACCESS_TOKEN
        )  # type: ignore
        config = Config(
            base_url=base_url_value,
            secret=secret_value,
        )
        self.llm = UiPathLlmChatService(config, ExecutionContext())

    async def evaluate(
        self,
        evaluation_id: str,
        evaluation_name: str,
        input_data: Dict[str, Any],
        expected_output: Dict[str, Any],
        actual_output: Dict[str, Any],
    ) -> EvaluationResult:
        """Evaluate using an LLM as a judge.

        Args:
            evaluation_id: The ID of the evaluation being processed
            evaluation_name: The name of the evaluation
            input_data: The input data for the evaluation (recorded in the
                result only; it is not part of the prompt)
            expected_output: The expected output
            actual_output: The actual output from the agent

        Returns:
            EvaluationResult containing the score and details
        """
        # The same key selects the compared value on both sides.
        target_value = self._extract_target_value(actual_output)
        expected_value = self._extract_target_value(expected_output)

        evaluation_prompt = self._create_evaluation_prompt(expected_value, target_value)

        llm_response = await self._get_llm_response(evaluation_prompt)

        # The result carries the full outputs, not just the extracted values.
        return EvaluationResult(
            evaluation_id=evaluation_id,
            evaluation_name=evaluation_name,
            evaluator_id=self.id,
            evaluator_name=self.name,
            score=llm_response.score,
            input=input_data,
            expected_output=expected_output,
            actual_output=actual_output,
            details=llm_response.justification,
        )

    def _extract_target_value(self, output: Dict[str, Any]) -> Any:
        """Extract the target value from output based on target_output_key.

        Args:
            output: The output dictionary to read from.

        Returns:
            ``output`` itself when the key is ``"*"``; otherwise the value
            reached by following the dot-separated key through nested
            dictionaries, or ``None`` when the path cannot be resolved.
        """
        if self.target_output_key == "*":
            return output

        # Handle nested keys such as "result.summary".
        value: Any = output
        try:
            for key in self.target_output_key.split("."):
                if not isinstance(value, dict):
                    # The path goes deeper than the data does.
                    return None
                value = value[key]
        except (KeyError, TypeError):
            return None
        return value

    def _create_evaluation_prompt(
        self, expected_output: Any, actual_output: Any
    ) -> str:
        """Create the evaluation prompt for the LLM.

        Both placeholders are substituted in a single pass over the
        template, so text inserted for one placeholder is never re-scanned
        for the other. A literal ``{{ExpectedOutput}}`` appearing inside the
        agent's output therefore stays as-is instead of being replaced by
        the expected value.

        Args:
            expected_output: Value substituted for ``{{ExpectedOutput}}``.
            actual_output: Value substituted for ``{{ActualOutput}}``.

        Returns:
            The prompt with every placeholder occurrence replaced by the
            ``str()`` form of the corresponding value.
        """
        actual_text = str(actual_output)
        expected_text = str(expected_output)

        # Split on the actual-output placeholder, substitute the expected
        # value only inside the template fragments, then stitch the
        # fragments back together with the actual value.
        template_parts = self.prompt.split(self.actual_output_placeholder)
        return actual_text.join(
            part.replace(self.expected_output_placeholder, expected_text)
            for part in template_parts
        )

    async def _get_llm_response(self, evaluation_prompt: str) -> LLMResponse:
        """Get response from the LLM.

        Args:
            evaluation_prompt: The formatted prompt to send to the LLM

        Returns:
            LLMResponse with score and justification. Any error is reported
            as a score of 0.0 with the error text as justification.
        """
        try:
            # Remove the community-agents suffix from the model name. Only
            # the trailing occurrence is stripped; the same text appearing
            # earlier in the name is left untouched.
            model = self.model
            if model.endswith(COMMUNITY_agents_SUFFIX):
                model = model[: len(model) - len(COMMUNITY_agents_SUFFIX)]

            # Ask for a structured answer so the reply can be parsed
            # straight into an LLMResponse.
            request_data = {
                "model": model,
                "messages": [{"role": "user", "content": evaluation_prompt}],
                "response_format": {
                    "type": "json_schema",
                    "json_schema": {
                        "name": "evaluation_response",
                        "schema": {
                            "type": "object",
                            "properties": {
                                "score": {
                                    "type": "number",
                                    "minimum": 0,
                                    "maximum": 100,
                                    "description": "Score between 0 and 100",
                                },
                                "justification": {
                                    "type": "string",
                                    "description": "Explanation for the score",
                                },
                            },
                            "required": ["score", "justification"],
                        },
                    },
                },
            }

            response = await self.llm.chat_completions(**request_data)

            try:
                return LLMResponse(**json.loads(response.choices[-1].message.content))
            except (json.JSONDecodeError, ValueError) as e:
                return LLMResponse(
                    score=0.0, justification=f"Error parsing LLM response: {str(e)}"
                )

        except Exception as e:
            # Deliberate best-effort: one failing LLM call yields a zero
            # score instead of propagating to the caller.
            return LLMResponse(
                score=0.0, justification=f"Error during LLM evaluation: {str(e)}"
            )
@@ -0,0 +1,48 @@
1
+ from typing import Any, Dict
2
+
3
+ from .._models import EvaluationResult
4
+ from ._evaluator_base import EvaluatorBase
5
+
6
+
7
class TrajectoryEvaluator(EvaluatorBase):
    """Evaluator meant to score the trajectory/path taken to reach outputs.

    Only configuration storage is implemented; ``evaluate`` is a
    placeholder that always raises ``NotImplementedError``.
    """

    def __init__(
        self,
        trajectory_config: Dict[str, Any],
        step_weights: Dict[str, float],
        target_output_key: str = "*",
    ):
        """Store the trajectory configuration.

        Args:
            trajectory_config: Configuration for trajectory analysis; a
                falsy value is stored as an empty dict.
            step_weights: Weights for different steps in the trajectory; a
                falsy value is stored as an empty dict.
            target_output_key: Key in output to evaluate ("*" for entire output)
        """
        super().__init__()
        self.target_output_key = target_output_key
        self.trajectory_config = trajectory_config if trajectory_config else {}
        self.step_weights = step_weights if step_weights else {}

    async def evaluate(
        self,
        evaluation_id: str,
        evaluation_name: str,
        input_data: Dict[str, Any],
        expected_output: Dict[str, Any],
        actual_output: Dict[str, Any],
    ) -> EvaluationResult:
        """Evaluate using trajectory analysis (not implemented).

        Args:
            evaluation_id: The ID of the evaluation being processed
            evaluation_name: The name of the evaluation
            input_data: The input data for the evaluation
            expected_output: The expected output
            actual_output: The actual output from the agent

        Returns:
            EvaluationResult containing the score and details

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError
@@ -0,0 +1,18 @@
1
+ from uipath._cli._evals._models._evaluation_set import EvaluationItem, EvaluationSet
2
+ from uipath._cli._evals._models._evaluators import (
3
+ EvaluationResult,
4
+ EvaluationSetResult,
5
+ EvaluatorCategory,
6
+ EvaluatorType,
7
+ LLMResponse,
8
+ )
9
+
10
# Public names re-exported by this package, grouped by defining module.
__all__ = [
    # _evaluation_set
    "EvaluationItem",
    "EvaluationSet",
    # _evaluators
    "EvaluationResult",
    "EvaluationSetResult",
    "EvaluatorCategory",
    "EvaluatorType",
    "LLMResponse",
]
@@ -0,0 +1,43 @@
1
+ from enum import IntEnum
2
+ from typing import Any, Dict, List
3
+
4
+ from pydantic import BaseModel, Field
5
+
6
+
7
class EvaluationItem(BaseModel):
    """One evaluation entry belonging to an evaluation set.

    Field names are camelCase, presumably mirroring the serialized
    evaluation-set format (confirm against the files being loaded). Fields
    without a default are required.
    """

    # Required identification and payload.
    id: str
    name: str
    inputs: Dict[str, Any]
    expectedOutput: Dict[str, Any]

    # Optional settings; each falls back to an empty/disabled value.
    expectedAgentBehavior: str = ""
    simulationInstructions: str = ""
    simulateInput: bool = False
    inputGenerationInstructions: str = ""
    simulateTools: bool = False
    toolsToSimulate: List[str] = Field(default_factory=list)

    # Required reference and timestamps, kept as plain strings.
    evalSetId: str
    createdAt: str
    updatedAt: str
23
+
24
+
25
class EvaluationSet(BaseModel):
    """A full evaluation set: its metadata plus the evaluations it contains.

    Fields without a default are required; list fields default to a fresh
    empty list per instance.
    """

    id: str
    fileName: str

    # Evaluator references and the evaluation items themselves.
    evaluatorRefs: List[str] = Field(default_factory=list)
    evaluations: List[EvaluationItem] = Field(default_factory=list)

    name: str

    # Execution settings with their defaults.
    batchSize: int = 10
    timeoutMinutes: int = 20
    modelSettings: List[Dict[str, Any]] = Field(default_factory=list)

    # Timestamps, kept as plain strings.
    createdAt: str
    updatedAt: str
38
+
39
+
40
class EvaluationStatus(IntEnum):
    """Integer-valued status of an evaluation."""

    # Values are spelled out explicitly: the numbering starts at 0.
    PENDING = 0
    IN_PROGRESS = 1
    COMPLETED = 2