verifiers 0.1.10.dev2__tar.gz → 0.1.10.dev4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186)
  1. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/.gitignore +1 -0
  2. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/PKG-INFO +20 -17
  3. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/README.md +15 -3
  4. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/pyproject.toml +15 -23
  5. verifiers-0.1.10.dev4/tests/test_client_config.py +52 -0
  6. verifiers-0.1.10.dev4/tests/test_endpoint_registry.py +177 -0
  7. verifiers-0.1.10.dev4/tests/test_environment_extra.py +615 -0
  8. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_envs.py +12 -0
  9. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_eval_cli.py +374 -3
  10. verifiers-0.1.10.dev4/tests/test_eval_display.py +80 -0
  11. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_eval_utils.py +20 -0
  12. verifiers-0.1.10.dev4/tests/test_gepa_cli.py +89 -0
  13. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_imports.py +1 -0
  14. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_install_utils.py +3 -2
  15. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_math_rubric.py +8 -15
  16. verifiers-0.1.10.dev4/tests/test_opencode_harbor.py +57 -0
  17. verifiers-0.1.10.dev4/tests/test_path_utils.py +89 -0
  18. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_sandbox_env.py +0 -6
  19. verifiers-0.1.10.dev4/tests/test_sandbox_mixin.py +351 -0
  20. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_save_utils.py +203 -0
  21. verifiers-0.1.10.dev4/tests/test_setup_script.py +103 -0
  22. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_tool_env.py +4 -1
  23. verifiers-0.1.10.dev4/tests/test_tui_info_formatting.py +41 -0
  24. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/__init__.py +44 -28
  25. verifiers-0.1.10.dev4/verifiers/cli/__init__.py +1 -0
  26. verifiers-0.1.10.dev4/verifiers/cli/commands/__init__.py +1 -0
  27. verifiers-0.1.10.dev4/verifiers/cli/commands/build.py +7 -0
  28. verifiers-0.1.10.dev4/verifiers/cli/commands/eval.py +7 -0
  29. verifiers-0.1.10.dev4/verifiers/cli/commands/gepa.py +7 -0
  30. verifiers-0.1.10.dev4/verifiers/cli/commands/init.py +7 -0
  31. verifiers-0.1.10.dev4/verifiers/cli/commands/install.py +7 -0
  32. verifiers-0.1.10.dev4/verifiers/cli/commands/setup.py +9 -0
  33. verifiers-0.1.10.dev4/verifiers/cli/plugins/__init__.py +5 -0
  34. verifiers-0.1.10.dev4/verifiers/cli/plugins/prime.py +97 -0
  35. verifiers-0.1.10.dev4/verifiers/cli/tui.py +9 -0
  36. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/environment.py +360 -140
  37. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/experimental/README.md +8 -0
  38. verifiers-0.1.10.dev4/verifiers/envs/experimental/__init__.py +3 -0
  39. verifiers-0.1.10.dev4/verifiers/envs/experimental/cli_agent_env.py +422 -0
  40. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/experimental/harbor_env.py +55 -51
  41. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/experimental/mcp_env.py +7 -1
  42. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/experimental/rlm_env.py +325 -100
  43. verifiers-0.1.10.dev4/verifiers/envs/experimental/sandbox_mixin.py +241 -0
  44. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/integrations/README.md +66 -0
  45. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/integrations/browser_env/__init__.py +0 -6
  46. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +4 -2
  47. verifiers-0.1.10.dev4/verifiers/envs/integrations/openenv_env.py +1169 -0
  48. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/integrations/textarena_env.py +9 -5
  49. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/sandbox_env.py +11 -62
  50. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/tool_env.py +6 -3
  51. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/gepa/adapter.py +12 -8
  52. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/gepa/display.py +3 -0
  53. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/gepa/gepa_utils.py +11 -7
  54. verifiers-0.1.10.dev4/verifiers/rl/README.md +15 -0
  55. verifiers-0.1.10.dev4/verifiers/rl/__init__.py +11 -0
  56. verifiers-0.1.10.dev4/verifiers/rl/inference/__init__.py +11 -0
  57. verifiers-0.1.10.dev4/verifiers/rl/inference/client.py +3 -0
  58. verifiers-0.1.10.dev4/verifiers/rl/inference/server.py +11 -0
  59. verifiers-0.1.10.dev4/verifiers/rl/trainer/__init__.py +29 -0
  60. verifiers-0.1.10.dev4/verifiers/rl/trainer/config.py +3 -0
  61. verifiers-0.1.10.dev4/verifiers/rl/trainer/orchestrator.py +3 -0
  62. verifiers-0.1.10.dev4/verifiers/rl/trainer/trainer.py +4 -0
  63. verifiers-0.1.10.dev4/verifiers/rl/trainer/utils.py +5 -0
  64. verifiers-0.1.10.dev4/verifiers/rubrics/math_rubric.py +102 -0
  65. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/rubrics/rubric.py +16 -5
  66. verifiers-0.1.10.dev4/verifiers/scripts/build.py +452 -0
  67. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/scripts/eval.py +147 -29
  68. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/scripts/gepa.py +197 -9
  69. verifiers-0.1.10.dev4/verifiers/scripts/init.py +436 -0
  70. verifiers-0.1.10.dev4/verifiers/scripts/rl.py +11 -0
  71. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/scripts/setup.py +102 -99
  72. verifiers-0.1.10.dev4/verifiers/scripts/train.py +11 -0
  73. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/scripts/tui.py +39 -7
  74. verifiers-0.1.10.dev4/verifiers/scripts/vllm.py +11 -0
  75. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/types.py +71 -8
  76. verifiers-0.1.10.dev4/verifiers/utils/client_utils.py +98 -0
  77. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/utils/data_utils.py +3 -3
  78. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/utils/display_utils.py +16 -3
  79. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/utils/eval_display.py +44 -10
  80. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/utils/eval_utils.py +264 -84
  81. verifiers-0.1.10.dev4/verifiers/utils/import_utils.py +18 -0
  82. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/utils/install_utils.py +11 -7
  83. verifiers-0.1.10.dev4/verifiers/utils/interception_utils.py +416 -0
  84. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/utils/message_utils.py +2 -3
  85. verifiers-0.1.10.dev4/verifiers/utils/path_utils.py +143 -0
  86. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/utils/sandbox_exec_utils.py +6 -2
  87. verifiers-0.1.10.dev4/verifiers/utils/save_utils.py +575 -0
  88. verifiers-0.1.10.dev4/verifiers/utils/threaded_sandbox_client.py +63 -0
  89. verifiers-0.1.10.dev4/verifiers/utils/usage_utils.py +101 -0
  90. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/utils/worker_utils.py +37 -0
  91. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/workers/client/env_client.py +5 -2
  92. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/workers/client/zmq_env_client.py +37 -4
  93. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/workers/server/env_server.py +9 -3
  94. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/workers/server/zmq_env_server.py +2 -0
  95. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/workers/types.py +3 -3
  96. verifiers-0.1.10.dev2/tests/test_environment_extra.py +0 -224
  97. verifiers-0.1.10.dev2/verifiers/envs/experimental/cli_agent_env.py +0 -820
  98. verifiers-0.1.10.dev2/verifiers/rl/README.md +0 -108
  99. verifiers-0.1.10.dev2/verifiers/rl/inference/client.py +0 -172
  100. verifiers-0.1.10.dev2/verifiers/rl/inference/server.py +0 -193
  101. verifiers-0.1.10.dev2/verifiers/rl/trainer/__init__.py +0 -37
  102. verifiers-0.1.10.dev2/verifiers/rl/trainer/config.py +0 -342
  103. verifiers-0.1.10.dev2/verifiers/rl/trainer/orchestrator.py +0 -375
  104. verifiers-0.1.10.dev2/verifiers/rl/trainer/trainer.py +0 -497
  105. verifiers-0.1.10.dev2/verifiers/rl/trainer/utils.py +0 -289
  106. verifiers-0.1.10.dev2/verifiers/rubrics/math_rubric.py +0 -88
  107. verifiers-0.1.10.dev2/verifiers/scripts/__init__.py +0 -0
  108. verifiers-0.1.10.dev2/verifiers/scripts/init.py +0 -209
  109. verifiers-0.1.10.dev2/verifiers/scripts/rl.py +0 -207
  110. verifiers-0.1.10.dev2/verifiers/scripts/train.py +0 -40
  111. verifiers-0.1.10.dev2/verifiers/utils/__init__.py +0 -0
  112. verifiers-0.1.10.dev2/verifiers/utils/client_utils.py +0 -62
  113. verifiers-0.1.10.dev2/verifiers/utils/path_utils.py +0 -51
  114. verifiers-0.1.10.dev2/verifiers/utils/save_utils.py +0 -385
  115. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/LICENSE +0 -0
  116. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/AGENTS.md +0 -0
  117. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/README.md +0 -0
  118. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/__init__.py +0 -0
  119. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/conftest.py +0 -0
  120. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/mock_client_guide.md +0 -0
  121. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/mock_openai_client.py +0 -0
  122. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_browser_env.py +0 -0
  123. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_cli_agent_env.py +0 -0
  124. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_decorator_ranks.py +0 -0
  125. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_env_group.py +0 -0
  126. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_environment.py +0 -0
  127. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_environment_audio_modality.py +0 -0
  128. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_error_chain.py +0 -0
  129. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_gym_env.py +0 -0
  130. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_logging.py +0 -0
  131. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_maybe_think_parser.py +0 -0
  132. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_message_utils_audio.py +0 -0
  133. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_multiturn_env.py +0 -0
  134. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_parser.py +0 -0
  135. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_rlm_env.py +0 -0
  136. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_rlm_env_sandbox.py +0 -0
  137. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_rubric.py +0 -0
  138. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_rubric_group.py +0 -0
  139. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_singleturn_env.py +0 -0
  140. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_stateful_tool_env.py +0 -0
  141. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_think_parser.py +0 -0
  142. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_tool_utils.py +0 -0
  143. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_trajectory_processing.py +0 -0
  144. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_xml_parser.py +0 -0
  145. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/AGENTS.md +0 -0
  146. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/decorators.py +0 -0
  147. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/AGENTS.md +0 -0
  148. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/__init__.py +0 -0
  149. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/env_group.py +0 -0
  150. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/experimental/gym_env.py +0 -0
  151. {verifiers-0.1.10.dev2/verifiers/envs/experimental → verifiers-0.1.10.dev4/verifiers/envs/integrations}/__init__.py +0 -0
  152. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
  153. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
  154. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
  155. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
  156. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
  157. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/multiturn_env.py +0 -0
  158. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/python_env.py +0 -0
  159. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/singleturn_env.py +0 -0
  160. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/stateful_tool_env.py +0 -0
  161. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/errors.py +0 -0
  162. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/gepa/__init__.py +0 -0
  163. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/gepa/config.py +0 -0
  164. {verifiers-0.1.10.dev2/verifiers/envs/integrations → verifiers-0.1.10.dev4/verifiers/parsers}/__init__.py +0 -0
  165. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/parsers/maybe_think_parser.py +0 -0
  166. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/parsers/parser.py +0 -0
  167. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/parsers/think_parser.py +0 -0
  168. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/parsers/xml_parser.py +0 -0
  169. {verifiers-0.1.10.dev2/verifiers/parsers → verifiers-0.1.10.dev4/verifiers/rubrics}/__init__.py +0 -0
  170. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/rubrics/judge_rubric.py +0 -0
  171. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/rubrics/rubric_group.py +0 -0
  172. {verifiers-0.1.10.dev2/verifiers/rl/inference → verifiers-0.1.10.dev4/verifiers/scripts}/__init__.py +0 -0
  173. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/scripts/install.py +0 -0
  174. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/scripts/prime_rl.py +0 -0
  175. {verifiers-0.1.10.dev2/verifiers/rubrics → verifiers-0.1.10.dev4/verifiers/utils}/__init__.py +0 -0
  176. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/utils/async_utils.py +0 -0
  177. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/utils/config_utils.py +0 -0
  178. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/utils/env_utils.py +0 -0
  179. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/utils/error_utils.py +0 -0
  180. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/utils/logging_utils.py +0 -0
  181. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/utils/response_utils.py +0 -0
  182. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/utils/thread_utils.py +0 -0
  183. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/utils/token_utils.py +0 -0
  184. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/utils/tool_utils.py +0 -0
  185. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/utils/tunnel_utils.py +0 -0
  186. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/workers/__init__.py +0 -0
@@ -10,6 +10,7 @@ uv.lock
10
10
  .ropeproject/
11
11
  .scratch/
12
12
  .chroma_db/
13
+ /.codex/environments/
13
14
 
14
15
  # artifacts
15
16
  core.*
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: verifiers
3
- Version: 0.1.10.dev2
3
+ Version: 0.1.10.dev4
4
4
  Summary: Verifiers: Environments for LLM Reinforcement Learning
5
5
  Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
6
6
  Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
@@ -32,8 +32,8 @@ Requires-Dist: nest-asyncio>=1.6.0
32
32
  Requires-Dist: numpy
33
33
  Requires-Dist: openai-agents>=0.0.7
34
34
  Requires-Dist: openai>=1.108.1
35
- Requires-Dist: prime-sandboxes>=0.2.9
36
- Requires-Dist: prime-tunnel
35
+ Requires-Dist: prime-sandboxes>=0.2.14
36
+ Requires-Dist: prime-tunnel>=0.1.0
37
37
  Requires-Dist: pydantic>=2.11.9
38
38
  Requires-Dist: pyzmq>=27.1.0
39
39
  Requires-Dist: requests
@@ -47,19 +47,10 @@ Provides-Extra: browser
47
47
  Requires-Dist: aiohttp>=3.9.0; extra == 'browser'
48
48
  Requires-Dist: python-dotenv>=1.0.0; extra == 'browser'
49
49
  Requires-Dist: stagehand>=3.0.0; extra == 'browser'
50
+ Provides-Extra: openenv
51
+ Requires-Dist: openenv-core[core]==0.2.1; extra == 'openenv'
50
52
  Provides-Extra: rg
51
53
  Requires-Dist: reasoning-gym; extra == 'rg'
52
- Provides-Extra: rl
53
- Requires-Dist: accelerate>=1.4.0; extra == 'rl'
54
- Requires-Dist: deepspeed>=0.17.6; extra == 'rl'
55
- Requires-Dist: flash-attn>=2.8.3; extra == 'rl'
56
- Requires-Dist: liger-kernel>=0.5.10; extra == 'rl'
57
- Requires-Dist: peft; extra == 'rl'
58
- Requires-Dist: requests; extra == 'rl'
59
- Requires-Dist: torch<2.9.0,>=2.8.0; extra == 'rl'
60
- Requires-Dist: transformers>=4.56.2; extra == 'rl'
61
- Requires-Dist: vllm<0.11.0,>=0.10.0; extra == 'rl'
62
- Requires-Dist: wandb; extra == 'rl'
63
54
  Provides-Extra: ta
64
55
  Requires-Dist: nltk; extra == 'ta'
65
56
  Requires-Dist: textarena; extra == 'ta'
@@ -140,8 +131,12 @@ prime lab setup
140
131
  This sets up a Python project if needed (with `uv init`), installs `verifiers` (with `uv add verifiers`), creates the recommended workspace structure, and downloads useful starter files:
141
132
  ```
142
133
  configs/
143
- ├── endpoints.py # OpenAI-compatible API endpoint configuration
144
- └── lab/ # Example configs for Hosted Training
134
+ ├── endpoints.toml # OpenAI-compatible API endpoint configuration
135
+ ├── rl/ # Example configs for Hosted Training
136
+ ├── eval/ # Example multi-environment eval configs
137
+ └── gepa/ # Example configs for prompt optimization
138
+ .prime/
139
+ └── skills/ # Bundled workflow skills for create/browse/review/eval/GEPA/train/brainstorm
145
140
  environments/
146
141
  └── AGENTS.md # Documentation for AI coding agents
147
142
  AGENTS.md # Top-level documentation for AI coding agents
@@ -157,6 +152,14 @@ Environments built with Verifiers are self-contained Python modules. To initiali
157
152
  ```bash
158
153
  prime env init my-env # creates a new template in ./environments/my_env
159
154
  ```
155
+ For OpenEnv integration, use:
156
+ ```bash
157
+ prime env init my-openenv --openenv
158
+ ```
159
+ Then copy your OpenEnv project into `environments/my_openenv/proj/` and build the image with:
160
+ ```bash
161
+ uv run vf-build my-openenv
162
+ ```
160
163
 
161
164
  This will create a new module called `my_env` with a basic environment template.
162
165
  ```
@@ -195,7 +198,7 @@ To run a local evaluation with any OpenAI-compatible model, do:
195
198
  ```bash
196
199
  prime eval run my-env -m gpt-5-nano # run and save eval results locally
197
200
  ```
198
- Evaluations use [Prime Inference](https://docs.primeintellect.ai/inference/overview) by default; configure your own API endpoints in `./configs/endpoints.py`.
201
+ Evaluations use [Prime Inference](https://docs.primeintellect.ai/inference/overview) by default; configure your own API endpoints in `./configs/endpoints.toml`.
199
202
 
200
203
  View local evaluation results in the terminal UI:
201
204
  ```bash
@@ -73,8 +73,12 @@ prime lab setup
73
73
  This sets up a Python project if needed (with `uv init`), installs `verifiers` (with `uv add verifiers`), creates the recommended workspace structure, and downloads useful starter files:
74
74
  ```
75
75
  configs/
76
- ├── endpoints.py # OpenAI-compatible API endpoint configuration
77
- └── lab/ # Example configs for Hosted Training
76
+ ├── endpoints.toml # OpenAI-compatible API endpoint configuration
77
+ ├── rl/ # Example configs for Hosted Training
78
+ ├── eval/ # Example multi-environment eval configs
79
+ └── gepa/ # Example configs for prompt optimization
80
+ .prime/
81
+ └── skills/ # Bundled workflow skills for create/browse/review/eval/GEPA/train/brainstorm
78
82
  environments/
79
83
  └── AGENTS.md # Documentation for AI coding agents
80
84
  AGENTS.md # Top-level documentation for AI coding agents
@@ -90,6 +94,14 @@ Environments built with Verifiers are self-contained Python modules. To initiali
90
94
  ```bash
91
95
  prime env init my-env # creates a new template in ./environments/my_env
92
96
  ```
97
+ For OpenEnv integration, use:
98
+ ```bash
99
+ prime env init my-openenv --openenv
100
+ ```
101
+ Then copy your OpenEnv project into `environments/my_openenv/proj/` and build the image with:
102
+ ```bash
103
+ uv run vf-build my-openenv
104
+ ```
93
105
 
94
106
  This will create a new module called `my_env` with a basic environment template.
95
107
  ```
@@ -128,7 +140,7 @@ To run a local evaluation with any OpenAI-compatible model, do:
128
140
  ```bash
129
141
  prime eval run my-env -m gpt-5-nano # run and save eval results locally
130
142
  ```
131
- Evaluations use [Prime Inference](https://docs.primeintellect.ai/inference/overview) by default; configure your own API endpoints in `./configs/endpoints.py`.
143
+ Evaluations use [Prime Inference](https://docs.primeintellect.ai/inference/overview) by default; configure your own API endpoints in `./configs/endpoints.toml`.
132
144
 
133
145
  View local evaluation results in the terminal UI:
134
146
  ```bash
@@ -36,8 +36,8 @@ dependencies = [
36
36
  "nest-asyncio>=1.6.0", # for jupyter notebooks
37
37
  "openai>=1.108.1",
38
38
  "openai-agents>=0.0.7",
39
- "prime-tunnel",
40
- "prime-sandboxes>=0.2.9",
39
+ "prime-tunnel>=0.1.0",
40
+ "prime-sandboxes>=0.2.14",
41
41
  "pydantic>=2.11.9",
42
42
  "requests",
43
43
  "rich",
@@ -64,6 +64,10 @@ dev = [
64
64
  "ipywidgets",
65
65
  "reasoning-gym",
66
66
  "textarena",
67
+ "openenv-core[core]==0.2.1",
68
+ "stagehand>=3.0.0",
69
+ "aiohttp>=3.9.0",
70
+ "python-dotenv>=1.0.0",
67
71
  "nltk",
68
72
  ]
69
73
 
@@ -75,40 +79,25 @@ ta = [
75
79
  "textarena",
76
80
  "nltk",
77
81
  ]
82
+ openenv = [
83
+ "openenv-core[core]==0.2.1",
84
+ ]
78
85
  browser = [
79
86
  "stagehand>=3.0.0",
80
87
  "aiohttp>=3.9.0",
81
88
  "python-dotenv>=1.0.0",
82
89
  ]
83
- rl = [
84
- "torch>=2.8.0,<2.9.0",
85
- "transformers>=4.56.2",
86
- "accelerate>=1.4.0",
87
- "requests",
88
- "peft",
89
- "wandb",
90
- "vllm>=0.10.0,<0.11.0",
91
- "liger-kernel>=0.5.10",
92
- "deepspeed>=0.17.6",
93
- "flash-attn>=2.8.3",
94
- ]
95
-
96
- [tool.uv.extra-build-dependencies]
97
- flash-attn = [{ requirement = "torch", match-runtime = true }]
98
-
99
- [tool.uv.extra-build-variables]
100
- flash-attn = { FLASH_ATTENTION_SKIP_CUDA_BUILD = "TRUE" }
101
-
102
90
  [project.scripts]
103
91
  vf-eval = "verifiers.scripts.eval:main"
104
92
  vf-gepa = "verifiers.scripts.gepa:main"
105
93
  vf-init = "verifiers.scripts.init:main"
106
94
  vf-install = "verifiers.scripts.install:main"
107
95
  vf-setup = "verifiers.scripts.setup:main"
96
+ vf-build = "verifiers.scripts.build:main"
108
97
  vf-rl = "verifiers.scripts.rl:main"
109
98
  vf-train = "verifiers.scripts.train:main"
110
99
  vf-tui = "verifiers.scripts.tui:main"
111
- vf-vllm = "verifiers.rl.inference.server:main"
100
+ vf-vllm = "verifiers.scripts.vllm:main"
112
101
  prime-rl = "verifiers.scripts.prime_rl:main"
113
102
 
114
103
  # hatchling configuration
@@ -171,9 +160,12 @@ filterwarnings = [
171
160
  asyncio_mode = "auto"
172
161
  norecursedirs = [".git", ".tox", "dist", "build", "*.egg", "__pycache__"]
173
162
 
163
+ [tool.ty.environment]
164
+ python-version = "3.13"
165
+
174
166
  [tool.ty.rules]
175
- unresolved-import = "warn"
176
167
  unknown-argument = "warn"
168
+ redundant-cast = "ignore"
177
169
 
178
170
  [tool.ty.src]
179
171
  exclude = ["environments"]
@@ -0,0 +1,52 @@
1
+ import pytest
2
+ from pydantic import ValidationError
3
+
4
+ from verifiers.types import ClientConfig, EndpointClientConfig
5
+
6
+
7
+ def test_client_config_allows_leaf_endpoint_configs():
8
+ config = ClientConfig(
9
+ api_base_url="http://localhost:8000/v1",
10
+ endpoint_configs=[
11
+ EndpointClientConfig(api_base_url="http://localhost:8001/v1"),
12
+ {"api_base_url": "http://localhost:8002/v1"},
13
+ ],
14
+ )
15
+
16
+ assert len(config.endpoint_configs) == 2
17
+ assert config.endpoint_configs[0].api_base_url == "http://localhost:8001/v1"
18
+ assert config.endpoint_configs[1].api_base_url == "http://localhost:8002/v1"
19
+
20
+
21
+ def test_client_config_rejects_recursive_endpoint_configs():
22
+ with pytest.raises(ValidationError, match="cannot include endpoint_configs"):
23
+ ClientConfig.model_validate(
24
+ {
25
+ "api_base_url": "http://localhost:8000/v1",
26
+ "endpoint_configs": [
27
+ {
28
+ "api_base_url": "http://localhost:8001/v1",
29
+ "endpoint_configs": [
30
+ {"api_base_url": "http://localhost:8002/v1"}
31
+ ],
32
+ }
33
+ ],
34
+ }
35
+ )
36
+
37
+
38
+ def test_client_config_accepts_empty_nested_endpoint_configs_key():
39
+ config = ClientConfig.model_validate(
40
+ {
41
+ "api_base_url": "http://localhost:8000/v1",
42
+ "endpoint_configs": [
43
+ {
44
+ "api_base_url": "http://localhost:8001/v1",
45
+ "endpoint_configs": [],
46
+ }
47
+ ],
48
+ }
49
+ )
50
+
51
+ assert len(config.endpoint_configs) == 1
52
+ assert config.endpoint_configs[0].api_base_url == "http://localhost:8001/v1"
@@ -0,0 +1,177 @@
1
+ from pathlib import Path
2
+
3
+ from verifiers.utils.eval_utils import load_endpoints
4
+
5
+
6
+ def test_load_endpoints_python_registry_normalizes_to_lists(tmp_path: Path):
7
+ registry_path = tmp_path / "endpoints.py"
8
+ registry_path.write_text(
9
+ "ENDPOINTS = {\n"
10
+ ' "gpt-4.1-mini": {"model": "gpt-4.1-mini", "url": "https://api.openai.com/v1", "key": "OPENAI_API_KEY"},\n'
11
+ "}\n",
12
+ encoding="utf-8",
13
+ )
14
+
15
+ endpoints = load_endpoints(str(registry_path))
16
+
17
+ assert set(endpoints.keys()) == {"gpt-4.1-mini"}
18
+ assert len(endpoints["gpt-4.1-mini"]) == 1
19
+ endpoint = endpoints["gpt-4.1-mini"][0]
20
+ assert endpoint["model"] == "gpt-4.1-mini"
21
+ assert endpoint["url"] == "https://api.openai.com/v1"
22
+ assert endpoint["key"] == "OPENAI_API_KEY"
23
+
24
+
25
+ def test_load_endpoints_toml_groups_variants_by_endpoint_id(tmp_path: Path):
26
+ registry_path = tmp_path / "endpoints.toml"
27
+ registry_path.write_text(
28
+ "[[endpoint]]\n"
29
+ 'endpoint_id = "gpt-5-mini"\n'
30
+ 'model = "openai/gpt-5-mini"\n'
31
+ 'url = "https://api.pinference.ai/api/v1"\n'
32
+ 'key = "PRIME_API_KEY"\n'
33
+ "\n"
34
+ "[[endpoint]]\n"
35
+ 'endpoint_id = "gpt-5-mini"\n'
36
+ 'model = "openai/gpt-5-mini"\n'
37
+ 'url = "https://api.openai.com/v1"\n'
38
+ 'key = "OPENAI_API_KEY"\n',
39
+ encoding="utf-8",
40
+ )
41
+
42
+ endpoints = load_endpoints(str(registry_path))
43
+
44
+ assert set(endpoints.keys()) == {"gpt-5-mini"}
45
+ assert len(endpoints["gpt-5-mini"]) == 2
46
+ assert endpoints["gpt-5-mini"][0]["url"] == "https://api.pinference.ai/api/v1"
47
+ assert endpoints["gpt-5-mini"][1]["url"] == "https://api.openai.com/v1"
48
+
49
+
50
+ def test_load_endpoints_toml_accepts_long_field_names(tmp_path: Path):
51
+ registry_path = tmp_path / "endpoints.toml"
52
+ registry_path.write_text(
53
+ "[[endpoint]]\n"
54
+ 'endpoint_id = "gpt-5-mini"\n'
55
+ 'model = "openai/gpt-5-mini"\n'
56
+ 'api_base_url = "https://api.pinference.ai/api/v1"\n'
57
+ 'api_key_var = "PRIME_API_KEY"\n',
58
+ encoding="utf-8",
59
+ )
60
+
61
+ endpoints = load_endpoints(str(registry_path))
62
+
63
+ assert endpoints["gpt-5-mini"][0]["url"] == "https://api.pinference.ai/api/v1"
64
+ assert endpoints["gpt-5-mini"][0]["key"] == "PRIME_API_KEY"
65
+
66
+
67
+ def test_load_endpoints_toml_accepts_matching_short_and_long_fields(tmp_path: Path):
68
+ registry_path = tmp_path / "endpoints.toml"
69
+ registry_path.write_text(
70
+ "[[endpoint]]\n"
71
+ 'endpoint_id = "gpt-5-mini"\n'
72
+ 'model = "openai/gpt-5-mini"\n'
73
+ 'url = "https://api.pinference.ai/api/v1"\n'
74
+ 'api_base_url = "https://api.pinference.ai/api/v1"\n'
75
+ 'key = "PRIME_API_KEY"\n'
76
+ 'api_key_var = "PRIME_API_KEY"\n',
77
+ encoding="utf-8",
78
+ )
79
+
80
+ endpoints = load_endpoints(str(registry_path))
81
+
82
+ assert endpoints["gpt-5-mini"][0]["url"] == "https://api.pinference.ai/api/v1"
83
+ assert endpoints["gpt-5-mini"][0]["key"] == "PRIME_API_KEY"
84
+
85
+
86
+ def test_load_endpoints_toml_rejects_conflicting_url_fields(tmp_path: Path):
87
+ registry_path = tmp_path / "endpoints.toml"
88
+ registry_path.write_text(
89
+ "[[endpoint]]\n"
90
+ 'endpoint_id = "gpt-5-mini"\n'
91
+ 'model = "openai/gpt-5-mini"\n'
92
+ 'url = "https://a.example/v1"\n'
93
+ 'api_base_url = "https://b.example/v1"\n'
94
+ 'key = "PRIME_API_KEY"\n',
95
+ encoding="utf-8",
96
+ )
97
+
98
+ endpoints = load_endpoints(str(registry_path))
99
+
100
+ assert endpoints == {}
101
+
102
+
103
+ def test_load_endpoints_toml_rejects_conflicting_key_fields(tmp_path: Path):
104
+ registry_path = tmp_path / "endpoints.toml"
105
+ registry_path.write_text(
106
+ "[[endpoint]]\n"
107
+ 'endpoint_id = "gpt-5-mini"\n'
108
+ 'model = "openai/gpt-5-mini"\n'
109
+ 'url = "https://a.example/v1"\n'
110
+ 'key = "A_KEY"\n'
111
+ 'api_key_var = "B_KEY"\n',
112
+ encoding="utf-8",
113
+ )
114
+
115
+ endpoints = load_endpoints(str(registry_path))
116
+
117
+ assert endpoints == {}
118
+
119
+
120
+ def test_load_endpoints_python_registry_supports_list_variants(tmp_path: Path):
121
+ registry_path = tmp_path / "endpoints.py"
122
+ registry_path.write_text(
123
+ "ENDPOINTS = {\n"
124
+ ' "gpt-5-mini": [\n'
125
+ ' {"model": "gpt-5-mini", "url": "https://a.example/v1", "key": "A_KEY"},\n'
126
+ ' {"model": "gpt-5-mini", "url": "https://b.example/v1", "key": "A_KEY"},\n'
127
+ " ]\n"
128
+ "}\n",
129
+ encoding="utf-8",
130
+ )
131
+
132
+ endpoints = load_endpoints(str(registry_path))
133
+
134
+ assert set(endpoints.keys()) == {"gpt-5-mini"}
135
+ assert len(endpoints["gpt-5-mini"]) == 2
136
+ assert endpoints["gpt-5-mini"][0]["url"] == "https://a.example/v1"
137
+ assert endpoints["gpt-5-mini"][1]["url"] == "https://b.example/v1"
138
+
139
+
140
+ def test_load_endpoints_directory_prefers_toml_then_python(tmp_path: Path):
141
+ python_registry = tmp_path / "endpoints.py"
142
+ toml_registry = tmp_path / "endpoints.toml"
143
+
144
+ python_registry.write_text(
145
+ "ENDPOINTS = {\n"
146
+ ' "from-py": {"model": "m", "url": "https://py.example/v1", "key": "PY_KEY"},\n'
147
+ "}\n",
148
+ encoding="utf-8",
149
+ )
150
+ toml_registry.write_text(
151
+ "[[endpoint]]\n"
152
+ 'endpoint_id = "from-toml"\n'
153
+ 'model = "m"\n'
154
+ 'url = "https://toml.example/v1"\n'
155
+ 'key = "TOML_KEY"\n',
156
+ encoding="utf-8",
157
+ )
158
+
159
+ endpoints = load_endpoints(str(tmp_path))
160
+ assert set(endpoints.keys()) == {"from-toml"}
161
+
162
+ toml_registry.unlink()
163
+ endpoints = load_endpoints(str(tmp_path))
164
+ assert set(endpoints.keys()) == {"from-py"}
165
+
166
+
167
+ def test_qwen3_vl_endpoint_ids_map_to_vl_models():
168
+ endpoints = load_endpoints("./configs/endpoints.toml")
169
+
170
+ assert endpoints["qwen3-vl-30b-i"][0]["model"] == "qwen/qwen3-vl-30b-a3b-instruct"
171
+ assert endpoints["qwen3-vl-30b-t"][0]["model"] == "qwen/qwen3-vl-30b-a3b-thinking"
172
+ assert (
173
+ endpoints["qwen3-vl-235b-i"][0]["model"] == "qwen/qwen3-vl-235b-a22b-instruct"
174
+ )
175
+ assert (
176
+ endpoints["qwen3-vl-235b-t"][0]["model"] == "qwen/qwen3-vl-235b-a22b-thinking"
177
+ )