verifiers 0.1.10.dev2__tar.gz → 0.1.10.dev3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176)
  1. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/.gitignore +1 -0
  2. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/PKG-INFO +13 -14
  3. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/README.md +8 -0
  4. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/pyproject.toml +15 -23
  5. verifiers-0.1.10.dev3/tests/test_client_config.py +52 -0
  6. verifiers-0.1.10.dev3/tests/test_endpoint_registry.py +177 -0
  7. verifiers-0.1.10.dev3/tests/test_environment_extra.py +615 -0
  8. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_envs.py +12 -0
  9. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_eval_cli.py +374 -3
  10. verifiers-0.1.10.dev3/tests/test_eval_display.py +80 -0
  11. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_eval_utils.py +20 -0
  12. verifiers-0.1.10.dev3/tests/test_gepa_cli.py +89 -0
  13. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_imports.py +1 -0
  14. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_math_rubric.py +8 -15
  15. verifiers-0.1.10.dev3/tests/test_opencode_harbor.py +57 -0
  16. verifiers-0.1.10.dev3/tests/test_path_utils.py +89 -0
  17. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_sandbox_env.py +0 -6
  18. verifiers-0.1.10.dev3/tests/test_sandbox_mixin.py +351 -0
  19. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_save_utils.py +203 -0
  20. verifiers-0.1.10.dev3/tests/test_setup_script.py +69 -0
  21. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_tool_env.py +4 -1
  22. verifiers-0.1.10.dev3/tests/test_tui_info_formatting.py +41 -0
  23. verifiers-0.1.10.dev3/tests/test_worker_client_timeouts.py +149 -0
  24. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/__init__.py +44 -28
  25. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/environment.py +357 -137
  26. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/experimental/README.md +8 -0
  27. verifiers-0.1.10.dev3/verifiers/envs/experimental/__init__.py +3 -0
  28. verifiers-0.1.10.dev3/verifiers/envs/experimental/cli_agent_env.py +422 -0
  29. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/experimental/harbor_env.py +55 -51
  30. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/experimental/mcp_env.py +7 -1
  31. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/experimental/rlm_env.py +325 -100
  32. verifiers-0.1.10.dev3/verifiers/envs/experimental/sandbox_mixin.py +241 -0
  33. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/integrations/README.md +66 -0
  34. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/integrations/browser_env/__init__.py +0 -6
  35. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +4 -2
  36. verifiers-0.1.10.dev3/verifiers/envs/integrations/openenv_env.py +1169 -0
  37. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/integrations/textarena_env.py +9 -5
  38. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/sandbox_env.py +11 -62
  39. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/tool_env.py +6 -3
  40. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/gepa/adapter.py +12 -8
  41. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/gepa/display.py +3 -0
  42. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/gepa/gepa_utils.py +11 -7
  43. verifiers-0.1.10.dev3/verifiers/rl/README.md +15 -0
  44. verifiers-0.1.10.dev3/verifiers/rl/__init__.py +11 -0
  45. verifiers-0.1.10.dev3/verifiers/rl/inference/__init__.py +11 -0
  46. verifiers-0.1.10.dev3/verifiers/rl/inference/client.py +3 -0
  47. verifiers-0.1.10.dev3/verifiers/rl/inference/server.py +11 -0
  48. verifiers-0.1.10.dev3/verifiers/rl/trainer/__init__.py +29 -0
  49. verifiers-0.1.10.dev3/verifiers/rl/trainer/config.py +3 -0
  50. verifiers-0.1.10.dev3/verifiers/rl/trainer/orchestrator.py +3 -0
  51. verifiers-0.1.10.dev3/verifiers/rl/trainer/trainer.py +4 -0
  52. verifiers-0.1.10.dev3/verifiers/rl/trainer/utils.py +5 -0
  53. verifiers-0.1.10.dev3/verifiers/rubrics/math_rubric.py +102 -0
  54. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/rubrics/rubric.py +16 -5
  55. verifiers-0.1.10.dev3/verifiers/scripts/build.py +452 -0
  56. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/scripts/eval.py +147 -29
  57. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/scripts/gepa.py +197 -9
  58. verifiers-0.1.10.dev3/verifiers/scripts/init.py +436 -0
  59. verifiers-0.1.10.dev3/verifiers/scripts/rl.py +11 -0
  60. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/scripts/setup.py +61 -95
  61. verifiers-0.1.10.dev3/verifiers/scripts/train.py +11 -0
  62. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/scripts/tui.py +39 -7
  63. verifiers-0.1.10.dev3/verifiers/scripts/vllm.py +11 -0
  64. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/types.py +71 -8
  65. verifiers-0.1.10.dev3/verifiers/utils/client_utils.py +98 -0
  66. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/utils/data_utils.py +3 -3
  67. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/utils/display_utils.py +16 -3
  68. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/utils/eval_display.py +44 -10
  69. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/utils/eval_utils.py +264 -84
  70. verifiers-0.1.10.dev3/verifiers/utils/import_utils.py +18 -0
  71. verifiers-0.1.10.dev3/verifiers/utils/interception_utils.py +416 -0
  72. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/utils/message_utils.py +2 -3
  73. verifiers-0.1.10.dev3/verifiers/utils/path_utils.py +143 -0
  74. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/utils/sandbox_exec_utils.py +6 -2
  75. verifiers-0.1.10.dev3/verifiers/utils/save_utils.py +575 -0
  76. verifiers-0.1.10.dev3/verifiers/utils/threaded_sandbox_client.py +63 -0
  77. verifiers-0.1.10.dev3/verifiers/utils/usage_utils.py +101 -0
  78. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/workers/client/env_client.py +18 -4
  79. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/workers/client/zmq_env_client.py +39 -4
  80. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/workers/server/env_server.py +9 -3
  81. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/workers/server/zmq_env_server.py +2 -0
  82. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/workers/types.py +3 -3
  83. verifiers-0.1.10.dev2/tests/test_environment_extra.py +0 -224
  84. verifiers-0.1.10.dev2/verifiers/envs/experimental/cli_agent_env.py +0 -820
  85. verifiers-0.1.10.dev2/verifiers/rl/README.md +0 -108
  86. verifiers-0.1.10.dev2/verifiers/rl/inference/client.py +0 -172
  87. verifiers-0.1.10.dev2/verifiers/rl/inference/server.py +0 -193
  88. verifiers-0.1.10.dev2/verifiers/rl/trainer/__init__.py +0 -37
  89. verifiers-0.1.10.dev2/verifiers/rl/trainer/config.py +0 -342
  90. verifiers-0.1.10.dev2/verifiers/rl/trainer/orchestrator.py +0 -375
  91. verifiers-0.1.10.dev2/verifiers/rl/trainer/trainer.py +0 -497
  92. verifiers-0.1.10.dev2/verifiers/rl/trainer/utils.py +0 -289
  93. verifiers-0.1.10.dev2/verifiers/rubrics/math_rubric.py +0 -88
  94. verifiers-0.1.10.dev2/verifiers/scripts/__init__.py +0 -0
  95. verifiers-0.1.10.dev2/verifiers/scripts/init.py +0 -209
  96. verifiers-0.1.10.dev2/verifiers/scripts/rl.py +0 -207
  97. verifiers-0.1.10.dev2/verifiers/scripts/train.py +0 -40
  98. verifiers-0.1.10.dev2/verifiers/utils/__init__.py +0 -0
  99. verifiers-0.1.10.dev2/verifiers/utils/client_utils.py +0 -62
  100. verifiers-0.1.10.dev2/verifiers/utils/path_utils.py +0 -51
  101. verifiers-0.1.10.dev2/verifiers/utils/save_utils.py +0 -385
  102. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/LICENSE +0 -0
  103. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/AGENTS.md +0 -0
  104. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/README.md +0 -0
  105. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/__init__.py +0 -0
  106. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/conftest.py +0 -0
  107. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/mock_client_guide.md +0 -0
  108. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/mock_openai_client.py +0 -0
  109. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_browser_env.py +0 -0
  110. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_cli_agent_env.py +0 -0
  111. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_decorator_ranks.py +0 -0
  112. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_env_group.py +0 -0
  113. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_environment.py +0 -0
  114. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_environment_audio_modality.py +0 -0
  115. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_error_chain.py +0 -0
  116. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_gym_env.py +0 -0
  117. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_install_utils.py +0 -0
  118. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_logging.py +0 -0
  119. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_maybe_think_parser.py +0 -0
  120. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_message_utils_audio.py +0 -0
  121. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_multiturn_env.py +0 -0
  122. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_parser.py +0 -0
  123. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_rlm_env.py +0 -0
  124. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_rlm_env_sandbox.py +0 -0
  125. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_rubric.py +0 -0
  126. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_rubric_group.py +0 -0
  127. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_singleturn_env.py +0 -0
  128. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_stateful_tool_env.py +0 -0
  129. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_think_parser.py +0 -0
  130. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_tool_utils.py +0 -0
  131. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_trajectory_processing.py +0 -0
  132. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_xml_parser.py +0 -0
  133. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/AGENTS.md +0 -0
  134. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/decorators.py +0 -0
  135. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/AGENTS.md +0 -0
  136. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/__init__.py +0 -0
  137. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/env_group.py +0 -0
  138. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/experimental/gym_env.py +0 -0
  139. {verifiers-0.1.10.dev2/verifiers/envs/experimental → verifiers-0.1.10.dev3/verifiers/envs/integrations}/__init__.py +0 -0
  140. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
  141. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
  142. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
  143. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
  144. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
  145. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/multiturn_env.py +0 -0
  146. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/python_env.py +0 -0
  147. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/singleturn_env.py +0 -0
  148. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/stateful_tool_env.py +0 -0
  149. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/errors.py +0 -0
  150. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/gepa/__init__.py +0 -0
  151. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/gepa/config.py +0 -0
  152. {verifiers-0.1.10.dev2/verifiers/envs/integrations → verifiers-0.1.10.dev3/verifiers/parsers}/__init__.py +0 -0
  153. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/parsers/maybe_think_parser.py +0 -0
  154. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/parsers/parser.py +0 -0
  155. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/parsers/think_parser.py +0 -0
  156. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/parsers/xml_parser.py +0 -0
  157. {verifiers-0.1.10.dev2/verifiers/parsers → verifiers-0.1.10.dev3/verifiers/rubrics}/__init__.py +0 -0
  158. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/rubrics/judge_rubric.py +0 -0
  159. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/rubrics/rubric_group.py +0 -0
  160. {verifiers-0.1.10.dev2/verifiers/rl/inference → verifiers-0.1.10.dev3/verifiers/scripts}/__init__.py +0 -0
  161. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/scripts/install.py +0 -0
  162. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/scripts/prime_rl.py +0 -0
  163. {verifiers-0.1.10.dev2/verifiers/rubrics → verifiers-0.1.10.dev3/verifiers/utils}/__init__.py +0 -0
  164. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/utils/async_utils.py +0 -0
  165. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/utils/config_utils.py +0 -0
  166. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/utils/env_utils.py +0 -0
  167. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/utils/error_utils.py +0 -0
  168. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/utils/install_utils.py +0 -0
  169. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/utils/logging_utils.py +0 -0
  170. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/utils/response_utils.py +0 -0
  171. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/utils/thread_utils.py +0 -0
  172. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/utils/token_utils.py +0 -0
  173. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/utils/tool_utils.py +0 -0
  174. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/utils/tunnel_utils.py +0 -0
  175. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/utils/worker_utils.py +0 -0
  176. {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/workers/__init__.py +0 -0
@@ -10,6 +10,7 @@ uv.lock
10
10
  .ropeproject/
11
11
  .scratch/
12
12
  .chroma_db/
13
+ /.codex/environments/
13
14
 
14
15
  # artifacts
15
16
  core.*
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: verifiers
3
- Version: 0.1.10.dev2
3
+ Version: 0.1.10.dev3
4
4
  Summary: Verifiers: Environments for LLM Reinforcement Learning
5
5
  Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
6
6
  Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
@@ -32,8 +32,8 @@ Requires-Dist: nest-asyncio>=1.6.0
32
32
  Requires-Dist: numpy
33
33
  Requires-Dist: openai-agents>=0.0.7
34
34
  Requires-Dist: openai>=1.108.1
35
- Requires-Dist: prime-sandboxes>=0.2.9
36
- Requires-Dist: prime-tunnel
35
+ Requires-Dist: prime-sandboxes>=0.2.14
36
+ Requires-Dist: prime-tunnel>=0.1.0
37
37
  Requires-Dist: pydantic>=2.11.9
38
38
  Requires-Dist: pyzmq>=27.1.0
39
39
  Requires-Dist: requests
@@ -47,19 +47,10 @@ Provides-Extra: browser
47
47
  Requires-Dist: aiohttp>=3.9.0; extra == 'browser'
48
48
  Requires-Dist: python-dotenv>=1.0.0; extra == 'browser'
49
49
  Requires-Dist: stagehand>=3.0.0; extra == 'browser'
50
+ Provides-Extra: openenv
51
+ Requires-Dist: openenv-core[core]==0.2.1; extra == 'openenv'
50
52
  Provides-Extra: rg
51
53
  Requires-Dist: reasoning-gym; extra == 'rg'
52
- Provides-Extra: rl
53
- Requires-Dist: accelerate>=1.4.0; extra == 'rl'
54
- Requires-Dist: deepspeed>=0.17.6; extra == 'rl'
55
- Requires-Dist: flash-attn>=2.8.3; extra == 'rl'
56
- Requires-Dist: liger-kernel>=0.5.10; extra == 'rl'
57
- Requires-Dist: peft; extra == 'rl'
58
- Requires-Dist: requests; extra == 'rl'
59
- Requires-Dist: torch<2.9.0,>=2.8.0; extra == 'rl'
60
- Requires-Dist: transformers>=4.56.2; extra == 'rl'
61
- Requires-Dist: vllm<0.11.0,>=0.10.0; extra == 'rl'
62
- Requires-Dist: wandb; extra == 'rl'
63
54
  Provides-Extra: ta
64
55
  Requires-Dist: nltk; extra == 'ta'
65
56
  Requires-Dist: textarena; extra == 'ta'
@@ -157,6 +148,14 @@ Environments built with Verifiers are self-contained Python modules. To initiali
157
148
  ```bash
158
149
  prime env init my-env # creates a new template in ./environments/my_env
159
150
  ```
151
+ For OpenEnv integration, use:
152
+ ```bash
153
+ prime env init my-openenv --openenv
154
+ ```
155
+ Then copy your OpenEnv project into `environments/my_openenv/proj/` and build the image with:
156
+ ```bash
157
+ uv run vf-build my-openenv
158
+ ```
160
159
 
161
160
  This will create a new module called `my_env` with a basic environment template.
162
161
  ```
@@ -90,6 +90,14 @@ Environments built with Verifiers are self-contained Python modules. To initiali
90
90
  ```bash
91
91
  prime env init my-env # creates a new template in ./environments/my_env
92
92
  ```
93
+ For OpenEnv integration, use:
94
+ ```bash
95
+ prime env init my-openenv --openenv
96
+ ```
97
+ Then copy your OpenEnv project into `environments/my_openenv/proj/` and build the image with:
98
+ ```bash
99
+ uv run vf-build my-openenv
100
+ ```
93
101
 
94
102
  This will create a new module called `my_env` with a basic environment template.
95
103
  ```
@@ -36,8 +36,8 @@ dependencies = [
36
36
  "nest-asyncio>=1.6.0", # for jupyter notebooks
37
37
  "openai>=1.108.1",
38
38
  "openai-agents>=0.0.7",
39
- "prime-tunnel",
40
- "prime-sandboxes>=0.2.9",
39
+ "prime-tunnel>=0.1.0",
40
+ "prime-sandboxes>=0.2.14",
41
41
  "pydantic>=2.11.9",
42
42
  "requests",
43
43
  "rich",
@@ -64,6 +64,10 @@ dev = [
64
64
  "ipywidgets",
65
65
  "reasoning-gym",
66
66
  "textarena",
67
+ "openenv-core[core]==0.2.1",
68
+ "stagehand>=3.0.0",
69
+ "aiohttp>=3.9.0",
70
+ "python-dotenv>=1.0.0",
67
71
  "nltk",
68
72
  ]
69
73
 
@@ -75,40 +79,25 @@ ta = [
75
79
  "textarena",
76
80
  "nltk",
77
81
  ]
82
+ openenv = [
83
+ "openenv-core[core]==0.2.1",
84
+ ]
78
85
  browser = [
79
86
  "stagehand>=3.0.0",
80
87
  "aiohttp>=3.9.0",
81
88
  "python-dotenv>=1.0.0",
82
89
  ]
83
- rl = [
84
- "torch>=2.8.0,<2.9.0",
85
- "transformers>=4.56.2",
86
- "accelerate>=1.4.0",
87
- "requests",
88
- "peft",
89
- "wandb",
90
- "vllm>=0.10.0,<0.11.0",
91
- "liger-kernel>=0.5.10",
92
- "deepspeed>=0.17.6",
93
- "flash-attn>=2.8.3",
94
- ]
95
-
96
- [tool.uv.extra-build-dependencies]
97
- flash-attn = [{ requirement = "torch", match-runtime = true }]
98
-
99
- [tool.uv.extra-build-variables]
100
- flash-attn = { FLASH_ATTENTION_SKIP_CUDA_BUILD = "TRUE" }
101
-
102
90
  [project.scripts]
103
91
  vf-eval = "verifiers.scripts.eval:main"
104
92
  vf-gepa = "verifiers.scripts.gepa:main"
105
93
  vf-init = "verifiers.scripts.init:main"
106
94
  vf-install = "verifiers.scripts.install:main"
107
95
  vf-setup = "verifiers.scripts.setup:main"
96
+ vf-build = "verifiers.scripts.build:main"
108
97
  vf-rl = "verifiers.scripts.rl:main"
109
98
  vf-train = "verifiers.scripts.train:main"
110
99
  vf-tui = "verifiers.scripts.tui:main"
111
- vf-vllm = "verifiers.rl.inference.server:main"
100
+ vf-vllm = "verifiers.scripts.vllm:main"
112
101
  prime-rl = "verifiers.scripts.prime_rl:main"
113
102
 
114
103
  # hatchling configuration
@@ -171,9 +160,12 @@ filterwarnings = [
171
160
  asyncio_mode = "auto"
172
161
  norecursedirs = [".git", ".tox", "dist", "build", "*.egg", "__pycache__"]
173
162
 
163
+ [tool.ty.environment]
164
+ python-version = "3.13"
165
+
174
166
  [tool.ty.rules]
175
- unresolved-import = "warn"
176
167
  unknown-argument = "warn"
168
+ redundant-cast = "ignore"
177
169
 
178
170
  [tool.ty.src]
179
171
  exclude = ["environments"]
@@ -0,0 +1,52 @@
1
+ import pytest
2
+ from pydantic import ValidationError
3
+
4
+ from verifiers.types import ClientConfig, EndpointClientConfig
5
+
6
+
7
+ def test_client_config_allows_leaf_endpoint_configs():
8
+ config = ClientConfig(
9
+ api_base_url="http://localhost:8000/v1",
10
+ endpoint_configs=[
11
+ EndpointClientConfig(api_base_url="http://localhost:8001/v1"),
12
+ {"api_base_url": "http://localhost:8002/v1"},
13
+ ],
14
+ )
15
+
16
+ assert len(config.endpoint_configs) == 2
17
+ assert config.endpoint_configs[0].api_base_url == "http://localhost:8001/v1"
18
+ assert config.endpoint_configs[1].api_base_url == "http://localhost:8002/v1"
19
+
20
+
21
+ def test_client_config_rejects_recursive_endpoint_configs():
22
+ with pytest.raises(ValidationError, match="cannot include endpoint_configs"):
23
+ ClientConfig.model_validate(
24
+ {
25
+ "api_base_url": "http://localhost:8000/v1",
26
+ "endpoint_configs": [
27
+ {
28
+ "api_base_url": "http://localhost:8001/v1",
29
+ "endpoint_configs": [
30
+ {"api_base_url": "http://localhost:8002/v1"}
31
+ ],
32
+ }
33
+ ],
34
+ }
35
+ )
36
+
37
+
38
+ def test_client_config_accepts_empty_nested_endpoint_configs_key():
39
+ config = ClientConfig.model_validate(
40
+ {
41
+ "api_base_url": "http://localhost:8000/v1",
42
+ "endpoint_configs": [
43
+ {
44
+ "api_base_url": "http://localhost:8001/v1",
45
+ "endpoint_configs": [],
46
+ }
47
+ ],
48
+ }
49
+ )
50
+
51
+ assert len(config.endpoint_configs) == 1
52
+ assert config.endpoint_configs[0].api_base_url == "http://localhost:8001/v1"
@@ -0,0 +1,177 @@
1
+ from pathlib import Path
2
+
3
+ from verifiers.utils.eval_utils import load_endpoints
4
+
5
+
6
+ def test_load_endpoints_python_registry_normalizes_to_lists(tmp_path: Path):
7
+ registry_path = tmp_path / "endpoints.py"
8
+ registry_path.write_text(
9
+ "ENDPOINTS = {\n"
10
+ ' "gpt-4.1-mini": {"model": "gpt-4.1-mini", "url": "https://api.openai.com/v1", "key": "OPENAI_API_KEY"},\n'
11
+ "}\n",
12
+ encoding="utf-8",
13
+ )
14
+
15
+ endpoints = load_endpoints(str(registry_path))
16
+
17
+ assert set(endpoints.keys()) == {"gpt-4.1-mini"}
18
+ assert len(endpoints["gpt-4.1-mini"]) == 1
19
+ endpoint = endpoints["gpt-4.1-mini"][0]
20
+ assert endpoint["model"] == "gpt-4.1-mini"
21
+ assert endpoint["url"] == "https://api.openai.com/v1"
22
+ assert endpoint["key"] == "OPENAI_API_KEY"
23
+
24
+
25
+ def test_load_endpoints_toml_groups_variants_by_endpoint_id(tmp_path: Path):
26
+ registry_path = tmp_path / "endpoints.toml"
27
+ registry_path.write_text(
28
+ "[[endpoint]]\n"
29
+ 'endpoint_id = "gpt-5-mini"\n'
30
+ 'model = "openai/gpt-5-mini"\n'
31
+ 'url = "https://api.pinference.ai/api/v1"\n'
32
+ 'key = "PRIME_API_KEY"\n'
33
+ "\n"
34
+ "[[endpoint]]\n"
35
+ 'endpoint_id = "gpt-5-mini"\n'
36
+ 'model = "openai/gpt-5-mini"\n'
37
+ 'url = "https://api.openai.com/v1"\n'
38
+ 'key = "OPENAI_API_KEY"\n',
39
+ encoding="utf-8",
40
+ )
41
+
42
+ endpoints = load_endpoints(str(registry_path))
43
+
44
+ assert set(endpoints.keys()) == {"gpt-5-mini"}
45
+ assert len(endpoints["gpt-5-mini"]) == 2
46
+ assert endpoints["gpt-5-mini"][0]["url"] == "https://api.pinference.ai/api/v1"
47
+ assert endpoints["gpt-5-mini"][1]["url"] == "https://api.openai.com/v1"
48
+
49
+
50
+ def test_load_endpoints_toml_accepts_long_field_names(tmp_path: Path):
51
+ registry_path = tmp_path / "endpoints.toml"
52
+ registry_path.write_text(
53
+ "[[endpoint]]\n"
54
+ 'endpoint_id = "gpt-5-mini"\n'
55
+ 'model = "openai/gpt-5-mini"\n'
56
+ 'api_base_url = "https://api.pinference.ai/api/v1"\n'
57
+ 'api_key_var = "PRIME_API_KEY"\n',
58
+ encoding="utf-8",
59
+ )
60
+
61
+ endpoints = load_endpoints(str(registry_path))
62
+
63
+ assert endpoints["gpt-5-mini"][0]["url"] == "https://api.pinference.ai/api/v1"
64
+ assert endpoints["gpt-5-mini"][0]["key"] == "PRIME_API_KEY"
65
+
66
+
67
+ def test_load_endpoints_toml_accepts_matching_short_and_long_fields(tmp_path: Path):
68
+ registry_path = tmp_path / "endpoints.toml"
69
+ registry_path.write_text(
70
+ "[[endpoint]]\n"
71
+ 'endpoint_id = "gpt-5-mini"\n'
72
+ 'model = "openai/gpt-5-mini"\n'
73
+ 'url = "https://api.pinference.ai/api/v1"\n'
74
+ 'api_base_url = "https://api.pinference.ai/api/v1"\n'
75
+ 'key = "PRIME_API_KEY"\n'
76
+ 'api_key_var = "PRIME_API_KEY"\n',
77
+ encoding="utf-8",
78
+ )
79
+
80
+ endpoints = load_endpoints(str(registry_path))
81
+
82
+ assert endpoints["gpt-5-mini"][0]["url"] == "https://api.pinference.ai/api/v1"
83
+ assert endpoints["gpt-5-mini"][0]["key"] == "PRIME_API_KEY"
84
+
85
+
86
+ def test_load_endpoints_toml_rejects_conflicting_url_fields(tmp_path: Path):
87
+ registry_path = tmp_path / "endpoints.toml"
88
+ registry_path.write_text(
89
+ "[[endpoint]]\n"
90
+ 'endpoint_id = "gpt-5-mini"\n'
91
+ 'model = "openai/gpt-5-mini"\n'
92
+ 'url = "https://a.example/v1"\n'
93
+ 'api_base_url = "https://b.example/v1"\n'
94
+ 'key = "PRIME_API_KEY"\n',
95
+ encoding="utf-8",
96
+ )
97
+
98
+ endpoints = load_endpoints(str(registry_path))
99
+
100
+ assert endpoints == {}
101
+
102
+
103
+ def test_load_endpoints_toml_rejects_conflicting_key_fields(tmp_path: Path):
104
+ registry_path = tmp_path / "endpoints.toml"
105
+ registry_path.write_text(
106
+ "[[endpoint]]\n"
107
+ 'endpoint_id = "gpt-5-mini"\n'
108
+ 'model = "openai/gpt-5-mini"\n'
109
+ 'url = "https://a.example/v1"\n'
110
+ 'key = "A_KEY"\n'
111
+ 'api_key_var = "B_KEY"\n',
112
+ encoding="utf-8",
113
+ )
114
+
115
+ endpoints = load_endpoints(str(registry_path))
116
+
117
+ assert endpoints == {}
118
+
119
+
120
+ def test_load_endpoints_python_registry_supports_list_variants(tmp_path: Path):
121
+ registry_path = tmp_path / "endpoints.py"
122
+ registry_path.write_text(
123
+ "ENDPOINTS = {\n"
124
+ ' "gpt-5-mini": [\n'
125
+ ' {"model": "gpt-5-mini", "url": "https://a.example/v1", "key": "A_KEY"},\n'
126
+ ' {"model": "gpt-5-mini", "url": "https://b.example/v1", "key": "A_KEY"},\n'
127
+ " ]\n"
128
+ "}\n",
129
+ encoding="utf-8",
130
+ )
131
+
132
+ endpoints = load_endpoints(str(registry_path))
133
+
134
+ assert set(endpoints.keys()) == {"gpt-5-mini"}
135
+ assert len(endpoints["gpt-5-mini"]) == 2
136
+ assert endpoints["gpt-5-mini"][0]["url"] == "https://a.example/v1"
137
+ assert endpoints["gpt-5-mini"][1]["url"] == "https://b.example/v1"
138
+
139
+
140
+ def test_load_endpoints_directory_prefers_toml_then_python(tmp_path: Path):
141
+ python_registry = tmp_path / "endpoints.py"
142
+ toml_registry = tmp_path / "endpoints.toml"
143
+
144
+ python_registry.write_text(
145
+ "ENDPOINTS = {\n"
146
+ ' "from-py": {"model": "m", "url": "https://py.example/v1", "key": "PY_KEY"},\n'
147
+ "}\n",
148
+ encoding="utf-8",
149
+ )
150
+ toml_registry.write_text(
151
+ "[[endpoint]]\n"
152
+ 'endpoint_id = "from-toml"\n'
153
+ 'model = "m"\n'
154
+ 'url = "https://toml.example/v1"\n'
155
+ 'key = "TOML_KEY"\n',
156
+ encoding="utf-8",
157
+ )
158
+
159
+ endpoints = load_endpoints(str(tmp_path))
160
+ assert set(endpoints.keys()) == {"from-toml"}
161
+
162
+ toml_registry.unlink()
163
+ endpoints = load_endpoints(str(tmp_path))
164
+ assert set(endpoints.keys()) == {"from-py"}
165
+
166
+
167
+ def test_qwen3_vl_endpoint_ids_map_to_vl_models():
168
+ endpoints = load_endpoints("./configs/endpoints.toml")
169
+
170
+ assert endpoints["qwen3-vl-30b-i"][0]["model"] == "qwen/qwen3-vl-30b-a3b-instruct"
171
+ assert endpoints["qwen3-vl-30b-t"][0]["model"] == "qwen/qwen3-vl-30b-a3b-thinking"
172
+ assert (
173
+ endpoints["qwen3-vl-235b-i"][0]["model"] == "qwen/qwen3-vl-235b-a22b-instruct"
174
+ )
175
+ assert (
176
+ endpoints["qwen3-vl-235b-t"][0]["model"] == "qwen/qwen3-vl-235b-a22b-thinking"
177
+ )