verifiers 0.1.12.dev0__tar.gz → 0.1.12.dev2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199) hide show
  1. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/.gitignore +1 -0
  2. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/PKG-INFO +4 -3
  3. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/pyproject.toml +4 -2
  4. verifiers-0.1.12.dev2/tests/test_composable_env.py +200 -0
  5. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_env_server.py +68 -125
  6. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_envs.py +50 -12
  7. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_rlm_env.py +432 -69
  8. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_sandbox_mixin.py +7 -48
  9. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_tui_info_formatting.py +58 -16
  10. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/__init__.py +1 -1
  11. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/clients/openai_chat_completions_client.py +5 -1
  12. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/clients/openai_chat_completions_token_client.py +109 -92
  13. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/clients/openai_completions_client.py +7 -1
  14. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/env_group.py +1 -1
  15. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/environment.py +38 -19
  16. verifiers-0.1.12.dev2/verifiers/envs/experimental/__init__.py +28 -0
  17. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/experimental/cli_agent_env.py +39 -8
  18. verifiers-0.1.12.dev2/verifiers/envs/experimental/composable/README.md +151 -0
  19. verifiers-0.1.12.dev2/verifiers/envs/experimental/composable/__init__.py +17 -0
  20. verifiers-0.1.12.dev2/verifiers/envs/experimental/composable/composable_env.py +202 -0
  21. verifiers-0.1.12.dev2/verifiers/envs/experimental/composable/harness.py +58 -0
  22. verifiers-0.1.12.dev2/verifiers/envs/experimental/composable/task.py +362 -0
  23. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/experimental/opencode_env.py +0 -2
  24. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/experimental/rlm_env.py +661 -413
  25. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/experimental/sandbox_mixin.py +11 -36
  26. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/integrations/README.md +2 -0
  27. verifiers-0.1.12.dev2/verifiers/envs/integrations/browser_env/README.md +154 -0
  28. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/integrations/openenv_env.py +6 -4
  29. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/rubrics/experimental/hybrid_math_rubric.py +4 -0
  30. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/rubrics/math_rubric.py +23 -1
  31. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/scripts/eval.py +7 -0
  32. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/scripts/tui.py +887 -235
  33. verifiers-0.1.12.dev2/verifiers/serve/__init__.py +45 -0
  34. {verifiers-0.1.12.dev0/verifiers/workers → verifiers-0.1.12.dev2/verifiers/serve}/client/env_client.py +1 -1
  35. {verifiers-0.1.12.dev0/verifiers/workers → verifiers-0.1.12.dev2/verifiers/serve}/client/zmq_env_client.py +13 -15
  36. verifiers-0.1.12.dev2/verifiers/serve/server/__init__.py +11 -0
  37. verifiers-0.1.12.dev2/verifiers/serve/server/env_router.py +427 -0
  38. verifiers-0.1.12.dev2/verifiers/serve/server/env_server.py +128 -0
  39. verifiers-0.1.12.dev2/verifiers/serve/server/env_worker.py +389 -0
  40. verifiers-0.1.12.dev2/verifiers/serve/server/zmq_env_server.py +117 -0
  41. {verifiers-0.1.12.dev0/verifiers/workers → verifiers-0.1.12.dev2/verifiers/serve}/types.py +2 -0
  42. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/types.py +1 -0
  43. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/async_utils.py +42 -0
  44. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/config_utils.py +1 -1
  45. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/env_utils.py +3 -0
  46. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/eval_display.py +6 -1
  47. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/eval_utils.py +34 -19
  48. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/logging_utils.py +20 -22
  49. verifiers-0.1.12.dev2/verifiers/utils/process_utils.py +89 -0
  50. verifiers-0.1.12.dev0/verifiers/utils/worker_utils.py → verifiers-0.1.12.dev2/verifiers/utils/serve_utils.py +5 -30
  51. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/thread_utils.py +1 -1
  52. verifiers-0.1.12.dev0/verifiers/envs/experimental/__init__.py +0 -3
  53. verifiers-0.1.12.dev0/verifiers/envs/integrations/browser_env/README.md +0 -118
  54. verifiers-0.1.12.dev0/verifiers/workers/__init__.py +0 -27
  55. verifiers-0.1.12.dev0/verifiers/workers/server/env_server.py +0 -175
  56. verifiers-0.1.12.dev0/verifiers/workers/server/zmq_env_server.py +0 -326
  57. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/LICENSE +0 -0
  58. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/README.md +0 -0
  59. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/AGENTS.md +0 -0
  60. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/README.md +0 -0
  61. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/__init__.py +0 -0
  62. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/conftest.py +0 -0
  63. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_browser_env.py +0 -0
  64. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_build_script.py +0 -0
  65. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_cli_agent_env.py +0 -0
  66. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_client_auth_errors.py +0 -0
  67. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_client_config.py +0 -0
  68. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_client_multimodal_types.py +0 -0
  69. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_decorator_ranks.py +0 -0
  70. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_endpoint_registry.py +0 -0
  71. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_env_group.py +0 -0
  72. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_environment.py +0 -0
  73. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_environment_extra.py +0 -0
  74. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_error_chain.py +0 -0
  75. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_eval_cli.py +0 -0
  76. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_eval_display.py +0 -0
  77. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_eval_utils.py +0 -0
  78. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_gepa_cli.py +0 -0
  79. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_gym_env.py +0 -0
  80. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_imports.py +0 -0
  81. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_install_utils.py +0 -0
  82. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_interception_utils.py +0 -0
  83. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_logging.py +0 -0
  84. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_math_rubric.py +0 -0
  85. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_maybe_think_parser.py +0 -0
  86. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_message_utils.py +0 -0
  87. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_message_utils_multimodal.py +0 -0
  88. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_multiturn_env.py +0 -0
  89. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_openai_chat_completions_token_client.py +0 -0
  90. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_opencode_harbor.py +0 -0
  91. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_opencode_rlm_env.py +0 -0
  92. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_parser.py +0 -0
  93. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_path_utils.py +0 -0
  94. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_prime_plugin.py +0 -0
  95. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_rubric.py +0 -0
  96. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_rubric_group.py +0 -0
  97. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_sandbox_env.py +0 -0
  98. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_save_utils.py +0 -0
  99. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_setup_script.py +0 -0
  100. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_singleturn_env.py +0 -0
  101. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_stateful_tool_env.py +0 -0
  102. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_think_parser.py +0 -0
  103. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_tool_env.py +0 -0
  104. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_tool_utils.py +0 -0
  105. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_trajectory_processing.py +0 -0
  106. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_xml_parser.py +0 -0
  107. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/AGENTS.md +0 -0
  108. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/cli/__init__.py +0 -0
  109. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/cli/commands/__init__.py +0 -0
  110. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/cli/commands/build.py +0 -0
  111. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/cli/commands/eval.py +0 -0
  112. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/cli/commands/gepa.py +0 -0
  113. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/cli/commands/init.py +0 -0
  114. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/cli/commands/install.py +0 -0
  115. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/cli/commands/setup.py +0 -0
  116. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/cli/plugins/__init__.py +0 -0
  117. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/cli/plugins/prime.py +0 -0
  118. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/cli/tui.py +0 -0
  119. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/clients/__init__.py +0 -0
  120. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/clients/anthropic_messages_client.py +0 -0
  121. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/clients/client.py +0 -0
  122. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/decorators.py +0 -0
  123. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/AGENTS.md +0 -0
  124. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/__init__.py +0 -0
  125. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/experimental/README.md +0 -0
  126. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/experimental/gym_env.py +0 -0
  127. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/experimental/harbor_env.py +0 -0
  128. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/experimental/mcp_env.py +0 -0
  129. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/experimental/opencode_qa_env.py +0 -0
  130. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/experimental/opencode_rlm_env.py +0 -0
  131. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/integrations/__init__.py +0 -0
  132. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/integrations/browser_env/__init__.py +0 -0
  133. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
  134. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
  135. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
  136. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
  137. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +0 -0
  138. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
  139. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/integrations/textarena_env.py +0 -0
  140. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/multiturn_env.py +0 -0
  141. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/python_env.py +0 -0
  142. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/sandbox_env.py +0 -0
  143. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/singleturn_env.py +0 -0
  144. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/stateful_tool_env.py +0 -0
  145. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/tool_env.py +0 -0
  146. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/errors.py +0 -0
  147. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/gepa/__init__.py +0 -0
  148. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/gepa/adapter.py +0 -0
  149. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/gepa/config.py +0 -0
  150. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/gepa/display.py +0 -0
  151. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/gepa/gepa_utils.py +0 -0
  152. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/parsers/__init__.py +0 -0
  153. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/parsers/maybe_think_parser.py +0 -0
  154. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/parsers/parser.py +0 -0
  155. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/parsers/think_parser.py +0 -0
  156. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/parsers/xml_parser.py +0 -0
  157. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/rl/README.md +0 -0
  158. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/rl/__init__.py +0 -0
  159. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/rl/inference/__init__.py +0 -0
  160. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/rl/inference/client.py +0 -0
  161. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/rl/inference/server.py +0 -0
  162. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/rl/trainer/__init__.py +0 -0
  163. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/rl/trainer/config.py +0 -0
  164. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/rl/trainer/orchestrator.py +0 -0
  165. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/rl/trainer/trainer.py +0 -0
  166. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/rl/trainer/utils.py +0 -0
  167. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/rubrics/__init__.py +0 -0
  168. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/rubrics/judge_rubric.py +0 -0
  169. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/rubrics/rubric.py +0 -0
  170. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/rubrics/rubric_group.py +0 -0
  171. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/scripts/__init__.py +0 -0
  172. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/scripts/build.py +0 -0
  173. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/scripts/gepa.py +0 -0
  174. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/scripts/init.py +0 -0
  175. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/scripts/install.py +0 -0
  176. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/scripts/prime_rl.py +0 -0
  177. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/scripts/rl.py +0 -0
  178. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/scripts/setup.py +0 -0
  179. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/scripts/train.py +0 -0
  180. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/scripts/vllm.py +0 -0
  181. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/__init__.py +0 -0
  182. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/client_utils.py +0 -0
  183. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/data_utils.py +0 -0
  184. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/display_utils.py +0 -0
  185. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/error_utils.py +0 -0
  186. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/heartbeat.py +0 -0
  187. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/import_utils.py +0 -0
  188. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/install_utils.py +0 -0
  189. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/interception_utils.py +0 -0
  190. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/message_utils.py +0 -0
  191. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/metric_utils.py +0 -0
  192. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/path_utils.py +0 -0
  193. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/response_utils.py +0 -0
  194. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/save_utils.py +0 -0
  195. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/threaded_sandbox_client.py +0 -0
  196. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/tool_utils.py +0 -0
  197. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/tunnel_utils.py +0 -0
  198. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/usage_utils.py +0 -0
  199. {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/version_utils.py +0 -0
@@ -11,6 +11,7 @@ uv.lock
11
11
  .scratch/
12
12
  .chroma_db/
13
13
  /.codex/environments/
14
+ .python-version
14
15
 
15
16
  # artifacts
16
17
  core.*
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: verifiers
3
- Version: 0.1.12.dev0
3
+ Version: 0.1.12.dev2
4
4
  Summary: Verifiers: Environments for LLM Reinforcement Learning
5
5
  Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
6
6
  Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
@@ -34,12 +34,13 @@ Requires-Dist: nest-asyncio>=1.6.0
34
34
  Requires-Dist: numpy
35
35
  Requires-Dist: openai-agents>=0.0.7
36
36
  Requires-Dist: openai>=1.108.1
37
- Requires-Dist: prime-sandboxes>=0.2.16
38
- Requires-Dist: prime-tunnel>=0.1.4
37
+ Requires-Dist: prime-sandboxes>=0.2.19
38
+ Requires-Dist: prime-tunnel>=0.1.5
39
39
  Requires-Dist: pydantic>=2.11.9
40
40
  Requires-Dist: pyzmq>=27.1.0
41
41
  Requires-Dist: requests
42
42
  Requires-Dist: rich
43
+ Requires-Dist: setproctitle>=1.3.0
43
44
  Requires-Dist: tenacity>=8.5.0
44
45
  Requires-Dist: textual
45
46
  Requires-Dist: tomli; python_version < '3.11'
@@ -37,8 +37,8 @@ dependencies = [
37
37
  "nest-asyncio>=1.6.0", # for jupyter notebooks
38
38
  "openai>=1.108.1",
39
39
  "openai-agents>=0.0.7",
40
- "prime-tunnel>=0.1.4",
41
- "prime-sandboxes>=0.2.16",
40
+ "prime-tunnel>=0.1.5",
41
+ "prime-sandboxes>=0.2.19",
42
42
  "pydantic>=2.11.9",
43
43
  "requests",
44
44
  "rich",
@@ -51,6 +51,7 @@ dependencies = [
51
51
  "pyzmq>=27.1.0",
52
52
  "msgpack>=1.1.2",
53
53
  "aiolimiter>=1.2.1",
54
+ "setproctitle>=1.3.0",
54
55
  ]
55
56
 
56
57
  [dependency-groups]
@@ -104,6 +105,7 @@ rl = [
104
105
 
105
106
  [tool.uv]
106
107
  preview = true
108
+ required-version = "<0.11.0"
107
109
 
108
110
  [tool.uv.extra-build-dependencies]
109
111
  flash-attn = [{ requirement = "torch", match-runtime = true }]
@@ -0,0 +1,200 @@
1
+ """Tests for the composable architecture: Task, TaskSet, SandboxTaskSet, SandboxSpec."""
2
+
3
+ import pytest
4
+
5
+ import verifiers as vf
6
+ from verifiers.envs.experimental.composable import (
7
+ ComposableEnv,
8
+ Harness,
9
+ SandboxSpec,
10
+ SandboxTaskSet,
11
+ Task,
12
+ TaskSet,
13
+ )
14
+
15
+
16
+ # ── Mock Rubrics ──────────────────────────────────────────────────────
17
+
18
+
19
+ class MockSandboxRubric(vf.Rubric):
20
+ def __init__(self, **kwargs):
21
+ super().__init__(**kwargs)
22
+ self.add_reward_func(self.solved)
23
+
24
+ async def solved(self, state, **kwargs) -> float:
25
+ return 1.0 if state.get("test_output") == "PASS" else 0.0
26
+
27
+
28
+ class MockMathRubric(vf.Rubric):
29
+ def __init__(self, **kwargs):
30
+ super().__init__(**kwargs)
31
+ self.add_reward_func(self.correct)
32
+
33
+ async def correct(self, state, **kwargs) -> float:
34
+ return 1.0 if state.get("info", {}).get("id") == 0 else 0.0
35
+
36
+
37
+ # ── Mock TaskSets ───────────────────────────────────────────────────────
38
+
39
+
40
+ class MockSandboxTaskSet(SandboxTaskSet):
41
+ """SandboxTaskSet for testing."""
42
+
43
+ def get_instruction(self, info):
44
+ return f"Fix bug #{info.get('id', 0)}"
45
+
46
+ def get_sandbox_spec(self, info):
47
+ return SandboxSpec(image="python:3.11-slim", cpu_cores=2, memory_gb=2)
48
+
49
+ def get_rubric(self):
50
+ return MockSandboxRubric()
51
+
52
+ def get_workdir(self, info):
53
+ return "/testbed"
54
+
55
+ def get_env_vars(self):
56
+ return {"FOO": "bar"}
57
+
58
+
59
+ class MockTaskSet(TaskSet):
60
+ """Plain TaskSet (no sandbox) for testing."""
61
+
62
+ def get_instruction(self, info):
63
+ return info.get("question", "")
64
+
65
+ def get_rubric(self):
66
+ return MockMathRubric()
67
+
68
+
69
+ def _make_dataset(n=3):
70
+ from datasets import Dataset
71
+
72
+ return Dataset.from_dict(
73
+ {
74
+ "info": [{"id": i, "question": f"q{i}"} for i in range(n)],
75
+ "answer": ["" for _ in range(n)],
76
+ }
77
+ )
78
+
79
+
80
+ # ── SandboxSpec ─────────────────────────────────────────────────────────
81
+
82
+
83
+ def test_sandbox_spec_defaults():
84
+ spec = SandboxSpec()
85
+ assert spec.image == "python:3.11-slim"
86
+ assert spec.cpu_cores == 4
87
+
88
+
89
+ def test_sandbox_spec_custom():
90
+ spec = SandboxSpec(image="lean-tactic:v4.27", gpu_count=1)
91
+ assert spec.image == "lean-tactic:v4.27"
92
+ assert spec.gpu_count == 1
93
+
94
+
95
+ # ── Task from SandboxTaskSet ───────────────────────────────────────────
96
+
97
+
98
+ def test_task_sandbox_spec():
99
+ ts = MockSandboxTaskSet(dataset=_make_dataset(), name="test")
100
+ task = ts[0]
101
+ assert isinstance(task, Task)
102
+ assert task.sandbox_spec is not None
103
+ assert task.sandbox_spec.image == "python:3.11-slim"
104
+ assert task.sandbox_spec.cpu_cores == 2
105
+
106
+
107
+ def test_task_image():
108
+ ts = MockSandboxTaskSet(dataset=_make_dataset(), name="test")
109
+ task = ts[0]
110
+ assert task.image == "python:3.11-slim"
111
+
112
+
113
+ def test_task_workdir():
114
+ ts = MockSandboxTaskSet(dataset=_make_dataset(), name="test")
115
+ task = ts[0]
116
+ assert task.workdir == "/testbed"
117
+
118
+
119
+ def test_task_repr_sandbox():
120
+ ts = MockSandboxTaskSet(dataset=_make_dataset(), name="test")
121
+ task = ts[0]
122
+ assert "python:3.11-slim" in repr(task)
123
+
124
+
125
+ # ── Task from plain TaskSet ────────────────────────────────────────────
126
+
127
+
128
+ def test_task_no_sandbox():
129
+ ts = MockTaskSet(dataset=_make_dataset(), name="math")
130
+ task = ts[0]
131
+ assert task.sandbox_spec is None
132
+ assert task.image is None
133
+
134
+
135
+ def test_task_repr_no_sandbox():
136
+ ts = MockTaskSet(dataset=_make_dataset(), name="math")
137
+ task = ts[0]
138
+ assert "no sandbox" in repr(task)
139
+
140
+
141
+ # ── TaskSet ─────────────────────────────────────────────────────────────
142
+
143
+
144
+ def test_taskset_isinstance():
145
+ ts = MockTaskSet(dataset=_make_dataset(), name="math")
146
+ assert not isinstance(ts, SandboxTaskSet)
147
+
148
+ ts2 = MockSandboxTaskSet(dataset=_make_dataset(), name="swe")
149
+ assert isinstance(ts2, SandboxTaskSet)
150
+
151
+
152
+ def test_taskset_len():
153
+ ts = MockTaskSet(dataset=_make_dataset(5), name="test")
154
+ assert len(ts) == 5
155
+
156
+
157
+ def test_taskset_iter():
158
+ ts = MockTaskSet(dataset=_make_dataset(3), name="test")
159
+ tasks = list(ts)
160
+ assert len(tasks) == 3
161
+ assert all(isinstance(t, Task) for t in tasks)
162
+
163
+
164
+ def test_taskset_filter():
165
+ ts = MockSandboxTaskSet(dataset=_make_dataset(5), name="test")
166
+ filtered = ts.filter(lambda ex: ex["info"]["id"] < 3)
167
+ assert len(filtered) == 3
168
+ assert isinstance(filtered, MockSandboxTaskSet)
169
+
170
+
171
+ def test_taskset_take():
172
+ ts = MockSandboxTaskSet(dataset=_make_dataset(5), name="test")
173
+ taken = ts.take(2)
174
+ assert len(taken) == 2
175
+ assert isinstance(taken, MockSandboxTaskSet)
176
+
177
+
178
+ def test_taskset_repr():
179
+ ts = MockTaskSet(dataset=_make_dataset(), name="mytest")
180
+ assert "mytest" in repr(ts)
181
+ assert "3" in repr(ts)
182
+
183
+
184
+ @pytest.mark.asyncio
185
+ async def test_composable_env_exports_task_workdir():
186
+ taskset = MockSandboxTaskSet(dataset=_make_dataset(), name="test")
187
+ env = ComposableEnv(
188
+ taskset=taskset,
189
+ harness=Harness(run_command="true"),
190
+ )
191
+
192
+ env_vars = await env.build_env_vars(
193
+ {
194
+ "info": {"id": 0},
195
+ "interception_base_url": "https://test.trycloudflare.com/v1",
196
+ }
197
+ )
198
+
199
+ assert env_vars["AGENT_WORKDIR"] == "/testbed"
200
+ assert env_vars["FOO"] == "bar"
@@ -4,7 +4,7 @@ Covers:
4
4
  - Health-check state transitions (STARTUP -> HEALTHY -> UNHEALTHY)
5
5
  - Request retry on ServerError and recovery timeouts
6
6
  - Server startup waiting
7
- - Cancellation propagation (client -> server)
7
+ - Cancellation propagation (client -> router -> worker)
8
8
  """
9
9
 
10
10
  import asyncio
@@ -15,16 +15,16 @@ from unittest.mock import AsyncMock, MagicMock, patch
15
15
  import pytest
16
16
 
17
17
  from verifiers.types import ClientConfig, RolloutInput, UserMessage
18
- from verifiers.utils.worker_utils import get_free_port_pair
19
- from verifiers.workers.client.zmq_env_client import ZMQEnvClient
20
- from verifiers.workers.server.zmq_env_server import ZMQEnvServer
21
- from verifiers.workers.types import (
18
+ from verifiers.utils.serve_utils import get_free_port
19
+ from verifiers.serve import (
22
20
  HealthRequest,
23
21
  HealthResponse,
24
22
  PendingRequest,
25
23
  RunRolloutRequest,
26
24
  RunRolloutResponse,
27
25
  ServerState,
26
+ ZMQEnvClient,
27
+ ZMQEnvServer,
28
28
  )
29
29
 
30
30
 
@@ -36,7 +36,7 @@ def make_client(address: str = "tcp://127.0.0.1:5555", **kwargs) -> ZMQEnvClient
36
36
 
37
37
  def make_mock_server(address: str) -> ZMQEnvServer:
38
38
  """Create a ZMQEnvServer with a mocked environment (no real env loading)."""
39
- with patch("verifiers.workers.server.env_server.vf") as mock_vf:
39
+ with patch("verifiers.serve.server.env_server.vf") as mock_vf:
40
40
  mock_env = MagicMock()
41
41
  mock_env._teardown = AsyncMock()
42
42
  mock_vf.load_environment.return_value = mock_env
@@ -77,20 +77,22 @@ def make_pending_request(
77
77
 
78
78
 
79
79
  @contextlib.asynccontextmanager
80
- async def run_server_and_client(handle_run_rollout=None):
80
+ async def run_server_and_client():
81
81
  """Start a mock ZMQ server and connected client, tearing both down on exit.
82
82
 
83
- Args:
84
- handle_run_rollout: Optional async callable to override the server's
85
- ``handle_run_rollout`` method. Useful for injecting slow or
86
- observable handlers in tests.
83
+ The router's worker spawning is mocked out so no subprocesses are created.
84
+ Instead, dispatch_request/forward_cancel are replaced with AsyncMock so tests can
85
+ observe request routing without needing real workers.
87
86
  """
88
- port = get_free_port_pair()
87
+ port = get_free_port()
89
88
  address = f"tcp://127.0.0.1:{port}"
90
89
 
91
90
  server = make_mock_server(address)
92
- if handle_run_rollout is not None:
93
- server.handle_run_rollout = handle_run_rollout # type: ignore[assignment]
91
+
92
+ # Mock out worker lifecycle — we don't want real subprocesses in unit tests
93
+ server.router.start_workers = MagicMock()
94
+ server.router.dispatch_request = AsyncMock()
95
+ server.router.forward_cancel = AsyncMock()
94
96
 
95
97
  stop_event = asyncio.Event()
96
98
  server_loop = asyncio.create_task(server.serve(stop_event=stop_event))
@@ -324,142 +326,83 @@ class TestRetryOnServerError:
324
326
  await client.close()
325
327
 
326
328
 
327
- class TestTaskCancellation:
328
- """Tests that client-side cancellation propagates to the server.
329
+ class TestCancelForwarding:
330
+ """Tests that client-side cancellation is forwarded through the router.
329
331
 
330
- The client sends an empty-payload cancel signal over the existing ZMQ
331
- wire format, and the server cancels the corresponding asyncio task.
332
+ With the multi-process architecture, the ZMQEnvServer receives cancel
333
+ signals from the client and forwards them via ``router.forward_cancel()``.
334
+ These tests verify the server correctly routes cancels to the router.
332
335
  """
333
336
 
334
337
  @pytest.mark.asyncio
335
- async def test_cancelled_client_task_should_cancel_server_task_before_request_processing(
336
- self,
337
- ):
338
- """Cancellation should still propagate before process_request enters its body."""
339
- process_request_blocked = asyncio.Event()
340
- original_process_request_entered = asyncio.Event()
341
- server_task_cancelled = asyncio.Event()
342
-
338
+ async def test_cancel_signal_forwarded_to_router(self):
339
+ """Client cancellation sends empty payload, server calls router.forward_cancel."""
343
340
  async with run_server_and_client() as (server, client):
344
- original_process_request = server.process_request
345
-
346
- async def delayed_process_request(
347
- client_id,
348
- request_id_bytes,
349
- payload_bytes,
350
- ):
351
- process_request_blocked.set()
352
- try:
353
- await asyncio.Event().wait()
354
- original_process_request_entered.set()
355
- return await original_process_request(
356
- client_id,
357
- request_id_bytes,
358
- payload_bytes,
359
- )
360
- except asyncio.CancelledError:
361
- server_task_cancelled.set()
362
- raise
363
-
364
- server.process_request = delayed_process_request # type: ignore[assignment]
365
-
341
+ # Send a request
366
342
  client_task = asyncio.create_task(
367
343
  client.send_request(
368
344
  make_rollout_request(), RunRolloutResponse, timeout=30
369
345
  )
370
346
  )
371
347
 
372
- await asyncio.wait_for(process_request_blocked.wait(), timeout=5)
373
- assert len(server.request_tasks) == 1
374
- assert not original_process_request_entered.is_set()
348
+ # Wait for dispatch to be called
349
+ await asyncio.sleep(0.3)
350
+ assert server.router.dispatch_request.call_count == 1
375
351
 
352
+ # Cancel on the client side — this sends an empty-payload frame
376
353
  client_task.cancel()
377
354
  with pytest.raises(asyncio.CancelledError):
378
355
  await client_task
379
356
 
380
- await asyncio.wait_for(server_task_cancelled.wait(), timeout=5)
381
- assert not original_process_request_entered.is_set()
357
+ # Give the cancel signal time to propagate
358
+ await asyncio.sleep(0.3)
359
+
360
+ # The server should have forwarded the cancel to the router
361
+ assert server.router.forward_cancel.call_count == 1
362
+ call_args = server.router.forward_cancel.call_args
363
+ # forward_cancel(request_id, client_id)
364
+ assert isinstance(call_args[0][0], bytes) # request_id
365
+ assert isinstance(call_args[0][1], bytes) # client_id
382
366
 
383
367
  @pytest.mark.asyncio
384
- async def test_cancelled_client_task_should_cancel_server_task(self):
385
- """When the asyncio task awaiting send_request() is cancelled on the
386
- client, the corresponding server-side task should also be cancelled
387
- via the empty-payload cancel signal.
388
- """
389
- server_task_started = asyncio.Event()
390
- server_task_cancelled = asyncio.Event()
391
-
392
- async def slow_handle_run_rollout(request):
393
- server_task_started.set()
394
- try:
395
- await asyncio.sleep(60)
396
- return RunRolloutResponse(output=None)
397
- except asyncio.CancelledError:
398
- server_task_cancelled.set()
399
- raise
400
-
401
- async with run_server_and_client(slow_handle_run_rollout) as (server, client):
402
- client_task = asyncio.create_task(
403
- client.send_request(
404
- make_rollout_request(), RunRolloutResponse, timeout=30
368
+ async def test_timeout_sends_cancel_to_router(self):
369
+ """Client timeout sends cancel signal, server calls router.forward_cancel."""
370
+ async with run_server_and_client() as (server, client):
371
+ # Use a short timeout
372
+ with pytest.raises(TimeoutError):
373
+ await client.send_request(
374
+ make_rollout_request(), RunRolloutResponse, timeout=0.5
405
375
  )
406
- )
407
376
 
408
- # Wait for the server to actually start processing
409
- await asyncio.wait_for(server_task_started.wait(), timeout=5)
410
- assert len(server.request_tasks) == 1
377
+ # Give the cancel signal time to propagate
378
+ await asyncio.sleep(0.3)
411
379
 
412
- # Cancel on the client side
413
- client_task.cancel()
414
- with pytest.raises(asyncio.CancelledError):
415
- await client_task
416
-
417
- # Give the system time to propagate the cancellation
418
- await asyncio.sleep(0.5)
380
+ # Dispatch should have been called
381
+ assert server.router.dispatch_request.call_count == 1
419
382
 
420
- # The server-side task SHOULD have been cancelled.
421
- # This fails today because the client never tells the server.
422
- assert server_task_cancelled.is_set(), (
423
- "Server-side task was NOT cancelled even though the client "
424
- "cancelled the request. The server is still consuming resources "
425
- "for a request nobody is waiting for."
426
- )
383
+ # The server should have forwarded the cancel to the router
384
+ assert server.router.forward_cancel.call_count == 1
427
385
 
428
386
  @pytest.mark.asyncio
429
- async def test_client_timeout_should_cancel_server_task(self):
430
- """When the client times out waiting for a response, the
431
- corresponding server-side task should be cancelled via the
432
- empty-payload cancel signal.
433
- """
434
- server_task_started = asyncio.Event()
435
- server_task_cancelled = asyncio.Event()
436
-
437
- async def slow_handle_run_rollout(request):
438
- server_task_started.set()
439
- try:
440
- await asyncio.sleep(60)
441
- return RunRolloutResponse(output=None)
442
- except asyncio.CancelledError:
443
- server_task_cancelled.set()
444
- raise
445
-
446
- async with run_server_and_client(slow_handle_run_rollout) as (server, client):
447
- # Use a very short timeout so the client gives up quickly
448
- with pytest.raises(TimeoutError):
449
- await client.send_request(
450
- make_rollout_request(), RunRolloutResponse, timeout=0.5
387
+ async def test_dispatch_called_with_correct_frames(self):
388
+ """Requests are dispatched to the router with client_id, request_id, payload."""
389
+ async with run_server_and_client() as (server, client):
390
+ client_task = asyncio.create_task(
391
+ client.send_request(
392
+ make_rollout_request(), RunRolloutResponse, timeout=30
451
393
  )
394
+ )
452
395
 
453
- # Confirm the server started processing
454
- await asyncio.wait_for(server_task_started.wait(), timeout=5)
455
- assert len(server.request_tasks) == 1
396
+ await asyncio.sleep(0.3)
456
397
 
457
- # Give the system time to propagate
458
- await asyncio.sleep(0.5)
398
+ assert server.router.dispatch_request.call_count == 1
399
+ call_args = server.router.dispatch_request.call_args
400
+ client_id, request_id, payload = call_args[0]
401
+ assert isinstance(client_id, bytes)
402
+ assert isinstance(request_id, bytes)
403
+ assert isinstance(payload, bytes)
404
+ assert len(payload) > 0 # non-empty payload = real request
459
405
 
460
- # The server task SHOULD have been cancelled after client timeout
461
- assert server_task_cancelled.is_set(), (
462
- "Server-side task was NOT cancelled after client timeout. "
463
- "The server continues processing a request that already "
464
- "timed out on the client."
465
- )
406
+ client_task.cancel()
407
+ with contextlib.suppress(asyncio.CancelledError):
408
+ await client_task
@@ -5,6 +5,12 @@ from pathlib import Path
5
5
  import pytest
6
6
  import tomllib
7
7
 
8
+ # Timeout in seconds for each subprocess step
9
+ INSTALL_TIMEOUT = 600 # 10 minutes for venv creation + package install
10
+ IMPORT_TIMEOUT = 120 # 2 minutes for importing a package
11
+ LOAD_TIMEOUT = 300 # 5 minutes for loading an environment (may download datasets)
12
+ EVAL_TIMEOUT = 600 # 10 minutes for running vf-eval with -n 1 -r 1
13
+
8
14
  SKIPPED_ENVS = [
9
15
  # Requires EXA_API_KEY environment variable
10
16
  "mcp_search_env",
@@ -99,9 +105,17 @@ def test_env(env_dir: Path, tmp_path_factory: pytest.TempPathFactory):
99
105
  f"uv pip install {repo_root.as_posix()} && "
100
106
  f"uv pip install {env_dir.absolute().as_posix()}"
101
107
  )
102
- process = subprocess.run(
103
- cmd, shell=True, executable="/bin/bash", capture_output=True, text=True
104
- )
108
+ try:
109
+ process = subprocess.run(
110
+ cmd,
111
+ shell=True,
112
+ executable="/bin/bash",
113
+ capture_output=True,
114
+ text=True,
115
+ timeout=INSTALL_TIMEOUT,
116
+ )
117
+ except subprocess.TimeoutExpired:
118
+ pytest.fail(f"Timed out after {INSTALL_TIMEOUT}s installing {env_dir.name}")
105
119
  assert process.returncode == 0, (
106
120
  f"Failed to create virtual environment: {process.stderr}"
107
121
  )
@@ -114,25 +128,49 @@ def test_env(env_dir: Path, tmp_path_factory: pytest.TempPathFactory):
114
128
  def help_test_can_import_env(tmp_venv_dir: Path, env_dir: Path):
115
129
  """Test that the environment can be imported as a package."""
116
130
  import_cmd = f"cd {tmp_venv_dir} && source .venv/bin/activate && uv run python -c 'import {env_dir.name}'"
117
- process = subprocess.run(
118
- import_cmd, shell=True, executable="/bin/bash", capture_output=True, text=True
119
- )
131
+ try:
132
+ process = subprocess.run(
133
+ import_cmd,
134
+ shell=True,
135
+ executable="/bin/bash",
136
+ capture_output=True,
137
+ text=True,
138
+ timeout=IMPORT_TIMEOUT,
139
+ )
140
+ except subprocess.TimeoutExpired:
141
+ pytest.fail(f"Timed out after {IMPORT_TIMEOUT}s importing {env_dir.name}")
120
142
  assert process.returncode == 0, "Failed to import environment"
121
143
 
122
144
 
123
145
  def help_test_can_load_env(tmp_venv_dir: Path, env_dir: Path):
124
146
  """Test that the environment can be loaded."""
125
147
  load_cmd = f"""cd {tmp_venv_dir} && source .venv/bin/activate && uv run python -c 'import verifiers as vf; vf.load_environment("{env_dir.name}")'"""
126
- process = subprocess.run(
127
- load_cmd, shell=True, executable="/bin/bash", capture_output=True, text=True
128
- )
148
+ try:
149
+ process = subprocess.run(
150
+ load_cmd,
151
+ shell=True,
152
+ executable="/bin/bash",
153
+ capture_output=True,
154
+ text=True,
155
+ timeout=LOAD_TIMEOUT,
156
+ )
157
+ except subprocess.TimeoutExpired:
158
+ pytest.fail(f"Timed out after {LOAD_TIMEOUT}s loading {env_dir.name}")
129
159
  assert process.returncode == 0, "Failed to load environment"
130
160
 
131
161
 
132
162
  def help_test_can_eval_env(tmp_venv_dir: Path, env_dir: Path):
133
163
  """Test that the environment can be run via vf-eval."""
134
164
  eval_cmd = f"cd {tmp_venv_dir} && source .venv/bin/activate && uv run vf-eval {env_dir.name} -n 1 -r 1 -t 512"
135
- process = subprocess.run(
136
- eval_cmd, shell=True, executable="/bin/bash", capture_output=True, text=True
137
- )
165
+ try:
166
+ process = subprocess.run(
167
+ eval_cmd,
168
+ shell=True,
169
+ executable="/bin/bash",
170
+ capture_output=True,
171
+ text=True,
172
+ timeout=EVAL_TIMEOUT,
173
+ )
174
+ except subprocess.TimeoutExpired:
175
+ pytest.fail(f"Timed out after {EVAL_TIMEOUT}s evaluating {env_dir.name}")
138
176
  assert process.returncode == 0, "Failed to evaluate environment"