verifiers 0.1.11.dev0__tar.gz → 0.1.11.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185)
  1. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/PKG-INFO +8 -8
  2. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/README.md +7 -7
  3. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/conftest.py +6 -0
  4. verifiers-0.1.11.dev1/tests/test_env_crash_recovery.py +237 -0
  5. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_rlm_env.py +572 -7
  6. verifiers-0.1.11.dev1/tests/test_rollout_gateway_env.py +350 -0
  7. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_save_utils.py +205 -0
  8. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/AGENTS.md +1 -1
  9. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/__init__.py +6 -1
  10. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/clients/openai_chat_completions_client.py +64 -21
  11. verifiers-0.1.11.dev1/verifiers/clients/openai_chat_completions_token_client.py +236 -0
  12. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/env_group.py +0 -6
  13. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/environment.py +41 -26
  14. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/experimental/README.md +3 -1
  15. verifiers-0.1.11.dev1/verifiers/envs/experimental/__init__.py +4 -0
  16. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/experimental/cli_agent_env.py +27 -12
  17. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/experimental/rlm_env.py +333 -55
  18. verifiers-0.1.11.dev1/verifiers/envs/experimental/rollout_gateway_mixin.py +397 -0
  19. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/integrations/browser_env/browser_env.py +7 -1
  20. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +60 -44
  21. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/integrations/openenv_env.py +7 -1
  22. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/integrations/textarena_env.py +33 -11
  23. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/errors.py +6 -0
  24. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/scripts/eval.py +101 -33
  25. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/scripts/gepa.py +1 -1
  26. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/types.py +5 -0
  27. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/async_utils.py +2 -6
  28. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/eval_utils.py +26 -2
  29. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/logging_utils.py +44 -6
  30. verifiers-0.1.11.dev1/verifiers/utils/metric_utils.py +69 -0
  31. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/response_utils.py +5 -0
  32. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/save_utils.py +11 -0
  33. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/worker_utils.py +15 -32
  34. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/workers/client/env_client.py +22 -2
  35. verifiers-0.1.11.dev1/verifiers/workers/client/zmq_env_client.py +408 -0
  36. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/workers/server/env_server.py +64 -29
  37. verifiers-0.1.11.dev1/verifiers/workers/server/zmq_env_server.py +246 -0
  38. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/workers/types.py +21 -0
  39. verifiers-0.1.11.dev0/verifiers/clients/openai_chat_completions_token_client.py +0 -67
  40. verifiers-0.1.11.dev0/verifiers/envs/experimental/__init__.py +0 -3
  41. verifiers-0.1.11.dev0/verifiers/utils/token_utils.py +0 -174
  42. verifiers-0.1.11.dev0/verifiers/workers/client/zmq_env_client.py +0 -198
  43. verifiers-0.1.11.dev0/verifiers/workers/server/zmq_env_server.py +0 -148
  44. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/.gitignore +0 -0
  45. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/LICENSE +0 -0
  46. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/pyproject.toml +0 -0
  47. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/AGENTS.md +0 -0
  48. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/README.md +0 -0
  49. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/__init__.py +0 -0
  50. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_browser_env.py +0 -0
  51. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_build_script.py +0 -0
  52. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_cli_agent_env.py +0 -0
  53. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_client_auth_errors.py +0 -0
  54. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_client_config.py +0 -0
  55. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_client_multimodal_types.py +0 -0
  56. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_decorator_ranks.py +0 -0
  57. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_endpoint_registry.py +0 -0
  58. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_env_group.py +0 -0
  59. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_environment.py +0 -0
  60. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_environment_extra.py +0 -0
  61. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_envs.py +0 -0
  62. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_error_chain.py +0 -0
  63. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_eval_cli.py +0 -0
  64. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_eval_display.py +0 -0
  65. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_eval_utils.py +0 -0
  66. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_gepa_cli.py +0 -0
  67. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_gym_env.py +0 -0
  68. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_imports.py +0 -0
  69. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_install_utils.py +0 -0
  70. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_interception_utils.py +0 -0
  71. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_logging.py +0 -0
  72. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_math_rubric.py +0 -0
  73. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_maybe_think_parser.py +0 -0
  74. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_message_utils.py +0 -0
  75. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_message_utils_audio.py +0 -0
  76. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_multiturn_env.py +0 -0
  77. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_opencode_harbor.py +0 -0
  78. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_parser.py +0 -0
  79. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_path_utils.py +0 -0
  80. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_prime_plugin.py +0 -0
  81. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_rlm_env_sandbox.py +0 -0
  82. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_rubric.py +0 -0
  83. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_rubric_group.py +0 -0
  84. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_sandbox_env.py +0 -0
  85. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_sandbox_mixin.py +0 -0
  86. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_setup_script.py +0 -0
  87. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_singleturn_env.py +0 -0
  88. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_stateful_tool_env.py +0 -0
  89. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_think_parser.py +0 -0
  90. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_tool_env.py +0 -0
  91. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_tool_utils.py +0 -0
  92. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_trajectory_processing.py +0 -0
  93. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_tui_info_formatting.py +0 -0
  94. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_xml_parser.py +0 -0
  95. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/cli/__init__.py +0 -0
  96. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/cli/commands/__init__.py +0 -0
  97. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/cli/commands/build.py +0 -0
  98. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/cli/commands/eval.py +0 -0
  99. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/cli/commands/gepa.py +0 -0
  100. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/cli/commands/init.py +0 -0
  101. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/cli/commands/install.py +0 -0
  102. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/cli/commands/setup.py +0 -0
  103. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/cli/plugins/__init__.py +0 -0
  104. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/cli/plugins/prime.py +0 -0
  105. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/cli/tui.py +0 -0
  106. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/clients/__init__.py +0 -0
  107. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/clients/anthropic_messages_client.py +0 -0
  108. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/clients/client.py +0 -0
  109. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/clients/openai_completions_client.py +0 -0
  110. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/decorators.py +0 -0
  111. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/AGENTS.md +0 -0
  112. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/__init__.py +0 -0
  113. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/experimental/gym_env.py +0 -0
  114. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/experimental/harbor_env.py +0 -0
  115. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/experimental/mcp_env.py +0 -0
  116. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/experimental/sandbox_mixin.py +0 -0
  117. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/integrations/README.md +0 -0
  118. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/integrations/__init__.py +0 -0
  119. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/integrations/browser_env/__init__.py +0 -0
  120. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
  121. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
  122. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +0 -0
  123. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
  124. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/multiturn_env.py +0 -0
  125. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/python_env.py +0 -0
  126. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/sandbox_env.py +0 -0
  127. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/singleturn_env.py +0 -0
  128. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/stateful_tool_env.py +0 -0
  129. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/tool_env.py +0 -0
  130. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/gepa/__init__.py +0 -0
  131. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/gepa/adapter.py +0 -0
  132. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/gepa/config.py +0 -0
  133. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/gepa/display.py +0 -0
  134. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/gepa/gepa_utils.py +0 -0
  135. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/parsers/__init__.py +0 -0
  136. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/parsers/maybe_think_parser.py +0 -0
  137. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/parsers/parser.py +0 -0
  138. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/parsers/think_parser.py +0 -0
  139. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/parsers/xml_parser.py +0 -0
  140. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/rl/README.md +0 -0
  141. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/rl/__init__.py +0 -0
  142. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/rl/inference/__init__.py +0 -0
  143. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/rl/inference/client.py +0 -0
  144. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/rl/inference/server.py +0 -0
  145. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/rl/trainer/__init__.py +0 -0
  146. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/rl/trainer/config.py +0 -0
  147. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/rl/trainer/orchestrator.py +0 -0
  148. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/rl/trainer/trainer.py +0 -0
  149. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/rl/trainer/utils.py +0 -0
  150. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/rubrics/__init__.py +0 -0
  151. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/rubrics/judge_rubric.py +0 -0
  152. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/rubrics/math_rubric.py +0 -0
  153. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/rubrics/rubric.py +0 -0
  154. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/rubrics/rubric_group.py +0 -0
  155. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/scripts/__init__.py +0 -0
  156. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/scripts/build.py +0 -0
  157. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/scripts/init.py +0 -0
  158. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/scripts/install.py +0 -0
  159. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/scripts/prime_rl.py +0 -0
  160. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/scripts/rl.py +0 -0
  161. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/scripts/setup.py +0 -0
  162. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/scripts/train.py +0 -0
  163. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/scripts/tui.py +0 -0
  164. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/scripts/vllm.py +0 -0
  165. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/__init__.py +0 -0
  166. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/client_utils.py +0 -0
  167. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/config_utils.py +0 -0
  168. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/data_utils.py +0 -0
  169. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/display_utils.py +0 -0
  170. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/env_utils.py +0 -0
  171. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/error_utils.py +0 -0
  172. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/eval_display.py +0 -0
  173. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/heartbeat.py +0 -0
  174. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/import_utils.py +0 -0
  175. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/install_utils.py +0 -0
  176. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/interception_utils.py +0 -0
  177. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/message_utils.py +0 -0
  178. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/path_utils.py +0 -0
  179. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/thread_utils.py +0 -0
  180. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/threaded_sandbox_client.py +0 -0
  181. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/tool_utils.py +0 -0
  182. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/tunnel_utils.py +0 -0
  183. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/usage_utils.py +0 -0
  184. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/version_utils.py +0 -0
  185. {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/workers/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: verifiers
3
- Version: 0.1.11.dev0
3
+ Version: 0.1.11.dev1
4
4
  Summary: Verifiers: Environments for LLM Reinforcement Learning
5
5
  Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
6
6
  Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
@@ -106,7 +106,7 @@ Verifiers: Environments for LLM Reinforcement Learning
106
106
 
107
107
  - [01/08/26] v0.1.9 is released, featuring a number of new experimental environment class types, monitor rubrics for automatic metric collection, improved workspace setup flow, improved error handling, bug fixes, and a documentation overhaul.
108
108
  - [11/19/25] v0.1.8 is released, featuring a major refactor of the rollout system to use trajectory-based tracking for token-in token-out training across turns, as well as support for truncated or branching rollouts.
109
- - [11/07/25] Verifiers v0.1.7 is released! This includes an improved quickstart configuration for training with [prime-rl], a new included "nano" trainer (`vf.RLTrainer`, replacing `vf.GRPOTrainer`), and a number of bug fixes and improvements to the documentation.
109
+ - [11/07/25] Verifiers v0.1.7 is released! This includes an improved quickstart configuration for training with [prime-rl](https://github.com/PrimeIntellect-ai/prime-rl), a new included "nano" trainer (`vf.RLTrainer`, replacing `vf.GRPOTrainer`), and a number of bug fixes and improvements to the documentation.
110
110
  - [10/27/25] A new iteration of the Prime Intellect [Environments Program](https://docs.google.com/spreadsheets/d/13UDfRDjgIZXsMI2s9-Lmn8KSMMsgk2_zsfju6cx_pNU/edit?gid=0#gid=0) is live!
111
111
 
112
112
 
@@ -229,17 +229,17 @@ prime eval run primeintellect/math-python
229
229
 
230
230
  ## Documentation
231
231
 
232
- **[Environments](environments.md)** — Create datasets, rubrics, and custom multi-turn interaction protocols.
232
+ **[Environments](docs/environments.md)** — Create datasets, rubrics, and custom multi-turn interaction protocols.
233
233
 
234
- **[Evaluation](evaluation.md)** - Evaluate models using your environments.
234
+ **[Evaluation](docs/evaluation.md)** - Evaluate models using your environments.
235
235
 
236
- **[Training](training.md)** — Train models in your environments with reinforcement learning.
236
+ **[Training](docs/training.md)** — Train models in your environments with reinforcement learning.
237
237
 
238
- **[Development](development.md)** — Contributing to verifiers
238
+ **[Development](docs/development.md)** — Contributing to verifiers
239
239
 
240
- **[API Reference](reference.md)** — Understanding the API and data structures
240
+ **[API Reference](docs/reference.md)** — Understanding the API and data structures
241
241
 
242
- **[FAQs](faqs.md)** - Other frequently asked questions.
242
+ **[FAQs](docs/faqs.md)** - Other frequently asked questions.
243
243
 
244
244
 
245
245
  ## Citation
@@ -36,7 +36,7 @@ Verifiers: Environments for LLM Reinforcement Learning
36
36
 
37
37
  - [01/08/26] v0.1.9 is released, featuring a number of new experimental environment class types, monitor rubrics for automatic metric collection, improved workspace setup flow, improved error handling, bug fixes, and a documentation overhaul.
38
38
  - [11/19/25] v0.1.8 is released, featuring a major refactor of the rollout system to use trajectory-based tracking for token-in token-out training across turns, as well as support for truncated or branching rollouts.
39
- - [11/07/25] Verifiers v0.1.7 is released! This includes an improved quickstart configuration for training with [prime-rl], a new included "nano" trainer (`vf.RLTrainer`, replacing `vf.GRPOTrainer`), and a number of bug fixes and improvements to the documentation.
39
+ - [11/07/25] Verifiers v0.1.7 is released! This includes an improved quickstart configuration for training with [prime-rl](https://github.com/PrimeIntellect-ai/prime-rl), a new included "nano" trainer (`vf.RLTrainer`, replacing `vf.GRPOTrainer`), and a number of bug fixes and improvements to the documentation.
40
40
  - [10/27/25] A new iteration of the Prime Intellect [Environments Program](https://docs.google.com/spreadsheets/d/13UDfRDjgIZXsMI2s9-Lmn8KSMMsgk2_zsfju6cx_pNU/edit?gid=0#gid=0) is live!
41
41
 
42
42
 
@@ -159,17 +159,17 @@ prime eval run primeintellect/math-python
159
159
 
160
160
  ## Documentation
161
161
 
162
- **[Environments](environments.md)** — Create datasets, rubrics, and custom multi-turn interaction protocols.
162
+ **[Environments](docs/environments.md)** — Create datasets, rubrics, and custom multi-turn interaction protocols.
163
163
 
164
- **[Evaluation](evaluation.md)** - Evaluate models using your environments.
164
+ **[Evaluation](docs/evaluation.md)** - Evaluate models using your environments.
165
165
 
166
- **[Training](training.md)** — Train models in your environments with reinforcement learning.
166
+ **[Training](docs/training.md)** — Train models in your environments with reinforcement learning.
167
167
 
168
- **[Development](development.md)** — Contributing to verifiers
168
+ **[Development](docs/development.md)** — Contributing to verifiers
169
169
 
170
- **[API Reference](reference.md)** — Understanding the API and data structures
170
+ **[API Reference](docs/reference.md)** — Understanding the API and data structures
171
171
 
172
- **[FAQs](faqs.md)** - Other frequently asked questions.
172
+ **[FAQs](docs/faqs.md)** - Other frequently asked questions.
173
173
 
174
174
 
175
175
  ## Citation
@@ -554,6 +554,9 @@ def make_metadata() -> Callable[..., GenerateMetadata]:
554
554
  time_ms: float = 0.0,
555
555
  avg_reward: float = 0.0,
556
556
  avg_metrics: dict[str, float] = {},
557
+ pass_at_k: dict[str, float] = {},
558
+ pass_all_k: dict[str, float] = {},
559
+ pass_threshold: float = 0.5,
557
560
  usage: dict[str, float] | None = None,
558
561
  version_info: dict | None = None,
559
562
  state_columns: list[str] = ["foo"],
@@ -579,6 +582,9 @@ def make_metadata() -> Callable[..., GenerateMetadata]:
579
582
  time_ms=time_ms,
580
583
  avg_reward=avg_reward,
581
584
  avg_metrics=avg_metrics,
585
+ pass_at_k=pass_at_k,
586
+ pass_all_k=pass_all_k,
587
+ pass_threshold=pass_threshold,
582
588
  usage=usage,
583
589
  version_info=version_info,
584
590
  state_columns=state_columns,
@@ -0,0 +1,237 @@
1
+ """Tests for environment server crash detection and recovery."""
2
+
3
+ import asyncio
4
+ import time
5
+ from unittest.mock import patch
6
+
7
+ import pytest
8
+
9
+ from verifiers.workers.client.zmq_env_client import ZMQEnvClient
10
+ from verifiers.workers.types import (
11
+ HealthRequest,
12
+ HealthResponse,
13
+ PendingRequest,
14
+ ServerState,
15
+ )
16
+
17
+
18
+ class TestStateTransitions:
19
+ """Tests for health-check-driven state transitions (via dedicated thread callbacks)."""
20
+
21
+ @pytest.mark.asyncio
22
+ async def test_startup_to_healthy_to_unhealthy(self):
23
+ """Callbacks drive STARTUP → HEALTHY → UNHEALTHY via healthy_event."""
24
+ client = ZMQEnvClient(
25
+ address="tcp://127.0.0.1:5555",
26
+ health_check_interval=0, # disable auto thread
27
+ )
28
+ client.loop = asyncio.get_running_loop()
29
+
30
+ assert client.server_state == ServerState.STARTUP
31
+ assert not client.healthy_event.is_set()
32
+
33
+ # STARTUP → HEALTHY
34
+ client.on_became_healthy(ServerState.STARTUP)
35
+ assert client.server_state == ServerState.HEALTHY
36
+ assert client.healthy_event.is_set()
37
+
38
+ # HEALTHY → UNHEALTHY (after 5 consecutive failures)
39
+ client.on_became_unhealthy(5)
40
+ await asyncio.sleep(0.1) # let _do_cancel_pending run
41
+ assert client.server_state == ServerState.UNHEALTHY
42
+ assert not client.healthy_event.is_set()
43
+
44
+ await client.close()
45
+
46
+ @pytest.mark.asyncio
47
+ async def test_unhealthy_cancels_pending_with_server_error(self):
48
+ """HEALTHY → UNHEALTHY transition cancels pending requests with ServerError."""
49
+ client = ZMQEnvClient(
50
+ address="tcp://127.0.0.1:5555",
51
+ health_check_interval=0, # disable auto thread
52
+ )
53
+ client.loop = asyncio.get_running_loop()
54
+
55
+ # Start in HEALTHY state
56
+ client.server_state = ServerState.HEALTHY
57
+ client.healthy_event.set()
58
+
59
+ # Add a pending request
60
+ future = asyncio.Future()
61
+ async with client.pending_lock:
62
+ client.pending_requests["test_req"] = PendingRequest(
63
+ request_id="test_req",
64
+ request=HealthRequest(),
65
+ submitted_at=time.time(),
66
+ timeout=10.0,
67
+ future=future,
68
+ )
69
+
70
+ # Trigger UNHEALTHY
71
+ client.on_became_unhealthy(5)
72
+ await asyncio.sleep(0.1) # let _do_cancel_pending run
73
+
74
+ assert future.done()
75
+ assert len(client.pending_requests) == 0
76
+ with pytest.raises(RuntimeError, match="unhealthy"):
77
+ future.result()
78
+
79
+ await client.close()
80
+
81
+
82
+ class TestRetryOnServerError:
83
+ """Tests for send_request retry after ServerError."""
84
+
85
+ @pytest.mark.asyncio
86
+ async def test_retry_after_recovery(self):
87
+ """ServerError → wait for healthy_event → retry succeeds."""
88
+ client = ZMQEnvClient(
89
+ address="tcp://127.0.0.1:5555",
90
+ health_check_interval=0,
91
+ )
92
+
93
+ attempt_count = 0
94
+
95
+ async def mock_send(*args, **kwargs):
96
+ nonlocal attempt_count
97
+ attempt_count += 1
98
+
99
+ if attempt_count == 1:
100
+ # First attempt: simulate server crash
101
+ async def fail_then_recover():
102
+ await asyncio.sleep(0.1)
103
+ await client.cancel_all_pending("Connection lost")
104
+ await asyncio.sleep(0.1)
105
+ client.healthy_event.set()
106
+
107
+ asyncio.create_task(fail_then_recover())
108
+ else:
109
+ # Second attempt: succeed
110
+ async def succeed():
111
+ await asyncio.sleep(0.05)
112
+ req_id = list(client.pending_requests.keys())[0]
113
+ pending = client.pending_requests.get(req_id)
114
+ if pending and not pending.future.done():
115
+ pending.future.set_result(
116
+ HealthResponse(success=True).model_dump()
117
+ )
118
+
119
+ asyncio.create_task(succeed())
120
+
121
+ with (
122
+ patch.object(client.socket, "connect"),
123
+ patch.object(client.socket, "send_multipart", new=mock_send),
124
+ ):
125
+ await client.ensure_started()
126
+ response = await client.send_request(
127
+ HealthRequest(), HealthResponse, timeout=5.0
128
+ )
129
+
130
+ assert attempt_count == 2
131
+ assert response.success
132
+
133
+ await client.close()
134
+
135
+ @pytest.mark.asyncio
136
+ async def test_recovery_timeout(self):
137
+ """ServerError + no recovery within timeout → TimeoutError."""
138
+ client = ZMQEnvClient(
139
+ address="tcp://127.0.0.1:5555",
140
+ health_check_interval=0,
141
+ recovery_timeout=0.5,
142
+ )
143
+
144
+ async def mock_send(*args, **kwargs):
145
+ async def fail():
146
+ await asyncio.sleep(0.05)
147
+ await client.cancel_all_pending("Connection lost")
148
+
149
+ asyncio.create_task(fail())
150
+
151
+ with (
152
+ patch.object(client.socket, "connect"),
153
+ patch.object(client.socket, "send_multipart", new=mock_send),
154
+ ):
155
+ await client.ensure_started()
156
+
157
+ with pytest.raises(TimeoutError, match="did not recover"):
158
+ await client.send_request(HealthRequest(), HealthResponse, timeout=5.0)
159
+
160
+ await client.close()
161
+
162
+ @pytest.mark.asyncio
163
+ async def test_no_retry_on_runtime_error(self):
164
+ """Plain RuntimeError propagates immediately without retry."""
165
+ client = ZMQEnvClient(
166
+ address="tcp://127.0.0.1:5555",
167
+ health_check_interval=0,
168
+ )
169
+
170
+ attempt_count = 0
171
+
172
+ async def mock_send(*args, **kwargs):
173
+ nonlocal attempt_count
174
+ attempt_count += 1
175
+
176
+ async def fail():
177
+ await asyncio.sleep(0.05)
178
+ req_id = list(client.pending_requests.keys())[0]
179
+ pending = client.pending_requests.get(req_id)
180
+ if pending and not pending.future.done():
181
+ pending.future.set_exception(RuntimeError("Bad request"))
182
+
183
+ asyncio.create_task(fail())
184
+
185
+ with (
186
+ patch.object(client.socket, "connect"),
187
+ patch.object(client.socket, "send_multipart", new=mock_send),
188
+ ):
189
+ await client.ensure_started()
190
+
191
+ with pytest.raises(RuntimeError, match="Bad request"):
192
+ await client.send_request(HealthRequest(), HealthResponse, timeout=5.0)
193
+
194
+ assert attempt_count == 1
195
+
196
+ await client.close()
197
+
198
+
199
+ class TestWaitForServerStartup:
200
+ """Tests for event-based wait_for_server_startup."""
201
+
202
+ @pytest.mark.asyncio
203
+ async def test_delayed_startup(self):
204
+ """Startup succeeds when health thread detects server after a delay."""
205
+ client = ZMQEnvClient(
206
+ address="tcp://127.0.0.1:5555",
207
+ health_check_interval=0, # disable auto thread
208
+ )
209
+ client.loop = asyncio.get_running_loop()
210
+
211
+ # Simulate health thread detecting server after a delay
212
+ async def simulate_health_thread():
213
+ await asyncio.sleep(0.2)
214
+ client.on_became_healthy(ServerState.STARTUP)
215
+
216
+ asyncio.create_task(simulate_health_thread())
217
+
218
+ with patch.object(client.socket, "connect"):
219
+ await client.wait_for_server_startup(timeout=3.0)
220
+
221
+ assert client.healthy_event.is_set()
222
+
223
+ await client.close()
224
+
225
+ @pytest.mark.asyncio
226
+ async def test_startup_timeout(self):
227
+ """Startup raises TimeoutError when server never becomes healthy."""
228
+ client = ZMQEnvClient(
229
+ address="tcp://127.0.0.1:5555",
230
+ health_check_interval=0, # disable auto thread
231
+ )
232
+
233
+ with patch.object(client.socket, "connect"):
234
+ with pytest.raises(TimeoutError, match="did not become healthy"):
235
+ await client.wait_for_server_startup(timeout=0.5)
236
+
237
+ await client.close()