verifiers 0.1.12.dev1__tar.gz → 0.1.12.dev3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195)
  1. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/PKG-INFO +5 -3
  2. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/pyproject.toml +5 -3
  3. verifiers-0.1.12.dev3/tests/test_composable_env.py +260 -0
  4. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_endpoint_registry.py +97 -0
  5. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_eval_cli.py +82 -0
  6. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_gepa_cli.py +27 -1
  7. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_rlm_env.py +684 -231
  8. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_sandbox_mixin.py +7 -48
  9. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_tui_info_formatting.py +58 -16
  10. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/__init__.py +1 -1
  11. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/clients/openai_chat_completions_client.py +5 -1
  12. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/clients/openai_chat_completions_token_client.py +109 -92
  13. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/clients/openai_completions_client.py +7 -1
  14. verifiers-0.1.12.dev3/verifiers/envs/experimental/__init__.py +28 -0
  15. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/experimental/cli_agent_env.py +31 -5
  16. verifiers-0.1.12.dev3/verifiers/envs/experimental/composable/README.md +151 -0
  17. verifiers-0.1.12.dev3/verifiers/envs/experimental/composable/__init__.py +17 -0
  18. verifiers-0.1.12.dev3/verifiers/envs/experimental/composable/composable_env.py +205 -0
  19. verifiers-0.1.12.dev3/verifiers/envs/experimental/composable/harness.py +58 -0
  20. verifiers-0.1.12.dev3/verifiers/envs/experimental/composable/task.py +362 -0
  21. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/experimental/opencode_env.py +6 -2
  22. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/experimental/rlm_env.py +958 -595
  23. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/experimental/sandbox_mixin.py +11 -36
  24. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/multiturn_env.py +14 -0
  25. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/rubrics/math_rubric.py +7 -9
  26. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/scripts/eval.py +33 -8
  27. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/scripts/gepa.py +26 -1
  28. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/scripts/tui.py +887 -235
  29. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/serve/server/env_server.py +2 -1
  30. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/serve/server/env_worker.py +2 -1
  31. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/types.py +24 -0
  32. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/eval_utils.py +14 -0
  33. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/process_utils.py +15 -0
  34. verifiers-0.1.12.dev1/verifiers/envs/experimental/__init__.py +0 -3
  35. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/.gitignore +0 -0
  36. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/LICENSE +0 -0
  37. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/README.md +0 -0
  38. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/AGENTS.md +0 -0
  39. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/README.md +0 -0
  40. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/__init__.py +0 -0
  41. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/conftest.py +0 -0
  42. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_browser_env.py +0 -0
  43. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_build_script.py +0 -0
  44. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_cli_agent_env.py +0 -0
  45. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_client_auth_errors.py +0 -0
  46. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_client_config.py +0 -0
  47. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_client_multimodal_types.py +0 -0
  48. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_decorator_ranks.py +0 -0
  49. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_env_group.py +0 -0
  50. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_env_server.py +0 -0
  51. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_environment.py +0 -0
  52. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_environment_extra.py +0 -0
  53. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_envs.py +0 -0
  54. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_error_chain.py +0 -0
  55. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_eval_display.py +0 -0
  56. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_eval_utils.py +0 -0
  57. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_gym_env.py +0 -0
  58. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_imports.py +0 -0
  59. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_install_utils.py +0 -0
  60. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_interception_utils.py +0 -0
  61. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_logging.py +0 -0
  62. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_math_rubric.py +0 -0
  63. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_maybe_think_parser.py +0 -0
  64. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_message_utils.py +0 -0
  65. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_message_utils_multimodal.py +0 -0
  66. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_multiturn_env.py +0 -0
  67. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_openai_chat_completions_token_client.py +0 -0
  68. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_opencode_harbor.py +0 -0
  69. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_opencode_rlm_env.py +0 -0
  70. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_parser.py +0 -0
  71. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_path_utils.py +0 -0
  72. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_prime_plugin.py +0 -0
  73. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_rubric.py +0 -0
  74. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_rubric_group.py +0 -0
  75. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_sandbox_env.py +0 -0
  76. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_save_utils.py +0 -0
  77. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_setup_script.py +0 -0
  78. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_singleturn_env.py +0 -0
  79. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_stateful_tool_env.py +0 -0
  80. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_think_parser.py +0 -0
  81. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_tool_env.py +0 -0
  82. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_tool_utils.py +0 -0
  83. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_trajectory_processing.py +0 -0
  84. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_xml_parser.py +0 -0
  85. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/AGENTS.md +0 -0
  86. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/cli/__init__.py +0 -0
  87. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/cli/commands/__init__.py +0 -0
  88. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/cli/commands/build.py +0 -0
  89. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/cli/commands/eval.py +0 -0
  90. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/cli/commands/gepa.py +0 -0
  91. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/cli/commands/init.py +0 -0
  92. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/cli/commands/install.py +0 -0
  93. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/cli/commands/setup.py +0 -0
  94. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/cli/plugins/__init__.py +0 -0
  95. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/cli/plugins/prime.py +0 -0
  96. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/cli/tui.py +0 -0
  97. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/clients/__init__.py +0 -0
  98. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/clients/anthropic_messages_client.py +0 -0
  99. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/clients/client.py +0 -0
  100. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/decorators.py +0 -0
  101. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/AGENTS.md +0 -0
  102. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/__init__.py +0 -0
  103. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/env_group.py +0 -0
  104. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/environment.py +0 -0
  105. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/experimental/README.md +0 -0
  106. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/experimental/gym_env.py +0 -0
  107. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/experimental/harbor_env.py +0 -0
  108. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/experimental/mcp_env.py +0 -0
  109. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/experimental/opencode_qa_env.py +0 -0
  110. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/experimental/opencode_rlm_env.py +0 -0
  111. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/integrations/README.md +0 -0
  112. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/integrations/__init__.py +0 -0
  113. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/integrations/browser_env/README.md +0 -0
  114. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/integrations/browser_env/__init__.py +0 -0
  115. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
  116. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
  117. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
  118. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
  119. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +0 -0
  120. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/integrations/openenv_env.py +0 -0
  121. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
  122. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/integrations/textarena_env.py +0 -0
  123. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/python_env.py +0 -0
  124. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/sandbox_env.py +0 -0
  125. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/singleturn_env.py +0 -0
  126. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/stateful_tool_env.py +0 -0
  127. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/tool_env.py +0 -0
  128. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/errors.py +0 -0
  129. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/gepa/__init__.py +0 -0
  130. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/gepa/adapter.py +0 -0
  131. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/gepa/config.py +0 -0
  132. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/gepa/display.py +0 -0
  133. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/gepa/gepa_utils.py +0 -0
  134. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/parsers/__init__.py +0 -0
  135. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/parsers/maybe_think_parser.py +0 -0
  136. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/parsers/parser.py +0 -0
  137. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/parsers/think_parser.py +0 -0
  138. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/parsers/xml_parser.py +0 -0
  139. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/rl/README.md +0 -0
  140. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/rl/__init__.py +0 -0
  141. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/rl/inference/__init__.py +0 -0
  142. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/rl/inference/client.py +0 -0
  143. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/rl/inference/server.py +0 -0
  144. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/rl/trainer/__init__.py +0 -0
  145. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/rl/trainer/config.py +0 -0
  146. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/rl/trainer/orchestrator.py +0 -0
  147. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/rl/trainer/trainer.py +0 -0
  148. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/rl/trainer/utils.py +0 -0
  149. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/rubrics/__init__.py +0 -0
  150. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/rubrics/experimental/hybrid_math_rubric.py +0 -0
  151. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/rubrics/judge_rubric.py +0 -0
  152. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/rubrics/rubric.py +0 -0
  153. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/rubrics/rubric_group.py +0 -0
  154. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/scripts/__init__.py +0 -0
  155. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/scripts/build.py +0 -0
  156. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/scripts/init.py +0 -0
  157. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/scripts/install.py +0 -0
  158. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/scripts/prime_rl.py +0 -0
  159. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/scripts/rl.py +0 -0
  160. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/scripts/setup.py +0 -0
  161. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/scripts/train.py +0 -0
  162. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/scripts/vllm.py +0 -0
  163. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/serve/__init__.py +0 -0
  164. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/serve/client/env_client.py +0 -0
  165. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/serve/client/zmq_env_client.py +0 -0
  166. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/serve/server/__init__.py +0 -0
  167. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/serve/server/env_router.py +0 -0
  168. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/serve/server/zmq_env_server.py +0 -0
  169. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/serve/types.py +0 -0
  170. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/__init__.py +0 -0
  171. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/async_utils.py +0 -0
  172. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/client_utils.py +0 -0
  173. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/config_utils.py +0 -0
  174. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/data_utils.py +0 -0
  175. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/display_utils.py +0 -0
  176. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/env_utils.py +0 -0
  177. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/error_utils.py +0 -0
  178. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/eval_display.py +0 -0
  179. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/heartbeat.py +0 -0
  180. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/import_utils.py +0 -0
  181. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/install_utils.py +0 -0
  182. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/interception_utils.py +0 -0
  183. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/logging_utils.py +0 -0
  184. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/message_utils.py +0 -0
  185. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/metric_utils.py +0 -0
  186. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/path_utils.py +0 -0
  187. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/response_utils.py +0 -0
  188. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/save_utils.py +0 -0
  189. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/serve_utils.py +0 -0
  190. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/thread_utils.py +0 -0
  191. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/threaded_sandbox_client.py +0 -0
  192. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/tool_utils.py +0 -0
  193. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/tunnel_utils.py +0 -0
  194. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/usage_utils.py +0 -0
  195. {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/version_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: verifiers
3
- Version: 0.1.12.dev1
3
+ Version: 0.1.12.dev3
4
4
  Summary: Verifiers: Environments for LLM Reinforcement Learning
5
5
  Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
6
6
  Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
@@ -34,12 +34,14 @@ Requires-Dist: nest-asyncio>=1.6.0
34
34
  Requires-Dist: numpy
35
35
  Requires-Dist: openai-agents>=0.0.7
36
36
  Requires-Dist: openai>=1.108.1
37
- Requires-Dist: prime-sandboxes>=0.2.16
38
- Requires-Dist: prime-tunnel>=0.1.4
37
+ Requires-Dist: prime-sandboxes>=0.2.19
38
+ Requires-Dist: prime-tunnel>=0.1.5
39
39
  Requires-Dist: pydantic>=2.11.9
40
40
  Requires-Dist: pyzmq>=27.1.0
41
+ Requires-Dist: regex<2026.4.4
41
42
  Requires-Dist: requests
42
43
  Requires-Dist: rich
44
+ Requires-Dist: setproctitle>=1.3.0
43
45
  Requires-Dist: tenacity>=8.5.0
44
46
  Requires-Dist: textual
45
47
  Requires-Dist: tomli; python_version < '3.11'
@@ -37,8 +37,8 @@ dependencies = [
37
37
  "nest-asyncio>=1.6.0", # for jupyter notebooks
38
38
  "openai>=1.108.1",
39
39
  "openai-agents>=0.0.7",
40
- "prime-tunnel>=0.1.4",
41
- "prime-sandboxes>=0.2.16",
40
+ "prime-tunnel>=0.1.5",
41
+ "prime-sandboxes>=0.2.19",
42
42
  "pydantic>=2.11.9",
43
43
  "requests",
44
44
  "rich",
@@ -51,6 +51,8 @@ dependencies = [
51
51
  "pyzmq>=27.1.0",
52
52
  "msgpack>=1.1.2",
53
53
  "aiolimiter>=1.2.1",
54
+ "setproctitle>=1.3.0",
55
+ "regex<2026.4.4", # 2026.4.4 missing cp312/cp313 wheels
54
56
  ]
55
57
 
56
58
  [dependency-groups]
@@ -104,7 +106,7 @@ rl = [
104
106
 
105
107
  [tool.uv]
106
108
  preview = true
107
- required-version = "<0.11.0"
109
+ required-version = ">=0.11.1"
108
110
 
109
111
  [tool.uv.extra-build-dependencies]
110
112
  flash-attn = [{ requirement = "torch", match-runtime = true }]
@@ -0,0 +1,260 @@
1
+ """Tests for the composable architecture: Task, TaskSet, SandboxTaskSet, SandboxSpec."""
2
+
3
+ from types import SimpleNamespace
4
+ from unittest.mock import AsyncMock
5
+
6
+ import pytest
7
+
8
+ import verifiers as vf
9
+ from verifiers.envs.experimental.composable import (
10
+ ComposableEnv,
11
+ Harness,
12
+ SandboxSpec,
13
+ SandboxTaskSet,
14
+ Task,
15
+ TaskSet,
16
+ )
17
+
18
+
19
+ # ── Mock Rubrics ──────────────────────────────────────────────────────
20
+
21
+
22
+ class MockSandboxRubric(vf.Rubric):
23
+ def __init__(self, **kwargs):
24
+ super().__init__(**kwargs)
25
+ self.add_reward_func(self.solved)
26
+
27
+ async def solved(self, state, **kwargs) -> float:
28
+ return 1.0 if state.get("test_output") == "PASS" else 0.0
29
+
30
+
31
+ class MockMathRubric(vf.Rubric):
32
+ def __init__(self, **kwargs):
33
+ super().__init__(**kwargs)
34
+ self.add_reward_func(self.correct)
35
+
36
+ async def correct(self, state, **kwargs) -> float:
37
+ return 1.0 if state.get("info", {}).get("id") == 0 else 0.0
38
+
39
+
40
+ # ── Mock TaskSets ───────────────────────────────────────────────────────
41
+
42
+
43
+ class MockSandboxTaskSet(SandboxTaskSet):
44
+ """SandboxTaskSet for testing."""
45
+
46
+ def get_instruction(self, info):
47
+ return f"Fix bug #{info.get('id', 0)}"
48
+
49
+ def get_sandbox_spec(self, info):
50
+ return SandboxSpec(image="python:3.11-slim", cpu_cores=2, memory_gb=2)
51
+
52
+ def get_rubric(self):
53
+ return MockSandboxRubric()
54
+
55
+ def get_workdir(self, info):
56
+ return "/testbed"
57
+
58
+ def get_env_vars(self):
59
+ return {"FOO": "bar"}
60
+
61
+
62
+ class MockTaskSet(TaskSet):
63
+ """Plain TaskSet (no sandbox) for testing."""
64
+
65
+ def get_instruction(self, info):
66
+ return info.get("question", "")
67
+
68
+ def get_rubric(self):
69
+ return MockMathRubric()
70
+
71
+
72
+ def _make_dataset(n=3):
73
+ from datasets import Dataset
74
+
75
+ return Dataset.from_dict(
76
+ {
77
+ "info": [{"id": i, "question": f"q{i}"} for i in range(n)],
78
+ "answer": ["" for _ in range(n)],
79
+ }
80
+ )
81
+
82
+
83
+ # ── SandboxSpec ─────────────────────────────────────────────────────────
84
+
85
+
86
+ def test_sandbox_spec_defaults():
87
+ spec = SandboxSpec()
88
+ assert spec.image == "python:3.11-slim"
89
+ assert spec.cpu_cores == 4
90
+
91
+
92
+ def test_sandbox_spec_custom():
93
+ spec = SandboxSpec(image="lean-tactic:v4.27", gpu_count=1)
94
+ assert spec.image == "lean-tactic:v4.27"
95
+ assert spec.gpu_count == 1
96
+
97
+
98
+ # ── Task from SandboxTaskSet ───────────────────────────────────────────
99
+
100
+
101
+ def test_task_sandbox_spec():
102
+ ts = MockSandboxTaskSet(dataset=_make_dataset(), name="test")
103
+ task = ts[0]
104
+ assert isinstance(task, Task)
105
+ assert task.sandbox_spec is not None
106
+ assert task.sandbox_spec.image == "python:3.11-slim"
107
+ assert task.sandbox_spec.cpu_cores == 2
108
+
109
+
110
+ def test_task_image():
111
+ ts = MockSandboxTaskSet(dataset=_make_dataset(), name="test")
112
+ task = ts[0]
113
+ assert task.image == "python:3.11-slim"
114
+
115
+
116
+ def test_task_workdir():
117
+ ts = MockSandboxTaskSet(dataset=_make_dataset(), name="test")
118
+ task = ts[0]
119
+ assert task.workdir == "/testbed"
120
+
121
+
122
+ def test_task_repr_sandbox():
123
+ ts = MockSandboxTaskSet(dataset=_make_dataset(), name="test")
124
+ task = ts[0]
125
+ assert "python:3.11-slim" in repr(task)
126
+
127
+
128
+ # ── Task from plain TaskSet ────────────────────────────────────────────
129
+
130
+
131
+ def test_task_no_sandbox():
132
+ ts = MockTaskSet(dataset=_make_dataset(), name="math")
133
+ task = ts[0]
134
+ assert task.sandbox_spec is None
135
+ assert task.image is None
136
+
137
+
138
+ def test_task_repr_no_sandbox():
139
+ ts = MockTaskSet(dataset=_make_dataset(), name="math")
140
+ task = ts[0]
141
+ assert "no sandbox" in repr(task)
142
+
143
+
144
+ # ── TaskSet ─────────────────────────────────────────────────────────────
145
+
146
+
147
+ def test_taskset_isinstance():
148
+ ts = MockTaskSet(dataset=_make_dataset(), name="math")
149
+ assert not isinstance(ts, SandboxTaskSet)
150
+
151
+ ts2 = MockSandboxTaskSet(dataset=_make_dataset(), name="swe")
152
+ assert isinstance(ts2, SandboxTaskSet)
153
+
154
+
155
+ def test_taskset_len():
156
+ ts = MockTaskSet(dataset=_make_dataset(5), name="test")
157
+ assert len(ts) == 5
158
+
159
+
160
+ def test_taskset_iter():
161
+ ts = MockTaskSet(dataset=_make_dataset(3), name="test")
162
+ tasks = list(ts)
163
+ assert len(tasks) == 3
164
+ assert all(isinstance(t, Task) for t in tasks)
165
+
166
+
167
+ def test_taskset_filter():
168
+ ts = MockSandboxTaskSet(dataset=_make_dataset(5), name="test")
169
+ filtered = ts.filter(lambda ex: ex["info"]["id"] < 3)
170
+ assert len(filtered) == 3
171
+ assert isinstance(filtered, MockSandboxTaskSet)
172
+
173
+
174
+ def test_taskset_take():
175
+ ts = MockSandboxTaskSet(dataset=_make_dataset(5), name="test")
176
+ taken = ts.take(2)
177
+ assert len(taken) == 2
178
+ assert isinstance(taken, MockSandboxTaskSet)
179
+
180
+
181
+ def test_taskset_repr():
182
+ ts = MockTaskSet(dataset=_make_dataset(), name="mytest")
183
+ assert "mytest" in repr(ts)
184
+ assert "3" in repr(ts)
185
+
186
+
187
+ @pytest.mark.asyncio
188
+ async def test_composable_env_exports_task_workdir():
189
+ taskset = MockSandboxTaskSet(dataset=_make_dataset(), name="test")
190
+ env = ComposableEnv(
191
+ taskset=taskset,
192
+ harness=Harness(run_command="true"),
193
+ )
194
+
195
+ env_vars = await env.build_env_vars(
196
+ {
197
+ "info": {"id": 0},
198
+ "interception_base_url": "https://test.trycloudflare.com/v1",
199
+ }
200
+ )
201
+
202
+ assert env_vars["AGENT_WORKDIR"] == "/testbed"
203
+ assert env_vars["FOO"] == "bar"
204
+
205
+
206
+ @pytest.mark.asyncio
207
+ async def test_composable_env_quotes_paths_in_mkdir_command():
208
+ taskset = MockSandboxTaskSet(dataset=_make_dataset(), name="test")
209
+ env = ComposableEnv(
210
+ taskset=taskset,
211
+ harness=Harness(
212
+ run_command="true",
213
+ instruction_path="/tmp/with space/prompt.txt",
214
+ system_prompt="system",
215
+ system_prompt_path="/tmp/other path/system.txt",
216
+ ),
217
+ )
218
+ env.sandbox_client = SimpleNamespace(
219
+ execute_command=AsyncMock(),
220
+ teardown=lambda: None,
221
+ )
222
+ env.taskset.setup = AsyncMock()
223
+ env.upload_content = AsyncMock()
224
+
225
+ await env.post_sandbox_setup({"sandbox_id": "sbx", "info": {"id": 0}})
226
+
227
+ env.sandbox_client.execute_command.assert_awaited_once_with(
228
+ "sbx",
229
+ "mkdir -p '/tmp/other path' '/tmp/with space'",
230
+ timeout=10,
231
+ )
232
+
233
+
234
+ @pytest.mark.asyncio
235
+ async def test_composable_env_quotes_log_path_when_collecting_logs():
236
+ taskset = MockSandboxTaskSet(dataset=_make_dataset(), name="test")
237
+ env = ComposableEnv(
238
+ taskset=taskset,
239
+ harness=Harness(
240
+ run_command="true",
241
+ log_path="/tmp/log dir/agent.log",
242
+ ),
243
+ )
244
+ env.sandbox_client = SimpleNamespace(
245
+ execute_command=AsyncMock(
246
+ return_value=SimpleNamespace(stdout="agent log\n", stderr="", exit_code=0)
247
+ ),
248
+ teardown=lambda: None,
249
+ )
250
+
251
+ state = {"sandbox_id": "sbx", "timing": {"total_ms": 0}}
252
+
253
+ await env.post_rollout(state)
254
+
255
+ env.sandbox_client.execute_command.assert_awaited_once_with(
256
+ "sbx",
257
+ "cat '/tmp/log dir/agent.log' 2>/dev/null || echo '<no logs>'",
258
+ working_dir=None,
259
+ )
260
+ assert state["agent_logs"] == "agent log"
@@ -1,5 +1,9 @@
1
1
  from pathlib import Path
2
2
 
3
+ import pytest
4
+ from pydantic import ValidationError
5
+
6
+ from verifiers.types import ClientConfig
3
7
  from verifiers.utils.eval_utils import load_endpoints
4
8
 
5
9
 
@@ -220,3 +224,96 @@ def test_load_endpoints_toml_accepts_type_shorthand(tmp_path: Path):
220
224
  endpoints = load_endpoints(str(registry_path))
221
225
 
222
226
  assert endpoints["haiku"][0]["api_client_type"] == "anthropic_messages"
227
+
228
+
229
+ def test_load_endpoints_toml_accepts_headers_table(tmp_path: Path):
230
+ registry_path = tmp_path / "endpoints.toml"
231
+ registry_path.write_text(
232
+ "[[endpoint]]\n"
233
+ 'endpoint_id = "proxy"\n'
234
+ 'model = "m"\n'
235
+ 'url = "https://api.example/v1"\n'
236
+ 'key = "K"\n'
237
+ 'headers = { "X-Custom" = "v1" }\n',
238
+ encoding="utf-8",
239
+ )
240
+
241
+ endpoints = load_endpoints(str(registry_path))
242
+
243
+ assert endpoints["proxy"][0]["extra_headers"] == {"X-Custom": "v1"}
244
+
245
+
246
+ def test_load_endpoints_toml_accepts_extra_headers_alias(tmp_path: Path):
247
+ registry_path = tmp_path / "endpoints.toml"
248
+ registry_path.write_text(
249
+ "[[endpoint]]\n"
250
+ 'endpoint_id = "proxy"\n'
251
+ 'model = "m"\n'
252
+ 'url = "https://api.example/v1"\n'
253
+ 'key = "K"\n'
254
+ 'extra_headers = { "X-A" = "a" }\n',
255
+ encoding="utf-8",
256
+ )
257
+
258
+ endpoints = load_endpoints(str(registry_path))
259
+
260
+ assert endpoints["proxy"][0]["extra_headers"] == {"X-A": "a"}
261
+
262
+
263
+ def test_load_endpoints_toml_rejects_headers_and_extra_headers_together(
264
+ tmp_path: Path,
265
+ ):
266
+ registry_path = tmp_path / "endpoints.toml"
267
+ registry_path.write_text(
268
+ "[[endpoint]]\n"
269
+ 'endpoint_id = "proxy"\n'
270
+ 'model = "m"\n'
271
+ 'url = "https://api.example/v1"\n'
272
+ 'key = "K"\n'
273
+ 'headers = { "X-A" = "a" }\n'
274
+ 'extra_headers = { "X-B" = "b" }\n',
275
+ encoding="utf-8",
276
+ )
277
+
278
+ endpoints = load_endpoints(str(registry_path))
279
+
280
+ assert endpoints == {}
281
+
282
+
283
+ def test_load_endpoints_python_registry_accepts_headers_dict(tmp_path: Path):
284
+ registry_path = tmp_path / "endpoints.py"
285
+ registry_path.write_text(
286
+ "ENDPOINTS = {\n"
287
+ ' "p": {"model": "m", "url": "https://x/v1", "key": "K", '
288
+ '"headers": {"X-Foo": "bar"}},\n'
289
+ "}\n",
290
+ encoding="utf-8",
291
+ )
292
+
293
+ endpoints = load_endpoints(str(registry_path))
294
+
295
+ assert endpoints["p"][0]["extra_headers"] == {"X-Foo": "bar"}
296
+
297
+
298
+ def test_load_endpoints_malformed_headers_string_falls_back_to_empty_registry(
299
+ tmp_path: Path,
300
+ ):
301
+ toml_path = tmp_path / "endpoints.toml"
302
+ toml_path.write_text(
303
+ "[[endpoint]]\n"
304
+ 'endpoint_id = "x"\n'
305
+ 'model = "m"\n'
306
+ 'url = "https://api.example/v1"\n'
307
+ 'key = "K"\n'
308
+ 'headers = "invalid"\n',
309
+ encoding="utf-8",
310
+ )
311
+
312
+ assert load_endpoints(str(toml_path)) == {}
313
+
314
+
315
+ def test_client_config_validates_extra_header_keys():
316
+ with pytest.raises(ValidationError):
317
+ ClientConfig(extra_headers={"": "x"})
318
+ with pytest.raises(ValidationError):
319
+ ClientConfig(extra_headers={"X": 1}) # type: ignore[arg-type]
@@ -40,6 +40,7 @@ def run_cli(make_metadata, make_state, make_input):
40
40
  "api_key_var": "OPENAI_API_KEY",
41
41
  "api_base_url": "https://api.openai.com/v1",
42
42
  "header": None,
43
+ "headers": None,
43
44
  "num_examples": 1,
44
45
  "rollouts_per_example": 1,
45
46
  "max_concurrent": 1,
@@ -229,6 +230,87 @@ def test_cli_temperature_not_added_when_none(monkeypatch, run_cli):
229
230
  assert "temperature" not in sa
230
231
 
231
232
 
233
+ def test_cli_headers_table_and_list_merge(monkeypatch, run_cli):
234
+ captured = run_cli(
235
+ monkeypatch,
236
+ {
237
+ "headers": {"X-A": "a", "X-B": "b"},
238
+ "header": ["X-B: override", "X-C: c"],
239
+ },
240
+ endpoints={},
241
+ )
242
+
243
+ assert captured["configs"][0].client_config.extra_headers == {
244
+ "X-A": "a",
245
+ "X-B": "override",
246
+ "X-C": "c",
247
+ }
248
+
249
+
250
+ def test_cli_registry_headers_merged_with_eval_toml(tmp_path, monkeypatch, run_cli):
251
+ cfg = tmp_path / "eval.toml"
252
+ cfg.write_text(
253
+ "[[eval]]\n"
254
+ 'env_id = "env1"\n'
255
+ 'model = "gpt-5-mini"\n'
256
+ 'headers = { "X-Table" = "t" }\n'
257
+ 'header = [ "X-List: l", "X-Table: override" ]\n',
258
+ encoding="utf-8",
259
+ )
260
+ captured = run_cli(
261
+ monkeypatch,
262
+ {"env_id_or_config": str(cfg)},
263
+ endpoints={
264
+ "gpt-5-mini": [
265
+ {
266
+ "model": "gpt-5-mini",
267
+ "url": "https://a.example/v1",
268
+ "key": "OPENAI_API_KEY",
269
+ "extra_headers": {"X-Reg": "r"},
270
+ }
271
+ ]
272
+ },
273
+ )
274
+
275
+ assert captured["configs"][0].client_config.extra_headers == {
276
+ "X-Reg": "r",
277
+ "X-Table": "override",
278
+ "X-List": "l",
279
+ }
280
+
281
+
282
+ def test_cli_multi_variant_preserves_per_row_registry_headers(monkeypatch, run_cli):
283
+ captured = run_cli(
284
+ monkeypatch,
285
+ {
286
+ "model": "gpt-5-mini",
287
+ "api_key_var": None,
288
+ "api_base_url": None,
289
+ "header": ["X-Eval: e"],
290
+ },
291
+ endpoints={
292
+ "gpt-5-mini": [
293
+ {
294
+ "model": "gpt-5-mini",
295
+ "url": "https://a.example/v1",
296
+ "key": "OPENAI_API_KEY",
297
+ "extra_headers": {"X-Row": "a"},
298
+ },
299
+ {
300
+ "model": "gpt-5-mini",
301
+ "url": "https://b.example/v1",
302
+ "key": "OPENAI_API_KEY",
303
+ "extra_headers": {"X-Row": "b"},
304
+ },
305
+ ]
306
+ },
307
+ )
308
+
309
+ cfgs = captured["configs"][0].client_config.endpoint_configs
310
+ assert cfgs[0].extra_headers == {"X-Row": "a", "X-Eval": "e"}
311
+ assert cfgs[1].extra_headers == {"X-Row": "b", "X-Eval": "e"}
312
+
313
+
232
314
  def test_cli_endpoint_alias_multi_variant_sets_multi_base_urls(monkeypatch, run_cli):
233
315
  captured = run_cli(
234
316
  monkeypatch,
@@ -3,7 +3,33 @@ from pathlib import Path
3
3
 
4
4
  import pytest
5
5
 
6
- from verifiers.scripts.gepa import load_gepa_toml_config, resolve_gepa_config_args
6
+ from verifiers.scripts.gepa import (
7
+ _gepa_extra_headers_from_group,
8
+ load_gepa_toml_config,
9
+ resolve_gepa_config_args,
10
+ )
11
+
12
+
13
+ def test_gepa_extra_headers_from_group_requires_consistent_variants():
14
+ with pytest.raises(ValueError, match="different headers"):
15
+ _gepa_extra_headers_from_group(
16
+ [
17
+ {"extra_headers": {"X-A": "1"}},
18
+ {"extra_headers": {"X-A": "2"}},
19
+ ],
20
+ "my-alias",
21
+ )
22
+
23
+
24
+ def test_gepa_extra_headers_from_group_returns_first_row_dict():
25
+ h = _gepa_extra_headers_from_group(
26
+ [
27
+ {"extra_headers": {"X-A": "x"}},
28
+ {"extra_headers": {"X-A": "x"}},
29
+ ],
30
+ "my-alias",
31
+ )
32
+ assert h == {"X-A": "x"}
7
33
 
8
34
 
9
35
  def test_load_gepa_toml_config_reads_env_table(tmp_path: Path):