verifiers 0.1.12.dev5__tar.gz → 0.1.12.dev6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (217)
  1. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/PKG-INFO +1 -1
  2. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_eval_cli.py +32 -0
  3. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/__init__.py +1 -1
  4. verifiers-0.1.12.dev6/verifiers/cli/commands/eval.py +21 -0
  5. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/composable/harnesses/rlm.py +4 -4
  6. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/composable/tasksets/harbor/harbor.py +5 -1
  7. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/scripts/eval.py +72 -30
  8. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/serve/server/env_router.py +3 -0
  9. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/serve/server/env_server.py +1 -0
  10. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/serve/server/env_worker.py +2 -0
  11. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/utils/eval_utils.py +14 -1
  12. verifiers-0.1.12.dev5/verifiers/cli/commands/eval.py +0 -7
  13. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/.gitignore +0 -0
  14. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/LICENSE +0 -0
  15. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/README.md +0 -0
  16. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/pyproject.toml +0 -0
  17. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/AGENTS.md +0 -0
  18. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/README.md +0 -0
  19. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/__init__.py +0 -0
  20. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/conftest.py +0 -0
  21. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_browser_env.py +0 -0
  22. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_build_script.py +0 -0
  23. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_cli_agent_env.py +0 -0
  24. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_client_auth_errors.py +0 -0
  25. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_client_config.py +0 -0
  26. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_client_multimodal_types.py +0 -0
  27. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_composable_env.py +0 -0
  28. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_decorator_ranks.py +0 -0
  29. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_endpoint_registry.py +0 -0
  30. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_env_group.py +0 -0
  31. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_env_server.py +0 -0
  32. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_environment.py +0 -0
  33. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_environment_extra.py +0 -0
  34. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_envs.py +0 -0
  35. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_error_chain.py +0 -0
  36. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_eval_display.py +0 -0
  37. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_eval_utils.py +0 -0
  38. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_gepa_cli.py +0 -0
  39. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_gym_env.py +0 -0
  40. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_imports.py +0 -0
  41. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_install_utils.py +0 -0
  42. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_interception_utils.py +0 -0
  43. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_logging.py +0 -0
  44. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_math_rubric.py +0 -0
  45. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_maybe_think_parser.py +0 -0
  46. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_message_utils.py +0 -0
  47. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_message_utils_multimodal.py +0 -0
  48. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_multiturn_env.py +0 -0
  49. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_openai_chat_completions_token_client.py +0 -0
  50. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_opencode_harbor.py +0 -0
  51. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_opencode_rlm_env.py +0 -0
  52. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_parser.py +0 -0
  53. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_path_utils.py +0 -0
  54. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_prime_plugin.py +0 -0
  55. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_rlm_env.py +0 -0
  56. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_rubric.py +0 -0
  57. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_rubric_group.py +0 -0
  58. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_sandbox_env.py +0 -0
  59. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_sandbox_mixin.py +0 -0
  60. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_save_utils.py +0 -0
  61. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_setup_script.py +0 -0
  62. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_singleturn_env.py +0 -0
  63. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_stateful_tool_env.py +0 -0
  64. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_think_parser.py +0 -0
  65. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_tool_env.py +0 -0
  66. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_tool_utils.py +0 -0
  67. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_trajectory_processing.py +0 -0
  68. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_tui_info_formatting.py +0 -0
  69. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/tests/test_xml_parser.py +0 -0
  70. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/AGENTS.md +0 -0
  71. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/cli/__init__.py +0 -0
  72. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/cli/commands/__init__.py +0 -0
  73. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/cli/commands/build.py +0 -0
  74. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/cli/commands/gepa.py +0 -0
  75. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/cli/commands/init.py +0 -0
  76. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/cli/commands/install.py +0 -0
  77. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/cli/commands/setup.py +0 -0
  78. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/cli/plugins/__init__.py +0 -0
  79. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/cli/plugins/prime.py +0 -0
  80. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/cli/tui.py +0 -0
  81. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/clients/__init__.py +0 -0
  82. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/clients/anthropic_messages_client.py +0 -0
  83. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/clients/client.py +0 -0
  84. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/clients/openai_chat_completions_client.py +0 -0
  85. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/clients/openai_chat_completions_token_client.py +0 -0
  86. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/clients/openai_completions_client.py +0 -0
  87. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/decorators.py +0 -0
  88. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/AGENTS.md +0 -0
  89. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/__init__.py +0 -0
  90. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/env_group.py +0 -0
  91. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/environment.py +0 -0
  92. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/README.md +0 -0
  93. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/__init__.py +0 -0
  94. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/cli_agent_env.py +0 -0
  95. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/composable/README.md +0 -0
  96. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/composable/__init__.py +0 -0
  97. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/composable/composable_env.py +0 -0
  98. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/composable/harness.py +0 -0
  99. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/composable/harnesses/__init__.py +0 -0
  100. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/composable/harnesses/opencode.py +0 -0
  101. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/composable/harnesses/prompt.txt +0 -0
  102. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/composable/task.py +0 -0
  103. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/composable/tasksets/__init__.py +0 -0
  104. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/composable/tasksets/cp/__init__.py +0 -0
  105. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/composable/tasksets/cp/cp_task.py +0 -0
  106. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/composable/tasksets/cp/test_utils.py +0 -0
  107. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/composable/tasksets/harbor/__init__.py +0 -0
  108. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/composable/tasksets/lean/__init__.py +0 -0
  109. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/composable/tasksets/lean/lean_task.py +0 -0
  110. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/composable/tasksets/math/__init__.py +0 -0
  111. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/composable/tasksets/math/math_task.py +0 -0
  112. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/composable/tasksets/swe/__init__.py +0 -0
  113. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/composable/tasksets/swe/create_fix_patch.sh +0 -0
  114. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/composable/tasksets/swe/log_parser.py +0 -0
  115. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/composable/tasksets/swe/multi_swe.py +0 -0
  116. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/composable/tasksets/swe/openswe.py +0 -0
  117. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym.py +0 -0
  118. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/composable/tasksets/swe/swe_bench.py +0 -0
  119. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/composable/tasksets/swe/swe_tasksets.py +0 -0
  120. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/gym_env.py +0 -0
  121. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/harbor_env.py +0 -0
  122. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/mcp_env.py +0 -0
  123. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/opencode_env.py +0 -0
  124. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/opencode_qa_env.py +0 -0
  125. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/opencode_rlm_env.py +0 -0
  126. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/rlm_env.py +0 -0
  127. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/sandbox_mixin.py +0 -0
  128. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/integrations/README.md +0 -0
  129. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/integrations/__init__.py +0 -0
  130. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/integrations/browser_env/README.md +0 -0
  131. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/integrations/browser_env/__init__.py +0 -0
  132. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
  133. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
  134. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
  135. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
  136. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +0 -0
  137. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/integrations/openenv_env.py +0 -0
  138. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
  139. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/integrations/textarena_env.py +0 -0
  140. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/multiturn_env.py +0 -0
  141. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/python_env.py +0 -0
  142. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/sandbox_env.py +0 -0
  143. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/singleturn_env.py +0 -0
  144. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/stateful_tool_env.py +0 -0
  145. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/envs/tool_env.py +0 -0
  146. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/errors.py +0 -0
  147. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/gepa/__init__.py +0 -0
  148. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/gepa/adapter.py +0 -0
  149. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/gepa/config.py +0 -0
  150. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/gepa/display.py +0 -0
  151. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/gepa/gepa_utils.py +0 -0
  152. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/parsers/__init__.py +0 -0
  153. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/parsers/maybe_think_parser.py +0 -0
  154. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/parsers/parser.py +0 -0
  155. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/parsers/think_parser.py +0 -0
  156. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/parsers/xml_parser.py +0 -0
  157. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/rl/README.md +0 -0
  158. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/rl/__init__.py +0 -0
  159. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/rl/inference/__init__.py +0 -0
  160. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/rl/inference/client.py +0 -0
  161. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/rl/inference/server.py +0 -0
  162. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/rl/trainer/__init__.py +0 -0
  163. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/rl/trainer/config.py +0 -0
  164. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/rl/trainer/orchestrator.py +0 -0
  165. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/rl/trainer/trainer.py +0 -0
  166. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/rl/trainer/utils.py +0 -0
  167. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/rubrics/__init__.py +0 -0
  168. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/rubrics/experimental/hybrid_math_rubric.py +0 -0
  169. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/rubrics/judge_rubric.py +0 -0
  170. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/rubrics/math_rubric.py +0 -0
  171. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/rubrics/rubric.py +0 -0
  172. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/rubrics/rubric_group.py +0 -0
  173. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/scripts/__init__.py +0 -0
  174. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/scripts/build.py +0 -0
  175. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/scripts/gepa.py +0 -0
  176. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/scripts/init.py +0 -0
  177. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/scripts/install.py +0 -0
  178. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/scripts/prime_rl.py +0 -0
  179. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/scripts/rl.py +0 -0
  180. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/scripts/setup.py +0 -0
  181. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/scripts/train.py +0 -0
  182. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/scripts/tui.py +0 -0
  183. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/scripts/vllm.py +0 -0
  184. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/serve/__init__.py +0 -0
  185. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/serve/client/env_client.py +0 -0
  186. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/serve/client/zmq_env_client.py +0 -0
  187. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/serve/server/__init__.py +0 -0
  188. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/serve/server/zmq_env_server.py +0 -0
  189. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/serve/types.py +0 -0
  190. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/types.py +0 -0
  191. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/utils/__init__.py +0 -0
  192. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/utils/async_utils.py +0 -0
  193. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/utils/client_utils.py +0 -0
  194. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/utils/config_utils.py +0 -0
  195. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/utils/data_utils.py +0 -0
  196. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/utils/display_utils.py +0 -0
  197. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/utils/env_utils.py +0 -0
  198. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/utils/error_utils.py +0 -0
  199. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/utils/eval_display.py +0 -0
  200. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/utils/heartbeat.py +0 -0
  201. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/utils/import_utils.py +0 -0
  202. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/utils/install_utils.py +0 -0
  203. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/utils/interception_utils.py +0 -0
  204. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/utils/logging_utils.py +0 -0
  205. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/utils/message_utils.py +0 -0
  206. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/utils/metric_utils.py +0 -0
  207. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/utils/path_utils.py +0 -0
  208. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/utils/process_utils.py +0 -0
  209. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/utils/response_utils.py +0 -0
  210. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/utils/save_utils.py +0 -0
  211. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/utils/serve_utils.py +0 -0
  212. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/utils/thread_utils.py +0 -0
  213. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/utils/threaded_sandbox_client.py +0 -0
  214. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/utils/tool_utils.py +0 -0
  215. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/utils/tunnel_utils.py +0 -0
  216. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/utils/usage_utils.py +0 -0
  217. {verifiers-0.1.12.dev5 → verifiers-0.1.12.dev6}/verifiers/utils/version_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: verifiers
3
- Version: 0.1.12.dev5
3
+ Version: 0.1.12.dev6
4
4
  Summary: Verifiers: Environments for LLM Reinforcement Learning
5
5
  Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
6
6
  Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
@@ -1061,6 +1061,38 @@ def test_ablation_global_defaults_apply():
1061
1061
  assert all(c["num_examples"] == 100 for c in configs)
1062
1062
 
1063
1063
 
def test_ablation_endpoint_id_override_removes_global_model():
    """An ablation-level ``endpoint_id`` drops the globally configured ``model``."""
    toml_text = (
        'model = "gpt-4.1-mini"\n\n'
        '[[ablation]]\nenv_id = "my-env"\nendpoint_id = "proxy"\n\n'
        "[ablation.sweep]\n"
        "temperature = [0.0]\n"
    )
    with tempfile.NamedTemporaryFile(
        suffix=".toml", delete=False, mode="w"
    ) as handle:
        handle.write(toml_text)
        # Flush so the loader sees the content while the handle is still open.
        handle.flush()
        configs = load_toml_config(Path(handle.name))

    # A single sweep value expands to exactly one config.
    assert len(configs) == 1
    only_config = configs[0]
    assert only_config["endpoint_id"] == "proxy"
    assert "model" not in only_config
1078
+
1079
+
def test_ablation_swept_model_override_removes_global_endpoint_id():
    """A ``model`` given via the sweep table drops the global ``endpoint_id``."""
    toml_text = (
        'endpoint_id = "proxy"\n\n'
        '[[ablation]]\nenv_id = "my-env"\n\n'
        "[ablation.sweep]\n"
        'model = ["gpt-4.1-mini"]\n'
    )
    with tempfile.NamedTemporaryFile(
        suffix=".toml", delete=False, mode="w"
    ) as handle:
        handle.write(toml_text)
        # Flush so the loader sees the content while the handle is still open.
        handle.flush()
        configs = load_toml_config(Path(handle.name))

    # A single swept model expands to exactly one config.
    assert len(configs) == 1
    only_config = configs[0]
    assert only_config["model"] == "gpt-4.1-mini"
    assert "endpoint_id" not in only_config
1094
+
1095
+
1064
1096
  def test_ablation_with_eval_blocks():
1065
1097
  """Ablation and eval blocks can coexist."""
1066
1098
  with tempfile.NamedTemporaryFile(suffix=".toml", delete=False, mode="w") as f:
@@ -1,4 +1,4 @@
1
- __version__ = "0.1.12.dev5"
1
+ __version__ = "0.1.12.dev6"
2
2
 
3
3
  import importlib
4
4
  import os
@@ -0,0 +1,21 @@
1
+ """Evaluation command module for external hosts."""
2
+
3
+ from verifiers.scripts.eval import (
4
+ build_extra_headers,
5
+ build_parser,
6
+ main,
7
+ merge_sampling_args,
8
+ parse_args,
9
+ )
10
+
11
+ __all__ = [
12
+ "build_extra_headers",
13
+ "build_parser",
14
+ "merge_sampling_args",
15
+ "parse_args",
16
+ "main",
17
+ ]
18
+
19
+
20
+ if __name__ == "__main__":
21
+ main()
@@ -13,11 +13,11 @@ DEFAULT_APPEND_TO_SYSTEM_PROMPT_PATH = "/task/append_to_system_prompt.txt"
13
13
 
14
14
 
15
15
  def build_install_script(rlm_repo_url: str = DEFAULT_RLM_REPO_URL) -> str:
16
- return (
17
- "set -e; "
18
- 'command -v uv >/dev/null 2>&1 || { curl -LsSf https://astral.sh/uv/install.sh | sh; source "$HOME/.local/bin/env"; }; '
19
- f'uv tool install --python 3.11 "rlm @ git+https://${{GH_TOKEN}}@{rlm_repo_url}"'
16
+ raw_base = rlm_repo_url.removesuffix(".git").replace(
17
+ "github.com", "raw.githubusercontent.com"
20
18
  )
19
+ url = f"https://${{GH_TOKEN}}@{raw_base}/main/install.sh"
20
+ return f"(curl -fsSL {url} || wget -qO- {url}) > /tmp/rlm-install.sh && bash /tmp/rlm-install.sh"
21
21
 
22
22
 
23
23
  def build_run_command(
@@ -4,10 +4,14 @@ import json
4
4
  import logging
5
5
  import tarfile
6
6
  import tempfile
7
- import tomllib
8
7
  from pathlib import Path
9
8
  from typing import Any
10
9
 
10
+ try:
11
+ import tomllib
12
+ except ImportError:
13
+ import tomli as tomllib
14
+
11
15
  import verifiers as vf
12
16
  from verifiers.envs.experimental.composable import SandboxSpec, SandboxTaskSet
13
17
 
@@ -90,6 +90,58 @@ PROVIDER_CONFIGS: dict[str, dict[str, str]] = {
90
90
  DEFAULT_PROVIDER = "prime"
91
91
 
92
92
 
93
+ def merge_sampling_args(
94
+ sampling_args: dict[str, Any] | None,
95
+ *,
96
+ max_tokens: int | None = None,
97
+ temperature: float | None = None,
98
+ prefer_existing_keys: bool = True,
99
+ include_none_max_tokens: bool = False,
100
+ ) -> dict[str, Any]:
101
+ merged_sampling_args = dict(sampling_args or {})
102
+
103
+ if (not prefer_existing_keys or "max_tokens" not in merged_sampling_args) and (
104
+ include_none_max_tokens or max_tokens is not None
105
+ ):
106
+ merged_sampling_args["max_tokens"] = max_tokens
107
+
108
+ if temperature is not None and (
109
+ not prefer_existing_keys or "temperature" not in merged_sampling_args
110
+ ):
111
+ merged_sampling_args["temperature"] = temperature
112
+
113
+ return merged_sampling_args
114
+
115
+
def build_extra_headers(raw: dict[str, Any]) -> dict[str, str]:
    """Combine the ``headers`` table and the ``header`` list from ``raw``.

    ``raw["headers"]`` (if not ``None``) is passed through
    ``_validate_extra_headers_value``. ``raw["header"]`` (if not ``None``) must
    be a list of ``"Name: Value"`` strings; each is split on the first colon
    and both parts are stripped. Entries from the list override table entries
    with the same name, and later list entries override earlier ones.

    Args:
        raw: Config mapping that may carry ``"headers"`` and/or ``"header"``.

    Returns:
        The merged header mapping.

    Raises:
        ValueError: If ``header`` is not a list, an entry is not a string, an
            entry has no colon, or an entry's name is empty after stripping.
    """
    table_value = raw.get("headers")
    from_table: dict[str, str] = (
        {} if table_value is None else _validate_extra_headers_value(table_value)
    )

    entries = raw.get("header")
    if entries is None:
        entries = []
    elif not isinstance(entries, list):
        raise ValueError("'header' must be a list of 'Name: Value' strings")

    from_list: dict[str, str] = {}
    for entry in entries:
        if not isinstance(entry, str):
            raise ValueError(
                f"Each 'header' entry must be a string 'Name: Value', got: {entry!r}"
            )
        # Split on the first colon only, so values may themselves contain colons.
        name, colon, value = entry.partition(":")
        if not colon:
            raise ValueError(f"--header must be 'Name: Value', got: {entry!r}")
        name = name.strip()
        if not name:
            raise ValueError("--header name cannot be empty")
        from_list[name] = value.strip()

    # List entries are applied last so they take precedence over the table.
    merged = dict(from_table)
    merged.update(from_list)
    return merged
143
+
144
+
93
145
  def get_env_eval_defaults(env_id: str) -> dict[str, Any]:
94
146
  """Get eval config defaults from the environment module's pyproject.toml.
95
147
 
@@ -147,7 +199,7 @@ def get_env_eval_defaults(env_id: str) -> dict[str, Any]:
147
199
  return defaults
148
200
 
149
201
 
150
- def main():
202
+ def build_parser() -> argparse.ArgumentParser:
151
203
  parser = argparse.ArgumentParser()
152
204
  parser.add_argument(
153
205
  "env_id_or_config",
@@ -384,7 +436,18 @@ def main():
384
436
  default=None,
385
437
  help="Heartbeat URL for uptime monitoring",
386
438
  )
387
- args = parser.parse_args()
439
+ return parser
440
+
441
+
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse eval CLI arguments with a freshly built parser.

    Args:
        argv: Argument strings to parse. When ``None``, argparse falls back to
            the process command line.

    Returns:
        The populated ``argparse.Namespace``.
    """
    # ArgumentParser.parse_args(None) is the same as calling it with no args.
    return build_parser().parse_args(argv)
447
+
448
+
449
+ def main(argv: list[str] | None = None):
450
+ args = parse_args(argv)
388
451
 
389
452
  if args.debug: # only set up console logging in debug mode
390
453
  setup_logging(get_log_level(args.verbose))
@@ -561,35 +624,14 @@ def main():
561
624
  )
562
625
 
563
626
  # Merge sampling args
564
- merged_sampling_args: dict = {}
565
- if raw.get("sampling_args") is not None:
566
- merged_sampling_args.update(raw["sampling_args"])
567
- if "max_tokens" not in merged_sampling_args:
568
- merged_sampling_args["max_tokens"] = raw.get("max_tokens")
569
- raw_temp = raw.get("temperature")
570
- if raw_temp is not None and "temperature" not in merged_sampling_args:
571
- merged_sampling_args["temperature"] = raw_temp
627
+ merged_sampling_args = merge_sampling_args(
628
+ raw.get("sampling_args"),
629
+ max_tokens=raw.get("max_tokens"),
630
+ temperature=raw.get("temperature"),
631
+ include_none_max_tokens=True,
632
+ )
572
633
  # Build headers: registry < [[eval]] headers table < header list / --header
573
- eval_headers_table: dict[str, str] = {}
574
- raw_headers = raw.get("headers")
575
- if raw_headers is not None:
576
- eval_headers_table = _validate_extra_headers_value(raw_headers)
577
-
578
- eval_headers_from_list: dict[str, str] = {}
579
- for h in raw.get("header") or []:
580
- if not isinstance(h, str):
581
- raise ValueError(
582
- f"Each 'header' entry must be a string 'Name: Value', got: {h!r}"
583
- )
584
- if ":" not in h:
585
- raise ValueError(f"--header must be 'Name: Value', got: {h!r}")
586
- k, v = h.split(":", 1)
587
- k, v = k.strip(), v.strip()
588
- if not k:
589
- raise ValueError("--header name cannot be empty")
590
- eval_headers_from_list[k] = v
591
-
592
- eval_headers_merged = {**eval_headers_table, **eval_headers_from_list}
634
+ eval_headers_merged = build_extra_headers(raw)
593
635
 
594
636
  registry_headers_base: dict[str, str] = {}
595
637
  if endpoint_group is not None:
@@ -101,6 +101,7 @@ class EnvRouter:
101
101
  log_level: str | None = None,
102
102
  log_dir: str | None = None,
103
103
  console_logging: bool = True,
104
+ json_logging: bool = False,
104
105
  *,
105
106
  num_workers: int = 1,
106
107
  worker_heartbeat_timeout: float = 30.0,
@@ -116,6 +117,7 @@ class EnvRouter:
116
117
  self.log_level = log_level
117
118
  self.log_dir = log_dir
118
119
  self.console_logging = console_logging
120
+ self.json_logging = json_logging
119
121
 
120
122
  self.num_workers = num_workers
121
123
  self.worker_heartbeat_timeout = worker_heartbeat_timeout
@@ -185,6 +187,7 @@ class EnvRouter:
185
187
  self.log_level,
186
188
  self.log_dir,
187
189
  self.console_logging,
190
+ self.json_logging,
188
191
  ),
189
192
  kwargs=dict(
190
193
  worker_id=worker_id,
@@ -70,6 +70,7 @@ class EnvServer(ABC):
70
70
  log_level=log_level,
71
71
  log_dir=log_dir,
72
72
  console_logging=console_logging,
73
+ json_logging=json_logging,
73
74
  num_workers=num_workers,
74
75
  worker_heartbeat_timeout=worker_heartbeat_timeout,
75
76
  stats_log_interval=stats_log_interval,
@@ -60,6 +60,7 @@ class EnvWorker:
60
60
  log_level: str | None = None,
61
61
  log_dir: str | None = None,
62
62
  console_logging: bool = True,
63
+ json_logging: bool = False,
63
64
  *,
64
65
  worker_id: int,
65
66
  worker_name: str,
@@ -78,6 +79,7 @@ class EnvWorker:
78
79
  logger_kwargs: dict[str, Any] = {
79
80
  "console_logging": console_logging,
80
81
  "file_logging": log_dir is not None,
82
+ "json_logging": json_logging,
81
83
  }
82
84
  if log_level is not None:
83
85
  logger_kwargs["level"] = log_level
@@ -330,8 +330,14 @@ def _expand_ablation(ablation: dict, global_defaults: dict) -> list[dict]:
330
330
  f"sweep.env_args — use one or the other"
331
331
  )
332
332
 
333
+ explicit_keys = (set(ablation.keys()) - {"sweep"}) | set(sweep.keys())
334
+
333
335
  # Fixed fields: global defaults overridden by ablation-level fields
334
336
  fixed = {**global_defaults, **ablation}
337
+ if "endpoint_id" in explicit_keys and "model" not in explicit_keys:
338
+ fixed.pop("model", None)
339
+ if "model" in explicit_keys and "endpoint_id" not in explicit_keys:
340
+ fixed.pop("endpoint_id", None)
335
341
 
336
342
  # Expand cartesian product
337
343
  keys = [k for k, _ in dimensions]
@@ -351,7 +357,9 @@ def _expand_ablation(ablation: dict, global_defaults: dict) -> list[dict]:
351
357
  return expanded
352
358
 
353
359
 
354
- def load_toml_config(path: Path) -> list[dict]:
360
+ def load_toml_config(
361
+ path: Path, extra_valid_fields: set[str] | None = None
362
+ ) -> list[dict]:
355
363
  """Loads and validates a TOML config file.
356
364
 
357
365
  Config format supports global defaults at the top level, with per-eval overrides
@@ -454,6 +462,7 @@ def load_toml_config(path: Path) -> list[dict]:
454
462
  "save_to_hf_hub",
455
463
  "hf_hub_dataset_name",
456
464
  }
465
+ valid_fields |= extra_valid_fields or set()
457
466
 
458
467
  # validate global fields
459
468
  if global_defaults:
@@ -475,6 +484,10 @@ def load_toml_config(path: Path) -> list[dict]:
475
484
  )
476
485
  # global defaults, then per-eval overrides
477
486
  merged = {**global_defaults, **eval_config}
487
+ if "endpoint_id" in eval_config and "model" not in eval_config:
488
+ merged.pop("model", None)
489
+ if "model" in eval_config and "endpoint_id" not in eval_config:
490
+ merged.pop("endpoint_id", None)
478
491
  merged_eval_list.append(merged)
479
492
 
480
493
  # expand [[ablation]] blocks into eval configs
@@ -1,7 +0,0 @@
1
- """Evaluation command module for external hosts."""
2
-
3
- from verifiers.scripts.eval import main
4
-
5
-
6
- if __name__ == "__main__":
7
- main()
File without changes