verifiers 0.1.12.dev4__tar.gz → 0.1.12.dev6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (217) hide show
  1. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/PKG-INFO +1 -1
  2. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/pyproject.toml +9 -0
  3. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_eval_cli.py +32 -0
  4. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/__init__.py +1 -1
  5. verifiers-0.1.12.dev6/verifiers/cli/commands/eval.py +21 -0
  6. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/composable/README.md +2 -2
  7. verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/harnesses/__init__.py +35 -0
  8. verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/harnesses/opencode.py +265 -0
  9. verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/harnesses/prompt.txt +12 -0
  10. verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/harnesses/rlm.py +50 -0
  11. verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/tasksets/__init__.py +39 -0
  12. verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/tasksets/cp/__init__.py +3 -0
  13. verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/tasksets/cp/cp_task.py +239 -0
  14. verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/tasksets/cp/test_utils.py +367 -0
  15. verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/tasksets/harbor/__init__.py +13 -0
  16. verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/tasksets/harbor/harbor.py +375 -0
  17. verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/tasksets/lean/__init__.py +3 -0
  18. verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/tasksets/lean/lean_task.py +354 -0
  19. verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/tasksets/math/__init__.py +3 -0
  20. verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/tasksets/math/math_task.py +161 -0
  21. verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/tasksets/swe/__init__.py +15 -0
  22. verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/tasksets/swe/create_fix_patch.sh +36 -0
  23. verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/tasksets/swe/log_parser.py +42 -0
  24. verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/tasksets/swe/multi_swe.py +396 -0
  25. verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/tasksets/swe/openswe.py +230 -0
  26. verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym.py +413 -0
  27. verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/tasksets/swe/swe_bench.py +634 -0
  28. verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/tasksets/swe/swe_tasksets.py +67 -0
  29. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/scripts/eval.py +72 -30
  30. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/serve/server/env_router.py +3 -0
  31. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/serve/server/env_server.py +1 -0
  32. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/serve/server/env_worker.py +2 -0
  33. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/eval_utils.py +14 -1
  34. verifiers-0.1.12.dev4/verifiers/cli/commands/eval.py +0 -7
  35. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/.gitignore +0 -0
  36. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/LICENSE +0 -0
  37. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/README.md +0 -0
  38. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/AGENTS.md +0 -0
  39. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/README.md +0 -0
  40. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/__init__.py +0 -0
  41. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/conftest.py +0 -0
  42. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_browser_env.py +0 -0
  43. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_build_script.py +0 -0
  44. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_cli_agent_env.py +0 -0
  45. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_client_auth_errors.py +0 -0
  46. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_client_config.py +0 -0
  47. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_client_multimodal_types.py +0 -0
  48. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_composable_env.py +0 -0
  49. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_decorator_ranks.py +0 -0
  50. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_endpoint_registry.py +0 -0
  51. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_env_group.py +0 -0
  52. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_env_server.py +0 -0
  53. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_environment.py +0 -0
  54. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_environment_extra.py +0 -0
  55. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_envs.py +0 -0
  56. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_error_chain.py +0 -0
  57. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_eval_display.py +0 -0
  58. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_eval_utils.py +0 -0
  59. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_gepa_cli.py +0 -0
  60. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_gym_env.py +0 -0
  61. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_imports.py +0 -0
  62. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_install_utils.py +0 -0
  63. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_interception_utils.py +0 -0
  64. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_logging.py +0 -0
  65. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_math_rubric.py +0 -0
  66. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_maybe_think_parser.py +0 -0
  67. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_message_utils.py +0 -0
  68. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_message_utils_multimodal.py +0 -0
  69. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_multiturn_env.py +0 -0
  70. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_openai_chat_completions_token_client.py +0 -0
  71. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_opencode_harbor.py +0 -0
  72. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_opencode_rlm_env.py +0 -0
  73. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_parser.py +0 -0
  74. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_path_utils.py +0 -0
  75. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_prime_plugin.py +0 -0
  76. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_rlm_env.py +0 -0
  77. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_rubric.py +0 -0
  78. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_rubric_group.py +0 -0
  79. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_sandbox_env.py +0 -0
  80. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_sandbox_mixin.py +0 -0
  81. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_save_utils.py +0 -0
  82. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_setup_script.py +0 -0
  83. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_singleturn_env.py +0 -0
  84. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_stateful_tool_env.py +0 -0
  85. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_think_parser.py +0 -0
  86. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_tool_env.py +0 -0
  87. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_tool_utils.py +0 -0
  88. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_trajectory_processing.py +0 -0
  89. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_tui_info_formatting.py +0 -0
  90. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_xml_parser.py +0 -0
  91. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/AGENTS.md +0 -0
  92. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/cli/__init__.py +0 -0
  93. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/cli/commands/__init__.py +0 -0
  94. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/cli/commands/build.py +0 -0
  95. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/cli/commands/gepa.py +0 -0
  96. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/cli/commands/init.py +0 -0
  97. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/cli/commands/install.py +0 -0
  98. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/cli/commands/setup.py +0 -0
  99. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/cli/plugins/__init__.py +0 -0
  100. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/cli/plugins/prime.py +0 -0
  101. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/cli/tui.py +0 -0
  102. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/clients/__init__.py +0 -0
  103. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/clients/anthropic_messages_client.py +0 -0
  104. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/clients/client.py +0 -0
  105. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/clients/openai_chat_completions_client.py +0 -0
  106. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/clients/openai_chat_completions_token_client.py +0 -0
  107. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/clients/openai_completions_client.py +0 -0
  108. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/decorators.py +0 -0
  109. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/AGENTS.md +0 -0
  110. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/__init__.py +0 -0
  111. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/env_group.py +0 -0
  112. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/environment.py +0 -0
  113. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/README.md +0 -0
  114. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/__init__.py +0 -0
  115. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/cli_agent_env.py +0 -0
  116. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/composable/__init__.py +0 -0
  117. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/composable/composable_env.py +0 -0
  118. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/composable/harness.py +0 -0
  119. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/composable/task.py +0 -0
  120. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/gym_env.py +0 -0
  121. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/harbor_env.py +0 -0
  122. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/mcp_env.py +0 -0
  123. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/opencode_env.py +0 -0
  124. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/opencode_qa_env.py +0 -0
  125. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/opencode_rlm_env.py +0 -0
  126. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/rlm_env.py +0 -0
  127. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/sandbox_mixin.py +0 -0
  128. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/integrations/README.md +0 -0
  129. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/integrations/__init__.py +0 -0
  130. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/integrations/browser_env/README.md +0 -0
  131. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/integrations/browser_env/__init__.py +0 -0
  132. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
  133. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
  134. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
  135. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
  136. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +0 -0
  137. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/integrations/openenv_env.py +0 -0
  138. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
  139. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/integrations/textarena_env.py +0 -0
  140. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/multiturn_env.py +0 -0
  141. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/python_env.py +0 -0
  142. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/sandbox_env.py +0 -0
  143. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/singleturn_env.py +0 -0
  144. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/stateful_tool_env.py +0 -0
  145. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/tool_env.py +0 -0
  146. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/errors.py +0 -0
  147. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/gepa/__init__.py +0 -0
  148. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/gepa/adapter.py +0 -0
  149. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/gepa/config.py +0 -0
  150. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/gepa/display.py +0 -0
  151. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/gepa/gepa_utils.py +0 -0
  152. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/parsers/__init__.py +0 -0
  153. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/parsers/maybe_think_parser.py +0 -0
  154. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/parsers/parser.py +0 -0
  155. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/parsers/think_parser.py +0 -0
  156. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/parsers/xml_parser.py +0 -0
  157. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/rl/README.md +0 -0
  158. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/rl/__init__.py +0 -0
  159. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/rl/inference/__init__.py +0 -0
  160. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/rl/inference/client.py +0 -0
  161. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/rl/inference/server.py +0 -0
  162. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/rl/trainer/__init__.py +0 -0
  163. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/rl/trainer/config.py +0 -0
  164. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/rl/trainer/orchestrator.py +0 -0
  165. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/rl/trainer/trainer.py +0 -0
  166. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/rl/trainer/utils.py +0 -0
  167. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/rubrics/__init__.py +0 -0
  168. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/rubrics/experimental/hybrid_math_rubric.py +0 -0
  169. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/rubrics/judge_rubric.py +0 -0
  170. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/rubrics/math_rubric.py +0 -0
  171. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/rubrics/rubric.py +0 -0
  172. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/rubrics/rubric_group.py +0 -0
  173. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/scripts/__init__.py +0 -0
  174. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/scripts/build.py +0 -0
  175. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/scripts/gepa.py +0 -0
  176. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/scripts/init.py +0 -0
  177. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/scripts/install.py +0 -0
  178. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/scripts/prime_rl.py +0 -0
  179. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/scripts/rl.py +0 -0
  180. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/scripts/setup.py +0 -0
  181. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/scripts/train.py +0 -0
  182. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/scripts/tui.py +0 -0
  183. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/scripts/vllm.py +0 -0
  184. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/serve/__init__.py +0 -0
  185. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/serve/client/env_client.py +0 -0
  186. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/serve/client/zmq_env_client.py +0 -0
  187. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/serve/server/__init__.py +0 -0
  188. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/serve/server/zmq_env_server.py +0 -0
  189. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/serve/types.py +0 -0
  190. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/types.py +0 -0
  191. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/__init__.py +0 -0
  192. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/async_utils.py +0 -0
  193. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/client_utils.py +0 -0
  194. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/config_utils.py +0 -0
  195. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/data_utils.py +0 -0
  196. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/display_utils.py +0 -0
  197. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/env_utils.py +0 -0
  198. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/error_utils.py +0 -0
  199. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/eval_display.py +0 -0
  200. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/heartbeat.py +0 -0
  201. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/import_utils.py +0 -0
  202. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/install_utils.py +0 -0
  203. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/interception_utils.py +0 -0
  204. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/logging_utils.py +0 -0
  205. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/message_utils.py +0 -0
  206. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/metric_utils.py +0 -0
  207. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/path_utils.py +0 -0
  208. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/process_utils.py +0 -0
  209. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/response_utils.py +0 -0
  210. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/save_utils.py +0 -0
  211. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/serve_utils.py +0 -0
  212. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/thread_utils.py +0 -0
  213. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/threaded_sandbox_client.py +0 -0
  214. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/tool_utils.py +0 -0
  215. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/tunnel_utils.py +0 -0
  216. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/usage_utils.py +0 -0
  217. {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/version_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: verifiers
3
- Version: 0.1.12.dev4
3
+ Version: 0.1.12.dev6
4
4
  Summary: Verifiers: Environments for LLM Reinforcement Learning
5
5
  Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
6
6
  Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
@@ -197,6 +197,15 @@ redundant-cast = "ignore"
197
197
  [tool.ty.src]
198
198
  exclude = ["environments"]
199
199
 
200
+ [[tool.ty.overrides]]
201
+ include = ["verifiers/envs/experimental/composable/tasksets/**"]
202
+
203
+ [tool.ty.overrides.rules]
204
+ unresolved-import = "ignore"
205
+ invalid-method-override = "ignore"
206
+ invalid-assignment = "ignore"
207
+ not-iterable = "ignore"
208
+
200
209
  [tool.coverage.run]
201
210
  source = ["verifiers"]
202
211
  omit = [
@@ -1061,6 +1061,38 @@ def test_ablation_global_defaults_apply():
1061
1061
  assert all(c["num_examples"] == 100 for c in configs)
1062
1062
 
1063
1063
 
1064
+ def test_ablation_endpoint_id_override_removes_global_model():
1065
+ with tempfile.NamedTemporaryFile(suffix=".toml", delete=False, mode="w") as f:
1066
+ f.write(
1067
+ 'model = "gpt-4.1-mini"\n\n'
1068
+ '[[ablation]]\nenv_id = "my-env"\nendpoint_id = "proxy"\n\n'
1069
+ "[ablation.sweep]\n"
1070
+ "temperature = [0.0]\n"
1071
+ )
1072
+ f.flush()
1073
+ configs = load_toml_config(Path(f.name))
1074
+
1075
+ assert len(configs) == 1
1076
+ assert configs[0]["endpoint_id"] == "proxy"
1077
+ assert "model" not in configs[0]
1078
+
1079
+
1080
+ def test_ablation_swept_model_override_removes_global_endpoint_id():
1081
+ with tempfile.NamedTemporaryFile(suffix=".toml", delete=False, mode="w") as f:
1082
+ f.write(
1083
+ 'endpoint_id = "proxy"\n\n'
1084
+ '[[ablation]]\nenv_id = "my-env"\n\n'
1085
+ "[ablation.sweep]\n"
1086
+ 'model = ["gpt-4.1-mini"]\n'
1087
+ )
1088
+ f.flush()
1089
+ configs = load_toml_config(Path(f.name))
1090
+
1091
+ assert len(configs) == 1
1092
+ assert configs[0]["model"] == "gpt-4.1-mini"
1093
+ assert "endpoint_id" not in configs[0]
1094
+
1095
+
1064
1096
  def test_ablation_with_eval_blocks():
1065
1097
  """Ablation and eval blocks can coexist."""
1066
1098
  with tempfile.NamedTemporaryFile(suffix=".toml", delete=False, mode="w") as f:
@@ -1,4 +1,4 @@
1
- __version__ = "0.1.12.dev4"
1
+ __version__ = "0.1.12.dev6"
2
2
 
3
3
  import importlib
4
4
  import os
@@ -0,0 +1,21 @@
1
+ """Evaluation command module for external hosts."""
2
+
3
+ from verifiers.scripts.eval import (
4
+ build_extra_headers,
5
+ build_parser,
6
+ main,
7
+ merge_sampling_args,
8
+ parse_args,
9
+ )
10
+
11
+ __all__ = [
12
+ "build_extra_headers",
13
+ "build_parser",
14
+ "merge_sampling_args",
15
+ "parse_args",
16
+ "main",
17
+ ]
18
+
19
+
20
+ if __name__ == "__main__":
21
+ main()
@@ -19,8 +19,8 @@ Separates **what to solve** (the task) from **how to solve it** (the agent) by r
19
19
  ## Usage
20
20
 
21
21
  ```python
22
- from swe_tasksets import R2EGymTaskSet
23
- from opencode_harness import opencode_harness
22
+ from verifiers.envs.experimental.composable.tasksets.swe.r2e_gym import R2EGymTaskSet
23
+ from verifiers.envs.experimental.composable.harnesses.opencode import opencode_harness
24
24
  from verifiers.envs.experimental.composable import ComposableEnv
25
25
 
26
26
  # Create a taskset
@@ -0,0 +1,35 @@
1
+ from verifiers.envs.experimental.composable.harnesses.rlm import (
2
+ DEFAULT_RLM_MAX_TURNS,
3
+ DEFAULT_RLM_REPO_URL,
4
+ DEFAULT_RLM_TOOLS,
5
+ build_install_script as build_rlm_install_script,
6
+ build_run_command as build_rlm_run_command,
7
+ rlm_harness,
8
+ )
9
+ from verifiers.envs.experimental.composable.harnesses.opencode import (
10
+ DEFAULT_DISABLED_TOOLS,
11
+ DEFAULT_RELEASE_SHA256,
12
+ DEFAULT_SYSTEM_PROMPT,
13
+ OPENCODE_INSTALL_SCRIPT,
14
+ build_install_script as build_opencode_install_script,
15
+ build_opencode_config,
16
+ build_opencode_run_command,
17
+ opencode_harness,
18
+ )
19
+
20
+ __all__ = [
21
+ "rlm_harness",
22
+ "build_rlm_install_script",
23
+ "build_rlm_run_command",
24
+ "DEFAULT_RLM_REPO_URL",
25
+ "DEFAULT_RLM_TOOLS",
26
+ "DEFAULT_RLM_MAX_TURNS",
27
+ "opencode_harness",
28
+ "build_opencode_install_script",
29
+ "build_opencode_config",
30
+ "build_opencode_run_command",
31
+ "OPENCODE_INSTALL_SCRIPT",
32
+ "DEFAULT_DISABLED_TOOLS",
33
+ "DEFAULT_RELEASE_SHA256",
34
+ "DEFAULT_SYSTEM_PROMPT",
35
+ ]
@@ -0,0 +1,265 @@
1
+ """OpenCode harness configuration.
2
+
3
+ Provides install script, config generation, and run command templates
4
+ that are shared across all OpenCode-based environments (SWE, Lean, Math, etc.).
5
+
6
+ Usage::
7
+
8
+ from verifiers.envs.experimental.composable.harnesses.opencode import opencode_harness
9
+ harness = opencode_harness(system_prompt="You are a coding agent...")
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import json
15
+ import shlex
16
+ from pathlib import Path
17
+
18
+ # ── Defaults ─────────────────────────────────────────────────────────────
19
+
20
+ DEFAULT_RELEASE_REPO = "PrimeIntellect-ai/opencode"
21
+ DEFAULT_RELEASE_VERSION = "1.1.63-rl1"
22
+ DEFAULT_RELEASE_SHA256 = (
23
+ "17104d601b8bf6fd03dd46a6de055b422414b9ada524fe085b09683f455ccac1"
24
+ )
25
+ DEFAULT_SYSTEM_PROMPT = (Path(__file__).parent / "prompt.txt").read_text()
26
+
27
+ DEFAULT_DISABLED_TOOLS = [
28
+ "apply_patch",
29
+ "write",
30
+ "multiedit",
31
+ "glob",
32
+ "todowrite",
33
+ "todoread",
34
+ "websearch",
35
+ "task",
36
+ "batch",
37
+ "list",
38
+ "read",
39
+ "question",
40
+ "webfetch",
41
+ "grep",
42
+ "plan_exit",
43
+ "plan_enter",
44
+ "lsp",
45
+ "codesearch",
46
+ "skill",
47
+ ]
48
+
49
+
50
+ # ── Install script ───────────────────────────────────────────────────────
51
+
52
+
53
+ def build_install_script(
54
+ release_repo: str = DEFAULT_RELEASE_REPO,
55
+ release_version: str = DEFAULT_RELEASE_VERSION,
56
+ release_sha256: str = DEFAULT_RELEASE_SHA256,
57
+ install_ripgrep: bool = True,
58
+ ) -> str:
59
+ """Build the shell script that installs OpenCode in a sandbox."""
60
+ rg_install = (
61
+ "apt-get install -y -qq ripgrep > /dev/null 2>&1 || true"
62
+ if install_ripgrep
63
+ else ""
64
+ )
65
+ sha256_check = f'echo "{release_sha256} /tmp/opencode.tar.gz" | sha256sum -c -'
66
+ return f"""\
67
+ set -e
68
+ apt-get update -qq && apt-get install -y -qq curl tar > /dev/null 2>&1
69
+ {rg_install}
70
+
71
+ OPENCODE_RELEASE_REPO="{release_repo}"
72
+ OPENCODE_RELEASE_VERSION="{release_version}"
73
+
74
+ case "$(uname -m)" in
75
+ x86_64) OPENCODE_ARCH=x64 ;;
76
+ aarch64|arm64) OPENCODE_ARCH=arm64 ;;
77
+ *) echo "Unsupported architecture: $(uname -m)"; exit 1 ;;
78
+ esac
79
+
80
+ OPENCODE_ASSET="opencode-linux-$OPENCODE_ARCH.tar.gz"
81
+ OPENCODE_RELEASE_TAG="${{OPENCODE_RELEASE_VERSION#v}}"
82
+ OPENCODE_RELEASE_URL="https://github.com/$OPENCODE_RELEASE_REPO/releases/download/v$OPENCODE_RELEASE_TAG/$OPENCODE_ASSET"
83
+
84
+ mkdir -p "$HOME/.opencode/bin"
85
+ curl -fsSL "$OPENCODE_RELEASE_URL" -o /tmp/opencode.tar.gz
86
+ {sha256_check}
87
+ tar -xzf /tmp/opencode.tar.gz -C /tmp
88
+ install -m 755 /tmp/opencode "$HOME/.opencode/bin/opencode"
89
+ echo "OpenCode installed successfully"
90
+ """
91
+
92
+
93
+ # ── Config generation ────────────────────────────────────────────────────
94
+
95
+
96
+ def build_opencode_config(
97
+ disabled_tools: list[str] | None = None,
98
+ system_prompt_path: str | None = None,
99
+ disable_compaction: bool = True,
100
+ provider_key: str = "${OPENAI_MODEL%%/*}",
101
+ provider_display_name: str | None = None,
102
+ model_id: str = "$OPENAI_MODEL",
103
+ model_key: str = "${OPENAI_MODEL##*/}",
104
+ model_display_name: str | None = None,
105
+ provider_timeout_ms: int = 3_600_000,
106
+ ) -> str:
107
+ """Generate opencode.json config content."""
108
+ config: dict = {
109
+ "${SCHEMA_DOLLAR}schema": "https://opencode.ai/config.json",
110
+ "provider": {
111
+ provider_key: {
112
+ "npm": "@ai-sdk/openai-compatible",
113
+ "name": provider_display_name or provider_key,
114
+ "options": {
115
+ "baseURL": "$OPENAI_BASE_URL",
116
+ "apiKey": "intercepted",
117
+ "timeout": provider_timeout_ms,
118
+ },
119
+ "models": {
120
+ model_key: {
121
+ "name": model_display_name or model_key,
122
+ "modalities": {"input": ["text", "image"], "output": ["text"]},
123
+ "interleaved": {"field": "reasoning_content"},
124
+ }
125
+ },
126
+ }
127
+ },
128
+ "model": model_id,
129
+ }
130
+
131
+ if disable_compaction:
132
+ config["compaction"] = {"auto": False, "prune": False}
133
+
134
+ agent_build: dict = {}
135
+ if system_prompt_path:
136
+ agent_build["prompt"] = "{file:" + system_prompt_path + "}"
137
+ if disabled_tools:
138
+ agent_build["tools"] = {tool: False for tool in disabled_tools}
139
+ if agent_build:
140
+ config["agent"] = {"build": agent_build}
141
+
142
+ return json.dumps(config, indent=2)
143
+
144
+
145
+ # ── Run command ──────────────────────────────────────────────────────────
146
+
147
+
148
+ def build_opencode_run_command(
149
+ agent_workdir: str = "/app",
150
+ prompt_path: str = "/opencode/prompt.txt",
151
+ log_path: str = "/opencode/logs.txt",
152
+ disabled_tools: list[str] | None = None,
153
+ system_prompt_path: str | None = None,
154
+ disable_compaction: bool = True,
155
+ allow_git: bool = False,
156
+ provider_key: str = "${OPENAI_MODEL%%/*}",
157
+ provider_display_name: str | None = None,
158
+ model_id: str = "$OPENAI_MODEL",
159
+ model_key: str = "${OPENAI_MODEL##*/}",
160
+ model_display_name: str | None = None,
161
+ provider_timeout_ms: int = 3_600_000,
162
+ ) -> str:
163
+ """Build the shell command that configures and runs OpenCode."""
164
+ config_json = build_opencode_config(
165
+ disabled_tools=disabled_tools,
166
+ system_prompt_path=system_prompt_path,
167
+ disable_compaction=disable_compaction,
168
+ provider_key=provider_key,
169
+ provider_display_name=provider_display_name,
170
+ model_id=model_id,
171
+ model_key=model_key,
172
+ model_display_name=model_display_name,
173
+ provider_timeout_ms=provider_timeout_ms,
174
+ )
175
+
176
+ script = f"""\
177
+ set -eo pipefail
178
+
179
+ export PATH="$HOME/.opencode/bin:$PATH"
180
+ export OPENCODE_DISABLE_FILETIME_CHECK=true
181
+ export ALLOW_GIT={"1" if allow_git else "0"}
182
+
183
+ mkdir -p ~/.config/opencode /logs/agent {agent_workdir}
184
+
185
+ SCHEMA_DOLLAR='$'
186
+
187
+ cat > ~/.config/opencode/opencode.json << EOFCONFIG
188
+ {config_json}
189
+ EOFCONFIG
190
+
191
+ cd {agent_workdir}
192
+ cat {prompt_path} | opencode run 2>&1 | tee {log_path}
193
+ """
194
+ return f"bash -lc {shlex.quote(script)}"
195
+
196
+
197
+ # ── Convenience: pre-built install script ────────────────────────────────
198
+
199
+ OPENCODE_INSTALL_SCRIPT = build_install_script()
200
+
201
+
202
+ # ── Harness factory ──────────────────────────────────────────────────────
203
+
204
+
205
+ def opencode_harness(
206
+ system_prompt: str | None = DEFAULT_SYSTEM_PROMPT,
207
+ task_system_prompt: str | None = None,
208
+ disabled_tools: list[str] | None = None,
209
+ agent_workdir: str = "/app",
210
+ allow_git: bool = False,
211
+ disable_compaction: bool = True,
212
+ release_repo: str = DEFAULT_RELEASE_REPO,
213
+ release_version: str = DEFAULT_RELEASE_VERSION,
214
+ release_sha256: str = DEFAULT_RELEASE_SHA256,
215
+ instruction_path: str = "/opencode/prompt.txt",
216
+ system_prompt_path: str = "/opencode/system.txt",
217
+ log_path: str = "/opencode/logs.txt",
218
+ provider_key: str = "${OPENAI_MODEL%%/*}",
219
+ provider_display_name: str | None = None,
220
+ model_id: str = "$OPENAI_MODEL",
221
+ model_key: str = "${OPENAI_MODEL##*/}",
222
+ model_display_name: str | None = None,
223
+ provider_timeout_ms: int = 3_600_000,
224
+ ):
225
+ """Create a Harness configured for OpenCode.
226
+
227
+ Usage::
228
+
229
+ from verifiers.envs.experimental.composable.harnesses.opencode import opencode_harness
230
+ harness = opencode_harness(system_prompt="You are a coding agent...")
231
+ """
232
+ from verifiers.envs.experimental.composable import Harness
233
+
234
+ if task_system_prompt:
235
+ if system_prompt:
236
+ system_prompt = system_prompt + "\n" + task_system_prompt
237
+ else:
238
+ system_prompt = task_system_prompt
239
+
240
+ return Harness(
241
+ install_script=build_install_script(
242
+ release_repo=release_repo,
243
+ release_version=release_version,
244
+ release_sha256=release_sha256,
245
+ ),
246
+ run_command=build_opencode_run_command(
247
+ agent_workdir=agent_workdir,
248
+ prompt_path=instruction_path,
249
+ log_path=log_path,
250
+ disabled_tools=disabled_tools,
251
+ system_prompt_path=system_prompt_path if system_prompt else None,
252
+ disable_compaction=disable_compaction,
253
+ allow_git=allow_git,
254
+ provider_key=provider_key,
255
+ provider_display_name=provider_display_name,
256
+ model_id=model_id,
257
+ model_key=model_key,
258
+ model_display_name=model_display_name,
259
+ provider_timeout_ms=provider_timeout_ms,
260
+ ),
261
+ system_prompt=system_prompt,
262
+ instruction_path=instruction_path,
263
+ system_prompt_path=system_prompt_path,
264
+ log_path=log_path,
265
+ )
@@ -0,0 +1,12 @@
1
+ You are OpenCode, the best coding agent on the planet.
2
+
3
+ You are an interactive CLI tool that helps users with tasks. Use the instructions below and the tools available to you to assist the user.
4
+
5
+ # Tone and style
6
+ - Only use emojis if the user explicitly requests it. Avoid using emojis in all communication unless asked.
7
+ - Your output will be displayed on a command line interface. Your responses should be short and concise. You can use GitHub-flavored markdown for formatting, and it will be rendered in a monospace font using the CommonMark specification.
8
+ - Output text to communicate with the user; all text you output outside of tool use is displayed to the user. Only use tools to complete tasks. Never use tools like bash or code comments as a means to communicate with the user during the session.
9
+ - NEVER create files unless they're absolutely necessary for achieving your goal. ALWAYS prefer editing an existing file to creating a new one. This includes markdown files.
10
+
11
+ # Professional objectivity
12
+ Prioritize technical accuracy and truthfulness over validating the user's beliefs. Focus on facts and problem-solving, providing direct, objective technical info without any unnecessary superlatives, praise, or emotional validation. It is best for the user if OpenCode honestly applies the same rigorous standards to all ideas and disagrees when necessary, even if it may not be what the user wants to hear. Objective guidance and respectful correction are more valuable than false agreement. Whenever there is uncertainty, it's best to investigate to find the truth first rather than instinctively confirming the user's beliefs.
@@ -0,0 +1,50 @@
1
+ """RLM agent harness: install script, run command, and harness factory."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import shlex
6
+
7
+ from verifiers.envs.experimental.composable import Harness
8
+
9
# Location of the RLM repository, written without a URL scheme.
# build_install_script() derives the raw-content installer URL from it.
DEFAULT_RLM_REPO_URL: str = "github.com/PrimeIntellect-ai/rlm.git"

# NOTE(review): not referenced by the functions visible in this module;
# presumably the default comma-separated tool list for rlm -- confirm
# against callers.
DEFAULT_RLM_TOOLS: str = "bash,edit"

# NOTE(review): also unreferenced in the visible code; presumably a cap on
# agent turns -- confirm against callers.
DEFAULT_RLM_MAX_TURNS: int = 100

# File that build_run_command() reads into RLM_APPEND_TO_SYSTEM_PROMPT and
# that rlm_harness() hands to Harness as ``system_prompt_path``.
DEFAULT_APPEND_TO_SYSTEM_PROMPT_PATH: str = "/task/append_to_system_prompt.txt"
13
+
14
+
15
def build_install_script(rlm_repo_url: str = DEFAULT_RLM_REPO_URL) -> str:
    """Return a shell one-liner that downloads and runs the RLM installer.

    ``install.sh`` is fetched from the ``main`` branch of the repository via
    ``raw.githubusercontent.com`` (``curl`` first, ``wget`` as a fallback),
    written to ``/tmp/rlm-install.sh`` and executed with ``bash``.  The text
    ``${GH_TOKEN}`` is left literal in the returned string so that the shell
    running the command expands it.

    Args:
        rlm_repo_url: GitHub repository location such as
            ``github.com/<owner>/<repo>.git``.  Surrounding whitespace, a
            leading ``https://`` or ``http://`` scheme, trailing slashes and
            a ``.git`` suffix are tolerated and stripped.

    Returns:
        The install command as a single shell string.
    """
    repo = rlm_repo_url.strip()
    # The scheme is re-added below together with the token; an input that
    # already carries one would otherwise produce
    # "https://<token>@https://...", which is not a valid URL.
    for scheme in ("https://", "http://"):
        repo = repo.removeprefix(scheme)
    # Drop trailing slashes first so ".git" is really the suffix.
    repo = repo.rstrip("/").removesuffix(".git")
    # Rewrite only the first occurrence (the host) so an owner or repository
    # name that happens to contain "github.com" is left untouched.
    raw_base = repo.replace("github.com", "raw.githubusercontent.com", 1)
    url = f"https://${{GH_TOKEN}}@{raw_base}/main/install.sh"
    return (
        f"(curl -fsSL {url} || wget -qO- {url})"
        " > /tmp/rlm-install.sh && bash /tmp/rlm-install.sh"
    )
21
+
22
+
23
def build_run_command(
    instruction_path: str = "/task/instruction.md",
    workdir: str = "/testbed",
) -> str:
    """Return the ``bash -lc`` command that launches ``rlm`` on a task.

    The generated script:

    * enables ``set -eo pipefail``;
    * exports ``RLM_MODEL`` from ``$OPENAI_MODEL`` and sets ``OPENAI_API_KEY``
      to the literal ``intercepted``;
    * exports ``RLM_APPEND_TO_SYSTEM_PROMPT`` with the contents of
      ``DEFAULT_APPEND_TO_SYSTEM_PROMPT_PATH`` (empty when the file cannot
      be read);
    * changes into *workdir* and passes the contents of *instruction_path*
      to ``rlm`` as one argument.

    Args:
        instruction_path: File whose contents become the ``rlm`` argument.
        workdir: Directory the script ``cd``s into before running ``rlm``.

    Returns:
        A single shell string of the form ``bash -lc '<script>'``.
    """
    # Quote every interpolated path so spaces or shell metacharacters in a
    # caller-supplied location cannot split words or inject commands.  For
    # plain paths shlex.quote() returns its input unchanged, so the default
    # command text is unaffected.
    append_path = shlex.quote(DEFAULT_APPEND_TO_SYSTEM_PROMPT_PATH)
    quoted_workdir = shlex.quote(workdir)
    quoted_instruction = shlex.quote(instruction_path)
    script = f"""\
set -eo pipefail
export RLM_MODEL=$OPENAI_MODEL
export OPENAI_API_KEY=intercepted
export RLM_APPEND_TO_SYSTEM_PROMPT="$(cat {append_path} 2>/dev/null || true)"
cd {quoted_workdir}
rlm "$(cat {quoted_instruction})"
"""
    return f"bash -lc {shlex.quote(script)}"
36
+
37
+
38
def rlm_harness(
    workdir: str = "/testbed",
    instruction_path: str = "/task/instruction.md",
    rlm_repo_url: str = DEFAULT_RLM_REPO_URL,
    append_to_system_prompt: str | None = None,
) -> Harness:
    """Assemble a ``Harness`` describing how to install and run RLM.

    Args:
        workdir: Directory the run command changes into before starting rlm.
        instruction_path: File holding the task instruction; used both in
            the run command and as the harness's ``instruction_path``.
        rlm_repo_url: Repository from which the install script is derived.
        append_to_system_prompt: Optional text forwarded to ``Harness`` as
            ``system_prompt``.

    Returns:
        A ``Harness`` whose ``system_prompt_path`` is always
        ``DEFAULT_APPEND_TO_SYSTEM_PROMPT_PATH``.
    """
    installer = build_install_script(rlm_repo_url)
    launcher = build_run_command(instruction_path, workdir)
    return Harness(
        install_script=installer,
        run_command=launcher,
        system_prompt=append_to_system_prompt,
        system_prompt_path=DEFAULT_APPEND_TO_SYSTEM_PROMPT_PATH,
        instruction_path=instruction_path,
    )
@@ -0,0 +1,39 @@
1
+ from verifiers.envs.experimental.composable.tasksets.swe.swe_tasksets import (
2
+ make_multiswe_taskset,
3
+ make_openswe_taskset,
4
+ make_r2e_taskset,
5
+ make_swe_taskset,
6
+ make_swebench_taskset,
7
+ )
8
+ from verifiers.envs.experimental.composable.tasksets.lean.lean_task import (
9
+ LEAN_SYSTEM_PROMPT,
10
+ LeanTaskSet,
11
+ )
12
+ from verifiers.envs.experimental.composable.tasksets.math.math_task import MathTaskSet
13
+ from verifiers.envs.experimental.composable.tasksets.cp.cp_task import (
14
+ CPRubric,
15
+ CPTaskSet,
16
+ )
17
+ from verifiers.envs.experimental.composable.tasksets.harbor.harbor import (
18
+ HarborDatasetRubric,
19
+ HarborDatasetTaskSet,
20
+ HarborRubric,
21
+ HarborTaskSet,
22
+ )
23
+
24
+ __all__ = [
25
+ "make_swe_taskset",
26
+ "make_r2e_taskset",
27
+ "make_swebench_taskset",
28
+ "make_multiswe_taskset",
29
+ "make_openswe_taskset",
30
+ "LeanTaskSet",
31
+ "LEAN_SYSTEM_PROMPT",
32
+ "MathTaskSet",
33
+ "CPTaskSet",
34
+ "CPRubric",
35
+ "HarborTaskSet",
36
+ "HarborDatasetTaskSet",
37
+ "HarborRubric",
38
+ "HarborDatasetRubric",
39
+ ]
@@ -0,0 +1,3 @@
1
+ from .cp_task import CPRubric, CPTaskSet
2
+
3
+ __all__ = ["CPTaskSet", "CPRubric"]