verifiers 0.1.15.dev4__tar.gz → 0.1.15.dev5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (311) hide show
  1. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/PKG-INFO +1 -1
  2. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_v1_harbor_cli.py +57 -1
  3. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/__init__.py +4 -1
  4. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/README.md +4 -2
  5. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/RE_MIGRATION.md +4 -2
  6. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/__init__.py +2 -0
  7. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/config.py +8 -3
  8. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/packages/harnesses/__init__.py +2 -0
  9. verifiers-0.1.15.dev5/verifiers/v1/packages/harnesses/terminus_2.py +286 -0
  10. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/packages/tasksets/harbor.py +17 -15
  11. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/task.py +3 -1
  12. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/.gitignore +0 -0
  13. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/LICENSE +0 -0
  14. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/README.md +0 -0
  15. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/pyproject.toml +0 -0
  16. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/AGENTS.md +0 -0
  17. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/README.md +0 -0
  18. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/__init__.py +0 -0
  19. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/conftest.py +0 -0
  20. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_browser_env.py +0 -0
  21. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_build_script.py +0 -0
  22. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_cli_agent_env.py +0 -0
  23. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_client_auth_errors.py +0 -0
  24. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_client_config.py +0 -0
  25. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_client_multimodal_types.py +0 -0
  26. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_composable_env.py +0 -0
  27. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_context_token_metrics.py +0 -0
  28. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_decorator_ranks.py +0 -0
  29. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_endpoint_registry.py +0 -0
  30. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_env_group.py +0 -0
  31. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_env_server.py +0 -0
  32. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_environment.py +0 -0
  33. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_environment_extra.py +0 -0
  34. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_envs.py +0 -0
  35. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_error_chain.py +0 -0
  36. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_eval_cli.py +0 -0
  37. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_eval_display.py +0 -0
  38. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_eval_utils.py +0 -0
  39. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_gepa_cli.py +0 -0
  40. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_gepa_utils.py +0 -0
  41. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_gym_env.py +0 -0
  42. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_harbor_env_mcp.py +0 -0
  43. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_imports.py +0 -0
  44. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_install_utils.py +0 -0
  45. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_interception_utils.py +0 -0
  46. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_langchain_deep_agents_wikispeedia.py +0 -0
  47. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_lean_task.py +0 -0
  48. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_logging.py +0 -0
  49. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_math_rubric.py +0 -0
  50. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_maybe_think_parser.py +0 -0
  51. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_mcp_search_env.py +0 -0
  52. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_message_utils.py +0 -0
  53. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_message_utils_multimodal.py +0 -0
  54. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_multiturn_env.py +0 -0
  55. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_nemorl_client.py +0 -0
  56. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_openai_chat_completions_token_client.py +0 -0
  57. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_openai_responses_client.py +0 -0
  58. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_opencode_harbor.py +0 -0
  59. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_opencode_rlm_env.py +0 -0
  60. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_openenv_client.py +0 -0
  61. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_parser.py +0 -0
  62. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_path_utils.py +0 -0
  63. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_per_turn_timing.py +0 -0
  64. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_pricing_utils.py +0 -0
  65. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_prime_plugin.py +0 -0
  66. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_renderer_client.py +0 -0
  67. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_renderer_e2e.py +0 -0
  68. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_rlm_composable_env.py +0 -0
  69. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_rlm_env.py +0 -0
  70. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_rubric.py +0 -0
  71. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_rubric_group.py +0 -0
  72. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_sandbox_env.py +0 -0
  73. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_sandbox_mixin.py +0 -0
  74. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_save_utils.py +0 -0
  75. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_setup_script.py +0 -0
  76. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_singleturn_env.py +0 -0
  77. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_stateful_tool_env.py +0 -0
  78. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_think_parser.py +0 -0
  79. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_tool_env.py +0 -0
  80. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_tool_utils.py +0 -0
  81. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_trajectory_processing.py +0 -0
  82. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_tui_info_formatting.py +0 -0
  83. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_types.py +0 -0
  84. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_v1_bfcl.py +0 -0
  85. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_v1_config_extension.py +0 -0
  86. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_v1_empty_completions.py +0 -0
  87. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_v1_endpoint_protocols.py +0 -0
  88. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_v1_example_counts.py +0 -0
  89. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_v1_group_reward_env.py +0 -0
  90. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_v1_mini_swe_agent.py +0 -0
  91. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_v1_rlm_swe.py +0 -0
  92. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_v1_runtime_lifecycle.py +0 -0
  93. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_v1_scoring_functions.py +0 -0
  94. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_v1_taskset_bindings.py +0 -0
  95. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_wordle_env.py +0 -0
  96. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_xml_parser.py +0 -0
  97. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/AGENTS.md +0 -0
  98. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/cli/__init__.py +0 -0
  99. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/cli/commands/__init__.py +0 -0
  100. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/cli/commands/build.py +0 -0
  101. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/cli/commands/eval.py +0 -0
  102. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/cli/commands/gepa.py +0 -0
  103. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/cli/commands/init.py +0 -0
  104. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/cli/commands/install.py +0 -0
  105. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/cli/commands/setup.py +0 -0
  106. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/cli/plugins/__init__.py +0 -0
  107. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/cli/plugins/prime.py +0 -0
  108. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/cli/tui.py +0 -0
  109. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/clients/__init__.py +0 -0
  110. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/clients/anthropic_messages_client.py +0 -0
  111. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/clients/client.py +0 -0
  112. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/clients/nemorl_chat_completions_client.py +0 -0
  113. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/clients/openai_chat_completions_client.py +0 -0
  114. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/clients/openai_chat_completions_token_client.py +0 -0
  115. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/clients/openai_completions_client.py +0 -0
  116. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/clients/openai_responses_client.py +0 -0
  117. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/clients/renderer_client.py +0 -0
  118. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/decorators.py +0 -0
  119. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/AGENTS.md +0 -0
  120. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/__init__.py +0 -0
  121. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/env_group.py +0 -0
  122. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/environment.py +0 -0
  123. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/README.md +0 -0
  124. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/__init__.py +0 -0
  125. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/cli_agent_env.py +0 -0
  126. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/README.md +0 -0
  127. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/__init__.py +0 -0
  128. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/_filter.py +0 -0
  129. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/composable_env.py +0 -0
  130. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/harness.py +0 -0
  131. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/harnesses/__init__.py +0 -0
  132. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/harnesses/mini_swe_agent.py +0 -0
  133. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/harnesses/opencode.py +0 -0
  134. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/harnesses/prompt.txt +0 -0
  135. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/harnesses/rlm.py +0 -0
  136. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/swe_debug_env.py +0 -0
  137. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/task.py +0 -0
  138. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/__init__.py +0 -0
  139. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/cp/__init__.py +0 -0
  140. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/cp/cp_task.py +0 -0
  141. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/cp/test_utils.py +0 -0
  142. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/harbor/__init__.py +0 -0
  143. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/harbor/harbor.py +0 -0
  144. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/lean/__init__.py +0 -0
  145. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/lean/lean_task.py +0 -0
  146. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/math/__init__.py +0 -0
  147. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/math/math_task.py +0 -0
  148. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/swe/__init__.py +0 -0
  149. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/swe/_test_patch.py +0 -0
  150. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/swe/create_fix_patch.sh +0 -0
  151. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/swe/log_parser.py +0 -0
  152. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/swe/multi_swe.py +0 -0
  153. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/swe/openswe.py +0 -0
  154. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym.py +0 -0
  155. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/swe/swe_bench.py +0 -0
  156. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/swe/swe_lego.py +0 -0
  157. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2.py +0 -0
  158. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2_log_parsers.py +0 -0
  159. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/swe/swe_smith.py +0 -0
  160. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/swe/swe_tasksets.py +0 -0
  161. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/gym_env.py +0 -0
  162. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/harbor_env/__init__.py +0 -0
  163. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/harbor_env/env.py +0 -0
  164. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/harbor_env/mcp.py +0 -0
  165. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/mcp_env.py +0 -0
  166. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/opencode_env.py +0 -0
  167. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/opencode_qa_env.py +0 -0
  168. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/opencode_rlm_env.py +0 -0
  169. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/rlm_env.py +0 -0
  170. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/sandbox_mixin.py +0 -0
  171. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/utils/__init__.py +0 -0
  172. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/utils/file_locks.py +0 -0
  173. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/utils/git_checkout_cache.py +0 -0
  174. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/integrations/README.md +0 -0
  175. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/integrations/__init__.py +0 -0
  176. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/integrations/browser_env/README.md +0 -0
  177. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/integrations/browser_env/__init__.py +0 -0
  178. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
  179. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
  180. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
  181. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
  182. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +0 -0
  183. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/integrations/openenv_env.py +0 -0
  184. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
  185. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/integrations/textarena_env.py +0 -0
  186. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/multiturn_env.py +0 -0
  187. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/python_env.py +0 -0
  188. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/sandbox_env.py +0 -0
  189. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/singleturn_env.py +0 -0
  190. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/stateful_tool_env.py +0 -0
  191. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/tool_env.py +0 -0
  192. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/errors.py +0 -0
  193. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/gepa/__init__.py +0 -0
  194. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/gepa/adapter.py +0 -0
  195. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/gepa/config.py +0 -0
  196. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/gepa/display.py +0 -0
  197. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/gepa/gepa_utils.py +0 -0
  198. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/parsers/__init__.py +0 -0
  199. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/parsers/maybe_think_parser.py +0 -0
  200. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/parsers/parser.py +0 -0
  201. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/parsers/think_parser.py +0 -0
  202. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/parsers/xml_parser.py +0 -0
  203. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/rl/README.md +0 -0
  204. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/rl/__init__.py +0 -0
  205. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/rl/inference/__init__.py +0 -0
  206. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/rl/inference/client.py +0 -0
  207. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/rl/inference/server.py +0 -0
  208. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/rl/trainer/__init__.py +0 -0
  209. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/rl/trainer/config.py +0 -0
  210. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/rl/trainer/orchestrator.py +0 -0
  211. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/rl/trainer/trainer.py +0 -0
  212. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/rl/trainer/utils.py +0 -0
  213. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/rubrics/__init__.py +0 -0
  214. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/rubrics/experimental/hybrid_math_rubric.py +0 -0
  215. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/rubrics/judge_rubric.py +0 -0
  216. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/rubrics/math_rubric.py +0 -0
  217. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/rubrics/rubric.py +0 -0
  218. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/rubrics/rubric_group.py +0 -0
  219. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/scripts/__init__.py +0 -0
  220. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/scripts/build.py +0 -0
  221. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/scripts/eval.py +0 -0
  222. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/scripts/gepa.py +0 -0
  223. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/scripts/init.py +0 -0
  224. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/scripts/install.py +0 -0
  225. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/scripts/rl.py +0 -0
  226. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/scripts/setup.py +0 -0
  227. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/scripts/train.py +0 -0
  228. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/scripts/tui.py +0 -0
  229. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/scripts/vllm.py +0 -0
  230. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/serve/__init__.py +0 -0
  231. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/serve/client/env_client.py +0 -0
  232. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/serve/client/zmq_env_client.py +0 -0
  233. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/serve/server/__init__.py +0 -0
  234. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/serve/server/env_router.py +0 -0
  235. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/serve/server/env_server.py +0 -0
  236. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/serve/server/env_worker.py +0 -0
  237. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/serve/server/zmq_env_server.py +0 -0
  238. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/serve/types.py +0 -0
  239. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/types.py +0 -0
  240. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/__init__.py +0 -0
  241. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/async_utils.py +0 -0
  242. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/client_utils.py +0 -0
  243. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/config_utils.py +0 -0
  244. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/data_utils.py +0 -0
  245. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/display_utils.py +0 -0
  246. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/env_config_utils.py +0 -0
  247. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/env_utils.py +0 -0
  248. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/error_utils.py +0 -0
  249. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/eval_display.py +0 -0
  250. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/eval_utils.py +0 -0
  251. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/heartbeat.py +0 -0
  252. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/import_utils.py +0 -0
  253. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/install_utils.py +0 -0
  254. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/interception_utils.py +0 -0
  255. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/logging_utils.py +0 -0
  256. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/message_utils.py +0 -0
  257. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/metric_utils.py +0 -0
  258. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/path_utils.py +0 -0
  259. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/pricing_utils.py +0 -0
  260. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/process_utils.py +0 -0
  261. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/response_utils.py +0 -0
  262. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/save_utils.py +0 -0
  263. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/serve_utils.py +0 -0
  264. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/thread_utils.py +0 -0
  265. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/threaded_sandbox_client.py +0 -0
  266. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/tool_utils.py +0 -0
  267. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/tunnel_utils.py +0 -0
  268. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/usage_utils.py +0 -0
  269. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/version_utils.py +0 -0
  270. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/ENVIRONMENT_BEST_PRACTICES.md +0 -0
  271. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/env.py +0 -0
  272. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/harness.py +0 -0
  273. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/packages/__init__.py +0 -0
  274. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/packages/harnesses/command.py +0 -0
  275. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/packages/harnesses/configs.py +0 -0
  276. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/packages/harnesses/mini_swe_agent.py +0 -0
  277. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/packages/harnesses/opencode.py +0 -0
  278. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/packages/harnesses/pi.py +0 -0
  279. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/packages/harnesses/rlm.py +0 -0
  280. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/packages/tasksets/__init__.py +0 -0
  281. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/runtime.py +0 -0
  282. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/state.py +0 -0
  283. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/taskset.py +0 -0
  284. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/toolset.py +0 -0
  285. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/types.py +0 -0
  286. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/user.py +0 -0
  287. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/__init__.py +0 -0
  288. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/artifact_utils.py +0 -0
  289. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/binding_utils.py +0 -0
  290. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/config_callable_utils.py +0 -0
  291. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/config_utils.py +0 -0
  292. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/endpoint_utils.py +0 -0
  293. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/json_utils.py +0 -0
  294. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/judge_utils.py +0 -0
  295. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/lifecycle_utils.py +0 -0
  296. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/mcp_proxy_utils.py +0 -0
  297. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/mcp_utils.py +0 -0
  298. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/object_utils.py +0 -0
  299. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/program_utils.py +0 -0
  300. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/prompt_utils.py +0 -0
  301. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/runtime_registry.py +0 -0
  302. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/sandbox_program_utils.py +0 -0
  303. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/sandbox_utils.py +0 -0
  304. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/scoring_utils.py +0 -0
  305. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/serialization_utils.py +0 -0
  306. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/task_freeze_utils.py +0 -0
  307. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/taskset_utils.py +0 -0
  308. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/timing_utils.py +0 -0
  309. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/tool_utils.py +0 -0
  310. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/trajectory_utils.py +0 -0
  311. {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/usage_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: verifiers
3
- Version: 0.1.15.dev4
3
+ Version: 0.1.15.dev5
4
4
  Summary: Verifiers: Environments for LLM Reinforcement Learning
5
5
  Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
6
6
  Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
@@ -9,10 +9,18 @@ from uuid import uuid4
9
9
 
10
10
  import pytest
11
11
 
12
+ import verifiers as root_vf
12
13
  import verifiers.v1 as vf
13
14
  from verifiers.v1.packages.harnesses.pi import pi_mcp_json, pi_models_json
15
+ from verifiers.v1.packages.harnesses.terminus_2 import (
16
+ DEFAULT_API_BASE_URL,
17
+ DEFAULT_HARBOR_PACKAGE,
18
+ DEFAULT_MODEL_NAME,
19
+ Terminus2,
20
+ terminus_2_agent_script,
21
+ )
14
22
  from verifiers.v1.packages.tasksets.harbor import harbor_reward
15
- from verifiers.v1.utils.program_utils import merge_task_program
23
+ from verifiers.v1.utils.program_utils import merge_task_program, merge_task_sandbox
16
24
 
17
25
 
18
26
  def write_harbor_task(root: Path, name: str = "task-a") -> Path:
@@ -85,6 +93,13 @@ def test_harbor_taskset_loads_package_tasks_with_program_patch(
85
93
  assert task["sandbox"]["memory_gb"] == 2.0
86
94
  assert task["sandbox"]["disk_size_gb"] == 8.0
87
95
  assert task["sandbox"]["command_timeout"] == 600
96
+ assert "network_access" not in task["sandbox"]
97
+ assert (
98
+ merge_task_sandbox({"network_access": False, "scope": "rollout"}, task)[
99
+ "network_access"
100
+ ]
101
+ is False
102
+ )
88
103
  assert task["harbor"]["test_timeout"] == 300.0
89
104
  assert task["program"]["files"] == {
90
105
  "/task/instruction.md": {"task": "instruction"},
@@ -200,6 +215,8 @@ def test_packaged_harbor_and_opencode_imports_are_reexported() -> None:
200
215
  assert vf.OpenCode is OpenCode
201
216
  assert vf.OpenCodeConfig is OpenCodeConfig
202
217
  assert vf.Pi is Pi
218
+ assert vf.Terminus2 is Terminus2
219
+ assert root_vf.Terminus2 is Terminus2
203
220
  assert vf.HarborTaskset is HarborTaskset
204
221
 
205
222
 
@@ -254,6 +271,45 @@ def test_pi_harness_writes_intercepted_model_and_mcp_config() -> None:
254
271
  assert mcp["mcpServers"]["verifiers-tools"]["command"] == "python3"
255
272
 
256
273
 
274
+ def test_terminus_2_harness_builds_sandbox_program() -> None:
275
+ harness = vf.Terminus2(
276
+ system_prompt="extra system prompt",
277
+ agent_workdir="/workspace",
278
+ max_turns=7,
279
+ python_version="3.12",
280
+ )
281
+ program = cast(dict[str, object], harness.program)
282
+ command = cast(list[object], program["command"])
283
+ setup = cast(str, program["setup"])
284
+ files = cast(dict[str, object], program["files"])
285
+ artifacts = cast(dict[str, object], program["artifacts"])
286
+ env = cast(dict[str, object], program.get("env", {}))
287
+
288
+ assert isinstance(harness, vf.Harness)
289
+ assert "/terminus_2/instruction.md" in files
290
+ assert "/terminus_2/system_prompt.txt" in files
291
+ assert "apt-get -o Acquire::Retries=3 update" in setup
292
+ assert "apt-get -o Acquire::Retries=3 install" in setup
293
+ assert "git" not in setup
294
+ assert "terminus_2_log" in artifacts
295
+ assert "OPENAI_MODEL" not in env
296
+
297
+ run_script = cast(str, command[2])
298
+ assert "TERMINUS_2_WORKDIR=/workspace" in run_script
299
+ assert f"--with {DEFAULT_HARBOR_PACKAGE}" in run_script
300
+ assert "git+https://github.com" not in run_script
301
+ assert "max_turns=7" in run_script
302
+
303
+ script = terminus_2_agent_script(max_turns=7)
304
+ compile(script, "terminus_2_agent.py", "exec")
305
+ assert DEFAULT_MODEL_NAME in script
306
+ assert DEFAULT_API_BASE_URL in script
307
+ assert "OPENAI_MODEL" not in script
308
+ assert "PRIME_API_KEY" in script
309
+ assert "async def prepare_logs_for_host(self) -> None" in script
310
+ assert "max_turns=7" in script
311
+
312
+
257
313
  def test_task_program_merges_into_command_program_without_collisions() -> None:
258
314
  harness = vf.Harness(
259
315
  program={
@@ -1,4 +1,4 @@
1
- __version__ = "0.1.15.dev4"
1
+ __version__ = "0.1.15.dev5"
2
2
 
3
3
  import importlib
4
4
  import os
@@ -93,6 +93,7 @@ __all__ = [
93
93
  "Pi",
94
94
  "RLM",
95
95
  "RLMConfig",
96
+ "Terminus2",
96
97
  "Environment",
97
98
  "MultiTurnEnv",
98
99
  "SingleTurnEnv",
@@ -217,6 +218,7 @@ _LAZY_IMPORTS = {
217
218
  "Pi": "verifiers.v1:Pi",
218
219
  "RLM": "verifiers.v1:RLM",
219
220
  "RLMConfig": "verifiers.v1:RLMConfig",
221
+ "Terminus2": "verifiers.v1:Terminus2",
220
222
  "get_messages": "verifiers.v1:get_messages",
221
223
  "add_metric": "verifiers.v1:add_metric",
222
224
  "add_reward": "verifiers.v1:add_reward",
@@ -311,6 +313,7 @@ if TYPE_CHECKING:
311
313
  ProgramConfig,
312
314
  RLM,
313
315
  RLMConfig,
316
+ Terminus2,
314
317
  SandboxConfig,
315
318
  Task,
316
319
  TaskRow,
@@ -537,8 +537,8 @@ signature.
537
537
  Reusable CLI programs should be packaged as `Harness` subclasses. Package
538
538
  implementations live under `verifiers.v1.packages` while the v1 API stabilizes,
539
539
  and are re-exported from `verifiers.v1` for normal use. `OpenCode`, `Pi`,
540
- `MiniSWEAgent`, and `RLM` are bundled `Harness` leaf wrappers for common
541
- coding-agent CLIs.
540
+ `MiniSWEAgent`, `Terminus2`, and `RLM` are bundled `Harness` leaf wrappers for
541
+ common coding-agent CLIs.
542
542
 
543
543
  ```python
544
544
  import verifiers as vf
@@ -560,6 +560,8 @@ endpoint and, when tools are enabled, installs the Pi MCP adapter and writes a
560
560
  project `.mcp.json`. Neither side needs to know the other's private fields.
561
561
  `MiniSWEAgent` owns mini-swe-agent installation, config layering, endpoint env,
562
562
  and log/trajectory artifacts.
563
+ `Terminus2` owns Harbor Terminus agent installation, endpoint env, and log
564
+ artifacts.
563
565
  `RLM` follows the same boundary for recursive LLM runs: `HarborTaskset` owns
564
566
  the task directory and tests, while `RLM` owns RLM installation, optional skill
565
567
  upload to `/rlm/skills`, endpoint wiring, and trajectory filtering.
@@ -548,8 +548,8 @@ Use this for:
548
548
  Migration:
549
549
 
550
550
  1. Use `vf.HarborTaskset` for Harbor-format task directories.
551
- 2. Use `vf.OpenCode()`, `vf.Pi()`, `vf.MiniSWEAgent()`, or `vf.RLM()` for the
552
- command harness.
551
+ 2. Use `vf.OpenCode()`, `vf.Pi()`, `vf.MiniSWEAgent()`, `vf.Terminus2()`, or
552
+ `vf.RLM()` for the command harness.
553
553
  3. Put task-owned uploads and sandbox overrides on the taskset.
554
554
  4. Keep scoring as reward/metric functions on the taskset.
555
555
 
@@ -580,6 +580,8 @@ Gotchas:
580
580
  adapter setup, and log artifacts.
581
581
  - `MiniSWEAgent` owns mini-swe-agent installation, config layering, endpoint
582
582
  env, and log/trajectory artifacts.
583
+ - `Terminus2` owns Harbor Terminus agent installation, endpoint env, and log
584
+ artifacts.
583
585
  - `RLM` owns RLM installation, optional `/task/rlm-skills` upload, endpoint
584
586
  wiring, and trajectory filtering.
585
587
  - `task.program` is the merge point for task-owned program files/env/setup.
@@ -41,6 +41,7 @@ from .packages.harnesses import (
41
41
  Pi,
42
42
  RLM,
43
43
  RLMConfig,
44
+ Terminus2,
44
45
  )
45
46
  from .utils.scoring_utils import (
46
47
  add_metric,
@@ -97,6 +98,7 @@ __all__ = [
97
98
  "ProgramConfig",
98
99
  "RLM",
99
100
  "RLMConfig",
101
+ "Terminus2",
100
102
  "SandboxConfig",
101
103
  "State",
102
104
  "Task",
@@ -367,15 +367,20 @@ class EnvConfig(Config):
367
367
  return base
368
368
 
369
369
 
370
- def sandbox_config_mapping(value: object | None) -> ConfigData | None:
370
+ def sandbox_config_mapping(
371
+ value: object | None, *, fill_defaults: bool = True
372
+ ) -> ConfigData | None:
371
373
  if value is None:
372
374
  return None
373
375
  if isinstance(value, SandboxConfig):
374
- return value.model_dump(exclude_none=True)
376
+ return value.model_dump(exclude_none=True, exclude_unset=not fill_defaults)
375
377
  if isinstance(value, Mapping):
376
378
  mapping = cast(ConfigMap, value)
377
379
  prefer = mapping.get("prefer")
378
380
  if prefer is not None and prefer != "program":
379
381
  raise ValueError("sandbox.prefer must be 'program'.")
380
- return SandboxConfig.from_config(mapping).model_dump(exclude_none=True)
382
+ sandbox = SandboxConfig.from_config(mapping).model_dump(exclude_none=True)
383
+ if fill_defaults:
384
+ return sandbox
385
+ return {key: sandbox[key] for key in mapping if key in sandbox}
381
386
  raise TypeError("Sandbox config must be a mapping.")
@@ -3,6 +3,7 @@ from .mini_swe_agent import MiniSWEAgent
3
3
  from .opencode import OpenCode
4
4
  from .pi import Pi
5
5
  from .rlm import RLM
6
+ from .terminus_2 import Terminus2
6
7
 
7
8
  __all__ = [
8
9
  "MiniSWEAgent",
@@ -11,4 +12,5 @@ __all__ = [
11
12
  "Pi",
12
13
  "RLM",
13
14
  "RLMConfig",
15
+ "Terminus2",
14
16
  ]
@@ -0,0 +1,286 @@
1
+ import shlex
2
+ from pathlib import PurePosixPath
3
+
4
+ from typing_extensions import Unpack
5
+
6
+ from .command import HarnessKwargs, command_program, command_sandbox
7
+ from ...config import SandboxConfig
8
+ from ...harness import Harness
9
+ from ...utils.prompt_utils import (
10
+ state_system_prompt_text,
11
+ task_text as task_instruction_text,
12
+ )
13
+ from ...types import ConfigMap, ProgramMap, ProgramOptionMap, ProgramValue, PromptInput
14
+
15
+ DEFAULT_AGENT_WORKDIR = "/app"
16
+ DEFAULT_INSTRUCTION_PATH = "/terminus_2/instruction.md"
17
+ DEFAULT_SYSTEM_PROMPT_PATH = "/terminus_2/system_prompt.txt"
18
+ DEFAULT_LOG_PATH = "/logs/agent/terminus_2.log"
19
+ DEFAULT_HARBOR_PACKAGE = "harbor==0.6.6"
20
+ DEFAULT_PYTHON_VERSION = "3.12"
21
+ DEFAULT_MODEL_NAME = "openai/gpt-4.1-mini"
22
+ DEFAULT_API_BASE_URL = "https://api.pinference.ai/api/v1"
23
+
24
+
25
+ class Terminus2(Harness):
26
+ def __init__(
27
+ self,
28
+ *,
29
+ agent_workdir: str = DEFAULT_AGENT_WORKDIR,
30
+ instruction_path: str = DEFAULT_INSTRUCTION_PATH,
31
+ system_prompt_path: str = DEFAULT_SYSTEM_PROMPT_PATH,
32
+ log_path: str = DEFAULT_LOG_PATH,
33
+ harbor_package: str = DEFAULT_HARBOR_PACKAGE,
34
+ python_version: str = DEFAULT_PYTHON_VERSION,
35
+ model_name: str = DEFAULT_MODEL_NAME,
36
+ api_base_url: str = DEFAULT_API_BASE_URL,
37
+ system_prompt: PromptInput | None = None,
38
+ sandbox: bool | ConfigMap | SandboxConfig = True,
39
+ program: ProgramMap | None = None,
40
+ max_turns: int | None = 4,
41
+ **kwargs: Unpack[HarnessKwargs],
42
+ ):
43
+ files: dict[str, ProgramValue] = {
44
+ instruction_path: task_instruction_text,
45
+ }
46
+ if system_prompt is not None:
47
+ files[system_prompt_path] = state_system_prompt_text
48
+ artifacts: ProgramOptionMap = {
49
+ "terminus_2_log": {
50
+ "path": log_path,
51
+ "format": "text",
52
+ "optional": True,
53
+ }
54
+ }
55
+ command = [
56
+ "bash",
57
+ "-lc",
58
+ build_terminus_2_run_script(
59
+ agent_workdir=agent_workdir,
60
+ instruction_path=instruction_path,
61
+ system_prompt_path=system_prompt_path
62
+ if system_prompt is not None
63
+ else None,
64
+ log_path=log_path,
65
+ harbor_package=harbor_package,
66
+ python_version=python_version,
67
+ model_name=model_name,
68
+ api_base_url=api_base_url,
69
+ max_turns=max_turns,
70
+ ),
71
+ ]
72
+ super().__init__(
73
+ program=command_program(
74
+ command=command,
75
+ sandbox=sandbox,
76
+ files=files,
77
+ setup=build_terminus_2_install_script(),
78
+ artifacts=artifacts,
79
+ program=program,
80
+ ),
81
+ sandbox=command_sandbox(sandbox),
82
+ system_prompt=system_prompt,
83
+ max_turns=max_turns,
84
+ **kwargs,
85
+ )
86
+
87
+
88
+ def build_terminus_2_install_script() -> str:
89
+ return """\
90
+ set -e
91
+ apt-get -o Acquire::Retries=3 update -qq
92
+ apt-get -o Acquire::Retries=3 install -y -qq curl ca-certificates > /dev/null 2>&1
93
+ if ! command -v uv >/dev/null 2>&1; then
94
+ curl -LsSf https://astral.sh/uv/install.sh | sh
95
+ fi
96
+ """
97
+
98
+
99
+ def build_terminus_2_run_script(
100
+ *,
101
+ agent_workdir: str = DEFAULT_AGENT_WORKDIR,
102
+ instruction_path: str = DEFAULT_INSTRUCTION_PATH,
103
+ system_prompt_path: str | None = DEFAULT_SYSTEM_PROMPT_PATH,
104
+ log_path: str = DEFAULT_LOG_PATH,
105
+ harbor_package: str = DEFAULT_HARBOR_PACKAGE,
106
+ python_version: str = DEFAULT_PYTHON_VERSION,
107
+ model_name: str = DEFAULT_MODEL_NAME,
108
+ api_base_url: str = DEFAULT_API_BASE_URL,
109
+ max_turns: int | None = 4,
110
+ ) -> str:
111
+ log_dir = str(PurePosixPath(log_path).parent)
112
+ agent_script = terminus_2_agent_script(
113
+ instruction_path=instruction_path,
114
+ system_prompt_path=system_prompt_path,
115
+ log_dir=log_dir,
116
+ model_name=model_name,
117
+ api_base_url=api_base_url,
118
+ max_turns=max_turns,
119
+ )
120
+ return f"""\
121
+ set -eo pipefail
122
+ export PATH="$HOME/.local/bin:$PATH"
123
+
124
+ TERMINUS_2_WORKDIR="${{AGENT_WORKDIR:-}}"
125
+ if [[ -z "$TERMINUS_2_WORKDIR" ]]; then
126
+ TERMINUS_2_WORKDIR={shlex.quote(agent_workdir)}
127
+ fi
128
+ export AGENT_WORKDIR="$TERMINUS_2_WORKDIR"
129
+
130
+ mkdir -p {shlex.quote(log_dir)} "$TERMINUS_2_WORKDIR"
131
+ cd "$TERMINUS_2_WORKDIR"
132
+ uv --no-config run --no-project --quiet \
133
+ --python {shlex.quote(python_version)} \
134
+ --with {shlex.quote(harbor_package)} \
135
+ python - <<'PY' 2>&1 | tee -a {shlex.quote(log_path)}
136
+ {agent_script}
137
+ PY
138
+ """
139
+
140
+
141
+ def terminus_2_agent_script(
142
+ *,
143
+ instruction_path: str = DEFAULT_INSTRUCTION_PATH,
144
+ system_prompt_path: str | None = DEFAULT_SYSTEM_PROMPT_PATH,
145
+ log_dir: str = "/logs/agent",
146
+ model_name: str = DEFAULT_MODEL_NAME,
147
+ api_base_url: str = DEFAULT_API_BASE_URL,
148
+ max_turns: int | None = 4,
149
+ ) -> str:
150
+ system_prompt_block = ""
151
+ if system_prompt_path is not None:
152
+ system_prompt_block = f"""\
153
+ system_prompt_path = Path({system_prompt_path!r})
154
+ if system_prompt_path.exists() and system_prompt_path.stat().st_size > 0:
155
+ instruction = system_prompt_path.read_text() + "\\n\\n" + instruction
156
+ """
157
+ return f"""\
158
+ from __future__ import annotations
159
+
160
+ import asyncio
161
+ import logging
162
+ import os
163
+ import shutil
164
+ import subprocess
165
+ from pathlib import Path
166
+
167
+ from harbor.agents.terminus_2 import Terminus2
168
+ from harbor.environments.base import BaseEnvironment, ExecResult
169
+ from harbor.models.agent.context import AgentContext
170
+ from harbor.models.environment_type import EnvironmentType
171
+ from harbor.models.trial.paths import TrialPaths
172
+
173
+
174
+ class LocalEnvironment(BaseEnvironment):
175
+ def __init__(self, workdir: Path, logs_dir: Path):
176
+ self.workdir = workdir
177
+ self.trial_paths = TrialPaths(trial_dir=logs_dir)
178
+ self.trial_paths.mkdir()
179
+ self.default_user = None
180
+ self.session_id = "local"
181
+ self.logger = logging.getLogger(__name__)
182
+
183
+ @staticmethod
184
+ def type() -> EnvironmentType:
185
+ return EnvironmentType.DOCKER
186
+
187
+ @property
188
+ def is_mounted(self) -> bool:
189
+ return True
190
+
191
+ @property
192
+ def supports_gpus(self) -> bool:
193
+ return False
194
+
195
+ @property
196
+ def can_disable_internet(self) -> bool:
197
+ return False
198
+
199
+ def _validate_definition(self):
200
+ pass
201
+
202
+ async def start(self, force_build: bool) -> None:
203
+ pass
204
+
205
+ async def stop(self, delete: bool):
206
+ pass
207
+
208
+ async def prepare_logs_for_host(self) -> None:
209
+ pass
210
+
211
+ async def upload_file(self, source_path, target_path):
212
+ shutil.copy(source_path, target_path)
213
+
214
+ async def upload_dir(self, source_dir, target_dir):
215
+ shutil.copytree(source_dir, target_dir, dirs_exist_ok=True)
216
+
217
+ async def download_file(self, source_path, target_path):
218
+ shutil.copy(source_path, target_path)
219
+
220
+ async def download_dir(self, source_dir, target_dir):
221
+ shutil.copytree(source_dir, target_dir, dirs_exist_ok=True)
222
+
223
+ async def exec(
224
+ self,
225
+ command: str,
226
+ cwd: str | None = None,
227
+ env: dict | None = None,
228
+ timeout_sec: int | None = None,
229
+ user: str | int | None = None,
230
+ ) -> ExecResult:
231
+ del user
232
+ try:
233
+ result = subprocess.run(
234
+ command,
235
+ shell=True,
236
+ cwd=cwd or str(self.workdir),
237
+ env={{**os.environ, **(env or {{}})}},
238
+ capture_output=True,
239
+ text=True,
240
+ timeout=timeout_sec,
241
+ )
242
+ except subprocess.TimeoutExpired:
243
+ return ExecResult(stdout="", stderr="Command timed out", return_code=124)
244
+ return ExecResult(
245
+ stdout=result.stdout,
246
+ stderr=result.stderr,
247
+ return_code=result.returncode,
248
+ )
249
+
250
+
251
+ async def main() -> None:
252
+ workdir = Path(os.environ.get("AGENT_WORKDIR") or {DEFAULT_AGENT_WORKDIR!r})
253
+ logs_dir = Path({log_dir!r})
254
+ instruction = Path({instruction_path!r}).read_text()
255
+ {system_prompt_block} env = LocalEnvironment(workdir=workdir, logs_dir=logs_dir)
256
+ if "OPENAI_API_KEY" not in os.environ and "PRIME_API_KEY" in os.environ:
257
+ os.environ["OPENAI_API_KEY"] = os.environ["PRIME_API_KEY"]
258
+ api_base = os.environ.get("OPENAI_BASE_URL") or {api_base_url!r}
259
+ agent = Terminus2(
260
+ logs_dir=logs_dir,
261
+ model_name={model_name!r},
262
+ api_base=api_base,
263
+ max_turns={max_turns!r},
264
+ )
265
+ await agent.setup(env)
266
+ await agent.run(instruction, env, AgentContext())
267
+
268
+
269
+ asyncio.run(main())
270
+ """
271
+
272
+
273
+ __all__ = [
274
+ "DEFAULT_AGENT_WORKDIR",
275
+ "DEFAULT_API_BASE_URL",
276
+ "DEFAULT_HARBOR_PACKAGE",
277
+ "DEFAULT_INSTRUCTION_PATH",
278
+ "DEFAULT_LOG_PATH",
279
+ "DEFAULT_MODEL_NAME",
280
+ "DEFAULT_PYTHON_VERSION",
281
+ "DEFAULT_SYSTEM_PROMPT_PATH",
282
+ "Terminus2",
283
+ "build_terminus_2_install_script",
284
+ "build_terminus_2_run_script",
285
+ "terminus_2_agent_script",
286
+ ]
@@ -199,6 +199,22 @@ class HarborTaskset(Taskset):
199
199
  verifier_config = config.get("verifier", {}) or {}
200
200
  instruction = instruction_path.read_text().strip()
201
201
  task_remote_dir = self.task_dir.rstrip("/") or "/task"
202
+ sandbox = {
203
+ "image": environment.get("docker_image") or self.docker_image,
204
+ "cpu_cores": parse_number(environment.get("cpus"), self.cpu_cores),
205
+ "memory_gb": parse_gb(environment.get("memory"), self.memory_gb),
206
+ "disk_size_gb": parse_gb(environment.get("storage"), self.disk_size_gb),
207
+ "timeout_minutes": self.timeout_minutes,
208
+ "command_timeout": int(
209
+ parse_number(
210
+ agent_config.get("timeout_sec"), self.agent_timeout_seconds
211
+ )
212
+ ),
213
+ "workdir": self.workdir,
214
+ "scope": self.scope,
215
+ }
216
+ if "allow_internet" in environment:
217
+ sandbox["network_access"] = bool(environment["allow_internet"])
202
218
  return {
203
219
  "example_id": index,
204
220
  "task_name": task_dir.name,
@@ -206,21 +222,7 @@ class HarborTaskset(Taskset):
206
222
  "task_toml": task_toml_path.read_text(),
207
223
  "task_dir": str(task_dir),
208
224
  "prompt": [{"role": "user", "content": instruction}],
209
- "sandbox": {
210
- "image": environment.get("docker_image") or self.docker_image,
211
- "cpu_cores": parse_number(environment.get("cpus"), self.cpu_cores),
212
- "memory_gb": parse_gb(environment.get("memory"), self.memory_gb),
213
- "disk_size_gb": parse_gb(environment.get("storage"), self.disk_size_gb),
214
- "network_access": bool(environment.get("allow_internet", True)),
215
- "timeout_minutes": self.timeout_minutes,
216
- "command_timeout": int(
217
- parse_number(
218
- agent_config.get("timeout_sec"), self.agent_timeout_seconds
219
- )
220
- ),
221
- "workdir": self.workdir,
222
- "scope": self.scope,
223
- },
225
+ "sandbox": sandbox,
224
226
  "program": {
225
227
  "files": {
226
228
  f"{task_remote_dir}/instruction.md": {"task": "instruction"},
@@ -37,7 +37,9 @@ class Task(dict):
37
37
  if "sandbox" in self and not isinstance(self["sandbox"], Mapping):
38
38
  raise TypeError("task.sandbox must be a mapping.")
39
39
  if "sandbox" in self:
40
- super().__setitem__("sandbox", sandbox_config_mapping(self["sandbox"]))
40
+ super().__setitem__(
41
+ "sandbox", sandbox_config_mapping(self["sandbox"], fill_defaults=False)
42
+ )
41
43
  if "program" in self and not isinstance(self["program"], Mapping):
42
44
  raise TypeError("task.program must be a mapping.")
43
45
  if "max_turns" in self and (
File without changes