verifiers 0.1.15.dev2__tar.gz → 0.1.15.dev3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (294)
  1. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/PKG-INFO +1 -1
  2. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_interception_utils.py +2 -2
  3. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_v1_harbor_cli.py +73 -0
  4. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_v1_runtime_lifecycle.py +33 -0
  5. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/__init__.py +1 -1
  6. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/utils/interception_utils.py +15 -7
  7. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/v1/packages/tasksets/harbor.py +3 -2
  8. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/v1/utils/sandbox_utils.py +5 -4
  9. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/.gitignore +0 -0
  10. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/LICENSE +0 -0
  11. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/README.md +0 -0
  12. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/pyproject.toml +0 -0
  13. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/AGENTS.md +0 -0
  14. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/README.md +0 -0
  15. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/__init__.py +0 -0
  16. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/conftest.py +0 -0
  17. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_browser_env.py +0 -0
  18. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_build_script.py +0 -0
  19. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_cli_agent_env.py +0 -0
  20. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_client_auth_errors.py +0 -0
  21. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_client_config.py +0 -0
  22. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_client_multimodal_types.py +0 -0
  23. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_composable_env.py +0 -0
  24. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_context_token_metrics.py +0 -0
  25. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_decorator_ranks.py +0 -0
  26. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_endpoint_registry.py +0 -0
  27. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_env_group.py +0 -0
  28. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_env_server.py +0 -0
  29. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_environment.py +0 -0
  30. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_environment_extra.py +0 -0
  31. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_envs.py +0 -0
  32. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_error_chain.py +0 -0
  33. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_eval_cli.py +0 -0
  34. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_eval_display.py +0 -0
  35. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_eval_utils.py +0 -0
  36. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_gepa_cli.py +0 -0
  37. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_gepa_utils.py +0 -0
  38. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_gym_env.py +0 -0
  39. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_harbor_env_mcp.py +0 -0
  40. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_imports.py +0 -0
  41. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_install_utils.py +0 -0
  42. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_langchain_deep_agents_wikispeedia.py +0 -0
  43. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_lean_task.py +0 -0
  44. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_logging.py +0 -0
  45. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_math_rubric.py +0 -0
  46. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_maybe_think_parser.py +0 -0
  47. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_mcp_search_env.py +0 -0
  48. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_message_utils.py +0 -0
  49. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_message_utils_multimodal.py +0 -0
  50. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_multiturn_env.py +0 -0
  51. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_nemorl_client.py +0 -0
  52. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_openai_chat_completions_token_client.py +0 -0
  53. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_openai_responses_client.py +0 -0
  54. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_opencode_harbor.py +0 -0
  55. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_opencode_rlm_env.py +0 -0
  56. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_parser.py +0 -0
  57. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_path_utils.py +0 -0
  58. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_per_turn_timing.py +0 -0
  59. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_prime_plugin.py +0 -0
  60. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_renderer_client.py +0 -0
  61. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_renderer_e2e.py +0 -0
  62. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_rlm_composable_env.py +0 -0
  63. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_rlm_env.py +0 -0
  64. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_rubric.py +0 -0
  65. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_rubric_group.py +0 -0
  66. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_sandbox_env.py +0 -0
  67. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_sandbox_mixin.py +0 -0
  68. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_save_utils.py +0 -0
  69. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_setup_script.py +0 -0
  70. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_singleturn_env.py +0 -0
  71. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_stateful_tool_env.py +0 -0
  72. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_think_parser.py +0 -0
  73. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_tool_env.py +0 -0
  74. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_tool_utils.py +0 -0
  75. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_trajectory_processing.py +0 -0
  76. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_tui_info_formatting.py +0 -0
  77. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_types.py +0 -0
  78. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_v1_bfcl.py +0 -0
  79. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_v1_config_extension.py +0 -0
  80. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_v1_endpoint_protocols.py +0 -0
  81. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_v1_example_counts.py +0 -0
  82. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_v1_group_reward_env.py +0 -0
  83. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_v1_mini_swe_agent.py +0 -0
  84. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_v1_rlm_swe.py +0 -0
  85. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_v1_scoring_functions.py +0 -0
  86. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_wordle_env.py +0 -0
  87. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/tests/test_xml_parser.py +0 -0
  88. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/AGENTS.md +0 -0
  89. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/cli/__init__.py +0 -0
  90. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/cli/commands/__init__.py +0 -0
  91. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/cli/commands/build.py +0 -0
  92. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/cli/commands/eval.py +0 -0
  93. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/cli/commands/gepa.py +0 -0
  94. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/cli/commands/init.py +0 -0
  95. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/cli/commands/install.py +0 -0
  96. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/cli/commands/setup.py +0 -0
  97. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/cli/plugins/__init__.py +0 -0
  98. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/cli/plugins/prime.py +0 -0
  99. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/cli/tui.py +0 -0
  100. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/clients/__init__.py +0 -0
  101. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/clients/anthropic_messages_client.py +0 -0
  102. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/clients/client.py +0 -0
  103. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/clients/nemorl_chat_completions_client.py +0 -0
  104. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/clients/openai_chat_completions_client.py +0 -0
  105. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/clients/openai_chat_completions_token_client.py +0 -0
  106. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/clients/openai_completions_client.py +0 -0
  107. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/clients/openai_responses_client.py +0 -0
  108. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/clients/renderer_client.py +0 -0
  109. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/decorators.py +0 -0
  110. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/AGENTS.md +0 -0
  111. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/__init__.py +0 -0
  112. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/env_group.py +0 -0
  113. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/environment.py +0 -0
  114. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/README.md +0 -0
  115. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/__init__.py +0 -0
  116. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/cli_agent_env.py +0 -0
  117. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/README.md +0 -0
  118. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/__init__.py +0 -0
  119. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/_filter.py +0 -0
  120. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/composable_env.py +0 -0
  121. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/harness.py +0 -0
  122. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/harnesses/__init__.py +0 -0
  123. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/harnesses/mini_swe_agent.py +0 -0
  124. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/harnesses/opencode.py +0 -0
  125. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/harnesses/prompt.txt +0 -0
  126. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/harnesses/rlm.py +0 -0
  127. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/swe_debug_env.py +0 -0
  128. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/task.py +0 -0
  129. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/__init__.py +0 -0
  130. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/cp/__init__.py +0 -0
  131. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/cp/cp_task.py +0 -0
  132. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/cp/test_utils.py +0 -0
  133. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/harbor/__init__.py +0 -0
  134. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/harbor/harbor.py +0 -0
  135. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/lean/__init__.py +0 -0
  136. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/lean/lean_task.py +0 -0
  137. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/math/__init__.py +0 -0
  138. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/math/math_task.py +0 -0
  139. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/swe/__init__.py +0 -0
  140. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/swe/_test_patch.py +0 -0
  141. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/swe/create_fix_patch.sh +0 -0
  142. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/swe/log_parser.py +0 -0
  143. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/swe/multi_swe.py +0 -0
  144. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/swe/openswe.py +0 -0
  145. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym.py +0 -0
  146. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/swe/swe_bench.py +0 -0
  147. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/swe/swe_lego.py +0 -0
  148. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2.py +0 -0
  149. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2_log_parsers.py +0 -0
  150. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/swe/swe_smith.py +0 -0
  151. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/swe/swe_tasksets.py +0 -0
  152. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/gym_env.py +0 -0
  153. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/harbor_env/__init__.py +0 -0
  154. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/harbor_env/env.py +0 -0
  155. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/harbor_env/mcp.py +0 -0
  156. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/mcp_env.py +0 -0
  157. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/opencode_env.py +0 -0
  158. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/opencode_qa_env.py +0 -0
  159. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/opencode_rlm_env.py +0 -0
  160. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/rlm_env.py +0 -0
  161. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/sandbox_mixin.py +0 -0
  162. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/utils/__init__.py +0 -0
  163. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/utils/file_locks.py +0 -0
  164. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/utils/git_checkout_cache.py +0 -0
  165. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/integrations/README.md +0 -0
  166. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/integrations/__init__.py +0 -0
  167. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/integrations/browser_env/README.md +0 -0
  168. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/integrations/browser_env/__init__.py +0 -0
  169. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
  170. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
  171. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
  172. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
  173. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +0 -0
  174. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/integrations/openenv_env.py +0 -0
  175. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
  176. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/integrations/textarena_env.py +0 -0
  177. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/multiturn_env.py +0 -0
  178. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/python_env.py +0 -0
  179. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/sandbox_env.py +0 -0
  180. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/singleturn_env.py +0 -0
  181. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/stateful_tool_env.py +0 -0
  182. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/envs/tool_env.py +0 -0
  183. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/errors.py +0 -0
  184. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/gepa/__init__.py +0 -0
  185. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/gepa/adapter.py +0 -0
  186. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/gepa/config.py +0 -0
  187. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/gepa/display.py +0 -0
  188. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/gepa/gepa_utils.py +0 -0
  189. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/parsers/__init__.py +0 -0
  190. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/parsers/maybe_think_parser.py +0 -0
  191. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/parsers/parser.py +0 -0
  192. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/parsers/think_parser.py +0 -0
  193. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/parsers/xml_parser.py +0 -0
  194. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/rl/README.md +0 -0
  195. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/rl/__init__.py +0 -0
  196. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/rl/inference/__init__.py +0 -0
  197. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/rl/inference/client.py +0 -0
  198. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/rl/inference/server.py +0 -0
  199. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/rl/trainer/__init__.py +0 -0
  200. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/rl/trainer/config.py +0 -0
  201. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/rl/trainer/orchestrator.py +0 -0
  202. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/rl/trainer/trainer.py +0 -0
  203. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/rl/trainer/utils.py +0 -0
  204. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/rubrics/__init__.py +0 -0
  205. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/rubrics/experimental/hybrid_math_rubric.py +0 -0
  206. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/rubrics/judge_rubric.py +0 -0
  207. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/rubrics/math_rubric.py +0 -0
  208. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/rubrics/rubric.py +0 -0
  209. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/rubrics/rubric_group.py +0 -0
  210. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/scripts/__init__.py +0 -0
  211. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/scripts/build.py +0 -0
  212. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/scripts/eval.py +0 -0
  213. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/scripts/gepa.py +0 -0
  214. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/scripts/init.py +0 -0
  215. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/scripts/install.py +0 -0
  216. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/scripts/rl.py +0 -0
  217. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/scripts/setup.py +0 -0
  218. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/scripts/train.py +0 -0
  219. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/scripts/tui.py +0 -0
  220. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/scripts/vllm.py +0 -0
  221. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/serve/__init__.py +0 -0
  222. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/serve/client/env_client.py +0 -0
  223. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/serve/client/zmq_env_client.py +0 -0
  224. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/serve/server/__init__.py +0 -0
  225. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/serve/server/env_router.py +0 -0
  226. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/serve/server/env_server.py +0 -0
  227. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/serve/server/env_worker.py +0 -0
  228. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/serve/server/zmq_env_server.py +0 -0
  229. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/serve/types.py +0 -0
  230. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/types.py +0 -0
  231. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/utils/__init__.py +0 -0
  232. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/utils/async_utils.py +0 -0
  233. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/utils/client_utils.py +0 -0
  234. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/utils/config_utils.py +0 -0
  235. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/utils/data_utils.py +0 -0
  236. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/utils/display_utils.py +0 -0
  237. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/utils/env_config_utils.py +0 -0
  238. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/utils/env_utils.py +0 -0
  239. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/utils/error_utils.py +0 -0
  240. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/utils/eval_display.py +0 -0
  241. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/utils/eval_utils.py +0 -0
  242. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/utils/heartbeat.py +0 -0
  243. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/utils/import_utils.py +0 -0
  244. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/utils/install_utils.py +0 -0
  245. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/utils/logging_utils.py +0 -0
  246. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/utils/message_utils.py +0 -0
  247. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/utils/metric_utils.py +0 -0
  248. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/utils/path_utils.py +0 -0
  249. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/utils/process_utils.py +0 -0
  250. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/utils/response_utils.py +0 -0
  251. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/utils/save_utils.py +0 -0
  252. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/utils/serve_utils.py +0 -0
  253. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/utils/thread_utils.py +0 -0
  254. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/utils/threaded_sandbox_client.py +0 -0
  255. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/utils/tool_utils.py +0 -0
  256. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/utils/tunnel_utils.py +0 -0
  257. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/utils/usage_utils.py +0 -0
  258. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/utils/version_utils.py +0 -0
  259. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/v1/README.md +0 -0
  260. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/v1/RE_MIGRATION.md +0 -0
  261. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/v1/__init__.py +0 -0
  262. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/v1/config.py +0 -0
  263. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/v1/env.py +0 -0
  264. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/v1/harness.py +0 -0
  265. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/v1/packages/__init__.py +0 -0
  266. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/v1/packages/harnesses/__init__.py +0 -0
  267. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/v1/packages/harnesses/cli.py +0 -0
  268. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/v1/packages/harnesses/configs.py +0 -0
  269. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/v1/packages/harnesses/mini_swe_agent.py +0 -0
  270. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/v1/packages/harnesses/opencode.py +0 -0
  271. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/v1/packages/harnesses/pi.py +0 -0
  272. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/v1/packages/harnesses/rlm.py +0 -0
  273. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/v1/packages/tasksets/__init__.py +0 -0
  274. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/v1/runtime.py +0 -0
  275. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/v1/state.py +0 -0
  276. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/v1/task.py +0 -0
  277. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/v1/taskset.py +0 -0
  278. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/v1/toolset.py +0 -0
  279. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/v1/user.py +0 -0
  280. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/v1/utils/__init__.py +0 -0
  281. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/v1/utils/artifact_utils.py +0 -0
  282. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/v1/utils/endpoint_utils.py +0 -0
  283. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/v1/utils/json_utils.py +0 -0
  284. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/v1/utils/judge_utils.py +0 -0
  285. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/v1/utils/lifecycle_utils.py +0 -0
  286. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/v1/utils/mcp_proxy_utils.py +0 -0
  287. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/v1/utils/mcp_utils.py +0 -0
  288. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/v1/utils/program_utils.py +0 -0
  289. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/v1/utils/prompt_utils.py +0 -0
  290. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/v1/utils/sandbox_program_utils.py +0 -0
  291. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/v1/utils/scoring_utils.py +0 -0
  292. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/v1/utils/timing_utils.py +0 -0
  293. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/v1/utils/tool_utils.py +0 -0
  294. {verifiers-0.1.15.dev2 → verifiers-0.1.15.dev3}/verifiers/v1/utils/trajectory_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: verifiers
3
- Version: 0.1.15.dev2
3
+ Version: 0.1.15.dev3
4
4
  Summary: Verifiers: Environments for LLM Reinforcement Learning
5
5
  Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
6
6
  Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
@@ -261,7 +261,7 @@ async def test_keepalive_write_failure_surfaces_to_state(monkeypatch):
261
261
 
262
262
  assert isinstance(state["error"], StreamInterrupted)
263
263
  msg = str(state["error"])
264
- assert "keepalive write failed" in msg
264
+ assert "Keepalive write failed" in msg
265
265
  assert "ConnectionResetError" in msg
266
266
 
267
267
 
@@ -306,6 +306,6 @@ async def test_non_streaming_response_future_failure_surfaces_to_state(monkeypat
306
306
  f"expected InterceptionError, got {type(state.get('error'))}"
307
307
  )
308
308
  msg = str(state["error"])
309
- assert "intercepted request failed" in msg
309
+ assert "Intercepted request failed" in msg
310
310
  assert "RuntimeError" in msg
311
311
  assert "vLLM raised" in msg
@@ -1,6 +1,8 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import json
4
+ import sys
5
+ import types
4
6
  from pathlib import Path
5
7
  from typing import cast
6
8
 
@@ -8,6 +10,7 @@ import pytest
8
10
 
9
11
  import verifiers.v1 as vf
10
12
  from verifiers.v1.packages.harnesses.pi import pi_mcp_json, pi_models_json
13
+ from verifiers.v1.packages.tasksets.harbor import harbor_reward
11
14
  from verifiers.v1.utils.program_utils import merge_task_program
12
15
 
13
16
 
@@ -82,6 +85,76 @@ def test_harbor_taskset_constructs_env_with_opencode(tmp_path: Path) -> None:
82
85
  assert "task_dir" not in cast(dict[str, object], env.harness.program)
83
86
 
84
87
 
88
+ class FakeHarborCommandResult:
89
+ def __init__(
90
+ self,
91
+ *,
92
+ exit_code: int = 0,
93
+ stdout: str = "",
94
+ stderr: str = "",
95
+ ):
96
+ self.exit_code = exit_code
97
+ self.stdout = stdout
98
+ self.stderr = stderr
99
+
100
+
101
+ class FakeHarborSandboxClient:
102
+ instances: list["FakeHarborSandboxClient"] = []
103
+
104
+ def __init__(self):
105
+ self.execute_commands: list[tuple[str, int | None, str | None]] = []
106
+ self.background_jobs: list[tuple[str, str, int | None, str | None]] = []
107
+ type(self).instances.append(self)
108
+
109
+ async def upload_file(self, *args: object, **kwargs: object) -> None:
110
+ _ = args, kwargs
111
+
112
+ async def execute_command(
113
+ self, *args: object, **kwargs: object
114
+ ) -> FakeHarborCommandResult:
115
+ command = str(kwargs.get("command") or args[1])
116
+ timeout = cast(int | None, kwargs.get("timeout"))
117
+ working_dir = cast(str | None, kwargs.get("working_dir"))
118
+ self.execute_commands.append((command, timeout, working_dir))
119
+ if "reward.txt" in command:
120
+ return FakeHarborCommandResult(stdout="1\n")
121
+ return FakeHarborCommandResult()
122
+
123
+ async def run_background_job(
124
+ self, *args: object, **kwargs: object
125
+ ) -> FakeHarborCommandResult:
126
+ sandbox_id = str(kwargs.get("sandbox_id") or args[0])
127
+ command = str(kwargs.get("command") or args[1])
128
+ timeout = cast(int | None, kwargs.get("timeout"))
129
+ working_dir = cast(str | None, kwargs.get("working_dir"))
130
+ self.background_jobs.append((sandbox_id, command, timeout, working_dir))
131
+ return FakeHarborCommandResult(stdout="tests passed")
132
+
133
+ async def aclose(self) -> None:
134
+ pass
135
+
136
+
137
+ @pytest.mark.asyncio
138
+ async def test_harbor_reward_uses_background_job_for_tests(
139
+ tmp_path: Path, monkeypatch: pytest.MonkeyPatch
140
+ ) -> None:
141
+ task_dir = write_harbor_task(tmp_path)
142
+ fake_module = types.ModuleType("prime_sandboxes")
143
+ fake_module.AsyncSandboxClient = FakeHarborSandboxClient
144
+ monkeypatch.setitem(sys.modules, "prime_sandboxes", fake_module)
145
+ FakeHarborSandboxClient.instances = []
146
+
147
+ reward = await harbor_reward(
148
+ {"harbor": {"task_dir": str(task_dir), "test_timeout": 120}},
149
+ {"sandbox_id": "sbx-1"},
150
+ )
151
+
152
+ client = FakeHarborSandboxClient.instances[0]
153
+ assert reward == 1.0
154
+ assert client.background_jobs == [("sbx-1", "bash test.sh", 120, "/tests")]
155
+ assert ("bash test.sh", 120, "/tests") not in client.execute_commands
156
+
157
+
85
158
  def test_packaged_harbor_and_opencode_imports_are_reexported() -> None:
86
159
  from verifiers.v1.packages.harnesses import OpenCode, OpenCodeConfig, Pi
87
160
  from verifiers.v1.packages.tasksets import HarborTaskset
@@ -109,6 +109,7 @@ class FakeSandboxClient:
109
109
  created: list[str] = []
110
110
  deleted: list[str] = []
111
111
  commands: list[tuple[str, str]] = []
112
+ background_jobs: list[tuple[str, str, int | None, str | None]] = []
112
113
  uploads: list[tuple[str, str, bytes]] = []
113
114
 
114
115
  @classmethod
@@ -116,6 +117,7 @@ class FakeSandboxClient:
116
117
  cls.created = []
117
118
  cls.deleted = []
118
119
  cls.commands = []
120
+ cls.background_jobs = []
119
121
  cls.uploads = []
120
122
 
121
123
  async def create(self, request: FakeCreateSandboxRequest) -> FakeSandboxResult:
@@ -135,6 +137,17 @@ class FakeSandboxClient:
135
137
  type(self).commands.append((sandbox_id, command))
136
138
  return FakeCommandResult()
137
139
 
140
+ async def run_background_job(
141
+ self, *args: object, **kwargs: object
142
+ ) -> FakeCommandResult:
143
+ sandbox_id = str(kwargs.get("sandbox_id") or args[0])
144
+ command = str(kwargs.get("command") or args[1])
145
+ timeout = cast(int | None, kwargs.get("timeout"))
146
+ working_dir = cast(str | None, kwargs.get("working_dir"))
147
+ type(self).commands.append((sandbox_id, command))
148
+ type(self).background_jobs.append((sandbox_id, command, timeout, working_dir))
149
+ return FakeCommandResult()
150
+
138
151
  async def upload_bytes(self, *args: object, **kwargs: object) -> None:
139
152
  sandbox_id = str(kwargs.get("sandbox_id") or args[0])
140
153
  path = str(kwargs.get("file_path") or kwargs.get("path") or args[1])
@@ -1193,6 +1206,26 @@ async def test_sandbox_state_input_upload_runs_after_rollout_setup(
1193
1206
  assert uploads["/tmp/vf_state_in.json"]["state_input_setup"] is True
1194
1207
 
1195
1208
 
1209
+ @pytest.mark.asyncio
1210
+ async def test_task_command_uses_background_job(
1211
+ monkeypatch: pytest.MonkeyPatch,
1212
+ ) -> None:
1213
+ install_fake_sandboxes(monkeypatch)
1214
+ install_fake_endpoint_tunnel(monkeypatch)
1215
+
1216
+ harness = vf.CLIHarness(command=["sleep", "120"], sandbox=True)
1217
+ task = vf.Task(
1218
+ {
1219
+ "prompt": [{"role": "user", "content": "hi"}],
1220
+ "sandbox": {"command_timeout": 120},
1221
+ }
1222
+ ).freeze()
1223
+
1224
+ await harness.run(task)
1225
+
1226
+ assert ("sbx-1", "sleep 120", 120, "/app") in FakeSandboxClient.background_jobs
1227
+
1228
+
1196
1229
  @pytest.mark.asyncio
1197
1230
  async def test_program_tools_mcp_setup_accepts_config_ref_mappings(
1198
1231
  monkeypatch: pytest.MonkeyPatch,
@@ -1,4 +1,4 @@
1
- __version__ = "0.1.15.dev2"
1
+ __version__ = "0.1.15.dev3"
2
2
 
3
3
  import importlib
4
4
  import os
@@ -173,12 +173,20 @@ class InterceptionServer:
173
173
  """Attach `error` to the rollout's state if one is registered and
174
174
  unset. First error wins — later failures (e.g. the downstream
175
175
  `response_future` raising too) should not clobber the original cause.
176
+
177
+ Also skip when the rollout loop has already finalized via a clean
178
+ stop condition (e.g. ``state["prompt_too_long"]`` from an
179
+ ``OverlongPromptError``). Tail-end failures that happen after
180
+ that — e.g. ``write_eof`` to an agent that has already exited —
181
+ are consequences of the termination, not new infra problems, and
182
+ must not be surfaced as a spurious ``InterceptionError`` /
183
+ ``StreamInterrupted`` alongside the real stop signal.
176
184
  """
177
185
  context = self.active_rollouts.get(rollout_id)
178
186
  if context is None:
179
187
  return
180
188
  state = context.get("state")
181
- if state is None or state.get("error"):
189
+ if state is None or state.get("error") or state.get("prompt_too_long"):
182
190
  return
183
191
  state["error"] = error
184
192
 
@@ -295,7 +303,7 @@ class InterceptionServer:
295
303
  self._set_rollout_error(
296
304
  rollout_id,
297
305
  InterceptionError(
298
- f"intercepted request failed: {type(e).__name__}: {e}"
306
+ f"Intercepted request failed: {type(e).__name__}: {e}"
299
307
  ),
300
308
  )
301
309
  return web.json_response({"error": str(e)}, status=500)
@@ -433,7 +441,7 @@ class InterceptionServer:
433
441
  )
434
442
  self._set_rollout_error(
435
443
  rollout_id,
436
- StreamInterrupted(f"prepare failed: {type(e).__name__}: {e}"),
444
+ StreamInterrupted(f"Prepare failed: {type(e).__name__}: {e}"),
437
445
  )
438
446
  return response
439
447
  # Reuse one get() task across keepalive cycles; asyncio.wait_for on
@@ -460,7 +468,7 @@ class InterceptionServer:
460
468
  self._set_rollout_error(
461
469
  rollout_id,
462
470
  StreamInterrupted(
463
- f"keepalive write failed after {print_time(waited_s)}: "
471
+ f"Keepalive write failed after {print_time(waited_s)}: "
464
472
  f"{type(e).__name__}: {e}"
465
473
  ),
466
474
  )
@@ -490,7 +498,7 @@ class InterceptionServer:
490
498
  self._set_rollout_error(
491
499
  rollout_id,
492
500
  StreamInterrupted(
493
- f"stream write failed after {print_time(waited_s)}: "
501
+ f"Stream write failed after {print_time(waited_s)}: "
494
502
  f"{type(e).__name__}: {e}"
495
503
  ),
496
504
  )
@@ -510,7 +518,7 @@ class InterceptionServer:
510
518
  self._set_rollout_error(
511
519
  rollout_id,
512
520
  StreamInterrupted(
513
- f"streaming response_future failed: {type(e).__name__}: {e}"
521
+ f"Streaming response_future failed: {type(e).__name__}: {e}"
514
522
  ),
515
523
  )
516
524
 
@@ -527,7 +535,7 @@ class InterceptionServer:
527
535
  self._set_rollout_error(
528
536
  rollout_id,
529
537
  StreamInterrupted(
530
- f"write_eof failed after {print_time(waited_s)}: "
538
+ f"Write EOF failed after {print_time(waited_s)}: "
531
539
  f"{type(e).__name__}: {e}"
532
540
  ),
533
541
  )
@@ -333,11 +333,12 @@ async def harbor_reward(task, state) -> float:
333
333
  client = AsyncSandboxClient()
334
334
  try:
335
335
  await upload_harbor_tests(client, sandbox_id, task_dir)
336
- result = await client.execute_command(
336
+ test_timeout = int(parse_number(harbor.get("test_timeout"), 900))
337
+ result = await client.run_background_job(
337
338
  sandbox_id=sandbox_id,
338
339
  command="bash test.sh",
339
340
  working_dir="/tests",
340
- timeout=int(parse_number(harbor.get("test_timeout"), 900)),
341
+ timeout=test_timeout,
341
342
  )
342
343
  state["harbor_tests"] = {
343
344
  "returncode": result.exit_code,
@@ -262,12 +262,13 @@ async def run_sandbox_command(
262
262
  )
263
263
  argv = await command_argv(program, task, state, runtime)
264
264
  env = await command_env(program, task, state, runtime, include_base=False)
265
- result = await lease.client.execute_command(
266
- lease.id,
267
- shlex.join(argv),
265
+ command = shlex.join(argv)
266
+ command_timeout = cast(int | None, sandbox_config.get("command_timeout"))
267
+ result = await lease.run_background_job(
268
+ command,
269
+ timeout=command_timeout,
268
270
  working_dir=workdir,
269
271
  env=env,
270
- timeout=cast(int | None, sandbox_config.get("command_timeout")),
271
272
  )
272
273
  state["command"] = {
273
274
  "argv": argv,
File without changes