verifiers 0.1.13.dev4__tar.gz → 0.1.13.dev5__tar.gz

This diff shows the changes between two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (233)
  1. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/PKG-INFO +1 -1
  2. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_eval_cli.py +2 -0
  3. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_interception_utils.py +73 -0
  4. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/__init__.py +1 -1
  5. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/README.md +4 -0
  6. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/composable/harnesses/__init__.py +12 -0
  7. verifiers-0.1.13.dev5/verifiers/envs/experimental/composable/harnesses/mini_swe_agent.py +230 -0
  8. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/scripts/eval.py +58 -1
  9. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/utils/eval_utils.py +2 -0
  10. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/utils/interception_utils.py +78 -7
  11. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/.gitignore +0 -0
  12. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/LICENSE +0 -0
  13. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/README.md +0 -0
  14. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/pyproject.toml +0 -0
  15. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/AGENTS.md +0 -0
  16. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/README.md +0 -0
  17. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/__init__.py +0 -0
  18. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/conftest.py +0 -0
  19. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_browser_env.py +0 -0
  20. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_build_script.py +0 -0
  21. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_cli_agent_env.py +0 -0
  22. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_client_auth_errors.py +0 -0
  23. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_client_config.py +0 -0
  24. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_client_multimodal_types.py +0 -0
  25. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_composable_env.py +0 -0
  26. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_context_token_metrics.py +0 -0
  27. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_decorator_ranks.py +0 -0
  28. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_endpoint_registry.py +0 -0
  29. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_env_group.py +0 -0
  30. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_env_server.py +0 -0
  31. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_environment.py +0 -0
  32. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_environment_extra.py +0 -0
  33. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_envs.py +0 -0
  34. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_error_chain.py +0 -0
  35. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_eval_display.py +0 -0
  36. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_eval_utils.py +0 -0
  37. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_gepa_cli.py +0 -0
  38. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_gym_env.py +0 -0
  39. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_harbor_env_mcp.py +0 -0
  40. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_imports.py +0 -0
  41. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_install_utils.py +0 -0
  42. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_logging.py +0 -0
  43. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_math_rubric.py +0 -0
  44. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_maybe_think_parser.py +0 -0
  45. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_message_utils.py +0 -0
  46. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_message_utils_multimodal.py +0 -0
  47. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_multiturn_env.py +0 -0
  48. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_nemorl_client.py +0 -0
  49. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_openai_chat_completions_token_client.py +0 -0
  50. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_opencode_harbor.py +0 -0
  51. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_opencode_rlm_env.py +0 -0
  52. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_parser.py +0 -0
  53. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_path_utils.py +0 -0
  54. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_prime_plugin.py +0 -0
  55. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_rlm_composable_env.py +0 -0
  56. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_rlm_env.py +0 -0
  57. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_rubric.py +0 -0
  58. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_rubric_group.py +0 -0
  59. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_sandbox_env.py +0 -0
  60. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_sandbox_mixin.py +0 -0
  61. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_save_utils.py +0 -0
  62. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_setup_script.py +0 -0
  63. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_singleturn_env.py +0 -0
  64. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_stateful_tool_env.py +0 -0
  65. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_think_parser.py +0 -0
  66. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_tool_env.py +0 -0
  67. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_tool_utils.py +0 -0
  68. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_trajectory_processing.py +0 -0
  69. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_tui_info_formatting.py +0 -0
  70. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/tests/test_xml_parser.py +0 -0
  71. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/AGENTS.md +0 -0
  72. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/cli/__init__.py +0 -0
  73. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/cli/commands/__init__.py +0 -0
  74. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/cli/commands/build.py +0 -0
  75. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/cli/commands/eval.py +0 -0
  76. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/cli/commands/gepa.py +0 -0
  77. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/cli/commands/init.py +0 -0
  78. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/cli/commands/install.py +0 -0
  79. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/cli/commands/setup.py +0 -0
  80. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/cli/plugins/__init__.py +0 -0
  81. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/cli/plugins/prime.py +0 -0
  82. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/cli/tui.py +0 -0
  83. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/clients/__init__.py +0 -0
  84. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/clients/anthropic_messages_client.py +0 -0
  85. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/clients/client.py +0 -0
  86. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/clients/nemorl_chat_completions_client.py +0 -0
  87. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/clients/openai_chat_completions_client.py +0 -0
  88. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/clients/openai_chat_completions_token_client.py +0 -0
  89. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/clients/openai_completions_client.py +0 -0
  90. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/decorators.py +0 -0
  91. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/AGENTS.md +0 -0
  92. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/__init__.py +0 -0
  93. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/env_group.py +0 -0
  94. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/environment.py +0 -0
  95. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/__init__.py +0 -0
  96. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/cli_agent_env.py +0 -0
  97. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/composable/README.md +0 -0
  98. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/composable/__init__.py +0 -0
  99. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/composable/_filter.py +0 -0
  100. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/composable/composable_env.py +0 -0
  101. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/composable/harness.py +0 -0
  102. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/composable/harnesses/opencode.py +0 -0
  103. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/composable/harnesses/prompt.txt +0 -0
  104. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/composable/harnesses/rlm.py +0 -0
  105. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/composable/task.py +0 -0
  106. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/composable/tasksets/__init__.py +0 -0
  107. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/composable/tasksets/cp/__init__.py +0 -0
  108. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/composable/tasksets/cp/cp_task.py +0 -0
  109. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/composable/tasksets/cp/test_utils.py +0 -0
  110. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/composable/tasksets/harbor/__init__.py +0 -0
  111. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/composable/tasksets/harbor/harbor.py +0 -0
  112. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/composable/tasksets/lean/__init__.py +0 -0
  113. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/composable/tasksets/lean/lean_task.py +0 -0
  114. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/composable/tasksets/math/__init__.py +0 -0
  115. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/composable/tasksets/math/math_task.py +0 -0
  116. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/composable/tasksets/swe/__init__.py +0 -0
  117. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/composable/tasksets/swe/_test_patch.py +0 -0
  118. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/composable/tasksets/swe/create_fix_patch.sh +0 -0
  119. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/composable/tasksets/swe/log_parser.py +0 -0
  120. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/composable/tasksets/swe/multi_swe.py +0 -0
  121. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/composable/tasksets/swe/openswe.py +0 -0
  122. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym.py +0 -0
  123. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/composable/tasksets/swe/swe_bench.py +0 -0
  124. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/composable/tasksets/swe/swe_lego.py +0 -0
  125. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2.py +0 -0
  126. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2_log_parsers.py +0 -0
  127. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/composable/tasksets/swe/swe_smith.py +0 -0
  128. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/composable/tasksets/swe/swe_tasksets.py +0 -0
  129. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/gym_env.py +0 -0
  130. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/harbor_env/__init__.py +0 -0
  131. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/harbor_env/env.py +0 -0
  132. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/harbor_env/mcp.py +0 -0
  133. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/mcp_env.py +0 -0
  134. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/opencode_env.py +0 -0
  135. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/opencode_qa_env.py +0 -0
  136. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/opencode_rlm_env.py +0 -0
  137. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/rlm_env.py +0 -0
  138. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/sandbox_mixin.py +0 -0
  139. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/utils/__init__.py +0 -0
  140. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/utils/file_locks.py +0 -0
  141. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/experimental/utils/git_checkout_cache.py +0 -0
  142. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/integrations/README.md +0 -0
  143. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/integrations/__init__.py +0 -0
  144. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/integrations/browser_env/README.md +0 -0
  145. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/integrations/browser_env/__init__.py +0 -0
  146. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
  147. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
  148. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
  149. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
  150. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +0 -0
  151. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/integrations/openenv_env.py +0 -0
  152. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
  153. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/integrations/textarena_env.py +0 -0
  154. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/multiturn_env.py +0 -0
  155. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/python_env.py +0 -0
  156. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/sandbox_env.py +0 -0
  157. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/singleturn_env.py +0 -0
  158. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/stateful_tool_env.py +0 -0
  159. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/envs/tool_env.py +0 -0
  160. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/errors.py +0 -0
  161. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/gepa/__init__.py +0 -0
  162. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/gepa/adapter.py +0 -0
  163. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/gepa/config.py +0 -0
  164. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/gepa/display.py +0 -0
  165. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/gepa/gepa_utils.py +0 -0
  166. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/parsers/__init__.py +0 -0
  167. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/parsers/maybe_think_parser.py +0 -0
  168. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/parsers/parser.py +0 -0
  169. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/parsers/think_parser.py +0 -0
  170. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/parsers/xml_parser.py +0 -0
  171. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/rl/README.md +0 -0
  172. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/rl/__init__.py +0 -0
  173. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/rl/inference/__init__.py +0 -0
  174. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/rl/inference/client.py +0 -0
  175. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/rl/inference/server.py +0 -0
  176. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/rl/trainer/__init__.py +0 -0
  177. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/rl/trainer/config.py +0 -0
  178. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/rl/trainer/orchestrator.py +0 -0
  179. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/rl/trainer/trainer.py +0 -0
  180. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/rl/trainer/utils.py +0 -0
  181. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/rubrics/__init__.py +0 -0
  182. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/rubrics/experimental/hybrid_math_rubric.py +0 -0
  183. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/rubrics/judge_rubric.py +0 -0
  184. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/rubrics/math_rubric.py +0 -0
  185. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/rubrics/rubric.py +0 -0
  186. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/rubrics/rubric_group.py +0 -0
  187. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/scripts/__init__.py +0 -0
  188. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/scripts/build.py +0 -0
  189. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/scripts/gepa.py +0 -0
  190. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/scripts/init.py +0 -0
  191. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/scripts/install.py +0 -0
  192. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/scripts/prime_rl.py +0 -0
  193. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/scripts/rl.py +0 -0
  194. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/scripts/setup.py +0 -0
  195. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/scripts/train.py +0 -0
  196. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/scripts/tui.py +0 -0
  197. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/scripts/vllm.py +0 -0
  198. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/serve/__init__.py +0 -0
  199. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/serve/client/env_client.py +0 -0
  200. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/serve/client/zmq_env_client.py +0 -0
  201. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/serve/server/__init__.py +0 -0
  202. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/serve/server/env_router.py +0 -0
  203. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/serve/server/env_server.py +0 -0
  204. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/serve/server/env_worker.py +0 -0
  205. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/serve/server/zmq_env_server.py +0 -0
  206. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/serve/types.py +0 -0
  207. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/types.py +0 -0
  208. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/utils/__init__.py +0 -0
  209. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/utils/async_utils.py +0 -0
  210. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/utils/client_utils.py +0 -0
  211. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/utils/config_utils.py +0 -0
  212. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/utils/data_utils.py +0 -0
  213. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/utils/display_utils.py +0 -0
  214. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/utils/env_utils.py +0 -0
  215. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/utils/error_utils.py +0 -0
  216. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/utils/eval_display.py +0 -0
  217. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/utils/heartbeat.py +0 -0
  218. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/utils/import_utils.py +0 -0
  219. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/utils/install_utils.py +0 -0
  220. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/utils/logging_utils.py +0 -0
  221. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/utils/message_utils.py +0 -0
  222. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/utils/metric_utils.py +0 -0
  223. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/utils/path_utils.py +0 -0
  224. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/utils/process_utils.py +0 -0
  225. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/utils/response_utils.py +0 -0
  226. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/utils/save_utils.py +0 -0
  227. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/utils/serve_utils.py +0 -0
  228. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/utils/thread_utils.py +0 -0
  229. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/utils/threaded_sandbox_client.py +0 -0
  230. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/utils/tool_utils.py +0 -0
  231. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/utils/tunnel_utils.py +0 -0
  232. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/utils/usage_utils.py +0 -0
  233. {verifiers-0.1.13.dev4 → verifiers-0.1.13.dev5}/verifiers/utils/version_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: verifiers
3
- Version: 0.1.13.dev4
3
+ Version: 0.1.13.dev5
4
4
  Summary: Verifiers: Environments for LLM Reinforcement Learning
5
5
  Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
6
6
  Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
@@ -41,6 +41,8 @@ def run_cli(make_metadata, make_state, make_input):
41
41
  "api_base_url": "https://api.openai.com/v1",
42
42
  "header": None,
43
43
  "headers": None,
44
+ "header_from_state": None,
45
+ "headers_from_state": None,
44
46
  "num_examples": 1,
45
47
  "rollouts_per_example": 1,
46
48
  "max_concurrent": 1,
@@ -131,3 +131,76 @@ async def test_streaming_write_failure_surfaces_to_state(monkeypatch):
131
131
 
132
132
  assert isinstance(state["error"], StreamInterrupted)
133
133
  assert "ConnectionResetError" in str(state["error"])
134
+
135
+
136
+ async def test_keepalive_emitted_during_idle(monkeypatch):
137
+ """During the idle window (no chunks on chunk_queue) the handler must
138
+ emit SSE keepalive comments so upstream idle-timeouts don't fire."""
139
+ monkeypatch.setattr(interception_utils, "KEEPALIVE_INTERVAL_SECONDS", 0.05)
140
+ server = InterceptionServer(port=0)
141
+ state: dict = {}
142
+ server.register_rollout("r1", state=state)
143
+
144
+ writes: list[bytes] = []
145
+
146
+ async def fake_write(data: bytes) -> None:
147
+ writes.append(data)
148
+
149
+ fake_response = MagicMock()
150
+ fake_response.prepare = AsyncMock()
151
+ fake_response.write = AsyncMock(side_effect=fake_write)
152
+ fake_response.write_eof = AsyncMock()
153
+ monkeypatch.setattr(
154
+ interception_utils.web, "StreamResponse", lambda **_: fake_response
155
+ )
156
+
157
+ chunk_queue: asyncio.Queue = asyncio.Queue() # starts empty
158
+ response_future: asyncio.Future = asyncio.Future()
159
+ intercept = {
160
+ "chunk_queue": chunk_queue,
161
+ "response_future": response_future,
162
+ }
163
+
164
+ task = asyncio.create_task(
165
+ server._handle_streaming_response(MagicMock(), "r1", intercept)
166
+ )
167
+ await asyncio.sleep(0.2) # enough for a few keepalive cycles
168
+
169
+ # Close the loop cleanly: EOF sentinel + resolved future → handler returns.
170
+ response_future.set_result(None)
171
+ await chunk_queue.put(None)
172
+ await task
173
+
174
+ assert any(w == b": keepalive\n\n" for w in writes), (
175
+ f"expected at least one keepalive write, got writes={writes}"
176
+ )
177
+
178
+
179
+ async def test_keepalive_write_failure_surfaces_to_state(monkeypatch):
180
+ """A failed keepalive write (upstream already cut the TCP connection)
181
+ must funnel into ``state["error"]`` with elapsed-time instrumentation."""
182
+ monkeypatch.setattr(interception_utils, "KEEPALIVE_INTERVAL_SECONDS", 0.05)
183
+ server = InterceptionServer(port=0)
184
+ state: dict = {}
185
+ server.register_rollout("r1", state=state)
186
+
187
+ fake_response = MagicMock()
188
+ fake_response.prepare = AsyncMock()
189
+ fake_response.write = AsyncMock(side_effect=ConnectionResetError("tunnel died"))
190
+ fake_response.write_eof = AsyncMock()
191
+ monkeypatch.setattr(
192
+ interception_utils.web, "StreamResponse", lambda **_: fake_response
193
+ )
194
+
195
+ chunk_queue: asyncio.Queue = asyncio.Queue() # never produces
196
+ intercept = {
197
+ "chunk_queue": chunk_queue,
198
+ "response_future": asyncio.Future(),
199
+ }
200
+
201
+ await server._handle_streaming_response(MagicMock(), "r1", intercept)
202
+
203
+ assert isinstance(state["error"], StreamInterrupted)
204
+ msg = str(state["error"])
205
+ assert "keepalive write failed" in msg
206
+ assert "ConnectionResetError" in msg
@@ -1,4 +1,4 @@
1
- __version__ = "0.1.13.dev4"
1
+ __version__ = "0.1.13.dev5"
2
2
 
3
3
  import importlib
4
4
  import os
@@ -2,6 +2,10 @@
2
2
 
3
3
  Newer and more experimental environment classes that may have some sharper edges + change more frequently.
4
4
 
5
+ ## SandboxMixin
6
+
7
+ `SandboxMixin` works with both container and VM sandboxes. If your environment needs a VM, pass `CreateSandboxRequest(..., vm=True)` to `create_sandbox`. For a GPU VM, also set `gpu_count` and `gpu_type`. Everyday sandbox operations like file upload, file reads, background jobs, and cleanup work the same way. Port exposure and SSH are currently container-only.
8
+
5
9
  ## GymEnv
6
10
 
7
11
  Universal runner for Gym-compatible environments. Wraps any environment that implements `reset(seed)` and `step(action)` methods (following the OpenAI Gym / Gymnasium API). Supports both old-style 4-tuple and new-style 5-tuple step returns.
@@ -16,6 +16,13 @@ from verifiers.envs.experimental.composable.harnesses.opencode import (
16
16
  build_opencode_run_command,
17
17
  opencode_harness,
18
18
  )
19
+ from verifiers.envs.experimental.composable.harnesses.mini_swe_agent import (
20
+ MINI_SWE_AGENT_CONFIG,
21
+ MINI_SWE_AGENT_INSTALL_SCRIPT,
22
+ build_mini_swe_agent_install_script,
23
+ build_mini_swe_agent_run_command,
24
+ mini_swe_agent_harness,
25
+ )
19
26
 
20
27
  __all__ = [
21
28
  "rlm_harness",
@@ -32,4 +39,9 @@ __all__ = [
32
39
  "DEFAULT_DISABLED_TOOLS",
33
40
  "DEFAULT_RELEASE_SHA256",
34
41
  "DEFAULT_SYSTEM_PROMPT",
42
+ "mini_swe_agent_harness",
43
+ "build_mini_swe_agent_install_script",
44
+ "build_mini_swe_agent_run_command",
45
+ "MINI_SWE_AGENT_INSTALL_SCRIPT",
46
+ "MINI_SWE_AGENT_CONFIG",
35
47
  ]
@@ -0,0 +1,230 @@
1
+ """mini-SWE-agent harness configuration."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import PurePosixPath
6
+ import shlex
7
+
8
# --- Sandbox install layout -------------------------------------------------
DEFAULT_INSTALL_DIR = "/opt/mini-swe-agent"
DEFAULT_PREFIX_DIR = f"{DEFAULT_INSTALL_DIR}/prefix"
DEFAULT_SITE_PACKAGES_DIR = f"{DEFAULT_PREFIX_DIR}/site-packages"
# Private --target directory for the pinned uv install (only used when the
# system python3 is older than 3.10).
DEFAULT_UV_SITE_PACKAGES_DIR = f"{DEFAULT_INSTALL_DIR}/uv-site-packages"
DEFAULT_MINI_BINARY = f"{DEFAULT_PREFIX_DIR}/bin/mini"

# --- Pinned package / tool versions -----------------------------------------
MINI_SWE_AGENT_CLI_PACKAGE = "mini-swe-agent"
MINI_SWE_AGENT_CLI_VERSION = "2.2.8"
# SHA-256 the downloaded wheel is checked against before installation.
MINI_SWE_AGENT_CLI_SHA256 = (
    "694df4de1337e665e3cd82e99f93374f573bf52b8e7c362ac5d8045ad9f7c37c"
)
# Interpreter version requested from uv when the system python3 is too old.
MINI_SWE_AGENT_PYTHON_VERSION = "3.11"
UV_PACKAGE_VERSION = "0.11.7"
DEFAULT_PACKAGE_VERSION = MINI_SWE_AGENT_CLI_VERSION
DEFAULT_PACKAGE_SHA256 = MINI_SWE_AGENT_CLI_SHA256

# --- Runtime paths and mini CLI defaults ------------------------------------
DEFAULT_INSTRUCTION_PATH = "/mini-swe-agent/prompt.txt"
DEFAULT_SYSTEM_PROMPT_PATH = "/mini-swe-agent/system.txt"
DEFAULT_LOG_DIR = "/logs/agent"
DEFAULT_LOG_PATH = f"{DEFAULT_LOG_DIR}/mini-swe-agent.log"
DEFAULT_TRAJECTORY_PATH = f"{DEFAULT_LOG_DIR}/mini-swe-agent.traj.json"
# Deliberately a shell expression: it is expanded inside the sandbox so the
# environment can override the workdir through $AGENT_WORKDIR.
DEFAULT_AGENT_WORKDIR = "${AGENT_WORKDIR:-/app}"
DEFAULT_CONFIG_SPEC = "mini_textbased"
DEFAULT_MODEL_CLASS = "litellm_textbased"
DEFAULT_ENVIRONMENT_TIMEOUT = 120


def build_mini_swe_agent_install_script(
    package_version: str = DEFAULT_PACKAGE_VERSION,
    package_sha256: str = DEFAULT_PACKAGE_SHA256,
    prefix_dir: str = DEFAULT_PREFIX_DIR,
    install_python: bool = True,
) -> str:
    """Build the shell script that installs mini-SWE-agent.

    The generated script:

    1. optionally installs ``python3``/``pip`` through ``apt-get`` when either
       is missing (``install_python=True``);
    2. recreates ``prefix_dir`` and the supporting directories;
    3. when the system ``python3`` is older than 3.10, installs a pinned ``uv``
       and uses it to provision Python ``MINI_SWE_AGENT_PYTHON_VERSION``;
    4. downloads the mini-swe-agent wheel, verifies it against
       ``package_sha256`` and installs it into ``<prefix_dir>/site-packages``;
    5. writes a ``<prefix_dir>/bin/mini`` wrapper that runs
       ``minisweagent.run.mini`` with the interpreter recorded in
       ``<prefix_dir>/python``.

    Args:
        package_version: mini-swe-agent version; selects the wheel filename.
        package_sha256: Expected SHA-256 hex digest of that wheel.
        prefix_dir: Directory receiving ``bin/``, ``site-packages/`` and the
            ``python`` pointer file. It is removed and recreated.
        install_python: Whether to emit the ``apt-get`` bootstrap stanza.

    Returns:
        The install script as a single string.
    """
    install_tools = ""
    if install_python:
        install_tools = """\
export DEBIAN_FRONTEND=noninteractive
if ! command -v python3 >/dev/null 2>&1 || ! python3 -m pip --version >/dev/null 2>&1; then
    apt-get update -qq
    apt-get install -y -qq python3 python3-pip ca-certificates
fi
"""

    site_packages_dir = f"{prefix_dir}/site-packages"
    wheel_filename = f"mini_swe_agent-{package_version}-py3-none-any.whl"
    wheel_url = (
        f"https://files.pythonhosted.org/packages/py3/m/mini-swe-agent/{wheel_filename}"
    )

    quoted_prefix_dir = shlex.quote(prefix_dir)
    quoted_site_packages_dir = shlex.quote(site_packages_dir)
    quoted_install_dir = shlex.quote(DEFAULT_INSTALL_DIR)
    quoted_uv_site_packages_dir = shlex.quote(DEFAULT_UV_SITE_PACKAGES_DIR)
    quoted_log_dir = shlex.quote(DEFAULT_LOG_DIR)
    quoted_wheel_url = shlex.quote(wheel_url)

    # Notes on the script below:
    # * The checksum line uses the canonical "<digest>  <file>" (two spaces)
    #   format expected by `sha256sum -c`.
    # * The heredoc is quoted ('EOF'), so the wrapper body is written
    #   literally and expanded only when the wrapper runs; its body and the
    #   terminator must stay at column 0.
    # * PYTHONPATH is extended with ${PYTHONPATH:+:$PYTHONPATH} so that an
    #   unset/empty PYTHONPATH does not leave a trailing ':' (an empty entry,
    #   which Python treats as the current directory).
    return f"""\
set -e
{install_tools}
rm -rf {quoted_prefix_dir}
mkdir -p {quoted_install_dir} {quoted_prefix_dir}/bin {quoted_site_packages_dir} {quoted_uv_site_packages_dir} {quoted_log_dir} /mini-swe-agent
export PIP_CONFIG_FILE=/dev/null
export PIP_INDEX_URL=https://pypi.org/simple
export PIP_BREAK_SYSTEM_PACKAGES=1
unset PIP_EXTRA_INDEX_URL
PYTHON_BIN="$(command -v python3)"
MINI_SWE_AGENT_PYTHON="$PYTHON_BIN"
if ! "$PYTHON_BIN" -c 'import sys; raise SystemExit(sys.version_info < (3, 10))'; then
    "$PYTHON_BIN" -m pip install --quiet --target {quoted_uv_site_packages_dir} uv=={UV_PACKAGE_VERSION}
    env PYTHONPATH={quoted_uv_site_packages_dir} "$PYTHON_BIN" -m uv python install {MINI_SWE_AGENT_PYTHON_VERSION}
    MINI_SWE_AGENT_PYTHON="$(env PYTHONPATH={quoted_uv_site_packages_dir} "$PYTHON_BIN" -m uv python find {MINI_SWE_AGENT_PYTHON_VERSION})"
fi
MINI_SWE_AGENT_WHEEL_DIR="$(mktemp -d)"
trap 'rm -rf "$MINI_SWE_AGENT_WHEEL_DIR"' EXIT
MINI_SWE_AGENT_WHEEL="$MINI_SWE_AGENT_WHEEL_DIR/{wheel_filename}"
MINI_SWE_AGENT_WHEEL_URL={quoted_wheel_url}
export MINI_SWE_AGENT_WHEEL MINI_SWE_AGENT_WHEEL_URL
"$PYTHON_BIN" -c 'import os, urllib.request; urllib.request.urlretrieve(os.environ["MINI_SWE_AGENT_WHEEL_URL"], os.environ["MINI_SWE_AGENT_WHEEL"])'
echo "{package_sha256}  $MINI_SWE_AGENT_WHEEL" | sha256sum -c -
if [ "$MINI_SWE_AGENT_PYTHON" = "$PYTHON_BIN" ]; then
    "$PYTHON_BIN" -m pip install --quiet --target {quoted_site_packages_dir} "$MINI_SWE_AGENT_WHEEL"
else
    env PYTHONPATH={quoted_uv_site_packages_dir} "$PYTHON_BIN" -m uv pip install --python "$MINI_SWE_AGENT_PYTHON" --target {quoted_site_packages_dir} "$MINI_SWE_AGENT_WHEEL"
fi
echo "$MINI_SWE_AGENT_PYTHON" > {quoted_prefix_dir}/python
cat > {quoted_prefix_dir}/bin/mini <<'EOF'
#!/usr/bin/env sh
export PYTHONPATH={quoted_site_packages_dir}${{PYTHONPATH:+:$PYTHONPATH}}
exec "$(cat {quoted_prefix_dir}/python)" -m minisweagent.run.mini "$@"
EOF
chmod +x {quoted_prefix_dir}/bin/mini
test -x {quoted_prefix_dir}/bin/mini
"""


def build_mini_swe_agent_run_command(
    agent_workdir: str = DEFAULT_AGENT_WORKDIR,
    instruction_path: str = DEFAULT_INSTRUCTION_PATH,
    system_prompt_path: str = DEFAULT_SYSTEM_PROMPT_PATH,
    log_path: str = DEFAULT_LOG_PATH,
    trajectory_path: str = DEFAULT_TRAJECTORY_PATH,
    mini_binary: str = DEFAULT_MINI_BINARY,
    config_spec: str = DEFAULT_CONFIG_SPEC,
    model_class: str = DEFAULT_MODEL_CLASS,
    environment_timeout: int = DEFAULT_ENVIRONMENT_TIMEOUT,
    extra_config_specs: list[str] | None = None,
) -> str:
    """Build the shell command that configures and runs mini-SWE-agent.

    Config specs (``-c`` arguments) layer the base config, cost/timeout/model
    settings, any caller-provided overrides, the working directory and, when
    the system prompt file is non-empty, ``agent.system_template``. The agent
    runs under ``timeout`` and its combined output is appended to ``log_path``
    via ``tee``.

    Args:
        agent_workdir: Directory the agent runs in. The default is left
            unquoted so the shell expands ``$AGENT_WORKDIR`` at runtime; any
            other value is shell-quoted.
        instruction_path: File whose contents become the ``--task`` argument.
        system_prompt_path: Optional file used as ``agent.system_template``.
        log_path: File that receives the teed agent output.
        trajectory_path: Passed to the mini CLI as ``--output``.
        mini_binary: Path of the ``mini`` executable to invoke.
        config_spec: Base config spec passed as the first ``-c`` argument.
        model_class: Value for ``model.model_class``.
        environment_timeout: Value for ``environment.timeout``.
        extra_config_specs: Additional ``-c`` specs appended after the
            built-in ones.

    Returns:
        A ``bash -lc '<script>'`` command string.
    """
    # Keep the default workdir shell-expanded for env-level overrides, mirroring
    # the other harnesses.
    if agent_workdir == DEFAULT_AGENT_WORKDIR:
        workdir_assignment = f"MINI_SWE_AGENT_WORKDIR={DEFAULT_AGENT_WORKDIR}"
    else:
        workdir_assignment = f"MINI_SWE_AGENT_WORKDIR={shlex.quote(agent_workdir)}"

    # Config specs are the mini CLI's native override format; use them for cwd,
    # timeout, model class, and optional system prompt wiring. Every
    # caller-supplied value is shell-quoted because the list is spliced into a
    # bash array literal below.
    config_args = [
        "-c",
        shlex.quote(config_spec),
        "-c",
        "agent.cost_limit=0",
        "-c",
        f"environment.timeout={shlex.quote(str(environment_timeout))}",
        "-c",
        f"model.model_class={shlex.quote(model_class)}",
        "-c",
        "model.cost_tracking=ignore_errors",
        "-c",
        "model.model_kwargs.custom_llm_provider=openai",
    ]
    for spec in extra_config_specs or []:
        config_args.extend(["-c", shlex.quote(spec)])
    joined_config_args = " ".join(config_args)

    log_dir = str(PurePosixPath(log_path).parent)
    trajectory_dir = str(PurePosixPath(trajectory_path).parent)

    # PYTHONPATH uses ${PYTHONPATH:+:$PYTHONPATH} so an unset/empty value does
    # not leave a trailing ':' (an empty entry, i.e. the current directory)
    # in the variable that is exported to the agent and its subprocesses.
    script = f"""\
set -eo pipefail
export PATH={shlex.quote(DEFAULT_PREFIX_DIR)}/bin:"$PATH"
export PYTHONPATH={shlex.quote(DEFAULT_SITE_PACKAGES_DIR)}"${{PYTHONPATH:+:$PYTHONPATH}}"
export MSWEA_CONFIGURED=true
export MSWEA_SILENT_STARTUP=true
export MSWEA_GLOBAL_CONFIG_DIR=/tmp/mini-swe-agent-config
export OPENAI_API_KEY="${{OPENAI_API_KEY:-intercepted}}"

{workdir_assignment}
mkdir -p {shlex.quote(log_dir)} {shlex.quote(trajectory_dir)} "$MINI_SWE_AGENT_WORKDIR" "$MSWEA_GLOBAL_CONFIG_DIR"

MINI_SWE_AGENT_TASK="$(cat {shlex.quote(instruction_path)})"
CONFIG_ARGS=({joined_config_args})
CONFIG_ARGS+=(-c "environment.cwd=$MINI_SWE_AGENT_WORKDIR")
if [ -s {shlex.quote(system_prompt_path)} ]; then
    CONFIG_ARGS+=(-c "agent.system_template=$(cat {shlex.quote(system_prompt_path)})")
fi

cd "$MINI_SWE_AGENT_WORKDIR"
timeout --kill-after=30s "${{AGENT_TIMEOUT_SECONDS:-3600}}" {shlex.quote(mini_binary)} \\
    --model "$OPENAI_MODEL" \\
    --task "$MINI_SWE_AGENT_TASK" \\
    --output {shlex.quote(trajectory_path)} \\
    --exit-immediately \\
    --yolo \\
    "${{CONFIG_ARGS[@]}}" 2>&1 | tee -a {shlex.quote(log_path)}
"""
    return f"bash -lc {shlex.quote(script)}"


# Install script and package metadata for the default (pinned) configuration.
MINI_SWE_AGENT_INSTALL_SCRIPT = build_mini_swe_agent_install_script()
MINI_SWE_AGENT_CONFIG = {
    "install_script": MINI_SWE_AGENT_INSTALL_SCRIPT,
    "cli_package": MINI_SWE_AGENT_CLI_PACKAGE,
    "cli_version": MINI_SWE_AGENT_CLI_VERSION,
    "cli_sha256": MINI_SWE_AGENT_CLI_SHA256,
}


def mini_swe_agent_harness(
    system_prompt: str | None = None,
    task_system_prompt: str | None = None,
    agent_workdir: str = DEFAULT_AGENT_WORKDIR,
    instruction_path: str = DEFAULT_INSTRUCTION_PATH,
    system_prompt_path: str = DEFAULT_SYSTEM_PROMPT_PATH,
    log_path: str = DEFAULT_LOG_PATH,
    trajectory_path: str = DEFAULT_TRAJECTORY_PATH,
    package_version: str = DEFAULT_PACKAGE_VERSION,
    package_sha256: str = DEFAULT_PACKAGE_SHA256,
    config_spec: str = DEFAULT_CONFIG_SPEC,
    model_class: str = DEFAULT_MODEL_CLASS,
    environment_timeout: int = DEFAULT_ENVIRONMENT_TIMEOUT,
    extra_config_specs: list[str] | None = None,
):
    """Create a Harness configured for mini-SWE-agent.

    ``task_system_prompt``, when given, is appended to ``system_prompt`` on a
    new line (or used on its own if no ``system_prompt`` was supplied). The
    remaining arguments are forwarded to
    ``build_mini_swe_agent_install_script`` and
    ``build_mini_swe_agent_run_command``.

    Returns:
        A ``Harness`` carrying the install script, run command, combined
        system prompt and the instruction/system-prompt/log paths.
    """
    # Imported lazily, as in the original module, rather than at import time.
    from verifiers.envs.experimental.composable import Harness

    if task_system_prompt:
        if system_prompt:
            system_prompt = system_prompt + "\n" + task_system_prompt
        else:
            system_prompt = task_system_prompt

    install_script = build_mini_swe_agent_install_script(
        package_version=package_version,
        package_sha256=package_sha256,
    )
    run_command = build_mini_swe_agent_run_command(
        agent_workdir=agent_workdir,
        instruction_path=instruction_path,
        system_prompt_path=system_prompt_path,
        log_path=log_path,
        trajectory_path=trajectory_path,
        config_spec=config_spec,
        model_class=model_class,
        environment_timeout=environment_timeout,
        extra_config_specs=extra_config_specs,
    )

    # The system prompt is passed through ComposableEnv as a file and injected
    # into mini's agent.system_template at runtime.
    return Harness(
        install_script=install_script,
        run_command=run_command,
        system_prompt=system_prompt,
        instruction_path=instruction_path,
        system_prompt_path=system_prompt_path,
        log_path=log_path,
    )
@@ -142,6 +142,47 @@ def build_extra_headers(raw: dict[str, Any]) -> dict[str, str]:
142
142
  return {**eval_headers_table, **eval_headers_from_list}
143
143
 
144
144
 
145
def build_extra_headers_from_state(raw: dict[str, Any]) -> dict[str, str]:
    """Build the header-name → state-key map for `ClientConfig.extra_headers_from_state`.

    Two sources are combined:

    * the TOML table ``headers_from_state = { "X-Session-ID" = "trajectory_id" }``
    * the repeatable list ``--header-from-state "X-Session-ID: trajectory_id"``
      (stored under ``header_from_state``)

    List entries are applied after the table, so they win on key collisions.

    Raises:
        ValueError: If ``header_from_state`` is not a list, or an entry is not
            a ``'Name: state_key'`` string with a non-empty name and key.
    """
    table_value = raw.get("headers_from_state")
    merged: dict[str, str] = {}
    if table_value is not None:
        # Table validation happens first, before any list entry is inspected.
        merged = {**_validate_extra_headers_value(table_value)}

    entries = raw.get("header_from_state")
    if entries is None:
        entries = []
    elif not isinstance(entries, list):
        raise ValueError(
            "'header_from_state' must be a list of 'Name: state_key' strings"
        )

    for item in entries:
        if not isinstance(item, str):
            raise ValueError(
                f"Each 'header_from_state' entry must be a string 'Name: state_key', got: {item!r}"
            )
        # Split on the first ':' only, so state keys may themselves contain ':'.
        name, separator, state_key = item.partition(":")
        if not separator:
            raise ValueError(
                f"--header-from-state must be 'Name: state_key', got: {item!r}"
            )
        name = name.strip()
        state_key = state_key.strip()
        if not name:
            raise ValueError("--header-from-state name cannot be empty")
        if not state_key:
            raise ValueError("--header-from-state state_key cannot be empty")
        merged[name] = state_key

    return merged
184
+
185
+
145
186
  def get_env_eval_defaults(env_id: str) -> dict[str, Any]:
146
187
  """Get eval config defaults from the environment module's pyproject.toml.
147
188
 
@@ -279,6 +320,16 @@ def build_parser() -> argparse.ArgumentParser:
279
320
  default=None,
280
321
  help="Extra HTTP header to pass to inference API. 'Name: Value'. Repeatable.",
281
322
  )
323
+ parser.add_argument(
324
+ "--header-from-state",
325
+ action="append",
326
+ default=None,
327
+ help=(
328
+ "Per-request HTTP header whose value is read from the rollout state. "
329
+ "'Name: state_key' (e.g. 'X-Session-ID: trajectory_id'). Repeatable. "
330
+ "Defaults to X-Session-ID=example_id if unset."
331
+ ),
332
+ )
282
333
  parser.add_argument(
283
334
  "--num-examples",
284
335
  "-n",
@@ -639,6 +690,12 @@ def main(argv: list[str] | None = None):
639
690
  )
640
691
  # Build headers: registry < [[eval]] headers table < header list / --header
641
692
  eval_headers_merged = build_extra_headers(raw)
693
+ # Default X-Session-ID → example_id for sticky DP-aware routing;
694
+ # user-supplied headers_from_state / --header-from-state override.
695
+ eval_headers_from_state = {
696
+ "X-Session-ID": "example_id",
697
+ **build_extra_headers_from_state(raw),
698
+ }
642
699
 
643
700
  registry_headers_base: dict[str, str] = {}
644
701
  if endpoint_group is not None:
@@ -683,7 +740,7 @@ def main(argv: list[str] | None = None):
683
740
  api_base_url=primary_api_base_url,
684
741
  endpoint_configs=endpoint_configs,
685
742
  extra_headers=merged_headers,
686
- extra_headers_from_state={"X-Session-ID": "example_id"},
743
+ extra_headers_from_state=eval_headers_from_state,
687
744
  )
688
745
 
689
746
  # Backward-compatible TOML field: resume_path
@@ -439,6 +439,8 @@ def load_toml_config(
439
439
  "api_base_url",
440
440
  "header",
441
441
  "headers",
442
+ "header_from_state",
443
+ "headers_from_state",
442
444
  # sampling
443
445
  "sampling_args",
444
446
  "max_tokens",
@@ -26,11 +26,14 @@ from openai.types.chat.chat_completion_chunk import (
26
26
 
27
27
  from verifiers.errors import InfraError
28
28
  from verifiers.types import Response
29
- from verifiers.utils.logging_utils import truncate
29
+ from verifiers.utils.logging_utils import print_time, truncate
30
30
 
31
31
  logger = logging.getLogger(__name__)
32
32
 
33
33
 
34
+ KEEPALIVE_INTERVAL_SECONDS = 10.0
35
+
36
+
34
37
  class StreamInterrupted(InfraError):
35
38
  """Raised when the intercepted streaming response to the agent is cut short.
36
39
 
@@ -231,11 +234,56 @@ class InterceptionServer:
231
234
  "Connection": "keep-alive",
232
235
  },
233
236
  )
234
- await response.prepare(http_request)
235
237
 
238
+ start = time.monotonic()
239
+
240
+ # Half-open transport at accept raises here; surface it so the
241
+ # rollout reschedules instead of looking like a clean empty stream.
242
+ try:
243
+ await response.prepare(http_request)
244
+ except Exception as e:
245
+ logger.warning(
246
+ f"[{rollout_id}] Streaming response.prepare failed: "
247
+ f"{type(e).__name__}: {e}"
248
+ )
249
+ self._set_rollout_error(
250
+ rollout_id,
251
+ StreamInterrupted(f"prepare failed: {type(e).__name__}: {e}"),
252
+ )
253
+ return response
254
+ # Reuse one get() task across keepalive cycles; asyncio.wait_for on
255
+ # Py 3.10/3.11 can silently drop an item when its timeout cancels.
256
+ get_task: asyncio.Task | None = None
236
257
  try:
237
258
  while True:
238
- chunk_dict = await chunk_queue.get()
259
+ if get_task is None:
260
+ get_task = asyncio.create_task(chunk_queue.get())
261
+ done, _ = await asyncio.wait(
262
+ {get_task}, timeout=KEEPALIVE_INTERVAL_SECONDS
263
+ )
264
+ if get_task not in done:
265
+ # SSE comment keeps the TCP path warm across the vLLM wait
266
+ # so idle-timeouts in any intermediary don't reap it.
267
+ try:
268
+ await response.write(b": keepalive\n\n")
269
+ except Exception as e:
270
+ waited_s = time.monotonic() - start
271
+ logger.debug(
272
+ f"[{rollout_id}] Streaming error during keepalive "
273
+ f"after {print_time(waited_s)}: {e}"
274
+ )
275
+ self._set_rollout_error(
276
+ rollout_id,
277
+ StreamInterrupted(
278
+ f"keepalive write failed after {print_time(waited_s)}: "
279
+ f"{type(e).__name__}: {e}"
280
+ ),
281
+ )
282
+ return response
283
+ continue
284
+
285
+ chunk_dict = get_task.result()
286
+ get_task = None
239
287
 
240
288
  if chunk_dict is None:
241
289
  await response.write(b"data: [DONE]\n\n")
@@ -243,18 +291,28 @@ class InterceptionServer:
243
291
 
244
292
  chunk_json = json.dumps(chunk_dict)
245
293
  await response.write(f"data: {chunk_json}\n\n".encode())
294
+ # Force a loop yield so the transport flushes before close;
295
+ # otherwise burst contention can truncate the final chunk.
296
+ await asyncio.sleep(0)
246
297
 
247
298
  except asyncio.CancelledError:
248
299
  logger.debug(f"[{rollout_id}] Streaming cancelled")
249
300
  except Exception as e:
250
- logger.error(f"[{rollout_id}] Streaming error: {e}")
301
+ waited_s = time.monotonic() - start
302
+ logger.debug(
303
+ f"[{rollout_id}] Streaming error after {print_time(waited_s)}: {e}"
304
+ )
251
305
  self._set_rollout_error(
252
306
  rollout_id,
253
307
  StreamInterrupted(
254
- f"Interception stream to agent interrupted: {type(e).__name__}: {e}"
308
+ f"stream write failed after {print_time(waited_s)}: "
309
+ f"{type(e).__name__}: {e}"
255
310
  ),
256
311
  )
257
312
  return response
313
+ finally:
314
+ if get_task is not None and not get_task.done():
315
+ get_task.cancel()
258
316
 
259
317
  try:
260
318
  await response_future
@@ -263,10 +321,23 @@ class InterceptionServer:
263
321
  f"[{rollout_id}] Rollout error surfaced in stream: {type(e).__name__}: {e}"
264
322
  )
265
323
 
324
+ # Surface any write_eof failure so a tail truncation becomes a
325
+ # reschedulable error instead of a silent zero-turn completion.
266
326
  try:
267
327
  await response.write_eof()
268
- except ConnectionResetError:
269
- logger.debug(f"[{rollout_id}] Client disconnected before write_eof")
328
+ except Exception as e:
329
+ waited_s = time.monotonic() - start
330
+ logger.warning(
331
+ f"[{rollout_id}] write_eof failed after {print_time(waited_s)}: "
332
+ f"{type(e).__name__}: {e}"
333
+ )
334
+ self._set_rollout_error(
335
+ rollout_id,
336
+ StreamInterrupted(
337
+ f"write_eof failed after {print_time(waited_s)}: "
338
+ f"{type(e).__name__}: {e}"
339
+ ),
340
+ )
270
341
  return response
271
342
 
272
343
 
File without changes