verifiers 0.1.15.dev9__tar.gz → 0.1.15.dev10__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (320)
  1. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/PKG-INFO +2 -1
  2. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/pyproject.toml +1 -0
  3. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_client_multimodal_types.py +25 -0
  4. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_eval_cli.py +19 -0
  5. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_harbor_env_mcp.py +43 -89
  6. verifiers-0.1.15.dev10/tests/test_init_script.py +80 -0
  7. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_lean_task.py +10 -8
  8. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_opencode_rlm_env.py +35 -44
  9. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_openenv_client.py +89 -31
  10. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_prime_plugin.py +5 -5
  11. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_renderer_client.py +32 -0
  12. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_rlm_env.py +0 -24
  13. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_v1_config_extension.py +105 -9
  14. verifiers-0.1.15.dev10/tests/test_v1_rlm_swe.py +780 -0
  15. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_v1_runtime_lifecycle.py +130 -73
  16. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/__init__.py +10 -2
  17. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/cli/plugins/prime.py +1 -5
  18. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/clients/anthropic_messages_client.py +27 -44
  19. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/clients/client.py +12 -14
  20. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/clients/openai_chat_completions_client.py +1 -6
  21. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/clients/openai_chat_completions_token_client.py +14 -17
  22. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/clients/openai_responses_client.py +13 -18
  23. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/clients/renderer_client.py +30 -49
  24. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/environment.py +14 -16
  25. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/composable_env.py +13 -21
  26. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/harnesses/rlm.py +7 -8
  27. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/swe_debug_env.py +12 -19
  28. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/task.py +9 -18
  29. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/lean/lean_task.py +5 -18
  30. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/swe/log_parser.py +1 -10
  31. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/swe/multi_swe.py +3 -7
  32. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym.py +2 -2
  33. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2.py +24 -34
  34. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2_log_parsers.py +34 -44
  35. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/gym_env.py +22 -19
  36. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/harbor_env/mcp.py +17 -28
  37. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/mcp_env.py +6 -13
  38. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/opencode_rlm_env.py +9 -16
  39. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/rlm_env.py +40 -62
  40. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/utils/git_checkout_cache.py +13 -31
  41. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/integrations/openenv_env.py +75 -126
  42. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/multiturn_env.py +1 -5
  43. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/gepa/gepa_utils.py +6 -14
  44. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/rubrics/rubric.py +7 -12
  45. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/scripts/build.py +17 -29
  46. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/scripts/eval.py +3 -3
  47. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/scripts/init.py +91 -59
  48. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/serve/server/env_server.py +17 -0
  49. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/serve/server/env_worker.py +19 -4
  50. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/utils/client_utils.py +19 -31
  51. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/utils/data_utils.py +10 -17
  52. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/utils/display_utils.py +2 -6
  53. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/utils/env_utils.py +79 -21
  54. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/utils/eval_utils.py +21 -38
  55. verifiers-0.1.15.dev10/verifiers/utils/import_utils.py +11 -0
  56. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/utils/install_utils.py +10 -11
  57. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/utils/interception_utils.py +9 -11
  58. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/utils/logging_utils.py +11 -17
  59. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/utils/message_utils.py +9 -14
  60. verifiers-0.1.15.dev10/verifiers/utils/response_utils.py +102 -0
  61. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/utils/save_utils.py +13 -21
  62. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/utils/thread_utils.py +2 -15
  63. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/utils/threaded_sandbox_client.py +2 -2
  64. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/ENVIRONMENT_BEST_PRACTICES.md +54 -61
  65. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/README.md +7 -4
  66. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/__init__.py +5 -0
  67. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/config.py +1 -0
  68. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/packages/harnesses/command.py +17 -21
  69. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/packages/harnesses/opencode.py +1 -1
  70. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/packages/harnesses/pi.py +6 -10
  71. verifiers-0.1.15.dev10/verifiers/v1/packages/harnesses/rlm.py +601 -0
  72. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/packages/harnesses/terminus_2.py +8 -13
  73. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/runtime.py +24 -37
  74. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/taskset.py +1 -4
  75. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/utils/program_utils.py +2 -1
  76. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/utils/sandbox_utils.py +2 -0
  77. verifiers-0.1.15.dev9/tests/test_v1_rlm_swe.py +0 -390
  78. verifiers-0.1.15.dev9/verifiers/utils/import_utils.py +0 -16
  79. verifiers-0.1.15.dev9/verifiers/utils/response_utils.py +0 -94
  80. verifiers-0.1.15.dev9/verifiers/v1/packages/harnesses/rlm.py +0 -291
  81. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/.gitignore +0 -0
  82. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/LICENSE +0 -0
  83. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/README.md +0 -0
  84. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/AGENTS.md +0 -0
  85. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/README.md +0 -0
  86. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/__init__.py +0 -0
  87. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/conftest.py +0 -0
  88. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_browser_env.py +0 -0
  89. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_build_script.py +0 -0
  90. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_cli_agent_env.py +0 -0
  91. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_client_auth_errors.py +0 -0
  92. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_client_config.py +0 -0
  93. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_composable_env.py +0 -0
  94. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_context_token_metrics.py +0 -0
  95. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_decorator_ranks.py +0 -0
  96. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_endpoint_registry.py +0 -0
  97. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_env_group.py +0 -0
  98. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_env_server.py +0 -0
  99. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_environment.py +0 -0
  100. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_environment_extra.py +0 -0
  101. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_envs.py +0 -0
  102. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_error_chain.py +0 -0
  103. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_eval_display.py +0 -0
  104. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_eval_utils.py +0 -0
  105. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_gepa_cli.py +0 -0
  106. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_gepa_utils.py +0 -0
  107. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_gym_env.py +0 -0
  108. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_imports.py +0 -0
  109. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_install_utils.py +0 -0
  110. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_interception_utils.py +0 -0
  111. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_langchain_deep_agents_wikispeedia.py +0 -0
  112. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_logging.py +0 -0
  113. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_math_rubric.py +0 -0
  114. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_maybe_think_parser.py +0 -0
  115. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_mcp_search_env.py +0 -0
  116. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_message_utils.py +0 -0
  117. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_message_utils_multimodal.py +0 -0
  118. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_multiturn_env.py +0 -0
  119. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_nemorl_client.py +0 -0
  120. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_openai_chat_completions_token_client.py +0 -0
  121. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_openai_responses_client.py +0 -0
  122. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_opencode_harbor.py +0 -0
  123. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_parser.py +0 -0
  124. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_path_utils.py +0 -0
  125. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_per_turn_timing.py +0 -0
  126. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_pricing_utils.py +0 -0
  127. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_renderer_e2e.py +0 -0
  128. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_rlm_composable_env.py +0 -0
  129. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_rubric.py +0 -0
  130. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_rubric_group.py +0 -0
  131. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_sandbox_env.py +0 -0
  132. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_sandbox_mixin.py +0 -0
  133. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_save_utils.py +0 -0
  134. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_setup_script.py +0 -0
  135. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_singleturn_env.py +0 -0
  136. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_stateful_tool_env.py +0 -0
  137. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_think_parser.py +0 -0
  138. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_tool_env.py +0 -0
  139. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_tool_utils.py +0 -0
  140. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_trajectory_processing.py +0 -0
  141. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_tui_info_formatting.py +0 -0
  142. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_types.py +0 -0
  143. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_v1_bfcl.py +0 -0
  144. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_v1_empty_completions.py +0 -0
  145. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_v1_endpoint_protocols.py +0 -0
  146. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_v1_example_counts.py +0 -0
  147. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_v1_group_reward_env.py +0 -0
  148. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_v1_harbor_cli.py +0 -0
  149. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_v1_mini_swe_agent.py +0 -0
  150. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_v1_scoring_functions.py +0 -0
  151. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_v1_taskset_bindings.py +0 -0
  152. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_v1_textarena_taskset.py +0 -0
  153. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_wiki_search_v1.py +0 -0
  154. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_wordle_env.py +0 -0
  155. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_wordle_v1_env.py +0 -0
  156. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/tests/test_xml_parser.py +0 -0
  157. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/AGENTS.md +0 -0
  158. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/cli/__init__.py +0 -0
  159. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/cli/commands/__init__.py +0 -0
  160. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/cli/commands/build.py +0 -0
  161. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/cli/commands/eval.py +0 -0
  162. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/cli/commands/gepa.py +0 -0
  163. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/cli/commands/init.py +0 -0
  164. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/cli/commands/install.py +0 -0
  165. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/cli/commands/setup.py +0 -0
  166. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/cli/plugins/__init__.py +0 -0
  167. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/cli/tui.py +0 -0
  168. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/clients/__init__.py +0 -0
  169. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/clients/nemorl_chat_completions_client.py +0 -0
  170. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/clients/openai_completions_client.py +0 -0
  171. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/decorators.py +0 -0
  172. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/AGENTS.md +0 -0
  173. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/__init__.py +0 -0
  174. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/env_group.py +0 -0
  175. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/README.md +0 -0
  176. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/__init__.py +0 -0
  177. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/cli_agent_env.py +0 -0
  178. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/README.md +0 -0
  179. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/__init__.py +0 -0
  180. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/_filter.py +0 -0
  181. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/harness.py +0 -0
  182. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/harnesses/__init__.py +0 -0
  183. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/harnesses/mini_swe_agent.py +0 -0
  184. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/harnesses/opencode.py +0 -0
  185. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/harnesses/prompt.txt +0 -0
  186. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/__init__.py +0 -0
  187. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/cp/__init__.py +0 -0
  188. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/cp/cp_task.py +0 -0
  189. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/cp/test_utils.py +0 -0
  190. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/harbor/__init__.py +0 -0
  191. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/harbor/harbor.py +0 -0
  192. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/lean/__init__.py +0 -0
  193. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/math/__init__.py +0 -0
  194. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/math/math_task.py +0 -0
  195. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/swe/__init__.py +0 -0
  196. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/swe/_test_patch.py +0 -0
  197. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/swe/create_fix_patch.sh +0 -0
  198. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/swe/openswe.py +0 -0
  199. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/swe/swe_bench.py +0 -0
  200. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/swe/swe_lego.py +0 -0
  201. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/swe/swe_smith.py +0 -0
  202. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/swe/swe_tasksets.py +0 -0
  203. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/harbor_env/__init__.py +0 -0
  204. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/harbor_env/env.py +0 -0
  205. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/opencode_env.py +0 -0
  206. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/opencode_qa_env.py +0 -0
  207. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/sandbox_mixin.py +0 -0
  208. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/utils/__init__.py +0 -0
  209. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/utils/file_locks.py +0 -0
  210. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/integrations/README.md +0 -0
  211. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/integrations/__init__.py +0 -0
  212. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/integrations/browser_env/README.md +0 -0
  213. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/integrations/browser_env/__init__.py +0 -0
  214. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
  215. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
  216. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
  217. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
  218. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +0 -0
  219. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
  220. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/integrations/textarena_env.py +0 -0
  221. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/python_env.py +0 -0
  222. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/sandbox_env.py +0 -0
  223. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/singleturn_env.py +0 -0
  224. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/stateful_tool_env.py +0 -0
  225. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/envs/tool_env.py +0 -0
  226. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/errors.py +0 -0
  227. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/gepa/__init__.py +0 -0
  228. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/gepa/adapter.py +0 -0
  229. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/gepa/config.py +0 -0
  230. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/gepa/display.py +0 -0
  231. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/parsers/__init__.py +0 -0
  232. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/parsers/maybe_think_parser.py +0 -0
  233. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/parsers/parser.py +0 -0
  234. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/parsers/think_parser.py +0 -0
  235. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/parsers/xml_parser.py +0 -0
  236. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/rl/README.md +0 -0
  237. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/rl/__init__.py +0 -0
  238. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/rl/inference/__init__.py +0 -0
  239. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/rl/inference/client.py +0 -0
  240. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/rl/inference/server.py +0 -0
  241. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/rl/trainer/__init__.py +0 -0
  242. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/rl/trainer/config.py +0 -0
  243. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/rl/trainer/orchestrator.py +0 -0
  244. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/rl/trainer/trainer.py +0 -0
  245. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/rl/trainer/utils.py +0 -0
  246. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/rubrics/__init__.py +0 -0
  247. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/rubrics/experimental/hybrid_math_rubric.py +0 -0
  248. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/rubrics/judge_rubric.py +0 -0
  249. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/rubrics/math_rubric.py +0 -0
  250. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/rubrics/rubric_group.py +0 -0
  251. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/scripts/__init__.py +0 -0
  252. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/scripts/gepa.py +0 -0
  253. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/scripts/install.py +0 -0
  254. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/scripts/rl.py +0 -0
  255. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/scripts/setup.py +0 -0
  256. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/scripts/train.py +0 -0
  257. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/scripts/tui.py +0 -0
  258. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/scripts/vllm.py +0 -0
  259. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/serve/__init__.py +0 -0
  260. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/serve/client/env_client.py +0 -0
  261. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/serve/client/zmq_env_client.py +0 -0
  262. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/serve/server/__init__.py +0 -0
  263. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/serve/server/env_router.py +0 -0
  264. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/serve/server/zmq_env_server.py +0 -0
  265. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/serve/types.py +0 -0
  266. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/types.py +0 -0
  267. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/utils/__init__.py +0 -0
  268. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/utils/async_utils.py +0 -0
  269. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/utils/config_utils.py +0 -0
  270. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/utils/env_config_utils.py +0 -0
  271. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/utils/error_utils.py +0 -0
  272. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/utils/eval_display.py +0 -0
  273. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/utils/heartbeat.py +0 -0
  274. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/utils/metric_utils.py +0 -0
  275. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/utils/path_utils.py +0 -0
  276. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/utils/pricing_utils.py +0 -0
  277. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/utils/process_utils.py +0 -0
  278. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/utils/serve_utils.py +0 -0
  279. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/utils/tool_utils.py +0 -0
  280. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/utils/usage_utils.py +0 -0
  281. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/utils/version_utils.py +0 -0
  282. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/RE_MIGRATION.md +0 -0
  283. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/env.py +0 -0
  284. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/harness.py +0 -0
  285. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/packages/__init__.py +0 -0
  286. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/packages/harnesses/__init__.py +0 -0
  287. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/packages/harnesses/configs.py +0 -0
  288. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/packages/harnesses/mini_swe_agent.py +0 -0
  289. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/packages/tasksets/__init__.py +0 -0
  290. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/packages/tasksets/harbor.py +0 -0
  291. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/packages/tasksets/textarena.py +0 -0
  292. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/state.py +0 -0
  293. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/task.py +0 -0
  294. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/toolset.py +0 -0
  295. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/types.py +0 -0
  296. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/user.py +0 -0
  297. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/utils/__init__.py +0 -0
  298. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/utils/artifact_utils.py +0 -0
  299. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/utils/binding_utils.py +0 -0
  300. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/utils/config_callable_utils.py +0 -0
  301. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/utils/config_utils.py +0 -0
  302. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/utils/endpoint_utils.py +0 -0
  303. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/utils/json_utils.py +0 -0
  304. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/utils/judge_utils.py +0 -0
  305. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/utils/lifecycle_utils.py +0 -0
  306. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/utils/mcp_proxy_utils.py +0 -0
  307. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/utils/mcp_utils.py +0 -0
  308. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/utils/object_utils.py +0 -0
  309. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/utils/prompt_utils.py +0 -0
  310. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/utils/runtime_owner_utils.py +0 -0
  311. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/utils/runtime_registry.py +0 -0
  312. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/utils/sandbox_program_utils.py +0 -0
  313. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/utils/scoring_utils.py +0 -0
  314. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/utils/serialization_utils.py +0 -0
  315. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/utils/task_freeze_utils.py +0 -0
  316. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/utils/taskset_utils.py +0 -0
  317. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/utils/timing_utils.py +0 -0
  318. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/utils/tool_utils.py +0 -0
  319. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/utils/trajectory_utils.py +0 -0
  320. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev10}/verifiers/v1/utils/usage_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: verifiers
3
- Version: 0.1.15.dev9
3
+ Version: 0.1.15.dev10
4
4
  Summary: Verifiers: Environments for LLM Reinforcement Learning
5
5
  Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
6
6
  Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
@@ -48,6 +48,7 @@ Requires-Dist: tenacity>=8.5.0
48
48
  Requires-Dist: textual
49
49
  Requires-Dist: tomli; python_version < '3.11'
50
50
  Requires-Dist: typing-extensions; python_version < '3.12'
51
+ Requires-Dist: uvloop>=0.21.0; sys_platform != 'win32' and sys_platform != 'cygwin' and platform_python_implementation != 'PyPy'
51
52
  Provides-Extra: browser
52
53
  Requires-Dist: aiohttp>=3.9.0; extra == 'browser'
53
54
  Requires-Dist: python-dotenv>=1.0.0; extra == 'browser'
@@ -54,6 +54,7 @@ dependencies = [
54
54
  "regex<2026.4.4",
55
55
  "httpx>=0.27.0",
56
56
  "prime-pydantic-config[toml]",
57
+ "uvloop>=0.21.0; sys_platform != 'win32' and sys_platform != 'cygwin' and platform_python_implementation != 'PyPy'",
57
58
  ]
58
59
 
59
60
  [dependency-groups]
@@ -98,6 +98,31 @@ async def test_anthropic_to_native_prompt_with_typed_multimodal_content_parts():
98
98
  ]
99
99
 
100
100
 
101
+ @pytest.mark.asyncio
102
+ async def test_anthropic_to_native_prompt_marks_unsupported_images_in_mixed_content():
103
+ pytest.importorskip("anthropic")
104
+ from verifiers.clients.anthropic_messages_client import AnthropicMessagesClient
105
+
106
+ client = AnthropicMessagesClient(object())
107
+ messages = [
108
+ UserMessage(
109
+ content=[
110
+ TextContentPart(text="describe this"),
111
+ ImageUrlContentPart(
112
+ image_url=ImageUrlSource(url="https://example.com/image.png")
113
+ ),
114
+ ]
115
+ )
116
+ ]
117
+
118
+ prompt, kwargs = await client.to_native_prompt(messages)
119
+ assert kwargs["system"] == ""
120
+ assert prompt[0]["content"] == [
121
+ {"type": "text", "text": "describe this"},
122
+ {"type": "text", "text": "[image]"},
123
+ ]
124
+
125
+
101
126
  @pytest.mark.asyncio
102
127
  async def test_anthropic_assistant_tool_calls_use_text_chunks_not_model_repr():
103
128
  pytest.importorskip("anthropic")
@@ -288,6 +288,25 @@ def test_cli_headers_table_and_list_merge(monkeypatch, run_cli):
288
288
  }
289
289
 
290
290
 
291
+ def test_cli_defaults_session_header_to_trajectory_id(monkeypatch, run_cli):
292
+ captured = run_cli(monkeypatch, {})
293
+
294
+ assert captured["configs"][0].client_config.extra_headers_from_state == {
295
+ "X-Session-ID": "trajectory_id"
296
+ }
297
+
298
+
299
+ def test_cli_header_from_state_overrides_default_session_header(monkeypatch, run_cli):
300
+ captured = run_cli(
301
+ monkeypatch,
302
+ {"header_from_state": ["X-Session-ID: example_id"]},
303
+ )
304
+
305
+ assert captured["configs"][0].client_config.extra_headers_from_state == {
306
+ "X-Session-ID": "example_id"
307
+ }
308
+
309
+
291
310
  def test_cli_registry_headers_merged_with_eval_toml(tmp_path, monkeypatch, run_cli):
292
311
  cfg = tmp_path / "eval.toml"
293
312
  cfg.write_text(
@@ -239,83 +239,20 @@ class TestLaunchCommandResolution:
239
239
  )
240
240
 
241
241
 
242
- class TestStartStopCommands:
243
- def test_start_cmd_tracks_process_group_leader_pid(self):
244
- """Start command must capture `$!` (the backgrounded pgroup leader),
245
- not `$$` (the outer shell), and must end with `wait` so the recorded
246
- exit code reflects the launched daemon's.
247
- """
248
- cmd = _DummyEnv()._mcp_start_cmd("svc", "python -u /opt/x/server.py")
249
- assert "echo $!" in cmd
250
- assert "echo $$" not in cmd
251
- assert cmd.rstrip().endswith("wait")
252
- assert "/tmp/harbor-mcp-svc.pid" in cmd
253
- assert "python -u /opt/x/server.py" in cmd
254
-
255
- def test_start_cmd_wraps_in_setsid_for_process_group_semantics(self):
256
- """Wrapping the user's command in `setsid sh -c ...` is what makes
257
- `$!` a process-group leader, so `kill -9 -$PID` can reap the whole
258
- daemon tree on stop. Compound commands (e.g. `cd /x && python y.py`)
259
- must be preserved verbatim inside the sh -c payload so their own
260
- semantics are unchanged."""
261
- cmd = _DummyEnv()._mcp_start_cmd("svc", "cd /opt && python server.py")
262
- assert "setsid sh -c " in cmd
263
- assert "'cd /opt && python server.py'" in cmd
264
-
265
- def test_stop_cmd_is_one_line_sigkill_plus_rm(self):
266
- """Default: one SIGKILL to the process group, then unlink the
267
- pidfile — no poll/sleep loop."""
268
- cmd = _DummyEnv()._mcp_stop_cmd("svc")
269
- assert "kill -9" in cmd
270
- assert "rm -f" in cmd
271
- assert "/tmp/harbor-mcp-svc.pid" in cmd
272
- assert "kill -0" not in cmd
273
- assert "sleep" not in cmd
274
- assert "\n" not in cmd
275
- assert len(cmd) < 120
276
-
277
- def test_stop_cmd_targets_process_group_not_single_pid(self):
278
- """The `-` prefix on the `$(cat …)` expansion is what turns kill(1)
279
- into a process-group kill — without it, SIGKILL only lands on the
280
- wrapping shell and e.g. a `python` child spawned via `cd && python`
281
- leaks as an orphan."""
282
- cmd = _DummyEnv()._mcp_stop_cmd("svc")
283
- assert 'kill -9 -"$(cat' in cmd
284
-
285
- def test_server_name_with_shell_metachars_is_quoted(self):
286
- """Server name is task-author-controlled; every pidfile reference
287
- must appear only inside single-quoted spans."""
288
- env = _DummyEnv()
289
- unquoted = "/tmp/harbor-mcp-evil$(whoami).pid"
290
- quoted = f"'{unquoted}'"
291
- for cmd in (
292
- env._mcp_start_cmd("evil$(whoami)", "x"),
293
- env._mcp_stop_cmd("evil$(whoami)"),
294
- ):
295
- assert quoted in cmd
296
- # Every raw occurrence must be inside an already-quoted span.
297
- assert cmd.count(unquoted) == cmd.count(quoted)
298
-
299
- def test_launch_command_with_shell_metachars_is_quoted(self):
300
- """Same for the user's launch command: it's task-author-controlled,
301
- must land inside a single-quoted span once wrapped in `sh -c`."""
302
- env = _DummyEnv()
303
- evil_cmd = "python -c 'print(1)' && touch /pwned"
304
- quoted = f"'{evil_cmd}'".replace("'", "'\"'\"'")
305
- # shlex-quoted output contains the evil string only inside quotes.
306
- cmd = env._mcp_start_cmd("svc", evil_cmd)
307
- assert "setsid sh -c " in cmd
308
- # No unquoted `&& touch /pwned` outside a single-quoted span.
309
- assert cmd.count(evil_cmd) == 0 or quoted in cmd
310
-
311
-
312
242
  class TestLifecycle:
313
243
  @pytest.mark.asyncio
314
244
  async def test_starts_server_with_registered_launch_command(self):
315
- env = _DummyEnv(mcp_launch_commands={"svc": "python server.py"})
245
+ env = _DummyEnv(mcp_launch_commands={"svc": "cd /opt && python server.py"})
316
246
  state: dict[str, Any] = {}
317
247
  await env.start_mcp_servers("sbx", _config_with_server(), state)
318
248
  assert set(state["harbor_mcp_jobs"].keys()) == {"svc"}
249
+ _, start_cmd = env.started_jobs[0]
250
+ assert "echo $!" in start_cmd
251
+ assert "echo $$" not in start_cmd
252
+ assert start_cmd.rstrip().endswith("wait")
253
+ assert "/tmp/harbor-mcp-svc.pid" in start_cmd
254
+ assert "setsid sh -c " in start_cmd
255
+ assert "'cd /opt && python server.py'" in start_cmd
319
256
 
320
257
  @pytest.mark.asyncio
321
258
  async def test_externally_managed_server_is_skipped(self):
@@ -342,9 +279,38 @@ class TestLifecycle:
342
279
  if "kill -9" in c.args[1]
343
280
  ]
344
281
  assert len(stop_calls) == 1
345
- assert "harbor-mcp-svc.pid" in stop_calls[0]
282
+ stop_cmd = stop_calls[0]
283
+ assert "harbor-mcp-svc.pid" in stop_cmd
284
+ assert 'kill -9 -"$(cat' in stop_cmd
285
+ assert "rm -f" in stop_cmd
286
+ assert "kill -0" not in stop_cmd
287
+ assert "sleep" not in stop_cmd
288
+ assert "\n" not in stop_cmd
289
+ assert len(stop_cmd) < 120
346
290
  assert state["harbor_mcp_jobs"] == {}
347
291
 
292
+ @pytest.mark.asyncio
293
+ async def test_launch_and_stop_commands_quote_task_authored_shell_text(self):
294
+ env = _DummyEnv(
295
+ mcp_launch_commands={
296
+ "evil$(whoami)": "python -c 'print(1)' && touch /pwned"
297
+ }
298
+ )
299
+ state: dict[str, Any] = {"sandbox_id": "sbx"}
300
+ await env.start_mcp_servers(
301
+ "sbx", _config_with_server(name="evil$(whoami)"), state
302
+ )
303
+ _, start_cmd = env.started_jobs[0]
304
+ quoted_pidfile = "'/tmp/harbor-mcp-evil$(whoami).pid'"
305
+ assert quoted_pidfile in start_cmd
306
+ assert "setsid sh -c " in start_cmd
307
+ assert "'\"'\"'print(1)'\"'\"'" in start_cmd
308
+
309
+ env.sandbox_client.execute_command.reset_mock()
310
+ await env.stop_mcp_servers(state)
311
+ stop_cmd = env.sandbox_client.execute_command.call_args.args[1]
312
+ assert quoted_pidfile in stop_cmd
313
+
348
314
  @pytest.mark.asyncio
349
315
  async def test_stop_without_sandbox_id_is_a_noop(self):
350
316
  env = _DummyEnv()
@@ -530,22 +496,6 @@ class TestBackgroundJob:
530
496
  class TestHealthCheck:
531
497
  """Readiness probing — default `/proc/net/tcp` + user override."""
532
498
 
533
- def test_default_probe_shape(self):
534
- """Portable awk on /proc/net/tcp{,6}, matching LISTEN state only,
535
- with no bash-ism dependency like /dev/tcp."""
536
- cmd = HarborMCPMixin._default_mcp_health_cmd(8000)
537
- assert "bash" not in cmd and "/dev/tcp" not in cmd
538
- assert "/proc/net/tcp" in cmd and "/proc/net/tcp6" in cmd
539
- assert '$4 == "0A"' in cmd # LISTEN state
540
-
541
- @pytest.mark.parametrize(
542
- "port,hex_expected",
543
- [(80, "0050"), (8000, "1F40"), (65535, "FFFF"), (1, "0001")],
544
- )
545
- def test_default_probe_encodes_port_as_uppercase_hex(self, port, hex_expected):
546
- cmd = HarborMCPMixin._default_mcp_health_cmd(port)
547
- assert f":{hex_expected}$" in cmd
548
-
549
499
  @pytest.mark.asyncio
550
500
  async def test_custom_healthcheck_command_templated_with_port(self):
551
501
  env = _DummyEnv(mcp_launch_commands={"svc": "python x"})
@@ -580,7 +530,11 @@ class TestHealthCheck:
580
530
  if "/proc/net/tcp" in c.args[1]
581
531
  ]
582
532
  assert len(health_calls) == 1
583
- assert ":1F40$" in health_calls[0]
533
+ health_cmd = health_calls[0]
534
+ assert "bash" not in health_cmd and "/dev/tcp" not in health_cmd
535
+ assert "/proc/net/tcp6" in health_cmd
536
+ assert '$4 == "0A"' in health_cmd
537
+ assert ":1F40$" in health_cmd
584
538
 
585
539
  @pytest.mark.asyncio
586
540
  async def test_probe_timeout_is_respected(self):
@@ -0,0 +1,80 @@
1
+ from pathlib import Path
2
+
3
+ import verifiers as vf
4
+ from verifiers.scripts.init import init_environment
5
+
6
+
7
+ def read_env_file(root: Path, env_id: str) -> str:
8
+ module_name = env_id.replace("-", "_")
9
+ return (root / module_name / f"{module_name}.py").read_text()
10
+
11
+
12
+ def test_init_default_writes_v0_stub(tmp_path: Path) -> None:
13
+ root = init_environment("foo", path=str(tmp_path))
14
+ content = read_env_file(tmp_path, "foo")
15
+
16
+ assert root == tmp_path / "foo"
17
+ assert "def load_environment(**kwargs) -> vf.Environment:" in content
18
+ assert "NotImplementedError" in content
19
+ assert "load_taskset" not in content
20
+ assert "EnvTaskset" not in content
21
+
22
+
23
+ def test_init_v1_writes_thin_taskset_template(tmp_path: Path) -> None:
24
+ init_environment("bar", path=str(tmp_path), v1=True)
25
+ content = read_env_file(tmp_path, "bar")
26
+
27
+ assert 'ENV_ID = "bar"' in content
28
+ assert "def load_tasks():" in content
29
+ assert "class EnvTasksetConfig(vf.TasksetConfig):" in content
30
+ assert 'source: str = "bar:load_tasks"' in content
31
+ assert 'rewards: list[str] = ["bar:exact_answer"]' in content
32
+ assert "def load_taskset(config: EnvTasksetConfig) -> vf.Taskset:" in content
33
+ assert "vf.load_taskset(ENV_ID, config=config.taskset)" in content
34
+ assert "class EnvTaskset(" not in content
35
+ assert "_default_" not in content
36
+ assert "assert isinstance" not in content
37
+
38
+
39
+ def test_init_v1_template_loads_with_vf_load_environment(
40
+ tmp_path: Path, monkeypatch
41
+ ) -> None:
42
+ init_environment("loadable-v1", path=str(tmp_path), v1=True)
43
+ monkeypatch.syspath_prepend(str(tmp_path / "loadable_v1"))
44
+
45
+ env = vf.load_environment("loadable-v1")
46
+
47
+ assert isinstance(env, vf.Env)
48
+ assert env.taskset.rows()[0]["answer"] == "cba"
49
+ assert env.taskset.rewards[0].__name__ == "exact_answer"
50
+
51
+
52
+ def test_init_v1_with_harness_writes_harness_stub(tmp_path: Path) -> None:
53
+ init_environment("baz", path=str(tmp_path), v1=True, with_harness=True)
54
+ content = read_env_file(tmp_path, "baz")
55
+
56
+ assert "class EnvHarnessConfig(vf.HarnessConfig):" in content
57
+ assert "class EnvHarness(vf.Harness):" in content
58
+ assert "def load_harness(config: EnvHarnessConfig) -> EnvHarness:" in content
59
+ assert "vf.load_harness(ENV_ID, config=config.harness)" in content
60
+
61
+
62
+ def test_init_with_harness_without_v1_warns_and_uses_v0(tmp_path: Path, capsys) -> None:
63
+ init_environment("plain", path=str(tmp_path), with_harness=True)
64
+ content = read_env_file(tmp_path, "plain")
65
+ captured = capsys.readouterr()
66
+
67
+ assert "--with-harness only applies with --v1; ignoring." in captured.out
68
+ assert "def load_environment(**kwargs) -> vf.Environment:" in content
69
+ assert "load_harness" not in content
70
+
71
+
72
+ def test_init_v1_multifile_exports_component_loaders(tmp_path: Path) -> None:
73
+ init_environment("pkg-env", path=str(tmp_path), v1=True, multi_file=True)
74
+ package_dir = tmp_path / "pkg_env" / "pkg_env"
75
+ init_content = (package_dir / "__init__.py").read_text()
76
+ env_content = (package_dir / "pkg_env.py").read_text()
77
+
78
+ assert "from .pkg_env import load_environment, load_taskset" in init_content
79
+ assert "__all__ = ['load_environment', 'load_taskset']" in init_content
80
+ assert 'source: str = "pkg_env.pkg_env:load_tasks"' in env_content
@@ -9,10 +9,8 @@ from verifiers.envs.experimental.composable.tasksets.lean.lean_task import (
9
9
  LEAN_GUARD_END_MARKER,
10
10
  LeanRubric,
11
11
  _build_starter_file,
12
- _expected_protected_region,
13
12
  _extract_protected_region,
14
13
  _normalize_signature,
15
- _wrap_with_lean_guard,
16
14
  )
17
15
 
18
16
 
@@ -80,11 +78,13 @@ class TestNormalizeSignature:
80
78
  )
81
79
 
82
80
 
83
- class TestWrapWithLeanGuard:
81
+ class TestBuildStarterFileLeanGuardLayout:
84
82
  def test_marker_layout(self) -> None:
85
83
  signature = "theorem foo (x : ℝ) : x = x := by"
86
- wrapped = _wrap_with_lean_guard(signature)
87
- assert wrapped == (
84
+ starter = _build_starter_file(
85
+ {"formal_statement": signature, "header": "", "imports": ""}
86
+ )
87
+ assert starter == (
88
88
  "-- lean-guard: begin protected\n"
89
89
  "theorem foo (x : ℝ) : x = x := by\n"
90
90
  "-- lean-guard: end protected\n"
@@ -93,8 +93,10 @@ class TestWrapWithLeanGuard:
93
93
 
94
94
  def test_round_trip_via_extract(self) -> None:
95
95
  signature = "theorem foo : True := by"
96
- wrapped = _wrap_with_lean_guard(signature)
97
- region = _extract_protected_region(wrapped)
96
+ starter = _build_starter_file(
97
+ {"formal_statement": signature, "header": "", "imports": ""}
98
+ )
99
+ region = _extract_protected_region(starter)
98
100
  assert region is not None
99
101
  assert LEAN_GUARD_BEGIN_MARKER in region
100
102
  assert LEAN_GUARD_END_MARKER in region
@@ -212,7 +214,7 @@ class TestBuildStarterFile:
212
214
  "header": "import Mathlib",
213
215
  }
214
216
  starter = _build_starter_file(info)
215
- expected = _expected_protected_region(info)
217
+ expected = _extract_protected_region(_build_starter_file(info)) or ""
216
218
  actual = _extract_protected_region(starter)
217
219
  assert expected == actual
218
220
  assert expected != ""
@@ -1,5 +1,6 @@
1
1
  """Tests for the OpenCodeRLMEnv class."""
2
2
 
3
+ import asyncio
3
4
  import json
4
5
  import subprocess
5
6
  from unittest.mock import AsyncMock, MagicMock, patch
@@ -7,6 +8,7 @@ from unittest.mock import AsyncMock, MagicMock, patch
7
8
  import pytest
8
9
  from datasets import Dataset
9
10
 
11
+ import verifiers as vf
10
12
  from verifiers.envs.experimental.opencode_rlm_env import (
11
13
  OpenCodeRLMEnv,
12
14
  OpenCodeRLMMonitorRubric,
@@ -239,45 +241,6 @@ class TestBuildEnvVars:
239
241
  assert "RLM_SUB_MODEL_ID" not in env_vars
240
242
 
241
243
 
242
- # =============================================================================
243
- # Sub-LLM detection (header-based)
244
- # =============================================================================
245
-
246
-
247
- class TestIsSubLLMRequest:
248
- def test_detects_sub_header(self):
249
- assert (
250
- OpenCodeRLMEnv._is_sub_llm_request({"headers": {"x-rlm-role": "sub"}})
251
- is True
252
- )
253
-
254
- def test_rejects_no_headers(self):
255
- assert OpenCodeRLMEnv._is_sub_llm_request({}) is False
256
-
257
- def test_rejects_empty_headers(self):
258
- assert OpenCodeRLMEnv._is_sub_llm_request({"headers": {}}) is False
259
-
260
- def test_rejects_wrong_value(self):
261
- assert (
262
- OpenCodeRLMEnv._is_sub_llm_request({"headers": {"x-rlm-role": "main"}})
263
- is False
264
- )
265
-
266
- def test_ignores_model_field(self):
267
- """Model name should NOT be used for detection."""
268
- assert (
269
- OpenCodeRLMEnv._is_sub_llm_request({"model": "sub", "headers": {}}) is False
270
- )
271
-
272
- def test_header_takes_precedence(self):
273
- assert (
274
- OpenCodeRLMEnv._is_sub_llm_request(
275
- {"model": "openai/gpt-5-mini", "headers": {"x-rlm-role": "sub"}}
276
- )
277
- is True
278
- )
279
-
280
-
281
244
  # =============================================================================
282
245
  # State setup
283
246
  # =============================================================================
@@ -330,17 +293,45 @@ class TestMetrics:
330
293
  response = MagicMock(spec=[]) # no usage attr
331
294
  assert OpenCodeRLMEnv._extract_token_counts(response) == (0, 0)
332
295
 
333
- def test_update_sub_metrics(self):
296
+ @pytest.mark.asyncio
297
+ async def test_handle_sub_llm_request_updates_sub_metrics(self):
334
298
  env = build_env()
335
299
  state = {
300
+ "trajectory": [],
301
+ "model": "main-model",
336
302
  "sub_llm_turns": 0,
337
303
  "sub_llm_prompt_tokens": 0,
338
304
  "sub_llm_completion_tokens": 0,
339
305
  }
340
- response = MagicMock()
341
- response.usage.prompt_tokens = 50
342
- response.usage.completion_tokens = 20
343
- env._update_sub_metrics(state, response)
306
+ response = vf.Response(
307
+ id="resp",
308
+ created=0,
309
+ model="sub-model",
310
+ message=vf.ResponseMessage(
311
+ content="ok", finish_reason="stop", is_truncated=False
312
+ ),
313
+ usage=vf.Usage(
314
+ prompt_tokens=50,
315
+ completion_tokens=20,
316
+ reasoning_tokens=0,
317
+ total_tokens=70,
318
+ ),
319
+ )
320
+ future = asyncio.get_running_loop().create_future()
321
+ intercept = {
322
+ "messages": [{"role": "user", "content": "hello"}],
323
+ "headers": {"x-rlm-role": "sub"},
324
+ "response_future": future,
325
+ }
326
+ env._require_interception_server().intercepts["req"] = intercept
327
+ with patch.object(
328
+ vf.Environment,
329
+ "get_model_response",
330
+ new=AsyncMock(return_value=response),
331
+ ):
332
+ await env._handle_sub_llm_request(state, "req", intercept)
333
+
334
+ assert future.result() is response
344
335
  assert state["sub_llm_turns"] == 1
345
336
  assert state["sub_llm_prompt_tokens"] == 50
346
337
  assert state["sub_llm_completion_tokens"] == 20
@@ -84,16 +84,25 @@ def render_prompt(observation: Any, **kwargs: Any):
84
84
  return [UserMessage(content=str(observation["prompt"]))]
85
85
 
86
86
 
87
- async def test_openenv_uses_public_async_generic_client(monkeypatch):
87
+ async def test_openenv_uses_public_async_generic_client(monkeypatch, tmp_path):
88
88
  FakeGenericEnvClient.instances.clear()
89
89
  monkeypatch.setattr(openenv_env, "GenericEnvClient", FakeGenericEnvClient)
90
90
  env = vf.OpenEnvEnv(
91
+ openenv_project=tmp_path,
91
92
  num_train_examples=1,
92
93
  num_eval_examples=0,
93
94
  prompt_renderer=render_prompt,
94
95
  )
95
96
 
96
- async def create_server():
97
+ async def launch_image_server(
98
+ image: str, port: int, start_command: str, contract: str
99
+ ):
100
+ assert (image, port, start_command, contract) == (
101
+ "image",
102
+ 8000,
103
+ "run",
104
+ "gym",
105
+ )
97
106
  return openenv_env.OpenEnvServer(
98
107
  sandbox_id="sandbox",
99
108
  exposure_id="exposure",
@@ -102,34 +111,56 @@ async def test_openenv_uses_public_async_generic_client(monkeypatch):
102
111
  contract="gym",
103
112
  )
104
113
 
105
- async def fetch_action_schema(base_url: str) -> dict[str, object]:
106
- return {"type": "object", "properties": {}}
114
+ async def fetch_schema(base_url: str) -> dict[str, object]:
115
+ assert base_url == "http://localhost:8000"
116
+ return {"action": {"type": "object", "properties": {}}}
117
+
118
+ async def cleanup_server(server: openenv_env.OpenEnvServer) -> None:
119
+ env._active_servers.pop(server.sandbox_id, None)
107
120
 
108
- monkeypatch.setattr(env, "_create_server", create_server)
109
- monkeypatch.setattr(env, "_fetch_action_schema", fetch_action_schema)
121
+ monkeypatch.setattr(
122
+ env,
123
+ "_resolve_runtime_config",
124
+ lambda project_path: ("image", 8000, "run", "gym"),
125
+ )
126
+ monkeypatch.setattr(env, "_launch_image_server", launch_image_server)
127
+ monkeypatch.setattr(env, "_fetch_schema", fetch_schema)
128
+ monkeypatch.setattr(env, "_cleanup_server", cleanup_server)
110
129
 
111
130
  state = vf.State({"info": {"seed": 7}, "trajectory": []})
112
- await env.setup_state(state)
131
+ try:
132
+ await env.setup_state(state)
113
133
 
114
- assert state["prompt"] == [UserMessage(content="seed-7")]
115
- assert len(FakeGenericEnvClient.instances) == 1
116
- client = FakeGenericEnvClient.instances[0]
117
- assert client.base_url == "http://localhost:8000"
118
- assert client.connected is True
119
- assert client.reset_seeds == [7]
134
+ assert state["prompt"] == [UserMessage(content="seed-7")]
135
+ assert len(FakeGenericEnvClient.instances) == 1
136
+ client = FakeGenericEnvClient.instances[0]
137
+ assert client.base_url == "http://localhost:8000"
138
+ assert client.connected is True
139
+ assert client.reset_seeds == [7]
140
+ finally:
141
+ await env.cleanup_openenv(state)
120
142
 
121
143
 
122
- async def test_openenv_uses_public_async_mcp_client(monkeypatch):
144
+ async def test_openenv_uses_public_async_mcp_client(monkeypatch, tmp_path):
123
145
  FakeMCPToolClient.instances.clear()
124
146
  monkeypatch.setattr(openenv_env, "MCPToolClient", FakeMCPToolClient)
125
147
  monkeypatch.setattr(openenv_env, "CallToolAction", FakeCallToolAction)
126
148
  env = vf.OpenEnvEnv(
149
+ openenv_project=tmp_path,
127
150
  num_train_examples=1,
128
151
  num_eval_examples=0,
129
152
  prompt_renderer=render_prompt,
130
153
  )
131
154
 
132
- async def create_server():
155
+ async def launch_image_server(
156
+ image: str, port: int, start_command: str, contract: str
157
+ ):
158
+ assert (image, port, start_command, contract) == (
159
+ "image",
160
+ 8000,
161
+ "run",
162
+ "mcp",
163
+ )
133
164
  return openenv_env.OpenEnvServer(
134
165
  sandbox_id="sandbox",
135
166
  exposure_id="exposure",
@@ -138,25 +169,52 @@ async def test_openenv_uses_public_async_mcp_client(monkeypatch):
138
169
  contract="mcp",
139
170
  )
140
171
 
141
- async def fetch_action_schema(base_url: str) -> dict[str, object]:
172
+ async def fetch_schema(base_url: str) -> dict[str, object]:
173
+ assert base_url == "http://localhost:8000"
142
174
  return {
143
- "type": "object",
144
- "properties": {"type": {"enum": ["list_tools", "call_tool"]}},
175
+ "action": {
176
+ "type": "object",
177
+ "properties": {"type": {"enum": ["list_tools", "call_tool"]}},
178
+ }
145
179
  }
146
180
 
147
- monkeypatch.setattr(env, "_create_server", create_server)
148
- monkeypatch.setattr(env, "_fetch_action_schema", fetch_action_schema)
181
+ async def cleanup_server(server: openenv_env.OpenEnvServer) -> None:
182
+ env._active_servers.pop(server.sandbox_id, None)
149
183
 
150
- state = vf.State({"info": {"seed": 9}, "trajectory": []})
151
- await env.setup_state(state)
152
- result = await env._mcp_step_tool(
153
- state["openenv_mcp_client"], "echo", {"message": "hi"}
184
+ monkeypatch.setattr(
185
+ env,
186
+ "_resolve_runtime_config",
187
+ lambda project_path: ("image", 8000, "run", "mcp"),
154
188
  )
189
+ monkeypatch.setattr(env, "_launch_image_server", launch_image_server)
190
+ monkeypatch.setattr(env, "_fetch_schema", fetch_schema)
191
+ monkeypatch.setattr(env, "_cleanup_server", cleanup_server)
192
+
193
+ state = vf.State({"info": {"seed": 9}, "trajectory": []})
194
+ try:
195
+ await env.setup_state(state)
196
+ state["trajectory"].append({})
197
+ tool_messages = await env._mcp_env_response(
198
+ [
199
+ vf.AssistantMessage(
200
+ content=None,
201
+ tool_calls=[
202
+ vf.ToolCall(
203
+ id="call-1", name="echo", arguments='{"message": "hi"}'
204
+ )
205
+ ],
206
+ )
207
+ ],
208
+ state,
209
+ )
155
210
 
156
- assert state["prompt"] == [UserMessage(content="mcp-9")]
157
- assert state["tool_defs"][0].name == "echo"
158
- assert result.reward == 1.0
159
- client = FakeMCPToolClient.instances[0]
160
- action = client.step_actions[0]
161
- assert action.tool_name == "echo"
162
- assert action.arguments == {"message": "hi"}
211
+ assert state["prompt"] == [UserMessage(content="mcp-9")]
212
+ assert state["tool_defs"][0].name == "echo"
213
+ assert state["trajectory"][-1]["reward"] == 1.0
214
+ assert tool_messages == [vf.ToolMessage(content="ok", tool_call_id="call-1")]
215
+ client = FakeMCPToolClient.instances[0]
216
+ action = client.step_actions[0]
217
+ assert action.tool_name == "echo"
218
+ assert action.arguments == {"message": "hi"}
219
+ finally:
220
+ await env.cleanup_openenv(state)