verifiers 0.1.15.dev9__tar.gz → 0.1.15.dev11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (323)
  1. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/PKG-INFO +17 -19
  2. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/README.md +14 -17
  3. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/pyproject.toml +3 -2
  4. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_client_multimodal_types.py +25 -0
  5. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_eval_cli.py +19 -0
  6. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_harbor_env_mcp.py +43 -89
  7. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_imports.py +31 -0
  8. verifiers-0.1.15.dev11/tests/test_init_script.py +83 -0
  9. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_langchain_deep_agents_wikispeedia.py +5 -5
  10. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_lean_task.py +10 -8
  11. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_mcp_search_env.py +3 -3
  12. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_opencode_harbor.py +9 -7
  13. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_opencode_rlm_env.py +35 -44
  14. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_openenv_client.py +89 -31
  15. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_prime_plugin.py +5 -5
  16. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_renderer_client.py +45 -14
  17. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_renderer_e2e.py +28 -18
  18. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_rlm_env.py +0 -24
  19. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_v1_bfcl.py +6 -5
  20. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_v1_config_extension.py +845 -271
  21. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_v1_example_counts.py +10 -10
  22. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_v1_harbor_cli.py +58 -30
  23. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_v1_mini_swe_agent.py +11 -10
  24. verifiers-0.1.15.dev11/tests/test_v1_rlm_swe.py +775 -0
  25. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_v1_runtime_lifecycle.py +228 -145
  26. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_v1_scoring_functions.py +1 -1
  27. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_v1_taskset_bindings.py +65 -64
  28. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_v1_textarena_taskset.py +29 -11
  29. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_wiki_search_v1.py +3 -3
  30. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_wordle_v1_env.py +11 -2
  31. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/__init__.py +19 -47
  32. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/cli/plugins/prime.py +1 -5
  33. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/clients/anthropic_messages_client.py +27 -44
  34. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/clients/client.py +12 -14
  35. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/clients/openai_chat_completions_client.py +1 -6
  36. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/clients/openai_chat_completions_token_client.py +14 -17
  37. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/clients/openai_responses_client.py +13 -18
  38. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/clients/renderer_client.py +42 -81
  39. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/environment.py +14 -16
  40. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/composable_env.py +13 -21
  41. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/harnesses/rlm.py +7 -8
  42. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/swe_debug_env.py +12 -19
  43. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/task.py +9 -18
  44. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/lean/lean_task.py +5 -18
  45. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/log_parser.py +1 -10
  46. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/multi_swe.py +3 -7
  47. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym.py +2 -2
  48. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2.py +24 -34
  49. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2_log_parsers.py +34 -44
  50. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/gym_env.py +22 -19
  51. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/harbor_env/mcp.py +17 -28
  52. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/mcp_env.py +6 -13
  53. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/opencode_rlm_env.py +9 -16
  54. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/rlm_env.py +40 -62
  55. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/utils/git_checkout_cache.py +13 -31
  56. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/integrations/openenv_env.py +75 -126
  57. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/multiturn_env.py +1 -5
  58. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/gepa/gepa_utils.py +6 -14
  59. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/rubrics/rubric.py +7 -12
  60. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/scripts/build.py +17 -29
  61. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/scripts/eval.py +3 -3
  62. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/scripts/init.py +98 -67
  63. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/serve/server/env_server.py +17 -0
  64. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/serve/server/env_worker.py +19 -4
  65. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/types.py +18 -5
  66. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/utils/client_utils.py +19 -31
  67. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/utils/data_utils.py +10 -17
  68. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/utils/display_utils.py +2 -6
  69. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/utils/env_utils.py +96 -21
  70. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/utils/eval_utils.py +21 -38
  71. verifiers-0.1.15.dev11/verifiers/utils/import_utils.py +11 -0
  72. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/utils/install_utils.py +10 -11
  73. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/utils/interception_utils.py +9 -11
  74. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/utils/logging_utils.py +11 -17
  75. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/utils/message_utils.py +9 -14
  76. verifiers-0.1.15.dev11/verifiers/utils/response_utils.py +102 -0
  77. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/utils/save_utils.py +13 -21
  78. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/utils/thread_utils.py +2 -15
  79. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/utils/threaded_sandbox_client.py +2 -2
  80. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/ENVIRONMENT_BEST_PRACTICES.md +29 -45
  81. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/README.md +84 -94
  82. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/RE_MIGRATION.md +53 -46
  83. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/__init__.py +10 -35
  84. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/config.py +30 -5
  85. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/env.py +4 -26
  86. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/harness.py +37 -36
  87. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/packages/harnesses/command.py +17 -21
  88. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/packages/harnesses/opencode.py +1 -1
  89. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/packages/harnesses/pi.py +6 -10
  90. verifiers-0.1.15.dev11/verifiers/v1/packages/harnesses/rlm.py +601 -0
  91. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/packages/harnesses/terminus_2.py +8 -13
  92. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/packages/tasksets/harbor.py +126 -113
  93. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/packages/tasksets/textarena.py +74 -50
  94. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/runtime.py +24 -37
  95. verifiers-0.1.15.dev11/verifiers/v1/taskset.py +207 -0
  96. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/toolset.py +2 -1
  97. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/types.py +4 -3
  98. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/utils/config_utils.py +52 -3
  99. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/utils/program_utils.py +2 -1
  100. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/utils/prompt_utils.py +91 -2
  101. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/utils/runtime_owner_utils.py +26 -45
  102. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/utils/sandbox_utils.py +2 -0
  103. verifiers-0.1.15.dev11/verifiers/v1/utils/taskset_registry_utils.py +115 -0
  104. verifiers-0.1.15.dev11/verifiers/v1/utils/taskset_utils.py +78 -0
  105. verifiers-0.1.15.dev9/tests/test_v1_rlm_swe.py +0 -390
  106. verifiers-0.1.15.dev9/verifiers/utils/import_utils.py +0 -16
  107. verifiers-0.1.15.dev9/verifiers/utils/response_utils.py +0 -94
  108. verifiers-0.1.15.dev9/verifiers/v1/packages/harnesses/rlm.py +0 -291
  109. verifiers-0.1.15.dev9/verifiers/v1/taskset.py +0 -190
  110. verifiers-0.1.15.dev9/verifiers/v1/utils/taskset_utils.py +0 -90
  111. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/.gitignore +0 -0
  112. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/LICENSE +0 -0
  113. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/AGENTS.md +0 -0
  114. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/README.md +0 -0
  115. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/__init__.py +0 -0
  116. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/conftest.py +0 -0
  117. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_browser_env.py +0 -0
  118. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_build_script.py +0 -0
  119. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_cli_agent_env.py +0 -0
  120. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_client_auth_errors.py +0 -0
  121. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_client_config.py +0 -0
  122. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_composable_env.py +0 -0
  123. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_context_token_metrics.py +0 -0
  124. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_decorator_ranks.py +0 -0
  125. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_endpoint_registry.py +0 -0
  126. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_env_group.py +0 -0
  127. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_env_server.py +0 -0
  128. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_environment.py +0 -0
  129. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_environment_extra.py +0 -0
  130. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_envs.py +0 -0
  131. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_error_chain.py +0 -0
  132. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_eval_display.py +0 -0
  133. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_eval_utils.py +0 -0
  134. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_gepa_cli.py +0 -0
  135. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_gepa_utils.py +0 -0
  136. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_gym_env.py +0 -0
  137. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_install_utils.py +0 -0
  138. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_interception_utils.py +0 -0
  139. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_logging.py +0 -0
  140. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_math_rubric.py +0 -0
  141. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_maybe_think_parser.py +0 -0
  142. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_message_utils.py +0 -0
  143. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_message_utils_multimodal.py +0 -0
  144. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_multiturn_env.py +0 -0
  145. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_nemorl_client.py +0 -0
  146. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_openai_chat_completions_token_client.py +0 -0
  147. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_openai_responses_client.py +0 -0
  148. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_parser.py +0 -0
  149. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_path_utils.py +0 -0
  150. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_per_turn_timing.py +0 -0
  151. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_pricing_utils.py +0 -0
  152. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_rlm_composable_env.py +0 -0
  153. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_rubric.py +0 -0
  154. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_rubric_group.py +0 -0
  155. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_sandbox_env.py +0 -0
  156. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_sandbox_mixin.py +0 -0
  157. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_save_utils.py +0 -0
  158. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_setup_script.py +0 -0
  159. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_singleturn_env.py +0 -0
  160. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_stateful_tool_env.py +0 -0
  161. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_think_parser.py +0 -0
  162. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_tool_env.py +0 -0
  163. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_tool_utils.py +0 -0
  164. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_trajectory_processing.py +0 -0
  165. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_tui_info_formatting.py +0 -0
  166. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_types.py +0 -0
  167. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_v1_empty_completions.py +0 -0
  168. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_v1_endpoint_protocols.py +0 -0
  169. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_v1_group_reward_env.py +0 -0
  170. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_wordle_env.py +0 -0
  171. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_xml_parser.py +0 -0
  172. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/AGENTS.md +0 -0
  173. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/cli/__init__.py +0 -0
  174. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/cli/commands/__init__.py +0 -0
  175. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/cli/commands/build.py +0 -0
  176. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/cli/commands/eval.py +0 -0
  177. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/cli/commands/gepa.py +0 -0
  178. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/cli/commands/init.py +0 -0
  179. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/cli/commands/install.py +0 -0
  180. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/cli/commands/setup.py +0 -0
  181. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/cli/plugins/__init__.py +0 -0
  182. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/cli/tui.py +0 -0
  183. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/clients/__init__.py +0 -0
  184. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/clients/nemorl_chat_completions_client.py +0 -0
  185. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/clients/openai_completions_client.py +0 -0
  186. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/decorators.py +0 -0
  187. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/AGENTS.md +0 -0
  188. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/__init__.py +0 -0
  189. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/env_group.py +0 -0
  190. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/README.md +0 -0
  191. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/__init__.py +0 -0
  192. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/cli_agent_env.py +0 -0
  193. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/README.md +0 -0
  194. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/__init__.py +0 -0
  195. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/_filter.py +0 -0
  196. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/harness.py +0 -0
  197. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/harnesses/__init__.py +0 -0
  198. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/harnesses/mini_swe_agent.py +0 -0
  199. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/harnesses/opencode.py +0 -0
  200. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/harnesses/prompt.txt +0 -0
  201. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/__init__.py +0 -0
  202. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/cp/__init__.py +0 -0
  203. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/cp/cp_task.py +0 -0
  204. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/cp/test_utils.py +0 -0
  205. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/harbor/__init__.py +0 -0
  206. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/harbor/harbor.py +0 -0
  207. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/lean/__init__.py +0 -0
  208. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/math/__init__.py +0 -0
  209. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/math/math_task.py +0 -0
  210. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/__init__.py +0 -0
  211. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/_test_patch.py +0 -0
  212. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/create_fix_patch.sh +0 -0
  213. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/openswe.py +0 -0
  214. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/swe_bench.py +0 -0
  215. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/swe_lego.py +0 -0
  216. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/swe_smith.py +0 -0
  217. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/swe_tasksets.py +0 -0
  218. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/harbor_env/__init__.py +0 -0
  219. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/harbor_env/env.py +0 -0
  220. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/opencode_env.py +0 -0
  221. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/opencode_qa_env.py +0 -0
  222. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/sandbox_mixin.py +0 -0
  223. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/utils/__init__.py +0 -0
  224. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/utils/file_locks.py +0 -0
  225. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/integrations/README.md +0 -0
  226. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/integrations/__init__.py +0 -0
  227. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/integrations/browser_env/README.md +0 -0
  228. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/integrations/browser_env/__init__.py +0 -0
  229. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
  230. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
  231. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
  232. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
  233. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +0 -0
  234. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
  235. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/integrations/textarena_env.py +0 -0
  236. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/python_env.py +0 -0
  237. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/sandbox_env.py +0 -0
  238. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/singleturn_env.py +0 -0
  239. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/stateful_tool_env.py +0 -0
  240. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/envs/tool_env.py +0 -0
  241. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/errors.py +0 -0
  242. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/gepa/__init__.py +0 -0
  243. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/gepa/adapter.py +0 -0
  244. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/gepa/config.py +0 -0
  245. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/gepa/display.py +0 -0
  246. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/parsers/__init__.py +0 -0
  247. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/parsers/maybe_think_parser.py +0 -0
  248. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/parsers/parser.py +0 -0
  249. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/parsers/think_parser.py +0 -0
  250. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/parsers/xml_parser.py +0 -0
  251. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/rl/README.md +0 -0
  252. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/rl/__init__.py +0 -0
  253. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/rl/inference/__init__.py +0 -0
  254. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/rl/inference/client.py +0 -0
  255. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/rl/inference/server.py +0 -0
  256. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/rl/trainer/__init__.py +0 -0
  257. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/rl/trainer/config.py +0 -0
  258. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/rl/trainer/orchestrator.py +0 -0
  259. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/rl/trainer/trainer.py +0 -0
  260. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/rl/trainer/utils.py +0 -0
  261. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/rubrics/__init__.py +0 -0
  262. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/rubrics/experimental/hybrid_math_rubric.py +0 -0
  263. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/rubrics/judge_rubric.py +0 -0
  264. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/rubrics/math_rubric.py +0 -0
  265. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/rubrics/rubric_group.py +0 -0
  266. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/scripts/__init__.py +0 -0
  267. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/scripts/gepa.py +0 -0
  268. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/scripts/install.py +0 -0
  269. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/scripts/rl.py +0 -0
  270. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/scripts/setup.py +0 -0
  271. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/scripts/train.py +0 -0
  272. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/scripts/tui.py +0 -0
  273. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/scripts/vllm.py +0 -0
  274. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/serve/__init__.py +0 -0
  275. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/serve/client/env_client.py +0 -0
  276. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/serve/client/zmq_env_client.py +0 -0
  277. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/serve/server/__init__.py +0 -0
  278. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/serve/server/env_router.py +0 -0
  279. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/serve/server/zmq_env_server.py +0 -0
  280. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/serve/types.py +0 -0
  281. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/utils/__init__.py +0 -0
  282. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/utils/async_utils.py +0 -0
  283. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/utils/config_utils.py +0 -0
  284. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/utils/env_config_utils.py +0 -0
  285. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/utils/error_utils.py +0 -0
  286. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/utils/eval_display.py +0 -0
  287. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/utils/heartbeat.py +0 -0
  288. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/utils/metric_utils.py +0 -0
  289. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/utils/path_utils.py +0 -0
  290. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/utils/pricing_utils.py +0 -0
  291. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/utils/process_utils.py +0 -0
  292. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/utils/serve_utils.py +0 -0
  293. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/utils/tool_utils.py +0 -0
  294. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/utils/usage_utils.py +0 -0
  295. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/utils/version_utils.py +0 -0
  296. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/packages/__init__.py +0 -0
  297. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/packages/harnesses/__init__.py +0 -0
  298. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/packages/harnesses/configs.py +0 -0
  299. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/packages/harnesses/mini_swe_agent.py +0 -0
  300. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/packages/tasksets/__init__.py +0 -0
  301. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/state.py +0 -0
  302. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/task.py +0 -0
  303. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/user.py +0 -0
  304. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/utils/__init__.py +0 -0
  305. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/utils/artifact_utils.py +0 -0
  306. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/utils/binding_utils.py +0 -0
  307. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/utils/config_callable_utils.py +0 -0
  308. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/utils/endpoint_utils.py +0 -0
  309. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/utils/json_utils.py +0 -0
  310. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/utils/judge_utils.py +0 -0
  311. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/utils/lifecycle_utils.py +0 -0
  312. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/utils/mcp_proxy_utils.py +0 -0
  313. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/utils/mcp_utils.py +0 -0
  314. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/utils/object_utils.py +0 -0
  315. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/utils/runtime_registry.py +0 -0
  316. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/utils/sandbox_program_utils.py +0 -0
  317. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/utils/scoring_utils.py +0 -0
  318. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/utils/serialization_utils.py +0 -0
  319. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/utils/task_freeze_utils.py +0 -0
  320. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/utils/timing_utils.py +0 -0
  321. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/utils/tool_utils.py +0 -0
  322. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/utils/trajectory_utils.py +0 -0
  323. {verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/verifiers/v1/utils/usage_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: verifiers
3
- Version: 0.1.15.dev9
3
+ Version: 0.1.15.dev11
4
4
  Summary: Verifiers: Environments for LLM Reinforcement Learning
5
5
  Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
6
6
  Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
@@ -48,6 +48,7 @@ Requires-Dist: tenacity>=8.5.0
48
48
  Requires-Dist: textual
49
49
  Requires-Dist: tomli; python_version < '3.11'
50
50
  Requires-Dist: typing-extensions; python_version < '3.12'
51
+ Requires-Dist: uvloop>=0.21.0; sys_platform != 'win32' and sys_platform != 'cygwin' and platform_python_implementation != 'PyPy'
51
52
  Provides-Extra: browser
52
53
  Requires-Dist: aiohttp>=3.9.0; extra == 'browser'
53
54
  Requires-Dist: python-dotenv>=1.0.0; extra == 'browser'
@@ -55,7 +56,7 @@ Requires-Dist: stagehand>=3.0.0; extra == 'browser'
55
56
  Provides-Extra: openenv
56
57
  Requires-Dist: openenv-core>=0.3.0; extra == 'openenv'
57
58
  Provides-Extra: renderers
58
- Requires-Dist: renderers>=0.1.8.dev4; extra == 'renderers'
59
+ Requires-Dist: renderers>=0.1.8.dev28; extra == 'renderers'
59
60
  Provides-Extra: rg
60
61
  Requires-Dist: reasoning-gym; extra == 'rg'
61
62
  Provides-Extra: rl
@@ -219,19 +220,13 @@ custom harnesses, use the v1 Taskset/Harness path:
219
220
  # my_env.py
220
221
  import verifiers as vf
221
222
 
222
- @vf.reward(weight=1.0)
223
- async def contains_answer(task, state) -> float:
224
- return float(task["answer"] in str(state.get("completion") or ""))
225
223
 
226
224
  class MyTasksetConfig(vf.TasksetConfig):
227
225
  split: str = "train"
228
226
 
229
227
 
230
- class MyTaskset(vf.Taskset):
231
- config: MyTasksetConfig
232
- _default_rewards = (contains_answer,)
233
-
234
- def rows(self) -> list[dict[str, object]]:
228
+ class MyTaskset(vf.Taskset[MyTasksetConfig]):
229
+ def load_tasks(self) -> vf.Tasks:
235
230
  rows = [
236
231
  {
237
232
  "prompt": [{"role": "user", "content": "Reverse abc."}],
@@ -242,28 +237,31 @@ class MyTaskset(vf.Taskset):
242
237
  ]
243
238
  return [row for row in rows if row["split"] == self.config.split]
244
239
 
240
+ @vf.reward(weight=1.0)
241
+ async def contains_answer(self, task, state) -> float:
242
+ return float(task["answer"] in str(state.get("completion") or ""))
243
+
245
244
 
246
245
  def load_taskset(config: MyTasksetConfig) -> MyTaskset:
247
- assert isinstance(config, MyTasksetConfig)
248
246
  return MyTaskset(config=config)
249
247
 
250
248
 
251
249
  def load_environment(config: vf.EnvConfig) -> vf.Env:
252
- taskset_config = config.taskset
253
- assert isinstance(taskset_config, MyTasksetConfig)
254
- return vf.Env(taskset=load_taskset(taskset_config))
250
+ return vf.Env(taskset=vf.load_taskset(config=config.taskset))
255
251
  ```
256
252
  If no harness is passed, `vf.Env` uses the base endpoint-backed harness. See
257
253
  **[BYO Harness](docs/byo-harness.md)** for the advanced v1 taskset/harness API.
258
- Reusable taskset and harness packages live under `verifiers.v1.packages` while
259
- the v1 API stabilizes, and are re-exported from `verifiers.v1` for normal use.
260
- For example, Harbor task directories can run through the bundled OpenCode CLI
254
+ Reusable taskset and harness packages live under `verifiers.v1.packages`. For
255
+ example, Harbor task directories can run through the bundled OpenCode CLI
261
256
  harness with:
262
257
 
263
258
  ```python
259
+ from verifiers.v1.packages.harnesses import OpenCode, OpenCodeConfig
260
+ from verifiers.v1.packages.tasksets import HarborTaskset, HarborTasksetConfig
261
+
264
262
  env = vf.Env(
265
- taskset=vf.HarborTaskset(config=vf.HarborTasksetConfig()),
266
- harness=vf.OpenCode(config=vf.OpenCodeConfig()),
263
+ taskset=HarborTaskset(config=HarborTasksetConfig()),
264
+ harness=OpenCode(config=OpenCodeConfig()),
267
265
  )
268
266
  ```
269
267
 
@@ -143,19 +143,13 @@ custom harnesses, use the v1 Taskset/Harness path:
143
143
  # my_env.py
144
144
  import verifiers as vf
145
145
 
146
- @vf.reward(weight=1.0)
147
- async def contains_answer(task, state) -> float:
148
- return float(task["answer"] in str(state.get("completion") or ""))
149
146
 
150
147
  class MyTasksetConfig(vf.TasksetConfig):
151
148
  split: str = "train"
152
149
 
153
150
 
154
- class MyTaskset(vf.Taskset):
155
- config: MyTasksetConfig
156
- _default_rewards = (contains_answer,)
157
-
158
- def rows(self) -> list[dict[str, object]]:
151
+ class MyTaskset(vf.Taskset[MyTasksetConfig]):
152
+ def load_tasks(self) -> vf.Tasks:
159
153
  rows = [
160
154
  {
161
155
  "prompt": [{"role": "user", "content": "Reverse abc."}],
@@ -166,28 +160,31 @@ class MyTaskset(vf.Taskset):
166
160
  ]
167
161
  return [row for row in rows if row["split"] == self.config.split]
168
162
 
163
+ @vf.reward(weight=1.0)
164
+ async def contains_answer(self, task, state) -> float:
165
+ return float(task["answer"] in str(state.get("completion") or ""))
166
+
169
167
 
170
168
  def load_taskset(config: MyTasksetConfig) -> MyTaskset:
171
- assert isinstance(config, MyTasksetConfig)
172
169
  return MyTaskset(config=config)
173
170
 
174
171
 
175
172
  def load_environment(config: vf.EnvConfig) -> vf.Env:
176
- taskset_config = config.taskset
177
- assert isinstance(taskset_config, MyTasksetConfig)
178
- return vf.Env(taskset=load_taskset(taskset_config))
173
+ return vf.Env(taskset=vf.load_taskset(config=config.taskset))
179
174
  ```
180
175
  If no harness is passed, `vf.Env` uses the base endpoint-backed harness. See
181
176
  **[BYO Harness](docs/byo-harness.md)** for the advanced v1 taskset/harness API.
182
- Reusable taskset and harness packages live under `verifiers.v1.packages` while
183
- the v1 API stabilizes, and are re-exported from `verifiers.v1` for normal use.
184
- For example, Harbor task directories can run through the bundled OpenCode CLI
177
+ Reusable taskset and harness packages live under `verifiers.v1.packages`. For
178
+ example, Harbor task directories can run through the bundled OpenCode CLI
185
179
  harness with:
186
180
 
187
181
  ```python
182
+ from verifiers.v1.packages.harnesses import OpenCode, OpenCodeConfig
183
+ from verifiers.v1.packages.tasksets import HarborTaskset, HarborTasksetConfig
184
+
188
185
  env = vf.Env(
189
- taskset=vf.HarborTaskset(config=vf.HarborTasksetConfig()),
190
- harness=vf.OpenCode(config=vf.OpenCodeConfig()),
186
+ taskset=HarborTaskset(config=HarborTasksetConfig()),
187
+ harness=OpenCode(config=OpenCodeConfig()),
191
188
  )
192
189
  ```
193
190
 
@@ -54,6 +54,7 @@ dependencies = [
54
54
  "regex<2026.4.4",
55
55
  "httpx>=0.27.0",
56
56
  "prime-pydantic-config[toml]",
57
+ "uvloop>=0.21.0; sys_platform != 'win32' and sys_platform != 'cygwin' and platform_python_implementation != 'PyPy'",
57
58
  ]
58
59
 
59
60
  [dependency-groups]
@@ -73,7 +74,7 @@ dev = [
73
74
  "aiohttp>=3.9.0",
74
75
  "python-dotenv>=1.0.0",
75
76
  "nltk",
76
- "renderers>=0.1.8.dev4",
77
+ "renderers>=0.1.8.dev28",
77
78
  ]
78
79
  policy = [
79
80
  "semgrep>=1.150.0",
@@ -96,7 +97,7 @@ openenv = [
96
97
  "openenv-core>=0.3.0",
97
98
  ]
98
99
  renderers = [
99
- "renderers>=0.1.8.dev4",
100
+ "renderers>=0.1.8.dev28",
100
101
  ]
101
102
  rl = [
102
103
  "torch>=2.8.0,<2.9.0",
@@ -98,6 +98,31 @@ async def test_anthropic_to_native_prompt_with_typed_multimodal_content_parts():
98
98
  ]
99
99
 
100
100
 
101
+ @pytest.mark.asyncio
102
+ async def test_anthropic_to_native_prompt_marks_unsupported_images_in_mixed_content():
103
+ pytest.importorskip("anthropic")
104
+ from verifiers.clients.anthropic_messages_client import AnthropicMessagesClient
105
+
106
+ client = AnthropicMessagesClient(object())
107
+ messages = [
108
+ UserMessage(
109
+ content=[
110
+ TextContentPart(text="describe this"),
111
+ ImageUrlContentPart(
112
+ image_url=ImageUrlSource(url="https://example.com/image.png")
113
+ ),
114
+ ]
115
+ )
116
+ ]
117
+
118
+ prompt, kwargs = await client.to_native_prompt(messages)
119
+ assert kwargs["system"] == ""
120
+ assert prompt[0]["content"] == [
121
+ {"type": "text", "text": "describe this"},
122
+ {"type": "text", "text": "[image]"},
123
+ ]
124
+
125
+
101
126
  @pytest.mark.asyncio
102
127
  async def test_anthropic_assistant_tool_calls_use_text_chunks_not_model_repr():
103
128
  pytest.importorskip("anthropic")
@@ -288,6 +288,25 @@ def test_cli_headers_table_and_list_merge(monkeypatch, run_cli):
288
288
  }
289
289
 
290
290
 
291
+ def test_cli_defaults_session_header_to_trajectory_id(monkeypatch, run_cli):
292
+ captured = run_cli(monkeypatch, {})
293
+
294
+ assert captured["configs"][0].client_config.extra_headers_from_state == {
295
+ "X-Session-ID": "trajectory_id"
296
+ }
297
+
298
+
299
+ def test_cli_header_from_state_overrides_default_session_header(monkeypatch, run_cli):
300
+ captured = run_cli(
301
+ monkeypatch,
302
+ {"header_from_state": ["X-Session-ID: example_id"]},
303
+ )
304
+
305
+ assert captured["configs"][0].client_config.extra_headers_from_state == {
306
+ "X-Session-ID": "example_id"
307
+ }
308
+
309
+
291
310
  def test_cli_registry_headers_merged_with_eval_toml(tmp_path, monkeypatch, run_cli):
292
311
  cfg = tmp_path / "eval.toml"
293
312
  cfg.write_text(
@@ -239,83 +239,20 @@ class TestLaunchCommandResolution:
239
239
  )
240
240
 
241
241
 
242
- class TestStartStopCommands:
243
- def test_start_cmd_tracks_process_group_leader_pid(self):
244
- """Start command must capture `$!` (the backgrounded pgroup leader),
245
- not `$$` (the outer shell), and must end with `wait` so the recorded
246
- exit code reflects the launched daemon's.
247
- """
248
- cmd = _DummyEnv()._mcp_start_cmd("svc", "python -u /opt/x/server.py")
249
- assert "echo $!" in cmd
250
- assert "echo $$" not in cmd
251
- assert cmd.rstrip().endswith("wait")
252
- assert "/tmp/harbor-mcp-svc.pid" in cmd
253
- assert "python -u /opt/x/server.py" in cmd
254
-
255
- def test_start_cmd_wraps_in_setsid_for_process_group_semantics(self):
256
- """Wrapping the user's command in `setsid sh -c ...` is what makes
257
- `$!` a process-group leader, so `kill -9 -$PID` can reap the whole
258
- daemon tree on stop. Compound commands (e.g. `cd /x && python y.py`)
259
- must be preserved verbatim inside the sh -c payload so their own
260
- semantics are unchanged."""
261
- cmd = _DummyEnv()._mcp_start_cmd("svc", "cd /opt && python server.py")
262
- assert "setsid sh -c " in cmd
263
- assert "'cd /opt && python server.py'" in cmd
264
-
265
- def test_stop_cmd_is_one_line_sigkill_plus_rm(self):
266
- """Default: one SIGKILL to the process group, then unlink the
267
- pidfile — no poll/sleep loop."""
268
- cmd = _DummyEnv()._mcp_stop_cmd("svc")
269
- assert "kill -9" in cmd
270
- assert "rm -f" in cmd
271
- assert "/tmp/harbor-mcp-svc.pid" in cmd
272
- assert "kill -0" not in cmd
273
- assert "sleep" not in cmd
274
- assert "\n" not in cmd
275
- assert len(cmd) < 120
276
-
277
- def test_stop_cmd_targets_process_group_not_single_pid(self):
278
- """The `-` prefix on the `$(cat …)` expansion is what turns kill(1)
279
- into a process-group kill — without it, SIGKILL only lands on the
280
- wrapping shell and e.g. a `python` child spawned via `cd && python`
281
- leaks as an orphan."""
282
- cmd = _DummyEnv()._mcp_stop_cmd("svc")
283
- assert 'kill -9 -"$(cat' in cmd
284
-
285
- def test_server_name_with_shell_metachars_is_quoted(self):
286
- """Server name is task-author-controlled; every pidfile reference
287
- must appear only inside single-quoted spans."""
288
- env = _DummyEnv()
289
- unquoted = "/tmp/harbor-mcp-evil$(whoami).pid"
290
- quoted = f"'{unquoted}'"
291
- for cmd in (
292
- env._mcp_start_cmd("evil$(whoami)", "x"),
293
- env._mcp_stop_cmd("evil$(whoami)"),
294
- ):
295
- assert quoted in cmd
296
- # Every raw occurrence must be inside an already-quoted span.
297
- assert cmd.count(unquoted) == cmd.count(quoted)
298
-
299
- def test_launch_command_with_shell_metachars_is_quoted(self):
300
- """Same for the user's launch command: it's task-author-controlled,
301
- must land inside a single-quoted span once wrapped in `sh -c`."""
302
- env = _DummyEnv()
303
- evil_cmd = "python -c 'print(1)' && touch /pwned"
304
- quoted = f"'{evil_cmd}'".replace("'", "'\"'\"'")
305
- # shlex-quoted output contains the evil string only inside quotes.
306
- cmd = env._mcp_start_cmd("svc", evil_cmd)
307
- assert "setsid sh -c " in cmd
308
- # No unquoted `&& touch /pwned` outside a single-quoted span.
309
- assert cmd.count(evil_cmd) == 0 or quoted in cmd
310
-
311
-
312
242
  class TestLifecycle:
313
243
  @pytest.mark.asyncio
314
244
  async def test_starts_server_with_registered_launch_command(self):
315
- env = _DummyEnv(mcp_launch_commands={"svc": "python server.py"})
245
+ env = _DummyEnv(mcp_launch_commands={"svc": "cd /opt && python server.py"})
316
246
  state: dict[str, Any] = {}
317
247
  await env.start_mcp_servers("sbx", _config_with_server(), state)
318
248
  assert set(state["harbor_mcp_jobs"].keys()) == {"svc"}
249
+ _, start_cmd = env.started_jobs[0]
250
+ assert "echo $!" in start_cmd
251
+ assert "echo $$" not in start_cmd
252
+ assert start_cmd.rstrip().endswith("wait")
253
+ assert "/tmp/harbor-mcp-svc.pid" in start_cmd
254
+ assert "setsid sh -c " in start_cmd
255
+ assert "'cd /opt && python server.py'" in start_cmd
319
256
 
320
257
  @pytest.mark.asyncio
321
258
  async def test_externally_managed_server_is_skipped(self):
@@ -342,9 +279,38 @@ class TestLifecycle:
342
279
  if "kill -9" in c.args[1]
343
280
  ]
344
281
  assert len(stop_calls) == 1
345
- assert "harbor-mcp-svc.pid" in stop_calls[0]
282
+ stop_cmd = stop_calls[0]
283
+ assert "harbor-mcp-svc.pid" in stop_cmd
284
+ assert 'kill -9 -"$(cat' in stop_cmd
285
+ assert "rm -f" in stop_cmd
286
+ assert "kill -0" not in stop_cmd
287
+ assert "sleep" not in stop_cmd
288
+ assert "\n" not in stop_cmd
289
+ assert len(stop_cmd) < 120
346
290
  assert state["harbor_mcp_jobs"] == {}
347
291
 
292
+ @pytest.mark.asyncio
293
+ async def test_launch_and_stop_commands_quote_task_authored_shell_text(self):
294
+ env = _DummyEnv(
295
+ mcp_launch_commands={
296
+ "evil$(whoami)": "python -c 'print(1)' && touch /pwned"
297
+ }
298
+ )
299
+ state: dict[str, Any] = {"sandbox_id": "sbx"}
300
+ await env.start_mcp_servers(
301
+ "sbx", _config_with_server(name="evil$(whoami)"), state
302
+ )
303
+ _, start_cmd = env.started_jobs[0]
304
+ quoted_pidfile = "'/tmp/harbor-mcp-evil$(whoami).pid'"
305
+ assert quoted_pidfile in start_cmd
306
+ assert "setsid sh -c " in start_cmd
307
+ assert "'\"'\"'print(1)'\"'\"'" in start_cmd
308
+
309
+ env.sandbox_client.execute_command.reset_mock()
310
+ await env.stop_mcp_servers(state)
311
+ stop_cmd = env.sandbox_client.execute_command.call_args.args[1]
312
+ assert quoted_pidfile in stop_cmd
313
+
348
314
  @pytest.mark.asyncio
349
315
  async def test_stop_without_sandbox_id_is_a_noop(self):
350
316
  env = _DummyEnv()
@@ -530,22 +496,6 @@ class TestBackgroundJob:
530
496
  class TestHealthCheck:
531
497
  """Readiness probing — default `/proc/net/tcp` + user override."""
532
498
 
533
- def test_default_probe_shape(self):
534
- """Portable awk on /proc/net/tcp{,6}, matching LISTEN state only,
535
- with no bash-ism dependency like /dev/tcp."""
536
- cmd = HarborMCPMixin._default_mcp_health_cmd(8000)
537
- assert "bash" not in cmd and "/dev/tcp" not in cmd
538
- assert "/proc/net/tcp" in cmd and "/proc/net/tcp6" in cmd
539
- assert '$4 == "0A"' in cmd # LISTEN state
540
-
541
- @pytest.mark.parametrize(
542
- "port,hex_expected",
543
- [(80, "0050"), (8000, "1F40"), (65535, "FFFF"), (1, "0001")],
544
- )
545
- def test_default_probe_encodes_port_as_uppercase_hex(self, port, hex_expected):
546
- cmd = HarborMCPMixin._default_mcp_health_cmd(port)
547
- assert f":{hex_expected}$" in cmd
548
-
549
499
  @pytest.mark.asyncio
550
500
  async def test_custom_healthcheck_command_templated_with_port(self):
551
501
  env = _DummyEnv(mcp_launch_commands={"svc": "python x"})
@@ -580,7 +530,11 @@ class TestHealthCheck:
580
530
  if "/proc/net/tcp" in c.args[1]
581
531
  ]
582
532
  assert len(health_calls) == 1
583
- assert ":1F40$" in health_calls[0]
533
+ health_cmd = health_calls[0]
534
+ assert "bash" not in health_cmd and "/dev/tcp" not in health_cmd
535
+ assert "/proc/net/tcp6" in health_cmd
536
+ assert '$4 == "0A"' in health_cmd
537
+ assert ":1F40$" in health_cmd
584
538
 
585
539
  @pytest.mark.asyncio
586
540
  async def test_probe_timeout_is_respected(self):
@@ -4,6 +4,37 @@ import sys
4
4
  import verifiers
5
5
 
6
6
 
7
+ PACKAGE_SYMBOLS = {
8
+ "HarborTaskset",
9
+ "HarborTasksetConfig",
10
+ "MiniSWEAgent",
11
+ "MiniSWEAgentConfig",
12
+ "OpenCode",
13
+ "OpenCodeConfig",
14
+ "Pi",
15
+ "PiConfig",
16
+ "RLM",
17
+ "RLMConfig",
18
+ "Terminus2",
19
+ "Terminus2Config",
20
+ "TextArenaTaskset",
21
+ "TextArenaTasksetConfig",
22
+ }
23
+
24
+
25
+ def test_package_tasksets_and_harnesses_are_not_root_exports():
26
+ for name in PACKAGE_SYMBOLS:
27
+ assert name not in verifiers.__all__
28
+ assert not hasattr(verifiers, name)
29
+
30
+
31
+ def test_package_tasksets_and_harnesses_are_not_v1_exports():
32
+ v1 = importlib.import_module("verifiers.v1")
33
+ for name in PACKAGE_SYMBOLS:
34
+ assert name not in v1.__all__
35
+ assert not hasattr(v1, name)
36
+
37
+
7
38
  def test_v1_taskset_imports_do_not_import_textarena():
8
39
  textarena_module = "verifiers.v1.packages.tasksets.textarena"
9
40
  sys.modules.pop(textarena_module, None)
@@ -0,0 +1,83 @@
1
+ from pathlib import Path
2
+
3
+ import pytest
4
+ import verifiers as vf
5
+ from verifiers.scripts.init import init_environment
6
+
7
+
8
+ def read_env_file(root: Path, env_id: str) -> str:
9
+ module_name = env_id.replace("-", "_")
10
+ return (root / module_name / f"{module_name}.py").read_text()
11
+
12
+
13
+ def test_init_default_writes_v0_stub(tmp_path: Path) -> None:
14
+ root = init_environment("foo", path=str(tmp_path))
15
+ content = read_env_file(tmp_path, "foo")
16
+
17
+ assert root == tmp_path / "foo"
18
+ assert "def load_environment(**kwargs) -> vf.Environment:" in content
19
+ assert "NotImplementedError" in content
20
+ assert "load_taskset" not in content
21
+ assert "EnvTaskset" not in content
22
+
23
+
24
+ def test_init_v1_writes_thin_taskset_template(tmp_path: Path) -> None:
25
+ init_environment("bar", path=str(tmp_path), v1=True)
26
+ content = read_env_file(tmp_path, "bar")
27
+
28
+ assert "class BarTasksetConfig(vf.TasksetConfig):" in content
29
+ assert "class BarTaskset(vf.Taskset[BarTasksetConfig]):" in content
30
+ assert "def load_tasks(self) -> vf.Tasks:" in content
31
+ assert "def load_system_prompt(self) -> vf.SystemPrompt:" in content
32
+ assert "async def correct_answer(self, task: vf.Task, state: vf.State)" in content
33
+ assert "def load_taskset(config: BarTasksetConfig) -> BarTaskset:" in content
34
+ assert "return BarTaskset(config=config)" in content
35
+ assert "vf.load_taskset(config=config.taskset)" in content
36
+ assert "class EnvTaskset(" not in content
37
+ assert "_default_" not in content
38
+ assert "assert isinstance" not in content
39
+ assert 'tasks: str = "load_tasks"' not in content
40
+ assert 'rewards: list[str] = ["correct_answer"]' not in content
41
+
42
+
43
+ def test_init_v1_template_loads_with_vf_load_environment(
44
+ tmp_path: Path, monkeypatch
45
+ ) -> None:
46
+ init_environment("loadable-v1", path=str(tmp_path), v1=True)
47
+ monkeypatch.syspath_prepend(str(tmp_path / "loadable_v1"))
48
+
49
+ with pytest.raises(RuntimeError, match="Load the system prompt"):
50
+ vf.load_environment("loadable-v1")
51
+
52
+
53
+ def test_init_v1_with_harness_writes_harness_stub(tmp_path: Path) -> None:
54
+ init_environment("baz", path=str(tmp_path), v1=True, with_harness=True)
55
+ content = read_env_file(tmp_path, "baz")
56
+
57
+ assert "class BazTaskset(vf.Taskset[BazTasksetConfig]):" in content
58
+ assert "class BazHarnessConfig(vf.HarnessConfig):" in content
59
+ assert "class BazHarness(vf.Harness):" in content
60
+ assert "def load_harness(config: BazHarnessConfig) -> BazHarness:" in content
61
+ assert "vf.load_harness(config=config.harness)" in content
62
+
63
+
64
+ def test_init_with_harness_without_v1_warns_and_uses_v0(tmp_path: Path, capsys) -> None:
65
+ init_environment("plain", path=str(tmp_path), with_harness=True)
66
+ content = read_env_file(tmp_path, "plain")
67
+ captured = capsys.readouterr()
68
+
69
+ assert "--with-harness only applies with --v1; ignoring." in captured.out
70
+ assert "def load_environment(**kwargs) -> vf.Environment:" in content
71
+ assert "load_harness" not in content
72
+
73
+
74
+ def test_init_v1_multifile_exports_component_loaders(tmp_path: Path) -> None:
75
+ init_environment("pkg-env", path=str(tmp_path), v1=True, multi_file=True)
76
+ package_dir = tmp_path / "pkg_env" / "pkg_env"
77
+ init_content = (package_dir / "__init__.py").read_text()
78
+ env_content = (package_dir / "pkg_env.py").read_text()
79
+
80
+ assert "from .pkg_env import load_environment, load_taskset" in init_content
81
+ assert "__all__ = ['load_environment', 'load_taskset']" in init_content
82
+ assert "class PkgEnvTaskset(vf.Taskset[PkgEnvTasksetConfig]):" in env_content
83
+ assert "return PkgEnvTaskset(config=config)" in env_content
@@ -91,8 +91,8 @@ def test_wikispeedia_env_config_reaches_taskset_and_harness(
91
91
  )
92
92
  )
93
93
 
94
- train_rows = list(env.taskset.source())
95
- eval_rows = list(env.taskset.eval_source())
94
+ train_rows = [env.taskset.to_task(row) for row in env.taskset.get_dataset()]
95
+ eval_rows = [env.taskset.to_task(row) for row in env.taskset.get_eval_dataset()]
96
96
 
97
97
  assert len(train_rows) == 2
98
98
  assert len(eval_rows) == 1
@@ -136,8 +136,8 @@ def test_wikispeedia_taskset_sources_use_disjoint_target_split(
136
136
  )
137
137
  )
138
138
 
139
- train_rows = list(taskset.source())
140
- eval_rows = list(taskset.eval_source())
139
+ train_rows = [taskset.to_task(row) for row in taskset.get_dataset()]
140
+ eval_rows = [taskset.to_task(row) for row in taskset.get_eval_dataset()]
141
141
 
142
142
  assert len(train_rows) == 2
143
143
  assert len(eval_rows) == 1
@@ -218,7 +218,7 @@ async def test_wikispeedia_tools_resolve_through_v1_runtime(
218
218
  ),
219
219
  harness=module.load_harness(config=module.WikispeediaHarnessConfig()),
220
220
  )
221
- task = module.vf.Task(list(env.taskset.source())[0]).freeze()
221
+ task = env.taskset.to_task(env.taskset.get_dataset()[0])
222
222
  state = module.vf.State.for_task(task)
223
223
  state = await env.harness.setup_state(task, state)
224
224
 
@@ -9,10 +9,8 @@ from verifiers.envs.experimental.composable.tasksets.lean.lean_task import (
9
9
  LEAN_GUARD_END_MARKER,
10
10
  LeanRubric,
11
11
  _build_starter_file,
12
- _expected_protected_region,
13
12
  _extract_protected_region,
14
13
  _normalize_signature,
15
- _wrap_with_lean_guard,
16
14
  )
17
15
 
18
16
 
@@ -80,11 +78,13 @@ class TestNormalizeSignature:
80
78
  )
81
79
 
82
80
 
83
- class TestWrapWithLeanGuard:
81
+ class TestBuildStarterFileLeanGuardLayout:
84
82
  def test_marker_layout(self) -> None:
85
83
  signature = "theorem foo (x : ℝ) : x = x := by"
86
- wrapped = _wrap_with_lean_guard(signature)
87
- assert wrapped == (
84
+ starter = _build_starter_file(
85
+ {"formal_statement": signature, "header": "", "imports": ""}
86
+ )
87
+ assert starter == (
88
88
  "-- lean-guard: begin protected\n"
89
89
  "theorem foo (x : ℝ) : x = x := by\n"
90
90
  "-- lean-guard: end protected\n"
@@ -93,8 +93,10 @@ class TestWrapWithLeanGuard:
93
93
 
94
94
  def test_round_trip_via_extract(self) -> None:
95
95
  signature = "theorem foo : True := by"
96
- wrapped = _wrap_with_lean_guard(signature)
97
- region = _extract_protected_region(wrapped)
96
+ starter = _build_starter_file(
97
+ {"formal_statement": signature, "header": "", "imports": ""}
98
+ )
99
+ region = _extract_protected_region(starter)
98
100
  assert region is not None
99
101
  assert LEAN_GUARD_BEGIN_MARKER in region
100
102
  assert LEAN_GUARD_END_MARKER in region
@@ -212,7 +214,7 @@ class TestBuildStarterFile:
212
214
  "header": "import Mathlib",
213
215
  }
214
216
  starter = _build_starter_file(info)
215
- expected = _expected_protected_region(info)
217
+ expected = _extract_protected_region(_build_starter_file(info)) or ""
216
218
  actual = _extract_protected_region(starter)
217
219
  assert expected == actual
218
220
  assert expected != ""
@@ -5,7 +5,7 @@ from pathlib import Path
5
5
  from typing import Any
6
6
 
7
7
  import pytest
8
- import verifiers.v1 as vf
8
+ import verifiers as vf
9
9
 
10
10
 
11
11
  def _load_mcp_search_module() -> Any:
@@ -54,7 +54,7 @@ def test_mcp_search_env_preserves_harness_config() -> None:
54
54
  def test_mcp_search_default_taskset_has_stable_non_doc_fixture() -> None:
55
55
  module = _load_mcp_search_module()
56
56
 
57
- rows = module.MCPSearchTaskset(config=module.MCPSearchTasksetConfig()).rows()
57
+ rows = list(module.load_tasks())
58
58
 
59
59
  assert len(rows) >= 10
60
60
  assert len({row["answer"] for row in rows}) == len(rows)
@@ -68,7 +68,7 @@ def test_mcp_search_taskset_accepts_v1_taskset_config() -> None:
68
68
  env = module.load_environment(
69
69
  config=module.MCPSearchEnvConfig(taskset={"max_turns": 3}),
70
70
  )
71
- rows = env.taskset.rows()
71
+ rows = [env.taskset.to_task(row) for row in env.taskset.get_dataset()]
72
72
 
73
73
  assert env.taskset.config.max_turns == 3
74
74
  assert all(row["max_turns"] == 3 for row in rows)