verifiers 0.1.15.dev10__tar.gz → 0.1.15.dev11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319)
  1. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/PKG-INFO +16 -19
  2. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/README.md +14 -17
  3. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/pyproject.toml +2 -2
  4. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_imports.py +31 -0
  5. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_init_script.py +20 -17
  6. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_langchain_deep_agents_wikispeedia.py +5 -5
  7. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_mcp_search_env.py +3 -3
  8. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_opencode_harbor.py +9 -7
  9. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_renderer_client.py +13 -14
  10. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_renderer_e2e.py +28 -18
  11. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_v1_bfcl.py +6 -5
  12. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_v1_config_extension.py +745 -267
  13. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_v1_example_counts.py +10 -10
  14. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_v1_harbor_cli.py +58 -30
  15. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_v1_mini_swe_agent.py +11 -10
  16. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_v1_rlm_swe.py +40 -45
  17. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_v1_runtime_lifecycle.py +98 -72
  18. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_v1_scoring_functions.py +1 -1
  19. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_v1_taskset_bindings.py +65 -64
  20. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_v1_textarena_taskset.py +29 -11
  21. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_wiki_search_v1.py +3 -3
  22. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_wordle_v1_env.py +11 -2
  23. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/__init__.py +10 -46
  24. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/clients/renderer_client.py +12 -32
  25. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/scripts/init.py +50 -51
  26. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/types.py +18 -5
  27. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/env_utils.py +21 -4
  28. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/ENVIRONMENT_BEST_PRACTICES.md +41 -50
  29. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/README.md +77 -90
  30. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/RE_MIGRATION.md +53 -46
  31. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/__init__.py +6 -36
  32. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/config.py +29 -5
  33. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/env.py +4 -26
  34. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/harness.py +37 -36
  35. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/packages/tasksets/harbor.py +126 -113
  36. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/packages/tasksets/textarena.py +74 -50
  37. verifiers-0.1.15.dev11/verifiers/v1/taskset.py +207 -0
  38. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/toolset.py +2 -1
  39. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/types.py +4 -3
  40. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/config_utils.py +52 -3
  41. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/prompt_utils.py +91 -2
  42. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/runtime_owner_utils.py +26 -45
  43. verifiers-0.1.15.dev11/verifiers/v1/utils/taskset_registry_utils.py +115 -0
  44. verifiers-0.1.15.dev11/verifiers/v1/utils/taskset_utils.py +78 -0
  45. verifiers-0.1.15.dev10/verifiers/v1/taskset.py +0 -187
  46. verifiers-0.1.15.dev10/verifiers/v1/utils/taskset_utils.py +0 -90
  47. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/.gitignore +0 -0
  48. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/LICENSE +0 -0
  49. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/AGENTS.md +0 -0
  50. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/README.md +0 -0
  51. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/__init__.py +0 -0
  52. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/conftest.py +0 -0
  53. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_browser_env.py +0 -0
  54. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_build_script.py +0 -0
  55. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_cli_agent_env.py +0 -0
  56. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_client_auth_errors.py +0 -0
  57. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_client_config.py +0 -0
  58. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_client_multimodal_types.py +0 -0
  59. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_composable_env.py +0 -0
  60. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_context_token_metrics.py +0 -0
  61. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_decorator_ranks.py +0 -0
  62. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_endpoint_registry.py +0 -0
  63. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_env_group.py +0 -0
  64. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_env_server.py +0 -0
  65. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_environment.py +0 -0
  66. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_environment_extra.py +0 -0
  67. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_envs.py +0 -0
  68. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_error_chain.py +0 -0
  69. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_eval_cli.py +0 -0
  70. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_eval_display.py +0 -0
  71. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_eval_utils.py +0 -0
  72. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_gepa_cli.py +0 -0
  73. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_gepa_utils.py +0 -0
  74. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_gym_env.py +0 -0
  75. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_harbor_env_mcp.py +0 -0
  76. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_install_utils.py +0 -0
  77. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_interception_utils.py +0 -0
  78. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_lean_task.py +0 -0
  79. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_logging.py +0 -0
  80. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_math_rubric.py +0 -0
  81. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_maybe_think_parser.py +0 -0
  82. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_message_utils.py +0 -0
  83. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_message_utils_multimodal.py +0 -0
  84. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_multiturn_env.py +0 -0
  85. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_nemorl_client.py +0 -0
  86. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_openai_chat_completions_token_client.py +0 -0
  87. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_openai_responses_client.py +0 -0
  88. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_opencode_rlm_env.py +0 -0
  89. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_openenv_client.py +0 -0
  90. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_parser.py +0 -0
  91. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_path_utils.py +0 -0
  92. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_per_turn_timing.py +0 -0
  93. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_pricing_utils.py +0 -0
  94. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_prime_plugin.py +0 -0
  95. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_rlm_composable_env.py +0 -0
  96. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_rlm_env.py +0 -0
  97. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_rubric.py +0 -0
  98. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_rubric_group.py +0 -0
  99. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_sandbox_env.py +0 -0
  100. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_sandbox_mixin.py +0 -0
  101. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_save_utils.py +0 -0
  102. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_setup_script.py +0 -0
  103. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_singleturn_env.py +0 -0
  104. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_stateful_tool_env.py +0 -0
  105. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_think_parser.py +0 -0
  106. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_tool_env.py +0 -0
  107. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_tool_utils.py +0 -0
  108. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_trajectory_processing.py +0 -0
  109. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_tui_info_formatting.py +0 -0
  110. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_types.py +0 -0
  111. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_v1_empty_completions.py +0 -0
  112. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_v1_endpoint_protocols.py +0 -0
  113. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_v1_group_reward_env.py +0 -0
  114. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_wordle_env.py +0 -0
  115. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_xml_parser.py +0 -0
  116. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/AGENTS.md +0 -0
  117. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/cli/__init__.py +0 -0
  118. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/cli/commands/__init__.py +0 -0
  119. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/cli/commands/build.py +0 -0
  120. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/cli/commands/eval.py +0 -0
  121. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/cli/commands/gepa.py +0 -0
  122. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/cli/commands/init.py +0 -0
  123. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/cli/commands/install.py +0 -0
  124. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/cli/commands/setup.py +0 -0
  125. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/cli/plugins/__init__.py +0 -0
  126. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/cli/plugins/prime.py +0 -0
  127. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/cli/tui.py +0 -0
  128. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/clients/__init__.py +0 -0
  129. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/clients/anthropic_messages_client.py +0 -0
  130. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/clients/client.py +0 -0
  131. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/clients/nemorl_chat_completions_client.py +0 -0
  132. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/clients/openai_chat_completions_client.py +0 -0
  133. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/clients/openai_chat_completions_token_client.py +0 -0
  134. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/clients/openai_completions_client.py +0 -0
  135. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/clients/openai_responses_client.py +0 -0
  136. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/decorators.py +0 -0
  137. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/AGENTS.md +0 -0
  138. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/__init__.py +0 -0
  139. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/env_group.py +0 -0
  140. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/environment.py +0 -0
  141. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/README.md +0 -0
  142. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/__init__.py +0 -0
  143. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/cli_agent_env.py +0 -0
  144. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/README.md +0 -0
  145. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/__init__.py +0 -0
  146. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/_filter.py +0 -0
  147. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/composable_env.py +0 -0
  148. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/harness.py +0 -0
  149. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/harnesses/__init__.py +0 -0
  150. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/harnesses/mini_swe_agent.py +0 -0
  151. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/harnesses/opencode.py +0 -0
  152. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/harnesses/prompt.txt +0 -0
  153. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/harnesses/rlm.py +0 -0
  154. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/swe_debug_env.py +0 -0
  155. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/task.py +0 -0
  156. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/__init__.py +0 -0
  157. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/cp/__init__.py +0 -0
  158. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/cp/cp_task.py +0 -0
  159. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/cp/test_utils.py +0 -0
  160. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/harbor/__init__.py +0 -0
  161. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/harbor/harbor.py +0 -0
  162. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/lean/__init__.py +0 -0
  163. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/lean/lean_task.py +0 -0
  164. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/math/__init__.py +0 -0
  165. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/math/math_task.py +0 -0
  166. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/__init__.py +0 -0
  167. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/_test_patch.py +0 -0
  168. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/create_fix_patch.sh +0 -0
  169. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/log_parser.py +0 -0
  170. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/multi_swe.py +0 -0
  171. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/openswe.py +0 -0
  172. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym.py +0 -0
  173. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/swe_bench.py +0 -0
  174. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/swe_lego.py +0 -0
  175. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2.py +0 -0
  176. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2_log_parsers.py +0 -0
  177. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/swe_smith.py +0 -0
  178. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/swe_tasksets.py +0 -0
  179. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/gym_env.py +0 -0
  180. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/harbor_env/__init__.py +0 -0
  181. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/harbor_env/env.py +0 -0
  182. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/harbor_env/mcp.py +0 -0
  183. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/mcp_env.py +0 -0
  184. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/opencode_env.py +0 -0
  185. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/opencode_qa_env.py +0 -0
  186. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/opencode_rlm_env.py +0 -0
  187. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/rlm_env.py +0 -0
  188. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/sandbox_mixin.py +0 -0
  189. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/utils/__init__.py +0 -0
  190. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/utils/file_locks.py +0 -0
  191. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/utils/git_checkout_cache.py +0 -0
  192. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/integrations/README.md +0 -0
  193. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/integrations/__init__.py +0 -0
  194. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/integrations/browser_env/README.md +0 -0
  195. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/integrations/browser_env/__init__.py +0 -0
  196. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
  197. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
  198. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
  199. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
  200. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +0 -0
  201. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/integrations/openenv_env.py +0 -0
  202. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
  203. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/integrations/textarena_env.py +0 -0
  204. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/multiturn_env.py +0 -0
  205. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/python_env.py +0 -0
  206. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/sandbox_env.py +0 -0
  207. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/singleturn_env.py +0 -0
  208. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/stateful_tool_env.py +0 -0
  209. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/tool_env.py +0 -0
  210. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/errors.py +0 -0
  211. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/gepa/__init__.py +0 -0
  212. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/gepa/adapter.py +0 -0
  213. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/gepa/config.py +0 -0
  214. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/gepa/display.py +0 -0
  215. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/gepa/gepa_utils.py +0 -0
  216. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/parsers/__init__.py +0 -0
  217. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/parsers/maybe_think_parser.py +0 -0
  218. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/parsers/parser.py +0 -0
  219. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/parsers/think_parser.py +0 -0
  220. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/parsers/xml_parser.py +0 -0
  221. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/rl/README.md +0 -0
  222. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/rl/__init__.py +0 -0
  223. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/rl/inference/__init__.py +0 -0
  224. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/rl/inference/client.py +0 -0
  225. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/rl/inference/server.py +0 -0
  226. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/rl/trainer/__init__.py +0 -0
  227. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/rl/trainer/config.py +0 -0
  228. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/rl/trainer/orchestrator.py +0 -0
  229. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/rl/trainer/trainer.py +0 -0
  230. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/rl/trainer/utils.py +0 -0
  231. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/rubrics/__init__.py +0 -0
  232. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/rubrics/experimental/hybrid_math_rubric.py +0 -0
  233. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/rubrics/judge_rubric.py +0 -0
  234. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/rubrics/math_rubric.py +0 -0
  235. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/rubrics/rubric.py +0 -0
  236. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/rubrics/rubric_group.py +0 -0
  237. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/scripts/__init__.py +0 -0
  238. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/scripts/build.py +0 -0
  239. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/scripts/eval.py +0 -0
  240. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/scripts/gepa.py +0 -0
  241. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/scripts/install.py +0 -0
  242. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/scripts/rl.py +0 -0
  243. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/scripts/setup.py +0 -0
  244. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/scripts/train.py +0 -0
  245. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/scripts/tui.py +0 -0
  246. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/scripts/vllm.py +0 -0
  247. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/serve/__init__.py +0 -0
  248. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/serve/client/env_client.py +0 -0
  249. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/serve/client/zmq_env_client.py +0 -0
  250. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/serve/server/__init__.py +0 -0
  251. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/serve/server/env_router.py +0 -0
  252. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/serve/server/env_server.py +0 -0
  253. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/serve/server/env_worker.py +0 -0
  254. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/serve/server/zmq_env_server.py +0 -0
  255. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/serve/types.py +0 -0
  256. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/__init__.py +0 -0
  257. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/async_utils.py +0 -0
  258. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/client_utils.py +0 -0
  259. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/config_utils.py +0 -0
  260. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/data_utils.py +0 -0
  261. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/display_utils.py +0 -0
  262. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/env_config_utils.py +0 -0
  263. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/error_utils.py +0 -0
  264. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/eval_display.py +0 -0
  265. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/eval_utils.py +0 -0
  266. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/heartbeat.py +0 -0
  267. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/import_utils.py +0 -0
  268. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/install_utils.py +0 -0
  269. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/interception_utils.py +0 -0
  270. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/logging_utils.py +0 -0
  271. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/message_utils.py +0 -0
  272. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/metric_utils.py +0 -0
  273. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/path_utils.py +0 -0
  274. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/pricing_utils.py +0 -0
  275. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/process_utils.py +0 -0
  276. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/response_utils.py +0 -0
  277. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/save_utils.py +0 -0
  278. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/serve_utils.py +0 -0
  279. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/thread_utils.py +0 -0
  280. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/threaded_sandbox_client.py +0 -0
  281. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/tool_utils.py +0 -0
  282. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/usage_utils.py +0 -0
  283. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/version_utils.py +0 -0
  284. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/packages/__init__.py +0 -0
  285. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/packages/harnesses/__init__.py +0 -0
  286. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/packages/harnesses/command.py +0 -0
  287. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/packages/harnesses/configs.py +0 -0
  288. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/packages/harnesses/mini_swe_agent.py +0 -0
  289. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/packages/harnesses/opencode.py +0 -0
  290. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/packages/harnesses/pi.py +0 -0
  291. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/packages/harnesses/rlm.py +0 -0
  292. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/packages/harnesses/terminus_2.py +0 -0
  293. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/packages/tasksets/__init__.py +0 -0
  294. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/runtime.py +0 -0
  295. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/state.py +0 -0
  296. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/task.py +0 -0
  297. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/user.py +0 -0
  298. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/__init__.py +0 -0
  299. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/artifact_utils.py +0 -0
  300. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/binding_utils.py +0 -0
  301. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/config_callable_utils.py +0 -0
  302. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/endpoint_utils.py +0 -0
  303. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/json_utils.py +0 -0
  304. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/judge_utils.py +0 -0
  305. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/lifecycle_utils.py +0 -0
  306. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/mcp_proxy_utils.py +0 -0
  307. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/mcp_utils.py +0 -0
  308. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/object_utils.py +0 -0
  309. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/program_utils.py +0 -0
  310. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/runtime_registry.py +0 -0
  311. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/sandbox_program_utils.py +0 -0
  312. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/sandbox_utils.py +0 -0
  313. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/scoring_utils.py +0 -0
  314. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/serialization_utils.py +0 -0
  315. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/task_freeze_utils.py +0 -0
  316. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/timing_utils.py +0 -0
  317. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/tool_utils.py +0 -0
  318. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/trajectory_utils.py +0 -0
  319. {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/usage_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: verifiers
3
- Version: 0.1.15.dev10
3
+ Version: 0.1.15.dev11
4
4
  Summary: Verifiers: Environments for LLM Reinforcement Learning
5
5
  Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
6
6
  Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
@@ -56,7 +56,7 @@ Requires-Dist: stagehand>=3.0.0; extra == 'browser'
56
56
  Provides-Extra: openenv
57
57
  Requires-Dist: openenv-core>=0.3.0; extra == 'openenv'
58
58
  Provides-Extra: renderers
59
- Requires-Dist: renderers>=0.1.8.dev4; extra == 'renderers'
59
+ Requires-Dist: renderers>=0.1.8.dev28; extra == 'renderers'
60
60
  Provides-Extra: rg
61
61
  Requires-Dist: reasoning-gym; extra == 'rg'
62
62
  Provides-Extra: rl
@@ -220,19 +220,13 @@ custom harnesses, use the v1 Taskset/Harness path:
220
220
  # my_env.py
221
221
  import verifiers as vf
222
222
 
223
- @vf.reward(weight=1.0)
224
- async def contains_answer(task, state) -> float:
225
- return float(task["answer"] in str(state.get("completion") or ""))
226
223
 
227
224
  class MyTasksetConfig(vf.TasksetConfig):
228
225
  split: str = "train"
229
226
 
230
227
 
231
- class MyTaskset(vf.Taskset):
232
- config: MyTasksetConfig
233
- _default_rewards = (contains_answer,)
234
-
235
- def rows(self) -> list[dict[str, object]]:
228
+ class MyTaskset(vf.Taskset[MyTasksetConfig]):
229
+ def load_tasks(self) -> vf.Tasks:
236
230
  rows = [
237
231
  {
238
232
  "prompt": [{"role": "user", "content": "Reverse abc."}],
@@ -243,28 +237,31 @@ class MyTaskset(vf.Taskset):
243
237
  ]
244
238
  return [row for row in rows if row["split"] == self.config.split]
245
239
 
240
+ @vf.reward(weight=1.0)
241
+ async def contains_answer(self, task, state) -> float:
242
+ return float(task["answer"] in str(state.get("completion") or ""))
243
+
246
244
 
247
245
  def load_taskset(config: MyTasksetConfig) -> MyTaskset:
248
- assert isinstance(config, MyTasksetConfig)
249
246
  return MyTaskset(config=config)
250
247
 
251
248
 
252
249
  def load_environment(config: vf.EnvConfig) -> vf.Env:
253
- taskset_config = config.taskset
254
- assert isinstance(taskset_config, MyTasksetConfig)
255
- return vf.Env(taskset=load_taskset(taskset_config))
250
+ return vf.Env(taskset=vf.load_taskset(config=config.taskset))
256
251
  ```
257
252
  If no harness is passed, `vf.Env` uses the base endpoint-backed harness. See
258
253
  **[BYO Harness](docs/byo-harness.md)** for the advanced v1 taskset/harness API.
259
- Reusable taskset and harness packages live under `verifiers.v1.packages` while
260
- the v1 API stabilizes, and are re-exported from `verifiers.v1` for normal use.
261
- For example, Harbor task directories can run through the bundled OpenCode CLI
254
+ Reusable taskset and harness packages live under `verifiers.v1.packages`. For
255
+ example, Harbor task directories can run through the bundled OpenCode CLI
262
256
  harness with:
263
257
 
264
258
  ```python
259
+ from verifiers.v1.packages.harnesses import OpenCode, OpenCodeConfig
260
+ from verifiers.v1.packages.tasksets import HarborTaskset, HarborTasksetConfig
261
+
265
262
  env = vf.Env(
266
- taskset=vf.HarborTaskset(config=vf.HarborTasksetConfig()),
267
- harness=vf.OpenCode(config=vf.OpenCodeConfig()),
263
+ taskset=HarborTaskset(config=HarborTasksetConfig()),
264
+ harness=OpenCode(config=OpenCodeConfig()),
268
265
  )
269
266
  ```
270
267
 
@@ -143,19 +143,13 @@ custom harnesses, use the v1 Taskset/Harness path:
143
143
  # my_env.py
144
144
  import verifiers as vf
145
145
 
146
- @vf.reward(weight=1.0)
147
- async def contains_answer(task, state) -> float:
148
- return float(task["answer"] in str(state.get("completion") or ""))
149
146
 
150
147
  class MyTasksetConfig(vf.TasksetConfig):
151
148
  split: str = "train"
152
149
 
153
150
 
154
- class MyTaskset(vf.Taskset):
155
- config: MyTasksetConfig
156
- _default_rewards = (contains_answer,)
157
-
158
- def rows(self) -> list[dict[str, object]]:
151
+ class MyTaskset(vf.Taskset[MyTasksetConfig]):
152
+ def load_tasks(self) -> vf.Tasks:
159
153
  rows = [
160
154
  {
161
155
  "prompt": [{"role": "user", "content": "Reverse abc."}],
@@ -166,28 +160,31 @@ class MyTaskset(vf.Taskset):
166
160
  ]
167
161
  return [row for row in rows if row["split"] == self.config.split]
168
162
 
163
+ @vf.reward(weight=1.0)
164
+ async def contains_answer(self, task, state) -> float:
165
+ return float(task["answer"] in str(state.get("completion") or ""))
166
+
169
167
 
170
168
  def load_taskset(config: MyTasksetConfig) -> MyTaskset:
171
- assert isinstance(config, MyTasksetConfig)
172
169
  return MyTaskset(config=config)
173
170
 
174
171
 
175
172
  def load_environment(config: vf.EnvConfig) -> vf.Env:
176
- taskset_config = config.taskset
177
- assert isinstance(taskset_config, MyTasksetConfig)
178
- return vf.Env(taskset=load_taskset(taskset_config))
173
+ return vf.Env(taskset=vf.load_taskset(config=config.taskset))
179
174
  ```
180
175
  If no harness is passed, `vf.Env` uses the base endpoint-backed harness. See
181
176
  **[BYO Harness](docs/byo-harness.md)** for the advanced v1 taskset/harness API.
182
- Reusable taskset and harness packages live under `verifiers.v1.packages` while
183
- the v1 API stabilizes, and are re-exported from `verifiers.v1` for normal use.
184
- For example, Harbor task directories can run through the bundled OpenCode CLI
177
+ Reusable taskset and harness packages live under `verifiers.v1.packages`. For
178
+ example, Harbor task directories can run through the bundled OpenCode CLI
185
179
  harness with:
186
180
 
187
181
  ```python
182
+ from verifiers.v1.packages.harnesses import OpenCode, OpenCodeConfig
183
+ from verifiers.v1.packages.tasksets import HarborTaskset, HarborTasksetConfig
184
+
188
185
  env = vf.Env(
189
- taskset=vf.HarborTaskset(config=vf.HarborTasksetConfig()),
190
- harness=vf.OpenCode(config=vf.OpenCodeConfig()),
186
+ taskset=HarborTaskset(config=HarborTasksetConfig()),
187
+ harness=OpenCode(config=OpenCodeConfig()),
191
188
  )
192
189
  ```
193
190
 
@@ -74,7 +74,7 @@ dev = [
74
74
  "aiohttp>=3.9.0",
75
75
  "python-dotenv>=1.0.0",
76
76
  "nltk",
77
- "renderers>=0.1.8.dev4",
77
+ "renderers>=0.1.8.dev28",
78
78
  ]
79
79
  policy = [
80
80
  "semgrep>=1.150.0",
@@ -97,7 +97,7 @@ openenv = [
97
97
  "openenv-core>=0.3.0",
98
98
  ]
99
99
  renderers = [
100
- "renderers>=0.1.8.dev4",
100
+ "renderers>=0.1.8.dev28",
101
101
  ]
102
102
  rl = [
103
103
  "torch>=2.8.0,<2.9.0",
@@ -4,6 +4,37 @@ import sys
4
4
  import verifiers
5
5
 
6
6
 
7
+ PACKAGE_SYMBOLS = {
8
+ "HarborTaskset",
9
+ "HarborTasksetConfig",
10
+ "MiniSWEAgent",
11
+ "MiniSWEAgentConfig",
12
+ "OpenCode",
13
+ "OpenCodeConfig",
14
+ "Pi",
15
+ "PiConfig",
16
+ "RLM",
17
+ "RLMConfig",
18
+ "Terminus2",
19
+ "Terminus2Config",
20
+ "TextArenaTaskset",
21
+ "TextArenaTasksetConfig",
22
+ }
23
+
24
+
25
+ def test_package_tasksets_and_harnesses_are_not_root_exports():
26
+ for name in PACKAGE_SYMBOLS:
27
+ assert name not in verifiers.__all__
28
+ assert not hasattr(verifiers, name)
29
+
30
+
31
+ def test_package_tasksets_and_harnesses_are_not_v1_exports():
32
+ v1 = importlib.import_module("verifiers.v1")
33
+ for name in PACKAGE_SYMBOLS:
34
+ assert name not in v1.__all__
35
+ assert not hasattr(v1, name)
36
+
37
+
7
38
  def test_v1_taskset_imports_do_not_import_textarena():
8
39
  textarena_module = "verifiers.v1.packages.tasksets.textarena"
9
40
  sys.modules.pop(textarena_module, None)
@@ -1,5 +1,6 @@
1
1
  from pathlib import Path
2
2
 
3
+ import pytest
3
4
  import verifiers as vf
4
5
  from verifiers.scripts.init import init_environment
5
6
 
@@ -24,16 +25,19 @@ def test_init_v1_writes_thin_taskset_template(tmp_path: Path) -> None:
24
25
  init_environment("bar", path=str(tmp_path), v1=True)
25
26
  content = read_env_file(tmp_path, "bar")
26
27
 
27
- assert 'ENV_ID = "bar"' in content
28
- assert "def load_tasks():" in content
29
- assert "class EnvTasksetConfig(vf.TasksetConfig):" in content
30
- assert 'source: str = "bar:load_tasks"' in content
31
- assert 'rewards: list[str] = ["bar:exact_answer"]' in content
32
- assert "def load_taskset(config: EnvTasksetConfig) -> vf.Taskset:" in content
33
- assert "vf.load_taskset(ENV_ID, config=config.taskset)" in content
28
+ assert "class BarTasksetConfig(vf.TasksetConfig):" in content
29
+ assert "class BarTaskset(vf.Taskset[BarTasksetConfig]):" in content
30
+ assert "def load_tasks(self) -> vf.Tasks:" in content
31
+ assert "def load_system_prompt(self) -> vf.SystemPrompt:" in content
32
+ assert "async def correct_answer(self, task: vf.Task, state: vf.State)" in content
33
+ assert "def load_taskset(config: BarTasksetConfig) -> BarTaskset:" in content
34
+ assert "return BarTaskset(config=config)" in content
35
+ assert "vf.load_taskset(config=config.taskset)" in content
34
36
  assert "class EnvTaskset(" not in content
35
37
  assert "_default_" not in content
36
38
  assert "assert isinstance" not in content
39
+ assert 'tasks: str = "load_tasks"' not in content
40
+ assert 'rewards: list[str] = ["correct_answer"]' not in content
37
41
 
38
42
 
39
43
  def test_init_v1_template_loads_with_vf_load_environment(
@@ -42,21 +46,19 @@ def test_init_v1_template_loads_with_vf_load_environment(
42
46
  init_environment("loadable-v1", path=str(tmp_path), v1=True)
43
47
  monkeypatch.syspath_prepend(str(tmp_path / "loadable_v1"))
44
48
 
45
- env = vf.load_environment("loadable-v1")
46
-
47
- assert isinstance(env, vf.Env)
48
- assert env.taskset.rows()[0]["answer"] == "cba"
49
- assert env.taskset.rewards[0].__name__ == "exact_answer"
49
+ with pytest.raises(RuntimeError, match="Load the system prompt"):
50
+ vf.load_environment("loadable-v1")
50
51
 
51
52
 
52
53
  def test_init_v1_with_harness_writes_harness_stub(tmp_path: Path) -> None:
53
54
  init_environment("baz", path=str(tmp_path), v1=True, with_harness=True)
54
55
  content = read_env_file(tmp_path, "baz")
55
56
 
56
- assert "class EnvHarnessConfig(vf.HarnessConfig):" in content
57
- assert "class EnvHarness(vf.Harness):" in content
58
- assert "def load_harness(config: EnvHarnessConfig) -> EnvHarness:" in content
59
- assert "vf.load_harness(ENV_ID, config=config.harness)" in content
57
+ assert "class BazTaskset(vf.Taskset[BazTasksetConfig]):" in content
58
+ assert "class BazHarnessConfig(vf.HarnessConfig):" in content
59
+ assert "class BazHarness(vf.Harness):" in content
60
+ assert "def load_harness(config: BazHarnessConfig) -> BazHarness:" in content
61
+ assert "vf.load_harness(config=config.harness)" in content
60
62
 
61
63
 
62
64
  def test_init_with_harness_without_v1_warns_and_uses_v0(tmp_path: Path, capsys) -> None:
@@ -77,4 +79,5 @@ def test_init_v1_multifile_exports_component_loaders(tmp_path: Path) -> None:
77
79
 
78
80
  assert "from .pkg_env import load_environment, load_taskset" in init_content
79
81
  assert "__all__ = ['load_environment', 'load_taskset']" in init_content
80
- assert 'source: str = "pkg_env.pkg_env:load_tasks"' in env_content
82
+ assert "class PkgEnvTaskset(vf.Taskset[PkgEnvTasksetConfig]):" in env_content
83
+ assert "return PkgEnvTaskset(config=config)" in env_content
@@ -91,8 +91,8 @@ def test_wikispeedia_env_config_reaches_taskset_and_harness(
91
91
  )
92
92
  )
93
93
 
94
- train_rows = list(env.taskset.source())
95
- eval_rows = list(env.taskset.eval_source())
94
+ train_rows = [env.taskset.to_task(row) for row in env.taskset.get_dataset()]
95
+ eval_rows = [env.taskset.to_task(row) for row in env.taskset.get_eval_dataset()]
96
96
 
97
97
  assert len(train_rows) == 2
98
98
  assert len(eval_rows) == 1
@@ -136,8 +136,8 @@ def test_wikispeedia_taskset_sources_use_disjoint_target_split(
136
136
  )
137
137
  )
138
138
 
139
- train_rows = list(taskset.source())
140
- eval_rows = list(taskset.eval_source())
139
+ train_rows = [taskset.to_task(row) for row in taskset.get_dataset()]
140
+ eval_rows = [taskset.to_task(row) for row in taskset.get_eval_dataset()]
141
141
 
142
142
  assert len(train_rows) == 2
143
143
  assert len(eval_rows) == 1
@@ -218,7 +218,7 @@ async def test_wikispeedia_tools_resolve_through_v1_runtime(
218
218
  ),
219
219
  harness=module.load_harness(config=module.WikispeediaHarnessConfig()),
220
220
  )
221
- task = module.vf.Task(list(env.taskset.source())[0]).freeze()
221
+ task = env.taskset.to_task(env.taskset.get_dataset()[0])
222
222
  state = module.vf.State.for_task(task)
223
223
  state = await env.harness.setup_state(task, state)
224
224
 
@@ -5,7 +5,7 @@ from pathlib import Path
5
5
  from typing import Any
6
6
 
7
7
  import pytest
8
- import verifiers.v1 as vf
8
+ import verifiers as vf
9
9
 
10
10
 
11
11
  def _load_mcp_search_module() -> Any:
@@ -54,7 +54,7 @@ def test_mcp_search_env_preserves_harness_config() -> None:
54
54
  def test_mcp_search_default_taskset_has_stable_non_doc_fixture() -> None:
55
55
  module = _load_mcp_search_module()
56
56
 
57
- rows = module.MCPSearchTaskset(config=module.MCPSearchTasksetConfig()).rows()
57
+ rows = list(module.load_tasks())
58
58
 
59
59
  assert len(rows) >= 10
60
60
  assert len({row["answer"] for row in rows}) == len(rows)
@@ -68,7 +68,7 @@ def test_mcp_search_taskset_accepts_v1_taskset_config() -> None:
68
68
  env = module.load_environment(
69
69
  config=module.MCPSearchEnvConfig(taskset={"max_turns": 3}),
70
70
  )
71
- rows = env.taskset.rows()
71
+ rows = [env.taskset.to_task(row) for row in env.taskset.get_dataset()]
72
72
 
73
73
  assert env.taskset.config.max_turns == 3
74
74
  assert all(row["max_turns"] == 3 for row in rows)
@@ -3,7 +3,9 @@ import sys
3
3
  from pathlib import Path
4
4
  from typing import Any, cast
5
5
 
6
- import verifiers.v1 as vf
6
+ import verifiers as vf
7
+ from verifiers.v1.packages.harnesses import OpenCode, OpenCodeConfig
8
+ from verifiers.v1.packages.tasksets import HarborTaskset
7
9
 
8
10
 
9
11
  def _load_opencode_module() -> Any:
@@ -31,14 +33,14 @@ def test_load_environment_uses_v1_taskset_and_harness() -> None:
31
33
  env = module.load_environment(config=module.OpenCodeHarborEnvConfig())
32
34
 
33
35
  assert isinstance(env, vf.Env)
34
- assert isinstance(env.taskset, vf.HarborTaskset)
35
- assert isinstance(env.harness, vf.OpenCode)
36
- assert isinstance(env.harness.config, vf.OpenCodeConfig)
36
+ assert isinstance(env.taskset, HarborTaskset)
37
+ assert isinstance(env.harness, OpenCode)
38
+ assert isinstance(env.harness.config, OpenCodeConfig)
37
39
  assert not hasattr(module, "OpenCodeHarborHarnessConfig")
38
40
  assert not hasattr(module, "TERMINAL_BENCH_SAMPLE_TASKS")
39
41
  assert env.taskset.resolve_tasks_root() == Path(module.__file__).parent / "tasks"
40
42
  assert env.harness.config.max_turns == 4
41
- assert env.harness.config.disabled_tools == vf.OpenCodeConfig().disabled_tools
43
+ assert env.harness.config.disabled_tools == OpenCodeConfig().disabled_tools
42
44
  assert "webfetch" in env.harness.config.disabled_tools
43
45
  assert "question" in env.harness.config.disabled_tools
44
46
 
@@ -53,11 +55,11 @@ def test_load_environment_accepts_v1_taskset_and_harness_config() -> None:
53
55
 
54
56
  env = module.load_environment(
55
57
  config=module.OpenCodeHarborEnvConfig(
56
- taskset=module.vf.HarborTasksetConfig(
58
+ taskset=module.HarborTasksetConfig(
57
59
  task_names=["task-a"],
58
60
  cpu_cores=1.5,
59
61
  ),
60
- harness=module.vf.OpenCodeConfig(
62
+ harness=module.OpenCodeConfig(
61
63
  agent_workdir="/workspace",
62
64
  disabled_tools=["webfetch"],
63
65
  max_turns=2,
@@ -5,6 +5,7 @@ import pytest
5
5
 
6
6
  import verifiers as vf
7
7
  from renderers import RendererPool
8
+ from renderers import config_from_name
8
9
  from renderers.base import ParsedResponse, RenderedTokens, create_renderer
9
10
  from verifiers.clients.renderer_client import (
10
11
  RendererClient,
@@ -24,13 +25,16 @@ from verifiers.types import (
24
25
  )
25
26
 
26
27
 
27
- def test_renderer_client_honors_configured_renderer_name():
28
+ def test_renderer_client_honors_configured_renderer_config():
29
+ from renderers import Qwen3VLRendererConfig
30
+
28
31
  RendererClient._shared_pools.clear()
29
32
 
33
+ cfg = Qwen3VLRendererConfig()
30
34
  client = object.__new__(RendererClient)
31
35
  client._renderer = None
32
36
  client._pool_size = 1
33
- client._config = vf.ClientConfig(client_type="renderer", renderer="qwen3_vl")
37
+ client._config = vf.ClientConfig(client_type="renderer", renderer_config=cfg)
34
38
 
35
39
  sentinel_pool = RendererPool.__new__(RendererPool)
36
40
  with patch(
@@ -42,24 +46,23 @@ def test_renderer_client_honors_configured_renderer_name():
42
46
  assert pool is sentinel_pool
43
47
  create_pool_mock.assert_called_once_with(
44
48
  "Qwen/Qwen3-VL-4B-Instruct",
45
- renderer="qwen3_vl",
49
+ cfg,
46
50
  size=1,
47
- tool_parser=None,
48
- reasoning_parser=None,
49
- preserve_all_thinking=False,
50
- preserve_thinking_between_tool_calls=False,
51
51
  )
52
52
 
53
53
 
54
54
  def test_renderer_client_uses_renderer_model_name_override():
55
+ from renderers import Qwen3VLRendererConfig
56
+
55
57
  RendererClient._shared_pools.clear()
56
58
 
59
+ cfg = Qwen3VLRendererConfig()
57
60
  client = object.__new__(RendererClient)
58
61
  client._renderer = None
59
62
  client._pool_size = 1
60
63
  client._config = vf.ClientConfig(
61
64
  client_type="renderer",
62
- renderer="qwen3_vl",
65
+ renderer_config=cfg,
63
66
  renderer_model_name="Qwen/Qwen3-VL-4B-Instruct",
64
67
  )
65
68
 
@@ -73,12 +76,8 @@ def test_renderer_client_uses_renderer_model_name_override():
73
76
  assert pool is sentinel_pool
74
77
  create_pool_mock.assert_called_once_with(
75
78
  "Qwen/Qwen3-VL-4B-Instruct",
76
- renderer="qwen3_vl",
79
+ cfg,
77
80
  size=1,
78
- tool_parser=None,
79
- reasoning_parser=None,
80
- preserve_all_thinking=False,
81
- preserve_thinking_between_tool_calls=False,
82
81
  )
83
82
 
84
83
 
@@ -524,7 +523,7 @@ def _load_tokenizer_and_renderer(model_name: str, renderer_name: str):
524
523
  from transformers import AutoTokenizer
525
524
 
526
525
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
527
- renderer = create_renderer(tokenizer, renderer=renderer_name)
526
+ renderer = create_renderer(tokenizer, config_from_name(renderer_name))
528
527
  return tokenizer, renderer
529
528
 
530
529
 
@@ -17,6 +17,7 @@ Parametrized over five model families so each renderer's render/parse paths
17
17
  are exercised. Tokenizers come from the local HF cache; no network.
18
18
  """
19
19
 
20
+ import json
20
21
  import logging
21
22
  from typing import Any
22
23
 
@@ -24,7 +25,7 @@ import pytest
24
25
 
25
26
  import verifiers as vf
26
27
  from datasets import Dataset
27
- from renderers import create_renderer
28
+ from renderers import config_from_name, create_renderer
28
29
  from verifiers.clients.renderer_client import RendererClient, _to_renderer_message
29
30
  from verifiers.types import Messages, State
30
31
 
@@ -83,7 +84,7 @@ def _load(model_name: str, renderer_name: str):
83
84
  from transformers import AutoTokenizer
84
85
 
85
86
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
86
- renderer = create_renderer(tokenizer, renderer=renderer_name)
87
+ renderer = create_renderer(tokenizer, config_from_name(renderer_name))
87
88
  _renderer_cache[key] = (tokenizer, renderer)
88
89
  return _renderer_cache[key]
89
90
 
@@ -106,6 +107,13 @@ def tokenizer_and_renderer(model_family):
106
107
  # ── Scripted vLLM stand-in ───────────────────────────────────────────
107
108
 
108
109
 
110
+ class _ScriptedResponse:
111
+ """httpx.Response stand-in: ``parse_generate_response`` reads ``.content`` as bytes."""
112
+
113
+ def __init__(self, payload: dict[str, Any]):
114
+ self.content = json.dumps(payload).encode()
115
+
116
+
109
117
  class ScriptedVLLM:
110
118
  """Fake ``AsyncOpenAI``-compatible client serving canned
111
119
  /inference/v1/generate responses (vllm 0.20 wire shape).
@@ -124,22 +132,24 @@ class ScriptedVLLM:
124
132
  assert self._completions, "ScriptedVLLM ran out of canned completions"
125
133
  completion_ids = self._completions.pop(0)
126
134
 
127
- return {
128
- "request_id": f"resp-{len(self.requests)}",
129
- "choices": [
130
- {
131
- "index": 0,
132
- "token_ids": list(completion_ids),
133
- "logprobs": {
134
- "content": [
135
- {"token": f"token_id:{tid}", "logprob": -0.1}
136
- for tid in completion_ids
137
- ]
138
- },
139
- "finish_reason": "stop",
140
- }
141
- ],
142
- }
135
+ return _ScriptedResponse(
136
+ {
137
+ "request_id": f"resp-{len(self.requests)}",
138
+ "choices": [
139
+ {
140
+ "index": 0,
141
+ "token_ids": list(completion_ids),
142
+ "logprobs": {
143
+ "content": [
144
+ {"token": f"token_id:{tid}", "logprob": -0.1}
145
+ for tid in completion_ids
146
+ ]
147
+ },
148
+ "finish_reason": "stop",
149
+ }
150
+ ],
151
+ }
152
+ )
143
153
 
144
154
  async def close(self):
145
155
  pass
@@ -1,11 +1,11 @@
1
1
  import importlib.util
2
+ import sys
2
3
  from pathlib import Path
3
4
  from types import ModuleType
4
5
 
5
6
  import pytest
6
7
 
7
- import verifiers as root_vf
8
- import verifiers.v1 as vf
8
+ import verifiers as vf
9
9
 
10
10
 
11
11
  def load_bfcl_module() -> ModuleType:
@@ -14,6 +14,7 @@ def load_bfcl_module() -> ModuleType:
14
14
  assert spec is not None
15
15
  assert spec.loader is not None
16
16
  module = importlib.util.module_from_spec(spec)
17
+ sys.modules[spec.name] = module
17
18
  spec.loader.exec_module(module)
18
19
  return module
19
20
 
@@ -109,7 +110,7 @@ def test_bfcl_loader_supports_category_groups(
109
110
  bfcl = load_bfcl_module()
110
111
  seen_harness_categories = []
111
112
 
112
- def fake_source(test_category: str, **kwargs: object):
113
+ def fake_load_tasks(test_category: str, **kwargs: object):
113
114
  _ = kwargs
114
115
  return [{"question": test_category, "answer": "a"}]
115
116
 
@@ -118,7 +119,7 @@ def test_bfcl_loader_supports_category_groups(
118
119
  seen_harness_categories.append(config.test_category)
119
120
  return vf.Harness(config=config)
120
121
 
121
- monkeypatch.setattr(bfcl.BFCLTaskset, "_default_source", fake_source)
122
+ monkeypatch.setattr(bfcl, "load_tasks", fake_load_tasks)
122
123
  monkeypatch.setattr(bfcl, "load_harness", fake_harness)
123
124
 
124
125
  env = bfcl.load_environment(
@@ -131,7 +132,7 @@ def test_bfcl_loader_supports_category_groups(
131
132
  )
132
133
  )
133
134
 
134
- assert isinstance(env, root_vf.EnvGroup)
135
+ assert isinstance(env, vf.EnvGroup)
135
136
  assert env.env_names == ["simple_python", "simple_java"]
136
137
  seen_taskset_categories = [item.taskset.config.test_category for item in env.envs]
137
138
  assert seen_taskset_categories == ["simple_python", "simple_java"]