verifiers 0.1.15.dev14__tar.gz → 0.1.15.dev16__tar.gz

This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (315):
  1. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/PKG-INFO +14 -8
  2. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/README.md +13 -7
  3. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_eval_cli.py +23 -0
  4. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_init_script.py +17 -12
  5. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_v1_config_extension.py +243 -2
  6. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/__init__.py +4 -1
  7. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/scripts/eval.py +7 -6
  8. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/scripts/init.py +46 -39
  9. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/utils/env_utils.py +85 -23
  10. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/utils/eval_utils.py +20 -7
  11. verifiers-0.1.15.dev16/verifiers/v1/ENVIRONMENT_BEST_PRACTICES.md +186 -0
  12. verifiers-0.1.15.dev16/verifiers/v1/README.md +333 -0
  13. verifiers-0.1.15.dev16/verifiers/v1/RE_MIGRATION.md +420 -0
  14. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/__init__.py +2 -1
  15. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/harness.py +4 -3
  16. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/utils/prompt_utils.py +69 -26
  17. verifiers-0.1.15.dev14/verifiers/v1/ENVIRONMENT_BEST_PRACTICES.md +0 -279
  18. verifiers-0.1.15.dev14/verifiers/v1/README.md +0 -1716
  19. verifiers-0.1.15.dev14/verifiers/v1/RE_MIGRATION.md +0 -490
  20. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/.gitignore +0 -0
  21. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/LICENSE +0 -0
  22. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/pyproject.toml +0 -0
  23. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/AGENTS.md +0 -0
  24. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/README.md +0 -0
  25. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/__init__.py +0 -0
  26. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/conftest.py +0 -0
  27. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_browser_env.py +0 -0
  28. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_build_script.py +0 -0
  29. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_cli_agent_env.py +0 -0
  30. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_client_auth_errors.py +0 -0
  31. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_client_config.py +0 -0
  32. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_client_multimodal_types.py +0 -0
  33. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_composable_env.py +0 -0
  34. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_context_token_metrics.py +0 -0
  35. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_decorator_ranks.py +0 -0
  36. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_endpoint_registry.py +0 -0
  37. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_env_group.py +0 -0
  38. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_env_server.py +0 -0
  39. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_environment.py +0 -0
  40. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_environment_extra.py +0 -0
  41. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_envs.py +0 -0
  42. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_error_chain.py +0 -0
  43. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_eval_display.py +0 -0
  44. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_eval_utils.py +0 -0
  45. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_gepa_cli.py +0 -0
  46. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_gepa_utils.py +0 -0
  47. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_gym_env.py +0 -0
  48. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_harbor_env_mcp.py +0 -0
  49. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_imports.py +0 -0
  50. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_install_utils.py +0 -0
  51. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_interception_utils.py +0 -0
  52. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_langchain_deep_agents_wikispeedia.py +0 -0
  53. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_lean_task.py +0 -0
  54. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_logging.py +0 -0
  55. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_math_rubric.py +0 -0
  56. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_maybe_think_parser.py +0 -0
  57. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_mcp_search_env.py +0 -0
  58. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_message_utils.py +0 -0
  59. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_message_utils_multimodal.py +0 -0
  60. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_multiturn_env.py +0 -0
  61. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_nemorl_client.py +0 -0
  62. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_openai_chat_completions_token_client.py +0 -0
  63. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_openai_responses_client.py +0 -0
  64. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_opencode_harbor.py +0 -0
  65. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_opencode_rlm_env.py +0 -0
  66. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_openenv_client.py +0 -0
  67. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_parser.py +0 -0
  68. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_path_utils.py +0 -0
  69. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_per_turn_timing.py +0 -0
  70. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_pricing_utils.py +0 -0
  71. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_prime_plugin.py +0 -0
  72. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_renderer_client.py +0 -0
  73. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_renderer_e2e.py +0 -0
  74. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_rlm_composable_env.py +0 -0
  75. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_rlm_env.py +0 -0
  76. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_rubric.py +0 -0
  77. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_rubric_group.py +0 -0
  78. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_sandbox_env.py +0 -0
  79. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_sandbox_mixin.py +0 -0
  80. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_save_utils.py +0 -0
  81. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_setup_script.py +0 -0
  82. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_singleturn_env.py +0 -0
  83. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_stateful_tool_env.py +0 -0
  84. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_think_parser.py +0 -0
  85. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_tool_env.py +0 -0
  86. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_tool_utils.py +0 -0
  87. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_trajectory_processing.py +0 -0
  88. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_tui_info_formatting.py +0 -0
  89. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_types.py +0 -0
  90. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_v1_bfcl.py +0 -0
  91. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_v1_empty_completions.py +0 -0
  92. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_v1_endpoint_protocols.py +0 -0
  93. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_v1_example_counts.py +0 -0
  94. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_v1_group_reward_env.py +0 -0
  95. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_v1_harbor_cli.py +0 -0
  96. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_v1_mini_swe_agent.py +0 -0
  97. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_v1_nemo_gym_harness.py +0 -0
  98. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_v1_openenv_taskset.py +0 -0
  99. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_v1_openreward_taskset.py +0 -0
  100. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_v1_rlm_swe.py +0 -0
  101. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_v1_runtime_lifecycle.py +0 -0
  102. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_v1_scoring_functions.py +0 -0
  103. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_v1_taskset_bindings.py +0 -0
  104. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_v1_textarena_taskset.py +0 -0
  105. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_wiki_search_v1.py +0 -0
  106. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_wordle_env.py +0 -0
  107. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_wordle_v1_env.py +0 -0
  108. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/tests/test_xml_parser.py +0 -0
  109. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/AGENTS.md +0 -0
  110. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/cli/__init__.py +0 -0
  111. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/cli/commands/__init__.py +0 -0
  112. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/cli/commands/build.py +0 -0
  113. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/cli/commands/eval.py +0 -0
  114. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/cli/commands/gepa.py +0 -0
  115. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/cli/commands/init.py +0 -0
  116. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/cli/commands/install.py +0 -0
  117. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/cli/commands/setup.py +0 -0
  118. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/cli/plugins/__init__.py +0 -0
  119. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/cli/plugins/prime.py +0 -0
  120. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/cli/tui.py +0 -0
  121. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/clients/__init__.py +0 -0
  122. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/clients/anthropic_messages_client.py +0 -0
  123. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/clients/client.py +0 -0
  124. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/clients/nemorl_chat_completions_client.py +0 -0
  125. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/clients/openai_chat_completions_client.py +0 -0
  126. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/clients/openai_chat_completions_token_client.py +0 -0
  127. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/clients/openai_completions_client.py +0 -0
  128. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/clients/openai_responses_client.py +0 -0
  129. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/clients/renderer_client.py +0 -0
  130. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/decorators.py +0 -0
  131. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/AGENTS.md +0 -0
  132. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/__init__.py +0 -0
  133. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/env_group.py +0 -0
  134. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/environment.py +0 -0
  135. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/README.md +0 -0
  136. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/__init__.py +0 -0
  137. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/cli_agent_env.py +0 -0
  138. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/composable/README.md +0 -0
  139. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/composable/__init__.py +0 -0
  140. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/composable/_filter.py +0 -0
  141. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/composable/composable_env.py +0 -0
  142. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/composable/harness.py +0 -0
  143. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/composable/harnesses/__init__.py +0 -0
  144. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/composable/harnesses/mini_swe_agent.py +0 -0
  145. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/composable/harnesses/opencode.py +0 -0
  146. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/composable/harnesses/prompt.txt +0 -0
  147. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/composable/harnesses/rlm.py +0 -0
  148. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/composable/swe_debug_env.py +0 -0
  149. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/composable/task.py +0 -0
  150. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/composable/tasksets/__init__.py +0 -0
  151. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/composable/tasksets/cp/__init__.py +0 -0
  152. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/composable/tasksets/cp/cp_task.py +0 -0
  153. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/composable/tasksets/cp/test_utils.py +0 -0
  154. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/composable/tasksets/harbor/__init__.py +0 -0
  155. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/composable/tasksets/harbor/harbor.py +0 -0
  156. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/composable/tasksets/lean/__init__.py +0 -0
  157. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/composable/tasksets/lean/lean_task.py +0 -0
  158. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/composable/tasksets/math/__init__.py +0 -0
  159. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/composable/tasksets/math/math_task.py +0 -0
  160. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/composable/tasksets/swe/__init__.py +0 -0
  161. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/composable/tasksets/swe/_test_patch.py +0 -0
  162. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/composable/tasksets/swe/create_fix_patch.sh +0 -0
  163. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/composable/tasksets/swe/log_parser.py +0 -0
  164. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/composable/tasksets/swe/multi_swe.py +0 -0
  165. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/composable/tasksets/swe/openswe.py +0 -0
  166. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym.py +0 -0
  167. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/composable/tasksets/swe/swe_bench.py +0 -0
  168. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/composable/tasksets/swe/swe_lego.py +0 -0
  169. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2.py +0 -0
  170. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2_log_parsers.py +0 -0
  171. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/composable/tasksets/swe/swe_smith.py +0 -0
  172. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/composable/tasksets/swe/swe_tasksets.py +0 -0
  173. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/gym_env.py +0 -0
  174. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/harbor_env/__init__.py +0 -0
  175. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/harbor_env/env.py +0 -0
  176. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/harbor_env/mcp.py +0 -0
  177. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/mcp_env.py +0 -0
  178. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/opencode_env.py +0 -0
  179. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/opencode_qa_env.py +0 -0
  180. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/opencode_rlm_env.py +0 -0
  181. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/rlm_env.py +0 -0
  182. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/sandbox_mixin.py +0 -0
  183. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/utils/__init__.py +0 -0
  184. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/utils/file_locks.py +0 -0
  185. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/experimental/utils/git_checkout_cache.py +0 -0
  186. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/integrations/README.md +0 -0
  187. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/integrations/__init__.py +0 -0
  188. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/integrations/browser_env/README.md +0 -0
  189. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/integrations/browser_env/__init__.py +0 -0
  190. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
  191. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
  192. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
  193. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
  194. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +0 -0
  195. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/integrations/openenv_env.py +0 -0
  196. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
  197. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/integrations/textarena_env.py +0 -0
  198. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/multiturn_env.py +0 -0
  199. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/python_env.py +0 -0
  200. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/sandbox_env.py +0 -0
  201. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/singleturn_env.py +0 -0
  202. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/stateful_tool_env.py +0 -0
  203. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/envs/tool_env.py +0 -0
  204. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/errors.py +0 -0
  205. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/gepa/__init__.py +0 -0
  206. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/gepa/adapter.py +0 -0
  207. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/gepa/config.py +0 -0
  208. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/gepa/display.py +0 -0
  209. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/gepa/gepa_utils.py +0 -0
  210. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/parsers/__init__.py +0 -0
  211. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/parsers/maybe_think_parser.py +0 -0
  212. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/parsers/parser.py +0 -0
  213. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/parsers/think_parser.py +0 -0
  214. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/parsers/xml_parser.py +0 -0
  215. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/rl/README.md +0 -0
  216. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/rl/__init__.py +0 -0
  217. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/rl/inference/__init__.py +0 -0
  218. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/rl/inference/client.py +0 -0
  219. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/rl/inference/server.py +0 -0
  220. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/rl/trainer/__init__.py +0 -0
  221. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/rl/trainer/config.py +0 -0
  222. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/rl/trainer/orchestrator.py +0 -0
  223. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/rl/trainer/trainer.py +0 -0
  224. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/rl/trainer/utils.py +0 -0
  225. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/rubrics/__init__.py +0 -0
  226. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/rubrics/experimental/hybrid_math_rubric.py +0 -0
  227. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/rubrics/judge_rubric.py +0 -0
  228. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/rubrics/math_rubric.py +0 -0
  229. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/rubrics/rubric.py +0 -0
  230. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/rubrics/rubric_group.py +0 -0
  231. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/scripts/__init__.py +0 -0
  232. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/scripts/build.py +0 -0
  233. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/scripts/gepa.py +0 -0
  234. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/scripts/install.py +0 -0
  235. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/scripts/rl.py +0 -0
  236. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/scripts/setup.py +0 -0
  237. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/scripts/train.py +0 -0
  238. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/scripts/tui.py +0 -0
  239. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/scripts/vllm.py +0 -0
  240. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/serve/__init__.py +0 -0
  241. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/serve/client/env_client.py +0 -0
  242. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/serve/client/zmq_env_client.py +0 -0
  243. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/serve/server/__init__.py +0 -0
  244. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/serve/server/env_router.py +0 -0
  245. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/serve/server/env_server.py +0 -0
  246. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/serve/server/env_worker.py +0 -0
  247. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/serve/server/zmq_env_server.py +0 -0
  248. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/serve/types.py +0 -0
  249. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/types.py +0 -0
  250. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/utils/__init__.py +0 -0
  251. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/utils/async_utils.py +0 -0
  252. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/utils/client_utils.py +0 -0
  253. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/utils/config_utils.py +0 -0
  254. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/utils/data_utils.py +0 -0
  255. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/utils/display_utils.py +0 -0
  256. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/utils/env_config_utils.py +0 -0
  257. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/utils/error_utils.py +0 -0
  258. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/utils/eval_display.py +0 -0
  259. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/utils/heartbeat.py +0 -0
  260. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/utils/import_utils.py +0 -0
  261. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/utils/install_utils.py +0 -0
  262. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/utils/interception_utils.py +0 -0
  263. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/utils/logging_utils.py +0 -0
  264. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/utils/message_utils.py +0 -0
  265. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/utils/metric_utils.py +0 -0
  266. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/utils/path_utils.py +0 -0
  267. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/utils/pricing_utils.py +0 -0
  268. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/utils/process_utils.py +0 -0
  269. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/utils/response_utils.py +0 -0
  270. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/utils/save_utils.py +0 -0
  271. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/utils/serve_utils.py +0 -0
  272. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/utils/thread_utils.py +0 -0
  273. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/utils/threaded_sandbox_client.py +0 -0
  274. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/utils/tool_utils.py +0 -0
  275. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/utils/usage_utils.py +0 -0
  276. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/utils/version_utils.py +0 -0
  277. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/artifact.py +0 -0
  278. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/config.py +0 -0
  279. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/env.py +0 -0
  280. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/model.py +0 -0
  281. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/program.py +0 -0
  282. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/runtime.py +0 -0
  283. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/runtime_handles.py +0 -0
  284. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/sandbox.py +0 -0
  285. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/state.py +0 -0
  286. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/task.py +0 -0
  287. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/taskset.py +0 -0
  288. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/toolset.py +0 -0
  289. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/types.py +0 -0
  290. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/user.py +0 -0
  291. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/utils/__init__.py +0 -0
  292. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/utils/binding_utils.py +0 -0
  293. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/utils/config_callable_utils.py +0 -0
  294. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/utils/config_utils.py +0 -0
  295. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/utils/endpoint_utils.py +0 -0
  296. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/utils/json_utils.py +0 -0
  297. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/utils/judge_utils.py +0 -0
  298. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/utils/lifecycle_utils.py +0 -0
  299. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/utils/mcp_proxy_utils.py +0 -0
  300. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/utils/mcp_utils.py +0 -0
  301. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/utils/object_utils.py +0 -0
  302. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/utils/program_utils.py +0 -0
  303. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/utils/runtime_owner_utils.py +0 -0
  304. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/utils/runtime_registry.py +0 -0
  305. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/utils/sandbox_program_utils.py +0 -0
  306. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/utils/sandbox_python_utils.py +0 -0
  307. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/utils/sandbox_utils.py +0 -0
  308. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/utils/scoring_utils.py +0 -0
  309. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/utils/serialization_utils.py +0 -0
  310. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/utils/task_freeze_utils.py +0 -0
  311. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/utils/taskset_utils.py +0 -0
  312. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/utils/tool_utils.py +0 -0
  313. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/utils/toolset_utils.py +0 -0
  314. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/utils/trajectory_utils.py +0 -0
  315. {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev16}/verifiers/v1/utils/usage_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: verifiers
3
- Version: 0.1.15.dev14
3
+ Version: 0.1.15.dev16
4
4
  Summary: Verifiers: Environments for LLM Reinforcement Learning
5
5
  Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
6
6
  Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
@@ -234,7 +234,7 @@ import verifiers as vf
234
234
 
235
235
 
236
236
  class MyTasksetConfig(vf.TasksetConfig):
237
- split: str = "train"
237
+ system_prompt: vf.SystemPrompt = "Reverse text exactly."
238
238
 
239
239
 
240
240
  class MyTaskset(vf.Taskset[MyTasksetConfig]):
@@ -247,7 +247,7 @@ class MyTaskset(vf.Taskset[MyTasksetConfig]):
247
247
  "max_turns": 1,
248
248
  }
249
249
  ]
250
- return [row for row in rows if row["split"] == self.config.split]
250
+ return [row for row in rows if row["split"] == split]
251
251
 
252
252
  @vf.reward(weight=1.0)
253
253
  async def contains_answer(self, task, state) -> float:
@@ -259,14 +259,20 @@ def load_taskset(config: MyTasksetConfig) -> MyTaskset:
259
259
 
260
260
 
261
261
  def load_environment(config: vf.EnvConfig) -> vf.Env:
262
- return vf.Env(taskset=vf.load_taskset(config=config.taskset))
262
+ """Loader pattern for all Taskset/Harness environments."""
263
+ return vf.Env(
264
+ taskset=vf.load_taskset(config=config.taskset),
265
+ harness=vf.load_harness(config=config.harness),
266
+ )
263
267
  ```
264
- If no harness is passed, `vf.Env` uses the base endpoint-backed harness. See
268
+ The child loader annotation defines the taskset config shape; root
269
+ `load_environment` stays typed as `vf.EnvConfig`. See
265
270
  **[BYO Harness](docs/byo-harness.md)** for the advanced v1 taskset/harness API.
266
271
  Reusable taskset and harness packages live in `tasksets` and `harnesses`.
267
272
  Install them with `uv add "verifiers[packages]"`, or with the narrower
268
- `verifiers[tasksets]` and `verifiers[harnesses]` extras. For example, Harbor
269
- task directories can run through the bundled OpenCode CLI harness with:
273
+ `verifiers[tasksets]`, `verifiers[harnesses]`, and backend-specific extras. For
274
+ example, Harbor task directories can run through the bundled OpenCode CLI
275
+ harness with:
270
276
 
271
277
  ```python
272
278
  from harnesses import OpenCode, OpenCodeConfig
@@ -299,7 +305,7 @@ id = "my-env"
299
305
  max_turns = 1
300
306
 
301
307
  [env.taskset]
302
- split = "train"
308
+ system_prompt = "Reverse text exactly."
303
309
 
304
310
  [env.taskset.scoring.contains_answer]
305
311
  weight = 1.0
@@ -145,7 +145,7 @@ import verifiers as vf
145
145
 
146
146
 
147
147
  class MyTasksetConfig(vf.TasksetConfig):
148
- split: str = "train"
148
+ system_prompt: vf.SystemPrompt = "Reverse text exactly."
149
149
 
150
150
 
151
151
  class MyTaskset(vf.Taskset[MyTasksetConfig]):
@@ -158,7 +158,7 @@ class MyTaskset(vf.Taskset[MyTasksetConfig]):
158
158
  "max_turns": 1,
159
159
  }
160
160
  ]
161
- return [row for row in rows if row["split"] == self.config.split]
161
+ return [row for row in rows if row["split"] == split]
162
162
 
163
163
  @vf.reward(weight=1.0)
164
164
  async def contains_answer(self, task, state) -> float:
@@ -170,14 +170,20 @@ def load_taskset(config: MyTasksetConfig) -> MyTaskset:
170
170
 
171
171
 
172
172
  def load_environment(config: vf.EnvConfig) -> vf.Env:
173
- return vf.Env(taskset=vf.load_taskset(config=config.taskset))
173
+ """Loader pattern for all Taskset/Harness environments."""
174
+ return vf.Env(
175
+ taskset=vf.load_taskset(config=config.taskset),
176
+ harness=vf.load_harness(config=config.harness),
177
+ )
174
178
  ```
175
- If no harness is passed, `vf.Env` uses the base endpoint-backed harness. See
179
+ The child loader annotation defines the taskset config shape; root
180
+ `load_environment` stays typed as `vf.EnvConfig`. See
176
181
  **[BYO Harness](docs/byo-harness.md)** for the advanced v1 taskset/harness API.
177
182
  Reusable taskset and harness packages live in `tasksets` and `harnesses`.
178
183
  Install them with `uv add "verifiers[packages]"`, or with the narrower
179
- `verifiers[tasksets]` and `verifiers[harnesses]` extras. For example, Harbor
180
- task directories can run through the bundled OpenCode CLI harness with:
184
+ `verifiers[tasksets]`, `verifiers[harnesses]`, and backend-specific extras. For
185
+ example, Harbor task directories can run through the bundled OpenCode CLI
186
+ harness with:
181
187
 
182
188
  ```python
183
189
  from harnesses import OpenCode, OpenCodeConfig
@@ -210,7 +216,7 @@ id = "my-env"
210
216
  max_turns = 1
211
217
 
212
218
  [env.taskset]
213
- split = "train"
219
+ system_prompt = "Reverse text exactly."
214
220
 
215
221
  [env.taskset.scoring.contains_answer]
216
222
  weight = 1.0
@@ -994,6 +994,29 @@ def test_load_toml_config_with_args_taskset_harness():
994
994
  assert "harness" not in result[0]
995
995
 
996
996
 
997
+ def test_load_toml_config_allows_taskset_id_without_env_id():
998
+ with tempfile.NamedTemporaryFile(suffix=".toml", delete=False, mode="w") as f:
999
+ f.write(
1000
+ "[[eval]]\n"
1001
+ "[eval.taskset]\n"
1002
+ 'id = "tasksets.harbor"\n'
1003
+ "num_examples = 10\n\n"
1004
+ "[eval.harness]\n"
1005
+ 'id = "harnesses.opencode"\n'
1006
+ "max_turns = 5\n"
1007
+ )
1008
+ f.flush()
1009
+ result = load_toml_config(Path(f.name))
1010
+
1011
+ assert result[0]["env_id"] == "tasksets.harbor"
1012
+ assert result[0]["env_args"] == {
1013
+ "config": {
1014
+ "taskset": {"id": "tasksets.harbor", "num_examples": 10},
1015
+ "harness": {"id": "harnesses.opencode", "max_turns": 5},
1016
+ },
1017
+ }
1018
+
1019
+
997
1020
  def test_load_toml_config_missing_env_section():
998
1021
  """TOML without [[eval]] section raises ValueError."""
999
1022
  with tempfile.NamedTemporaryFile(suffix=".toml", delete=False, mode="w") as f:
@@ -1,6 +1,4 @@
1
1
  from pathlib import Path
2
-
3
- import pytest
4
2
  import verifiers as vf
5
3
  from verifiers.scripts.init import init_environment
6
4
 
@@ -21,22 +19,27 @@ def test_init_default_writes_v0_stub(tmp_path: Path) -> None:
21
19
  assert "EnvTaskset" not in content
22
20
 
23
21
 
24
- def test_init_v1_writes_thin_taskset_template(tmp_path: Path) -> None:
22
+ def test_init_v1_writes_taskset_template(tmp_path: Path) -> None:
25
23
  init_environment("bar", path=str(tmp_path), v1=True)
26
24
  content = read_env_file(tmp_path, "bar")
27
25
 
28
26
  assert "class BarTasksetConfig(vf.TasksetConfig):" in content
29
27
  assert "class BarTaskset(vf.Taskset[BarTasksetConfig]):" in content
28
+ assert 'system_prompt: vf.SystemPrompt = "Answer exactly."' in content
29
+ assert '"""Taskset implementation for bar.' in content
30
30
  assert 'def load_tasks(self, split: vf.TaskSplit = "train") -> vf.Tasks:' in content
31
31
  assert (
32
- "def load_system_prompt(self, config: BarTasksetConfig) -> vf.SystemPrompt:"
32
+ '"""Return serializable task records as a list, generator, or Dataset."""'
33
33
  in content
34
34
  )
35
+ assert "def load_system_prompt" not in content
35
36
  assert "async def correct_answer(self, task: vf.Task, state: vf.State)" in content
36
37
  assert "def load_taskset(config: BarTasksetConfig) -> BarTaskset:" in content
38
+ assert '"""Typed taskset loader used by vf.load_taskset."""' in content
37
39
  assert "return BarTaskset(config=config)" in content
38
40
  assert "taskset=vf.load_taskset(config=config.taskset)" in content
39
- assert "harness=vf.Harness(config=config.harness)" in content
41
+ assert '"""Loader pattern for all Taskset/Harness environments."""' in content
42
+ assert "harness=vf.load_harness(config=config.harness)" in content
40
43
  assert "class EnvTaskset(" not in content
41
44
  assert "_default_" not in content
42
45
  assert 'tasks: str = "load_tasks"' not in content
@@ -49,8 +52,12 @@ def test_init_v1_template_loads_with_vf_load_environment(
49
52
  init_environment("loadable-v1", path=str(tmp_path), v1=True)
50
53
  monkeypatch.syspath_prepend(str(tmp_path / "loadable_v1"))
51
54
 
52
- with pytest.raises(RuntimeError, match="Load the system prompt"):
53
- vf.load_environment("loadable-v1")
55
+ env = vf.load_environment("loadable-v1")
56
+
57
+ dataset = env.get_dataset()
58
+
59
+ assert len(dataset) == 1
60
+ assert dataset[0]["answer"] == "cba"
54
61
 
55
62
 
56
63
  def test_init_v1_with_harness_writes_harness_stub(tmp_path: Path) -> None:
@@ -92,16 +99,14 @@ def test_init_openenv_writes_v1_taskset_template(tmp_path: Path) -> None:
92
99
  content = read_env_file(tmp_path, "openenv-sample")
93
100
  pyproject = (tmp_path / "openenv_sample" / "pyproject.toml").read_text()
94
101
 
95
- assert (
96
- "from tasksets.openenv import OpenEnvTaskset, OpenEnvTasksetConfig" in content
97
- )
102
+ assert "from tasksets import OpenEnvTaskset, OpenEnvTasksetConfig" in content
98
103
  assert (
99
104
  "def load_taskset(config: OpenEnvTasksetConfig) -> OpenEnvTaskset:" in content
100
105
  )
101
106
  assert "taskset=vf.load_taskset(config=config.taskset)" in content
102
- assert "harness=vf.Harness(config=config.harness)" in content
107
+ assert "harness=vf.load_harness(config=config.harness)" in content
103
108
  assert "vf.OpenEnvEnv" not in content
104
- assert '"tasksets>=0.1.1"' in pyproject
109
+ assert '"tasksets[openenv]>=0.1.1"' in pyproject
105
110
 
106
111
 
107
112
  def test_init_openenv_multifile_exports_taskset_loader(tmp_path: Path) -> None:
@@ -1670,18 +1670,151 @@ def test_system_prompt_direct_string_can_contain_colon() -> None:
1670
1670
 
1671
1671
 
1672
1672
  @pytest.mark.asyncio
1673
- async def test_harness_rejects_multiple_system_prompt_sources_by_default() -> None:
1673
+ async def test_harness_concats_multiple_system_prompt_sources_by_default() -> None:
1674
1674
  taskset = make_taskset(system_prompt="taskset sys")
1675
1675
  harness = make_harness(
1676
1676
  program={"fn": ref("config_program")}, system_prompt="harness sys"
1677
1677
  )
1678
1678
  Env(taskset=taskset, harness=harness)
1679
1679
  task = next(iter(taskset))
1680
+ state = await harness.setup_state(task, State.for_task(task))
1681
+
1682
+ assert state["system_prompt"] == [
1683
+ {"role": "system", "content": "harness sys"},
1684
+ {"role": "system", "content": "taskset sys"},
1685
+ ]
1686
+
1687
+
1688
+ @pytest.mark.asyncio
1689
+ async def test_task_system_prompt_overrides_taskset_side_at_runtime() -> None:
1690
+ taskset = make_taskset(system_prompt="taskset sys")
1691
+ harness = make_harness(program={"fn": ref("config_program")})
1692
+ Env(taskset=taskset, harness=harness)
1693
+ task = Task(
1694
+ {
1695
+ "prompt": [{"role": "user", "content": "hi"}],
1696
+ "system_prompt": "task sys",
1697
+ }
1698
+ ).freeze()
1699
+ state = await harness.setup_state(task, State.for_task(task))
1700
+
1701
+ assert state["system_prompt"] == [{"role": "system", "content": "task sys"}]
1702
+
1703
+
1704
+ @pytest.mark.asyncio
1705
+ async def test_task_override_is_resolved_before_harness_concat() -> None:
1706
+ taskset = make_taskset(system_prompt="taskset sys")
1707
+ harness = make_harness(
1708
+ program={"fn": ref("config_program")}, system_prompt="harness sys"
1709
+ )
1710
+ Env(taskset=taskset, harness=harness)
1711
+ task = Task(
1712
+ {
1713
+ "prompt": [{"role": "user", "content": "hi"}],
1714
+ "system_prompt": "task sys",
1715
+ }
1716
+ ).freeze()
1717
+ state = await harness.setup_state(task, State.for_task(task))
1680
1718
 
1681
- with pytest.raises(ValueError, match="Multiple system_prompt sources"):
1719
+ assert state["system_prompt"] == [
1720
+ {"role": "system", "content": "harness sys"},
1721
+ {"role": "system", "content": "task sys"},
1722
+ ]
1723
+
1724
+
1725
+ @pytest.mark.asyncio
1726
+ async def test_system_prompt_strategy_can_concat_taskset_side_first() -> None:
1727
+ taskset = make_taskset(system_prompt="taskset sys")
1728
+ harness = make_harness(
1729
+ program={"fn": ref("config_program")},
1730
+ system_prompt="harness sys",
1731
+ system_prompt_strategy="TH",
1732
+ )
1733
+ Env(taskset=taskset, harness=harness)
1734
+ task = next(iter(taskset))
1735
+ state = await harness.setup_state(task, State.for_task(task))
1736
+
1737
+ assert state["system_prompt"] == [
1738
+ {"role": "system", "content": "taskset sys"},
1739
+ {"role": "system", "content": "harness sys"},
1740
+ ]
1741
+
1742
+
1743
+ @pytest.mark.asyncio
1744
+ async def test_harness_can_reject_multiple_system_prompt_sides() -> None:
1745
+ taskset = make_taskset(system_prompt="taskset sys")
1746
+ harness = make_harness(
1747
+ program={"fn": ref("config_program")},
1748
+ system_prompt="harness sys",
1749
+ system_prompt_strategy="REJECT",
1750
+ )
1751
+ Env(taskset=taskset, harness=harness)
1752
+ task = next(iter(taskset))
1753
+
1754
+ with pytest.raises(ValueError, match="Multiple system_prompt sides"):
1682
1755
  await harness.setup_state(task, State.for_task(task))
1683
1756
 
1684
1757
 
1758
+ @pytest.mark.asyncio
1759
+ async def test_system_prompt_side_selection_uses_resolved_taskset_side() -> None:
1760
+ taskset = make_taskset(system_prompt="taskset sys")
1761
+ harness = make_harness(
1762
+ program={"fn": ref("config_program")},
1763
+ system_prompt="harness sys",
1764
+ system_prompt_strategy="T_OR_H",
1765
+ )
1766
+ Env(taskset=taskset, harness=harness)
1767
+ task = Task(
1768
+ {
1769
+ "prompt": [{"role": "user", "content": "hi"}],
1770
+ "system_prompt": "task sys",
1771
+ }
1772
+ ).freeze()
1773
+ state = await harness.setup_state(task, State.for_task(task))
1774
+
1775
+ assert state["system_prompt"] == [{"role": "system", "content": "task sys"}]
1776
+
1777
+
1778
+ @pytest.mark.asyncio
1779
+ async def test_system_prompt_side_selection_can_prefer_harness() -> None:
1780
+ taskset = make_taskset(system_prompt="taskset sys")
1781
+ harness = make_harness(
1782
+ program={"fn": ref("config_program")},
1783
+ system_prompt="harness sys",
1784
+ system_prompt_strategy="H_OR_T",
1785
+ )
1786
+ Env(taskset=taskset, harness=harness)
1787
+ task = next(iter(taskset))
1788
+ state = await harness.setup_state(task, State.for_task(task))
1789
+
1790
+ assert state["system_prompt"] == [{"role": "system", "content": "harness sys"}]
1791
+
1792
+
1793
+ @pytest.mark.asyncio
1794
+ async def test_system_prompt_strategy_can_select_exact_sides() -> None:
1795
+ taskset = make_taskset(system_prompt="taskset sys")
1796
+ task = Task({"prompt": [{"role": "user", "content": "hi"}]}).freeze()
1797
+
1798
+ harness_t = make_harness(
1799
+ program={"fn": ref("config_program")},
1800
+ system_prompt="harness sys",
1801
+ system_prompt_strategy="T",
1802
+ )
1803
+ Env(taskset=taskset, harness=harness_t)
1804
+ state_t = await harness_t.setup_state(task, State.for_task(task))
1805
+
1806
+ harness_h = make_harness(
1807
+ program={"fn": ref("config_program")},
1808
+ system_prompt="harness sys",
1809
+ system_prompt_strategy="H",
1810
+ )
1811
+ Env(taskset=taskset, harness=harness_h)
1812
+ state_h = await harness_h.setup_state(task, State.for_task(task))
1813
+
1814
+ assert state_t["system_prompt"] == [{"role": "system", "content": "taskset sys"}]
1815
+ assert state_h["system_prompt"] == [{"role": "system", "content": "harness sys"}]
1816
+
1817
+
1685
1818
  @pytest.mark.asyncio
1686
1819
  async def test_task_max_turns_overrides_harness_default() -> None:
1687
1820
  harness = make_harness(max_turns=9)
@@ -2823,6 +2956,114 @@ def load_environment(config: vf.EnvConfig) -> vf.Env:
2823
2956
  assert configured.taskset.get_dataset()[0]["answer"] == "configured:train"
2824
2957
 
2825
2958
 
2959
+ def test_load_environment_composes_component_package_without_root_loader(
2960
+ monkeypatch: pytest.MonkeyPatch,
2961
+ ) -> None:
2962
+ module_name = "component_only_taskset_package"
2963
+ module = types.ModuleType(module_name)
2964
+
2965
+ class LocalTasksetConfig(TasksetConfig):
2966
+ answer: str = "configured"
2967
+
2968
+ class LocalTaskset(Taskset[LocalTasksetConfig]):
2969
+ def load_tasks(self, split: vf.TaskSplit = "train") -> vf.Tasks:
2970
+ return [{"prompt": [], "answer": f"{split}:{self.config.answer}"}]
2971
+
2972
+ def load_taskset(config: LocalTasksetConfig) -> LocalTaskset:
2973
+ return LocalTaskset(config=config)
2974
+
2975
+ module.load_taskset = load_taskset
2976
+ monkeypatch.setitem(sys.modules, module_name, module)
2977
+
2978
+ env = vf.load_environment(
2979
+ "component-only-taskset-package",
2980
+ config={
2981
+ "taskset": {"answer": "composed"},
2982
+ "harness": {"max_turns": 3},
2983
+ },
2984
+ )
2985
+
2986
+ assert env.taskset.get_dataset()[0]["answer"] == "train:composed"
2987
+ assert type(env.harness) is Harness
2988
+ assert env.harness.config.max_turns == 3
2989
+
2990
+
2991
+ def test_load_environment_delegates_missing_child_loaders_by_config_id(
2992
+ monkeypatch: pytest.MonkeyPatch,
2993
+ ) -> None:
2994
+ env_module = types.ModuleType("thin_env_package")
2995
+ exec(
2996
+ """
2997
+ import verifiers as vf
2998
+
2999
+
3000
+ def load_environment(config: vf.EnvConfig) -> vf.Env:
3001
+ return vf.Env(
3002
+ taskset=vf.load_taskset(config=config.taskset),
3003
+ harness=vf.load_harness(config=config.harness),
3004
+ )
3005
+ """,
3006
+ env_module.__dict__,
3007
+ )
3008
+ taskset_module = types.ModuleType("external_taskset_pkg")
3009
+ exec(
3010
+ """
3011
+ import verifiers as vf
3012
+
3013
+
3014
+ class ExternalTasksetConfig(vf.TasksetConfig):
3015
+ answer: str = "external"
3016
+
3017
+
3018
+ class ExternalTaskset(vf.Taskset[ExternalTasksetConfig]):
3019
+ def load_tasks(self, split: vf.TaskSplit = "train") -> vf.Tasks:
3020
+ return [{"prompt": [], "answer": f"{split}:{self.config.answer}"}]
3021
+
3022
+
3023
+ def load_taskset(config: ExternalTasksetConfig) -> ExternalTaskset:
3024
+ return ExternalTaskset(config=config)
3025
+ """,
3026
+ taskset_module.__dict__,
3027
+ )
3028
+ harness_module = types.ModuleType("external_harness_pkg")
3029
+ exec(
3030
+ """
3031
+ import verifiers as vf
3032
+
3033
+
3034
+ class ExternalHarnessConfig(vf.HarnessConfig):
3035
+ mode: str = "default"
3036
+
3037
+
3038
+ class ExternalHarness(vf.Harness[ExternalHarnessConfig]):
3039
+ pass
3040
+
3041
+
3042
+ def load_harness(config: ExternalHarnessConfig) -> ExternalHarness:
3043
+ return ExternalHarness(config=config)
3044
+ """,
3045
+ harness_module.__dict__,
3046
+ )
3047
+ monkeypatch.setitem(sys.modules, "thin_env_package", env_module)
3048
+ monkeypatch.setitem(
3049
+ sys.modules, "empty_env_package", types.ModuleType("empty_env_package")
3050
+ )
3051
+ monkeypatch.setitem(sys.modules, "external_taskset_pkg", taskset_module)
3052
+ monkeypatch.setitem(sys.modules, "external_harness_pkg", harness_module)
3053
+
3054
+ config = {
3055
+ "taskset": {"id": "external-taskset-pkg", "answer": "delegated"},
3056
+ "harness": {"id": "external-harness-pkg", "mode": "custom"},
3057
+ }
3058
+ for env_id in ("thin-env-package", "empty-env-package"):
3059
+ env = vf.load_environment(env_id, config=config)
3060
+
3061
+ assert env.taskset.get_dataset()[0]["answer"] == "train:delegated"
3062
+ assert type(env.taskset).__name__ == "ExternalTaskset"
3063
+ assert type(env.harness).__name__ == "ExternalHarness"
3064
+ assert env.harness.config.mode == "custom"
3065
+
3066
+
2826
3067
  def test_load_environment_coerces_base_env_config_with_factory_annotations(
2827
3068
  monkeypatch: pytest.MonkeyPatch,
2828
3069
  ) -> None:
@@ -1,4 +1,4 @@
1
- __version__ = "0.1.15.dev14"
1
+ __version__ = "0.1.15.dev16"
2
2
 
3
3
  import importlib
4
4
  import os
@@ -95,6 +95,7 @@ __all__ = [
95
95
  "SandboxConfig",
96
96
  "SystemPrompt",
97
97
  "SystemPromptConfig",
98
+ "SystemPromptStrategy",
98
99
  "Toolset",
99
100
  "ToolLike",
100
101
  "ToolsetConfig",
@@ -232,6 +233,7 @@ _LAZY_IMPORTS = {
232
233
  "SignalConfig": "verifiers.v1:SignalConfig",
233
234
  "SystemPrompt": "verifiers.v1:SystemPrompt",
234
235
  "SystemPromptConfig": "verifiers.v1:SystemPromptConfig",
236
+ "SystemPromptStrategy": "verifiers.v1:SystemPromptStrategy",
235
237
  "ToolLike": "verifiers.v1:ToolLike",
236
238
  "Toolset": "verifiers.v1:Toolset",
237
239
  "ToolsetConfig": "verifiers.v1:ToolsetConfig",
@@ -343,6 +345,7 @@ if TYPE_CHECKING:
343
345
  SignalConfig,
344
346
  SystemPrompt,
345
347
  SystemPromptConfig,
348
+ SystemPromptStrategy,
346
349
  Task,
347
350
  Tasks,
348
351
  Taskset,
@@ -327,11 +327,12 @@ def apply_env_config_cli_overrides(
327
327
 
328
328
  module = import_env_module(env_id)
329
329
  env_load_func = getattr(module, "load_environment", None)
330
+ config_type: type[EnvConfig] | None
330
331
  if env_load_func is None:
331
- raise ValueError(f"Environment '{env_id}' does not expose load_environment.")
332
-
333
- sig = inspect.signature(env_load_func)
334
- config_type = env_config_annotation(env_load_func, sig)
332
+ config_type = EnvConfig
333
+ else:
334
+ sig = inspect.signature(env_load_func)
335
+ config_type = env_config_annotation(env_load_func, sig)
335
336
  if config_type is None:
336
337
  raise ValueError(
337
338
  "Taskset/harness CLI overrides require a v1 loader shaped as "
@@ -339,7 +340,8 @@ def apply_env_config_cli_overrides(
339
340
  )
340
341
 
341
342
  merged_env_args = dict(env_args)
342
- child_types = env_config_child_types(module, config_type)
343
+ base_config_data = explicit_config_data(merged_env_args.get("config", {}))
344
+ child_types = env_config_child_types(module, config_type, base_config_data)
343
345
  base_config = load_env_config(
344
346
  module,
345
347
  config_type,
@@ -356,7 +358,6 @@ def apply_env_config_cli_overrides(
356
358
  except ConfigFileError as exc:
357
359
  raise ValueError(f"Invalid taskset/harness override: {exc}") from exc
358
360
 
359
- base_config_data = explicit_config_data(merged_env_args.get("config", {}))
360
361
  override_config_data = explicit_config_data(config)
361
362
  merged_env_args["config"] = merge_config_data(
362
363
  base_config_data,